91 files changed, 9 insertions, 21010 deletions
diff --git a/Makefile b/Makefile
index d65d4c4968e..cf6555c782d 100644
--- a/Makefile
+++ b/Makefile
@@ -112,8 +112,6 @@ linux \
 linux-i965 \
 linux-alpha \
 linux-alpha-static \
-linux-cell \
-linux-cell-debug \
 linux-debug \
 linux-dri \
 linux-dri-debug \
diff --git a/common.py b/common.py
index 5e2967fc59b..5578f72af1f 100644
--- a/common.py
+++ b/common.py
@@ -83,7 +83,7 @@ def AddOptions(opts):
 	opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
 											 allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
 	opts.Add(EnumOption('platform', 'target platform', host_platform,
-											 allowed_values=('linux', 'cell', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8')))
+											 allowed_values=('linux', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8')))
 	opts.Add(BoolOption('embedded', 'embedded build', 'no'))
 	opts.Add('toolchain', 'compiler toolchain', default_toolchain)
 	opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
diff --git a/configs/linux-cell b/configs/linux-cell
deleted file mode 100644
index 7f38da971d1..00000000000
--- a/configs/linux-cell
+++ /dev/null
@@ -1,71 +0,0 @@
-# linux-cell  (non-debug build)
-
-include $(TOP)/configs/linux
-
-CONFIG_NAME = linux-cell
-
-
-# Omiting other gallium drivers:
-GALLIUM_DRIVERS_DIRS = cell softpipe trace rbug identity
-
-
-# Compiler and flags
-CC = ppu32-gcc
-CXX = ppu32-g++
-HOST_CC = gcc
-APP_CC = gcc
-APP_CXX = g++
-
-OPT_FLAGS = -O3
-
-# Cell SDK location
-## For SDK 2.1: (plus, remove -DSPU_MAIN_PARAM_LONG_LONG below)
-#SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr
-## For SDK 3.0:
-SDK = /opt/cell/sdk/usr
-
-
-
-COMMON_C_CPP_FLAGS = $(OPT_FLAGS) -Wall -Winline \
-	-fPIC -m32 -mabi=altivec -maltivec \
-	-I. -I$(SDK)/include \
-	-DGALLIUM_CELL $(DEFINES)
-
-CFLAGS = $(COMMON_C_CPP_FLAGS) -Wmissing-prototypes -std=c99
-
-CXXFLAGS = $(COMMON_C_CPP_FLAGS)
-
-
-SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \
-	gallium gallium/winsys gallium/targets glu
-
-# Build no traditional Mesa drivers:
-DRIVER_DIRS =
-
-
-MKDEP_OPTIONS = -fdepend -Y
-
-
-GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread \
-	-L$(SDK)/lib -m32 -Wl,-m,elf32ppc -R$(SDK)/lib -lspe2
-
-
-CELL_SPU_LIB = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a
-
-
-### SPU stuff
-
-SPU_CC = spu-gcc
-
-SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \
-	-I. -I$(SDK)/spu/include -I$(TOP)/src/mesa/ $(INCLUDE_DIRS) \
-	-DSPU_MAIN_PARAM_LONG_LONG \
-	-include spu_intrinsics.h
-
-SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm
-
-SPU_AR = ppu-ar
-SPU_AR_FLAGS = -qcs
-
-SPU_EMBED = ppu32-embedspu
-SPU_EMBED_FLAGS = -m32
diff --git a/configs/linux-cell-debug b/configs/linux-cell-debug
deleted file mode 100644
index 42f3245edc9..00000000000
--- a/configs/linux-cell-debug
+++ /dev/null
@@ -1,10 +0,0 @@
-# linux-cell-debug
-
-include $(TOP)/configs/linux-cell
-
-# just override name and OPT_FLAGS here:
-
-CONFIG_NAME = linux-cell-debug
-
-OPT_FLAGS = -g -DDEBUG
-
diff --git a/docs/cell.html b/docs/cell.html
deleted file mode 100644
index 30626b60b42..00000000000
--- a/docs/cell.html
+++ /dev/null
@@ -1,138 +0,0 @@
-<HTML>
-
-<TITLE>Cell Driver</TITLE>
-
-<link rel="stylesheet" type="text/css" href="mesa.css"></head>
-
-<BODY>
-
-<H1>Mesa/Gallium Cell Driver</H1>
-
-<p>
-The Mesa
-<a href="http://en.wikipedia.org/wiki/Cell_%28microprocessor%29" target="_parent">Cell</a>
-driver is part of the 
-<a href="http://wiki.freedesktop.org/wiki/Software/gallium" target="_parent">Gallium3D</a>
-architecture.
-Tungsten Graphics did the original implementation of the Cell driver.
-</p>
-
-
-<H2>Source Code</H2>
-
-<p>
-The latest Cell driver source code is on the master branch of the Mesa
-git repository.
-</p>
-<p>
-To build the driver you'll need the IBM Cell SDK (version 2.1 or 3.0).
-To use the driver you'll need a Cell system, such as a PS3 running Linux,
-or the Cell Simulator (untested, though).
-</p>
-
-<p>
-If using Cell SDK 2.1, see the configs/linux-cell file for some
-special changes.
-</p>
-
-<p>
-To compile the code, run <code>make linux-cell</code>.
-Or to build in debug mode, run <code>make linux-cell-debug</code>.
-</p>
-
-<p>
-To use the library, make sure your current directory is the top of the
-Mesa tree, then set <code>LD_LIBRARY_PATH</code> like this:
-<pre>
-  export LD_LIBRARY_PATH=$PWD/lib/gallium:$PWD/lib/
-</pre>
-
-<p>
-Verify that the Cell driver is being used by running
-<code>progs/xdemos/glxinfo</code> and looking for:
-<pre>
-  OpenGL renderer string: Gallium 0.3, Cell on Xlib
-</pre>
-
-
-<H2>Driver Implementation Summary</H2>
-
-<p>
-Rasterization is parallelized across the SPUs in a tiled-based manner.
-Batches of transformed triangles are sent to the SPUs (actually, pulled by from
-main memory by the SPUs).
-Each SPU loops over a set of 32x32-pixel screen tiles, rendering the triangles
-into each tile.
-Because of the limited SPU memory, framebuffer tiles are paged in/out of
-SPU local store as needed.
-Similarly, textures are tiled and brought into local store as needed.
-</p>
-
-
-<H2>Status</H2>
-
-<p>
-As of October 2008, the driver runs quite a few OpenGL demos.
-Features that work include:
-</p>
-<ul>
-<li>Point/line/triangle rendering, glDrawPixels
-<li>2D, NPOT and cube texture maps with nearest/linear/mipmap filtering
-<li>Dynamic SPU code generation for fragment shaders, but not complete
-<li>Dynamic SPU code generation for fragment ops (blend, Z-test, etc), but not complete
-<li>Dynamic PPU/PPC code generation for vertex shaders, but not complete
-</ul>
-<p>
-Performance has recently improved with the addition of PPC code generation
-for vertex shaders, but the code quality isn't too great yet.
-</p>
-<p>
-Another bottleneck is SwapBuffers.  It may be the limiting factor for
-many simple GL tests.
-</p>
-
-
-
-<H2>Debug Options</H2>
-
-<p>
-The CELL_DEBUG env var can be set to a comma-separated list of one or
-more of the following debug options:
-</p>
-<ul>
-<li><b>checker</b> - use a different background clear color for each SPU.
-   This lets you see which SPU is rendering which screen tiles.
-<li><b>sync</b> - wait/synchronize after each DMA transfer
-<li><b>asm</b> - print generated SPU assembly code to stdout
-<li><b>fragops</b> - emit fragment ops debug messages
-<li><b>fragopfallback</b> - don't use codegen for fragment ops
-<li><b>cmd</b> - print SPU commands as their received
-<li><b>cache</b> - print texture cache statistics when program exits
-</ul>
-<p>
-Note that some of these options may only work for linux-cell-debug builds.
-</p>
-
-<p>
-If the GALLIUM_NOPPC env var is set, PPC code generation will not be used
-and vertex shaders will be run with the TGSI interpreter.
-</p>
-<p>
-If the GALLIUM_NOCELL env var is set, the softpipe driver will be used
-intead of the Cell driver.
-This is useful for comparison/validation.
-</p>
-
-
-
-<H2>Contributing</H2>
-
-<p>
-If you're interested in contributing to the effort, familiarize yourself
-with the code, join the <a href="lists.html">mesa3d-dev mailing list</a>,
-and describe what you'd like to do.
-</p>
-
-
-</BODY>
-</HTML>
diff --git a/docs/contents.html b/docs/contents.html
index 8882e731879..e3cea2a7ce3 100644
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -78,8 +78,7 @@ a:visited {
 <li><a href="devinfo.html" target="MainFrame">Development Notes</a>
 <li><a href="sourcedocs.html" target="MainFrame">Source Documentation</a>
 <li><a href="subset.html" target="MainFrame">Mesa Subset Driver</a>
-<LI><A HREF="dispatch.html" target="MainFrame">GL Dispatch</A>
-<li><a href="cell.html" target="MainFrame">Cell Driver</A>
+<li><a href="dispatch.html" target="MainFrame">GL Dispatch</a>
 </ul>
 
 <b>Links</b>
diff --git a/docs/news.html b/docs/news.html
index 91284922880..d6a2aa8e6dd 100644
--- a/docs/news.html
+++ b/docs/news.html
@@ -217,7 +217,7 @@ This is a bug-fix release.
 <h2>January 24, 2008</h2>
 
 <p>
-Added a new page describing the <a href="cell.html">Mesa Cell driver</a>.
+Added a new page describing the Mesa Cell driver.
 </p>
 
 
diff --git a/docs/relnotes-7.12.html b/docs/relnotes-7.12.html
index 0d2211358cf..393b1124e58 100644
--- a/docs/relnotes-7.12.html
+++ b/docs/relnotes-7.12.html
@@ -74,6 +74,8 @@ tbd
   by the gallium drivers for this hardware.</li>
 <li>Removed the i965g driver, which was broken and with nobody in sight to fix
   the situation</li>
+<li>Removed the Gallium cell driver; it was just a burden on Gallium
+  development and nobody seems to use it.</li>
 </ul>
 
 
diff --git a/docs/relnotes-7.5.html b/docs/relnotes-7.5.html
index 56deca6a86c..a25ca8efc11 100644
--- a/docs/relnotes-7.5.html
+++ b/docs/relnotes-7.5.html
@@ -61,7 +61,7 @@ baa7a1e850b6e39bae58868fd0684004  MesaGLUT-7.5.tar.bz2
     <ul>
     <li>softpipe - a software/reference driver
     <li>i915 - Intel 915/945 driver
-    <li><a href="cell.html">Cell</a> - IBM/Sony/Toshiba Cell processor driver
+    <li>Cell - IBM/Sony/Toshiba Cell processor driver
     <li>nouveau (for NVIDIA GPUs) and R300 for (AMD/ATI R300).
         <b>PLEASE NOTE: these drivers are incomplete and still under development.
         It's probably NOT worthwhile to report any bugs unless you have patches.
diff --git a/docs/sourcetree.html b/docs/sourcetree.html
index 3f100df49e1..e26c653abbe 100644
--- a/docs/sourcetree.html
+++ b/docs/sourcetree.html
@@ -86,7 +86,6 @@ each directory.
         interfaces
     <li><b>drivers</b> - Gallium3D device drivers
       <ul>
-      <li><b>cell</b> - Driver for Cell processor.
       <li><b>i915</b> - Driver for Intel i915/i945.
       <li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation.
       <li><b>nv*</b> - Drivers for NVIDIA GPUs.
diff --git a/doxygen/gallium.doc b/doxygen/gallium.doc
index f0ff36075a5..e81b02e1aa6 100644
--- a/doxygen/gallium.doc
+++ b/doxygen/gallium.doc
@@ -34,7 +34,6 @@
   - Pipe drivers:
     - \ref softpipe
     - \ref i915g
-    - Cell driver (cell_context.h, cell_winsys.h)
     - \ref failover
 
   - Winsys drivers:
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 15de20cb3a3..5c65533308c 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -63,7 +63,6 @@ C_SOURCES := \
 	rtasm/rtasm_cpu.c \
 	rtasm/rtasm_execmem.c \
 	rtasm/rtasm_ppc.c \
-	rtasm/rtasm_ppc_spe.c \
 	rtasm/rtasm_x86sse.c \
 	tgsi/tgsi_build.c \
 	tgsi/tgsi_dump.c \
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
deleted file mode 100644
index 53a0e722cff..00000000000
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ /dev/null
@@ -1,1067 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file
- * Real-time assembly generation interface for Cell B.E. SPEs.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- * \author Brian Paul
- */
-
-
-#include <stdio.h>
-#include "pipe/p_compiler.h"
-#include "util/u_memory.h"
-#include "rtasm_ppc_spe.h"
-
-
-#ifdef GALLIUM_CELL
-/**
- * SPE instruction types
- *
- * There are 6 primary instruction encodings used on the Cell's SPEs.  Each of
- * the following unions encodes one type.
- *
- * \bug
- * If, at some point, we start generating SPE code from a little-endian host
- * these unions will not work.
- */
-/*@{*/
-/**
- * Encode one output register with two input registers
- */
-union spe_inst_RR {
-    uint32_t bits;
-    struct {
-	unsigned op:11;
-	unsigned rB:7;
-	unsigned rA:7;
-	unsigned rT:7;
-    } inst;
-};
-
-
-/**
- * Encode one output register with three input registers
- */
-union spe_inst_RRR {
-    uint32_t bits;
-    struct {
-	unsigned op:4;
-	unsigned rT:7;
-	unsigned rB:7;
-	unsigned rA:7;
-	unsigned rC:7;
-    } inst;
-};
-
-
-/**
- * Encode one output register with one input reg. and a 7-bit signed immed
- */
-union spe_inst_RI7 {
-    uint32_t bits;
-    struct {
-	unsigned op:11;
-	unsigned i7:7;
-	unsigned rA:7;
-	unsigned rT:7;
-    } inst;
-};
-
-
-/**
- * Encode one output register with one input reg. and an 8-bit signed immed
- */
-union spe_inst_RI8 {
-    uint32_t bits;
-    struct {
-	unsigned op:10;
-	unsigned i8:8;
-	unsigned rA:7;
-	unsigned rT:7;
-    } inst;
-};
-
-
-/**
- * Encode one output register with one input reg. and a 10-bit signed immed
- */
-union spe_inst_RI10 {
-    uint32_t bits;
-    struct {
-	unsigned op:8;
-	unsigned i10:10;
-	unsigned rA:7;
-	unsigned rT:7;
-    } inst;
-};
-
-
-/**
- * Encode one output register with a 16-bit signed immediate
- */
-union spe_inst_RI16 {
-    uint32_t bits;
-    struct {
-	unsigned op:9;
-	unsigned i16:16;
-	unsigned rT:7;
-    } inst;
-};
-
-
-/**
- * Encode one output register with a 18-bit signed immediate
- */
-union spe_inst_RI18 {
-    uint32_t bits;
-    struct {
-	unsigned op:7;
-	unsigned i18:18;
-	unsigned rT:7;
-    } inst;
-};
-/*@}*/
-
-
-static void
-indent(const struct spe_function *p)
-{
-   int i;
-   for (i = 0; i < p->indent; i++) {
-      putchar(' ');
-   }
-}
-
-
-static const char *
-rem_prefix(const char *longname)
-{
-   return longname + 4;
-}
-
-
-static const char *
-reg_name(int reg)
-{
-   switch (reg) {
-   case SPE_REG_SP:
-      return "$sp";
-   case SPE_REG_RA:
-      return "$lr";
-   default:
-      {
-         /* cycle through four buffers to handle multiple calls per printf */
-         static char buf[4][10];
-         static int b = 0;
-         b = (b + 1) % 4;
-         sprintf(buf[b], "$%d", reg);
-         return buf[b];
-      }
-   }
-}
-
-
-static void
-emit_instruction(struct spe_function *p, uint32_t inst_bits)
-{
-   if (!p->store)
-      return;  /* out of memory, drop the instruction */
-
-   if (p->num_inst == p->max_inst) {
-      /* allocate larger buffer */
-      uint32_t *newbuf;
-      p->max_inst *= 2;  /* 2x larger */
-      newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16);
-      if (newbuf) {
-         memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE);
-      }
-      align_free(p->store);
-      p->store = newbuf;
-      if (!p->store) {
-         /* out of memory */
-         p->num_inst = 0;
-         return;
-      }
-   }
-
-   p->store[p->num_inst++] = inst_bits;
-}
-
-
-
-static void emit_RR(struct spe_function *p, unsigned op, int rT,
-		    int rA, int rB, const char *name)
-{
-    union spe_inst_RR inst;
-    inst.inst.op = op;
-    inst.inst.rB = rB;
-    inst.inst.rA = rA;
-    inst.inst.rT = rT;
-    emit_instruction(p, inst.bits);
-    if (p->print) {
-       indent(p);
-       printf("%s\t%s, %s, %s\n",
-              rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB));
-    }
-}
-
-
-static void emit_RRR(struct spe_function *p, unsigned op, int rT,
-                     int rA, int rB, int rC, const char *name)
-{
-    union spe_inst_RRR inst;
-    inst.inst.op = op;
-    inst.inst.rT = rT;
-    inst.inst.rB = rB;
-    inst.inst.rA = rA;
-    inst.inst.rC = rC;
-    emit_instruction(p, inst.bits);
-    if (p->print) {
-       indent(p);
-       printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT),
-              reg_name(rA), reg_name(rB), reg_name(rC));
-    }
-}
-
-
-static void emit_RI7(struct spe_function *p, unsigned op, int rT,
-		     int rA, int imm, const char *name)
-{
-    union spe_inst_RI7 inst;
-    inst.inst.op = op;
-    inst.inst.i7 = imm;
-    inst.inst.rA = rA;
-    inst.inst.rT = rT;
-    emit_instruction(p, inst.bits);
-    if (p->print) {
-       indent(p);
-       printf("%s\t%s, %s, 0x%x\n",
-              rem_prefix(name), reg_name(rT), reg_name(rA), imm);
-    }
-}
-
-
-
-static void emit_RI8(struct spe_function *p, unsigned op, int rT,
-		     int rA, int imm, const char *name)
-{
-    union spe_inst_RI8 inst;
-    inst.inst.op = op;
-    inst.inst.i8 = imm;
-    inst.inst.rA = rA;
-    inst.inst.rT = rT;
-    emit_instruction(p, inst.bits);
-    if (p->print) {
-       indent(p);
-       printf("%s\t%s, %s, 0x%x\n",
-              rem_prefix(name), reg_name(rT), reg_name(rA), imm);
-    }
-}
-
-
-
-static void emit_RI10(struct spe_function *p, unsigned op, int rT,
-		      int rA, int imm, const char *name)
-{
-    union spe_inst_RI10 inst;
-    inst.inst.op = op;
-    inst.inst.i10 = imm;
-    inst.inst.rA = rA;
-    inst.inst.rT = rT;
-    emit_instruction(p, inst.bits);
-    if (p->print) {
-       indent(p);
-       printf("%s\t%s, %s, 0x%x\n",
-              rem_prefix(name), reg_name(rT), reg_name(rA), imm);
-    }
-}
-
-
-/** As above, but do range checking on signed immediate value */
-static void emit_RI10s(struct spe_function *p, unsigned op, int rT,
-                       int rA, int imm, const char *name)
-{
-    assert(imm <= 511);
-    assert(imm >= -512);
-    emit_RI10(p, op, rT, rA, imm, name);
-}
-
-
-static void emit_RI16(struct spe_function *p, unsigned op, int rT,
-		      int imm, const char *name)
-{
-    union spe_inst_RI16 inst;
-    inst.inst.op = op;
-    inst.inst.i16 = imm;
-    inst.inst.rT = rT;
-    emit_instruction(p, inst.bits);
-    if (p->print) {
-       indent(p);
-       printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
-    }
-}
-
-
-static void emit_RI18(struct spe_function *p, unsigned op, int rT,
-		      int imm, const char *name)
-{
-    union spe_inst_RI18 inst;
-    inst.inst.op = op;
-    inst.inst.i18 = imm;
-    inst.inst.rT = rT;
-    emit_instruction(p, inst.bits);
-    if (p->print) {
-       indent(p);
-       printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
-    }
-}
-
-
-#define EMIT(_name, _op) \
-void _name (struct spe_function *p) \
-{ \
-   emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \
-}
-
-#define EMIT_(_name, _op) \
-void _name (struct spe_function *p, int rT) \
-{ \
-   emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \
-}
-
-#define EMIT_R(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA) \
-{ \
-   emit_RR(p, _op, rT, rA, 0, __FUNCTION__);                 \
-}
-
-#define EMIT_RR(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int rB) \
-{ \
-   emit_RR(p, _op, rT, rA, rB, __FUNCTION__);                \
-}
-
-#define EMIT_RRR(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \
-{ \
-   emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__);           \
-}
-
-#define EMIT_RI7(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
-   emit_RI7(p, _op, rT, rA, imm, __FUNCTION__);              \
-}
-
-#define EMIT_RI8(_name, _op, bias) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
-   emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__);       \
-}
-
-#define EMIT_RI10(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
-   emit_RI10(p, _op, rT, rA, imm, __FUNCTION__);             \
-}
-
-#define EMIT_RI10s(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
-   emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__);             \
-}
-
-#define EMIT_RI16(_name, _op) \
-void _name (struct spe_function *p, int rT, int imm) \
-{ \
-   emit_RI16(p, _op, rT, imm, __FUNCTION__);                 \
-}
-
-#define EMIT_RI18(_name, _op) \
-void _name (struct spe_function *p, int rT, int imm) \
-{ \
-   emit_RI18(p, _op, rT, imm, __FUNCTION__);                 \
-}
-
-#define EMIT_I16(_name, _op) \
-void _name (struct spe_function *p, int imm) \
-{ \
-   emit_RI16(p, _op, 0, imm, __FUNCTION__);                  \
-}
-
-#include "rtasm_ppc_spe.h"
-
-
-
-/**
- * Initialize an spe_function.
- * \param code_size  initial size of instruction buffer to allocate, in bytes.
- *                   If zero, use a default.
- */
-void spe_init_func(struct spe_function *p, unsigned code_size)
-{
-    uint i;
-
-    if (!code_size)
-       code_size = 64;
-
-    p->num_inst = 0;
-    p->max_inst = code_size / SPE_INST_SIZE;
-    p->store = align_malloc(code_size, 16);
-
-    p->set_count = 0;
-    memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0]));
-
-    /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
-     */
-    p->regs[0] = p->regs[1] = p->regs[2] = 1;
-    for (i = 80; i <= 127; i++) {
-      p->regs[i] = 1;
-    }
-
-    p->print = FALSE;
-    p->indent = 0;
-}
-
-
-void spe_release_func(struct spe_function *p)
-{
-    assert(p->num_inst <= p->max_inst);
-    if (p->store != NULL) {
-        align_free(p->store);
-    }
-    p->store = NULL;
-}
-
-
-/** Return current code size in bytes. */
-unsigned spe_code_size(const struct spe_function *p)
-{
-   return p->num_inst * SPE_INST_SIZE;
-}
-
-
-/**
- * Allocate a SPE register.
- * \return register index or -1 if none left.
- */
-int spe_allocate_available_register(struct spe_function *p)
-{
-   unsigned i;
-   for (i = 0; i < SPE_NUM_REGS; i++) {
-      if (p->regs[i] == 0) {
-         p->regs[i] = 1;
-         return i;
-      }
-   }
-
-   return -1;
-}
-
-
-/**
- * Mark the given SPE register as "allocated".
- */
-int spe_allocate_register(struct spe_function *p, int reg)
-{
-   assert(reg < SPE_NUM_REGS);
-   assert(p->regs[reg] == 0);
-   p->regs[reg] = 1;
-   return reg;
-}
-
-
-/**
- * Mark the given SPE register as "unallocated".  Note that this should
- * only be used on registers allocated in the current register set; an
- * assertion will fail if an attempt is made to deallocate a register
- * allocated in an earlier register set.
- */
-void spe_release_register(struct spe_function *p, int reg)
-{
-   assert(reg >= 0);
-   assert(reg < SPE_NUM_REGS);
-   assert(p->regs[reg] == 1);
-
-   p->regs[reg] = 0;
-}
-
-/**
- * Start a new set of registers.  This can be called if
- * it will be difficult later to determine exactly what
- * registers were actually allocated during a code generation
- * sequence, and you really just want to deallocate all of them.
- */
-void spe_allocate_register_set(struct spe_function *p)
-{
-   uint i;
-
-   /* Keep track of the set count.  If it ever wraps around to 0, 
-    * we're in trouble.
-    */
-   p->set_count++;
-   assert(p->set_count > 0);
-
-   /* Increment the allocation count of all registers currently
-    * allocated.  Then any registers that are allocated in this set
-    * will be the only ones with a count of 1; they'll all be released
-    * when the register set is released.
-    */
-   for (i = 0; i < SPE_NUM_REGS; i++) {
-      if (p->regs[i] > 0)
-         p->regs[i]++;
-   }
-}
-
-void spe_release_register_set(struct spe_function *p)
-{
-   uint i;
-
-   /* If the set count drops below zero, we're in trouble. */
-   assert(p->set_count > 0);
-   p->set_count--;
-
-   /* Drop the allocation level of all registers.  Any allocated
-    * during this register set will drop to 0 and then become
-    * available.
-    */
-   for (i = 0; i < SPE_NUM_REGS; i++) {
-      if (p->regs[i] > 0)
-         p->regs[i]--;
-   }
-}
-
-
-unsigned
-spe_get_registers_used(const struct spe_function *p, ubyte used[])
-{
-   unsigned i, num = 0;
-   /* only count registers in the range available to callers */
-   for (i = 2; i < 80; i++) {
-      if (p->regs[i]) {
-         used[num++] = i;
-      }
-   }
-   return num;
-}
-
-
-void
-spe_print_code(struct spe_function *p, boolean enable)
-{
-   p->print = enable;
-}
-
-
-void
-spe_indent(struct spe_function *p, int spaces)
-{
-   p->indent += spaces;
-}
-
-
-void
-spe_comment(struct spe_function *p, int rel_indent, const char *s)
-{
-   if (p->print) {
-      p->indent += rel_indent;
-      indent(p);
-      p->indent -= rel_indent;
-      printf("# %s\n", s);
-   }
-}
-
-
-/**
- * Load quad word.
- * NOTE: offset is in bytes and the least significant 4 bits must be zero!
- */
-void spe_lqd(struct spe_function *p, int rT, int rA, int offset)
-{
-   const boolean pSave = p->print;
-
-   /* offset must be a multiple of 16 */
-   assert(offset % 16 == 0);
-   /* offset must fit in 10-bit signed int field, after shifting */
-   assert((offset >> 4) <= 511);
-   assert((offset >> 4) >= -512);
-
-   p->print = FALSE;
-   emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd");
-   p->print = pSave;
-
-   if (p->print) {
-      indent(p);
-      printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
-   }
-}
-
-
-/**
- * Store quad word.
- * NOTE: offset is in bytes and the least significant 4 bits must be zero!
- */
-void spe_stqd(struct spe_function *p, int rT, int rA, int offset)
-{
-   const boolean pSave = p->print;
-
-   /* offset must be a multiple of 16 */
-   assert(offset % 16 == 0);
-   /* offset must fit in 10-bit signed int field, after shifting */
-   assert((offset >> 4) <= 511);
-   assert((offset >> 4) >= -512);
-
-   p->print = FALSE;
-   emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd");
-   p->print = pSave;
-
-   if (p->print) {
-      indent(p);
-      printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
-   }
-}
-
-
-/**
- * For branch instructions:
- * \param d  if 1, disable interupts if branch is taken
- * \param e  if 1, enable interupts if branch is taken
- * If d and e are both zero, don't change interupt status (right?)
- */
-
-/** Branch Indirect to address in rA */
-void spe_bi(struct spe_function *p, int rA, int d, int e)
-{
-   emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Interupt Return */
-void spe_iret(struct spe_function *p, int rA, int d, int e)
-{
-   emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect and set link on external data */
-void spe_bisled(struct spe_function *p, int rT, int rA, int d,
-		int e)
-{
-   emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect and set link.  Save PC in rT, jump to rA. */
-void spe_bisl(struct spe_function *p, int rT, int rA, int d,
-		int e)
-{
-   emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if zero word.  If rT.word[0]==0, jump to rA. */
-void spe_biz(struct spe_function *p, int rT, int rA, int d, int e)
-{
-   emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if non-zero word.  If rT.word[0]!=0, jump to rA. */
-void spe_binz(struct spe_function *p, int rT, int rA, int d, int e)
-{
-   emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if zero halfword.  If rT.halfword[1]==0, jump to rA. */
-void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e)
-{
-   emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if non-zero halfword.  If rT.halfword[1]!=0, jump to rA. */
-void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e)
-{
-   emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-
-/* Hint-for-branch instructions
- */
-#if 0
-hbr;
-hbra;
-hbrr;
-#endif
-
-
-/* Control instructions
- */
-#if 0
-stop;
-EMIT_RR  (spe_stopd, 0x140);
-EMIT_    (spe_nop,   0x201);
-sync;
-EMIT_    (spe_dsync, 0x003);
-EMIT_R   (spe_mfspr, 0x00c);
-EMIT_R   (spe_mtspr, 0x10c);
-#endif
-
-
-/**
- ** Helper / "macro" instructions.
- ** Use somewhat verbose names as a reminder that these aren't native
- ** SPE instructions.
- **/
-
-
-void
-spe_load_float(struct spe_function *p, int rT, float x)
-{
-   if (x == 0.0f) {
-      spe_il(p, rT, 0x0);
-   }
-   else if (x == 0.5f) {
-      spe_ilhu(p, rT, 0x3f00);
-   }
-   else if (x == 1.0f) {
-      spe_ilhu(p, rT, 0x3f80);
-   }
-   else if (x == -1.0f) {
-      spe_ilhu(p, rT, 0xbf80);
-   }
-   else {
-      union {
-         float f;
-         unsigned u;
-      } bits;
-      bits.f = x;
-      spe_ilhu(p, rT, bits.u >> 16);
-      spe_iohl(p, rT, bits.u & 0xffff);
-   }
-}
-
-
-void
-spe_load_int(struct spe_function *p, int rT, int i)
-{
-   if (-32768 <= i && i <= 32767) {
-      spe_il(p, rT, i);
-   }
-   else {
-      spe_ilhu(p, rT, i >> 16);
-      if (i & 0xffff)
-         spe_iohl(p, rT, i & 0xffff);
-   }
-}
-
-void spe_load_uint(struct spe_function *p, int rT, uint ui)
-{
-   /* If the whole value is in the lower 18 bits, use ila, which
-    * doesn't sign-extend.  Otherwise, if the two halfwords of
-    * the constant are identical, use ilh.  Otherwise, if every byte of
-    * the desired value is 0x00 or 0xff, we can use Form Select Mask for
-    * Bytes Immediate (fsmbi) to load the value in a single instruction.
-    * Otherwise, in the general case, we have to use ilhu followed by iohl.
-    */
-   if ((ui & 0x0003ffff) == ui) {
-      spe_ila(p, rT, ui);
-   }
-   else if ((ui >> 16) == (ui & 0xffff)) {
-      spe_ilh(p, rT, ui & 0xffff);
-   }
-   else if (
-      ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) &&
-      ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) &&
-      ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) &&
-      ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000)
-   ) {
-      uint mask = 0;
-      /* fsmbi duplicates each bit in the given mask eight times,
-       * using a 16-bit value to initialize a 16-byte quadword.
-       * Each 4-bit nybble of the mask corresponds to a full word
-       * of the result; look at the value and figure out the mask
-       * (replicated for each word in the quadword), and then
-       * form the "select mask" to get the value.
-       */
-      if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111;
-      if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222;
-      if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444;
-      if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888;
-      spe_fsmbi(p, rT, mask);
-   }
-   else {
-      /* The general case: this usually uses two instructions, but
-       * may use only one if the low-order 16 bits of each word are 0.
-       */
-      spe_ilhu(p, rT, ui >> 16);
-      if (ui & 0xffff)
-         spe_iohl(p, rT, ui & 0xffff);
-   }
-}
-
-/**
- * This function is constructed identically to spe_xor_uint() below.
- * Changes to one should be made in the other.
- */
-void
-spe_and_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
-   /* If we can, emit a single instruction, either And Byte Immediate
-    * (which uses the same constant across each byte), And Halfword Immediate
-    * (which sign-extends a 10-bit immediate to 16 bits and uses that
-    * across each halfword), or And Word Immediate (which sign-extends
-    * a 10-bit immediate to 32 bits).
-    *
-    * Otherwise, we'll need to use a temporary register.
-    */
-   uint tmp;
-
-   /* If the upper 23 bits are all 0s or all 1s, sign extension
-    * will work and we can use And Word Immediate
-    */
-   tmp = ui & 0xfffffe00;
-   if (tmp == 0xfffffe00 || tmp  == 0) {
-      spe_andi(p, rT, rA, ui & 0x000003ff);
-      return;
-   }
-   
-   /* If the ui field is symmetric along halfword boundaries and
-    * the upper 7 bits of each halfword are all 0s or 1s, we
-    * can use And Halfword Immediate
-    */
-   tmp = ui & 0xfe00fe00;
-   if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
-      spe_andhi(p, rT, rA, ui & 0x000003ff);
-      return;
-   }
-
-   /* If the ui field is symmetric in each byte, then we can use
-    * the And Byte Immediate instruction.
-    */
-   tmp = ui & 0x000000ff;
-   if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
-      spe_andbi(p, rT, rA, tmp);
-      return;
-   }
-
-   /* Otherwise, we'll have to use a temporary register. */
-   int tmp_reg = spe_allocate_available_register(p);
-   spe_load_uint(p, tmp_reg, ui);
-   spe_and(p, rT, rA, tmp_reg);
-   spe_release_register(p, tmp_reg);
-}
-
-
-/**
- * This function is constructed identically to spe_and_uint() above.
- * Changes to one should be made in the other.
- */
-void
-spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
-   /* If we can, emit a single instruction, either Exclusive Or Byte 
-    * Immediate (which uses the same constant across each byte), Exclusive 
-    * Or Halfword Immediate (which sign-extends a 10-bit immediate to 
-    * 16 bits and uses that across each halfword), or Exclusive Or Word 
-    * Immediate (which sign-extends a 10-bit immediate to 32 bits).
-    *
-    * Otherwise, we'll need to use a temporary register.
-    */
-   uint tmp;
-
-   /* If the upper 23 bits are all 0s or all 1s, sign extension
-    * will work and we can use Exclusive Or Word Immediate
-    */
-   tmp = ui & 0xfffffe00;
-   if (tmp == 0xfffffe00 || tmp  == 0) {
-      spe_xori(p, rT, rA, ui & 0x000003ff);
-      return;
-   }
-   
-   /* If the ui field is symmetric along halfword boundaries and
-    * the upper 7 bits of each halfword are all 0s or 1s, we
-    * can use Exclusive Or Halfword Immediate
-    */
-   tmp = ui & 0xfe00fe00;
-   if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
-      spe_xorhi(p, rT, rA, ui & 0x000003ff);
-      return;
-   }
-
-   /* If the ui field is symmetric in each byte, then we can use
-    * the Exclusive Or Byte Immediate instruction.
-    */
-   tmp = ui & 0x000000ff;
-   if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
-      spe_xorbi(p, rT, rA, tmp);
-      return;
-   }
-
-   /* Otherwise, we'll have to use a temporary register. */
-   int tmp_reg = spe_allocate_available_register(p);
-   spe_load_uint(p, tmp_reg, ui);
-   spe_xor(p, rT, rA, tmp_reg);
-   spe_release_register(p, tmp_reg);
-}
-
-void
-spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
-   /* If the comparison value is 9 bits or less, it fits inside a
-    * Compare Equal Word Immediate instruction.
-    */
-   if ((ui & 0x000001ff) == ui) {
-      spe_ceqi(p, rT, rA, ui);
-   }
-   /* Otherwise, we're going to have to load a word first. */
-   else {
-      int tmp_reg = spe_allocate_available_register(p);
-      spe_load_uint(p, tmp_reg, ui);
-      spe_ceq(p, rT, rA, tmp_reg);
-      spe_release_register(p, tmp_reg);
-   }
-}
-
-void
-spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
-   /* If the comparison value is 10 bits or less, it fits inside a
-    * Compare Logical Greater Than Word Immediate instruction.
-    */
-   if ((ui & 0x000003ff) == ui) {
-      spe_clgti(p, rT, rA, ui);
-   }
-   /* Otherwise, we're going to have to load a word first. */
-   else {
-      int tmp_reg = spe_allocate_available_register(p);
-      spe_load_uint(p, tmp_reg, ui);
-      spe_clgt(p, rT, rA, tmp_reg);
-      spe_release_register(p, tmp_reg);
-   }
-}
-
-void
-spe_splat(struct spe_function *p, int rT, int rA)
-{
-   /* Use a temporary, just in case rT == rA */
-   int tmp_reg = spe_allocate_available_register(p);
-   /* Duplicate bytes 0, 1, 2, and 3 across the whole register */
-   spe_ila(p, tmp_reg, 0x00010203);
-   spe_shufb(p, rT, rA, rA, tmp_reg);
-   spe_release_register(p, tmp_reg);
-}
-
-
-void
-spe_complement(struct spe_function *p, int rT, int rA)
-{
-   spe_nor(p, rT, rA, rA);
-}
-
-
-void
-spe_move(struct spe_function *p, int rT, int rA)
-{
-   /* Use different instructions depending on the instruction address
-    * to take advantage of the dual pipelines.
-    */
-   if (p->num_inst & 1)
-      spe_shlqbyi(p, rT, rA, 0);  /* odd pipe */
-   else
-      spe_ori(p, rT, rA, 0);  /* even pipe */
-}
-
-
-void
-spe_zero(struct spe_function *p, int rT)
-{
-   spe_xor(p, rT, rT, rT);
-}
-
-
-void
-spe_splat_word(struct spe_function *p, int rT, int rA, int word)
-{
-   assert(word >= 0);
-   assert(word <= 3);
-
-   if (word == 0) {
-      int tmp1 = rT;
-      spe_ila(p, tmp1, 66051);
-      spe_shufb(p, rT, rA, rA, tmp1);
-   }
-   else {
-      /* XXX review this, we may not need the rotqbyi instruction */
-      int tmp1 = rT;
-      int tmp2 = spe_allocate_available_register(p);
-
-      spe_ila(p, tmp1, 66051);
-      spe_rotqbyi(p, tmp2, rA, 4 * word);
-      spe_shufb(p, rT, tmp2, tmp2, tmp1);
-
-      spe_release_register(p, tmp2);
-   }
-}
-
-/**
- * For each 32-bit float element of rA and rB, choose the smaller of the
- * two, compositing them into the rT register.
- * 
- * The Float Compare Greater Than (fcgt) instruction will put 1s into
- * compare_reg where rA > rB, and 0s where rA <= rB.
- *
- * Then the Select Bits (selb) instruction will take bits from rA where
- * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA
- * where rA <= rB and from rB where rB > rA, which is exactly the
- * "min" operation.
- *
- * The compare_reg could in many cases be the same as rT, unless
- * rT == rA || rt == rB.  But since this is common in constructions
- * like "x = min(x, a)", we always allocate a new register to be safe.
- */
-void 
-spe_float_min(struct spe_function *p, int rT, int rA, int rB)
-{
-   int compare_reg = spe_allocate_available_register(p);
-   spe_fcgt(p, compare_reg, rA, rB);
-   spe_selb(p, rT, rA, rB, compare_reg);
-   spe_release_register(p, compare_reg);
-}
-
-/**
- * For each 32-bit float element of rA and rB, choose the greater of the
- * two, compositing them into the rT register.
- * 
- * The logic is similar to that of spe_float_min() above; the only
- * difference is that the registers on spe_selb() have been reversed,
- * so that the larger of the two is selected instead of the smaller.
- */
-void 
-spe_float_max(struct spe_function *p, int rT, int rA, int rB)
-{
-   int compare_reg = spe_allocate_available_register(p);
-   spe_fcgt(p, compare_reg, rA, rB);
-   spe_selb(p, rT, rB, rA, compare_reg);
-   spe_release_register(p, compare_reg);
-}
-
-#endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
deleted file mode 100644
index 65d9c774154..00000000000
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file
- * Real-time assembly generation interface for Cell B.E. SPEs.
- * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
- *
- * \author Ian Romanick <idr@us.ibm.com>
- * \author Brian Paul
- */
-
-#ifndef RTASM_PPC_SPE_H
-#define RTASM_PPC_SPE_H
-
-/** 4 bytes per instruction */
-#define SPE_INST_SIZE 4
-
-/** number of general-purpose SIMD registers */
-#define SPE_NUM_REGS  128
-
-/** Return Address register (aka $lr / Link Register) */
-#define SPE_REG_RA  0
-
-/** Stack Pointer register (aka $sp) */
-#define SPE_REG_SP  1
-
-
-struct spe_function
-{
-   uint32_t *store;  /**< instruction buffer */
-   uint num_inst;
-   uint max_inst;
-
-   /**
-    * The "set count" reflects the number of nested register sets
-    * are allowed.  In the unlikely case that we exceed the set count,
-    * register allocation will start to be confused, which is critical
-    * enough that we check for it.
-    */
-   unsigned char set_count;
-
-   /** 
-    * Flags for used and unused registers.  Each byte corresponds to a
-    * register; a 0 in that byte means that the register is available.
-    * A value of 1 means that the register was allocated in the current
-    * register set.  Any other value N means that the register was allocated
-    * N register sets ago.
-     *
-     * \sa
-     * spe_allocate_register, spe_allocate_available_register,
-     * spe_allocate_register_set, spe_release_register_set, spe_release_register, 
-     */
-    unsigned char regs[SPE_NUM_REGS];
-
-    boolean print; /**< print/dump instructions as they're emitted? */
-    int indent;    /**< number of spaces to indent */
-};
-
-
-extern void spe_init_func(struct spe_function *p, uint code_size);
-extern void spe_release_func(struct spe_function *p);
-extern uint spe_code_size(const struct spe_function *p);
-
-extern int spe_allocate_available_register(struct spe_function *p);
-extern int spe_allocate_register(struct spe_function *p, int reg);
-extern void spe_release_register(struct spe_function *p, int reg);
-extern void spe_allocate_register_set(struct spe_function *p);
-extern void spe_release_register_set(struct spe_function *p);
-
-extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]);
-
-extern void spe_print_code(struct spe_function *p, boolean enable);
-extern void spe_indent(struct spe_function *p, int spaces);
-extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
-
-
-#endif /* RTASM_PPC_SPE_H */
-
-#ifndef EMIT
-#define EMIT(_name, _op) \
-    extern void _name (struct spe_function *p);
-#define EMIT_(_name, _op) \
-    extern void _name (struct spe_function *p, int rT);
-#define EMIT_R(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int rA);
-#define EMIT_RR(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int rA, int rB);
-#define EMIT_RRR(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC);
-#define EMIT_RI7(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI8(_name, _op, bias) \
-    extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI10(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI10s(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI16(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int imm);
-#define EMIT_RI18(_name, _op) \
-    extern void _name (struct spe_function *p, int rT, int imm);
-#define EMIT_I16(_name, _op) \
-    extern void _name (struct spe_function *p, int imm);
-#define UNDEF_EMIT_MACROS
-#endif /* EMIT */
-
-
-/* Memory load / store instructions
- */
-EMIT_RR  (spe_lqx,  0x1c4)
-EMIT_RI16(spe_lqa,  0x061)
-EMIT_RI16(spe_lqr,  0x067)
-EMIT_RR  (spe_stqx, 0x144)
-EMIT_RI16(spe_stqa, 0x041)
-EMIT_RI16(spe_stqr, 0x047)
-EMIT_RI7 (spe_cbd,  0x1f4)
-EMIT_RR  (spe_cbx,  0x1d4)
-EMIT_RI7 (spe_chd,  0x1f5)
-EMIT_RI7 (spe_chx,  0x1d5)
-EMIT_RI7 (spe_cwd,  0x1f6)
-EMIT_RI7 (spe_cwx,  0x1d6)
-EMIT_RI7 (spe_cdd,  0x1f7)
-EMIT_RI7 (spe_cdx,  0x1d7)
-
-
-/* Constant formation instructions
- */
-EMIT_RI16(spe_ilh,   0x083)
-EMIT_RI16(spe_ilhu,  0x082)
-EMIT_RI16(spe_il,    0x081)
-EMIT_RI18(spe_ila,   0x021)
-EMIT_RI16(spe_iohl,  0x0c1)
-EMIT_RI16(spe_fsmbi, 0x065)
-
-
-
-/* Integer and logical instructions
- */
-EMIT_RR  (spe_ah,      0x0c8)
-EMIT_RI10(spe_ahi,     0x01d)
-EMIT_RR  (spe_a,       0x0c0)
-EMIT_RI10s(spe_ai,      0x01c)
-EMIT_RR  (spe_sfh,     0x048)
-EMIT_RI10(spe_sfhi,    0x00d)
-EMIT_RR  (spe_sf,      0x040)
-EMIT_RI10(spe_sfi,     0x00c)
-EMIT_RR  (spe_addx,    0x340)
-EMIT_RR  (spe_cg,      0x0c2)
-EMIT_RR  (spe_cgx,     0x342)
-EMIT_RR  (spe_sfx,     0x341)
-EMIT_RR  (spe_bg,      0x042)
-EMIT_RR  (spe_bgx,     0x343)
-EMIT_RR  (spe_mpy,     0x3c4)
-EMIT_RR  (spe_mpyu,    0x3cc)
-EMIT_RI10(spe_mpyi,    0x074)
-EMIT_RI10(spe_mpyui,   0x075)
-EMIT_RRR (spe_mpya,    0x00c)
-EMIT_RR  (spe_mpyh,    0x3c5)
-EMIT_RR  (spe_mpys,    0x3c7)
-EMIT_RR  (spe_mpyhh,   0x3c6)
-EMIT_RR  (spe_mpyhha,  0x346)
-EMIT_RR  (spe_mpyhhu,  0x3ce)
-EMIT_RR  (spe_mpyhhau, 0x34e)
-EMIT_R   (spe_clz,     0x2a5)
-EMIT_R   (spe_cntb,    0x2b4)
-EMIT_R   (spe_fsmb,    0x1b6)
-EMIT_R   (spe_fsmh,    0x1b5)
-EMIT_R   (spe_fsm,     0x1b4)
-EMIT_R   (spe_gbb,     0x1b2)
-EMIT_R   (spe_gbh,     0x1b1)
-EMIT_R   (spe_gb,      0x1b0)
-EMIT_RR  (spe_avgb,    0x0d3)
-EMIT_RR  (spe_absdb,   0x053)
-EMIT_RR  (spe_sumb,    0x253)
-EMIT_R   (spe_xsbh,    0x2b6)
-EMIT_R   (spe_xshw,    0x2ae)
-EMIT_R   (spe_xswd,    0x2a6)
-EMIT_RR  (spe_and,     0x0c1)
-EMIT_RR  (spe_andc,    0x2c1)
-EMIT_RI10s(spe_andbi,   0x016)
-EMIT_RI10s(spe_andhi,   0x015)
-EMIT_RI10s(spe_andi,    0x014)
-EMIT_RR  (spe_or,      0x041)
-EMIT_RR  (spe_orc,     0x2c9)
-EMIT_RI10s(spe_orbi,    0x006)
-EMIT_RI10s(spe_orhi,    0x005)
-EMIT_RI10s(spe_ori,     0x004)
-EMIT_R   (spe_orx,     0x1f0)
-EMIT_RR  (spe_xor,     0x241)
-EMIT_RI10s(spe_xorbi,   0x046)
-EMIT_RI10s(spe_xorhi,   0x045)
-EMIT_RI10s(spe_xori,    0x044)
-EMIT_RR  (spe_nand,    0x0c9)
-EMIT_RR  (spe_nor,     0x049)
-EMIT_RR  (spe_eqv,     0x249)
-EMIT_RRR (spe_selb,    0x008)
-EMIT_RRR (spe_shufb,   0x00b)
-
-
-/* Shift and rotate instructions
- */
-EMIT_RR  (spe_shlh,      0x05f)
-EMIT_RI7 (spe_shlhi,     0x07f)
-EMIT_RR  (spe_shl,       0x05b)
-EMIT_RI7 (spe_shli,      0x07b)
-EMIT_RR  (spe_shlqbi,    0x1db)
-EMIT_RI7 (spe_shlqbii,   0x1fb)
-EMIT_RR  (spe_shlqby,    0x1df)
-EMIT_RI7 (spe_shlqbyi,   0x1ff)
-EMIT_RR  (spe_shlqbybi,  0x1cf)
-EMIT_RR  (spe_roth,      0x05c)
-EMIT_RI7 (spe_rothi,     0x07c)
-EMIT_RR  (spe_rot,       0x058)
-EMIT_RI7 (spe_roti,      0x078)
-EMIT_RR  (spe_rotqby,    0x1dc)
-EMIT_RI7 (spe_rotqbyi,   0x1fc)
-EMIT_RR  (spe_rotqbybi,  0x1cc)
-EMIT_RR  (spe_rotqbi,    0x1d8)
-EMIT_RI7 (spe_rotqbii,   0x1f8)
-EMIT_RR  (spe_rothm,     0x05d)
-EMIT_RI7 (spe_rothmi,    0x07d)
-EMIT_RR  (spe_rotm,      0x059)
-EMIT_RI7 (spe_rotmi,     0x079)
-EMIT_RR  (spe_rotqmby,   0x1dd)
-EMIT_RI7 (spe_rotqmbyi,  0x1fd)
-EMIT_RR  (spe_rotqmbybi, 0x1cd)
-EMIT_RR  (spe_rotqmbi,   0x1c9)
-EMIT_RI7 (spe_rotqmbii,  0x1f9)
-EMIT_RR  (spe_rotmah,    0x05e)
-EMIT_RI7 (spe_rotmahi,   0x07e)
-EMIT_RR  (spe_rotma,     0x05a)
-EMIT_RI7 (spe_rotmai,    0x07a)
-
-
-/* Compare, branch, and halt instructions
- */
-EMIT_RR  (spe_heq,       0x3d8)
-EMIT_RI10(spe_heqi,      0x07f)
-EMIT_RR  (spe_hgt,       0x258)
-EMIT_RI10(spe_hgti,      0x04f)
-EMIT_RR  (spe_hlgt,      0x2d8)
-EMIT_RI10(spe_hlgti,     0x05f)
-EMIT_RR  (spe_ceqb,      0x3d0)
-EMIT_RI10(spe_ceqbi,     0x07e)
-EMIT_RR  (spe_ceqh,      0x3c8)
-EMIT_RI10(spe_ceqhi,     0x07d)
-EMIT_RR  (spe_ceq,       0x3c0)
-EMIT_RI10(spe_ceqi,      0x07c)
-EMIT_RR  (spe_cgtb,      0x250)
-EMIT_RI10(spe_cgtbi,     0x04e)
-EMIT_RR  (spe_cgth,      0x248)
-EMIT_RI10(spe_cgthi,     0x04d)
-EMIT_RR  (spe_cgt,       0x240)
-EMIT_RI10(spe_cgti,      0x04c)
-EMIT_RR  (spe_clgtb,     0x2d0)
-EMIT_RI10(spe_clgtbi,    0x05e)
-EMIT_RR  (spe_clgth,     0x2c8)
-EMIT_RI10(spe_clgthi,    0x05d)
-EMIT_RR  (spe_clgt,      0x2c0)
-EMIT_RI10(spe_clgti,     0x05c)
-EMIT_I16 (spe_br,        0x064)
-EMIT_I16 (spe_bra,       0x060)
-EMIT_RI16(spe_brsl,      0x066)
-EMIT_RI16(spe_brasl,     0x062)
-EMIT_RI16(spe_brnz,      0x042)
-EMIT_RI16(spe_brz,       0x040)
-EMIT_RI16(spe_brhnz,     0x046)
-EMIT_RI16(spe_brhz,      0x044)
-
-/* Control instructions
- */
-EMIT     (spe_lnop,      0x001)
-
-extern void
-spe_lqd(struct spe_function *p, int rT, int rA, int offset);
-
-extern void
-spe_stqd(struct spe_function *p, int rT, int rA, int offset);
-
-extern void spe_bi(struct spe_function *p, int rA, int d, int e);
-extern void spe_iret(struct spe_function *p, int rA, int d, int e);
-extern void spe_bisled(struct spe_function *p, int rT, int rA,
-    int d, int e);
-extern void spe_bisl(struct spe_function *p, int rT, int rA,
-    int d, int e);
-extern void spe_biz(struct spe_function *p, int rT, int rA,
-    int d, int e);
-extern void spe_binz(struct spe_function *p, int rT, int rA,
-    int d, int e);
-extern void spe_bihz(struct spe_function *p, int rT, int rA,
-    int d, int e);
-extern void spe_bihnz(struct spe_function *p, int rT, int rA,
-    int d, int e);
-
-
-/** Load/splat immediate float into rT. */
-extern void
-spe_load_float(struct spe_function *p, int rT, float x);
-
-/** Load/splat immediate int into rT. */
-extern void
-spe_load_int(struct spe_function *p, int rT, int i);
-
-/** Load/splat immediate unsigned int into rT. */
-extern void
-spe_load_uint(struct spe_function *p, int rT, uint ui);
-
-/** And immediate value into rT. */
-extern void
-spe_and_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Xor immediate value into rT. */
-extern void
-spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Compare equal with immediate value. */
-extern void
-spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Compare greater with immediate value. */
-extern void
-spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Replicate word 0 of rA across rT. */
-extern void
-spe_splat(struct spe_function *p, int rT, int rA);
-
-/** rT = complement_all_bits(rA). */
-extern void
-spe_complement(struct spe_function *p, int rT, int rA);
-
-/** rT = rA. */
-extern void
-spe_move(struct spe_function *p, int rT, int rA);
-
-/** rT = {0,0,0,0}. */
-extern void
-spe_zero(struct spe_function *p, int rT);
-
-/** rT = splat(rA, word) */
-extern void
-spe_splat_word(struct spe_function *p, int rT, int rA, int word);
-
-/** rT = float min(rA, rB) */
-extern void
-spe_float_min(struct spe_function *p, int rT, int rA, int rB);
-
-/** rT = float max(rA, rB) */
-extern void
-spe_float_max(struct spe_function *p, int rT, int rA, int rB);
-
-
-/* Floating-point instructions
- */
-EMIT_RR  (spe_fa,         0x2c4)
-EMIT_RR  (spe_dfa,        0x2cc)
-EMIT_RR  (spe_fs,         0x2c5)
-EMIT_RR  (spe_dfs,        0x2cd)
-EMIT_RR  (spe_fm,         0x2c6)
-EMIT_RR  (spe_dfm,        0x2ce)
-EMIT_RRR (spe_fma,        0x00e)
-EMIT_RR  (spe_dfma,       0x35c)
-EMIT_RRR (spe_fnms,       0x00d)
-EMIT_RR  (spe_dfnms,      0x35e)
-EMIT_RRR (spe_fms,        0x00f)
-EMIT_RR  (spe_dfms,       0x35d)
-EMIT_RR  (spe_dfnma,      0x35f)
-EMIT_R   (spe_frest,      0x1b8)
-EMIT_R   (spe_frsqest,    0x1b9)
-EMIT_RR  (spe_fi,         0x3d4)
-EMIT_RI8 (spe_csflt,      0x1da, 155)
-EMIT_RI8 (spe_cflts,      0x1d8, 173)
-EMIT_RI8 (spe_cuflt,      0x1db, 155)
-EMIT_RI8 (spe_cfltu,      0x1d9, 173)
-EMIT_R   (spe_frds,       0x3b9)
-EMIT_R   (spe_fesd,       0x3b8)
-EMIT_RR  (spe_dfceq,      0x3c3)
-EMIT_RR  (spe_dfcmeq,     0x3cb)
-EMIT_RR  (spe_dfcgt,      0x2c3)
-EMIT_RR  (spe_dfcmgt,     0x2cb)
-EMIT_RI7 (spe_dftsv,      0x3bf)
-EMIT_RR  (spe_fceq,       0x3c2)
-EMIT_RR  (spe_fcmeq,      0x3ca)
-EMIT_RR  (spe_fcgt,       0x2c2)
-EMIT_RR  (spe_fcmgt,      0x2ca)
-EMIT_R   (spe_fscrwr,     0x3ba)
-EMIT_    (spe_fscrrd,     0x398)
-
-
-/* Channel instructions
- */
-EMIT_R   (spe_rdch,       0x00d)
-EMIT_R   (spe_rdchcnt,    0x00f)
-EMIT_R   (spe_wrch,       0x10d)
-
-
-#ifdef UNDEF_EMIT_MACROS
-#undef EMIT
-#undef EMIT_
-#undef EMIT_R
-#undef EMIT_RR
-#undef EMIT_RRR
-#undef EMIT_RI7
-#undef EMIT_RI8
-#undef EMIT_RI10
-#undef EMIT_RI10s
-#undef EMIT_RI16
-#undef EMIT_RI18
-#undef EMIT_I16
-#undef UNDEF_EMIT_MACROS
-#endif /* EMIT_ */
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index 34bfa527db0..596c691e9c1 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -8,7 +8,7 @@
 
 
 /* Helper function to choose and instantiate one of the software rasterizers:
- * cell, llvmpipe, softpipe.
+ * llvmpipe, softpipe.
  */
 
 #ifdef GALLIUM_SOFTPIPE
@@ -19,21 +19,12 @@
 #include "llvmpipe/lp_public.h"
 #endif
 
-#ifdef GALLIUM_CELL
-#include "cell/ppu/cell_public.h"
-#endif
-
 
 static INLINE struct pipe_screen *
 sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
 {
    struct pipe_screen *screen = NULL;
 
-#if defined(GALLIUM_CELL)
-   if (screen == NULL && strcmp(driver, "cell") == 0)
-      screen = cell_create_screen(winsys);
-#endif
-
 #if defined(GALLIUM_LLVMPIPE)
    if (screen == NULL && strcmp(driver, "llvmpipe") == 0)
       screen = llvmpipe_create_screen(winsys);
@@ -54,9 +45,7 @@ sw_screen_create(struct sw_winsys *winsys)
    const char *default_driver;
    const char *driver;
 
-#if defined(GALLIUM_CELL)
-   default_driver = "cell";
-#elif defined(GALLIUM_LLVMPIPE)
+#if defined(GALLIUM_LLVMPIPE)
    default_driver = "llvmpipe";
 #elif defined(GALLIUM_SOFTPIPE)
    default_driver = "softpipe";
diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile
deleted file mode 100644
index 47aef7b05f6..00000000000
--- a/src/gallium/drivers/cell/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# Cell Gallium driver Makefile
-
-
-default:
-	( cd spu ; make )
-	( cd ppu ; make )
-
-
-
-clean:
-	( cd spu ; make clean )
-	( cd ppu ; make clean )
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
deleted file mode 100644
index a8cdde34aa7..00000000000
--- a/src/gallium/drivers/cell/common.h
+++ /dev/null
@@ -1,377 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Types and tokens which are common to the SPU and PPU code.
- */
-
-
-#ifndef CELL_COMMON_H
-#define CELL_COMMON_H
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_format.h"
-#include "pipe/p_state.h"
-#include <stdio.h>
-
-/** The standard assert macro doesn't seem to work reliably */
-#define ASSERT(x) \
-   if (!(x)) { \
-      ubyte *p = NULL; \
-      fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \
-              __FILE__, __LINE__, __FUNCTION__, #x);             \
-      *p = 0; \
-      exit(1); \
-   }
-
-
-#define JOIN(x, y) JOIN_AGAIN(x, y)
-#define JOIN_AGAIN(x, y) x ## y
-
-#define STATIC_ASSERT(e) \
-{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];}
-
-
-
-/** for sanity checking */
-#define ASSERT_ALIGN16(ptr) \
-  ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
-
-
-/** round up value to next multiple of 4 */
-#define ROUNDUP4(k)  (((k) + 0x3) & ~0x3)
-
-/** round up value to next multiple of 8 */
-#define ROUNDUP8(k)  (((k) + 0x7) & ~0x7)
-
-/** round up value to next multiple of 16 */
-#define ROUNDUP16(k)  (((k) + 0xf) & ~0xf)
-
-
-#define CELL_MAX_SPUS 8
-
-#define CELL_MAX_SAMPLERS 4
-#define CELL_MAX_TEXTURE_LEVELS 12  /* 2k x 2k */
-#define CELL_MAX_CONSTANTS 32  /**< number of float[4] constants */
-#define CELL_MAX_WIDTH 1024    /**< max framebuffer width */
-#define CELL_MAX_HEIGHT 1024   /**< max framebuffer width */
-
-#define TILE_SIZE 32
-
-
-/**
- * The low byte of a mailbox word contains the command opcode.
- * Remaining higher bytes are command specific.
- */
-#define CELL_CMD_OPCODE_MASK 0xff
-
-#define CELL_CMD_EXIT                 1
-#define CELL_CMD_CLEAR_SURFACE        2
-#define CELL_CMD_FINISH               3
-#define CELL_CMD_RENDER               4
-#define CELL_CMD_BATCH                5
-#define CELL_CMD_RELEASE_VERTS        6
-#define CELL_CMD_STATE_FRAMEBUFFER   10
-#define CELL_CMD_STATE_FRAGMENT_OPS  11
-#define CELL_CMD_STATE_SAMPLER       12
-#define CELL_CMD_STATE_TEXTURE       13
-#define CELL_CMD_STATE_VERTEX_INFO   14
-#define CELL_CMD_STATE_VIEWPORT      15
-#define CELL_CMD_STATE_UNIFORMS      16
-#define CELL_CMD_STATE_VS_ARRAY_INFO 17
-#define CELL_CMD_STATE_BIND_VS       18
-#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
-#define CELL_CMD_STATE_ATTRIB_FETCH  20
-#define CELL_CMD_STATE_FS_CONSTANTS  21
-#define CELL_CMD_STATE_RASTERIZER    22
-#define CELL_CMD_VS_EXECUTE          23
-#define CELL_CMD_FLUSH_BUFFER_RANGE  24
-#define CELL_CMD_FENCE               25
-
-
-/** Command/batch buffers */
-#define CELL_NUM_BUFFERS 4
-#define CELL_BUFFER_SIZE (4*1024)  /**< 16KB would be the max */
-
-#define CELL_BUFFER_STATUS_FREE 10
-#define CELL_BUFFER_STATUS_USED 20
-
-/** Debug flags */
-#define CELL_DEBUG_CHECKER              (1 << 0)
-#define CELL_DEBUG_ASM                  (1 << 1)
-#define CELL_DEBUG_SYNC                 (1 << 2)
-#define CELL_DEBUG_FRAGMENT_OPS         (1 << 3)
-#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
-#define CELL_DEBUG_CMD                  (1 << 5)
-#define CELL_DEBUG_CACHE                (1 << 6)
-
-#define CELL_FENCE_IDLE      0
-#define CELL_FENCE_EMITTED   1
-#define CELL_FENCE_SIGNALLED 2
-
-#define CELL_FACING_FRONT    0
-#define CELL_FACING_BACK     1
-
-struct cell_fence
-{
-   /** There's a 16-byte status qword per SPU */
-   volatile uint status[CELL_MAX_SPUS][4];
-};
-
-#ifdef __SPU__
-typedef vector unsigned int opcode_t;
-#else
-typedef unsigned int opcode_t[4];
-#endif
-
-/**
- * Fence command sent to SPUs.  In response, the SPUs will write
- * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory.
- */
-struct cell_command_fence
-{
-   opcode_t opcode;      /**< CELL_CMD_FENCE */
-   struct cell_fence *fence;
-   uint32_t pad_[3];
-};
-
-
-/**
- * Command to specify per-fragment operations state and generated code.
- * Note that this is a variant-length structure, allocated with as 
- * much memory as needed to hold the generated code; the "code"
- * field *must* be the last field in the structure.  Also, the entire
- * length of the structure (including the variant code field) must be
- * a multiple of 8 bytes; we require that this structure itself be
- * a multiple of 8 bytes, and that the generated code also be a multiple
- * of 8 bytes.
- *
- * Also note that the dsa, blend, blend_color fields are really only needed
- * for the fallback/C per-pixel code.  They're not used when we generate
- * dynamic SPU fragment code (which is the normal case), and will eventually
- * be removed from this structure.
- */
-struct cell_command_fragment_ops
-{
-   opcode_t opcode;      /**< CELL_CMD_STATE_FRAGMENT_OPS */
-
-   /* Fields for the fallback case */
-   struct pipe_depth_stencil_alpha_state dsa;
-   struct pipe_blend_state blend;
-   struct pipe_blend_color blend_color;
-
-   /* Fields for the generated SPU code */
-   unsigned total_code_size;
-   unsigned front_code_index;
-   unsigned back_code_index;
-   /* this field has variant length, and must be the last field in 
-    * the structure
-    */
-   unsigned code[0];
-};
-
-
-/** Max instructions for fragment programs */
-#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512
-
-/**
- * Command to send a fragment program to SPUs.
- */
-struct cell_command_fragment_program
-{
-   opcode_t opcode;      /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
-   uint num_inst;        /**< Number of instructions */
-   uint32_t pad[3];
-   unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
-};
-
-
-/**
- * Tell SPUs about the framebuffer size, location
- */
-struct cell_command_framebuffer
-{
-   opcode_t opcode;     /**< CELL_CMD_STATE_FRAMEBUFFER */
-   int width, height;
-   void *color_start, *depth_start;
-   enum pipe_format color_format, depth_format;
-   uint32_t pad_[2];
-};
-
-
-/**
- * Tell SPUs about rasterizer state.
- */
-struct cell_command_rasterizer
-{
-   opcode_t opcode;    /**< CELL_CMD_STATE_RASTERIZER */
-   struct pipe_rasterizer_state rasterizer;
-   /*uint32_t pad[1];*/
-};
-
-
-/**
- * Clear framebuffer to the given value/color.
- */
-struct cell_command_clear_surface
-{
-   opcode_t opcode;     /**< CELL_CMD_CLEAR_SURFACE */
-   uint surface; /**< Temporary: 0=color, 1=Z */
-   uint value;
-   uint32_t pad[2];
-};
-
-
-/**
- * Array info used by the vertex shader's vertex puller.
- */
-struct cell_array_info
-{
-   uint64_t base;      /**< Base address of the 0th element. */
-   uint attr;          /**< Attribute that this state is for. */
-   uint pitch;         /**< Byte pitch from one entry to the next. */
-   uint size;
-   uint function_offset;
-};
-
-
-struct cell_attribute_fetch_code
-{
-   uint64_t base;
-   uint size;
-};
-
-
-struct cell_buffer_range
-{
-   uint64_t base;
-   unsigned size;
-};
-
-
-struct cell_shader_info
-{
-   uint64_t declarations;
-   uint64_t instructions;
-   uint64_t  immediates;
-
-   unsigned num_outputs;
-   unsigned num_declarations;
-   unsigned num_instructions;
-   unsigned num_immediates;
-};
-
-
-#define SPU_VERTS_PER_BATCH 64
-struct cell_command_vs
-{
-   opcode_t opcode;       /**< CELL_CMD_VS_EXECUTE */
-   uint64_t vOut[SPU_VERTS_PER_BATCH];
-   unsigned num_elts;
-   unsigned elts[SPU_VERTS_PER_BATCH];
-   float plane[12][4];
-   unsigned nr_planes;
-   unsigned nr_attrs;
-};
-
-
-struct cell_command_render
-{
-   opcode_t opcode;   /**< CELL_CMD_RENDER */
-   uint prim_type;    /**< PIPE_PRIM_x */
-   uint num_verts;
-   uint vertex_size;  /**< bytes per vertex */
-   uint num_indexes;
-   uint vertex_buf;  /**< which cell->buffer[] contains the vertex data */
-   float xmin, ymin, xmax, ymax;  /* XXX another dummy field */
-   uint min_index;
-   boolean inline_verts;
-   uint32_t pad_[1];
-};
-
-
-struct cell_command_release_verts
-{
-   opcode_t opcode;         /**< CELL_CMD_RELEASE_VERTS */
-   uint vertex_buf;    /**< in [0, CELL_NUM_BUFFERS-1] */
-   uint32_t pad_[3];
-};
-
-
-struct cell_command_sampler
-{
-   opcode_t opcode;         /**< CELL_CMD_STATE_SAMPLER */
-   uint unit;
-   struct pipe_sampler_state state;
-   uint32_t pad_[3];
-};
-
-
-struct cell_command_texture
-{
-   opcode_t opcode;     /**< CELL_CMD_STATE_TEXTURE */
-   uint target;         /**< PIPE_TEXTURE_x */
-   uint unit;
-   void *start[CELL_MAX_TEXTURE_LEVELS];   /**< Address in main memory */
-   ushort width[CELL_MAX_TEXTURE_LEVELS];
-   ushort height[CELL_MAX_TEXTURE_LEVELS];
-   ushort depth[CELL_MAX_TEXTURE_LEVELS];
-};
-
-
-#define MAX_SPU_FUNCTIONS 12
-/**
- * Used to tell the PPU about the address of particular functions in the
- * SPU's address space.
- */
-struct cell_spu_function_info
-{
-   uint num;
-   char names[MAX_SPU_FUNCTIONS][16];
-   uint addrs[MAX_SPU_FUNCTIONS];
-   char pad[12];   /**< Pad struct to multiple of 16 bytes (256 currently) */
-};
-
-
-/** This is the object passed to spe_create_thread() */
-PIPE_ALIGN_TYPE(16,
-struct cell_init_info
-{
-   unsigned id;
-   unsigned num_spus;
-   unsigned debug_flags;  /**< mask of CELL_DEBUG_x flags */
-   float inv_timebase;    /**< 1.0/timebase, for perf measurement */
-
-   /** Buffers for command batches, vertex/index data */
-   ubyte *buffers[CELL_NUM_BUFFERS];
-   uint *buffer_status;  /**< points at cell_context->buffer_status */
-
-   struct cell_spu_function_info *spu_functions;
-});
-
-
-#endif /* CELL_COMMON_H */
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile
deleted file mode 100644
index c92f8e5cba2..00000000000
--- a/src/gallium/drivers/cell/ppu/Makefile
+++ /dev/null
@@ -1,86 +0,0 @@
-# Gallium3D Cell driver: PPU code
-
-# This makefile builds the libcell.a library which gets pulled into
-# the main libGL.so library
-
-
-TOP = ../../../../..
-include $(TOP)/configs/current
-
-
-# This is the "top-level" cell PPU driver code, will get pulled into libGL.so
-# by the winsys Makefile.
-CELL_LIB = ../libcell.a
-
-
-# This is the SPU code.  We'd like to be able to put this into the libcell.a
-# archive with the PPU code, but nesting .a libs doesn't seem to work.
-# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile
-SPU_CODE_MODULE = ../spu/g3d_spu.a
-
-
-SOURCES = \
-	cell_batch.c \
-	cell_clear.c \
-	cell_context.c \
-	cell_draw_arrays.c \
-	cell_fence.c \
-	cell_flush.c \
-	cell_gen_fragment.c \
-	cell_gen_fp.c \
-	cell_state_derived.c \
-	cell_state_emit.c \
-	cell_state_shader.c \
-	cell_pipe_state.c \
-	cell_screen.c \
-	cell_state_vertex.c \
-	cell_spu.c \
-	cell_surface.c \
-	cell_texture.c \
-	cell_vbuf.c \
-	cell_vertex_fetch.c \
-	cell_vertex_shader.c
-
-
-OBJECTS = $(SOURCES:.c=.o) \
-
-INCLUDE_DIRS = \
-	-I$(TOP)/src/mesa \
-	-I$(TOP)/src/gallium/include \
-	-I$(TOP)/src/gallium/auxiliary \
-	-I$(TOP)/src/gallium/drivers
-
-.c.o:
-	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
-
-
-.c.s:
-	$(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
-
-
-default: $(CELL_LIB)
-
-
-$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE)
-#	ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work
-	ar -ru $(CELL_LIB) $(OBJECTS)
-
-#$(PROG): $(PPU_OBJECTS)
-#	$(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS)
-
-
-
-clean:
-	rm -f *.o *~ $(CELL_LIB)
-
-
-
-depend: $(SOURCES)
-	rm -f depend
-	touch depend
-	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
-
-include depend
-
-
-
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c
deleted file mode 100644
index fe144f8b849..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_batch.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_fence.h"
-#include "cell_spu.h"
-
-
-
-/**
- * Search the buffer pool for an empty/free buffer and return its index.
- * Buffers are used for storing vertex data, state and commands which
- * will be sent to the SPUs.
- * If no empty buffers are available, wait for one.
- * \return buffer index in [0, CELL_NUM_BUFFERS-1]
- */
-uint
-cell_get_empty_buffer(struct cell_context *cell)
-{
-   static uint prev_buffer = 0;
-   uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS;
-   uint tries = 0;
-
-   /* Find a buffer that's marked as free by all SPUs */
-   while (1) {
-      uint spu, num_free = 0;
-
-      for (spu = 0; spu < cell->num_spus; spu++) {
-         if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) {
-            num_free++;
-
-            if (num_free == cell->num_spus) {
-               /* found a free buffer, now mark status as used */
-               for (spu = 0; spu < cell->num_spus; spu++) {
-                  cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
-               }
-               /*
-               printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries);
-               */
-               prev_buffer = buf;
-
-               /* release tex buffer associated w/ prev use of this batch buf */
-               cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]);
-
-               return buf;
-            }
-         }
-         else {
-            break;
-         }
-      }
-
-      /* try next buf */
-      buf = (buf + 1) % CELL_NUM_BUFFERS;
-
-      tries++;
-      if (tries == 100) {
-         /*
-         printf("PPU WAITING for buffer...\n");
-         */
-      }
-   }
-}
-
-
-/**
- * Append a fence command to the current batch buffer.
- * Note that we're sure there's always room for this because of the
- * adjusted size check in cell_batch_free_space().
- */
-static void
-emit_fence(struct cell_context *cell)
-{
-   const uint batch = cell->cur_batch;
-   const uint size = cell->buffer_size[batch];
-   struct cell_command_fence *fence_cmd;
-   struct cell_fence *fence = &cell->fenced_buffers[batch].fence;
-   uint i;
-
-   /* set fence status to emitted, not yet signalled */
-   for (i = 0; i < cell->num_spus; i++) {
-      fence->status[i][0] = CELL_FENCE_EMITTED;
-   }
-
-   STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0);
-   ASSERT(size % 16 == 0);
-   ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
-
-   fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size);
-   fence_cmd->opcode[0] = CELL_CMD_FENCE;
-   fence_cmd->fence = fence;
-
-   /* update batch buffer size */
-   cell->buffer_size[batch] = size + sizeof(struct cell_command_fence);
-}
-
-
-/**
- * Flush the current batch buffer to the SPUs.
- * An empty buffer will be found and set as the new current batch buffer
- * for subsequent commands/data.
- */
-void
-cell_batch_flush(struct cell_context *cell)
-{
-   static boolean flushing = FALSE;
-   uint batch = cell->cur_batch;
-   uint size = cell->buffer_size[batch];
-   uint spu, cmd_word;
-
-   assert(!flushing);
-
-   if (size == 0)
-      return;
-
-   /* Before we use this batch buffer, make sure any fenced texture buffers
-    * are released.
-    */
-   if (cell->fenced_buffers[batch].head) {
-      emit_fence(cell);
-      size = cell->buffer_size[batch];
-   }
-
-   flushing = TRUE;
-
-   assert(batch < CELL_NUM_BUFFERS);
-
-   /*
-   printf("cell_batch_dispatch: buf %u at %p, size %u\n",
-          batch, &cell->buffer[batch][0], size);
-   */
-     
-   /*
-    * Build "BATCH" command and send to all SPUs.
-    */
-   cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
-
-   for (spu = 0; spu < cell->num_spus; spu++) {
-      assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED);
-      send_mbox_message(cell_global.spe_contexts[spu], cmd_word);
-   }
-
-   /* When the SPUs are done copying the buffer into their locals stores
-    * they'll write a BUFFER_STATUS_FREE message into the buffer_status[]
-    * array indicating that the PPU can re-use the buffer.
-    */
-
-   batch = cell_get_empty_buffer(cell);
-
-   cell->buffer_size[batch] = 0;  /* empty */
-   cell->cur_batch = batch;
-
-   flushing = FALSE;
-}
-
-
-/**
- * Return the number of bytes free in the current batch buffer.
- */
-uint
-cell_batch_free_space(const struct cell_context *cell)
-{
-   uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
-   free -= sizeof(struct cell_command_fence);
-   return free;
-}
-
-
-/**
- * Allocate space in the current batch buffer for 'bytes' space.
- * Bytes must be a multiple of 16 bytes.  Allocation will be 16 byte aligned.
- * \return address in batch buffer to put data
- */
-void *
-cell_batch_alloc16(struct cell_context *cell, uint bytes)
-{
-   void *pos;
-   uint size;
-
-   ASSERT(bytes % 16 == 0);
-   ASSERT(bytes <= CELL_BUFFER_SIZE);
-   ASSERT(cell->cur_batch >= 0);
-
-#ifdef ASSERT
-   {
-      uint spu;
-      for (spu = 0; spu < cell->num_spus; spu++) {
-         ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
-                 == CELL_BUFFER_STATUS_USED);
-      }
-   }
-#endif
-
-   size = cell->buffer_size[cell->cur_batch];
-
-   if (bytes > cell_batch_free_space(cell)) {
-      cell_batch_flush(cell);
-      size = 0;
-   }
-
-   ASSERT(size % 16 == 0);
-   ASSERT(size + bytes <= CELL_BUFFER_SIZE);
-
-   pos = (void *) (cell->buffer[cell->cur_batch] + size);
-
-   cell->buffer_size[cell->cur_batch] = size + bytes;
-
-   return pos;
-}
-
-
-/**
- * One-time init of batch buffers.
- */
-void
-cell_init_batch_buffers(struct cell_context *cell)
-{
-   uint spu, buf;
-
-   /* init command, vertex/index buffer info */
-   for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
-      cell->buffer_size[buf] = 0;
-
-      /* init batch buffer status values,
-       * mark 0th buffer as used, rest as free.
-       */
-      for (spu = 0; spu < cell->num_spus; spu++) {
-         if (buf == 0)
-            cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
-         else
-            cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
-      }
-   }
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h
deleted file mode 100644
index 290136031a1..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_batch.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_BATCH_H
-#define CELL_BATCH_H
-
-#include "pipe/p_compiler.h"
-
-
-struct cell_context;
-
-
-extern uint
-cell_get_empty_buffer(struct cell_context *cell);
-
-extern void
-cell_batch_flush(struct cell_context *cell);
-
-extern uint
-cell_batch_free_space(const struct cell_context *cell);
-
-extern void *
-cell_batch_alloc16(struct cell_context *cell, uint bytes);
-
-extern void
-cell_init_batch_buffers(struct cell_context *cell);
-
-
-#endif /* CELL_BATCH_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
deleted file mode 100644
index 6a525ef4e41..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Authors
- *  Brian Paul
- */
-
-#include <stdio.h>
-#include <assert.h>
-#include <stdint.h>
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "cell/common.h"
-#include "cell_clear.h"
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_state.h"
-
-
-/**
- * Called via pipe->clear()
- */
-void
-cell_clear(struct pipe_context *pipe, unsigned buffers,
-           const pipe_color_union *color,
-           double depth, unsigned stencil)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   if (cell->dirty)
-      cell_update_derived(cell);
-
-   if (buffers & PIPE_CLEAR_COLOR) {
-      uint surfIndex = 0;
-      union util_color uc;
-
-      util_pack_color(color->f, cell->framebuffer.cbufs[0]->format, &uc);
-
-      /* Build a CLEAR command and place it in the current batch buffer */
-      STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
-      struct cell_command_clear_surface *clr
-         = (struct cell_command_clear_surface *)
-         cell_batch_alloc16(cell, sizeof(*clr));
-      clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
-      clr->surface = surfIndex;
-      clr->value = uc.ui;
-   }
-
-   if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
-      uint surfIndex = 1;
-      uint clearValue;
-
-      clearValue = util_pack_z_stencil(cell->framebuffer.zsbuf->format,
-                                       depth, stencil);
-
-      /* Build a CLEAR command and place it in the current batch buffer */
-      STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
-      struct cell_command_clear_surface *clr
-         = (struct cell_command_clear_surface *)
-         cell_batch_alloc16(cell, sizeof(*clr));
-      clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
-      clr->surface = surfIndex;
-      clr->value = clearValue;
-   }
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h
deleted file mode 100644
index a365feb0f00..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_clear.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_CLEAR_H
-#define CELL_CLEAR_H
-
-
-struct pipe_context;
-
-
-extern void
-cell_clear(struct pipe_context *pipe, unsigned buffers,
-           const union pipe_color_union *color,
-           double depth, unsigned stencil);
-
-
-#endif /* CELL_CLEAR_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
deleted file mode 100644
index 58e647a39fa..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Authors
- *  Brian Paul
- */
-
-
-#include <stdio.h>
-
-#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
-#include "util/u_memory.h"
-#include "pipe/p_screen.h"
-#include "util/u_inlines.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-
-#include "cell/common.h"
-#include "cell_batch.h"
-#include "cell_clear.h"
-#include "cell_context.h"
-#include "cell_draw_arrays.h"
-#include "cell_fence.h"
-#include "cell_flush.h"
-#include "cell_state.h"
-#include "cell_surface.h"
-#include "cell_spu.h"
-#include "cell_pipe_state.h"
-#include "cell_texture.h"
-#include "cell_vbuf.h"
-
-
-
-static void
-cell_destroy_context( struct pipe_context *pipe )
-{
-   struct cell_context *cell = cell_context(pipe);
-   unsigned i;
-
-   for (i = 0; i < cell->num_vertex_buffers; i++) {
-      pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL);
-   }
-
-   util_delete_keymap(cell->fragment_ops_cache, NULL);
-
-   cell_spu_exit(cell);
-
-   align_free(cell);
-}
-
-
-static struct draw_context *
-cell_draw_create(struct cell_context *cell)
-{
-   struct draw_context *draw = draw_create(&cell->pipe);
-
-#if 0 /* broken */
-   if (getenv("GALLIUM_CELL_VS")) {
-      /* plug in SPU-based vertex transformation code */
-      draw->shader_queue_flush = cell_vertex_shader_queue_flush;
-      draw->driver_private = cell;
-   }
-#endif
-
-   return draw;
-}
-
-
-static const struct debug_named_value cell_debug_flags[] = {
-   {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */
-   {"asm", CELL_DEBUG_ASM, NULL},        /**< dump SPU asm code */
-   {"sync", CELL_DEBUG_SYNC, NULL},      /**< SPUs do synchronous DMA */
-   {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/
-   {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/
-   {"cmd", CELL_DEBUG_CMD, NULL},       /**< SPUs dump command buffer info */
-   {"cache", CELL_DEBUG_CACHE, NULL},   /**< report texture cache stats on exit */
-   DEBUG_NAMED_VALUE_END
-};
-
-
-struct pipe_context *
-cell_create_context(struct pipe_screen *screen,
-                    void *priv )
-{
-   struct cell_context *cell;
-   uint i;
-
-   /* some fields need to be 16-byte aligned, so align the whole object */
-   cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
-   if (!cell)
-      return NULL;
-
-   memset(cell, 0, sizeof(*cell));
-
-   cell->winsys = NULL;		/* XXX: fixme - get this from screen? */
-   cell->pipe.winsys = NULL;
-   cell->pipe.screen = screen;
-   cell->pipe.priv = priv;
-   cell->pipe.destroy = cell_destroy_context;
-
-   cell->pipe.clear = cell_clear;
-   cell->pipe.flush = cell_flush;
-
-#if 0
-   cell->pipe.begin_query = cell_begin_query;
-   cell->pipe.end_query = cell_end_query;
-   cell->pipe.wait_query = cell_wait_query;
-#endif
-
-   cell_init_draw_functions(cell);
-   cell_init_state_functions(cell);
-   cell_init_shader_functions(cell);
-   cell_init_surface_functions(cell);
-   cell_init_vertex_functions(cell);
-   cell_init_texture_transfer_funcs(cell);
-
-   cell->draw = cell_draw_create(cell);
-
-   /* Create cache of fragment ops generated code */
-   cell->fragment_ops_cache =
-      util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL);
-
-   cell_init_vbuf(cell);
-
-   draw_set_rasterize_stage(cell->draw, cell->vbuf);
-
-   /* convert all points/lines to tris for the time being */
-   draw_wide_point_threshold(cell->draw, 0.0);
-   draw_wide_line_threshold(cell->draw, 0.0);
-
-   /* get env vars or read config file to get debug flags */
-   cell->debug_flags = debug_get_flags_option("CELL_DEBUG", 
-                                              cell_debug_flags, 
-                                              0 );
-
-   for (i = 0; i < CELL_NUM_BUFFERS; i++)
-      cell_fence_init(&cell->fenced_buffers[i].fence);
-
-
-   /*
-    * SPU stuff
-    */
-   /* This call only works with SDK 3.0.  Anyone still using 2.1??? */
-   cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
-   cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
-   if (cell->debug_flags) {
-      printf("Cell: found %d Cell(s) with %u SPUs\n",
-             cell->num_cells, cell->num_spus);
-   }
-   if (getenv("CELL_NUM_SPUS")) {
-      cell->num_spus = atoi(getenv("CELL_NUM_SPUS"));
-      assert(cell->num_spus > 0);
-   }
-
-   cell_start_spus(cell);
-
-   cell_init_batch_buffers(cell);
-
-   /* make sure SPU initializations are done before proceeding */
-   cell_flush_int(cell, CELL_FLUSH_WAIT);
-
-   return &cell->pipe;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
deleted file mode 100644
index d1aee62ba1e..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_CONTEXT_H
-#define CELL_CONTEXT_H
-
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "draw/draw_vertex.h"
-#include "draw/draw_vbuf.h"
-/*#include "cell_winsys.h"*/
-#include "cell/common.h"
-#include "rtasm/rtasm_ppc_spe.h"
-#include "tgsi/tgsi_scan.h"
-#include "util/u_keymap.h"
-
-
-struct cell_vbuf_render;
-
-
-/**
- * Cell vertex shader state, subclass of pipe_shader_state.
- */
-struct cell_vertex_shader_state
-{
-   struct pipe_shader_state shader;
-   struct tgsi_shader_info info;
-   void *draw_data;
-};
-
-
-/**
- * Cell fragment shader state, subclass of pipe_shader_state.
- */
-struct cell_fragment_shader_state
-{
-   struct pipe_shader_state shader;
-   struct tgsi_shader_info info;
-   struct spe_function code;
-   void *data;
-};
-
-
-/**
- * Key for mapping per-fragment state to cached SPU machine code.
- *  keymap(cell_fragment_ops_key) => cell_command_fragment_ops
- */
-struct cell_fragment_ops_key
-{
-   struct pipe_blend_state blend;
-   struct pipe_blend_color blend_color;
-   struct pipe_depth_stencil_alpha_state dsa;
-   enum pipe_format color_format;
-   enum pipe_format zs_format;
-};
-
-
-struct cell_buffer_node;
-
-/**
- * Fenced buffer list.  List of buffers which can be unreferenced after
- * the fence has been executed/signalled.
- */
-struct cell_buffer_list
-{
-   PIPE_ALIGN_VAR(16) struct cell_fence fence;
-   struct cell_buffer_node *head;
-};
-
-struct cell_velems_state
-{
-   unsigned count;
-   struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
-};
-
-/**
- * Per-context state, subclass of pipe_context.
- */
-struct cell_context
-{
-   struct pipe_context pipe;
-
-   struct cell_winsys *winsys;
-
-   const struct pipe_blend_state *blend;
-   const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
-   uint num_samplers;
-   const struct pipe_depth_stencil_alpha_state *depth_stencil;
-   const struct pipe_rasterizer_state *rasterizer;
-   const struct cell_vertex_shader_state *vs;
-   const struct cell_fragment_shader_state *fs;
-   const struct cell_velems_state *velems;
-
-   struct spe_function logic_op;
-
-   struct pipe_blend_color blend_color;
-   struct pipe_stencil_ref stencil_ref;
-   struct pipe_clip_state clip;
-   struct pipe_resource *constants[2];
-   struct pipe_framebuffer_state framebuffer;
-   struct pipe_poly_stipple poly_stipple;
-   struct pipe_scissor_state scissor;
-   struct cell_resource *texture[PIPE_MAX_SAMPLERS];
-   struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
-   uint num_textures;
-   struct pipe_viewport_state viewport;
-   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-   uint num_vertex_buffers;
-   struct pipe_index_buffer index_buffer;
-
-   ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
-   ubyte *zsbuf_map;
-
-   uint dirty;
-   uint dirty_textures;  /* bitmask of texture units */
-   uint dirty_samplers;  /* bitmask of sampler units */
-
-   /** Cache of code generated for per-fragment ops */
-   struct keymap *fragment_ops_cache;
-
-   /** The primitive drawing context */
-   struct draw_context *draw;
-   struct draw_stage *render_stage;
-
-   /** For post-transformed vertex buffering: */
-   struct cell_vbuf_render *vbuf_render;
-   struct draw_stage *vbuf;
-
-   struct vertex_info vertex_info;
-
-   /** Mapped constant buffers */
-   const void *mapped_constants[PIPE_SHADER_TYPES];
-
-   PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions;
-
-   uint num_cells, num_spus;
-
-   /** Buffers for command batches, vertex/index data */
-   uint buffer_size[CELL_NUM_BUFFERS];
-   PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE];
-
-   int cur_batch;  /**< which buffer is being filled w/ commands */
-
-   /** [4] to ensure 16-byte alignment for each status word */
-   PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4];
-
-
-   /** Associated with each command/batch buffer is a list of pipe_buffers
-    * that are fenced.  When the last command in a buffer is executed, the
-    * fence will be signalled, indicating that any pipe_buffers preceeding
-    * that fence can be unreferenced (and probably freed).
-    */
-   struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];
-
-
-   struct spe_function attrib_fetch;
-   unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
-
-   unsigned debug_flags;
-};
-
-
-
-
-static INLINE struct cell_context *
-cell_context(struct pipe_context *pipe)
-{
-   return (struct cell_context *) pipe;
-}
-
-
-struct pipe_context *
-cell_create_context(struct pipe_screen *screen,
-                    void *priv );
-
-extern void
-cell_vertex_shader_queue_flush(struct draw_context *draw);
-
-
-/* XXX find a better home for this */
-extern void cell_update_vertex_fetch(struct draw_context *draw);
-
-
-#endif /* CELL_CONTEXT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
deleted file mode 100644
index a367fa3fe15..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Author:
- *    Brian Paul
- *    Keith Whitwell
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-#include "util/u_inlines.h"
-
-#include "cell_context.h"
-#include "cell_draw_arrays.h"
-#include "cell_state.h"
-#include "cell_flush.h"
-#include "cell_texture.h"
-
-#include "draw/draw_context.h"
-
-
-
-
-
-
-/**
- * Draw vertex arrays, with optional indexing.
- * Basically, map the vertex buffers (and drawing surfaces), then hand off
- * the drawing to the 'draw' module.
- *
- * XXX should the element buffer be specified/bound with a separate function?
- */
-static void
-cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
-{
-   struct cell_context *cell = cell_context(pipe);
-   struct draw_context *draw = cell->draw;
-   void *mapped_indices = NULL;
-   unsigned i;
-
-   if (cell->dirty)
-      cell_update_derived( cell );
-
-#if 0
-   cell_map_surfaces(cell);
-#endif
-
-   /*
-    * Map vertex buffers
-    */
-   for (i = 0; i < cell->num_vertex_buffers; i++) {
-      void *buf = cell_resource(cell->vertex_buffer[i].buffer)->data;
-      draw_set_mapped_vertex_buffer(draw, i, buf);
-   }
-   /* Map index buffer, if present */
-   if (info->indexed && cell->index_buffer.buffer)
-      mapped_indices = cell_resource(cell->index_buffer.buffer)->data;
-
-   draw_set_mapped_index_buffer(draw, mapped_indices);
-
-   /* draw! */
-   draw_vbo(draw, info);
-
-   /*
-    * unmap vertex/index buffers - will cause draw module to flush
-    */
-   for (i = 0; i < cell->num_vertex_buffers; i++) {
-      draw_set_mapped_vertex_buffer(draw, i, NULL);
-   }
-   if (mapped_indices) {
-      draw_set_mapped_index_buffer(draw, NULL);
-   }
-
-   /*
-    * TODO: Flush only when a user vertex/index buffer is present
-    * (or even better, modify draw module to do this
-    * internally when this condition is seen?)
-    */
-   draw_flush(draw);
-}
-
-
-void
-cell_init_draw_functions(struct cell_context *cell)
-{
-   cell->pipe.draw_vbo = cell_draw_vbo;
-}
-
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
deleted file mode 100644
index 148873aa675..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef CELL_DRAW_ARRAYS_H
-#define CELL_DRAW_ARRAYS_H
-
-
-extern void
-cell_init_draw_functions(struct cell_context *cell);
-
-
-#endif /* CELL_DRAW_ARRAYS_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c
deleted file mode 100644
index 181fef44f45..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_fence.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include <unistd.h>
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_fence.h"
-#include "cell_texture.h"
-
-
-void
-cell_fence_init(struct cell_fence *fence)
-{
-   uint i;
-   ASSERT_ALIGN16(fence->status);
-   for (i = 0; i < CELL_MAX_SPUS; i++) {
-      fence->status[i][0] = CELL_FENCE_IDLE;
-   }
-}
-
-
-boolean
-cell_fence_signalled(const struct cell_context *cell,
-                     const struct cell_fence *fence)
-{
-   uint i;
-   for (i = 0; i < cell->num_spus; i++) {
-      if (fence->status[i][0] != CELL_FENCE_SIGNALLED)
-         return FALSE;
-      /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/
-   }
-   return TRUE;
-}
-
-
-boolean
-cell_fence_finish(const struct cell_context *cell,
-                  const struct cell_fence *fence,
-                  uint64_t timeout)
-{
-   while (!cell_fence_signalled(cell, fence)) {
-      usleep(10);
-   }
-
-#ifdef DEBUG
-   {
-      uint i;
-      for (i = 0; i < cell->num_spus; i++) {
-         assert(fence->status[i][0] == CELL_FENCE_SIGNALLED);
-      }
-   }
-#endif
-   return TRUE;
-}
-
-
-
-
-struct cell_buffer_node
-{
-   struct pipe_resource *buffer;
-   struct cell_buffer_node *next;
-};
-
-
-#if 0
-static void
-cell_add_buffer_to_list(struct cell_context *cell,
-                        struct cell_buffer_list *list,
-                        struct pipe_resource *buffer)
-{
-   struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node);
-   /* create new list node which references the buffer, insert at head */
-   if (node) {
-      pipe_resource_reference(&node->buffer, buffer);
-      node->next = list->head;
-      list->head = node;
-   }
-}
-#endif
-
-
-/**
- * Wait for completion of the given fence, then unreference any buffers
- * on the list.
- * This typically unrefs/frees texture buffers after any rendering which uses
- * them has completed.
- */
-void
-cell_free_fenced_buffers(struct cell_context *cell,
-                         struct cell_buffer_list *list)
-{
-   if (list->head) {
-      /*struct pipe_screen *ps = cell->pipe.screen;*/
-      struct cell_buffer_node *node;
-
-      cell_fence_finish(cell, &list->fence);
-
-      /* traverse the list, unreferencing buffers, freeing nodes */
-      node = list->head;
-      while (node) {
-         struct cell_buffer_node *next = node->next;
-         assert(node->buffer);
-         /* XXX need this? pipe_buffer_unmap(ps, node->buffer);*/
-#if 0
-         printf("Unref buffer %p\n", node->buffer);
-         if (node->buffer->reference.count == 1)
-            printf("   Delete!\n");
-#endif
-         pipe_resource_reference(&node->buffer, NULL);
-         FREE(node);
-         node = next;
-      }
-      list->head = NULL;
-   }
-}
-
-
-/**
- * This should be called for each render command.
- * Any texture buffers that are current bound will be added to a fenced
- * list to be freed later when the fence is executed/signalled.
- */
-void
-cell_add_fenced_textures(struct cell_context *cell)
-{
-   /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/
-   uint i;
-
-   for (i = 0; i < cell->num_textures; i++) {
-      struct cell_resource *ct = cell->texture[i];
-      if (ct) {
-#if 0
-         printf("Adding texture %p buffer %p to list\n",
-                ct, ct->tiled_buffer[level]);
-#endif
-#if 00
-         /* XXX this needs to be fixed/restored!
-          * Maybe keep pointers to textures, not buffers.
-          */
-         if (ct->base.buffer)
-            cell_add_buffer_to_list(cell, list, ct->buffer);
-#endif
-      }
-   }
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h
deleted file mode 100644
index 3568230b1c0..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_fence.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_FENCE_H
-#define CELL_FENCE_H
-
-
-extern void
-cell_fence_init(struct cell_fence *fence);
-
-
-extern boolean
-cell_fence_signalled(const struct cell_context *cell,
-                     const struct cell_fence *fence,
-                     unsigned flags);
-
-
-extern boolean
-cell_fence_finish(const struct cell_context *cell,
-                  const struct cell_fence *fence,
-                  unsigned flags,
-                  uint64_t timeout);
-
-
-
-extern void
-cell_free_fenced_buffers(struct cell_context *cell,
-                         struct cell_buffer_list *list);
-
-
-extern void
-cell_add_fenced_textures(struct cell_context *cell);
-
-
-#endif /* CELL_FENCE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c
deleted file mode 100644
index 463f4d03eb9..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_flush.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_render.h"
-#include "draw/draw_context.h"
-
-
-/**
- * Called via pipe->flush()
- */
-void
-cell_flush(struct pipe_context *pipe,
-           struct pipe_fence_handle **fence)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   if (fence) {
-      *fence = NULL;
-   }
-
-   flags |= CELL_FLUSH_WAIT;
-
-   draw_flush( cell->draw );
-   cell_flush_int(cell, flags);
-}
-
-
-/**
- * Cell internal flush function.  Send the current batch buffer to all SPUs.
- * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle.
- * \param flags  bitmask of flags CELL_FLUSH_WAIT, or zero
- */
-void
-cell_flush_int(struct cell_context *cell, unsigned flags)
-{
-   static boolean flushing = FALSE;  /* recursion catcher */
-   uint i;
-
-   ASSERT(!flushing);
-   flushing = TRUE;
-
-   if (flags & CELL_FLUSH_WAIT) {
-      STATIC_ASSERT(sizeof(opcode_t) % 16 == 0);
-      opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t));
-      *cmd[0] = CELL_CMD_FINISH;
-   }
-
-   cell_batch_flush(cell);
-
-#if 0
-   /* Send CMD_FINISH to all SPUs */
-   for (i = 0; i < cell->num_spus; i++) {
-      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH);
-   }
-#endif
-
-   if (flags & CELL_FLUSH_WAIT) {
-      /* Wait for ack */
-      for (i = 0; i < cell->num_spus; i++) {
-         uint k = wait_mbox_message(cell_global.spe_contexts[i]);
-         assert(k == CELL_CMD_FINISH);
-      }
-   }
-
-   flushing = FALSE;
-}
-
-
-void
-cell_flush_buffer_range(struct cell_context *cell, void *ptr,
-			unsigned size)
-{
-   STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0);
-   uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell, 
-      sizeof(opcode_t) + sizeof(struct cell_buffer_range));
-   struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4];
-   batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE;
-   br->base = (uintptr_t) ptr;
-   br->size = size;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h
deleted file mode 100644
index 509ae6239ac..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_flush.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_FLUSH
-#define CELL_FLUSH
-
-#define CELL_FLUSH_WAIT 0x80000000
-
-extern void
-cell_flush(struct pipe_context *pipe, unsigned flags,
-           struct pipe_fence_handle **fence);
-
-extern void
-cell_flush_int(struct cell_context *cell, unsigned flags);
-
-extern void
-cell_flush_buffer_range(struct cell_context *cell, void *ptr,
-			unsigned size);
-
-#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
deleted file mode 100644
index 1d8a11a4ac9..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ /dev/null
@@ -1,2036 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009 VMware, Inc.  All rights reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-
-/**
- * Generate SPU fragment program/shader code.
- *
- * Note that we generate SOA-style code here.  So each TGSI instruction
- * operates on four pixels (and is translated into four SPU instructions,
- * generally speaking).
- *
- * \author Brian Paul
- */
-
-#include <math.h>
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_exec.h"
-#include "tgsi/tgsi_dump.h"
-#include "rtasm/rtasm_ppc_spe.h"
-#include "util/u_memory.h"
-#include "cell_context.h"
-#include "cell_gen_fp.h"
-
-
-#define MAX_TEMPS 16
-#define MAX_IMMED  8
-
-#define CHAN_X  0
-#define CHAN_Y  1
-#define CHAN_Z  2
-#define CHAN_W  3
-
-/**
- * Context needed during code generation.
- */
-struct codegen
-{
-   struct cell_context *cell;
-   int inputs_reg;      /**< 1st function parameter */
-   int outputs_reg;     /**< 2nd function parameter */
-   int constants_reg;   /**< 3rd function parameter */
-   int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
-   int imm_regs[MAX_IMMED][4];  /**< maps TGSI immediates to SPE registers */
-
-   int num_imm;  /**< number of immediates */
-
-   int one_reg;         /**< register containing {1.0, 1.0, 1.0, 1.0} */
-
-   int addr_reg;        /**< address register, integer values */
-
-   /** Per-instruction temps / intermediate temps */
-   int num_itemps;
-   int itemps[12];
-
-   /** Current IF/ELSE/ENDIF nesting level */
-   int if_nesting;
-   /** Current BGNLOOP/ENDLOOP nesting level */
-   int loop_nesting;
-   /** Location of start of current loop */
-   int loop_start;
-
-   /** Index of if/conditional mask register */
-   int cond_mask_reg;
-   /** Index of loop mask register */
-   int loop_mask_reg;
-
-   /** Index of master execution mask register */
-   int exec_mask_reg;
-
-   /** KIL mask: indicates which fragments have been killed */
-   int kill_mask_reg;
-
-   int frame_size;  /**< Stack frame size, in words */
-
-   struct spe_function *f;
-   boolean error;
-};
-
-
-/**
- * Allocate an intermediate temporary register.
- */
-static int
-get_itemp(struct codegen *gen)
-{
-   int t = spe_allocate_available_register(gen->f);
-   assert(gen->num_itemps < Elements(gen->itemps));
-   gen->itemps[gen->num_itemps++] = t;
-   return t;
-}
-
-/**
- * Free all intermediate temporary registers.  To be called after each
- * instruction has been emitted.
- */
-static void
-free_itemps(struct codegen *gen)
-{
-   int i;
-   for (i = 0; i < gen->num_itemps; i++) {
-      spe_release_register(gen->f, gen->itemps[i]);
-   }
-   gen->num_itemps = 0;
-}
-
-
-/**
- * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
- * The register is allocated and initialized upon the first call.
- */
-static int
-get_const_one_reg(struct codegen *gen)
-{
-   if (gen->one_reg <= 0) {
-      gen->one_reg = spe_allocate_available_register(gen->f);
-
-      spe_indent(gen->f, 4);
-      spe_comment(gen->f, -4, "init constant reg = 1.0:");
-
-      /* one = {1.0, 1.0, 1.0, 1.0} */
-      spe_load_float(gen->f, gen->one_reg, 1.0f);
-
-      spe_indent(gen->f, -4);
-   }
-
-   return gen->one_reg;
-}
-
-
-/**
- * Return index of the address register.
- * Used for indirect register loads/stores.
- */
-static int
-get_address_reg(struct codegen *gen)
-{
-   if (gen->addr_reg <= 0) {
-      gen->addr_reg = spe_allocate_available_register(gen->f);
-
-      spe_indent(gen->f, 4);
-      spe_comment(gen->f, -4, "init address reg = 0:");
-
-      /* init addr = {0, 0, 0, 0} */
-      spe_zero(gen->f, gen->addr_reg);
-
-      spe_indent(gen->f, -4);
-   }
-
-   return gen->addr_reg;
-}
-
-
-/**
- * Return index of the master execution mask.
- * The register is allocated an initialized upon the first call.
- *
- * The master execution mask controls which pixels in a quad are
- * modified, according to surrounding conditionals, loops, etc.
- */
-static int
-get_exec_mask_reg(struct codegen *gen)
-{
-   if (gen->exec_mask_reg <= 0) {
-      gen->exec_mask_reg = spe_allocate_available_register(gen->f);
-
-      /* XXX this may not be needed */
-      spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
-      spe_load_int(gen->f, gen->exec_mask_reg, ~0);
-   }
-
-   return gen->exec_mask_reg;
-}
-
-
-/** Return index of the conditional (if/else) execution mask register */
-static int
-get_cond_mask_reg(struct codegen *gen)
-{
-   if (gen->cond_mask_reg <= 0) {
-      gen->cond_mask_reg = spe_allocate_available_register(gen->f);
-   }
-
-   return gen->cond_mask_reg;
-}
-
-
-/** Return index of the loop execution mask register */
-static int
-get_loop_mask_reg(struct codegen *gen)
-{
-   if (gen->loop_mask_reg <= 0) {
-      gen->loop_mask_reg = spe_allocate_available_register(gen->f);
-   }
-
-   return gen->loop_mask_reg;
-}
-
-
-
-static boolean
-is_register_src(struct codegen *gen, int channel,
-                const struct tgsi_full_src_register *src)
-{
-   int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
-   int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
-
-   if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
-      return FALSE;
-   }
-   if (src->Register.File == TGSI_FILE_TEMPORARY ||
-       src->Register.File == TGSI_FILE_IMMEDIATE) {
-      return TRUE;
-   }
-   return FALSE;
-}
-
-  
-static boolean
-is_memory_dst(struct codegen *gen, int channel,
-              const struct tgsi_full_dst_register *dst)
-{
-   if (dst->Register.File == TGSI_FILE_OUTPUT) {
-      return TRUE;
-   }
-   else {
-      return FALSE;
-   }
-}
-
-  
-/**
- * Return the index of the SPU temporary containing the named TGSI
- * source register.  If the TGSI register is a TGSI_FILE_TEMPORARY we
- * just return the corresponding SPE register.  If the TGIS register
- * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
- * and emit an SPE load instruction.
- */
-static int
-get_src_reg(struct codegen *gen,
-            int channel,
-            const struct tgsi_full_src_register *src)
-{
-   int reg = -1;
-   int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
-   boolean reg_is_itemp = FALSE;
-   uint sign_op;
-
-   assert(swizzle >= TGSI_SWIZZLE_X);
-   assert(swizzle <= TGSI_SWIZZLE_W);
-
-   {
-      int index = src->Register.Index;
-
-      assert(swizzle < 4);
-
-      if (src->Register.Indirect) {
-         /* XXX unfinished */
-      }
-
-      switch (src->Register.File) {
-      case TGSI_FILE_TEMPORARY:
-         reg = gen->temp_regs[index][swizzle];
-         break;
-      case TGSI_FILE_INPUT:
-         {
-            /* offset is measured in quadwords, not bytes */
-            int offset = index * 4 + swizzle;
-            reg = get_itemp(gen);
-            reg_is_itemp = TRUE;
-            /* Load:  reg = memory[(machine_reg) + offset] */
-            spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
-         }
-         break;
-      case TGSI_FILE_IMMEDIATE:
-         reg = gen->imm_regs[index][swizzle];
-         break;
-      case TGSI_FILE_CONSTANT:
-         {
-            /* offset is measured in quadwords, not bytes */
-            int offset = index * 4 + swizzle;
-            reg = get_itemp(gen);
-            reg_is_itemp = TRUE;
-            /* Load:  reg = memory[(machine_reg) + offset] */
-            spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
-         }
-         break;
-      default:
-         assert(0);
-      }
-   }
-
-   /*
-    * Handle absolute value, negate or set-negative of src register.
-    */
-   sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
-   if (sign_op != TGSI_UTIL_SIGN_KEEP) {
-      /*
-       * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
-       */
-      const int bit31mask_reg = get_itemp(gen);
-      int result_reg;
-
-      if (reg_is_itemp) {
-         /* re-use 'reg' for the result */
-         result_reg = reg;
-      }
-      else {
-         /* alloc a new reg for the result */
-         result_reg = get_itemp(gen);
-      }
-
-      /* mask with bit 31 set, the rest cleared */
-      spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
-
-      if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
-         spe_andc(gen->f, result_reg, reg, bit31mask_reg);
-      }
-      else if (sign_op == TGSI_UTIL_SIGN_SET) {
-         spe_and(gen->f, result_reg, reg, bit31mask_reg);
-      }
-      else {
-         assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
-         spe_xor(gen->f, result_reg, reg, bit31mask_reg);
-      }
-
-      reg = result_reg;
-   }
-
-   return reg;
-}
-
-
-/**
- * Return the index of an SPE register to use for the given TGSI register.
- * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
- * corresponding SPE register is returned.  If the TGSI register is
- * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
- * See store_dest_reg() below...
- */
-static int
-get_dst_reg(struct codegen *gen,
-            int channel,
-            const struct tgsi_full_dst_register *dest)
-{
-   int reg = -1;
-
-   switch (dest->Register.File) {
-   case TGSI_FILE_TEMPORARY:
-      if (gen->if_nesting > 0 || gen->loop_nesting > 0)
-         reg = get_itemp(gen);
-      else
-         reg = gen->temp_regs[dest->Register.Index][channel];
-      break;
-   case TGSI_FILE_OUTPUT:
-      reg = get_itemp(gen);
-      break;
-   default:
-      assert(0);
-   }
-
-   return reg;
-}
-
-
-/**
- * When a TGSI instruction is writing to an output register, this
- * function emits the SPE store instruction to store the value_reg.
- * \param value_reg  the SPE register containing the value to store.
- *                   This would have been returned by get_dst_reg().
- */
-static void
-store_dest_reg(struct codegen *gen,
-               int value_reg, int channel,
-               const struct tgsi_full_dst_register *dest)
-{
-   /*
-    * XXX need to implement dst reg clamping/saturation
-    */
-#if 0
-   switch (inst->Instruction.Saturate) {
-   case TGSI_SAT_NONE:
-      break;
-   case TGSI_SAT_ZERO_ONE:
-      break;
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      break;
-   default:
-      assert( 0 );
-   }
-#endif
-
-   switch (dest->Register.File) {
-   case TGSI_FILE_TEMPORARY:
-      if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
-         int d_reg = gen->temp_regs[dest->Register.Index][channel];
-         int exec_reg = get_exec_mask_reg(gen);
-         /* Mix d with new value according to exec mask:
-          * d[i] = mask_reg[i] ? value_reg : d_reg
-          */
-         spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
-      }
-      else {
-         /* we're not inside a condition or loop: do nothing special */
-
-      }
-      break;
-   case TGSI_FILE_OUTPUT:
-      {
-         /* offset is measured in quadwords, not bytes */
-         int offset = dest->Register.Index * 4 + channel;
-         if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
-            int exec_reg = get_exec_mask_reg(gen);
-            int curval_reg = get_itemp(gen);
-            /* First read the current value from memory:
-             * Load:  curval = memory[(machine_reg) + offset]
-             */
-            spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
-            /* Mix curval with newvalue according to exec mask:
-             * d[i] = mask_reg[i] ? value_reg : d_reg
-             */
-            spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
-            /* Store: memory[(machine_reg) + offset] = curval */
-            spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
-         }
-         else {
-            /* Store: memory[(machine_reg) + offset] = reg */
-            spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-
-static void
-emit_prologue(struct codegen *gen)
-{
-   gen->frame_size = 1024; /* XXX temporary, should be dynamic */
-
-   spe_comment(gen->f, 0, "Function prologue:");
-
-   /* save $lr on stack     # stqd $lr,16($sp) */
-   spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
-
-   if (gen->frame_size >= 512) {
-      /* offset is too large for ai instruction */
-      int offset_reg = spe_allocate_available_register(gen->f);
-      int sp_reg = spe_allocate_available_register(gen->f);
-      /* offset = -framesize */
-      spe_load_int(gen->f, offset_reg, -gen->frame_size);
-      /* sp = $sp */
-      spe_move(gen->f, sp_reg, SPE_REG_SP);
-      /* $sp = $sp + offset_reg */
-      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
-      /* save $sp in stack frame */
-      spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
-      /* clean up */
-      spe_release_register(gen->f, offset_reg);
-      spe_release_register(gen->f, sp_reg);
-   }
-   else {
-      /* save stack pointer    # stqd $sp,-frameSize($sp) */
-      spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
-
-      /* adjust stack pointer  # ai $sp,$sp,-frameSize */
-      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
-   }
-}
-
-
-static void
-emit_epilogue(struct codegen *gen)
-{
-   const int return_reg = 3;
-
-   spe_comment(gen->f, 0, "Function epilogue:");
-
-   spe_comment(gen->f, 0, "return the killed mask");
-   if (gen->kill_mask_reg > 0) {
-      /* shader called KIL, return the "alive" mask */
-      spe_move(gen->f, return_reg, gen->kill_mask_reg);
-   }
-   else {
-      /* return {0,0,0,0} */
-      spe_load_uint(gen->f, return_reg, 0);
-   }
-
-   spe_comment(gen->f, 0, "restore stack and return");
-   if (gen->frame_size >= 512) {
-      /* offset is too large for ai instruction */
-      int offset_reg = spe_allocate_available_register(gen->f);
-      /* offset = framesize */
-      spe_load_int(gen->f, offset_reg, gen->frame_size);
-      /* $sp = $sp + offset */
-      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
-      /* clean up */
-      spe_release_register(gen->f, offset_reg);
-   }
-   else {
-      /* restore stack pointer    # ai $sp,$sp,frameSize */
-      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
-   }
-
-   /* restore $lr              # lqd $lr,16($sp) */
-   spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
-
-   /* return from function call */
-   spe_bi(gen->f, SPE_REG_RA, 0, 0);
-}
-
-
-#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
-   for (ch = 0; ch < 4; ch++) \
-      if (inst->Dst[0].Register.WriteMask & (1 << ch))
-
-
-static boolean
-emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch = 0, src_reg, addr_reg;
-
-   src_reg = get_src_reg(gen, ch, &inst->Src[0]);
-   addr_reg = get_address_reg(gen);
-
-   /* convert float to int */
-   spe_cflts(gen->f, addr_reg, src_reg, 0);
-
-   free_itemps(gen);
-
-   return TRUE;
-}
-
-
-static boolean
-emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, src_reg[4], dst_reg[4];
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      if (is_register_src(gen, ch, &inst->Src[0]) &&
-          is_memory_dst(gen, ch, &inst->Dst[0])) {
-         /* special-case: register to memory store */
-         store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]);
-      }
-      else {
-         spe_move(gen->f, dst_reg[ch], src_reg[ch]);
-         store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]);
-      }
-   }
-
-   free_itemps(gen);
-
-   return TRUE;
-}
-
-/**
- * Emit binary operation
- */
-static boolean
-emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], s2_reg[4], d_reg[4];
-
-   /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-   }
-
-   /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      /* Emit actual SPE instruction: d = s1 + s2 */
-      switch (inst->Instruction.Opcode) {
-      case TGSI_OPCODE_ADD:
-         spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
-         break;
-      case TGSI_OPCODE_SUB:
-         spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
-         break;
-      case TGSI_OPCODE_MUL:
-         spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
-         break;
-      default:
-         ;
-      }
-   }
-
-   /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   /* Free any intermediate temps we allocated */
-   free_itemps(gen);
-
-   return TRUE;
-}
-
-
-/**
- * Emit multiply add.  See emit_ADD for comments.
- */
-static boolean
-emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
-      s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit linear interpolate.  See emit_ADD for comments.
- */
-static boolean
-emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
-
-   /* setup/get src/dst/temp regs */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
-      s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-      tmp_reg[ch] = get_itemp(gen);
-   }
-
-   /* d = s3 + s1(s2 - s3) */
-   /* do all subtracts, then all fma, then all stores to better pipeline */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-
-/**
- * Emit reciprocal or recip sqrt.
- */
-static boolean
-emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], d_reg[4], tmp_reg[4];
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-      tmp_reg[ch] = get_itemp(gen);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
-         /* tmp = 1/s1 */
-         spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]);
-      }
-      else {
-         /* tmp = 1/sqrt(s1) */
-         spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]);
-      }
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      /* d = float_interp(s1, tmp) */
-      spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit absolute value.  See emit_ADD for comments.
- */
-static boolean
-emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], d_reg[4];
-   const int bit31mask_reg = get_itemp(gen);
-
-   /* mask with bit 31 set, the rest cleared */  
-   spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-   }
-
-   /* d = sign bit cleared in s1 */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-/**
- * Emit 3 component dot product.  See emit_ADD for comments.
- */
-static boolean
-emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch;
-   int s1x_reg, s1y_reg, s1z_reg;
-   int s2x_reg, s2y_reg, s2z_reg;
-   int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
-
-   s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
-   s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
-   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
-   s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
-   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
-   s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
-
-   /* t0 = x0 * x1 */
-   spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
-
-   /* t1 = y0 * y1 */
-   spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
-
-   /* t0 = z0 * z1 + t0 */
-   spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
-
-   /* t0 = t0 + t1 */
-   spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-      spe_move(gen->f, d_reg, t0_reg);
-      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-/**
- * Emit 4 component dot product.  See emit_ADD for comments.
- */
-static boolean
-emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch;
-   int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
-   int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
-   int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
-
-   s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
-   s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
-   s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
-   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
-   s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
-   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
-   s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]);
-   s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
-
-   /* t0 = x0 * x1 */
-   spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
-
-   /* t1 = y0 * y1 */
-   spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
-
-   /* t0 = z0 * z1 + t0 */
-   spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
-
-   /* t1 = w0 * w1 + t1 */
-   spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
-
-   /* t0 = t0 + t1 */
-   spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-      spe_move(gen->f, d_reg, t0_reg);
-      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-/**
- * Emit homogeneous dot product.  See emit_ADD for comments.
- */
-static boolean
-emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   /* XXX rewrite this function to look more like DP3/DP4 */
-   int ch;
-   int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
-   int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
-   int tmp_reg = get_itemp(gen);
-
-   /* t = x0 * x1 */
-   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
-   s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
-   s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
-   /* t = y0 * y1 + t */
-   spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
-   s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
-   s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
-   /* t = z0 * z1 + t */
-   spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
-   s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
-   /* t = w1 + t */
-   spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-      spe_move(gen->f, d_reg, tmp_reg);
-      store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-/**
- * Emit 3-component vector normalize.
- */
-static boolean
-emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch;
-   int src_reg[3];
-   int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
-
-   src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]);
-   src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
-   src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
-
-   /* t0 = x * x */
-   spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
-
-   /* t1 = y * y */
-   spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
-
-   /* t0 = z * z + t0 */
-   spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
-
-   /* t0 = t0 + t1 */
-   spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
-
-   /* t1 = 1.0 / sqrt(t0) */
-   spe_frsqest(gen->f, t1_reg, t0_reg);
-   spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-      /* dst = src[ch] * t1 */
-      spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
-      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit cross product.  See emit_ADD for comments.
- */
-static boolean
-emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
-   int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
-   int tmp_reg = get_itemp(gen);
-
-   /* t = z0 * y1 */
-   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
-   s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
-   s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
-   /* t = y0 * z1 - t */
-   spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
-   if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) {
-      store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]);
-   }
-
-   s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
-   s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
-   /* t = x0 * z1 */
-   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
-   s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
-   s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
-   /* t = z0 * x1 - t */
-   spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
-   if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) {
-      store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]);
-   }
-
-   s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
-   s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
-   /* t = y0 * x1 */
-   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
-   s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
-   s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
-   /* t = x0 * y1 - t */
-   spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
-   if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) {
-      store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit inequality instruction.
- * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
- * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
- * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
- */
-static boolean
-emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
-   boolean complement = FALSE;
-
-   one_reg = get_const_one_reg(gen);
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      switch (inst->Instruction.Opcode) {
-      case TGSI_OPCODE_SGT:
-         spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
-         break;
-      case TGSI_OPCODE_SLT:
-         spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
-         break;
-      case TGSI_OPCODE_SGE:
-         spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
-         complement = TRUE;
-         break;
-      case TGSI_OPCODE_SLE:
-         spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
-         complement = TRUE;
-         break;
-      case TGSI_OPCODE_SEQ:
-         spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
-         break;
-      case TGSI_OPCODE_SNE:
-         spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
-         complement = TRUE;
-         break;
-      default:
-         assert(0);
-      }
-   }
-
-   /* convert d from 0x0/0xffffffff to 0.0/1.0 */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      /* d = d & one_reg */
-      if (complement)
-         spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
-      else
-         spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit compare.
- */
-static boolean
-emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch;
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int s1_reg = get_src_reg(gen, ch, &inst->Src[0]);
-      int s2_reg = get_src_reg(gen, ch, &inst->Src[1]);
-      int s3_reg = get_src_reg(gen, ch, &inst->Src[2]);
-      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-      int zero_reg = get_itemp(gen);
-   
-      spe_zero(gen->f, zero_reg);
-
-      /* d = (s1 < 0) ? s2 : s3 */
-      spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
-      spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
-
-      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
-      free_itemps(gen);
-   }
-
-   return TRUE;
-}
-
-/**
- * Emit trunc.  
- * Convert float to signed int
- * Convert signed int to float
- */
-static boolean
-emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], d_reg[4];
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-   }
-
-   /* Convert float to int */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0);
-   }
-
-   /* Convert int to float */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit floor.  
- * If negative int subtract one
- * Convert float to signed int
- * Convert signed int to float
- */
-static boolean
-emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
-
-   zero_reg = get_itemp(gen);
-   spe_zero(gen->f, zero_reg);
-   one_reg = get_const_one_reg(gen);
-   
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-      tmp_reg[ch] = get_itemp(gen);
-   }
-
-   /* If negative, subtract 1.0 */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
-   }
-
-   /* Convert float to int */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
-   }
-
-   /* Convert int to float */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Compute frac = Input - FLR(Input)
- */
-static boolean
-emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
-
-   zero_reg = get_itemp(gen);
-   spe_zero(gen->f, zero_reg);
-   one_reg = get_const_one_reg(gen);
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-      tmp_reg[ch] = get_itemp(gen);
-   }
-
-   /* If negative, subtract 1.0 */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
-   }
-
-   /* Convert float to int */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
-   }
-
-   /* Convert int to float */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
-   }
-
-   /* d = s1 - FLR(s1) */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
-   }
-
-   /* store result */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-#if 0
-static void
-print_functions(struct cell_context *cell)
-{
-   struct cell_spu_function_info *funcs = &cell->spu_functions;
-   uint i;
-   for (i = 0; i < funcs->num; i++) {
-      printf("SPU func %u: %s at %u\n",
-             i, funcs->names[i], funcs->addrs[i]);
-   }
-}
-#endif
-
-
-static uint
-lookup_function(struct cell_context *cell, const char *funcname)
-{
-   const struct cell_spu_function_info *funcs = &cell->spu_functions;
-   uint i, addr = 0;
-   for (i = 0; i < funcs->num; i++) {
-      if (strcmp(funcs->names[i], funcname) == 0) {
-         addr = funcs->addrs[i];
-      }
-   }
-   assert(addr && "spu function not found");
-   return addr / 4;  /* discard 2 least significant bits */
-}
-
-
-/**
- * Emit code to call a SPU function.
- * Used to implement instructions like SIN/COS/POW/TEX/etc.
- * If scalar, only the X components of the src regs are used, and the
- * result is replicated across the dest register's XYZW components.
- */
-static boolean
-emit_function_call(struct codegen *gen,
-                   const struct tgsi_full_instruction *inst,
-                   char *funcname, uint num_args, boolean scalar)
-{
-   const uint addr = lookup_function(gen->cell, funcname);
-   char comment[100];
-   int s_regs[3];
-   int func_called = FALSE;
-   uint a, ch;
-   int retval_reg = -1;
-
-   assert(num_args <= 3);
-
-   snprintf(comment, sizeof(comment), "CALL %s:", funcname);
-   spe_comment(gen->f, -4, comment);
-
-   if (scalar) {
-      for (a = 0; a < num_args; a++) {
-         s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]);
-      }
-      /* we'll call the function, put the return value in this register,
-       * then replicate it across all write-enabled components in d_reg.
-       */
-      retval_reg = spe_allocate_available_register(gen->f);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg;
-      ubyte usedRegs[SPE_NUM_REGS];
-      uint i, numUsed;
-
-      if (!scalar) {
-         for (a = 0; a < num_args; a++) {
-            s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]);
-         }
-      }
-
-      d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-
-      if (!scalar || !func_called) {
-         /* for a scalar function, we'll really only call the function once */
-
-         numUsed = spe_get_registers_used(gen->f, usedRegs);
-         assert(numUsed < gen->frame_size / 16 - 2);
-
-         /* save registers to stack */
-         for (i = 0; i < numUsed; i++) {
-            uint reg = usedRegs[i];
-            int offset = 2 + i;
-            spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
-         }
-
-         /* setup function arguments */
-         for (a = 0; a < num_args; a++) {
-            spe_move(gen->f, 3 + a, s_regs[a]);
-         }
-
-         /* branch to function, save return addr */
-         spe_brasl(gen->f, SPE_REG_RA, addr);
-
-         /* save function's return value */
-         if (scalar)
-            spe_move(gen->f, retval_reg, 3);
-         else
-            spe_move(gen->f, d_reg, 3);
-
-         /* restore registers from stack */
-         for (i = 0; i < numUsed; i++) {
-            uint reg = usedRegs[i];
-            if (reg != d_reg && reg != retval_reg) {
-               int offset = 2 + i;
-               spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
-            }
-         }
-
-         func_called = TRUE;
-      }
-
-      if (scalar) {
-         spe_move(gen->f, d_reg, retval_reg);
-      }
-
-      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
-      free_itemps(gen);
-   }
-
-   if (scalar) {
-      spe_release_register(gen->f, retval_reg);
-   }
-
-   return TRUE;
-}
-
-
-static boolean
-emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   const uint target = inst->Texture.Texture;
-   const uint unit = inst->Src[1].Register.Index;
-   uint addr;
-   int ch;
-   int coord_regs[4], d_regs[4];
-
-   switch (target) {
-   case TGSI_TEXTURE_1D:
-   case TGSI_TEXTURE_2D:
-      addr = lookup_function(gen->cell, "spu_tex_2d");
-      break;
-   case TGSI_TEXTURE_3D:
-      addr = lookup_function(gen->cell, "spu_tex_3d");
-      break;
-   case TGSI_TEXTURE_CUBE:
-      addr = lookup_function(gen->cell, "spu_tex_cube");
-      break;
-   default:
-      ASSERT(0 && "unsupported texture target");
-      return FALSE;
-   }
-
-   assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
-
-   spe_comment(gen->f, -4, "CALL tex:");
-
-   /* get src/dst reg info */
-   for (ch = 0; ch < 4; ch++) {
-      coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-   }
-
-   {
-      ubyte usedRegs[SPE_NUM_REGS];
-      uint i, numUsed;
-
-      numUsed = spe_get_registers_used(gen->f, usedRegs);
-      assert(numUsed < gen->frame_size / 16 - 2);
-
-      /* save registers to stack */
-      for (i = 0; i < numUsed; i++) {
-         uint reg = usedRegs[i];
-         int offset = 2 + i;
-         spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
-      }
-
-      /* setup function arguments (XXX depends on target) */
-      for (i = 0; i < 4; i++) {
-         spe_move(gen->f, 3 + i, coord_regs[i]);
-      }
-      spe_load_uint(gen->f, 7, unit); /* sampler unit */
-
-      /* branch to function, save return addr */
-      spe_brasl(gen->f, SPE_REG_RA, addr);
-
-      /* save function's return values (four pixel's colors) */
-      for (i = 0; i < 4; i++) {
-         spe_move(gen->f, d_regs[i], 3 + i);
-      }
-
-      /* restore registers from stack */
-      for (i = 0; i < numUsed; i++) {
-         uint reg = usedRegs[i];
-         if (reg != d_regs[0] &&
-             reg != d_regs[1] &&
-             reg != d_regs[2] &&
-             reg != d_regs[3]) {
-            int offset = 2 + i;
-            spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
-         }
-      }
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]);
-      free_itemps(gen);
-   }
-
-   return TRUE;
-}
-
-
-/**
- * KILL if any of src reg values are less than zero.
- */
-static boolean
-emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch;
-   int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
-
-   spe_comment(gen->f, -4, "CALL kil:");
-
-   /* zero = {0,0,0,0} */
-   zero_reg = get_itemp(gen);
-   spe_zero(gen->f, zero_reg);
-
-   cmp_reg = get_itemp(gen);
-
-   /* get src regs */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-   }
-
-   /* test if any src regs are < 0 */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      if (kil_reg >= 0) {
-         /* cmp = 0 > src ? : ~0 : 0 */
-         spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
-         /* kil = kil | cmp */
-         spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
-      }
-      else {
-         kil_reg = get_itemp(gen);
-         /* kil = 0 > src ? : ~0 : 0 */
-         spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
-      }
-   }
-
-   if (gen->if_nesting || gen->loop_nesting) {
-      /* may have been a conditional kil */
-      spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
-   }
-
-   /* allocate the kill mask reg if needed */
-   if (gen->kill_mask_reg <= 0) {
-      gen->kill_mask_reg = spe_allocate_available_register(gen->f);
-      spe_move(gen->f, gen->kill_mask_reg, kil_reg);
-   }
-   else {
-      spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
-   }
-
-   free_itemps(gen);
-
-   return TRUE;
-}
-
-
-
-/**
- * Emit min or max.
- */
-static boolean
-emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
-      tmp_reg[ch] = get_itemp(gen);         
-   }
-
-   /* d = (s0 > s1) ? s0 : s1 */
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      if (inst->Instruction.Opcode == TGSI_OPCODE_MAX)
-         spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
-      else
-         spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
-   }
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
-   }
-
-   free_itemps(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit code to update the execution mask.
- * This needs to be done whenever the execution status of a conditional
- * or loop is changed.
- */
-static void
-emit_update_exec_mask(struct codegen *gen)
-{
-   const int exec_reg = get_exec_mask_reg(gen);
-   const int cond_reg = gen->cond_mask_reg;
-   const int loop_reg = gen->loop_mask_reg;
-
-   spe_comment(gen->f, 0, "Update master execution mask");
-
-   if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
-      /* exec_mask = cond_mask & loop_mask */
-      assert(cond_reg > 0);
-      assert(loop_reg > 0);
-      spe_and(gen->f, exec_reg, cond_reg, loop_reg);
-   }
-   else if (gen->if_nesting > 0) {
-      assert(cond_reg > 0);
-      spe_move(gen->f, exec_reg, cond_reg);
-   }
-   else if (gen->loop_nesting > 0) {
-      assert(loop_reg > 0);
-      spe_move(gen->f, exec_reg, loop_reg);
-   }
-   else {
-      spe_load_int(gen->f, exec_reg, ~0x0);
-   }
-}
-
-
-static boolean
-emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   const int channel = 0;
-   int cond_reg;
-
-   cond_reg = get_cond_mask_reg(gen);
-
-   /* XXX push cond exec mask */
-
-   spe_comment(gen->f,  0, "init conditional exec mask = ~0:");
-   spe_load_int(gen->f, cond_reg, ~0);
-
-   /* update conditional execution mask with the predicate register */
-   int tmp_reg = get_itemp(gen);
-   int s1_reg = get_src_reg(gen, channel, &inst->Src[0]);
-
-   /* tmp = (s1_reg == 0) */
-   spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
-   /* tmp = !tmp */
-   spe_complement(gen->f, tmp_reg, tmp_reg);
-   /* cond_mask = cond_mask & tmp */
-   spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
-
-   gen->if_nesting++;
-
-   /* update the master execution mask */
-   emit_update_exec_mask(gen);
-
-   free_itemps(gen);
-
-   return TRUE;
-}
-
-
-static boolean
-emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   const int cond_reg = get_cond_mask_reg(gen);
-
-   spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
-   spe_complement(gen->f, cond_reg, cond_reg);
-   emit_update_exec_mask(gen);
-
-   return TRUE;
-}
-
-
-static boolean
-emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   /* XXX todo: pop cond exec mask */
-
-   gen->if_nesting--;
-
-   emit_update_exec_mask(gen);
-
-   return TRUE;
-}
-
-
-static boolean
-emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   int exec_reg, loop_reg;
-
-   exec_reg = get_exec_mask_reg(gen);
-   loop_reg = get_loop_mask_reg(gen);
-
-   /* XXX push loop_exec mask */
-
-   spe_comment(gen->f,  0*-4, "initialize loop exec mask = ~0");
-   spe_load_int(gen->f, loop_reg, ~0x0);
-
-   gen->loop_nesting++;
-   gen->loop_start = spe_code_size(gen->f);  /* in bytes */
-
-   return TRUE;
-}
-
-
-static boolean
-emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   const int loop_reg = get_loop_mask_reg(gen);
-   const int tmp_reg = get_itemp(gen);
-   int offset;
-
-   /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */
-   spe_orx(gen->f, tmp_reg, loop_reg);
-
-   offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */
-
-   /* branch back to top of loop if tmp_reg != 0 */
-   spe_brnz(gen->f, tmp_reg, offset / 4);
-
-   /* XXX pop loop_exec mask */
-
-   gen->loop_nesting--;
-
-   emit_update_exec_mask(gen);
-
-   return TRUE;
-}
-
-
-static boolean
-emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   const int exec_reg = get_exec_mask_reg(gen);
-   const int loop_reg = get_loop_mask_reg(gen);
-
-   assert(gen->loop_nesting > 0);
-
-   spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
-   spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
-
-   emit_update_exec_mask(gen);
-
-   return TRUE;
-}
-
-
-static boolean
-emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
-   assert(gen->loop_nesting > 0);
-
-   return TRUE;
-}
-
-
-static boolean
-emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
-             boolean ddx)
-{
-   int ch;
-
-   FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int s_reg = get_src_reg(gen, ch, &inst->Src[0]);
-      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-
-      int t1_reg = get_itemp(gen);
-      int t2_reg = get_itemp(gen);
-
-      spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
-      if (ddx) {
-         spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
-      }
-      else {
-         spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
-      }
-      spe_fs(gen->f, d_reg, t2_reg, t1_reg);
-
-      free_itemps(gen);
-   }
-
-   return TRUE;
-}
-
-
-
-
-/**
- * Emit END instruction.
- * We just return from the shader function at this point.
- *
- * Note that there may be more code after this that would be
- * called by TGSI_OPCODE_CALL.
- */
-static boolean
-emit_END(struct codegen *gen)
-{
-   emit_epilogue(gen);
-   return TRUE;
-}
-
-
-/**
- * Emit code for the given instruction.  Just a big switch stmt.
- */
-static boolean
-emit_instruction(struct codegen *gen,
-                 const struct tgsi_full_instruction *inst)
-{
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ARL:
-      return emit_ARL(gen, inst);
-   case TGSI_OPCODE_MOV:
-      return emit_MOV(gen, inst);
-   case TGSI_OPCODE_ADD:
-   case TGSI_OPCODE_SUB:
-   case TGSI_OPCODE_MUL:
-      return emit_binop(gen, inst);
-   case TGSI_OPCODE_MAD:
-      return emit_MAD(gen, inst);
-   case TGSI_OPCODE_LRP:
-      return emit_LRP(gen, inst);
-   case TGSI_OPCODE_DP3:
-      return emit_DP3(gen, inst);
-   case TGSI_OPCODE_DP4:
-      return emit_DP4(gen, inst);
-   case TGSI_OPCODE_DPH:
-      return emit_DPH(gen, inst);
-   case TGSI_OPCODE_NRM:
-      return emit_NRM3(gen, inst);
-   case TGSI_OPCODE_XPD:
-      return emit_XPD(gen, inst);
-   case TGSI_OPCODE_RCP:
-   case TGSI_OPCODE_RSQ:
-      return emit_RCP_RSQ(gen, inst);
-   case TGSI_OPCODE_ABS:
-      return emit_ABS(gen, inst);
-   case TGSI_OPCODE_SGT:
-   case TGSI_OPCODE_SLT:
-   case TGSI_OPCODE_SGE:
-   case TGSI_OPCODE_SLE:
-   case TGSI_OPCODE_SEQ:
-   case TGSI_OPCODE_SNE:
-      return emit_inequality(gen, inst);
-   case TGSI_OPCODE_CMP:
-      return emit_CMP(gen, inst);
-   case TGSI_OPCODE_MIN:
-   case TGSI_OPCODE_MAX:
-      return emit_MIN_MAX(gen, inst);
-   case TGSI_OPCODE_TRUNC:
-      return emit_TRUNC(gen, inst);
-   case TGSI_OPCODE_FLR:
-      return emit_FLR(gen, inst);
-   case TGSI_OPCODE_FRC:
-      return emit_FRC(gen, inst);
-   case TGSI_OPCODE_END:
-      return emit_END(gen);
-
-   case TGSI_OPCODE_COS:
-      return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
-   case TGSI_OPCODE_SIN:
-      return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
-   case TGSI_OPCODE_POW:
-      return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
-   case TGSI_OPCODE_EX2:
-      return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
-   case TGSI_OPCODE_LG2:
-      return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
-   case TGSI_OPCODE_TEX:
-      /* fall-through for now */
-   case TGSI_OPCODE_TXD:
-      /* fall-through for now */
-   case TGSI_OPCODE_TXB:
-      /* fall-through for now */
-   case TGSI_OPCODE_TXL:
-      /* fall-through for now */
-   case TGSI_OPCODE_TXP:
-      return emit_TEX(gen, inst);
-   case TGSI_OPCODE_KIL:
-      return emit_KIL(gen, inst);
-
-   case TGSI_OPCODE_IF:
-      return emit_IF(gen, inst);
-   case TGSI_OPCODE_ELSE:
-      return emit_ELSE(gen, inst);
-   case TGSI_OPCODE_ENDIF:
-      return emit_ENDIF(gen, inst);
-
-   case TGSI_OPCODE_BGNLOOP:
-      return emit_BGNLOOP(gen, inst);
-   case TGSI_OPCODE_ENDLOOP:
-      return emit_ENDLOOP(gen, inst);
-   case TGSI_OPCODE_BRK:
-      return emit_BRK(gen, inst);
-   case TGSI_OPCODE_CONT:
-      return emit_CONT(gen, inst);
-
-   case TGSI_OPCODE_DDX:
-      return emit_DDX_DDY(gen, inst, TRUE);
-   case TGSI_OPCODE_DDY:
-      return emit_DDX_DDY(gen, inst, FALSE);
-
-   /* XXX lots more cases to do... */
-
-   default:
-      fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
-              inst->Instruction.Opcode);
-      return FALSE;
-   }
-
-   return TRUE;
-}
-
-
-
-/**
- * Emit code for a TGSI immediate value (vector of four floats).
- * This involves register allocation and initialization.
- * XXX the initialization should be done by a "prepare" stage, not
- * per quad execution!
- */
-static boolean
-emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
-{
-   int ch;
-
-   assert(gen->num_imm < MAX_TEMPS);
-
-   for (ch = 0; ch < 4; ch++) {
-      float val = immed->u[ch].Float;
-
-      if (ch > 0 && val == immed->u[ch - 1].Float) {
-         /* re-use previous register */
-         gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
-      }
-      else {
-         char str[100];
-         int reg = spe_allocate_available_register(gen->f);
-
-         if (reg < 0)
-            return FALSE;
-
-         sprintf(str, "init $%d = %f", reg, val);
-         spe_comment(gen->f, 0, str);
-
-         /* update immediate map */
-         gen->imm_regs[gen->num_imm][ch] = reg;
-
-         /* emit initializer instruction */
-         spe_load_float(gen->f, reg, val);
-      }
-   }
-
-   gen->num_imm++;
-
-   return TRUE;
-}
-
-
-
-/**
- * Emit "code" for a TGSI declaration.
- * We only care about TGSI TEMPORARY register declarations at this time.
- * For each TGSI TEMPORARY we allocate four SPE registers.
- */
-static boolean
-emit_declaration(struct cell_context *cell,
-                 struct codegen *gen, const struct tgsi_full_declaration *decl)
-{
-   int i, ch;
-
-   switch (decl->Declaration.File) {
-   case TGSI_FILE_TEMPORARY:
-      for (i = decl->Range.First;
-           i <= decl->Range.Last;
-           i++) {
-         assert(i < MAX_TEMPS);
-         for (ch = 0; ch < 4; ch++) {
-            gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
-            if (gen->temp_regs[i][ch] < 0)
-               return FALSE; /* out of regs */
-         }
-
-         /* XXX if we run out of SPE registers, we need to spill
-          * to SPU memory.  someday...
-          */
-
-         {
-            char buf[100];
-            sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
-                    gen->temp_regs[i][0], gen->temp_regs[i][1],
-                    gen->temp_regs[i][2], gen->temp_regs[i][3]);
-            spe_comment(gen->f, 0, buf);
-         }
-      }
-      break;
-   default:
-      ; /* ignore */
-   }
-
-   return TRUE;
-}
-
-
-
-/**
- * Translate TGSI shader code to SPE instructions.  This is done when
- * the state tracker gives us a new shader (via pipe->create_fs_state()).
- *
- * \param cell    the rendering context (in)
- * \param tokens  the TGSI shader (in)
- * \param f       the generated function (out)
- */
-boolean
-cell_gen_fragment_program(struct cell_context *cell,
-                          const struct tgsi_token *tokens,
-                          struct spe_function *f)
-{
-   struct tgsi_parse_context parse;
-   struct codegen gen;
-   uint ic = 0;
-
-   memset(&gen, 0, sizeof(gen));
-   gen.cell = cell;
-   gen.f = f;
-
-   /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
-   gen.inputs_reg = 3;     /* pointer to inputs array */
-   gen.outputs_reg = 4;    /* pointer to outputs array */
-   gen.constants_reg = 5;  /* pointer to constants array */
-
-   spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
-   spe_allocate_register(f, gen.inputs_reg);
-   spe_allocate_register(f, gen.outputs_reg);
-   spe_allocate_register(f, gen.constants_reg);
-
-   if (cell->debug_flags & CELL_DEBUG_ASM) {
-      spe_print_code(f, TRUE);
-      spe_indent(f, 2*8);
-      printf("Begin %s\n", __FUNCTION__);
-      tgsi_dump(tokens, 0);
-   }
-
-   tgsi_parse_init(&parse, tokens);
-
-   emit_prologue(&gen);
-
-   while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
-      tgsi_parse_token(&parse);
-
-      switch (parse.FullToken.Token.Type) {
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         if (f->print) {
-            _debug_printf("    # ");
-            tgsi_dump_immediate(&parse.FullToken.FullImmediate);
-         }
-         if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
-            gen.error = TRUE;
-         break;
-
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         if (f->print) {
-            _debug_printf("    # ");
-            tgsi_dump_declaration(&parse.FullToken.FullDeclaration);
-         }
-         if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
-            gen.error = TRUE;
-         break;
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         if (f->print) {
-            _debug_printf("    # ");
-            ic++;
-            tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
-         }
-         if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
-            gen.error = TRUE;
-         break;
-
-      default:
-         assert(0);
-      }
-   }
-
-   if (gen.error) {
-      /* terminate the SPE code */
-      return emit_END(&gen);
-   }
-
-   if (cell->debug_flags & CELL_DEBUG_ASM) {
-      printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
-      printf("End %s\n", __FUNCTION__);
-   }
-
-   tgsi_parse_free( &parse );
-
-   return !gen.error;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h
deleted file mode 100644
index 99faea70462..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-
-#ifndef CELL_GEN_FP_H
-#define CELL_GEN_FP_H
-
-
-
-extern boolean
-cell_gen_fragment_program(struct cell_context *cell,
-                          const struct tgsi_token *tokens,
-                          struct spe_function *f);
-
-
-#endif /* CELL_GEN_FP_H */
-
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
deleted file mode 100644
index 76a85178007..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ /dev/null
@@ -1,2189 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009 VMware, Inc.  All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Generate SPU per-fragment code (actually per-quad code).
- * \author Brian Paul
- * \author Bob Ellison
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "rtasm/rtasm_ppc_spe.h"
-#include "cell_context.h"
-#include "cell_gen_fragment.h"
-
-
-
-/** Do extra optimizations? */
-#define OPTIMIZATIONS 1
-
-
-/**
- * Generate SPE code to perform Z/depth testing.
- *
- * \param dsa         Gallium depth/stencil/alpha state to gen code for
- * \param f           SPE function to append instruction onto.
- * \param mask_reg    register containing quad/pixel "alive" mask (in/out)
- * \param ifragZ_reg  register containing integer fragment Z values (in)
- * \param ifbZ_reg    register containing integer frame buffer Z values (in/out)
- * \param zmask_reg   register containing result of Z test/comparison (out)
- *
- * Returns TRUE if the Z-buffer needs to be updated.
- */
-static boolean
-gen_depth_test(struct spe_function *f,
-               const struct pipe_depth_stencil_alpha_state *dsa,
-               int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
-{
-   /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_
-    * quantities.  This only makes a difference for 32-bit Z values though.
-    */
-   ASSERT(dsa->depth.enabled);
-
-   switch (dsa->depth.func) {
-   case PIPE_FUNC_EQUAL:
-      /* zmask = (ifragZ == ref) */
-      spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
-      /* mask = (mask & zmask) */
-      spe_and(f, mask_reg, mask_reg, zmask_reg);
-      break;
-
-   case PIPE_FUNC_NOTEQUAL:
-      /* zmask = (ifragZ == ref) */
-      spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
-      /* mask = (mask & ~zmask) */
-      spe_andc(f, mask_reg, mask_reg, zmask_reg);
-      break;
-
-   case PIPE_FUNC_GREATER:
-      /* zmask = (ifragZ > ref) */
-      spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
-      /* mask = (mask & zmask) */
-      spe_and(f, mask_reg, mask_reg, zmask_reg);
-      break;
-
-   case PIPE_FUNC_LESS:
-      /* zmask = (ref > ifragZ) */
-      spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
-      /* mask = (mask & zmask) */
-      spe_and(f, mask_reg, mask_reg, zmask_reg);
-      break;
-
-   case PIPE_FUNC_LEQUAL:
-      /* zmask = (ifragZ > ref) */
-      spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
-      /* mask = (mask & ~zmask) */
-      spe_andc(f, mask_reg, mask_reg, zmask_reg);
-      break;
-
-   case PIPE_FUNC_GEQUAL:
-      /* zmask = (ref > ifragZ) */
-      spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
-      /* mask = (mask & ~zmask) */
-      spe_andc(f, mask_reg, mask_reg, zmask_reg);
-      break;
-
-   case PIPE_FUNC_NEVER:
-      spe_il(f, mask_reg, 0);  /* mask = {0,0,0,0} */
-      spe_move(f, zmask_reg, mask_reg);  /* zmask = mask */
-      break;
-
-   case PIPE_FUNC_ALWAYS:
-      /* mask unchanged */
-      spe_il(f, zmask_reg, ~0);  /* zmask = {~0,~0,~0,~0} */
-      break;
-
-   default:
-      ASSERT(0);
-      break;
-   }
-
-   if (dsa->depth.writemask) {
-      /*
-       * If (ztest passed) {
-       *    framebufferZ = fragmentZ;
-       * }
-       * OR,
-       * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
-       */
-      spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
-      return TRUE;
-   }
-
-   return FALSE;
-}
-
-
-/**
- * Generate SPE code to perform alpha testing.
- *
- * \param dsa        Gallium depth/stencil/alpha state to gen code for
- * \param f          SPE function to append instruction onto.
- * \param mask_reg   register containing quad/pixel "alive" mask (in/out)
- * \param fragA_reg  register containing four fragment alpha values (in)
- */
-static void
-gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
-               struct spe_function *f, int mask_reg, int fragA_reg)
-{
-   int ref_reg = spe_allocate_available_register(f);
-   int amask_reg = spe_allocate_available_register(f);
-
-   ASSERT(dsa->alpha.enabled);
-
-   if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
-       (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
-      /* load/splat the alpha reference float value */
-      spe_load_float(f, ref_reg, dsa->alpha.ref_value);
-   }
-
-   /* emit code to do the alpha comparison, updating 'mask' */
-   switch (dsa->alpha.func) {
-   case PIPE_FUNC_EQUAL:
-      /* amask = (fragA == ref) */
-      spe_fceq(f, amask_reg, fragA_reg, ref_reg);
-      /* mask = (mask & amask) */
-      spe_and(f, mask_reg, mask_reg, amask_reg);
-      break;
-
-   case PIPE_FUNC_NOTEQUAL:
-      /* amask = (fragA == ref) */
-      spe_fceq(f, amask_reg, fragA_reg, ref_reg);
-      /* mask = (mask & ~amask) */
-      spe_andc(f, mask_reg, mask_reg, amask_reg);
-      break;
-
-   case PIPE_FUNC_GREATER:
-      /* amask = (fragA > ref) */
-      spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
-      /* mask = (mask & amask) */
-      spe_and(f, mask_reg, mask_reg, amask_reg);
-      break;
-
-   case PIPE_FUNC_LESS:
-      /* amask = (ref > fragA) */
-      spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
-      /* mask = (mask & amask) */
-      spe_and(f, mask_reg, mask_reg, amask_reg);
-      break;
-
-   case PIPE_FUNC_LEQUAL:
-      /* amask = (fragA > ref) */
-      spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
-      /* mask = (mask & ~amask) */
-      spe_andc(f, mask_reg, mask_reg, amask_reg);
-      break;
-
-   case PIPE_FUNC_GEQUAL:
-      /* amask = (ref > fragA) */
-      spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
-      /* mask = (mask & ~amask) */
-      spe_andc(f, mask_reg, mask_reg, amask_reg);
-      break;
-
-   case PIPE_FUNC_NEVER:
-      spe_il(f, mask_reg, 0);  /* mask = [0,0,0,0] */
-      break;
-
-   case PIPE_FUNC_ALWAYS:
-      /* no-op, mask unchanged */
-      break;
-
-   default:
-      ASSERT(0);
-      break;
-   }
-
-#if OPTIMIZATIONS
-   /* if mask == {0,0,0,0} we're all done, return */
-   {
-      /* re-use amask reg here */
-      int tmp_reg = amask_reg;
-      /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
-      spe_orx(f, tmp_reg, mask_reg);
-      /* if tmp[0] == 0 then return from function call */
-      spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
-   }
-#endif
-
-   spe_release_register(f, ref_reg);
-   spe_release_register(f, amask_reg);
-}
-
-
-/**
- * This pair of functions is used inline to allocate and deallocate
- * optional constant registers.  Once a constant is discovered to be 
- * needed, we will likely need it again, so we don't want to deallocate
- * it and have to allocate and load it again unnecessarily.
- */
-static INLINE void
-setup_optional_register(struct spe_function *f,
-                        int *r)
-{
-   if (*r < 0)
-      *r = spe_allocate_available_register(f);
-}
-
-static INLINE void
-release_optional_register(struct spe_function *f,
-                          int r)
-{
-   if (r >= 0)
-      spe_release_register(f, r);
-}
-
-static INLINE void
-setup_const_register(struct spe_function *f,
-                     int *r,
-                     float value)
-{
-   if (*r >= 0)
-      return;
-   setup_optional_register(f, r);
-   spe_load_float(f, *r, value);
-}
-
-static INLINE void
-release_const_register(struct spe_function *f,
-                       int r)
-{
-   release_optional_register(f, r);
-}
-
-
-
-/**
- * Unpack/convert framebuffer colors from four 32-bit packed colors
- * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
- * Each 8-bit color component is expanded into a float in [0.0, 1.0].
- */
-static void
-unpack_colors(struct spe_function *f,
-              enum pipe_format color_format,
-              int fbRGBA_reg,
-              int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg)
-{
-   int mask0_reg = spe_allocate_available_register(f);
-   int mask1_reg = spe_allocate_available_register(f);
-   int mask2_reg = spe_allocate_available_register(f);
-   int mask3_reg = spe_allocate_available_register(f);
-
-   spe_load_int(f, mask0_reg, 0xff);
-   spe_load_int(f, mask1_reg, 0xff00);
-   spe_load_int(f, mask2_reg, 0xff0000);
-   spe_load_int(f, mask3_reg, 0xff000000);
-
-   spe_comment(f, 0, "Unpack framebuffer colors, convert to floats");
-
-   switch (color_format) {
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      /* fbB = fbRGBA & mask */
-      spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg);
-
-      /* fbG = fbRGBA & mask */
-      spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg);
-
-      /* fbR = fbRGBA & mask */
-      spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg);
-
-      /* fbA = fbRGBA & mask */
-      spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg);
-
-      /* fbG = fbG >> 8 */
-      spe_roti(f, fbG_reg, fbG_reg, -8);
-
-      /* fbR = fbR >> 16 */
-      spe_roti(f, fbR_reg, fbR_reg, -16);
-
-      /* fbA = fbA >> 24 */
-      spe_roti(f, fbA_reg, fbA_reg, -24);
-      break;
-
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
-      /* fbA = fbRGBA & mask */
-      spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg);
-
-      /* fbR = fbRGBA & mask */
-      spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg);
-
-      /* fbG = fbRGBA & mask */
-      spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg);
-
-      /* fbB = fbRGBA & mask */
-      spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg);
-
-      /* fbR = fbR >> 8 */
-      spe_roti(f, fbR_reg, fbR_reg, -8);
-
-      /* fbG = fbG >> 16 */
-      spe_roti(f, fbG_reg, fbG_reg, -16);
-
-      /* fbB = fbB >> 24 */
-      spe_roti(f, fbB_reg, fbB_reg, -24);
-      break;
-
-   default:
-      ASSERT(0);
-   }
-
-   /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
-   spe_cuflt(f, fbR_reg, fbR_reg, 8);
-   spe_cuflt(f, fbG_reg, fbG_reg, 8);
-   spe_cuflt(f, fbB_reg, fbB_reg, 8);
-   spe_cuflt(f, fbA_reg, fbA_reg, 8);
-
-   spe_release_register(f, mask0_reg);
-   spe_release_register(f, mask1_reg);
-   spe_release_register(f, mask2_reg);
-   spe_release_register(f, mask3_reg);
-}
-
-
-/**
- * Generate SPE code to implement the given blend mode for a quad of pixels.
- * \param f          SPE function to append instruction onto.
- * \param fragR_reg  register with fragment red values (float) (in/out)
- * \param fragG_reg  register with fragment green values (float) (in/out)
- * \param fragB_reg  register with fragment blue values (float) (in/out)
- * \param fragA_reg  register with fragment alpha values (float) (in/out)
- * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
- */
-static void
-gen_blend(const struct pipe_blend_state *blend,
-          const struct pipe_blend_color *blend_color,
-          struct spe_function *f,
-          enum pipe_format color_format,
-          int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
-          int fbRGBA_reg)
-{
-   int term1R_reg = spe_allocate_available_register(f);
-   int term1G_reg = spe_allocate_available_register(f);
-   int term1B_reg = spe_allocate_available_register(f);
-   int term1A_reg = spe_allocate_available_register(f);
-
-   int term2R_reg = spe_allocate_available_register(f);
-   int term2G_reg = spe_allocate_available_register(f);
-   int term2B_reg = spe_allocate_available_register(f);
-   int term2A_reg = spe_allocate_available_register(f);
-
-   int fbR_reg = spe_allocate_available_register(f);
-   int fbG_reg = spe_allocate_available_register(f);
-   int fbB_reg = spe_allocate_available_register(f);
-   int fbA_reg = spe_allocate_available_register(f);
-
-   int tmp_reg = spe_allocate_available_register(f);
-
-   /* Optional constant registers we might or might not end up using;
-    * if we do use them, make sure we only allocate them once by
-    * keeping a flag on each one.
-    */
-   int one_reg = -1;
-   int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1;
-
-   ASSERT(blend->rt[0].blend_enable);
-
-   /* packed RGBA -> float colors */
-   unpack_colors(f, color_format, fbRGBA_reg,
-                 fbR_reg, fbG_reg, fbB_reg, fbA_reg);
-
-   /*
-    * Compute Src RGB terms.  We're actually looking for the value
-    * of (the appropriate RGB factors) * (the incoming source RGB color),
-    * because in some cases (like PIPE_BLENDFACTOR_ONE and 
-    * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math.
-    */
-   switch (blend->rt[0].rgb_src_factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      /* factors = (1,1,1), so term = (R,G,B) */
-      spe_move(f, term1R_reg, fragR_reg);
-      spe_move(f, term1G_reg, fragG_reg);
-      spe_move(f, term1B_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      /* factors = (0,0,0), so term = (0,0,0) */
-      spe_load_float(f, term1R_reg, 0.0f);
-      spe_load_float(f, term1G_reg, 0.0f);
-      spe_load_float(f, term1B_reg, 0.0f);
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
-      spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
-      spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
-      spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
-      spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
-      spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
-      spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) 
-       * or in other words term = (R-R*R, G-G*G, B-B*B)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg);
-      spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg);
-      spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
-      spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
-      spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
-      spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb))
-       * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg);
-      spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg);
-      spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A))
-       * or term = (R-R*A,G-G*A,B-B*A)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg);
-      spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg);
-      spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
-      spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
-      spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
-      spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) 
-       * or term = (R-R*Afb,G-G*Afb,b-B*Afb)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg);
-      spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg);
-      spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      /* We need the optional constant color registers */
-      setup_const_register(f, &constR_reg, blend_color->color[0]);
-      setup_const_register(f, &constG_reg, blend_color->color[1]);
-      setup_const_register(f, &constB_reg, blend_color->color[2]);
-      /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
-      spe_fm(f, term1R_reg, fragR_reg, constR_reg);
-      spe_fm(f, term1G_reg, fragG_reg, constG_reg);
-      spe_fm(f, term1B_reg, fragB_reg, constB_reg);
-      break;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      /* we'll need the optional constant alpha register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
-      /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
-      spe_fm(f, term1R_reg, fragR_reg, constA_reg);
-      spe_fm(f, term1G_reg, fragG_reg, constA_reg);
-      spe_fm(f, term1B_reg, fragB_reg, constA_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      /* We need the optional constant color registers */
-      setup_const_register(f, &constR_reg, blend_color->color[0]);
-      setup_const_register(f, &constG_reg, blend_color->color[1]);
-      setup_const_register(f, &constB_reg, blend_color->color[2]);
-      /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) 
-       * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg);
-      spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg);
-      spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      /* We need the optional constant color registers */
-      setup_const_register(f, &constR_reg, blend_color->color[0]);
-      setup_const_register(f, &constG_reg, blend_color->color[1]);
-      setup_const_register(f, &constB_reg, blend_color->color[2]);
-      /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
-       * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg);
-      spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg);
-      spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      /* We'll need the optional {1,1,1,1} register */
-      setup_const_register(f, &one_reg, 1.0f);
-      /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so 
-       * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
-       * We could expand the term (as a*min(b,c) == min(a*b,a*c)
-       * as long as a is positive), but then we'd have to do three
-       * spe_float_min() functions instead of one, so this is simpler.
-       */
-      /* tmp = 1 - Afb */
-      spe_fs(f, tmp_reg, one_reg, fbA_reg);
-      /* tmp = min(A,tmp) */
-      spe_float_min(f, tmp_reg, fragA_reg, tmp_reg);
-      /* term = R*tmp */
-      spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
-      spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
-      spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
-      break;
-
-      /* These are special D3D cases involving a second color output
-       * from the fragment shader.  I'm not sure we can support them
-       * yet... XXX
-       */
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-
-   default:
-      ASSERT(0);
-   }
-
-   /*
-    * Compute Src Alpha term.  Like the above, we're looking for
-    * the full term A*factor, not just the factor itself, because
-    * in many cases we can avoid doing unnecessary multiplies.
-    */
-   switch (blend->rt[0].alpha_src_factor) {
-   case PIPE_BLENDFACTOR_ZERO:
-      /* factor = 0, so term = 0 */
-      spe_load_float(f, term1A_reg, 0.0f);
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */
-   case PIPE_BLENDFACTOR_ONE:
-      /* factor = 1, so term = A */
-      spe_move(f, term1A_reg, fragA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      /* factor = A, so term = A*A */
-      spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      /* factor = 1-A, so term = A*(1-A) = A-A*A */
-      /* fnms(a,b,c,d) computes a = d - b*c */
-      spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      /* factor = Afb, so term = A*Afb */
-      spe_fm(f, term1A_reg, fragA_reg, fbA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */
-      /* fnms(a,b,c,d) computes a = d - b*c */
-      spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      /* We need the optional constA_reg register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
-      /* factor = Ac, so term = A*Ac */
-      spe_fm(f, term1A_reg, fragA_reg, constA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      /* We need the optional constA_reg register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
-      /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */
-      /* fnms(a,b,c,d) computes a = d - b*c */
-      spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg);
-      break;
-
-      /* These are special D3D cases involving a second color output
-       * from the fragment shader.  I'm not sure we can support them
-       * yet... XXX
-       */
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-   default:
-      ASSERT(0);
-   }
-
-   /*
-    * Compute Dest RGB term.  Like the above, we're looking for
-    * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because
-    * in many cases we can avoid doing unnecessary multiplies.
-    */
-   switch (blend->rt[0].rgb_dst_factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */
-      spe_move(f, term2R_reg, fbR_reg);
-      spe_move(f, term2G_reg, fbG_reg);
-      spe_move(f, term2B_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      /* factor s= (0,0,0), so term = (0,0,0) */
-      spe_load_float(f, term2R_reg, 0.0f);
-      spe_load_float(f, term2G_reg, 0.0f);
-      spe_load_float(f, term2B_reg, 0.0f);
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */
-      spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
-      spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
-      spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) 
-       * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg);
-      spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg);
-      spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */
-      spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
-      spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
-      spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */
-      /* fnms(a,b,c,d) computes a = d - b*c */
-      spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg);
-      spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg);
-      spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */
-      spe_fm(f, term2R_reg, fbR_reg, fbR_reg);
-      spe_fm(f, term2G_reg, fbG_reg, fbG_reg);
-      spe_fm(f, term2B_reg, fbB_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb))
-       * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg);
-      spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg);
-      spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */
-      spe_fm(f, term2R_reg, fbR_reg, fbA_reg);
-      spe_fm(f, term2G_reg, fbG_reg, fbA_reg);
-      spe_fm(f, term2B_reg, fbB_reg, fbA_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) 
-       * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg);
-      spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg);
-      spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      /* We need the optional constant color registers */
-      setup_const_register(f, &constR_reg, blend_color->color[0]);
-      setup_const_register(f, &constG_reg, blend_color->color[1]);
-      setup_const_register(f, &constB_reg, blend_color->color[2]);
-      /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
-      spe_fm(f, term2R_reg, fbR_reg, constR_reg);
-      spe_fm(f, term2G_reg, fbG_reg, constG_reg);
-      spe_fm(f, term2B_reg, fbB_reg, constB_reg);
-      break;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      /* we'll need the optional constant alpha register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
-      /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
-      spe_fm(f, term2R_reg, fbR_reg, constA_reg);
-      spe_fm(f, term2G_reg, fbG_reg, constA_reg);
-      spe_fm(f, term2B_reg, fbB_reg, constA_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      /* We need the optional constant color registers */
-      setup_const_register(f, &constR_reg, blend_color->color[0]);
-      setup_const_register(f, &constG_reg, blend_color->color[1]);
-      setup_const_register(f, &constB_reg, blend_color->color[2]);
-      /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) 
-       * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg);
-      spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg);
-      spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      /* We need the optional constant color registers */
-      setup_const_register(f, &constR_reg, blend_color->color[0]);
-      setup_const_register(f, &constG_reg, blend_color->color[1]);
-      setup_const_register(f, &constB_reg, blend_color->color[2]);
-      /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
-       * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
-       * fnms(a,b,c,d) computes a = d - b*c
-       */
-      spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg);
-      spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg);
-      spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg);
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */
-      ASSERT(0);
-      break;
-
-      /* These are special D3D cases involving a second color output
-       * from the fragment shader.  I'm not sure we can support them
-       * yet... XXX
-       */
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-
-   default:
-      ASSERT(0);
-   }
-
-   /*
-    * Compute Dest Alpha term.  Like the above, we're looking for
-    * the full term Afb*factor, not just the factor itself, because
-    * in many cases we can avoid doing unnecessary multiplies.
-    */
-   switch (blend->rt[0].alpha_dst_factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      /* factor = 1, so term = Afb */
-      spe_move(f, term2A_reg, fbA_reg);
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      /* factor = 0, so term = 0 */
-      spe_load_float(f, term2A_reg, 0.0f);
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      /* factor = A, so term = Afb*A */
-      spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */
-      /* fnms(a,b,c,d) computes a = d - b*c */
-      spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      /* factor = Afb, so term = Afb*Afb */
-      spe_fm(f, term2A_reg, fbA_reg, fbA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */
-      /* fnms(a,b,c,d) computes a = d - b*c */
-      spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      /* We need the optional constA_reg register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
-      /* factor = Ac, so term = Afb*Ac */
-      spe_fm(f, term2A_reg, fbA_reg, constA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      /* We need the optional constA_reg register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
-      /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */
-      /* fnms(a,b,c,d) computes a = d - b*c */
-      spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg);
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */
-      ASSERT(0);
-      break;
-
-      /* These are special D3D cases involving a second color output
-       * from the fragment shader.  I'm not sure we can support them
-       * yet... XXX
-       */
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-   default:
-      ASSERT(0);
-   }
-
-   /*
-    * Combine Src/Dest RGB terms as per the blend equation.
-    */
-   switch (blend->rt[0].rgb_func) {
-   case PIPE_BLEND_ADD:
-      spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
-      spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
-      spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
-      break;
-   case PIPE_BLEND_SUBTRACT:
-      spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
-      spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
-      spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
-      break;
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
-      spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
-      spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
-      break;
-   case PIPE_BLEND_MIN:
-      spe_float_min(f, fragR_reg, term1R_reg, term2R_reg);
-      spe_float_min(f, fragG_reg, term1G_reg, term2G_reg);
-      spe_float_min(f, fragB_reg, term1B_reg, term2B_reg);
-      break;
-   case PIPE_BLEND_MAX:
-      spe_float_max(f, fragR_reg, term1R_reg, term2R_reg);
-      spe_float_max(f, fragG_reg, term1G_reg, term2G_reg);
-      spe_float_max(f, fragB_reg, term1B_reg, term2B_reg);
-      break;
-   default:
-      ASSERT(0);
-   }
-
-   /*
-    * Combine Src/Dest A term
-    */
-   switch (blend->rt[0].alpha_func) {
-   case PIPE_BLEND_ADD:
-      spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
-      break;
-   case PIPE_BLEND_SUBTRACT:
-      spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
-      break;
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
-      break;
-   case PIPE_BLEND_MIN:
-      spe_float_min(f, fragA_reg, term1A_reg, term2A_reg);
-      break;
-   case PIPE_BLEND_MAX:
-      spe_float_max(f, fragA_reg, term1A_reg, term2A_reg);
-      break;
-   default:
-      ASSERT(0);
-   }
-
-   spe_release_register(f, term1R_reg);
-   spe_release_register(f, term1G_reg);
-   spe_release_register(f, term1B_reg);
-   spe_release_register(f, term1A_reg);
-
-   spe_release_register(f, term2R_reg);
-   spe_release_register(f, term2G_reg);
-   spe_release_register(f, term2B_reg);
-   spe_release_register(f, term2A_reg);
-
-   spe_release_register(f, fbR_reg);
-   spe_release_register(f, fbG_reg);
-   spe_release_register(f, fbB_reg);
-   spe_release_register(f, fbA_reg);
-
-   spe_release_register(f, tmp_reg);
-
-   /* Free any optional registers that actually got used */
-   release_const_register(f, one_reg);
-   release_const_register(f, constR_reg);
-   release_const_register(f, constG_reg);
-   release_const_register(f, constB_reg);
-   release_const_register(f, constA_reg);
-}
-
-
-static void
-gen_logicop(const struct pipe_blend_state *blend,
-            struct spe_function *f,
-            int fragRGBA_reg, int fbRGBA_reg)
-{
-   /* We've got four 32-bit RGBA packed pixels in each of
-    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
-    * reds, greens, blues, and alphas.
-    * */
-   ASSERT(blend->logicop_enable);
-
-   switch(blend->logicop_func) {
-      case PIPE_LOGICOP_CLEAR: /* 0 */
-         spe_zero(f, fragRGBA_reg);
-         break;
-      case PIPE_LOGICOP_NOR: /* ~(s | d) */
-         spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */
-         /* andc R, A, B computes R = A & ~B */
-         spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
-         break;
-      case PIPE_LOGICOP_COPY_INVERTED: /* ~s */
-         spe_complement(f, fragRGBA_reg, fragRGBA_reg);
-         break;
-      case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */
-         /* andc R, A, B computes R = A & ~B */
-         spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_INVERT: /* ~d */
-         /* Note that (A nor A) == ~(A|A) == ~A */
-         spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_XOR: /* s ^ d */
-         spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_NAND: /* ~(s & d) */
-         spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_AND: /* s & d */
-         spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */
-         spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         spe_complement(f, fragRGBA_reg, fragRGBA_reg);
-         break;
-      case PIPE_LOGICOP_NOOP: /* d */
-         spe_move(f, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */
-         /* orc R, A, B computes R = A | ~B */
-         spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
-         break;
-      case PIPE_LOGICOP_COPY: /* s */
-         break;
-      case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */
-         /* orc R, A, B computes R = A | ~B */
-         spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_OR: /* s | d */
-         spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
-         break;
-      case PIPE_LOGICOP_SET: /* 1 */
-         spe_load_int(f, fragRGBA_reg, 0xffffffff);
-         break;
-      default:
-         ASSERT(0);
-   }
-}
-
-
-/**
- * Generate code to pack a quad of float colors into four 32-bit integers.
- *
- * \param f             SPE function to append instruction onto.
- * \param color_format  the dest color packing format
- * \param r_reg         register containing four red values (in/clobbered)
- * \param g_reg         register containing four green values (in/clobbered)
- * \param b_reg         register containing four blue values (in/clobbered)
- * \param a_reg         register containing four alpha values (in/clobbered)
- * \param rgba_reg      register to store the packed RGBA colors (out)
- */
-static void
-gen_pack_colors(struct spe_function *f,
-                enum pipe_format color_format,
-                int r_reg, int g_reg, int b_reg, int a_reg,
-                int rgba_reg)
-{
-   int rg_reg = spe_allocate_available_register(f);
-   int ba_reg = spe_allocate_available_register(f);
-
-   /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
-   spe_cfltu(f, r_reg, r_reg, 32);
-   spe_cfltu(f, g_reg, g_reg, 32);
-   spe_cfltu(f, b_reg, b_reg, 32);
-   spe_cfltu(f, a_reg, a_reg, 32);
-
-   /* Shift the most significant bytes to the least significant positions.
-    * I.e.: reg = reg >> 24
-    */
-   spe_rotmi(f, r_reg, r_reg, -24);
-   spe_rotmi(f, g_reg, g_reg, -24);
-   spe_rotmi(f, b_reg, b_reg, -24);
-   spe_rotmi(f, a_reg, a_reg, -24);
-
-   /* Shift the color bytes according to the surface format */
-   if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
-      spe_roti(f, g_reg, g_reg, 8);   /* green <<= 8 */
-      spe_roti(f, r_reg, r_reg, 16);  /* red <<= 16 */
-      spe_roti(f, a_reg, a_reg, 24);  /* alpha <<= 24 */
-   }
-   else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
-      spe_roti(f, r_reg, r_reg, 8);   /* red <<= 8 */
-      spe_roti(f, g_reg, g_reg, 16);  /* green <<= 16 */
-      spe_roti(f, b_reg, b_reg, 24);  /* blue <<= 24 */
-   }
-   else {
-      ASSERT(0);
-   }
-
-   /* Merge red, green, blue, alpha registers to make packed RGBA colors.
-    * Eg: after shifting according to color_format we might have:
-    *     R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
-    *     G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
-    *     B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
-    *     A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
-    * OR-ing all those together gives us four packed colors:
-    *  RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
-    */
-   spe_or(f, rg_reg, r_reg, g_reg);
-   spe_or(f, ba_reg, a_reg, b_reg);
-   spe_or(f, rgba_reg, rg_reg, ba_reg);
-
-   spe_release_register(f, rg_reg);
-   spe_release_register(f, ba_reg);
-}
-
-
-static void
-gen_colormask(struct spe_function *f,
-              uint colormask,
-              enum pipe_format color_format,
-              int fragRGBA_reg, int fbRGBA_reg)
-{
-   /* We've got four 32-bit RGBA packed pixels in each of
-    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
-    * reds, greens, blues, and alphas.  Further, the pixels
-    * are packed according to the given color format, not
-    * necessarily RGBA...
-    */
-   uint r_mask;
-   uint g_mask;
-   uint b_mask;
-   uint a_mask;
-
-   /* Calculate exactly where the bits for any particular color
-    * end up, so we can mask them correctly.
-    */
-   switch(color_format) {
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
-         /* ARGB */
-         a_mask = 0xff000000;
-         r_mask = 0x00ff0000;
-         g_mask = 0x0000ff00;
-         b_mask = 0x000000ff;
-         break;
-      case PIPE_FORMAT_A8R8G8B8_UNORM:
-         /* BGRA */
-         b_mask = 0xff000000;
-         g_mask = 0x00ff0000;
-         r_mask = 0x0000ff00;
-         a_mask = 0x000000ff;
-         break;
-      default:
-         ASSERT(0);
-   }
-
-   /* For each R, G, B, and A component we're supposed to mask out, 
-    * clear its bits.   Then our mask operation later will work 
-    * as expected.
-    */
-   if (!(colormask & PIPE_MASK_R)) {
-      r_mask = 0;
-   }
-   if (!(colormask & PIPE_MASK_G)) {
-      g_mask = 0;
-   }
-   if (!(colormask & PIPE_MASK_B)) {
-      b_mask = 0;
-   }
-   if (!(colormask & PIPE_MASK_A)) {
-      a_mask = 0;
-   }
-
-   /* Get a temporary register to hold the mask that will be applied
-    * to the fragment
-    */
-   int colormask_reg = spe_allocate_available_register(f);
-
-   /* The actual mask we're going to use is an OR of the remaining R, G, B,
-    * and A masks.  Load the result value into our temporary register.
-    */
-   spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask);
-
-   /* Use the mask register to select between the fragment color
-    * values and the frame buffer color values.  Wherever the
-    * mask has a 0 bit, the current frame buffer color should override
-    * the fragment color.  Wherever the mask has a 1 bit, the 
-    * fragment color should persevere.  The Select Bits (selb rt, rA, rB, rM)
-    * instruction will select bits from its first operand rA wherever the
-    * the mask bits rM are 0, and from its second operand rB wherever the
-    * mask bits rM are 1.  That means that the frame buffer color is the
-    * first operand, and the fragment color the second.
-    */
-    spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
-
-    /* Release the temporary register and we're done */
-    spe_release_register(f, colormask_reg);
-}
-
-
-/**
- * This function is annoyingly similar to gen_depth_test(), above, except
- * that instead of comparing two varying values (i.e. fragment and buffer),
- * we're comparing a varying value with a static value.  As such, we have
- * access to the Compare Immediate instructions where we don't in 
- * gen_depth_test(), which is what makes us very different.
- *
- * There's some added complexity if there's a non-trivial state->mask
- * value; then stencil and reference both must be masked
- *
- * The return value in the stencil_pass_reg is a bitmask of valid
- * fragments that also passed the stencil test.  The bitmask of valid
- * fragments that failed would be found in
- * (fragment_mask_reg & ~stencil_pass_reg).
- */
-static void
-gen_stencil_test(struct spe_function *f,
-                 const struct pipe_stencil_state *state,
-                 const unsigned ref_value,
-                 uint stencil_max_value,
-                 int fragment_mask_reg,
-                 int fbS_reg, 
-                 int stencil_pass_reg)
-{
-   /* Generate code that puts the set of passing fragments into the
-    * stencil_pass_reg register, taking into account whether each fragment
-    * was active to begin with.
-    */
-   switch (state->func) {
-   case PIPE_FUNC_EQUAL:
-      if (state->valuemask == stencil_max_value) {
-         /* stencil_pass = fragment_mask & (s == reference) */
-         spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value);
-         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-      }
-      else {
-         /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */
-         uint tmp_masked_stencil = spe_allocate_available_register(f);
-         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
-         spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
-                                state->valuemask & ref_value);
-         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_masked_stencil);
-      }
-      break;
-
-   case PIPE_FUNC_NOTEQUAL:
-      if (state->valuemask == stencil_max_value) {
-         /* stencil_pass = fragment_mask & ~(s == reference) */
-         spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value);
-         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-      }
-      else {
-         /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */
-         int tmp_masked_stencil = spe_allocate_available_register(f);
-         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
-         spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
-                                state->valuemask & ref_value);
-         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_masked_stencil);
-      }
-      break;
-
-   case PIPE_FUNC_LESS:
-      if (state->valuemask == stencil_max_value) {
-         /* stencil_pass = fragment_mask & (reference < s)  */
-         spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, ref_value);
-         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-      }
-      else {
-         /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */
-         int tmp_masked_stencil = spe_allocate_available_register(f);
-         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
-         spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
-                                  state->valuemask & ref_value);
-         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_masked_stencil);
-      }
-      break;
-
-   case PIPE_FUNC_GREATER:
-      if (state->valuemask == stencil_max_value) {
-         /* stencil_pass = fragment_mask & (reference > s) */
-         /* There's no convenient Compare Less Than Immediate instruction, so
-          * we'll have to do this one the harder way, by loading a register and 
-          * comparing directly.  Compare Logical Greater Than Word (clgt) 
-          * treats its operands as unsigned - no sign extension.
-          */
-         int tmp_reg = spe_allocate_available_register(f);
-         spe_load_uint(f, tmp_reg, ref_value);
-         spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
-         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_reg);
-      }
-      else {
-         /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */
-         int tmp_reg = spe_allocate_available_register(f);
-         int tmp_masked_stencil = spe_allocate_available_register(f);
-         spe_load_uint(f, tmp_reg, state->valuemask & ref_value);
-         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
-         spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
-         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_reg);
-         spe_release_register(f, tmp_masked_stencil);
-      }
-      break;
-
-   case PIPE_FUNC_GEQUAL:
-      if (state->valuemask == stencil_max_value) {
-         /* stencil_pass = fragment_mask & (reference >= s) 
-          *              = fragment_mask & ~(s > reference) */
-         spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg,
-                                  ref_value);
-         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-      }
-      else {
-         /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */
-         int tmp_masked_stencil = spe_allocate_available_register(f);
-         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
-         spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
-                                  state->valuemask & ref_value);
-         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_masked_stencil);
-      }
-      break;
-
-   case PIPE_FUNC_LEQUAL:
-      if (state->valuemask == stencil_max_value) {
-         /* stencil_pass = fragment_mask & (reference <= s) ]
-          *               = fragment_mask & ~(reference > s) */
-         /* As above, we have to do this by loading a register */
-         int tmp_reg = spe_allocate_available_register(f);
-         spe_load_uint(f, tmp_reg, ref_value);
-         spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
-         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_reg);
-      }
-      else {
-         /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */
-         int tmp_reg = spe_allocate_available_register(f);
-         int tmp_masked_stencil = spe_allocate_available_register(f);
-         spe_load_uint(f, tmp_reg, ref_value & state->valuemask);
-         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
-         spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
-         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
-         spe_release_register(f, tmp_reg);
-         spe_release_register(f, tmp_masked_stencil);
-      }
-      break;
-
-   case PIPE_FUNC_NEVER:
-      /* stencil_pass = fragment_mask & 0 = 0 */
-      spe_load_uint(f, stencil_pass_reg, 0);
-      break;
-
-   case PIPE_FUNC_ALWAYS:
-      /* stencil_pass = fragment_mask & 1 = fragment_mask */
-      spe_move(f, stencil_pass_reg, fragment_mask_reg);
-      break;
-   }
-
-   /* The fragments that passed the stencil test are now in stencil_pass_reg.
-    * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg).
-    */
-}
-
-
-/**
- * This function generates code that calculates a set of new stencil values
- * given the earlier values and the operation to apply.  It does not
- * apply any tests.  It is intended to be called up to 3 times
- * (for the stencil fail operation, for the stencil pass-z fail operation,
- * and for the stencil pass-z pass operation) to collect up to three
- * possible sets of values, and for the caller to combine them based
- * on the result of the tests.
- *
- * stencil_max_value should be (2^n - 1) where n is the number of bits
- * in the stencil buffer - in other words, it should be usable as a mask.
- */
-static void
-gen_stencil_values(struct spe_function *f,
-                   uint stencil_op,
-                   uint stencil_ref_value,
-                   uint stencil_max_value,
-                   int fbS_reg,
-                   int newS_reg)
-{
-   /* The code below assumes that newS_reg and fbS_reg are not the same
-    * register; if they can be, the calculations below will have to use
-    * an additional temporary register.  For now, mark the assumption
-    * with an assertion that will fail if they are the same.
-    */
-   ASSERT(fbS_reg != newS_reg);
-
-   /* The code also assumes that the stencil_max_value is of the form
-    * 2^n-1 and can therefore be used as a mask for the valid bits in 
-    * addition to a maximum.  Make sure this is the case as well.
-    * The clever math below exploits the fact that incrementing a 
-    * binary number serves to flip all the bits of a number starting at
-    * the LSB and continuing to (and including) the first zero bit
-    * found.  That means that a number and its increment will always
-    * have at least one bit in common (the high order bit, if nothing
-    * else) *unless* the number is zero, *or* the number is of a form
-    * consisting of some number of 1s in the low-order bits followed
-    * by nothing but 0s in the high-order bits.  The latter case
-    * implies it's of the form 2^n-1.
-    */
-   ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0);
-
-   switch(stencil_op) {
-   case PIPE_STENCIL_OP_KEEP:
-      /* newS = S */
-      spe_move(f, newS_reg, fbS_reg);
-      break;
-
-   case PIPE_STENCIL_OP_ZERO:
-      /* newS = 0 */
-      spe_zero(f, newS_reg);
-      break;
-
-   case PIPE_STENCIL_OP_REPLACE:
-      /* newS = stencil reference value */
-      spe_load_uint(f, newS_reg, stencil_ref_value);
-      break;
-
-   case PIPE_STENCIL_OP_INCR: {
-      /* newS = (s == max ? max : s + 1) */
-      int equals_reg = spe_allocate_available_register(f);
-
-      spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value);
-      /* Add Word Immediate computes rT = rA + 10-bit signed immediate */
-      spe_ai(f, newS_reg, fbS_reg, 1);
-      /* Select from the current value or the new value based on the equality test */
-      spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
-
-      spe_release_register(f, equals_reg);
-      break;
-   }
-   case PIPE_STENCIL_OP_DECR: {
-      /* newS = (s == 0 ? 0 : s - 1) */
-      int equals_reg = spe_allocate_available_register(f);
-
-      spe_compare_equal_uint(f, equals_reg, fbS_reg, 0);
-      /* Add Word Immediate with a (-1) value works */
-      spe_ai(f, newS_reg, fbS_reg, -1);
-      /* Select from the current value or the new value based on the equality test */
-      spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
-
-      spe_release_register(f, equals_reg);
-      break;
-   }
-   case PIPE_STENCIL_OP_INCR_WRAP:
-      /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can
-       * do a normal add and mask off the correct bits 
-       */
-      spe_ai(f, newS_reg, fbS_reg, 1);
-      spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
-      break;
-
-   case PIPE_STENCIL_OP_DECR_WRAP:
-      /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */
-      spe_ai(f, newS_reg, fbS_reg, -1);
-      spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
-      break;
-
-   case PIPE_STENCIL_OP_INVERT:
-      /* newS = ~s.  We take advantage of the mask/max value to invert only
-       * the valid bits for the field so we don't have to do an extra "and".
-       */
-      spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value);
-      break;
-
-   default:
-      ASSERT(0);
-   }
-}
-
-
-/**
- * This function generates code to get all the necessary possible
- * stencil values.  For each of the output registers (fail_reg,
- * zfail_reg, and zpass_reg), it either allocates a new register
- * and calculates a new set of values based on the stencil operation,
- * or it reuses a register allocation and calculation done for an
- * earlier (matching) operation, or it reuses the fbS_reg register
- * (if the stencil operation is KEEP, which doesn't change the 
- * stencil buffer).
- *
- * Since this function allocates a variable number of registers,
- * to avoid incurring complex logic to free them, they should
- * be allocated after a spe_allocate_register_set() call
- * and released by the corresponding spe_release_register_set() call.
- */
-static void
-gen_get_stencil_values(struct spe_function *f,
-                       const struct pipe_stencil_state *stencil,
-                       const unsigned ref_value,
-                       const uint depth_enabled,
-                       int fbS_reg, 
-                       int *fail_reg,
-                       int *zfail_reg, 
-                       int *zpass_reg)
-{
-   uint zfail_op;
-
-   /* Stenciling had better be enabled here */
-   ASSERT(stencil->enabled);
-
-   /* If the depth test is not enabled, it is treated as though it always
-    * passes, which means that the zfail_op is not considered - a
-    * failing stencil test triggers the fail_op, and a passing one
-    * triggers the zpass_op
-    *
-    * As an optimization, override calculation of the zfail_op values
-    * if they aren't going to be used.  By setting the value of
-    * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
-    * to match the incoming stencil values, and no calculation will
-    * be done.
-    */
-   if (depth_enabled) {
-      zfail_op = stencil->zfail_op;
-   }
-   else {
-      zfail_op = PIPE_STENCIL_OP_KEEP;
-   }
-
-   /* One-sided or front-facing stencil */
-   if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
-      *fail_reg = fbS_reg;
-   }
-   else {
-      *fail_reg = spe_allocate_available_register(f);
-      gen_stencil_values(f, stencil->fail_op, ref_value, 
-         0xff, fbS_reg, *fail_reg);
-   }
-
-   /* Check the possibly overridden value, not the structure value */
-   if (zfail_op == PIPE_STENCIL_OP_KEEP) {
-      *zfail_reg = fbS_reg;
-   }
-   else if (zfail_op == stencil->fail_op) {
-      *zfail_reg = *fail_reg;
-   }
-   else {
-      *zfail_reg = spe_allocate_available_register(f);
-      gen_stencil_values(f, stencil->zfail_op, ref_value, 
-         0xff, fbS_reg, *zfail_reg);
-   }
-
-   if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
-      *zpass_reg = fbS_reg;
-   }
-   else if (stencil->zpass_op == stencil->fail_op) {
-      *zpass_reg = *fail_reg;
-   }
-   else if (stencil->zpass_op == zfail_op) {
-      *zpass_reg = *zfail_reg;
-   }
-   else {
-      *zpass_reg = spe_allocate_available_register(f);
-      gen_stencil_values(f, stencil->zpass_op, ref_value, 
-         0xff, fbS_reg, *zpass_reg);
-   }
-}
-
-/**
- * Note that fbZ_reg may *not* be set on entry, if in fact
- * the depth test is not enabled.  This function must not use
- * the register if depth is not enabled.
- */
-static boolean
-gen_stencil_depth_test(struct spe_function *f, 
-                       const struct pipe_depth_stencil_alpha_state *dsa,
-                       const struct pipe_stencil_ref *stencil_ref,
-                       const uint facing,
-                       const int mask_reg, const int fragZ_reg, 
-                       const int fbZ_reg, const int fbS_reg)
-{
-   /* True if we've generated code that could require writeback to the
-    * depth and/or stencil buffers
-    */
-   boolean modified_buffers = FALSE;
-
-   boolean need_to_calculate_stencil_values;
-   boolean need_to_writemask_stencil_values;
-
-   struct pipe_stencil_state *stencil;
-
-   /* Registers.  We may or may not actually allocate these, depending
-    * on whether the state values indicate that we need them.
-    */
-   int stencil_pass_reg, stencil_fail_reg;
-   int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values;
-   int stencil_writemask_reg;
-   int zmask_reg;
-   int newS_reg;
-   unsigned ref_value;
-
-   /* Stenciling is quite complex: up to six different configurable stencil 
-    * operations/calculations can be required (three each for front-facing
-    * and back-facing fragments).  Many of those operations will likely 
-    * be identical, so there's good reason to try to avoid calculating 
-    * the same values more than once (which unfortunately makes the code less 
-    * straightforward).
-    *
-    * To make register management easier, we start a new 
-    * register set; we can release all the registers in the set at
-    * once, and avoid having to keep track of exactly which registers
-    * we allocate.  We can still allocate and free registers as 
-    * desired (if we know we no longer need a register), but we don't
-    * have to spend the complexity to track the more difficult variant
-    * register usage scenarios.
-    */
-   spe_comment(f, 0, "Allocating stencil register set");
-   spe_allocate_register_set(f);
-
-   /* The facing we're given is the fragment facing; it doesn't
-    * exactly match the stencil facing.  If stencil is enabled,
-    * but two-sided stencil is *not* enabled, we use the same
-    * stencil settings for both front- and back-facing fragments.
-    * We only use the "back-facing" stencil for backfacing fragments
-    * if two-sided stenciling is enabled.
-    */
-   if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
-      stencil = &dsa->stencil[1];
-      ref_value = stencil_ref->ref_value[1];
-   }
-   else {
-      stencil = &dsa->stencil[0];
-      ref_value = stencil_ref->ref_value[0];
-   }
-
-   /* Calculate the writemask.  If the writemask is trivial (either
-    * all 0s, meaning that we don't need to calculate any stencil values
-    * because they're not going to change the stencil anyway, or all 1s,
-    * meaning that we have to calculate the stencil values but do not
-    * need to mask them), we can avoid generating code.  Don't forget
-    * that we need to consider backfacing stencil, if enabled.
-    *
-    * Note that if the backface stencil is *not* enabled, the backface
-    * stencil will have the same values as the frontface stencil.
-    */
-   if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
-       stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
-       stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
-       need_to_calculate_stencil_values = FALSE;
-       need_to_writemask_stencil_values = FALSE;
-    }
-    else if (stencil->writemask == 0x0) {
-      /* All changes are writemasked out, so no need to calculate
-       * what those changes might be, and no need to write anything back.
-       */
-      need_to_calculate_stencil_values = FALSE;
-      need_to_writemask_stencil_values = FALSE;
-   }
-   else if (stencil->writemask == 0xff) {
-      /* Still trivial, but a little less so.  We need to write the stencil
-       * values, but we don't need to mask them.
-       */
-      need_to_calculate_stencil_values = TRUE;
-      need_to_writemask_stencil_values = FALSE;
-   }
-   else {
-      /* The general case: calculate, mask, and write */
-      need_to_calculate_stencil_values = TRUE;
-      need_to_writemask_stencil_values = TRUE;
-
-      /* While we're here, generate code that calculates what the
-       * writemask should be.  If backface stenciling is enabled,
-       * and the backface writemask is not the same as the frontface
-       * writemask, we'll have to generate code that merges the
-       * two masks into a single effective mask based on fragment facing.
-       */
-      spe_comment(f, 0, "Computing stencil writemask");
-      stencil_writemask_reg = spe_allocate_available_register(f);
-      spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask);
-   }
-
-   /* At least one-sided stenciling must be on.  Generate code that
-    * runs the stencil test on the basic/front-facing stencil, leaving
-    * the mask of passing stencil bits in stencil_pass_reg.  This mask will
-    * be used both to mask the set of active pixels, and also to
-    * determine how the stencil buffer changes.
-    *
-    * This test will *not* change the value in mask_reg (because we don't
-    * yet know whether to apply the two-sided stencil or one-sided stencil).
-    */
-   spe_comment(f, 0, "Running basic stencil test");
-   stencil_pass_reg = spe_allocate_available_register(f);
-   gen_stencil_test(f, stencil, ref_value, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
-
-   /* Generate code that, given the mask of valid fragments and the
-    * mask of valid fragments that passed the stencil test, computes
-    * the mask of valid fragments that failed the stencil test.  We
-    * have to do this before we run a depth test (because the
-    * depth test should not be performed on fragments that failed the
-    * stencil test, and because the depth test will update the 
-    * mask of valid fragments based on the results of the depth test).
-    */
-   spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask");
-   stencil_fail_reg = spe_allocate_available_register(f);
-   spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg);
-   /* Now remove the stenciled-out pixels from the valid fragment mask,
-    * so we can later use the valid fragment mask in the depth test.
-    */
-   spe_and(f, mask_reg, mask_reg, stencil_pass_reg);
-
-   /* We may not need to calculate stencil values, if the writemask is off */
-   if (need_to_calculate_stencil_values) {
-      /* Generate code that calculates exactly which stencil values we need,
-       * without calculating the same value twice (say, if two different
-       * stencil ops have the same value).  This code will work for one-sided
-       * and two-sided stenciling (so that we take into account that operations
-       * may match between front and back stencils), and will also take into
-       * account whether the depth test is enabled (if the depth test is off,
-       * we don't need any of the zfail results, because the depth test always
-       * is considered to pass if it is disabled).  Any register value that
-       * does not need to be calculated will come back with the same value
-       * that's in fbS_reg.
-       *
-       * This function will allocate a variant number of registers that
-       * will be released as part of the register set.
-       */
-      spe_comment(f, 0, facing == CELL_FACING_FRONT
-                  ? "Computing front-facing stencil values"
-                  : "Computing back-facing stencil values");
-      gen_get_stencil_values(f, stencil, ref_value, dsa->depth.enabled, fbS_reg, 
-         &stencil_fail_values, &stencil_pass_depth_fail_values, 
-         &stencil_pass_depth_pass_values);
-   }  
-
-   /* We now have all the stencil values we need.  We also need 
-    * the results of the depth test to figure out which
-    * stencil values will become the new stencil values.  (Even if
-    * we aren't actually calculating stencil values, we need to apply
-    * the depth test if it's enabled.)
-    *
-    * The code generated by gen_depth_test() returns the results of the
-    * test in the given register, but also alters the mask_reg based
-    * on the results of the test.
-    */
-   if (dsa->depth.enabled) {
-      spe_comment(f, 0, "Running stencil depth test");
-      zmask_reg = spe_allocate_available_register(f);
-      modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg,
-                                         fbZ_reg, zmask_reg);
-   }
-
-   if (need_to_calculate_stencil_values) {
-
-      /* If we need to writemask the stencil values before going into
-       * the stencil buffer, we'll have to use a new register to
-       * hold the new values.  If not, we can just keep using the
-       * current register.
-       */
-      if (need_to_writemask_stencil_values) {
-         newS_reg = spe_allocate_available_register(f);
-         spe_comment(f, 0, "Saving current stencil values for writemasking");
-         spe_move(f, newS_reg, fbS_reg);
-      }
-      else {
-         newS_reg = fbS_reg;
-      }
-
-      /* Merge in the selected stencil fail values */
-      if (stencil_fail_values != fbS_reg) {
-         spe_comment(f, 0, "Loading stencil fail values");
-         spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg);
-         modified_buffers = TRUE;
-      }
-
-      /* Same for the stencil pass/depth fail values.  If this calculation
-       * is not needed (say, if depth test is off), then the
-       * stencil_pass_depth_fail_values register will be equal to fbS_reg
-       * and we'll skip the calculation.
-       */
-      if (stencil_pass_depth_fail_values != fbS_reg) {
-         /* We don't actually have a stencil pass/depth fail mask yet.
-          * Calculate it here from the stencil passing mask and the
-          * depth passing mask.  Note that zmask_reg *must* have been
-          * set above if we're here.
-          */
-         uint stencil_pass_depth_fail_mask =
-            spe_allocate_available_register(f);
-
-         spe_comment(f, 0, "Loading stencil pass/depth fail values");
-         spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg);
-
-         spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values,
-                  stencil_pass_depth_fail_mask);
-
-         spe_release_register(f, stencil_pass_depth_fail_mask);
-         modified_buffers = TRUE;
-      }
-
-      /* Same for the stencil pass/depth pass mask.  Note that we
-       * *can* get here with zmask_reg being unset (if the depth
-       * test is off but the stencil test is on).  In this case,
-       * we assume the depth test passes, and don't need to mask
-       * the stencil pass mask with the Z mask.
-       */
-      if (stencil_pass_depth_pass_values != fbS_reg) {
-         if (dsa->depth.enabled) {
-            uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f);
-            /* We'll need a separate register */
-            spe_comment(f, 0, "Loading stencil pass/depth pass values");
-            spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg);
-            spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask);
-            spe_release_register(f, stencil_pass_depth_pass_mask);
-         }
-         else {
-            /* We can use the same stencil-pass register */
-            spe_comment(f, 0, "Loading stencil pass values");
-            spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg);
-         }
-         modified_buffers = TRUE;
-      }
-
-      /* Almost done.  If we need to writemask, do it now, leaving the
-       * results in the fbS_reg register passed in.  If we don't need
-       * to writemask, then the results are *already* in the fbS_reg,
-       * so there's nothing more to do.
-       */
-
-      if (need_to_writemask_stencil_values && modified_buffers) {
-         /* The Select Bytes command makes a fine writemask.  Where
-          * the mask is 0, the first (original) values are retained,
-          * effectively masking out changes.  Where the mask is 1, the
-          * second (new) values are retained, incorporating changes.
-          */
-         spe_comment(f, 0, "Writemasking new stencil values");
-         spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg);
-      }
-
-   } /* done calculating stencil values */
-
-   /* The stencil and/or depth values have been applied, and the
-    * mask_reg, fbS_reg, and fbZ_reg values have been updated.
-    * We're all done, except that we've allocated a fair number
-    * of registers that we didn't bother tracking.  Release all
-    * those registers as part of the register set, and go home.
-    */
-   spe_comment(f, 0, "Releasing stencil register set");
-   spe_release_register_set(f);
-
-   /* Return TRUE if we could have modified the stencil and/or
-    * depth buffers.
-    */
-   return modified_buffers;
-}
-
-
-/**
- * Generate depth and/or stencil test code.
- * \param cell  context
- * \param dsa  depth/stencil/alpha state
- * \param f  spe function to emit
- * \param facing  either CELL_FACING_FRONT or CELL_FACING_BACK
- * \param mask_reg  register containing the pixel alive/dead mask
- * \param depth_tile_reg  register containing address of z/stencil tile
- * \param quad_offset_reg  offset to quad from start of tile
- * \param fragZ_reg  register containg fragment Z values
- */
-static void
-gen_depth_stencil(struct cell_context *cell,
-                  const struct pipe_depth_stencil_alpha_state *dsa,
-                  const struct pipe_stencil_ref *stencil_ref,
-                  struct spe_function *f,
-                  uint facing,
-                  int mask_reg,
-                  int depth_tile_reg,
-                  int quad_offset_reg,
-                  int fragZ_reg)
-
-{
-   const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
-   boolean write_depth_stencil;
-
-   /* framebuffer's combined z/stencil values register */
-   int fbZS_reg = spe_allocate_available_register(f);
-
-   /* Framebufer Z values register */
-   int fbZ_reg = spe_allocate_available_register(f);
-
-   /* Framebuffer stencil values register (may not be used) */
-   int fbS_reg = spe_allocate_available_register(f);
-
-   /* 24-bit mask register (may not be used) */
-   int zmask_reg = spe_allocate_available_register(f);
-
-   /**
-    * The following code:
-    * 1. fetch quad of packed Z/S values from the framebuffer tile.
-    * 2. extract the separate the Z and S values from packed values
-    * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints
-    *
-    * The instructions for doing this are interleaved for better performance.
-    */
-   spe_comment(f, 0, "Fetch Z/stencil quad from tile");
-
-   switch(zs_format) {
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT: /* fall through */
-   case PIPE_FORMAT_Z24X8_UNORM:
-      /* prepare mask to extract Z vals from ZS vals */
-      spe_load_uint(f, zmask_reg, 0x00ffffff);
-
-      /* convert fragment Z from [0,1] to 32-bit ints */
-      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
-
-      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
-      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-
-      /* right shift 32-bit fragment Z to 24 bits */
-      spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
-
-      /* extract 24-bit Z values from ZS values by masking */
-      spe_and(f, fbZ_reg, fbZS_reg, zmask_reg);
-
-      /* extract 8-bit stencil values by shifting */
-      spe_rotmi(f, fbS_reg, fbZS_reg, -24);
-      break;
-
-   case PIPE_FORMAT_S8_UINT_Z24_UNORM: /* fall through */
-   case PIPE_FORMAT_X8Z24_UNORM:
-      /* convert fragment Z from [0,1] to 32-bit ints */
-      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
-
-      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
-      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-
-      /* right shift 32-bit fragment Z to 24 bits */
-      spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
-
-      /* extract 24-bit Z values from ZS values by shifting */
-      spe_rotmi(f, fbZ_reg, fbZS_reg, -8);
-
-      /* extract 8-bit stencil values by masking */
-      spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff);
-      break;
-
-   case PIPE_FORMAT_Z32_UNORM:
-      /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */
-      spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg);
-
-      /* convert fragment Z from [0,1] to 32-bit ints */
-      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
-
-      /* No stencil, so can't do anything there */
-      break;
-
-   case PIPE_FORMAT_Z16_UNORM:
-      /* XXX This code for 16bpp Z is broken! */
-
-      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
-      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-
-      /* Copy over 4 32-bit values */
-      spe_move(f, fbZ_reg, fbZS_reg);
-
-      /* convert Z from [0,1] to 16-bit ints */
-      spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
-      spe_rotmi(f, fragZ_reg, fragZ_reg, -16);
-      /* No stencil */
-      break;
-
-   default:
-      ASSERT(0); /* invalid format */
-   }
-
-   /* If stencil is enabled, use the stencil-specific code
-    * generator to generate both the stencil and depth (if needed)
-    * tests.  Otherwise, if only depth is enabled, generate
-    * a quick depth test.  The test generators themselves will
-    * report back whether the depth/stencil buffer has to be
-    * written back.
-    */
-   if (dsa->stencil[0].enabled) {
-      /* This will perform the stencil and depth tests, and update
-       * the mask_reg, fbZ_reg, and fbS_reg as required by the
-       * tests.
-       */
-      ASSERT(fbS_reg >= 0);
-      spe_comment(f, 0, "Perform stencil test");
-
-      /* Note that fbZ_reg may not be set on entry, if stenciling
-       * is enabled but there's no Z-buffer.  The 
-       * gen_stencil_depth_test() function must ignore the
-       * fbZ_reg register if depth is not enabled.
-       */
-      write_depth_stencil = gen_stencil_depth_test(f, dsa, stencil_ref, facing,
-                                                   mask_reg, fragZ_reg,
-                                                   fbZ_reg, fbS_reg);
-   }
-   else if (dsa->depth.enabled) {
-      int zmask_reg = spe_allocate_available_register(f);
-      ASSERT(fbZ_reg >= 0);
-      spe_comment(f, 0, "Perform depth test");
-      write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg,
-                                           fbZ_reg, zmask_reg);
-      spe_release_register(f, zmask_reg);
-   }
-   else {
-      write_depth_stencil = FALSE;
-   }
-
-   if (write_depth_stencil) {
-      /* Merge latest Z and Stencil values into fbZS_reg.
-       * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
-       * fbS_reg has four 8-bit Z values in bits [7..0].
-       */
-      spe_comment(f, 0, "Store quad's depth/stencil values in tile");
-      if (zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
-          zs_format == PIPE_FORMAT_Z24X8_UNORM) {
-         spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
-         spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
-      }
-      else if (zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
-               zs_format == PIPE_FORMAT_X8Z24_UNORM) {
-         spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */
-         spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
-      }
-      else if (zs_format == PIPE_FORMAT_Z32_UNORM) {
-         spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
-      }
-      else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
-         spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
-      }
-      else if (zs_format == PIPE_FORMAT_S8_UINT) {
-         ASSERT(0);   /* XXX to do */
-      }
-      else {
-         ASSERT(0); /* bad zs_format */
-      }
-
-      /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
-      spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-   }
-
-   /* Don't need these any more */
-   spe_release_register(f, fbZS_reg);
-   spe_release_register(f, fbZ_reg);
-   spe_release_register(f, fbS_reg);
-   spe_release_register(f, zmask_reg);
-}
-
-
-
-/**
- * Generate SPE code to implement the fragment operations (alpha test,
- * depth test, stencil test, blending, colormask, and final
- * framebuffer write) as specified by the current context state.
- *
- * Logically, this code will be called after running the fragment
- * shader.  But under some circumstances we could run some of this
- * code before the fragment shader to cull fragments/quads that are
- * totally occluded/discarded.
- *
- * XXX we only support PIPE_FORMAT_S8_UINT_Z24_UNORM z/stencil buffer right now.
- *
- * See the spu_default_fragment_ops() function to see how the per-fragment
- * operations would be done with ordinary C code.
- * The code we generate here though has no branches, is SIMD, etc and
- * should be much faster.
- *
- * \param cell  the rendering context (in)
- * \param facing whether the generated code is for front-facing or 
- *              back-facing fragments
- * \param f     the generated function (in/out); on input, the function
- *              must already have been initialized.  On exit, whatever
- *              instructions within the generated function have had
- *              the fragment ops appended.
- */
-void
-cell_gen_fragment_function(struct cell_context *cell,
-                           const uint facing,
-                           struct spe_function *f)
-{
-   const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
-   const struct pipe_stencil_ref *stencil_ref = &cell->stencil_ref;
-   const struct pipe_blend_state *blend = cell->blend;
-   const struct pipe_blend_color *blend_color = &cell->blend_color;
-   const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
-
-   /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
-   const int x_reg = 3;  /* uint */
-   const int y_reg = 4;  /* uint */
-   const int color_tile_reg = 5;  /* tile_t * */
-   const int depth_tile_reg = 6;  /* tile_t * */
-   const int fragZ_reg = 7;   /* vector float */
-   const int fragR_reg = 8;   /* vector float */
-   const int fragG_reg = 9;   /* vector float */
-   const int fragB_reg = 10;  /* vector float */
-   const int fragA_reg = 11;  /* vector float */
-   const int mask_reg = 12;   /* vector uint */
-
-   ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
-
-   /* offset of quad from start of tile
-    * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
-    */
-   int quad_offset_reg;
-
-   int fbRGBA_reg;  /**< framebuffer's RGBA colors for quad */
-
-   if (cell->debug_flags & CELL_DEBUG_ASM) {
-      spe_print_code(f, TRUE);
-      spe_indent(f, 8);
-      spe_comment(f, -4, facing == CELL_FACING_FRONT
-                  ? "Begin front-facing per-fragment ops"
-                  : "Begin back-facing per-fragment ops");
-   }
-
-   spe_allocate_register(f, x_reg);
-   spe_allocate_register(f, y_reg);
-   spe_allocate_register(f, color_tile_reg);
-   spe_allocate_register(f, depth_tile_reg);
-   spe_allocate_register(f, fragZ_reg);
-   spe_allocate_register(f, fragR_reg);
-   spe_allocate_register(f, fragG_reg);
-   spe_allocate_register(f, fragB_reg);
-   spe_allocate_register(f, fragA_reg);
-   spe_allocate_register(f, mask_reg);
-
-   quad_offset_reg = spe_allocate_available_register(f);
-   fbRGBA_reg = spe_allocate_available_register(f);
-
-   /* compute offset of quad from start of tile, in bytes */
-   {
-      int x2_reg = spe_allocate_available_register(f);
-      int y2_reg = spe_allocate_available_register(f);
-
-      ASSERT(TILE_SIZE == 32);
-
-      spe_comment(f, 0, "Compute quad offset within tile");
-      spe_rotmi(f, y2_reg, y_reg, -1);  /* y2 = y / 2 */
-      spe_rotmi(f, x2_reg, x_reg, -1);  /* x2 = x / 2 */
-      spe_shli(f, y2_reg, y2_reg, 4);   /* y2 *= 16 */
-      spe_a(f, quad_offset_reg, y2_reg, x2_reg);  /* offset = y2 + x2 */
-      spe_shli(f, quad_offset_reg, quad_offset_reg, 4);   /* offset *= 16 */
-
-      spe_release_register(f, x2_reg);
-      spe_release_register(f, y2_reg);
-   }
-
-   /* Generate the alpha test, if needed. */
-   if (dsa->alpha.enabled) {
-      gen_alpha_test(dsa, f, mask_reg, fragA_reg);
-   }
-
-   /* generate depth and/or stencil test code */
-   if (dsa->depth.enabled || dsa->stencil[0].enabled) {
-      gen_depth_stencil(cell, dsa, stencil_ref, f,
-                        facing,
-                        mask_reg,
-                        depth_tile_reg,
-                        quad_offset_reg,
-                        fragZ_reg);
-   }
-
-   /* Get framebuffer quad/colors.  We'll need these for blending,
-    * color masking, and to obey the quad/pixel mask.
-    * Load: fbRGBA_reg = memory[color_tile + quad_offset]
-    * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
-    * we could skip this load.
-    */
-   spe_comment(f, 0, "Fetch quad colors from tile");
-   spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
-
-   if (blend->rt[0].blend_enable) {
-      spe_comment(f, 0, "Perform blending");
-      gen_blend(blend, blend_color, f, color_format,
-                fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
-   }
-
-   /*
-    * Write fragment colors to framebuffer/tile.
-    * This involves converting the fragment colors from float[4] to the
-    * tile's specific format and obeying the quad/pixel mask.
-    */
-   {
-      int rgba_reg = spe_allocate_available_register(f);
-
-      /* Pack four float colors as four 32-bit int colors */
-      spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors");
-      gen_pack_colors(f, color_format,
-                      fragR_reg, fragG_reg, fragB_reg, fragA_reg,
-                      rgba_reg);
-
-      if (blend->logicop_enable) {
-         spe_comment(f, 0, "Compute logic op");
-         gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
-      }
-
-      if (blend->rt[0].colormask != PIPE_MASK_RGBA) {
-         spe_comment(f, 0, "Compute color mask");
-         gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg);
-      }
-
-      /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
-       * if (mask[i])
-       *    rgba[i] = rgba[i];
-       * else
-       *    rgba[i] = framebuffer[i];
-       */
-      spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
-
-      /* Store updated quad in tile:
-       * memory[color_tile + quad_offset] = rgba_reg;
-       */
-      spe_comment(f, 0, "Store quad colors into color tile");
-      spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
-
-      spe_release_register(f, rgba_reg);
-   }
-
-   //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
-
-   spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */
-
-   spe_release_register(f, fbRGBA_reg);
-   spe_release_register(f, quad_offset_reg);
-
-   if (cell->debug_flags & CELL_DEBUG_ASM) {
-      char buffer[1024];
-      sprintf(buffer, "End %s-facing per-fragment ops: %d instructions", 
-         facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst);
-      spe_comment(f, -4, buffer);
-   }
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
deleted file mode 100644
index 21b35d1fafe..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_GEN_FRAGMENT_H
-#define CELL_GEN_FRAGMENT_H
-
-
-extern void
-cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f);
-
-
-#endif /* CELL_GEN_FRAGMENT_H */
-
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
deleted file mode 100644
index 223adda48f0..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ /dev/null
@@ -1,473 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors:
- *  Keith Whitwell <keith@tungstengraphics.com>
- *  Brian Paul
- */
-
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "draw/draw_context.h"
-#include "cell_context.h"
-#include "cell_flush.h"
-#include "cell_pipe_state.h"
-#include "cell_state.h"
-#include "cell_texture.h"
-
-
-
-static void *
-cell_create_blend_state(struct pipe_context *pipe,
-                        const struct pipe_blend_state *blend)
-{
-   return mem_dup(blend, sizeof(*blend));
-}
-
-
-static void
-cell_bind_blend_state(struct pipe_context *pipe, void *blend)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   draw_flush(cell->draw);
-
-   cell->blend = (struct pipe_blend_state *) blend;
-   cell->dirty |= CELL_NEW_BLEND;
-}
-
-
-static void
-cell_delete_blend_state(struct pipe_context *pipe, void *blend)
-{
-   FREE(blend);
-}
-
-
-static void
-cell_set_blend_color(struct pipe_context *pipe,
-                     const struct pipe_blend_color *blend_color)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   draw_flush(cell->draw);
-
-   cell->blend_color = *blend_color;
-
-   cell->dirty |= CELL_NEW_BLEND;
-}
-
-
-
-
-static void *
-cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
-                 const struct pipe_depth_stencil_alpha_state *dsa)
-{
-   return mem_dup(dsa, sizeof(*dsa));
-}
-
-
-static void
-cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
-                                    void *dsa)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   draw_flush(cell->draw);
-
-   cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa;
-   cell->dirty |= CELL_NEW_DEPTH_STENCIL;
-}
-
-
-static void
-cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa)
-{
-   FREE(dsa);
-}
-
-
-static void
-cell_set_stencil_ref(struct pipe_context *pipe,
-                     const struct pipe_stencil_ref *stencil_ref)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   draw_flush(cell->draw);
-
-   cell->stencil_ref = *stencil_ref;
-
-   cell->dirty |= CELL_NEW_DEPTH_STENCIL;
-}
-
-
-static void
-cell_set_clip_state(struct pipe_context *pipe,
-                    const struct pipe_clip_state *clip)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   /* pass the clip state to the draw module */
-   draw_set_clip_state(cell->draw, clip);
-}
-
-
-static void
-cell_set_sample_mask(struct pipe_context *pipe,
-                     unsigned sample_mask)
-{
-}
-
-
-/* Called when driver state tracker notices changes to the viewport
- * matrix:
- */
-static void
-cell_set_viewport_state( struct pipe_context *pipe,
-                         const struct pipe_viewport_state *viewport )
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   cell->viewport = *viewport; /* struct copy */
-   cell->dirty |= CELL_NEW_VIEWPORT;
-
-   /* pass the viewport info to the draw module */
-   draw_set_viewport_state(cell->draw, viewport);
-
-   /* Using tnl/ and vf/ modules is temporary while getting started.
-    * Full pipe will have vertex shader, vertex fetch of its own.
-    */
-}
-
-
-static void
-cell_set_scissor_state( struct pipe_context *pipe,
-                        const struct pipe_scissor_state *scissor )
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   memcpy( &cell->scissor, scissor, sizeof(*scissor) );
-   cell->dirty |= CELL_NEW_SCISSOR;
-}
-
-
-static void
-cell_set_polygon_stipple( struct pipe_context *pipe,
-                          const struct pipe_poly_stipple *stipple )
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) );
-   cell->dirty |= CELL_NEW_STIPPLE;
-}
-
-
-
-static void *
-cell_create_rasterizer_state(struct pipe_context *pipe,
-                             const struct pipe_rasterizer_state *rasterizer)
-{
-   return mem_dup(rasterizer, sizeof(*rasterizer));
-}
-
-
-static void
-cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast)
-{
-   struct pipe_rasterizer_state *rasterizer =
-      (struct pipe_rasterizer_state *) rast;
-   struct cell_context *cell = cell_context(pipe);
-
-   /* pass-through to draw module */
-   draw_set_rasterizer_state(cell->draw, rasterizer, rast);
-
-   cell->rasterizer = rasterizer;
-
-   cell->dirty |= CELL_NEW_RASTERIZER;
-}
-
-
-static void
-cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
-{
-   FREE(rasterizer);
-}
-
-
-
-static void *
-cell_create_sampler_state(struct pipe_context *pipe,
-                          const struct pipe_sampler_state *sampler)
-{
-   return mem_dup(sampler, sizeof(*sampler));
-}
-
-
-static void
-cell_bind_sampler_states(struct pipe_context *pipe,
-                         unsigned num, void **samplers)
-{
-   struct cell_context *cell = cell_context(pipe);
-   uint i, changed = 0x0;
-
-   assert(num <= CELL_MAX_SAMPLERS);
-
-   draw_flush(cell->draw);
-
-   for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
-      struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
-      if (cell->sampler[i] != new_samp) {
-         cell->sampler[i] = new_samp;
-         changed |= (1 << i);
-      }
-   }
-
-   if (changed) {
-      cell->dirty |= CELL_NEW_SAMPLER;
-      cell->dirty_samplers |= changed;
-   }
-}
-
-
-static void
-cell_delete_sampler_state(struct pipe_context *pipe,
-                              void *sampler)
-{
-   FREE( sampler );
-}
-
-
-
-static void
-cell_set_fragment_sampler_views(struct pipe_context *pipe,
-                                unsigned num,
-                                struct pipe_sampler_view **views)
-{
-   struct cell_context *cell = cell_context(pipe);
-   uint i, changed = 0x0;
-
-   assert(num <= CELL_MAX_SAMPLERS);
-
-   for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
-      struct pipe_sampler_view *new_view = i < num ? views[i] : NULL;
-      struct pipe_sampler_view *old_view = cell->fragment_sampler_views[i];
-
-      if (old_view != new_view) {
-         struct pipe_resource *new_tex = new_view ? new_view->texture : NULL;
-
-         pipe_sampler_view_reference(&cell->fragment_sampler_views[i],
-                                     new_view);
-         pipe_resource_reference((struct pipe_resource **) &cell->texture[i],
-                                (struct pipe_resource *) new_tex);
-
-         changed |= (1 << i);
-      }
-   }
-
-   cell->num_textures = num;
-
-   if (changed) {
-      cell->dirty |= CELL_NEW_TEXTURE;
-      cell->dirty_textures |= changed;
-   }
-}
-
-
-static struct pipe_sampler_view *
-cell_create_sampler_view(struct pipe_context *pipe,
-                         struct pipe_resource *texture,
-                         const struct pipe_sampler_view *templ)
-{
-   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
-   if (view) {
-      *view = *templ;
-      view->reference.count = 1;
-      view->texture = NULL;
-      pipe_resource_reference(&view->texture, texture);
-      view->context = pipe;
-   }
-
-   return view;
-}
-
-
-static void
-cell_sampler_view_destroy(struct pipe_context *pipe,
-                          struct pipe_sampler_view *view)
-{
-   pipe_resource_reference(&view->texture, NULL);
-   FREE(view);
-}
-
-
-/**
- * Map color and z/stencil framebuffer surfaces.
- */
-static void
-cell_map_surfaces(struct cell_context *cell)
-{
-#if 0
-   struct pipe_screen *screen = cell->pipe.screen;
-#endif
-   uint i;
-
-   for (i = 0; i < 1; i++) {
-      struct pipe_surface *ps = cell->framebuffer.cbufs[i];
-      if (ps) {
-         struct cell_resource *ct = cell_resource(ps->texture);
-#if 0
-         cell->cbuf_map[i] = screen->buffer_map(screen,
-                                                ct->buffer,
-                                                (PIPE_BUFFER_USAGE_GPU_READ |
-                                                 PIPE_BUFFER_USAGE_GPU_WRITE));
-#else
-         cell->cbuf_map[i] = ct->data;
-#endif
-      }
-   }
-
-   {
-      struct pipe_surface *ps = cell->framebuffer.zsbuf;
-      if (ps) {
-         struct cell_resource *ct = cell_resource(ps->texture);
-#if 0
-         cell->zsbuf_map = screen->buffer_map(screen,
-                                              ct->buffer,
-                                              (PIPE_BUFFER_USAGE_GPU_READ |
-                                               PIPE_BUFFER_USAGE_GPU_WRITE));
-#else
-         cell->zsbuf_map = ct->data;
-#endif
-      }
-   }
-}
-
-
-/**
- * Unmap color and z/stencil framebuffer surfaces.
- */
-static void
-cell_unmap_surfaces(struct cell_context *cell)
-{
-   /*struct pipe_screen *screen = cell->pipe.screen;*/
-   uint i;
-
-   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
-      struct pipe_surface *ps = cell->framebuffer.cbufs[i];
-      if (ps && cell->cbuf_map[i]) {
-         /*struct cell_resource *ct = cell_resource(ps->texture);*/
-         assert(ps->texture);
-         /*assert(ct->buffer);*/
-
-         /*screen->buffer_unmap(screen, ct->buffer);*/
-         cell->cbuf_map[i] = NULL;
-      }
-   }
-
-   {
-      struct pipe_surface *ps = cell->framebuffer.zsbuf;
-      if (ps && cell->zsbuf_map) {
-         /*struct cell_resource *ct = cell_resource(ps->texture);*/
-         /*screen->buffer_unmap(screen, ct->buffer);*/
-         cell->zsbuf_map = NULL;
-      }
-   }
-}
-
-
-static void
-cell_set_framebuffer_state(struct pipe_context *pipe,
-                           const struct pipe_framebuffer_state *fb)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) {
-      uint i;
-
-      /* unmap old surfaces */
-      cell_unmap_surfaces(cell);
-
-      /* Finish any pending rendering to the current surface before
-       * installing a new surface!
-       */
-      cell_flush_int(cell, CELL_FLUSH_WAIT);
-
-      /* update my state
-       * (this is also where old surfaces will finally get freed)
-       */
-      cell->framebuffer.width = fb->width;
-      cell->framebuffer.height = fb->height;
-      cell->framebuffer.nr_cbufs = fb->nr_cbufs;
-      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
-         pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
-      }
-      pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);
-
-      /* map new surfaces */
-      cell_map_surfaces(cell);
-
-      cell->dirty |= CELL_NEW_FRAMEBUFFER;
-   }
-}
-
-
-void
-cell_init_state_functions(struct cell_context *cell)
-{
-   cell->pipe.create_blend_state = cell_create_blend_state;
-   cell->pipe.bind_blend_state   = cell_bind_blend_state;
-   cell->pipe.delete_blend_state = cell_delete_blend_state;
-
-   cell->pipe.create_sampler_state = cell_create_sampler_state;
-   cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states;
-   cell->pipe.delete_sampler_state = cell_delete_sampler_state;
-
-   cell->pipe.set_fragment_sampler_views = cell_set_fragment_sampler_views;
-   cell->pipe.create_sampler_view = cell_create_sampler_view;
-   cell->pipe.sampler_view_destroy = cell_sampler_view_destroy;
-
-   cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state;
-   cell->pipe.bind_depth_stencil_alpha_state   = cell_bind_depth_stencil_alpha_state;
-   cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state;
-
-   cell->pipe.create_rasterizer_state = cell_create_rasterizer_state;
-   cell->pipe.bind_rasterizer_state   = cell_bind_rasterizer_state;
-   cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state;
-
-   cell->pipe.set_blend_color = cell_set_blend_color;
-   cell->pipe.set_stencil_ref = cell_set_stencil_ref;
-   cell->pipe.set_clip_state = cell_set_clip_state;
-   cell->pipe.set_sample_mask = cell_set_sample_mask;
-
-   cell->pipe.set_framebuffer_state = cell_set_framebuffer_state;
-
-   cell->pipe.set_polygon_stipple = cell_set_polygon_stipple;
-   cell->pipe.set_scissor_state = cell_set_scissor_state;
-   cell->pipe.set_viewport_state = cell_set_viewport_state;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h
deleted file mode 100644
index 1889bd52ff5..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_PIPE_STATE_H
-#define CELL_PIPE_STATE_H
-
-
-struct cell_context;
-
-extern void
-cell_init_state_functions(struct cell_context *cell);
-
-
-#endif /* CELL_PIPE_STATE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_public.h b/src/gallium/drivers/cell/ppu/cell_public.h
deleted file mode 100644
index 7e2e093565d..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_public.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef CELL_PUBLIC_H
-#define CELL_PUBLIC_H
-
-struct pipe_screen;
-struct sw_winsys;
-
-struct pipe_screen *
-cell_create_screen(struct sw_winsys *winsys);
-
-#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c
deleted file mode 100644
index f648482c551..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_render.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * \brief  Last stage of 'draw' pipeline: send tris to SPUs.
- * \author  Brian Paul
- */
-
-#include "cell_context.h"
-#include "cell_render.h"
-#include "cell_spu.h"
-#include "util/u_memory.h"
-#include "draw/draw_private.h"
-
-
-struct render_stage {
-   struct draw_stage stage; /**< This must be first (base class) */
-
-   struct cell_context *cell;
-};
-
-
-static INLINE struct render_stage *
-render_stage(struct draw_stage *stage)
-{
-   return (struct render_stage *) stage;
-}
-
-
-static void render_begin( struct draw_stage *stage )
-{
-#if 0
-   struct render_stage *render = render_stage(stage);
-   struct cell_context *sp = render->cell;
-   const struct pipe_shader_state *fs = &render->cell->fs->shader;
-   render->quad.nr_attrs = render->cell->nr_frag_attrs;
-
-   render->firstFpInput = fs->input_semantic_name[0];
-
-   sp->quad.first->begin(sp->quad.first);
-#endif
-}
-
-
-static void render_end( struct draw_stage *stage )
-{
-}
-
-
-static void reset_stipple_counter( struct draw_stage *stage )
-{
-   struct render_stage *render = render_stage(stage);
-   /*render->cell->line_stipple_counter = 0;*/
-}
-
-
-static void
-render_point(struct draw_stage *stage, struct prim_header *prim)
-{
-}
-
-
-static void
-render_line(struct draw_stage *stage, struct prim_header *prim)
-{
-}
-
-
-/** Write a vertex into the prim buffer */
-static void
-save_vertex(struct cell_prim_buffer *buf, uint pos,
-            const struct vertex_header *vert)
-{
-   uint attr, j;
-
-   for (attr = 0; attr < 2; attr++) {
-      for (j = 0; j < 4; j++) {
-         buf->vertex[pos][attr][j] = vert->data[attr][j];
-      }
-   }
-
-   /* update bounding box */
-   if (vert->data[0][0] < buf->xmin)
-      buf->xmin = vert->data[0][0];
-   if (vert->data[0][0] > buf->xmax)
-      buf->xmax = vert->data[0][0];
-   if (vert->data[0][1] < buf->ymin)
-      buf->ymin = vert->data[0][1];
-   if (vert->data[0][1] > buf->ymax)
-      buf->ymax = vert->data[0][1];
-}
-
-
-static void
-render_tri(struct draw_stage *stage, struct prim_header *prim)
-{
-   struct render_stage *rs = render_stage(stage);
-   struct cell_context *cell = rs->cell;
-   struct cell_prim_buffer *buf = &cell->prim_buffer;
-   uint i;
-
-   if (buf->num_verts + 3 > CELL_MAX_VERTS) {
-      cell_flush_prim_buffer(cell);
-   }
-
-   i = buf->num_verts;
-   assert(i+2 <= CELL_MAX_VERTS);
-   save_vertex(buf, i+0, prim->v[0]);
-   save_vertex(buf, i+1, prim->v[1]);
-   save_vertex(buf, i+2, prim->v[2]);
-   buf->num_verts += 3;
-}
-
-
-/**
- * Send the a RENDER command to all SPUs to have them render the prims
- * in the current prim_buffer.
- */
-void
-cell_flush_prim_buffer(struct cell_context *cell)
-{
-   uint i;
-
-   if (cell->prim_buffer.num_verts == 0)
-      return;
-
-   for (i = 0; i < cell->num_spus; i++) {
-      struct cell_command_render *render = &cell_global.command[i].render;
-      render->prim_type = PIPE_PRIM_TRIANGLES;
-      render->num_verts = cell->prim_buffer.num_verts;
-      render->front_ccw = cell->rasterizer->front_ccw;
-      render->vertex_size = cell->vertex_info->size * 4;
-      render->xmin = cell->prim_buffer.xmin;
-      render->ymin = cell->prim_buffer.ymin;
-      render->xmax = cell->prim_buffer.xmax;
-      render->ymax = cell->prim_buffer.ymax;
-      render->vertex_data = &cell->prim_buffer.vertex;
-      ASSERT_ALIGN16(render->vertex_data);
-      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER);
-   }
-
-   cell->prim_buffer.num_verts = 0;
-
-   cell->prim_buffer.xmin = 1e100;
-   cell->prim_buffer.ymin = 1e100;
-   cell->prim_buffer.xmax = -1e100;
-   cell->prim_buffer.ymax = -1e100;
-
-   /* XXX temporary, need to double-buffer the prim buffer until we get
-    * a real command buffer/list system.
-    */
-   cell_flush(&cell->pipe, 0x0);
-}
-
-
-
-static void render_destroy( struct draw_stage *stage )
-{
-   FREE( stage );
-}
-
-
-/**
- * Create a new draw/render stage.  This will be plugged into the
- * draw module as the last pipeline stage.
- */
-struct draw_stage *cell_draw_render_stage( struct cell_context *cell )
-{
-   struct render_stage *render = CALLOC_STRUCT(render_stage);
-
-   render->cell = cell;
-   render->stage.draw = cell->draw;
-   render->stage.begin = render_begin;
-   render->stage.point = render_point;
-   render->stage.line = render_line;
-   render->stage.tri = render_tri;
-   render->stage.end = render_end;
-   render->stage.reset_stipple_counter = reset_stipple_counter;
-   render->stage.destroy = render_destroy;
-
-   /*
-   render->quad.coef = render->coef;
-   render->quad.posCoef = &render->posCoef;
-   */
-
-   return &render->stage;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h
deleted file mode 100644
index 826dcbafeba..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_render.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef CELL_RENDER_H
-#define CELL_RENDER_H
-
-struct cell_context;
-struct draw_stage;
-
-extern void
-cell_flush_prim_buffer(struct cell_context *cell);
-
-extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell );
-
-#endif /* CELL_RENDER_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
deleted file mode 100644
index 7ffdcc51bbd..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "util/u_memory.h"
-#include "util/u_simple_screen.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_screen.h"
-
-#include "cell/common.h"
-#include "cell_context.h"
-#include "cell_screen.h"
-#include "cell_texture.h"
-#include "cell_public.h"
-
-#include "state_tracker/sw_winsys.h"
-
-
-static const char *
-cell_get_vendor(struct pipe_screen *screen)
-{
-   return "VMware, Inc.";
-}
-
-
-static const char *
-cell_get_name(struct pipe_screen *screen)
-{
-   return "Cell";
-}
-
-
-static int
-cell_get_param(struct pipe_screen *screen, enum pipe_cap param)
-{
-   switch (param) {
-   case PIPE_CAP_MAX_COMBINED_SAMPLERS:
-      return CELL_MAX_SAMPLERS;
-   case PIPE_CAP_NPOT_TEXTURES:
-      return 1;
-   case PIPE_CAP_TWO_SIDED_STENCIL:
-      return 1;
-   case PIPE_CAP_ANISOTROPIC_FILTER:
-      return 0;
-   case PIPE_CAP_POINT_SPRITE:
-      return 1;
-   case PIPE_CAP_MAX_RENDER_TARGETS:
-      return 1;
-   case PIPE_CAP_OCCLUSION_QUERY:
-      return 1;
-   case PIPE_CAP_TIMER_QUERY:
-      return 0;
-   case PIPE_CAP_TEXTURE_SHADOW_MAP:
-      return 10;
-   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-      return CELL_MAX_TEXTURE_LEVELS;
-   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-      return 8;  /* max 128x128x128 */
-   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
-      return CELL_MAX_TEXTURE_LEVELS;
-   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-      return 0; /* XXX to do */
-   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
-   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-      return 1;
-   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
-   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-      return 0;
-   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
-      return 1;
-   default:
-      return 0;
-   }
-}
-
-static int
-cell_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
-{
-   switch(shader)
-   {
-   case PIPE_SHADER_FRAGMENT:
-      switch (param) {
-      case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
-         return CELL_MAX_SAMPLERS;
-      default:
-         return tgsi_exec_get_shader_param(param);
-      }
-   case PIPE_SHADER_VERTEX:
-   case PIPE_SHADER_GEOMETRY:
-      return draw_get_shader_param(shader, param);
-   default:
-      return 0;
-   }
-}
-
-static float
-cell_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
-{
-   switch (param) {
-   case PIPE_CAPF_MAX_LINE_WIDTH:
-      /* fall-through */
-   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
-      return 255.0; /* arbitrary */
-
-   case PIPE_CAPF_MAX_POINT_WIDTH:
-      /* fall-through */
-   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-      return 255.0; /* arbitrary */
-
-   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-      return 0.0;
-
-   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
-      return 16.0; /* arbitrary */
-
-   default:
-      return 0;
-   }
-}
-
-
-static boolean
-cell_is_format_supported( struct pipe_screen *screen,
-                          enum pipe_format format,
-                          enum pipe_texture_target target,
-                          unsigned sample_count,
-                          unsigned tex_usage)
-{
-   struct sw_winsys *winsys = cell_screen(screen)->winsys;
-
-   if (sample_count > 1)
-      return FALSE;
-
-   if (tex_usage & (PIPE_BIND_DISPLAY_TARGET |
-                    PIPE_BIND_SCANOUT |
-                    PIPE_BIND_SHARED)) {
-      if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format))
-         return FALSE;
-   }
-
-   /* only a few formats are known to work at this time */
-   switch (format) {
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-   case PIPE_FORMAT_I8_UNORM:
-      return TRUE;
-   default:
-      return FALSE;
-   }
-}
-
-
-static void
-cell_destroy_screen( struct pipe_screen *screen )
-{
-   struct cell_screen *sp_screen = cell_screen(screen);
-   struct sw_winsys *winsys = sp_screen->winsys;
-
-   if(winsys->destroy)
-      winsys->destroy(winsys);
-
-   FREE(screen);
-}
-
-
-
-/**
- * Create a new pipe_screen object
- * Note: we're not presently subclassing pipe_screen (no cell_screen) but
- * that would be the place to put SPU thread/context info...
- */
-struct pipe_screen *
-cell_create_screen(struct sw_winsys *winsys)
-{
-   struct cell_screen *screen = CALLOC_STRUCT(cell_screen);
-
-   if (!screen)
-      return NULL;
-
-   screen->winsys = winsys;
-   screen->base.winsys = NULL;
-
-   screen->base.destroy = cell_destroy_screen;
-
-   screen->base.get_name = cell_get_name;
-   screen->base.get_vendor = cell_get_vendor;
-   screen->base.get_param = cell_get_param;
-   screen->base.get_shader_param = cell_get_shader_param;
-   screen->base.get_paramf = cell_get_paramf;
-   screen->base.is_format_supported = cell_is_format_supported;
-   screen->base.context_create = cell_create_context;
-
-   cell_init_screen_texture_funcs(&screen->base);
-
-   return &screen->base;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h
deleted file mode 100644
index baff9d3b7d4..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_screen.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_SCREEN_H
-#define CELL_SCREEN_H
-
-
-#include "pipe/p_screen.h"
-
-struct sw_winsys;
-
-struct cell_screen {
-   struct pipe_screen base;
-
-   struct sw_winsys *winsys;
-
-   /* Increments whenever textures are modified.  Contexts can track
-    * this.
-    */
-   unsigned timestamp;          
-};
-
-static INLINE struct cell_screen *
-cell_screen( struct pipe_screen *pipe )
-{
-   return (struct cell_screen *)pipe;
-}
-
-
-#endif /* CELL_SCREEN_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
deleted file mode 100644
index 39284f3a5d1..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_spu.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-/**
- * Utility/wrappers for communicating with the SPUs.
- */
-
-
-#include <pthread.h>
-
-#include "cell_spu.h"
-#include "pipe/p_format.h"
-#include "pipe/p_state.h"
-#include "util/u_memory.h"
-#include "cell/common.h"
-
-
-/*
-helpful headers:
-/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h
-*/
-
-
-/**
- * Cell/SPU info that's not per-context.
- */
-struct cell_global_info cell_global;
-
-
-/**
- * Scan /proc/cpuinfo to determine the timebase for the system.
- * This is used by the SPUs to convert 'decrementer' ticks to seconds.
- * There may be a better way to get this value...
- */
-static unsigned
-get_timebase(void)
-{
-   FILE *f = fopen("/proc/cpuinfo", "r");
-   unsigned timebase;
-
-   assert(f);
-   while (!feof(f)) {
-      char line[80];
-      fgets(line, sizeof(line), f);
-      if (strncmp(line, "timebase", 8) == 0) {
-         char *colon = strchr(line, ':');
-         if (colon) {
-            timebase = atoi(colon + 2);
-            break;
-         }
-      }
-   }
-   fclose(f);
-
-   return timebase;
-}
-
-
-/**
- * Write a 1-word message to the given SPE mailbox.
- */
-void
-send_mbox_message(spe_context_ptr_t ctx, unsigned int msg)
-{
-   spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING);
-}
-
-
-/**
- * Wait for a 1-word message to arrive in given mailbox.
- */
-uint
-wait_mbox_message(spe_context_ptr_t ctx)
-{
-   do {
-      unsigned data;
-      int count = spe_out_mbox_read(ctx, &data, 1);
-
-      if (count == 1) {
-	 return data;
-      }
-      
-      if (count < 0) {
-	 /* error */ ;
-      }
-   } while (1);
-}
-
-
-/**
- * Called by pthread_create() to spawn an SPU thread.
- */
-static void *
-cell_thread_function(void *arg)
-{
-   struct cell_init_info *init = (struct cell_init_info *) arg;
-   unsigned entry = SPE_DEFAULT_ENTRY;
-
-   ASSERT_ALIGN16(init);
-
-   if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0,
-                       init, NULL, NULL) < 0) {
-      fprintf(stderr, "spe_context_run() failed\n");
-      exit(1);
-   }
-
-   pthread_exit(NULL);
-}
-
-
-/**
- * Create the SPU threads.  This is done once during driver initialization.
- * This involves setting the "init" message which is sent to each SPU.
- * The init message specifies an SPU id, total number of SPUs, location
- * and number of batch buffers, etc.
- */
-void
-cell_start_spus(struct cell_context *cell)
-{
-   static boolean one_time_init = FALSE;
-   uint i, j;
-   uint timebase = get_timebase();
-
-   if (one_time_init) {
-      fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
-	      "on Cell.\n");
-      abort();
-   }
-
-   one_time_init = TRUE;
-
-   assert(cell->num_spus <= CELL_MAX_SPUS);
-
-   ASSERT_ALIGN16(&cell_global.inits[0]);
-   ASSERT_ALIGN16(&cell_global.inits[1]);
-
-   /*
-    * Initialize the global 'inits' structure for each SPU.
-    * A pointer to the init struct will be passed to each SPU.
-    * The SPUs will then each grab their init info with mfc_get().
-    */
-   for (i = 0; i < cell->num_spus; i++) {
-      cell_global.inits[i].id = i;
-      cell_global.inits[i].num_spus = cell->num_spus;
-      cell_global.inits[i].debug_flags = cell->debug_flags;
-      cell_global.inits[i].inv_timebase = 1000.0f / timebase;
-
-      for (j = 0; j < CELL_NUM_BUFFERS; j++) {
-         cell_global.inits[i].buffers[j] = cell->buffer[j];
-      }
-      cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
-
-      cell_global.inits[i].spu_functions = &cell->spu_functions;
-
-      cell_global.spe_contexts[i] = spe_context_create(0, NULL);
-      if (!cell_global.spe_contexts[i]) {
-         fprintf(stderr, "spe_context_create() failed\n");
-         exit(1);
-      }
-
-      if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) {
-         fprintf(stderr, "spe_program_load() failed\n");
-         exit(1);
-      }
-      
-      pthread_create(&cell_global.spe_threads[i], /* returned thread handle */
-                     NULL,                        /* pthread attribs */
-                     &cell_thread_function,       /* start routine */
-		     &cell_global.inits[i]);      /* thread argument */
-   }
-}
-
-
-/**
- * Tell all the SPUs to stop/exit.
- * This is done when the driver's exiting / cleaning up.
- */
-void
-cell_spu_exit(struct cell_context *cell)
-{
-   uint i;
-
-   for (i = 0; i < cell->num_spus; i++) {
-      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT);
-   }
-
-   /* wait for threads to exit */
-   for (i = 0; i < cell->num_spus; i++) {
-      void *value;
-      pthread_join(cell_global.spe_threads[i], &value);
-      cell_global.spe_threads[i] = 0;
-      cell_global.spe_contexts[i] = 0;
-   }
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h
deleted file mode 100644
index c93958a9ed5..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_spu.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef CELL_SPU
-#define CELL_SPU
-
-
-#include <libspe2.h>
-#include <pthread.h>
-#include "cell/common.h"
-
-#include "cell_context.h"
-
-
-/**
- * Global vars, for now anyway.
- */
-struct cell_global_info
-{
-   /**
-    * SPU/SPE handles, etc
-    */
-   spe_context_ptr_t spe_contexts[CELL_MAX_SPUS];
-   pthread_t spe_threads[CELL_MAX_SPUS];
-
-   /**
-    * Data sent to SPUs at start-up
-    */
-   struct cell_init_info inits[CELL_MAX_SPUS];
-};
-
-
-extern struct cell_global_info cell_global;
-
-
-/** This is the handle for the actual SPE code */
-extern spe_program_handle_t g3d_spu;
-
-
-extern void
-send_mbox_message(spe_context_ptr_t ctx, unsigned int msg);
-
-extern uint
-wait_mbox_message(spe_context_ptr_t ctx);
-
-
-extern void
-cell_start_spus(struct cell_context *cell);
-
-
-extern void
-cell_spu_exit(struct cell_context *cell);
-
-
-#endif /* CELL_SPU */
diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h
deleted file mode 100644
index 7adedcde57c..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef CELL_STATE_H
-#define CELL_STATE_H
-
-
-#define CELL_NEW_VIEWPORT      0x1
-#define CELL_NEW_RASTERIZER    0x2
-#define CELL_NEW_FS            0x4
-#define CELL_NEW_BLEND         0x8
-#define CELL_NEW_CLIP          0x10
-#define CELL_NEW_SCISSOR       0x20
-#define CELL_NEW_STIPPLE       0x40
-#define CELL_NEW_FRAMEBUFFER   0x80
-#define CELL_NEW_ALPHA_TEST    0x100
-#define CELL_NEW_DEPTH_STENCIL 0x200
-#define CELL_NEW_SAMPLER       0x400
-#define CELL_NEW_TEXTURE       0x800
-#define CELL_NEW_VERTEX        0x1000
-#define CELL_NEW_VS            0x2000
-#define CELL_NEW_VS_CONSTANTS  0x4000
-#define CELL_NEW_FS_CONSTANTS  0x8000
-#define CELL_NEW_VERTEX_INFO   0x10000
-
-
-extern void
-cell_update_derived( struct cell_context *cell );
-
-
-extern void
-cell_init_shader_functions(struct cell_context *cell);
-
-
-extern void
-cell_init_vertex_functions(struct cell_context *cell);
-
-
-#endif /* CELL_STATE_H */
-
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
deleted file mode 100644
index b723e794e71..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "util/u_memory.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw/draw_context.h"
-#include "draw/draw_vertex.h"
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_state.h"
-#include "cell_state_emit.h"
-
-
-/**
- * Determine how to map vertex program outputs to fragment program inputs.
- * Basically, this will be used when computing the triangle interpolation
- * coefficients from the post-transform vertex attributes.
- */
-static void
-calculate_vertex_layout( struct cell_context *cell )
-{
-   const struct cell_fragment_shader_state *fs = cell->fs;
-   const enum interp_mode colorInterp
-      = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
-   struct vertex_info *vinfo = &cell->vertex_info;
-   uint i;
-   int src;
-
-#if 0
-   if (cell->vbuf) {
-      /* if using the post-transform vertex buffer, tell draw_vbuf to
-       * simply emit the whole post-xform vertex as-is:
-       */
-      struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf;
-      vinfo_vbuf->num_attribs = 0;
-      draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0);
-      vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4;
-   }
-#endif
-
-   /* reset vinfo */
-   vinfo->num_attribs = 0;
-
-   /* we always want to emit vertex pos */
-   src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
-   assert(src >= 0);
-   draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
-
-
-   /*
-    * Loop over fragment shader inputs, searching for the matching output
-    * from the vertex shader.
-    */
-   for (i = 0; i < fs->info.num_inputs; i++) {
-      switch (fs->info.input_semantic_name[i]) {
-      case TGSI_SEMANTIC_POSITION:
-         /* already done above */
-         break;
-
-      case TGSI_SEMANTIC_COLOR:
-         src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, 
-                                   fs->info.input_semantic_index[i]);
-         assert(src >= 0);
-         draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
-         break;
-
-      case TGSI_SEMANTIC_FOG:
-         src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
-#if 1
-         if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
-            src = 0;
-#endif
-         assert(src >= 0);
-         draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
-         break;
-
-      case TGSI_SEMANTIC_GENERIC:
-         /* this includes texcoords and varying vars */
-         src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC,
-                              fs->info.input_semantic_index[i]);
-         assert(src >= 0);
-         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
-         break;
-
-      default:
-         assert(0);
-      }
-   }
-
-   draw_compute_vertex_size(vinfo);
-
-   /* XXX only signal this if format really changes */
-   cell->dirty |= CELL_NEW_VERTEX_INFO;
-}
-
-
-#if 0
-/**
- * Recompute cliprect from scissor bounds, scissor enable and surface size.
- */
-static void
-compute_cliprect(struct cell_context *sp)
-{
-   uint surfWidth = sp->framebuffer.width;
-   uint surfHeight = sp->framebuffer.height;
-
-   if (sp->rasterizer->scissor) {
-      /* clip to scissor rect */
-      sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
-      sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
-      sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
-      sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight);
-   }
-   else {
-      /* clip to surface bounds */
-      sp->cliprect.minx = 0;
-      sp->cliprect.miny = 0;
-      sp->cliprect.maxx = surfWidth;
-      sp->cliprect.maxy = surfHeight;
-   }
-}
-#endif
-
-
-
-/**
- * Update derived state, send current state to SPUs prior to rendering.
- */
-void cell_update_derived( struct cell_context *cell )
-{
-   if (cell->dirty & (CELL_NEW_RASTERIZER |
-                      CELL_NEW_FS |
-                      CELL_NEW_VS))
-      calculate_vertex_layout( cell );
-
-#if 0
-   if (cell->dirty & (CELL_NEW_SCISSOR |
-                      CELL_NEW_DEPTH_STENCIL_ALPHA |
-                      CELL_NEW_FRAMEBUFFER))
-      compute_cliprect(cell);
-#endif
-
-   cell_emit_state(cell);
-
-   cell->dirty = 0;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
deleted file mode 100644
index bb11c68fa24..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ /dev/null
@@ -1,343 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "util/u_math.h"
-#include "util/u_format.h"
-#include "cell_context.h"
-#include "cell_gen_fragment.h"
-#include "cell_state.h"
-#include "cell_state_emit.h"
-#include "cell_batch.h"
-#include "cell_texture.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-
-
-/**
- * Find/create a cell_command_fragment_ops object corresponding to the
- * current blend/stencil/z/colormask/etc. state.
- */
-static struct cell_command_fragment_ops *
-lookup_fragment_ops(struct cell_context *cell)
-{
-   struct cell_fragment_ops_key key;
-   struct cell_command_fragment_ops *ops;
-
-   /*
-    * Build key
-    */
-   memset(&key, 0, sizeof(key));
-   key.blend = *cell->blend;
-   key.blend_color = cell->blend_color;
-   key.dsa = *cell->depth_stencil;
-
-   if (cell->framebuffer.cbufs[0])
-      key.color_format = cell->framebuffer.cbufs[0]->format;
-   else
-      key.color_format = PIPE_FORMAT_NONE;
-
-   if (cell->framebuffer.zsbuf)
-      key.zs_format = cell->framebuffer.zsbuf->format;
-   else
-      key.zs_format = PIPE_FORMAT_NONE;
-
-   /*
-    * Look up key in cache.
-    */
-   ops = (struct cell_command_fragment_ops *)
-      util_keymap_lookup(cell->fragment_ops_cache, &key);
-
-   /*
-    * If not found, create/save new fragment ops command.
-    */
-   if (!ops) {
-      struct spe_function spe_code_front, spe_code_back;
-      unsigned int facing_dependent, total_code_size;
-
-      if (0)
-         debug_printf("**** Create New Fragment Ops\n");
-
-      /* Prepare the buffer that will hold the generated code.  The
-       * "0" passed in for the size means that the SPE code will
-       * use a default size.
-       */
-      spe_init_func(&spe_code_front, 0);
-      spe_init_func(&spe_code_back, 0);
-
-      /* Generate new code.  Always generate new code for both front-facing
-       * and back-facing fragments, even if it's the same code in both
-       * cases.
-       */
-      cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
-      cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
-
-      /* Make sure the code is a multiple of 8 bytes long; this is
-       * required to ensure that the dual pipe instruction alignment
-       * is correct.  It's also important for the SPU unpacking,
-       * which assumes 8-byte boundaries.
-       */
-      unsigned int front_code_size = spe_code_size(&spe_code_front);
-      while (front_code_size % 8 != 0) {
-         spe_lnop(&spe_code_front);
-         front_code_size = spe_code_size(&spe_code_front);
-      }
-      unsigned int back_code_size = spe_code_size(&spe_code_back);
-      while (back_code_size % 8 != 0) {
-         spe_lnop(&spe_code_back);
-         back_code_size = spe_code_size(&spe_code_back);
-      }
-
-      /* Determine whether the code we generated is facing-dependent, by
-       * determining whether the generated code is different for the front-
-       * and back-facing fragments.
-       */
-      if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) {
-         /* Code is identical; only need one copy. */
-         facing_dependent = 0;
-         total_code_size = front_code_size;
-      }
-      else {
-         /* Code is different for front-facing and back-facing fragments.
-          * Need to send both copies.
-          */
-         facing_dependent = 1;
-         total_code_size = front_code_size + back_code_size;
-      }
-
-      /* alloc new fragment ops command.  Note that this structure
-       * has variant length based on the total code size required.
-       */
-      ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size);
-      /* populate the new cell_command_fragment_ops object */
-      ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS;
-      ops->total_code_size = total_code_size;
-      ops->front_code_index = 0;
-      memcpy(ops->code, spe_code_front.store, front_code_size);
-      if (facing_dependent) {
-        /* We have separate front- and back-facing code.  Append the
-         * back-facing code to the buffer.  Be careful because the code
-         * size is in bytes, but the buffer is of unsigned elements.
-         */
-        ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]);
-        memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size);
-      }
-      else {
-        /* Use the same code for front- and back-facing fragments */
-        ops->back_code_index = ops->front_code_index;
-      }
-
-      /* Set the fields for the fallback case.  Note that these fields
-       * (and the whole fallback case) will eventually go away.
-       */
-      ops->dsa = *cell->depth_stencil;
-      ops->blend = *cell->blend;
-      ops->blend_color = cell->blend_color;
-
-      /* insert cell_command_fragment_ops object into keymap/cache */
-      util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
-
-      /* release rtasm buffer */
-      spe_release_func(&spe_code_front);
-      spe_release_func(&spe_code_back);
-   }
-   else {
-      if (0)
-         debug_printf("**** Re-use Fragment Ops\n");
-   }
-
-   return ops;
-}
-
-
-
-static void
-emit_state_cmd(struct cell_context *cell, uint cmd,
-               const void *state, uint state_size)
-{
-   uint32_t *dst = (uint32_t *) 
-       cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size));
-   *dst = cmd;
-   memcpy(dst + 4, state, state_size);
-}
-
-
-/**
- * For state marked as 'dirty', construct a state-update command block
- * and insert it into the current batch buffer.
- */
-void
-cell_emit_state(struct cell_context *cell)
-{
-   if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
-      struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
-      struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
-      STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0);
-      struct cell_command_framebuffer *fb
-         = cell_batch_alloc16(cell, sizeof(*fb));
-      fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER;
-      fb->color_start = cell->cbuf_map[0];
-      fb->color_format = cbuf->format;
-      fb->depth_start = cell->zsbuf_map;
-      fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
-      fb->width = cell->framebuffer.width;
-      fb->height = cell->framebuffer.height;
-#if 0
-      printf("EMIT color format %s\n", util_format_name(fb->color_format));
-      printf("EMIT depth format %s\n", util_format_name(fb->depth_format));
-#endif
-   }
-
-   if (cell->dirty & (CELL_NEW_RASTERIZER)) {
-      STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0);
-      struct cell_command_rasterizer *rast =
-         cell_batch_alloc16(cell, sizeof(*rast));
-      rast->opcode[0] = CELL_CMD_STATE_RASTERIZER;
-      rast->rasterizer = *cell->rasterizer;
-   }
-
-   if (cell->dirty & (CELL_NEW_FS)) {
-      /* Send new fragment program to SPUs */
-      STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0);
-      struct cell_command_fragment_program *fp
-            = cell_batch_alloc16(cell, sizeof(*fp));
-      fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM;
-      fp->num_inst = cell->fs->code.num_inst;
-      memcpy(&fp->code, cell->fs->code.store,
-             SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
-      if (0) {
-         int i;
-         printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
-         for (i = 0; i < fp->num_inst; i++) {
-            printf(" %3d: 0x%08x\n", i, fp->code[i]);
-         }
-      }
-   }
-
-   if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) {
-      const uint shader = PIPE_SHADER_FRAGMENT;
-      const uint num_const = cell->constants[shader]->width0 / sizeof(float);
-      uint i, j;
-      float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float)));
-      uint32_t *ibuf = (uint32_t *) buf;
-      const float *constants = cell->mapped_constants[shader];
-      ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS;
-      ibuf[4] = num_const;
-      j = 8;
-      for (i = 0; i < num_const; i++) {
-         buf[j++] = constants[i];
-      }
-   }
-
-   if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
-                      CELL_NEW_DEPTH_STENCIL |
-                      CELL_NEW_BLEND)) {
-      struct cell_command_fragment_ops *fops, *fops_cmd;
-      /* Note that cell_command_fragment_ops is a variant-sized record */
-      fops = lookup_fragment_ops(cell);
-      fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size));
-      memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size);
-   }
-
-   if (cell->dirty & CELL_NEW_SAMPLER) {
-      uint i;
-      for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
-         if (cell->dirty_samplers & (1 << i)) {
-            if (cell->sampler[i]) {
-               STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0);
-               struct cell_command_sampler *sampler
-                  = cell_batch_alloc16(cell, sizeof(*sampler));
-               sampler->opcode[0] = CELL_CMD_STATE_SAMPLER;
-               sampler->unit = i;
-               sampler->state = *cell->sampler[i];
-            }
-         }
-      }
-      cell->dirty_samplers = 0x0;
-   }
-
-   if (cell->dirty & CELL_NEW_TEXTURE) {
-      uint i;
-      for (i = 0;i < CELL_MAX_SAMPLERS; i++) {
-         if (cell->dirty_textures & (1 << i)) {
-            STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0);
-            struct cell_command_texture *texture =
-               (struct cell_command_texture *)
-               cell_batch_alloc16(cell, sizeof(*texture));
-
-            texture->opcode[0] = CELL_CMD_STATE_TEXTURE;
-            texture->unit = i;
-            if (cell->texture[i]) {
-               struct cell_resource *ct = cell->texture[i];
-               uint level;
-               for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
-                  texture->start[level] = (ct->mapped +
-                                           ct->level_offset[level]);
-                  texture->width[level] = u_minify(ct->base.width0, level);
-                  texture->height[level] = u_minify(ct->base.height0, level);
-                  texture->depth[level] = u_minify(ct->base.depth0, level);
-               }
-               texture->target = ct->base.target;
-            }
-            else {
-               uint level;
-               for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
-                  texture->start[level] = NULL;
-                  texture->width[level] = 0;
-                  texture->height[level] = 0;
-                  texture->depth[level] = 0;
-               }
-               texture->target = 0;
-            }
-         }
-      }
-      cell->dirty_textures = 0x0;
-   }
-
-   if (cell->dirty & CELL_NEW_VERTEX_INFO) {
-      emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
-                     &cell->vertex_info, sizeof(struct vertex_info));
-   }
-
-#if 0
-   if (cell->dirty & CELL_NEW_VS) {
-      const struct draw_context *const draw = cell->draw;
-      struct cell_shader_info info;
-
-      info.num_outputs = draw_num_shader_outputs(draw);
-      info.declarations = (uintptr_t) draw->vs.machine.Declarations;
-      info.num_declarations = draw->vs.machine.NumDeclarations;
-      info.instructions = (uintptr_t) draw->vs.machine.Instructions;
-      info.num_instructions = draw->vs.machine.NumInstructions;
-      info.immediates = (uintptr_t) draw->vs.machine.Imms;
-      info.num_immediates = draw->vs.machine.ImmLimit / 4;
-
-      emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
-   }
-#endif
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h
deleted file mode 100644
index 59f8affe8d3..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef CELL_STATE_EMIT_H
-#define CELL_STATE_EMIT_H
-
-
-extern void
-cell_emit_state(struct cell_context *cell);
-
-
-#endif /* CELL_STATE_EMIT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
deleted file mode 100644
index dc33e7ccc2c..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
+++ /dev/null
@@ -1,1432 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file
- * Generate code to perform all per-fragment operations.
- *
- * Code generated by these functions perform both alpha, depth, and stencil
- * testing as well as alpha blending.
- *
- * \note
- * Occlusion query is not supported, but this is the right place to add that
- * support.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- */
-
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-
-#include "cell_context.h"
-
-#include "rtasm/rtasm_ppc_spe.h"
-
-
-/**
- * Generate code to perform alpha testing.
- *
- * The code generated by this function uses the register specificed by
- * \c mask as both an input and an output.
- *
- * \param dsa    Current alpha-test state
- * \param f      Function to which code should be appended
- * \param mask   Index of register containing active fragment mask
- * \param alphas Index of register containing per-fragment alpha values
- *
- * \note Emits a maximum of 6 instructions.
- */
-static void
-emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
-                struct spe_function *f, int mask, int alphas)
-{
-   /* If the alpha function is either NEVER or ALWAYS, there is no need to
-    * load the reference value into a register.  ALWAYS is a fairly common
-    * case, and this optimization saves 2 instructions.
-    */
-   if (dsa->alpha.enabled
-       && (dsa->alpha.func != PIPE_FUNC_NEVER)
-       && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
-      int ref = spe_allocate_available_register(f);
-      int tmp_a = spe_allocate_available_register(f);
-      int tmp_b = spe_allocate_available_register(f);
-      union {
-         float f;
-         unsigned u;
-      } ref_val;
-      boolean complement = FALSE;
-
-      ref_val.f = dsa->alpha.ref;
-
-      spe_il(f, ref, ref_val.u & 0x0000ffff);
-      spe_ilh(f, ref, ref_val.u >> 16);
-
-      switch (dsa->alpha.func) {
-      case PIPE_FUNC_NOTEQUAL:
-         complement = TRUE;
-         /* FALLTHROUGH */
-
-      case PIPE_FUNC_EQUAL:
-         spe_fceq(f, tmp_a, ref, alphas);
-         break;
-
-      case PIPE_FUNC_LEQUAL:
-         complement = TRUE;
-         /* FALLTHROUGH */
-
-      case PIPE_FUNC_GREATER:
-         spe_fcgt(f, tmp_a, ref, alphas);
-         break;
-
-      case PIPE_FUNC_LESS:
-         complement = TRUE;
-         /* FALLTHROUGH */
-
-      case PIPE_FUNC_GEQUAL:
-         spe_fcgt(f, tmp_a, ref, alphas);
-         spe_fceq(f, tmp_b, ref, alphas);
-         spe_or(f, tmp_a, tmp_b, tmp_a);
-         break;
-
-      case PIPE_FUNC_ALWAYS:
-      case PIPE_FUNC_NEVER:
-      default:
-         assert(0);
-         break;
-      }
-
-      if (complement) {
-         spe_andc(f, mask, mask, tmp_a);
-      } else {
-         spe_and(f, mask, mask, tmp_a);
-      }
-
-      spe_release_register(f, ref);
-      spe_release_register(f, tmp_a);
-      spe_release_register(f, tmp_b);
-   } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
-      spe_il(f, mask, 0);
-   }
-}
-
-
-/**
- * Generate code to perform Z testing.  Four Z values are tested at once.
- * \param dsa        Current depth-test state
- * \param f          Function to which code should be appended
- * \param mask       Index of register to contain depth-pass mask
- * \param stored     Index of register containing values from depth buffer
- * \param calculated Index of register containing per-fragment depth values
- *
- * \return
- * If the calculated depth comparison mask is the actual mask, \c FALSE is
- * returned.  If the calculated depth comparison mask is the compliment of
- * the actual mask, \c TRUE is returned.
- *
- * \note Emits a maximum of 3 instructions.
- */
-static boolean
-emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
-                struct spe_function *f, int mask, int stored, int calculated)
-{
-   unsigned func = (dsa->depth.enabled)
-       ? dsa->depth.func : PIPE_FUNC_ALWAYS;
-   int tmp = spe_allocate_available_register(f);
-   boolean compliment = FALSE;
-
-   switch (func) {
-   case PIPE_FUNC_NEVER:
-      spe_il(f, mask, 0);
-      break;
-
-   case PIPE_FUNC_NOTEQUAL:
-      compliment = TRUE;
-      /* FALLTHROUGH */
-   case PIPE_FUNC_EQUAL:
-      spe_ceq(f, mask, calculated, stored);
-      break;
-
-   case PIPE_FUNC_LEQUAL:
-      compliment = TRUE;
-      /* FALLTHROUGH */
-   case PIPE_FUNC_GREATER:
-      spe_clgt(f, mask, calculated, stored);
-      break;
-
-   case PIPE_FUNC_LESS:
-      compliment = TRUE;
-      /* FALLTHROUGH */
-   case PIPE_FUNC_GEQUAL:
-      spe_clgt(f, mask, calculated, stored);
-      spe_ceq(f, tmp, calculated, stored);
-      spe_or(f, mask, mask, tmp);
-      break;
-
-   case PIPE_FUNC_ALWAYS:
-      spe_il(f, mask, ~0);
-      break;
-
-   default:
-      assert(0);
-      break;
-   }
-
-   spe_release_register(f, tmp);
-   return compliment;
-}
-
-
-/**
- * Generate code to apply the stencil operation (after testing).
- * \note Emits a maximum of 5 instructions.
- *
- * \warning
- * Since \c out and \c in might be the same register, this routine cannot
- * generate code that uses \c out as a temporary.
- */
-static void
-emit_stencil_op(struct spe_function *f,
-                int out, int in, int mask, unsigned op, unsigned ref)
-{
-   const int clamp = spe_allocate_available_register(f);
-   const int clamp_mask = spe_allocate_available_register(f);
-   const int result = spe_allocate_available_register(f);
-
-   switch(op) {
-   case PIPE_STENCIL_OP_KEEP:
-      assert(0);
-   case PIPE_STENCIL_OP_ZERO:
-      spe_il(f, result, 0);
-      break;
-   case PIPE_STENCIL_OP_REPLACE:
-      spe_il(f, result, ref);
-      break;
-   case PIPE_STENCIL_OP_INCR:
-      /* clamp = [0xff, 0xff, 0xff, 0xff] */
-      spe_il(f, clamp, 0x0ff);
-      /* result[i] = in[i] + 1 */
-      spe_ai(f, result, in, 1);
-      /* clamp_mask[i] = (result[i] > 0xff) */
-      spe_clgti(f, clamp_mask, result, 0x0ff);
-      /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
-      spe_selb(f, result, result, clamp, clamp_mask);
-      break;
-   case PIPE_STENCIL_OP_DECR:
-      spe_il(f, clamp, 0);
-      spe_ai(f, result, in, -1);
-
-      /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
-       * arithmetic.
-       */
-      spe_clgti(f, clamp_mask, result, 0x0ff);
-      spe_selb(f, result, result, clamp, clamp_mask);
-      break;
-   case PIPE_STENCIL_OP_INCR_WRAP:
-      spe_ai(f, result, in, 1);
-      break;
-   case PIPE_STENCIL_OP_DECR_WRAP:
-      spe_ai(f, result, in, -1);
-      break;
-   case PIPE_STENCIL_OP_INVERT:
-      spe_nor(f, result, in, in);
-      break;
-   default:
-      assert(0);
-   }
-
-   spe_selb(f, out, in, result, mask);
-
-   spe_release_register(f, result);
-   spe_release_register(f, clamp_mask);
-   spe_release_register(f, clamp);
-}
-
-
-/**
- * Generate code to do stencil test.  Four pixels are tested at once.
- * \param dsa        Depth / stencil test state
- * \param face       0 for front face, 1 for back face
- * \param f          Function to append instructions to
- * \param mask       Register containing mask of fragments passing the
- *                   alpha test
- * \param depth_mask Register containing mask of fragments passing the
- *                   depth test
- * \param depth_compliment  Is \c depth_mask the compliment of the actual mask?
- * \param stencil    Register containing values from stencil buffer
- * \param depth_pass Register to store mask of fragments passing stencil test
- *                   and depth test
- *
- * \note
- * Emits a maximum of 10 + (3 * 5) = 25 instructions.
- */
-static int
-emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
-                  struct pipe_stencil_ref *sr,
-                  unsigned face,
-                  struct spe_function *f,
-                  int mask,
-                  int depth_mask,
-                  boolean depth_complement,
-                  int stencil,
-                  int depth_pass)
-{
-   int stencil_fail = spe_allocate_available_register(f);
-   int depth_fail = spe_allocate_available_register(f);
-   int stencil_mask = spe_allocate_available_register(f);
-   int stencil_pass = spe_allocate_available_register(f);
-   int face_stencil = spe_allocate_available_register(f);
-   int stencil_src = stencil;
-   const unsigned ref = (sr->ref_value[face]
-                         & dsa->stencil[face].valuemask);
-   boolean complement = FALSE;
-   int stored;
-   int tmp = spe_allocate_available_register(f);
-
-
-   if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
-       && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
-       && (dsa->stencil[face].valuemask != 0x0ff)) {
-      stored = spe_allocate_available_register(f);
-      spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
-   } else {
-      stored = stencil;
-   }
-
-
-   switch (dsa->stencil[face].func) {
-   case PIPE_FUNC_NEVER:
-      spe_il(f, stencil_mask, 0);   /* stencil_mask[0..3] = [0,0,0,0] */
-      break;
-
-   case PIPE_FUNC_NOTEQUAL:
-      complement = TRUE;
-      /* FALLTHROUGH */
-   case PIPE_FUNC_EQUAL:
-      /* stencil_mask[i] = (stored[i] == ref) */
-      spe_ceqi(f, stencil_mask, stored, ref);
-      break;
-
-   case PIPE_FUNC_LEQUAL:
-      complement = TRUE;
-      /* FALLTHROUGH */
-   case PIPE_FUNC_GREATER:
-      complement = TRUE;
-      /* stencil_mask[i] = (stored[i] > ref) */
-      spe_clgti(f, stencil_mask, stored, ref);
-      break;
-
-   case PIPE_FUNC_LESS:
-      complement = TRUE;
-      /* FALLTHROUGH */
-   case PIPE_FUNC_GEQUAL:
-      /* stencil_mask[i] = (stored[i] > ref) */
-      spe_clgti(f, stencil_mask, stored, ref);
-      /* tmp[i] = (stored[i] == ref) */
-      spe_ceqi(f, tmp, stored, ref);
-      /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
-      spe_or(f, stencil_mask, stencil_mask, tmp);
-      break;
-
-   case PIPE_FUNC_ALWAYS:
-      /* See comment below. */
-      break;
-
-   default:
-      assert(0);
-      break;
-   }
-
-   if (stored != stencil) {
-      spe_release_register(f, stored);
-   }
-   spe_release_register(f, tmp);
-
-
-   /* ALWAYS is a very common stencil-test, so some effort is applied to
-    * optimize that case.  The stencil-pass mask is the same as the input
-    * fragment mask.  This makes the stencil-test (above) a no-op, and the
-    * input fragment mask can be "renamed" the stencil-pass mask.
-    */
-   if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
-      spe_release_register(f, stencil_pass);
-      stencil_pass = mask;
-   } else {
-      if (complement) {
-         spe_andc(f, stencil_pass, mask, stencil_mask);
-      } else {
-         spe_and(f, stencil_pass, mask, stencil_mask);
-      }
-   }
-
-   if (depth_complement) {
-      spe_andc(f, depth_pass, stencil_pass, depth_mask);
-   } else {
-      spe_and(f, depth_pass, stencil_pass, depth_mask);
-   }
-
-
-   /* Conditionally emit code to update the stencil value under various
-    * condititons.  Note that there is no need to generate code under the
-    * following circumstances:
-    *
-    * - Stencil write mask is zero.
-    * - For stencil-fail if the stencil test is ALWAYS
-    * - For depth-fail if the stencil test is NEVER
-    * - For depth-pass if the stencil test is NEVER
-    * - Any of the 3 conditions if the operation is KEEP
-    */
-   if (dsa->stencil[face].writemask != 0) {
-      if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
-          && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
-         if (complement) {
-            spe_and(f, stencil_fail, mask, stencil_mask);
-         } else {
-            spe_andc(f, stencil_fail, mask, stencil_mask);
-         }
-
-         emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
-                         dsa->stencil[face].fail_op,
-                         sr->ref_value[face]);
-
-         stencil_src = face_stencil;
-      }
-
-      if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
-          && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
-         if (depth_complement) {
-            spe_and(f, depth_fail, stencil_pass, depth_mask);
-         } else {
-            spe_andc(f, depth_fail, stencil_pass, depth_mask);
-         }
-
-         emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
-                         dsa->stencil[face].zfail_op,
-                         sr->ref_value[face]);
-         stencil_src = face_stencil;
-      }
-
-      if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
-          && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
-         emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
-                         dsa->stencil[face].zpass_op,
-                         sr->ref_value[face]);
-         stencil_src = face_stencil;
-      }
-   }
-
-   spe_release_register(f, stencil_fail);
-   spe_release_register(f, depth_fail);
-   spe_release_register(f, stencil_mask);
-   if (stencil_pass != mask) {
-      spe_release_register(f, stencil_pass);
-   }
-
-   /* If all of the stencil operations were KEEP or the stencil write mask was
-    * zero, "stencil_src" will still be set to "stencil".  In this case
-    * release the "face_stencil" register.  Otherwise apply the stencil write
-    * mask to select bits from the calculated stencil value and the previous
-    * stencil value.
-    */
-   if (stencil_src == stencil) {
-      spe_release_register(f, face_stencil);
-   } else if (dsa->stencil[face].writemask != 0x0ff) {
-      int tmp = spe_allocate_available_register(f);
-
-      spe_il(f, tmp, dsa->stencil[face].writemask);
-      spe_selb(f, stencil_src, stencil, stencil_src, tmp);
-
-      spe_release_register(f, tmp);
-   }
-
-   return stencil_src;
-}
-
-
-void
-cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa,
-                                 struct pipe_stencil_ref *sr)
-{
-   struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
-   struct spe_function *const f = &cdsa->code;
-
-   /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
-    * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions.  Round
-    * up to 64 to make it a happy power-of-two.
-    */
-   spe_init_func(f, SPE_INST_SIZE * 64);
-
-
-   /* Allocate registers for the function's input parameters.  Cleverly (and
-    * clever code is usually dangerous, but I couldn't resist) the generated
-    * function returns a structure.  Returned structures start with register
-    * 3, and the structure fields are ordered to match up exactly with the
-    * input parameters.
-    */
-   int mask = spe_allocate_register(f, 3);
-   int depth = spe_allocate_register(f, 4);
-   int stencil = spe_allocate_register(f, 5);
-   int zvals = spe_allocate_register(f, 6);
-   int frag_a = spe_allocate_register(f, 7);
-   int facing = spe_allocate_register(f, 8);
-
-   int depth_mask = spe_allocate_available_register(f);
-
-   boolean depth_complement;
-
-
-   emit_alpha_test(dsa, f, mask, frag_a);
-
-   depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
-
-   if (dsa->stencil[0].enabled) {
-      const int front_depth_pass = spe_allocate_available_register(f);
-      int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask,
-                                            depth_mask, depth_complement,
-                                            stencil, front_depth_pass);
-
-      if (dsa->stencil[1].enabled) {
-         const int back_depth_pass = spe_allocate_available_register(f);
-         int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask,
-                                              depth_mask,  depth_complement,
-                                              stencil, back_depth_pass);
-
-         /* If the front facing stencil value and the back facing stencil
-          * value are stored in the same register, there is no need to select
-          * a value based on the facing.  This can happen if the stencil value
-          * was not modified due to the write masks being zero, the stencil
-          * operations being KEEP, etc.
-          */
-         if (front_stencil != back_stencil) {
-            spe_selb(f, stencil, back_stencil, front_stencil, facing);
-         }
-
-         if (back_stencil != stencil) {
-            spe_release_register(f, back_stencil);
-         }
-
-         if (front_stencil != stencil) {
-            spe_release_register(f, front_stencil);
-         }
-
-         spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
-
-         spe_release_register(f, back_depth_pass);
-      } else {
-         if (front_stencil != stencil) {
-            spe_or(f, stencil, front_stencil, front_stencil);
-            spe_release_register(f, front_stencil);
-         }
-         spe_or(f, mask, front_depth_pass, front_depth_pass);
-      }
-
-      spe_release_register(f, front_depth_pass);
-   } else if (dsa->depth.enabled) {
-      if (depth_complement) {
-         spe_andc(f, mask, mask, depth_mask);
-      } else {
-         spe_and(f, mask, mask, depth_mask);
-      }
-   }
-
-   if (dsa->depth.writemask) {
-         spe_selb(f, depth, depth, zvals, mask);
-   }
-
-   spe_bi(f, 0, 0, 0);  /* return from function call */
-
-
-#if 0
-   {
-      const uint32_t *p = f->store;
-      unsigned i;
-
-      printf("# alpha (%sabled)\n",
-             (dsa->alpha.enabled) ? "en" : "dis");
-      printf("#    func: %u\n", dsa->alpha.func);
-      printf("#    ref: %.2f\n", dsa->alpha.ref);
-
-      printf("# depth (%sabled)\n",
-             (dsa->depth.enabled) ? "en" : "dis");
-      printf("#    func: %u\n", dsa->depth.func);
-
-      for (i = 0; i < 2; i++) {
-         printf("# %s stencil (%sabled)\n",
-                (i == 0) ? "front" : "back",
-                (dsa->stencil[i].enabled) ? "en" : "dis");
-
-         printf("#    func: %u\n", dsa->stencil[i].func);
-         printf("#    op (sf, zf, zp): %u %u %u\n",
-                dsa->stencil[i].fail_op,
-                dsa->stencil[i].zfail_op,
-                dsa->stencil[i].zpass_op);
-         printf("#    ref value / value mask / write mask: %02x %02x %02x\n",
-                sr->ref_value[i],
-                dsa->stencil[i].valuemask,
-                dsa->stencil[i].writemask);
-      }
-
-      printf("\t.text\n");
-      for (/* empty */; p < f->csr; p++) {
-         printf("\t.long\t0x%04x\n", *p);
-      }
-      fflush(stdout);
-   }
-#endif
-}
-
-
-/**
- * \note Emits a maximum of 3 instructions
- */
-static int
-emit_alpha_factor_calculation(struct spe_function *f,
-                              unsigned factor,
-                              int src_alpha, int dst_alpha, int const_alpha)
-{
-   int factor_reg;
-   int tmp;
-
-
-   switch (factor) {
-   case PIPE_BLENDFACTOR_ONE:
-      factor_reg = -1;
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      factor_reg = spe_allocate_available_register(f);
-
-      spe_or(f, factor_reg, src_alpha, src_alpha);
-      break;
-
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      factor_reg = dst_alpha;
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      factor_reg = -1;
-      break;
-
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      factor_reg = spe_allocate_available_register(f);
-
-      tmp = spe_allocate_available_register(f);
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-      spe_fs(f, factor_reg, tmp, const_alpha);
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      factor_reg = const_alpha;
-      break;
-
-   case PIPE_BLENDFACTOR_ZERO:
-      factor_reg = -1;
-      break;
-
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      tmp = spe_allocate_available_register(f);
-      factor_reg = spe_allocate_available_register(f);
-
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-      spe_fs(f, factor_reg, tmp, src_alpha);
-
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      tmp = spe_allocate_available_register(f);
-      factor_reg = spe_allocate_available_register(f);
-
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-      spe_fs(f, factor_reg, tmp, dst_alpha);
-
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-   default:
-      assert(0);
-      factor_reg = -1;
-      break;
-   }
-
-   return factor_reg;
-}
-
-
-/**
- * \note Emits a maximum of 6 instructions
- */
-static void
-emit_color_factor_calculation(struct spe_function *f,
-                              unsigned sF, unsigned mask,
-                              const int *src,
-                              const int *dst,
-                              const int *const_color,
-                              int *factor)
-{
-   int tmp;
-   unsigned i;
-
-
-   factor[0] = -1;
-   factor[1] = -1;
-   factor[2] = -1;
-   factor[3] = -1;
-
-   switch (sF) {
-   case PIPE_BLENDFACTOR_ONE:
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      for (i = 0; i < 3; ++i) {
-         if ((mask & (1U << i)) != 0) {
-            factor[i] = spe_allocate_available_register(f);
-            spe_or(f, factor[i], src[i], src[i]);
-         }
-      }
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      factor[0] = spe_allocate_available_register(f);
-      factor[1] = factor[0];
-      factor[2] = factor[0];
-
-      spe_or(f, factor[0], src[3], src[3]);
-      break;
-
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      factor[0] = dst[3];
-      factor[1] = dst[3];
-      factor[2] = dst[3];
-      break;
-
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      factor[0] = dst[0];
-      factor[1] = dst[1];
-      factor[2] = dst[2];
-      break;
-
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      tmp = spe_allocate_available_register(f);
-      factor[0] = spe_allocate_available_register(f);
-      factor[1] = factor[0];
-      factor[2] = factor[0];
-
-      /* Alpha saturate means min(As, 1-Ad).
-       */
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-      spe_fs(f, tmp, tmp, dst[3]);
-      spe_fcgt(f, factor[0], tmp, src[3]);
-      spe_selb(f, factor[0], src[3], tmp, factor[0]);
-
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      tmp = spe_allocate_available_register(f);
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-
-      for (i = 0; i < 3; i++) {
-         factor[i] = spe_allocate_available_register(f);
-
-         spe_fs(f, factor[i], tmp, const_color[i]);
-      }
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      for (i = 0; i < 3; i++) {
-         factor[i] = const_color[i];
-      }
-      break;
-
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      factor[0] = spe_allocate_available_register(f);
-      factor[1] = factor[0];
-      factor[2] = factor[0];
-
-      tmp = spe_allocate_available_register(f);
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-      spe_fs(f, factor[0], tmp, const_color[3]);
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      factor[0] = const_color[3];
-      factor[1] = factor[0];
-      factor[2] = factor[0];
-      break;
-
-   case PIPE_BLENDFACTOR_ZERO:
-      break;
-
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      tmp = spe_allocate_available_register(f);
-
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-
-      for (i = 0; i < 3; ++i) {
-         if ((mask & (1U << i)) != 0) {
-            factor[i] = spe_allocate_available_register(f);
-            spe_fs(f, factor[i], tmp, src[i]);
-         }
-      }
-
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      tmp = spe_allocate_available_register(f);
-      factor[0] = spe_allocate_available_register(f);
-      factor[1] = factor[0];
-      factor[2] = factor[0];
-
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-      spe_fs(f, factor[0], tmp, src[3]);
-
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      tmp = spe_allocate_available_register(f);
-      factor[0] = spe_allocate_available_register(f);
-      factor[1] = factor[0];
-      factor[2] = factor[0];
-
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-      spe_fs(f, factor[0], tmp, dst[3]);
-
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      tmp = spe_allocate_available_register(f);
-
-      spe_il(f, tmp, 1);
-      spe_cuflt(f, tmp, tmp, 0);
-
-      for (i = 0; i < 3; ++i) {
-         if ((mask & (1U << i)) != 0) {
-            factor[i] = spe_allocate_available_register(f);
-            spe_fs(f, factor[i], tmp, dst[i]);
-         }
-      }
-
-      spe_release_register(f, tmp);
-      break;
-
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-   default:
-      assert(0);
-   }
-}
-
-
-static void
-emit_blend_calculation(struct spe_function *f,
-                       unsigned func, unsigned sF, unsigned dF,
-                       int src, int src_factor, int dst, int dst_factor)
-{
-   int tmp = spe_allocate_available_register(f);
-
-   switch (func) {
-   case PIPE_BLEND_ADD:
-      if (sF == PIPE_BLENDFACTOR_ONE) {
-         if (dF == PIPE_BLENDFACTOR_ZERO) {
-            /* Do nothing. */
-         } else if (dF == PIPE_BLENDFACTOR_ONE) {
-            spe_fa(f, src, src, dst);
-         }
-      } else if (sF == PIPE_BLENDFACTOR_ZERO) {
-         if (dF == PIPE_BLENDFACTOR_ZERO) {
-            spe_il(f, src, 0);
-         } else if (dF == PIPE_BLENDFACTOR_ONE) {
-            spe_or(f, src, dst, dst);
-         } else {
-            spe_fm(f, src, dst, dst_factor);
-         }
-      } else if (dF == PIPE_BLENDFACTOR_ZERO) {
-         spe_fm(f, src, src, src_factor);
-      } else {
-         spe_fm(f, tmp, dst, dst_factor);
-         spe_fma(f, src, src, src_factor, tmp);
-      }
-      break;
-
-   case PIPE_BLEND_SUBTRACT:
-      if (sF == PIPE_BLENDFACTOR_ONE) {
-         if (dF == PIPE_BLENDFACTOR_ZERO) {
-            /* Do nothing. */
-         } else if (dF == PIPE_BLENDFACTOR_ONE) {
-            spe_fs(f, src, src, dst);
-         }
-      } else if (sF == PIPE_BLENDFACTOR_ZERO) {
-         if (dF == PIPE_BLENDFACTOR_ZERO) {
-            spe_il(f, src, 0);
-         } else if (dF == PIPE_BLENDFACTOR_ONE) {
-            spe_il(f, tmp, 0);
-            spe_fs(f, src, tmp, dst);
-         } else {
-            spe_fm(f, src, dst, dst_factor);
-         }
-      } else if (dF == PIPE_BLENDFACTOR_ZERO) {
-         spe_fm(f, src, src, src_factor);
-      } else {
-         spe_fm(f, tmp, dst, dst_factor);
-         spe_fms(f, src, src, src_factor, tmp);
-      }
-      break;
-
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      if (sF == PIPE_BLENDFACTOR_ONE) {
-         if (dF == PIPE_BLENDFACTOR_ZERO) {
-            spe_il(f, tmp, 0);
-            spe_fs(f, src, tmp, src);
-         } else if (dF == PIPE_BLENDFACTOR_ONE) {
-            spe_fs(f, src, dst, src);
-         }
-      } else if (sF == PIPE_BLENDFACTOR_ZERO) {
-         if (dF == PIPE_BLENDFACTOR_ZERO) {
-            spe_il(f, src, 0);
-         } else if (dF == PIPE_BLENDFACTOR_ONE) {
-            spe_or(f, src, dst, dst);
-         } else {
-            spe_fm(f, src, dst, dst_factor);
-         }
-      } else if (dF == PIPE_BLENDFACTOR_ZERO) {
-         spe_fm(f, src, src, src_factor);
-      } else {
-         spe_fm(f, tmp, src, src_factor);
-         spe_fms(f, src, src, dst_factor, tmp);
-      }
-      break;
-
-   case PIPE_BLEND_MIN:
-      spe_cgt(f, tmp, src, dst);
-      spe_selb(f, src, src, dst, tmp);
-      break;
-
-   case PIPE_BLEND_MAX:
-      spe_cgt(f, tmp, src, dst);
-      spe_selb(f, src, dst, src, tmp);
-      break;
-
-   default:
-      assert(0);
-   }
-
-   spe_release_register(f, tmp);
-}
-
-
-/**
- * Generate code to perform alpha blending on the SPE
- */
-void
-cell_generate_alpha_blend(struct cell_blend_state *cb)
-{
-   struct pipe_blend_state *const b = &cb->base;
-   struct spe_function *const f = &cb->code;
-
-   /* This code generates a maximum of 3 (source alpha factor)
-    * + 3 (destination alpha factor) + (3 * 6) (source color factor)
-    * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
-    * + 4 (fragment mask) + 1 (return) = 55 instlructions.  Round up to 64 to
-    * make it a happy power-of-two.
-    */
-   spe_init_func(f, SPE_INST_SIZE * 64);
-
-
-   const int frag[4] = {
-      spe_allocate_register(f, 3),
-      spe_allocate_register(f, 4),
-      spe_allocate_register(f, 5),
-      spe_allocate_register(f, 6),
-   };
-   const int pixel[4] = {
-      spe_allocate_register(f, 7),
-      spe_allocate_register(f, 8),
-      spe_allocate_register(f, 9),
-      spe_allocate_register(f, 10),
-   };
-   const int const_color[4] = {
-      spe_allocate_register(f, 11),
-      spe_allocate_register(f, 12),
-      spe_allocate_register(f, 13),
-      spe_allocate_register(f, 14),
-   };
-   unsigned func[4];
-   unsigned sF[4];
-   unsigned dF[4];
-   unsigned i;
-   int src_factor[4];
-   int dst_factor[4];
-
-
-   /* Does the selected blend mode make use of the source / destination
-    * color (RGB) blend factors?
-    */
-   boolean need_color_factor = b->rt[0].blend_enable
-       && (b->rt[0].rgb_func != PIPE_BLEND_MIN)
-       && (b->rt[0].rgb_func != PIPE_BLEND_MAX);
-
-   /* Does the selected blend mode make use of the source / destination
-    * alpha blend factors?
-    */
-   boolean need_alpha_factor = b->rt[0].blend_enable
-       && (b->rt[0].alpha_func != PIPE_BLEND_MIN)
-       && (b->rt[0].alpha_func != PIPE_BLEND_MAX);
-
-
-   if (b->rt[0].blend_enable) {
-      sF[0] = b->rt[0].rgb_src_factor;
-      sF[1] = sF[0];
-      sF[2] = sF[0];
-      switch (b->rt[0].alpha_src_factor & 0x0f) {
-      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-         sF[3] = PIPE_BLENDFACTOR_ONE;
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-      case PIPE_BLENDFACTOR_DST_COLOR:
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-      case PIPE_BLENDFACTOR_SRC1_COLOR:
-         sF[3] = b->rt[0].alpha_src_factor + 1;
-         break;
-      default:
-         sF[3] = b->rt[0].alpha_src_factor;
-      }
-
-      dF[0] = b->rt[0].rgb_dst_factor;
-      dF[1] = dF[0];
-      dF[2] = dF[0];
-      switch (b->rt[0].alpha_dst_factor & 0x0f) {
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-      case PIPE_BLENDFACTOR_DST_COLOR:
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-      case PIPE_BLENDFACTOR_SRC1_COLOR:
-         dF[3] = b->rt[0].alpha_dst_factor + 1;
-         break;
-      default:
-         dF[3] = b->rt[0].alpha_dst_factor;
-      }
-
-      func[0] = b->rt[0].rgb_func;
-      func[1] = func[0];
-      func[2] = func[0];
-      func[3] = b->rt[0].alpha_func;
-   } else {
-      sF[0] = PIPE_BLENDFACTOR_ONE;
-      sF[1] = PIPE_BLENDFACTOR_ONE;
-      sF[2] = PIPE_BLENDFACTOR_ONE;
-      sF[3] = PIPE_BLENDFACTOR_ONE;
-      dF[0] = PIPE_BLENDFACTOR_ZERO;
-      dF[1] = PIPE_BLENDFACTOR_ZERO;
-      dF[2] = PIPE_BLENDFACTOR_ZERO;
-      dF[3] = PIPE_BLENDFACTOR_ZERO;
-
-      func[0] = PIPE_BLEND_ADD;
-      func[1] = PIPE_BLEND_ADD;
-      func[2] = PIPE_BLEND_ADD;
-      func[3] = PIPE_BLEND_ADD;
-   }
-
-
-   /* If alpha writing is enabled and the alpha blend mode requires use of
-    * the alpha factor, calculate the alpha factor.
-    */
-   if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) {
-      src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
-                                                    frag[3], pixel[3]);
-
-      /* If the alpha destination blend factor is the same as the alpha source
-       * blend factor, re-use the previously calculated value.
-       */
-      dst_factor[3] = (dF[3] == sF[3])
-          ? src_factor[3]
-          : emit_alpha_factor_calculation(f, dF[3], const_color[3],
-                                          frag[3], pixel[3]);
-   }
-
-
-   if (sF[0] == sF[3]) {
-      src_factor[0] = src_factor[3];
-      src_factor[1] = src_factor[3];
-      src_factor[2] = src_factor[3];
-   } else if (sF[0] == dF[3]) {
-      src_factor[0] = dst_factor[3];
-      src_factor[1] = dst_factor[3];
-      src_factor[2] = dst_factor[3];
-   } else if (need_color_factor) {
-      emit_color_factor_calculation(f,
-                                    b->rt[0].rgb_src_factor,
-                                    b->rt[0].colormask,
-                                    frag, pixel, const_color, src_factor);
-   }
-
-
-   if (dF[0] == sF[3]) {
-      dst_factor[0] = src_factor[3];
-      dst_factor[1] = src_factor[3];
-      dst_factor[2] = src_factor[3];
-   } else if (dF[0] == dF[3]) {
-      dst_factor[0] = dst_factor[3];
-      dst_factor[1] = dst_factor[3];
-      dst_factor[2] = dst_factor[3];
-   } else if (dF[0] == sF[0]) {
-      dst_factor[0] = src_factor[0];
-      dst_factor[1] = src_factor[1];
-      dst_factor[2] = src_factor[2];
-   } else if (need_color_factor) {
-      emit_color_factor_calculation(f,
-                                    b->rt[0].rgb_dst_factor,
-                                    b->rt[0].colormask,
-                                    frag, pixel, const_color, dst_factor);
-   }
-
-
-
-   for (i = 0; i < 4; ++i) {
-      if ((b->rt[0].colormask & (1U << i)) != 0) {
-         emit_blend_calculation(f,
-                                func[i], sF[i], dF[i],
-                                frag[i], src_factor[i],
-                                pixel[i], dst_factor[i]);
-      }
-   }
-
-   spe_bi(f, 0, 0, 0);
-
-#if 0
-   {
-      const uint32_t *p = f->store;
-
-      printf("# %u instructions\n", f->csr - f->store);
-      printf("# blend (%sabled)\n",
-             (cb->base.blend_enable) ? "en" : "dis");
-      printf("#    RGB func / sf / df: %u %u %u\n",
-             cb->base.rgb_func,
-             cb->base.rgb_src_factor,
-             cb->base.rgb_dst_factor);
-      printf("#    ALP func / sf / df: %u %u %u\n",
-             cb->base.alpha_func,
-             cb->base.alpha_src_factor,
-             cb->base.alpha_dst_factor);
-
-      printf("\t.text\n");
-      for (/* empty */; p < f->csr; p++) {
-         printf("\t.long\t0x%04x\n", *p);
-      }
-      fflush(stdout);
-   }
-#endif
-}
-
-
-static int
-PC_OFFSET(const struct spe_function *f, const void *d)
-{
-   const intptr_t pc = (intptr_t) &f->store[f->num_inst];
-   const intptr_t ea = ~0x0f & (intptr_t) d;
-
-   return (ea - pc) >> 2;
-}
-
-
-/**
- * Generate code to perform color conversion and logic op
- *
- * \bug
- * The code generated by this function should also perform dithering.
- *
- * \bug
- * The code generated by this function should also perform color-write
- * masking.
- *
- * \bug
- * Only two framebuffer formats are supported at this time.
- */
-void
-cell_generate_logic_op(struct spe_function *f,
-                       const struct pipe_blend_state *blend,
-                       struct pipe_surface *surf)
-{
-   const unsigned logic_op = (blend->logicop_enable)
-       ? blend->logicop_func : PIPE_LOGICOP_COPY;
-
-   /* This code generates a maximum of 37 instructions.  An additional 32
-    * bytes (equiv. to 8 instructions) are needed for data storage.  Round up
-    * to 64 to make it a happy power-of-two.
-    */
-   spe_init_func(f, SPE_INST_SIZE * 64);
-
-
-   /* Pixel colors in framebuffer format in AoS layout.
-    */
-   const int pixel[4] = {
-      spe_allocate_register(f, 3),
-      spe_allocate_register(f, 4),
-      spe_allocate_register(f, 5),
-      spe_allocate_register(f, 6),
-   };
-
-   /* Fragment colors stored as floats in SoA layout.
-    */
-   const int frag[4] = {
-      spe_allocate_register(f, 7),
-      spe_allocate_register(f, 8),
-      spe_allocate_register(f, 9),
-      spe_allocate_register(f, 10),
-   };
-
-   const int mask = spe_allocate_register(f, 11);
-
-
-   /* Short-circuit the noop and invert cases.
-    */
-   if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) {
-      spe_bi(f, 0, 0, 0);
-      return;
-   } else if (logic_op == PIPE_LOGICOP_INVERT) {
-      spe_nor(f, pixel[0], pixel[0], pixel[0]);
-      spe_nor(f, pixel[1], pixel[1], pixel[1]);
-      spe_nor(f, pixel[2], pixel[2], pixel[2]);
-      spe_nor(f, pixel[3], pixel[3], pixel[3]);
-      spe_bi(f, 0, 0, 0);
-      return;
-   }
-
-
-   const int tmp[4] = {
-      spe_allocate_available_register(f),
-      spe_allocate_available_register(f),
-      spe_allocate_available_register(f),
-      spe_allocate_available_register(f),
-   };
-
-   const int shuf_xpose_hi = spe_allocate_available_register(f);
-   const int shuf_xpose_lo = spe_allocate_available_register(f);
-   const int shuf_color = spe_allocate_available_register(f);
-
-
-   /* Pointer to the begining of the function's private data area.
-    */
-   uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
-
-
-   /* Convert fragment colors to framebuffer format in AoS layout.
-    */
-   switch (surf->format) {
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      data[0] = 0x00010203;
-      data[1] = 0x10111213;
-      data[2] = 0x04050607;
-      data[3] = 0x14151617;
-      data[4] = 0x0c000408;
-      data[5] = 0x80808080;
-      data[6] = 0x80808080;
-      data[7] = 0x80808080;
-      break;
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
-      data[0] = 0x03020100;
-      data[1] = 0x13121110;
-      data[2] = 0x07060504;
-      data[3] = 0x17161514;
-      data[4] = 0x0804000c;
-      data[5] = 0x80808080;
-      data[6] = 0x80808080;
-      data[7] = 0x80808080;
-      break;
-   default:
-      fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
-      ASSERT(0);
-   }
-
-   spe_ilh(f, tmp[0], 0x0808);
-   spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
-   spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
-   spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
-
-   spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
-   spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
-   spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
-   spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
-
-   spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
-   spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
-   spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
-   spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
-
-   spe_cfltu(f, frag[0], frag[0], 32);
-   spe_cfltu(f, frag[1], frag[1], 32);
-   spe_cfltu(f, frag[2], frag[2], 32);
-   spe_cfltu(f, frag[3], frag[3], 32);
-
-   spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
-   spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
-   spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
-   spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
-
-
-   /* If logic op is enabled, perform the requested logical operation on the
-    * converted fragment colors and the pixel colors.
-    */
-   switch (logic_op) {
-   case PIPE_LOGICOP_CLEAR:
-      spe_il(f, frag[0], 0);
-      spe_il(f, frag[1], 0);
-      spe_il(f, frag[2], 0);
-      spe_il(f, frag[3], 0);
-      break;
-   case PIPE_LOGICOP_NOR:
-      spe_nor(f, frag[0], frag[0], pixel[0]);
-      spe_nor(f, frag[1], frag[1], pixel[1]);
-      spe_nor(f, frag[2], frag[2], pixel[2]);
-      spe_nor(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_AND_INVERTED:
-      spe_andc(f, frag[0], pixel[0], frag[0]);
-      spe_andc(f, frag[1], pixel[1], frag[1]);
-      spe_andc(f, frag[2], pixel[2], frag[2]);
-      spe_andc(f, frag[3], pixel[3], frag[3]);
-      break;
-   case PIPE_LOGICOP_COPY_INVERTED:
-      spe_nor(f, frag[0], frag[0], frag[0]);
-      spe_nor(f, frag[1], frag[1], frag[1]);
-      spe_nor(f, frag[2], frag[2], frag[2]);
-      spe_nor(f, frag[3], frag[3], frag[3]);
-      break;
-   case PIPE_LOGICOP_AND_REVERSE:
-      spe_andc(f, frag[0], frag[0], pixel[0]);
-      spe_andc(f, frag[1], frag[1], pixel[1]);
-      spe_andc(f, frag[2], frag[2], pixel[2]);
-      spe_andc(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_XOR:
-      spe_xor(f, frag[0], frag[0], pixel[0]);
-      spe_xor(f, frag[1], frag[1], pixel[1]);
-      spe_xor(f, frag[2], frag[2], pixel[2]);
-      spe_xor(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_NAND:
-      spe_nand(f, frag[0], frag[0], pixel[0]);
-      spe_nand(f, frag[1], frag[1], pixel[1]);
-      spe_nand(f, frag[2], frag[2], pixel[2]);
-      spe_nand(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_AND:
-      spe_and(f, frag[0], frag[0], pixel[0]);
-      spe_and(f, frag[1], frag[1], pixel[1]);
-      spe_and(f, frag[2], frag[2], pixel[2]);
-      spe_and(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_EQUIV:
-      spe_eqv(f, frag[0], frag[0], pixel[0]);
-      spe_eqv(f, frag[1], frag[1], pixel[1]);
-      spe_eqv(f, frag[2], frag[2], pixel[2]);
-      spe_eqv(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_OR_INVERTED:
-      spe_orc(f, frag[0], pixel[0], frag[0]);
-      spe_orc(f, frag[1], pixel[1], frag[1]);
-      spe_orc(f, frag[2], pixel[2], frag[2]);
-      spe_orc(f, frag[3], pixel[3], frag[3]);
-      break;
-   case PIPE_LOGICOP_COPY:
-      break;
-   case PIPE_LOGICOP_OR_REVERSE:
-      spe_orc(f, frag[0], frag[0], pixel[0]);
-      spe_orc(f, frag[1], frag[1], pixel[1]);
-      spe_orc(f, frag[2], frag[2], pixel[2]);
-      spe_orc(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_OR:
-      spe_or(f, frag[0], frag[0], pixel[0]);
-      spe_or(f, frag[1], frag[1], pixel[1]);
-      spe_or(f, frag[2], frag[2], pixel[2]);
-      spe_or(f, frag[3], frag[3], pixel[3]);
-      break;
-   case PIPE_LOGICOP_SET:
-      spe_il(f, frag[0], ~0);
-      spe_il(f, frag[1], ~0);
-      spe_il(f, frag[2], ~0);
-      spe_il(f, frag[3], ~0);
-      break;
-
-   /* These two cases are short-circuited above.
-    */
-   case PIPE_LOGICOP_INVERT:
-   case PIPE_LOGICOP_NOOP:
-   default:
-      assert(0);
-   }
-
-
-   /* Apply fragment mask.
-    */
-   spe_ilh(f, tmp[0], 0x0000);
-   spe_ilh(f, tmp[1], 0x0404);
-   spe_ilh(f, tmp[2], 0x0808);
-   spe_ilh(f, tmp[3], 0x0c0c);
-
-   spe_shufb(f, tmp[0], mask, mask, tmp[0]);
-   spe_shufb(f, tmp[1], mask, mask, tmp[1]);
-   spe_shufb(f, tmp[2], mask, mask, tmp[2]);
-   spe_shufb(f, tmp[3], mask, mask, tmp[3]);
-
-   spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
-   spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
-   spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
-   spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
-
-   spe_bi(f, 0, 0, 0);
-
-#if 0
-   {
-      const uint32_t *p = f->store;
-      unsigned i;
-
-      printf("# %u instructions\n", f->csr - f->store);
-
-      printf("\t.text\n");
-      for (i = 0; i < 64; i++) {
-         printf("\t.long\t0x%04x\n", p[i]);
-      }
-      fflush(stdout);
-   }
-#endif
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
deleted file mode 100644
index a8267a51331..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef CELL_STATE_PER_FRAGMENT_H
-#define CELL_STATE_PER_FRAGMENT_H
-
-extern void
-cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa);
-
-extern void
-cell_generate_alpha_blend(struct cell_blend_state *cb);
-
-extern void
-cell_generate_logic_op(struct spe_function *f,
-                       const struct pipe_blend_state *blend,
-                       struct pipe_surface *surf);
-
-#endif /* CELL_STATE_PER_FRAGMENT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c
deleted file mode 100644
index ddf14772689..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_shader.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "draw/draw_context.h"
-#include "tgsi/tgsi_parse.h"
-
-#include "cell_context.h"
-#include "cell_state.h"
-#include "cell_gen_fp.h"
-#include "cell_texture.h"
-
-
-/** cast wrapper */
-static INLINE struct cell_fragment_shader_state *
-cell_fragment_shader_state(void *shader)
-{
-   return (struct cell_fragment_shader_state *) shader;
-}
-
-
-/** cast wrapper */
-static INLINE struct cell_vertex_shader_state *
-cell_vertex_shader_state(void *shader)
-{
-   return (struct cell_vertex_shader_state *) shader;
-}
-
-
-/**
- * Create fragment shader state.
- * Called via pipe->create_fs_state()
- */
-static void *
-cell_create_fs_state(struct pipe_context *pipe,
-                     const struct pipe_shader_state *templ)
-{
-   struct cell_context *cell = cell_context(pipe);
-   struct cell_fragment_shader_state *cfs;
-
-   cfs = CALLOC_STRUCT(cell_fragment_shader_state);
-   if (!cfs)
-      return NULL;
-
-   cfs->shader.tokens = tgsi_dup_tokens(templ->tokens);
-   if (!cfs->shader.tokens) {
-      FREE(cfs);
-      return NULL;
-   }
-
-   tgsi_scan_shader(templ->tokens, &cfs->info);
-
-   cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);
-
-   return cfs;
-}
-
-
-/**
- * Called via pipe->bind_fs_state()
- */
-static void
-cell_bind_fs_state(struct pipe_context *pipe, void *fs)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   cell->fs = cell_fragment_shader_state(fs);
-
-   cell->dirty |= CELL_NEW_FS;
-}
-
-
-/**
- * Called via pipe->delete_fs_state()
- */
-static void
-cell_delete_fs_state(struct pipe_context *pipe, void *fs)
-{
-   struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
-
-   spe_release_func(&cfs->code);
-
-   FREE((void *) cfs->shader.tokens);
-   FREE(cfs);
-}
-
-
-/**
- * Create vertex shader state.
- * Called via pipe->create_vs_state()
- */
-static void *
-cell_create_vs_state(struct pipe_context *pipe,
-                     const struct pipe_shader_state *templ)
-{
-   struct cell_context *cell = cell_context(pipe);
-   struct cell_vertex_shader_state *cvs;
-
-   cvs = CALLOC_STRUCT(cell_vertex_shader_state);
-   if (!cvs)
-      return NULL;
-
-   cvs->shader.tokens = tgsi_dup_tokens(templ->tokens);
-   if (!cvs->shader.tokens) {
-      FREE(cvs);
-      return NULL;
-   }
-
-   tgsi_scan_shader(templ->tokens, &cvs->info);
-
-   cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader);
-   if (cvs->draw_data == NULL) {
-      FREE( (void *) cvs->shader.tokens );
-      FREE( cvs );
-      return NULL;
-   }
-
-   return cvs;
-}
-
-
-/**
- * Called via pipe->bind_vs_state()
- */
-static void
-cell_bind_vs_state(struct pipe_context *pipe, void *vs)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   cell->vs = cell_vertex_shader_state(vs);
-
-   draw_bind_vertex_shader(cell->draw,
-                           (cell->vs ? cell->vs->draw_data : NULL));
-
-   cell->dirty |= CELL_NEW_VS;
-}
-
-
-/**
- * Called via pipe->delete_vs_state()
- */
-static void
-cell_delete_vs_state(struct pipe_context *pipe, void *vs)
-{
-   struct cell_context *cell = cell_context(pipe);
-   struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs);
-
-   draw_delete_vertex_shader(cell->draw, cvs->draw_data);
-   FREE( (void *) cvs->shader.tokens );
-   FREE( cvs );
-}
-
-
-/**
- * Called via pipe->set_constant_buffer()
- */
-static void
-cell_set_constant_buffer(struct pipe_context *pipe,
-                         uint shader, uint index,
-                         struct pipe_resource *constants)
-{
-   struct cell_context *cell = cell_context(pipe);
-   unsigned size = constants ? constants->width0 : 0;
-   const void *data = constants ? cell_resource(constants)->data : NULL;
-
-   assert(shader < PIPE_SHADER_TYPES);
-   assert(index == 0);
-
-   if (cell->constants[shader] == constants)
-      return;
-
-   draw_flush(cell->draw);
-
-   /* note: reference counting */
-   pipe_resource_reference(&cell->constants[shader], constants);
-
-   if(shader == PIPE_SHADER_VERTEX) {
-      draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0,
-                                      data, size);
-   }
-
-   cell->mapped_constants[shader] = data;
-
-   if (shader == PIPE_SHADER_VERTEX)
-      cell->dirty |= CELL_NEW_VS_CONSTANTS;
-   else if (shader == PIPE_SHADER_FRAGMENT)
-      cell->dirty |= CELL_NEW_FS_CONSTANTS;
-}
-
-
-void
-cell_init_shader_functions(struct cell_context *cell)
-{
-   cell->pipe.create_fs_state = cell_create_fs_state;
-   cell->pipe.bind_fs_state   = cell_bind_fs_state;
-   cell->pipe.delete_fs_state = cell_delete_fs_state;
-
-   cell->pipe.create_vs_state = cell_create_vs_state;
-   cell->pipe.bind_vs_state   = cell_bind_vs_state;
-   cell->pipe.delete_vs_state = cell_delete_vs_state;
-
-   cell->pipe.set_constant_buffer = cell_set_constant_buffer;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
deleted file mode 100644
index 7f65b82619e..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Authors:  Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "cell_context.h"
-#include "cell_state.h"
-
-#include "util/u_memory.h"
-#include "util/u_transfer.h"
-#include "draw/draw_context.h"
-
-
-static void *
-cell_create_vertex_elements_state(struct pipe_context *pipe,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *attribs)
-{
-   struct cell_velems_state *velems;
-   assert(count <= PIPE_MAX_ATTRIBS);
-   velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state));
-   if (velems) {
-      velems->count = count;
-      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
-   }
-   return velems;
-}
-
-static void
-cell_bind_vertex_elements_state(struct pipe_context *pipe,
-                                void *velems)
-{
-   struct cell_context *cell = cell_context(pipe);
-   struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems;
-
-   cell->velems = cell_velems;
-
-   cell->dirty |= CELL_NEW_VERTEX;
-
-   if (cell_velems)
-      draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
-}
-
-static void
-cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
-{
-   FREE( velems );
-}
-
-
-static void
-cell_set_vertex_buffers(struct pipe_context *pipe,
-                        unsigned count,
-                        const struct pipe_vertex_buffer *buffers)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   assert(count <= PIPE_MAX_ATTRIBS);
-
-   util_copy_vertex_buffers(cell->vertex_buffer,
-                            &cell->num_vertex_buffers,
-                            buffers, count);
-
-   cell->dirty |= CELL_NEW_VERTEX;
-
-   draw_set_vertex_buffers(cell->draw, count, buffers);
-}
-
-
-static void
-cell_set_index_buffer(struct pipe_context *pipe,
-                      const struct pipe_index_buffer *ib)
-{
-   struct cell_context *cell = cell_context(pipe);
-
-   if (ib)
-      memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer));
-   else
-      memset(&cell->index_buffer, 0, sizeof(cell->index_buffer));
-
-   draw_set_index_buffer(cell->draw, ib);
-}
-
-
-void
-cell_init_vertex_functions(struct cell_context *cell)
-{
-   cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
-   cell->pipe.set_index_buffer = cell_set_index_buffer;
-   cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
-   cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
-   cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
-   cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c
deleted file mode 100644
index 777454479b1..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_surface.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "util/u_surface.h"
-#include "cell_context.h"
-#include "cell_surface.h"
-
-
-void
-cell_init_surface_functions(struct cell_context *cell)
-{
-   cell->pipe.resource_copy_region = util_resource_copy_region;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h
deleted file mode 100644
index 9e58f329443..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_surface.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Authors:  Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#ifndef CELL_SURFACE_H
-#define CELL_SURFACE_H
-
-
-struct cell_context;
-
-
-extern void
-cell_init_surface_functions(struct cell_context *cell);
-
-
-#endif /* SP_SURFACE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
deleted file mode 100644
index 946a7050e5f..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ /dev/null
@@ -1,644 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  *   Michel Dänzer <michel@tungstengraphics.com>
-  *   Brian Paul
-  */
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_transfer.h"
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "cell_context.h"
-#include "cell_screen.h"
-#include "cell_state.h"
-#include "cell_texture.h"
-
-#include "state_tracker/sw_winsys.h"
-
-
-
-static boolean
-cell_resource_layout(struct pipe_screen *screen, 
-		     struct cell_resource *ct)
-{
-   struct pipe_resource *pt = &ct->base;
-   unsigned level;
-   unsigned width = pt->width0;
-   unsigned height = pt->height0;
-   unsigned depth = pt->depth0;
-
-   ct->buffer_size = 0;
-
-   for (level = 0; level <= pt->last_level; level++) {
-      unsigned size;
-      unsigned w_tile, h_tile;
-
-      assert(level < CELL_MAX_TEXTURE_LEVELS);
-
-      /* width, height, rounded up to tile size */
-      w_tile = align(width, TILE_SIZE);
-      h_tile = align(height, TILE_SIZE);
-
-      ct->stride[level] = util_format_get_stride(pt->format, w_tile);
-
-      ct->level_offset[level] = ct->buffer_size;
-
-      size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile);
-      if (pt->target == PIPE_TEXTURE_CUBE)
-         size *= 6;
-      else
-         size *= depth;
-
-      ct->buffer_size += size;
-
-      width = u_minify(width, 1);
-      height = u_minify(height, 1);
-      depth = u_minify(depth, 1);
-   }
-
-   ct->data = align_malloc(ct->buffer_size, 16);
- 
-   return ct->data != NULL;
-}
-
-
-/**
- * Texture layout for simple color buffers.
- */
-static boolean
-cell_displaytarget_layout(struct pipe_screen *screen,
-                          struct cell_resource * ct)
-{
-   struct sw_winsys *winsys = cell_screen(screen)->winsys;
-
-   /* Round up the surface size to a multiple of the tile size?
-    */
-   ct->dt = winsys->displaytarget_create(winsys,
-                                          ct->base.bind,
-                                          ct->base.format,
-                                          ct->base.width0, 
-                                          ct->base.height0,
-                                          16,
-                                          &ct->dt_stride );
-
-   return ct->dt != NULL;
-}
-
-static struct pipe_resource *
-cell_resource_create(struct pipe_screen *screen,
-                    const struct pipe_resource *templat)
-{
-   struct cell_resource *ct = CALLOC_STRUCT(cell_resource);
-   if (!ct)
-      return NULL;
-
-   ct->base = *templat;
-   pipe_reference_init(&ct->base.reference, 1);
-   ct->base.screen = screen;
-
-   /* Create both a displaytarget (linear) and regular texture
-    * (twiddled).  Convert twiddled->linear at flush_frontbuffer time.
-    */
-   if (ct->base.bind & (PIPE_BIND_DISPLAY_TARGET |
-                        PIPE_BIND_SCANOUT |
-                        PIPE_BIND_SHARED)) {
-      if (!cell_displaytarget_layout(screen, ct))
-         goto fail;
-   }
-
-   if (!cell_resource_layout(screen, ct))
-      goto fail;
-
-   return &ct->base;
-
-fail:
-   if (ct->dt) {
-      struct sw_winsys *winsys = cell_screen(screen)->winsys;
-      winsys->displaytarget_destroy(winsys, ct->dt);
-   }
-
-   FREE(ct);
-
-   return NULL;
-}
-
-
-static void
-cell_resource_destroy(struct pipe_screen *scrn, struct pipe_resource *pt)
-{
-   struct cell_screen *screen = cell_screen(scrn);
-   struct sw_winsys *winsys = screen->winsys;
-   struct cell_resource *ct = cell_resource(pt);
-
-   if (ct->dt) {
-      /* display target */
-      winsys->displaytarget_destroy(winsys, ct->dt);
-   }
-   else if (!ct->userBuffer) {
-      align_free(ct->data);
-   }
-
-   FREE(ct);
-}
-
-
-
-/**
- * Convert image from linear layout to tiled layout.  4-byte pixels.
- */
-static void
-twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
-                   uint src_stride, const uint *src)
-{
-   const uint tile_size2 = tile_size * tile_size;
-   const uint h_t = (h + tile_size - 1) / tile_size;
-   const uint w_t = (w + tile_size - 1) / tile_size;
-
-   uint it, jt;  /* tile counters */
-   uint i, j;    /* intra-tile counters */
-
-   src_stride /= 4; /* convert from bytes to pixels */
-
-   /* loop over dest tiles */
-   for (it = 0; it < h_t; it++) {
-      for (jt = 0; jt < w_t; jt++) {
-         /* start of dest tile: */
-         uint *tdst = dst + (it * w_t + jt) * tile_size2;
-
-         /* compute size of this tile (may be smaller than tile_size) */
-         /* XXX note: a compiler bug was found here. That's why the code
-          * looks as it does.
-          */
-         uint tile_width = w - jt * tile_size;
-         tile_width = MIN2(tile_width, tile_size);
-         uint tile_height = h - it * tile_size;
-         tile_height = MIN2(tile_height, tile_size);
-
-         /* loop over texels in the tile */
-         for (i = 0; i < tile_height; i++) {
-            for (j = 0; j < tile_width; j++) {
-               const uint srci = it * tile_size + i;
-               const uint srcj = jt * tile_size + j;
-               ASSERT(srci < h);
-               ASSERT(srcj < w);
-               tdst[i * tile_size + j] = src[srci * src_stride + srcj];
-            }
-         }
-      }
-   }
-}
-
-
-/**
- * For Cell.  Basically, rearrange the pixels/quads from this layout:
- *  +--+--+--+--+
- *  |p0|p1|p2|p3|....
- *  +--+--+--+--+
- *
- * to this layout:
- *  +--+--+
- *  |p0|p1|....
- *  +--+--+
- *  |p2|p3|
- *  +--+--+
- */
-static void
-twiddle_tile(const uint *tileIn, uint *tileOut)
-{
-   int y, x;
-
-   for (y = 0; y < TILE_SIZE; y+=2) {
-      for (x = 0; x < TILE_SIZE; x+=2) {
-         int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
-         tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
-         tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
-         tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
-         tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
-      }
-   }
-}
-
-
-/**
- * Convert image from tiled layout to linear layout.  4-byte pixels.
- */
-static void
-untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
-                     uint dst_stride, const uint *src)
-{
-   const uint tile_size2 = tile_size * tile_size;
-   const uint h_t = (h + tile_size - 1) / tile_size;
-   const uint w_t = (w + tile_size - 1) / tile_size;
-   uint *tile_buf;
-   uint it, jt;  /* tile counters */
-   uint i, j;    /* intra-tile counters */
-
-   dst_stride /= 4; /* convert from bytes to pixels */
-
-   tile_buf = align_malloc(tile_size * tile_size * 4, 16);
-   
-   /* loop over src tiles */
-   for (it = 0; it < h_t; it++) {
-      for (jt = 0; jt < w_t; jt++) {
-         /* start of src tile: */
-         const uint *tsrc = src + (it * w_t + jt) * tile_size2;
-         
-         twiddle_tile(tsrc, tile_buf);
-         tsrc = tile_buf;
-
-         /* compute size of this tile (may be smaller than tile_size) */
-         /* XXX note: a compiler bug was found here. That's why the code
-          * looks as it does.
-          */
-         uint tile_width = w - jt * tile_size;
-         tile_width = MIN2(tile_width, tile_size);
-         uint tile_height = h - it * tile_size;
-         tile_height = MIN2(tile_height, tile_size);
-
-         /* loop over texels in the tile */
-         for (i = 0; i < tile_height; i++) {
-            for (j = 0; j < tile_width; j++) {
-               uint dsti = it * tile_size + i;
-               uint dstj = jt * tile_size + j;
-               ASSERT(dsti < h);
-               ASSERT(dstj < w);
-               dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j];
-            }
-         }
-      }
-   }
-
-   align_free(tile_buf);
-}
-
-
-static struct pipe_surface *
-cell_create_surface(struct pipe_context *ctx,
-                    struct pipe_resource *pt,
-                    const struct pipe_surface *surf_tmpl)
-{
-   struct cell_resource *ct = cell_resource(pt);
-   struct pipe_surface *ps;
-
-   assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
-   ps = CALLOC_STRUCT(pipe_surface);
-   if (ps) {
-      pipe_reference_init(&ps->reference, 1);
-      pipe_resource_reference(&ps->texture, pt);
-      ps->format = surf_tmpl->format;
-      ps->context = ctx;
-      ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
-      ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
-      /* XXX may need to override usage flags (see sp_texture.c) */
-      ps->usage = surf_tmpl->usage;
-      ps->u.tex.level = surf_tmpl->u.tex.level;
-      ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
-      ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
-   }
-   return ps;
-}
-
-
-static void 
-cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf)
-{
-   pipe_resource_reference(&surf->texture, NULL);
-   FREE(surf);
-}
-
-
-/**
- * Create new pipe_transfer object.
- * This is used by the user to put tex data into a texture (and get it
- * back out for glGetTexImage).
- */
-static struct pipe_transfer *
-cell_get_transfer(struct pipe_context *ctx,
-                  struct pipe_resource *resource,
-                  unsigned level,
-                  unsigned usage,
-                  const struct pipe_box *box)
-{
-   struct cell_resource *ct = cell_resource(resource);
-   struct cell_transfer *ctrans;
-   enum pipe_format format = resource->format;
-
-   assert(resource);
-   assert(level <= resource->last_level);
-
-   /* make sure the requested region is in the image bounds */
-   assert(box->x + box->width <= u_minify(resource->width0, level));
-   assert(box->y + box->height <= u_minify(resource->height0, level));
-   assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1));
-
-   ctrans = CALLOC_STRUCT(cell_transfer);
-   if (ctrans) {
-      struct pipe_transfer *pt = &ctrans->base;
-      pipe_resource_reference(&pt->resource, resource);
-      pt->level = level;
-      pt->usage = usage;
-      pt->box = *box;
-      pt->stride = ct->stride[level];
-
-      ctrans->offset = ct->level_offset[level];
-
-      if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) {
-         unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE);
-         ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride;
-      }
-      else {
-         assert(box->z == 0);
-      }
-
-      return pt;
-   }
-   return NULL;
-}
-
-
-static void 
-cell_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t)
-{
-   struct cell_transfer *transfer = cell_transfer(t);
-   /* Effectively do the texture_update work here - if texture images
-    * needed post-processing to put them into hardware layout, this is
-    * where it would happen.  For cell, nothing to do.
-    */
-   assert (transfer->base.resource);
-   pipe_resource_reference(&transfer->base.resource, NULL);
-   FREE(transfer);
-}
-
-
-/**
- * Return pointer to texture image data in linear layout.
- */
-static void *
-cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer)
-{
-   struct cell_transfer *ctrans = cell_transfer(transfer);
-   struct pipe_resource *pt = transfer->resource;
-   struct cell_resource *ct = cell_resource(pt);
-
-   assert(transfer->resource);
-
-   if (ct->mapped == NULL) {
-      ct->mapped = ct->data;
-   }
-
-
-   /* Better test would be resource->is_linear
-    */
-   if (transfer->resource->target != PIPE_BUFFER) {
-      const uint level = ctrans->base.level;
-      const uint texWidth = u_minify(pt->width0, level);
-      const uint texHeight = u_minify(pt->height0, level);
-      unsigned size;
-
-
-      /*
-       * Create a buffer of ordinary memory for the linear texture.
-       * This is the memory that the user will read/write.
-       */
-      size = (util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) *
-	      util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE)));
-
-      ctrans->map = align_malloc(size, 16);
-      if (!ctrans->map)
-	 return NULL; /* out of memory */
-
-      if (transfer->usage & PIPE_TRANSFER_READ) {
-	 /* Textures always stored twiddled, need to untwiddle the
-	  * texture to make a linear version.
-	  */
-	 const uint bpp = util_format_get_blocksize(ct->base.format);
-	 if (bpp == 4) {
-	    const uint *src = (uint *) (ct->mapped + ctrans->offset);
-	    uint *dst = ctrans->map;
-	    untwiddle_image_uint(texWidth, texHeight, TILE_SIZE,
-				 dst, transfer->stride, src);
-	 }
-	 else {
-	    // xxx fix
-	 }
-      }
-   }
-   else {
-      unsigned stride = transfer->stride;
-      enum pipe_format format = pt->format;
-      unsigned blocksize = util_format_get_blocksize(format);
-
-      ctrans->map = (ct->mapped + 
-		     ctrans->offset +
-		     ctrans->base.box.y / util_format_get_blockheight(format) * stride +
-		     ctrans->base.box.x / util_format_get_blockwidth(format) * blocksize);
-   }
-
-
-   return ctrans->map;
-}
-
-
-/**
- * Called when user is done reading/writing texture data.
- * If new data was written, this is where we convert the linear data
- * to tiled data.
- */
-static void
-cell_transfer_unmap(struct pipe_context *ctx,
-                    struct pipe_transfer *transfer)
-{
-   struct cell_transfer *ctrans = cell_transfer(transfer);
-   struct pipe_resource *pt = transfer->resource;
-   struct cell_resource *ct = cell_resource(pt);
-   const uint level = ctrans->base.level;
-   const uint texWidth = u_minify(pt->width0, level);
-   const uint texHeight = u_minify(pt->height0, level);
-   const uint stride = ct->stride[level];
-
-   if (!ct->mapped) {
-      assert(0);
-      return;
-   }
-
-   if (pt->target != PIPE_BUFFER) {
-      if (transfer->usage & PIPE_TRANSFER_WRITE) {
-	 /* The user wrote new texture data into the mapped buffer.
-	  * We need to convert the new linear data into the twiddled/tiled format.
-	  */
-	 const uint bpp = util_format_get_blocksize(ct->base.format);
-	 if (bpp == 4) {
-	    const uint *src = ctrans->map;
-	    uint *dst = (uint *) (ct->mapped + ctrans->offset);
-	    twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, stride, src);
-	 }
-	 else {
-	    // xxx fix
-	 }
-      }
-      
-      align_free(ctrans->map);
-   }
-   else {
-      /* nothing to do */
-   }
-
-   ctrans->map = NULL;
-}
-
-
-
-/* This used to be overriden by the co-state tracker, but really needs
- * to be active with sw_winsys.
- *
- * Contrasting with llvmpipe and softpipe, this is the only place
- * where we use the ct->dt display target in any real sense.
- *
- * Basically just untwiddle our local data into the linear
- * displaytarget.
- */
-static void
-cell_flush_frontbuffer(struct pipe_screen *_screen,
-                       struct pipe_resource *resource,
-                       unsigned level, unsigned layer,
-                       void *context_private)
-{
-   struct cell_screen *screen = cell_screen(_screen);
-   struct sw_winsys *winsys = screen->winsys;
-   struct cell_resource *ct = cell_resource(resource);
-
-   if (!ct->dt)
-      return;
-
-   /* Need to untwiddle from our internal representation here:
-    */
-   {
-      unsigned *map = winsys->displaytarget_map(winsys, ct->dt,
-                                                (PIPE_TRANSFER_READ |
-                                                 PIPE_TRANSFER_WRITE));
-      unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]);
-
-      untwiddle_image_uint(u_minify(resource->width0, level),
-                           u_minify(resource->height0, level),
-                           TILE_SIZE,
-                           map,
-                           ct->dt_stride,
-                           src);
-
-      winsys->displaytarget_unmap(winsys, ct->dt);
-   }
-
-   winsys->displaytarget_display(winsys, ct->dt, context_private);
-}
-
-
-
-/**
- * Create buffer which wraps user-space data.
- */
-static struct pipe_resource *
-cell_user_buffer_create(struct pipe_screen *screen,
-                            void *ptr,
-                            unsigned bytes,
-			    unsigned bind_flags)
-{
-   struct cell_resource *buffer;
-
-   buffer = CALLOC_STRUCT(cell_resource);
-   if(!buffer)
-      return NULL;
-
-   pipe_reference_init(&buffer->base.reference, 1);
-   buffer->base.screen = screen;
-   buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */
-   buffer->base.bind = PIPE_BIND_TRANSFER_READ | bind_flags;
-   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
-   buffer->base.flags = 0;
-   buffer->base.width0 = bytes;
-   buffer->base.height0 = 1;
-   buffer->base.depth0 = 1;
-   buffer->base.array_size = 1;
-   buffer->userBuffer = TRUE;
-   buffer->data = ptr;
-
-   return &buffer->base;
-}
-
-
-static struct pipe_resource *
-cell_resource_from_handle(struct pipe_screen *screen,
-                          const struct pipe_resource *templat,
-                          struct winsys_handle *handle)
-{
-   /* XXX todo */
-   return NULL;
-}
-
-
-static boolean 
-cell_resource_get_handle(struct pipe_screen *scree,
-                         struct pipe_resource *tex,
-                         struct winsys_handle *handle)
-{
-   /* XXX todo */
-   return FALSE;
-}
-
-
-void
-cell_init_screen_texture_funcs(struct pipe_screen *screen)
-{
-   screen->resource_create = cell_resource_create;
-   screen->resource_destroy = cell_resource_destroy;
-   screen->resource_from_handle = cell_resource_from_handle;
-   screen->resource_get_handle = cell_resource_get_handle;
-   screen->user_buffer_create = cell_user_buffer_create;
-
-   screen->flush_frontbuffer = cell_flush_frontbuffer;
-}
-
-void
-cell_init_texture_transfer_funcs(struct cell_context *cell)
-{
-   cell->pipe.get_transfer = cell_get_transfer;
-   cell->pipe.transfer_destroy = cell_transfer_destroy;
-   cell->pipe.transfer_map = cell_transfer_map;
-   cell->pipe.transfer_unmap = cell_transfer_unmap;
-
-   cell->pipe.transfer_flush_region = u_default_transfer_flush_region;
-   cell->pipe.transfer_inline_write = u_default_transfer_inline_write;
-
-   cell->pipe.create_surface = cell_create_surface;
-   cell->pipe.surface_destroy = cell_surface_destroy;
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h
deleted file mode 100644
index bd8224b3b7b..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_texture.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef CELL_TEXTURE_H
-#define CELL_TEXTURE_H
-
-#include "cell/common.h"
-
-struct cell_context;
-struct pipe_resource;
-
-
-/**
- * Subclass of pipe_resource
- */
-struct cell_resource
-{
-   struct pipe_resource base;
-
-   unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS];
-   unsigned long stride[CELL_MAX_TEXTURE_LEVELS];
-
-   /**
-    * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET
-    * usage.
-    */
-   struct sw_displaytarget *dt;
-   unsigned dt_stride;
-
-   /**
-    * Malloc'ed data for regular textures, or a mapping to dt above.
-    */
-   void *data;
-   boolean userBuffer;
-
-   /* Size of the linear buffer??
-    */
-   unsigned long buffer_size;
-
-   /** The buffer above, mapped.  This is the memory from which the
-    * SPUs will fetch texels.  This texture data is in the tiled layout.
-    */
-   ubyte *mapped;
-};
-
-
-struct cell_transfer
-{
-   struct pipe_transfer base;
-
-   unsigned long offset;
-   void *map;
-};
-
-
-/** cast wrapper */
-static INLINE struct cell_resource *
-cell_resource(struct pipe_resource *pt)
-{
-   return (struct cell_resource *) pt;
-}
-
-
-/** cast wrapper */
-static INLINE struct cell_transfer *
-cell_transfer(struct pipe_transfer *pt)
-{
-   return (struct cell_transfer *) pt;
-}
-
-
-extern void
-cell_init_screen_texture_funcs(struct pipe_screen *screen);
-
-extern void
-cell_init_texture_transfer_funcs(struct cell_context *cell);
-
-#endif /* CELL_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c
deleted file mode 100644
index 37b71956482..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Vertex buffer code.  The draw module transforms vertices to window
- * coords, etc. and emits the vertices into buffer supplied by this module.
- * When a vertex buffer is full, or we flush, we'll send the vertex data
- * to the SPUs.
- *
- * Authors
- *  Brian Paul
- */
-
-
-#include "cell_batch.h"
-#include "cell_context.h"
-#include "cell_fence.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_vbuf.h"
-#include "draw/draw_vbuf.h"
-#include "util/u_memory.h"
-
-
-/** Allow vertex data to be inlined after RENDER command */
-#define ALLOW_INLINE_VERTS 1
-
-
-/**
- * Subclass of vbuf_render because we need a cell_context pointer in
- * a few places.
- */
-struct cell_vbuf_render
-{
-   struct vbuf_render base;
-   struct cell_context *cell;
-   uint prim;            /**< PIPE_PRIM_x */
-   uint vertex_size;     /**< in bytes */
-   void *vertex_buffer;  /**< just for debug, really */
-   uint vertex_buf;      /**< in [0, CELL_NUM_BUFFERS-1] */
-   uint vertex_buffer_size;  /**< size in bytes */
-};
-
-
-/** cast wrapper */
-static struct cell_vbuf_render *
-cell_vbuf_render(struct vbuf_render *vbr)
-{
-   return (struct cell_vbuf_render *) vbr;
-}
-
-
-
-static const struct vertex_info *
-cell_vbuf_get_vertex_info(struct vbuf_render *vbr)
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   return &cvbr->cell->vertex_info;
-}
-
-
-static boolean
-cell_vbuf_allocate_vertices(struct vbuf_render *vbr,
-                            ushort vertex_size, ushort nr_vertices)
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   unsigned size = vertex_size * nr_vertices;
-   /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/
-
-   assert(cvbr->vertex_buf == ~0);
-   cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell);
-   cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf];
-   cvbr->vertex_buffer_size = size;
-   cvbr->vertex_size = vertex_size;
-
-   return cvbr->vertex_buffer != NULL;
-}
-
-
-static void
-cell_vbuf_release_vertices(struct vbuf_render *vbr)
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   struct cell_context *cell = cvbr->cell;
-
-   /*
-   printf("%s vertex_buf = %u  count = %u\n",
-          __FUNCTION__, cvbr->vertex_buf, vertices_used);
-   */
-
-   /* Make sure texture buffers aren't released until we're done rendering
-    * with them.
-    */
-   cell_add_fenced_textures(cell);
-
-   /* Tell SPUs they can release the vert buf */
-   if (cvbr->vertex_buf != ~0U) {
-      STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0);
-      struct cell_command_release_verts *release
-         = (struct cell_command_release_verts *)
-         cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts));
-      release->opcode[0] = CELL_CMD_RELEASE_VERTS;
-      release->vertex_buf = cvbr->vertex_buf;
-   }
-
-   cvbr->vertex_buf = ~0;
-   cell_flush_int(cell, 0x0);
-
-   cvbr->vertex_buffer = NULL;
-}
-
-
-static void *
-cell_vbuf_map_vertices(struct vbuf_render *vbr)
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   return cvbr->vertex_buffer;
-}
-
-
-static void 
-cell_vbuf_unmap_vertices(struct vbuf_render *vbr, 
-                         ushort min_index,
-                         ushort max_index )
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
-   /* do nothing */
-}
-
-
-
-static boolean
-cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   cvbr->prim = prim;
-   /*printf("cell_set_prim %u\n", prim);*/
-   return TRUE;
-}
-
-
-static void
-cell_vbuf_draw_elements(struct vbuf_render *vbr,
-                        const ushort *indices,
-                        uint nr_indices)
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   struct cell_context *cell = cvbr->cell;
-   float xmin, ymin, xmax, ymax;
-   uint i;
-   uint nr_vertices = 0, min_index = ~0;
-   const void *vertices = cvbr->vertex_buffer;
-   const uint vertex_size = cvbr->vertex_size;
-
-   for (i = 0; i < nr_indices; i++) {
-      if (indices[i] > nr_vertices)
-         nr_vertices = indices[i];
-      if (indices[i] < min_index)
-         min_index = indices[i];
-   }
-   nr_vertices++;
-
-#if 0
-   /*if (min_index > 0)*/
-      printf("%s min_index = %u\n", __FUNCTION__, min_index);
-#endif
-
-#if 0
-   printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n",
-          nr_indices, nr_vertices);
-   printf("  ");
-   for (i = 0; i < nr_indices; i += 3) {
-      printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]);
-   }
-   printf("\n");
-#elif 0
-   printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u  indexes = [%u %u %u ...]\n",
-          nr_indices, nr_vertices,
-          indices[0], indices[1], indices[2]);
-   printf("ind space = %u, vert space = %u, space = %u\n",
-          nr_indices * 2,
-          nr_vertices * 4 * cell->vertex_info.size,
-          cell_batch_free_space(cell));
-#endif
-
-   /* compute x/y bounding box */
-   xmin = ymin = 1e50;
-   xmax = ymax = -1e50;
-   for (i = min_index; i < nr_vertices; i++) {
-      const float *v = (float *) ((ubyte *) vertices + i * vertex_size);
-      if (v[0] < xmin)
-         xmin = v[0];
-      if (v[0] > xmax)
-         xmax = v[0];
-      if (v[1] < ymin)
-         ymin = v[1];
-      if (v[1] > ymax)
-         ymax = v[1];
-   }
-#if 0
-   printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax);
-   fflush(stdout);
-#endif
-
-   if (cvbr->prim != PIPE_PRIM_TRIANGLES)
-      return; /* only render tris for now */
-
-   /* build/insert batch RENDER command */
-   {
-      const uint index_bytes = ROUNDUP16(nr_indices * 2);
-      const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size);
-      STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0);
-      const uint batch_size = sizeof(struct cell_command_render) + index_bytes;
-
-      struct cell_command_render *render
-         = (struct cell_command_render *)
-         cell_batch_alloc16(cell, batch_size);
-
-      render->opcode[0] = CELL_CMD_RENDER;
-      render->prim_type = cvbr->prim;
-
-      render->num_indexes = nr_indices;
-      render->min_index = min_index;
-
-      /* append indices after render command */
-      memcpy(render + 1, indices, nr_indices * 2);
-
-      /* if there's room, append vertices after the indices, else leave
-       * vertices in the original/separate buffer.
-       */
-      render->vertex_size = 4 * cell->vertex_info.size;
-      render->num_verts = nr_vertices;
-      if (ALLOW_INLINE_VERTS &&
-          min_index == 0 &&
-          vertex_bytes + 16 <= cell_batch_free_space(cell)) {
-         /* vertex data inlined, after indices, at 16-byte boundary */
-         void *dst = cell_batch_alloc16(cell, vertex_bytes);
-         memcpy(dst, vertices, vertex_bytes);
-         render->inline_verts = TRUE;
-         render->vertex_buf = ~0;
-      }
-      else {
-         /* vertex data in separate buffer */
-         render->inline_verts = FALSE;
-         ASSERT(cvbr->vertex_buf >= 0);
-         render->vertex_buf = cvbr->vertex_buf;
-      }
-
-      render->xmin = xmin;
-      render->ymin = ymin;
-      render->xmax = xmax;
-      render->ymax = ymax;
-   }
-
-#if 0
-   /* helpful for debug */
-   cell_flush_int(cell, CELL_FLUSH_WAIT);
-#endif
-}
-
-
-static void
-cell_vbuf_destroy(struct vbuf_render *vbr)
-{
-   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-   cvbr->cell->vbuf_render = NULL;
-   FREE(cvbr);
-}
-
-
-/**
- * Initialize the post-transform vertex buffer information for the given
- * context.
- */
-void
-cell_init_vbuf(struct cell_context *cell)
-{
-   assert(cell->draw);
-
-   cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render);
-
-   /* The max number of indexes is what can fix into a batch buffer,
-    * minus the render and release-verts commands.
-    */
-   cell->vbuf_render->base.max_indices
-      = (CELL_BUFFER_SIZE
-         - sizeof(struct cell_command_render)
-         - sizeof(struct cell_command_release_verts))
-      / sizeof(ushort);
-   cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE;
-
-   cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info;
-   cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices;
-   cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices;
-   cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices;
-   cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive;
-   cell->vbuf_render->base.draw_elements = cell_vbuf_draw_elements;
-   cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices;
-   cell->vbuf_render->base.destroy = cell_vbuf_destroy;
-
-   cell->vbuf_render->cell = cell;
-#if 1
-   cell->vbuf_render->vertex_buf = ~0;
-#endif
-
-   cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base);
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h
deleted file mode 100644
index d265cbf7701..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vbuf.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef CELL_VBUF_H
-#define CELL_VBUF_H
-
-
-struct cell_context;
-
-extern void
-cell_init_vbuf(struct cell_context *cell);
-
-
-#endif /* CELL_VBUF_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
deleted file mode 100644
index 9cba537d9eb..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <inttypes.h>
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-#include "pipe/p_format.h"
-
-#include "../auxiliary/draw/draw_context.h"
-#include "../auxiliary/draw/draw_private.h"
-
-#include "cell_context.h"
-#include "rtasm/rtasm_ppc_spe.h"
-
-
-/**
- * Emit a 4x4 matrix transpose operation
- *
- * \param p         Function that the transpose operation is to be appended to
- * \param row0      Register containing row 0 of the source matrix
- * \param row1      Register containing row 1 of the source matrix
- * \param row2      Register containing row 2 of the source matrix
- * \param row3      Register containing row 3 of the source matrix
- * \param dest_ptr  Register containing the address of the destination matrix
- * \param shuf_ptr  Register containing the address of the shuffled data
- * \param count     Number of colums to actually be written to the destination
- *
- * \note
- * This function assumes that the registers named by \c row0, \c row1,
- * \c row2, and \c row3 are scratch and can be modified by the generated code.
- * Furthermore, these registers will be released, via calls to
- * \c release_register, by this function.
- * 
- * \note
- * This function requires that four temporary are available on entry.
- */
-static void
-emit_matrix_transpose(struct spe_function *p,
-		      unsigned row0, unsigned row1, unsigned row2,
-		      unsigned row3, unsigned dest_ptr,
-		      unsigned shuf_ptr, unsigned count)
-{
-   int shuf_hi = spe_allocate_available_register(p);
-   int shuf_lo = spe_allocate_available_register(p);
-   int t1 = spe_allocate_available_register(p);
-   int t2 = spe_allocate_available_register(p);
-   int t3;
-   int t4;
-   int col0;
-   int col1;
-   int col2;
-   int col3;
-
-
-   spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
-   spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
-   spe_shufb(p, t1, row0, row2, shuf_hi);
-   spe_shufb(p, t2, row0, row2, shuf_lo);
-
-
-   /* row0 and row2 are now no longer needed.  Re-use those registers as
-    * temporaries.
-    */
-   t3 = row0;
-   t4 = row2;
-
-   spe_shufb(p, t3, row1, row3, shuf_hi);
-   spe_shufb(p, t4, row1, row3, shuf_lo);
-
-
-   /* row1 and row3 are now no longer needed.  Re-use those registers as
-    * temporaries.
-    */
-   col0 = row1;
-   col1 = row3;
-
-   spe_shufb(p, col0, t1, t3, shuf_hi);
-   if (count > 1) {
-      spe_shufb(p, col1, t1, t3, shuf_lo);
-   }
-
-   /* t1 and t3 are now no longer needed.  Re-use those registers as
-    * temporaries.
-    */
-   col2 = t1;
-   col3 = t3;
-
-   if (count > 2) {
-      spe_shufb(p, col2, t2, t4, shuf_hi);
-   }
-
-   if (count > 3) {
-      spe_shufb(p, col3, t2, t4, shuf_lo);
-   }
-
-
-   /* Store the results.  Remember that the stqd instruction is encoded using
-    * the qword offset (stand-alone assemblers to the byte-offset to
-    * qword-offset conversion for you), so the byte-offset needs be divided by
-    * 16.
-    */
-   switch (count) {
-   case 4:
-      spe_stqd(p, col3, dest_ptr, 3 * 16);
-   case 3:
-      spe_stqd(p, col2, dest_ptr, 2 * 16);
-   case 2:
-      spe_stqd(p, col1, dest_ptr, 1 * 16);
-   case 1:
-      spe_stqd(p, col0, dest_ptr, 0 * 16);
-   }
-
-
-   /* Release all of the temporary registers used.
-    */
-   spe_release_register(p, col0);
-   spe_release_register(p, col1);
-   spe_release_register(p, col2);
-   spe_release_register(p, col3);
-   spe_release_register(p, shuf_hi);
-   spe_release_register(p, shuf_lo);
-   spe_release_register(p, t2);
-   spe_release_register(p, t4);
-}
-
-
-#if 0
-/* This appears to not be used currently */
-static void
-emit_fetch(struct spe_function *p,
-	   unsigned in_ptr, unsigned *offset,
-	   unsigned out_ptr, unsigned shuf_ptr,
-	   enum pipe_format format)
-{
-   const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0)
-       + (pf_size_z(format) != 0) + (pf_size_w(format) != 0);
-   const unsigned type = pf_type(format);
-   const unsigned bytes = pf_size_x(format);
-
-   int v0 = spe_allocate_available_register(p);
-   int v1 = spe_allocate_available_register(p);
-   int v2 = spe_allocate_available_register(p);
-   int v3 = spe_allocate_available_register(p);
-   int tmp = spe_allocate_available_register(p);
-   int float_zero = -1;
-   int float_one = -1;
-   float scale_signed = 0.0;
-   float scale_unsigned = 0.0;
-
-   spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
-   spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
-   spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
-   spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
-   offset[0] += 4;
-   
-   switch (bytes) {
-   case 1:
-      scale_signed = 1.0f / 127.0f;
-      scale_unsigned = 1.0f / 255.0f;
-      spe_lqd(p, tmp, shuf_ptr, 1 * 16);
-      spe_shufb(p, v0, v0, v0, tmp);
-      spe_shufb(p, v1, v1, v1, tmp);
-      spe_shufb(p, v2, v2, v2, tmp);
-      spe_shufb(p, v3, v3, v3, tmp);
-      break;
-   case 2:
-      scale_signed = 1.0f / 32767.0f;
-      scale_unsigned = 1.0f / 65535.0f;
-      spe_lqd(p, tmp, shuf_ptr, 2 * 16);
-      spe_shufb(p, v0, v0, v0, tmp);
-      spe_shufb(p, v1, v1, v1, tmp);
-      spe_shufb(p, v2, v2, v2, tmp);
-      spe_shufb(p, v3, v3, v3, tmp);
-      break;
-   case 4:
-      scale_signed = 1.0f / 2147483647.0f;
-      scale_unsigned = 1.0f / 4294967295.0f;
-      break;
-   default:
-      assert(0);
-      break;
-   }
-
-   switch (type) {
-   case PIPE_FORMAT_TYPE_FLOAT:
-      break;
-   case PIPE_FORMAT_TYPE_UNORM:
-      spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16);
-      spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff);
-      spe_cuflt(p, v0, v0, 0);
-      spe_fm(p, v0, v0, tmp);
-      break;
-   case PIPE_FORMAT_TYPE_SNORM:
-      spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16);
-      spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff);
-      spe_csflt(p, v0, v0, 0);
-      spe_fm(p, v0, v0, tmp);
-      break;
-   case PIPE_FORMAT_TYPE_USCALED:
-      spe_cuflt(p, v0, v0, 0);
-      break;
-   case PIPE_FORMAT_TYPE_SSCALED:
-      spe_csflt(p, v0, v0, 0);
-      break;
-   }
-
-
-   if (count < 4) {
-      float_one = spe_allocate_available_register(p);
-      spe_il(p, float_one, 1);
-      spe_cuflt(p, float_one, float_one, 0);
-      
-      if (count < 3) {
-	 float_zero = spe_allocate_available_register(p);
-	 spe_il(p, float_zero, 0);
-      }
-   }
-
-   spe_release_register(p, tmp);
-
-   emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
-
-   switch (count) {
-   case 1:
-      spe_stqd(p, float_zero, out_ptr, 1 * 16);
-   case 2:
-      spe_stqd(p, float_zero, out_ptr, 2 * 16);
-   case 3:
-      spe_stqd(p, float_one, out_ptr, 3 * 16);
-   }
-
-   if (float_zero != -1) {
-      spe_release_register(p, float_zero);
-   }
-
-   if (float_one != -1) {
-      spe_release_register(p, float_one);
-   }
-}
-#endif
-
-
-void cell_update_vertex_fetch(struct draw_context *draw)
-{
-#if 0
-   struct cell_context *const cell =
-       (struct cell_context *) draw->driver_private;
-   struct spe_function *p = &cell->attrib_fetch;
-   unsigned function_index[PIPE_MAX_ATTRIBS];
-   unsigned unique_attr_formats;
-   int out_ptr;
-   int in_ptr;
-   int shuf_ptr;
-   unsigned i;
-   unsigned j;
-
-
-   /* Determine how many unique input attribute formats there are.  At the
-    * same time, store the index of the lowest numbered attribute that has
-    * the same format as any non-unique format.
-    */
-   unique_attr_formats = 1;
-   function_index[0] = 0;
-   for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
-      const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;
-
-      for (j = 0; j < i; j++) {
-	 if (curr_fmt == draw->vertex_element[j].src_format) {
-	    break;
-	 }
-      }
-      
-      if (j == i) {
-	 unique_attr_formats++;
-      }
-
-      function_index[i] = j;
-   }
-
-
-   /* Each fetch function can be a maximum of 34 instructions (note: this is
-    * actually a slight over-estimate).
-    */
-   spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
-
-
-   /* Allocate registers for the function's input parameters.
-    */
-   out_ptr = spe_allocate_register(p, 3);
-   in_ptr = spe_allocate_register(p, 4);
-   shuf_ptr = spe_allocate_register(p, 5);
-
-
-   /* Generate code for the individual attribute fetch functions.
-    */
-   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
-      unsigned offset;
-
-      if (function_index[i] == i) {
-	 cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr 
-						     - (void *) p->store);
-
-	 offset = 0;
-	 emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
-		    draw->vertex_element[i].src_format);
-	 spe_bi(p, 0, 0, 0);
-
-	 /* Round up to the next 16-byte boundary.
-	  */
-	 if ((((unsigned) p->store) & 0x0f) != 0) {
-	    const unsigned align = ((unsigned) p->store) & 0x0f;
-	    p->store = (uint32_t *) (((void *) p->store) + align);
-	 }
-      } else {
-	 /* Use the same function entry-point as a previously seen attribute
-	  * with the same format.
-	  */
-	 cell->attrib_fetch_offsets[i] = 
-	     cell->attrib_fetch_offsets[function_index[i]];
-      }
-   }
-#else
-   assert(0);
-#endif
-}
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
deleted file mode 100644
index 3d389d6ea36..00000000000
--- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file cell_vertex_shader.c
- * Vertex shader interface routines for Cell.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- */
-
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-#include "util/u_math.h"
-
-#include "cell_context.h"
-#include "cell_draw_arrays.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_batch.h"
-
-#include "cell/common.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-
-/**
- * Run the vertex shader on all vertices in the vertex queue.
- * Called by the draw module when the vertx cache needs to be flushed.
- */
-void
-cell_vertex_shader_queue_flush(struct draw_context *draw)
-{
-#if 0
-   struct cell_context *const cell =
-       (struct cell_context *) draw->driver_private;
-   struct cell_command_vs *const vs = &cell_global.command[0].vs;
-   uint64_t *batch;
-   struct cell_array_info *array_info;
-   unsigned i, j;
-   struct cell_attribute_fetch_code *cf;
-
-   assert(draw->vs.queue_nr != 0);
-
-   /* XXX: do this on statechange: 
-    */
-   draw_update_vertex_fetch(draw);
-   cell_update_vertex_fetch(draw);
-
-
-   batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf));
-   batch[0] = CELL_CMD_STATE_ATTRIB_FETCH;
-   cf = (struct cell_attribute_fetch_code *) (&batch[1]);
-   cf->base = (uint64_t) cell->attrib_fetch.store;
-   cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr 
-				   - (void *) cell->attrib_fetch.store));
-
-
-   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
-      const enum pipe_format format = draw->vertex_element[i].src_format;
-      const unsigned count = ((pf_size_x(format) != 0)
-			      + (pf_size_y(format) != 0)
-			      + (pf_size_z(format) != 0)
-			      + (pf_size_w(format) != 0));
-      const unsigned size = pf_size_x(format) * count;
-
-      batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info));
-
-      batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO;
-
-      array_info = (struct cell_array_info *) &batch[1];
-      assert(draw->vertex_fetch.src_ptr[i] != NULL);
-      array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
-      array_info->attr = i;
-      array_info->pitch = draw->vertex_fetch.pitch[i];
-      array_info->size = size;
-      array_info->function_offset = cell->attrib_fetch_offsets[i];
-   }
-
-   batch = cell_batch_alloc(cell, sizeof(batch[0])
-                            + sizeof(struct pipe_viewport_state));
-   batch[0] = CELL_CMD_STATE_VIEWPORT;
-   (void) memcpy(&batch[1], &draw->viewport,
-                 sizeof(struct pipe_viewport_state));
-
-   {
-      uint64_t uniforms = (uintptr_t) draw->user.constants;
-
-      batch = cell_batch_alloc(cell, 2 *sizeof(batch[0]));
-      batch[0] = CELL_CMD_STATE_UNIFORMS;
-      batch[1] = uniforms;
-   }
-
-   cell_batch_flush(cell);
-
-   vs->opcode = CELL_CMD_VS_EXECUTE;
-   vs->nr_attrs = draw->vertex_fetch.nr_attrs;
-
-   (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane));
-   vs->nr_planes = draw->nr_planes;
-
-   for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) {
-      const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i);
-
-      for (j = 0; j < n; j++) {
-         vs->elts[j] = draw->vs.queue[i + j].elt;
-         vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
-      }
-
-      for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) {
-         vs->elts[j] = vs->elts[0];
-         vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
-      }
-
-      vs->num_elts = n;
-      send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);
-
-      cell_flush_int(cell, CELL_FLUSH_WAIT);
-   }
-
-   draw->vs.post_nr = draw->vs.queue_nr;
-   draw->vs.queue_nr = 0;
-#else
-   assert(0);
-#endif
-}
diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore
deleted file mode 100644
index 2be9a2d3242..00000000000
--- a/src/gallium/drivers/cell/spu/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-g3d_spu
diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile
deleted file mode 100644
index 3cc52301da2..00000000000
--- a/src/gallium/drivers/cell/spu/Makefile
+++ /dev/null
@@ -1,83 +0,0 @@
-# Gallium3D Cell driver: SPU code
-
-# This makefile builds the g3d_spu.a file that's linked into the
-# PPU code/library.
-
-
-TOP = ../../../../..
-include $(TOP)/configs/current
-
-
-PROG = g3d
-
-PROG_SPU = $(PROG)_spu
-PROG_SPU_A = $(PROG)_spu.a
-PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
-
-
-SOURCES = \
-	spu_command.c \
-	spu_dcache.c \
-	spu_funcs.c \
-	spu_main.c \
-	spu_per_fragment_op.c \
-	spu_render.c \
-	spu_texture.c \
-	spu_tile.c \
-	spu_tri.c
-
-OLD_SOURCES = \
-	spu_exec.c \
-	spu_util.c \
-	spu_vertex_fetch.c \
-	spu_vertex_shader.c
-
-
-SPU_OBJECTS = $(SOURCES:.c=.o)
-
-SPU_ASM_OUT = $(SOURCES:.c=.s)
-
-
-INCLUDE_DIRS = \
-	-I$(TOP)/src/mesa \
-	-I$(TOP)/src/gallium/include \
-	-I$(TOP)/src/gallium/auxiliary \
-	-I$(TOP)/src/gallium/drivers
-
-
-.c.o:
-	$(SPU_CC) $(SPU_CFLAGS) -c $<
-
-.c.s:
-	$(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
-
-
-# The .a file will be linked into the main/PPU executable
-default: $(PROG_SPU_A)
-
-$(PROG_SPU_A): $(PROG_SPU_EMBED_O)
-	$(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O)
-
-$(PROG_SPU_EMBED_O): $(PROG_SPU)
-	$(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O)
-
-$(PROG_SPU): $(SPU_OBJECTS)
-	$(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS)
-
-
-
-asmfiles: $(SPU_ASM_OUT)
-
-
-clean:
-	rm -f *~ *.o *.a *.d *.s $(PROG_SPU)
-
-
-
-depend: $(SOURCES)
-	rm -f depend
-	touch depend
-	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
-
-include depend
-
diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h
deleted file mode 100644
index d7ce0055248..00000000000
--- a/src/gallium/drivers/cell/spu/spu_colorpack.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-
-#ifndef SPU_COLORPACK_H
-#define SPU_COLORPACK_H
-
-
-#include <transpose_matrix4x4.h>
-#include <spu_intrinsics.h>
-
-
-static INLINE unsigned int
-spu_pack_R8G8B8A8(vector float rgba)
-{
-  vector unsigned int out = spu_convtu(rgba, 32);
-
-  out = spu_shuffle(out, out, ((vector unsigned char) {
-                                  0, 4, 8, 12, 0, 0, 0, 0, 
-                                  0, 0, 0, 0, 0, 0, 0, 0 }) );
-
-  return spu_extract(out, 0);
-}
-
-
-static INLINE unsigned int
-spu_pack_A8R8G8B8(vector float rgba)
-{
-  vector unsigned int out = spu_convtu(rgba, 32);
-  out = spu_shuffle(out, out, ((vector unsigned char) {
-                                  12, 0, 4, 8, 0, 0, 0, 0, 
-                                  0, 0, 0, 0, 0, 0, 0, 0}) );
-  return spu_extract(out, 0);
-}
-
-
-static INLINE unsigned int
-spu_pack_B8G8R8A8(vector float rgba)
-{
-  vector unsigned int out = spu_convtu(rgba, 32);
-  out = spu_shuffle(out, out, ((vector unsigned char) {
-                                  8, 4, 0, 12, 0, 0, 0, 0, 
-                                  0, 0, 0, 0, 0, 0, 0, 0}) );
-  return spu_extract(out, 0);
-}
-
-
-static INLINE unsigned int
-spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
-{
-  vector unsigned int out = spu_convtu(rgba, 32);
-  out = spu_shuffle(out, out, shuffle);
-  return spu_extract(out, 0);
-}
-
-
-static INLINE vector float
-spu_unpack_B8G8R8A8(uint color)
-{
-   vector unsigned int color_u4 = spu_splats(color);
-   color_u4 = spu_shuffle(color_u4, color_u4,
-                          ((vector unsigned char) {
-                             2, 2, 2, 2,
-                             1, 1, 1, 1,
-                             0, 0, 0, 0,
-                             3, 3, 3, 3}) );
-   return spu_convtf(color_u4, 32);
-}
-
-
-static INLINE vector float
-spu_unpack_A8R8G8B8(uint color)
-{
-   vector unsigned int color_u4 = spu_splats(color);
-   color_u4 = spu_shuffle(color_u4, color_u4,
-                          ((vector unsigned char) {
-                             1, 1, 1, 1,
-                             2, 2, 2, 2,
-                             3, 3, 3, 3,
-                             0, 0, 0, 0}) );
-   return spu_convtf(color_u4, 32);
-}
-
-
-/**
- * \param color_in - array of 32-bit packed ARGB colors
- * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order
- */
-static INLINE void
-spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4],
-                               vector float color_out[4])
-{
-   vector unsigned int c0;
-
-   c0 = spu_shuffle(color_in[0], color_in[0],
-                    ((vector unsigned char) {
-                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) );
-   color_out[0] = spu_convtf(c0, 32);
-
-   c0 = spu_shuffle(color_in[1], color_in[1],
-                    ((vector unsigned char) {
-                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) );
-   color_out[1] = spu_convtf(c0, 32);
-
-   c0 = spu_shuffle(color_in[2], color_in[2],
-                    ((vector unsigned char) {
-                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) );
-   color_out[2] = spu_convtf(c0, 32);
-
-   c0 = spu_shuffle(color_in[3], color_in[3],
-                    ((vector unsigned char) {
-                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) );
-   color_out[3] = spu_convtf(c0, 32);
-
-   _transpose_matrix4x4(color_out, color_out);
-}
-
-
-
-#endif /* SPU_COLORPACK_H */
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
deleted file mode 100644
index 6f8ba9562d2..00000000000
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ /dev/null
@@ -1,810 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-/**
- * SPU command processing code
- */
-
-
-#include <stdio.h>
-#include <libmisc.h>
-
-#include "pipe/p_defines.h"
-
-#include "spu_command.h"
-#include "spu_main.h"
-#include "spu_render.h"
-#include "spu_per_fragment_op.h"
-#include "spu_texture.h"
-#include "spu_tile.h"
-#include "spu_vertex_shader.h"
-#include "spu_dcache.h"
-#include "cell/common.h"
-
-
-struct spu_vs_context draw;
-
-
-/**
- * Buffers containing dynamically generated SPU code:
- */
-PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS];
-
-
-
-static INLINE int
-align(int value, int alignment)
-{
-   return (value + alignment - 1) & ~(alignment - 1);
-}
-
-
-
-/**
- * Tell the PPU that this SPU has finished copying a buffer to
- * local store and that it may be reused by the PPU.
- * This is done by writting a 16-byte batch-buffer-status block back into
- * main memory (in cell_context->buffer_status[]).
- */
-static void
-release_buffer(uint buffer)
-{
-   /* Evidently, using less than a 16-byte status doesn't work reliably */
-   static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
-                                              CELL_BUFFER_STATUS_FREE,
-                                              CELL_BUFFER_STATUS_FREE,
-                                              CELL_BUFFER_STATUS_FREE};
-   const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
-   uint *dst = spu.init.buffer_status + index;
-
-   ASSERT(buffer < CELL_NUM_BUFFERS);
-
-   mfc_put((void *) &status,    /* src in local memory */
-           (unsigned int) dst,  /* dst in main memory */
-           sizeof(status),      /* size */
-           TAG_MISC,            /* tag is unimportant */
-           0, /* tid */
-           0  /* rid */);
-}
-
-
-/**
- * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
- * There's a qword of status per SPU.
- */
-static void
-cmd_fence(struct cell_command_fence *fence_cmd)
-{
-   static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
-                                              CELL_FENCE_SIGNALLED,
-                                              CELL_FENCE_SIGNALLED,
-                                              CELL_FENCE_SIGNALLED};
-   uint *dst = (uint *) fence_cmd->fence;
-   dst += 4 * spu.init.id;  /* main store/memory address, not local store */
-   ASSERT_ALIGN16(dst);
-   mfc_put((void *) &status,    /* src in local memory */
-           (unsigned int) dst,  /* dst in main memory */
-           sizeof(status),      /* size */
-           TAG_FENCE,           /* tag */
-           0, /* tid */
-           0  /* rid */);
-}
-
-
-static void
-cmd_clear_surface(const struct cell_command_clear_surface *clear)
-{
-   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
-
-   if (clear->surface == 0) {
-      spu.fb.color_clear_value = clear->value;
-      if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
-         uint x = (spu.init.id << 4) | (spu.init.id << 12) |
-            (spu.init.id << 20) | (spu.init.id << 28);
-         spu.fb.color_clear_value ^= x;
-      }
-   }
-   else {
-      spu.fb.depth_clear_value = clear->value;
-   }
-
-#define CLEAR_OPT 1
-#if CLEAR_OPT
-
-   /* Simply set all tiles' status to CLEAR.
-    * When we actually begin rendering into a tile, we'll initialize it to
-    * the clear value.  If any tiles go untouched during the frame,
-    * really_clear_tiles() will set them to the clear value.
-    */
-   if (clear->surface == 0) {
-      memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
-   }
-   else {
-      memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
-   }
-
-#else
-
-   /*
-    * This path clears the whole framebuffer to the clear color right now.
-    */
-
-   /*
-   printf("SPU: %s num=%d w=%d h=%d\n",
-          __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
-   */
-
-   /* init a single tile to the clear value */
-   if (clear->surface == 0) {
-      clear_c_tile(&spu.ctile);
-   }
-   else {
-      clear_z_tile(&spu.ztile);
-   }
-
-   /* walk over my tiles, writing the 'clear' tile's data */
-   {
-      const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
-      uint i;
-      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
-         uint tx = i % spu.fb.width_tiles;
-         uint ty = i / spu.fb.width_tiles;
-         if (clear->surface == 0)
-            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
-         else
-            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
-      }
-   }
-
-   if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
-      wait_on_mask(1 << TAG_SURFACE_CLEAR);
-   }
-
-#endif /* CLEAR_OPT */
-
-   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
-}
-
-
-static void
-cmd_release_verts(const struct cell_command_release_verts *release)
-{
-   D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
-   ASSERT(release->vertex_buf != ~0U);
-   release_buffer(release->vertex_buf);
-}
-
-
-/**
- * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
- * This involves installing new fragment ops SPU code.
- * If this function is never called, we'll use a regular C fallback function
- * for fragment processing.
- */
-static void
-cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
-{
-   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
-
-   /* Copy state info (for fallback case only - this will eventually
-    * go away when the fallback case goes away)
-    */
-   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
-   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
-   memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
-
-   /* Make sure the SPU knows which buffers it's expected to read when
-    * it's told to pull tiles.
-    */
-   spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
-
-   /* If we're forcing the fallback code to be used (for debug purposes),
-    * install that.  Otherwise install the incoming SPU code.
-    */
-   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
-      static unsigned int warned = 0;
-      if (!warned) {
-         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
-         warned = 1;
-      }
-      /* The following two lines aren't really necessary if you
-       * know the debug flags won't change during a run, and if you
-       * know that the function pointers are initialized correctly.
-       * We set them here to allow a person to change the debug
-       * flags during a run (from inside a debugger).
-       */
-      spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
-      spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
-      return;
-   }
-
-   /* Make sure the SPU code buffer is large enough to hold the incoming code.
-    * Note that we *don't* use align_malloc() and align_free(), because
-    * those utility functions are *not* available in SPU code.
-    * */
-   if (spu.fragment_ops_code_size < fops->total_code_size) {
-      if (spu.fragment_ops_code != NULL) {
-         free(spu.fragment_ops_code);
-      }
-      spu.fragment_ops_code_size = fops->total_code_size;
-      spu.fragment_ops_code = malloc(fops->total_code_size);
-      if (spu.fragment_ops_code == NULL) {
-         /* Whoops. */
-         fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
-         spu.fragment_ops_code = NULL;
-         spu.fragment_ops_code_size = 0;
-         spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
-         spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
-         return;
-      }
-   }
-
-   /* Copy the SPU code from the command buffer to the spu buffer */
-   memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);
-
-   /* Set the pointers for the front-facing and back-facing fragments
-    * to the specified offsets within the code.  Note that if the
-    * front-facing and back-facing code are the same, they'll have
-    * the same offset.
-    */
-   spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
-   spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
-}
-
-static void
-cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
-{
-   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
-   /* Copy SPU code from batch buffer to spu buffer */
-   memcpy(spu.fragment_program_code, fp->code,
-          SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
-#if 01
-   /* Point function pointer at new code */
-   spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
-#endif
-}
-
-
-static uint
-cmd_state_fs_constants(const qword *buffer, uint pos)
-{
-   const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0);
-   const float *constants = (const float *) &buffer[pos+2];
-   uint i;
-
-   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
-
-   /* Expand each float to float[4] for SOA execution */
-   for (i = 0; i < num_const; i++) {
-      D_PRINTF(CELL_DEBUG_CMD, "  const[%u] = %f\n", i, constants[i]);
-      spu.constants[i] = spu_splats(constants[i]);
-   }
-
-   /* return new buffer pos (in 16-byte words) */
-   return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16);
-}
-
-
-static void
-cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
-{
-   D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x  zformat 0x%x\n",
-             cmd->width,
-             cmd->height,
-             cmd->color_start,
-             cmd->color_format,
-             cmd->depth_format);
-
-   ASSERT_ALIGN16(cmd->color_start);
-   ASSERT_ALIGN16(cmd->depth_start);
-
-   spu.fb.color_start = cmd->color_start;
-   spu.fb.depth_start = cmd->depth_start;
-   spu.fb.color_format = cmd->color_format;
-   spu.fb.depth_format = cmd->depth_format;
-   spu.fb.width = cmd->width;
-   spu.fb.height = cmd->height;
-   spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
-   spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
-
-   switch (spu.fb.depth_format) {
-   case PIPE_FORMAT_Z32_UNORM:
-      spu.fb.zsize = 4;
-      spu.fb.zscale = (float) 0xffffffffu;
-      break;
-   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
-      spu.fb.zsize = 4;
-      spu.fb.zscale = (float) 0x00ffffffu;
-      break;
-   case PIPE_FORMAT_Z16_UNORM:
-      spu.fb.zsize = 2;
-      spu.fb.zscale = (float) 0xffffu;
-      break;
-   default:
-      spu.fb.zsize = 0;
-      break;
-   }
-}
-
-
-/**
- * Tex texture mask_s/t and scale_s/t fields depend on the texture size and
- * sampler wrap modes.
- */
-static void
-update_tex_masks(struct spu_texture *texture,
-                 const struct pipe_sampler_state *sampler)
-{
-   uint i;
-
-   for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
-      int width = texture->level[i].width;
-      int height = texture->level[i].height;
-
-      if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT)
-         texture->level[i].mask_s = spu_splats(width - 1);
-      else
-         texture->level[i].mask_s = spu_splats(~0);
-
-      if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT)
-         texture->level[i].mask_t = spu_splats(height - 1);
-      else
-         texture->level[i].mask_t = spu_splats(~0);
-
-      if (sampler->normalized_coords) {
-         texture->level[i].scale_s = spu_splats((float) width);
-         texture->level[i].scale_t = spu_splats((float) height);
-      }
-      else {
-         texture->level[i].scale_s = spu_splats(1.0f);
-         texture->level[i].scale_t = spu_splats(1.0f);
-      }
-   }
-}
-
-
-static void
-cmd_state_sampler(const struct cell_command_sampler *sampler)
-{
-   uint unit = sampler->unit;
-
-   D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
-
-   spu.sampler[unit] = sampler->state;
-
-   switch (spu.sampler[unit].min_img_filter) {
-   case PIPE_TEX_FILTER_LINEAR:
-      spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
-      break;
-   case PIPE_TEX_FILTER_NEAREST:
-      spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
-      break;
-   default:
-      ASSERT(0);
-   }
-
-   switch (spu.sampler[sampler->unit].mag_img_filter) {
-   case PIPE_TEX_FILTER_LINEAR:
-      spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
-      break;
-   case PIPE_TEX_FILTER_NEAREST:
-      spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
-      break;
-   default:
-      ASSERT(0);
-   }
-
-   switch (spu.sampler[sampler->unit].min_mip_filter) {
-   case PIPE_TEX_MIPFILTER_NEAREST:
-   case PIPE_TEX_MIPFILTER_LINEAR:
-      spu.sample_texture_2d[unit] = sample_texture_2d_lod;
-      break;
-   case PIPE_TEX_MIPFILTER_NONE:
-      spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
-      break;
-   default:
-      ASSERT(0);
-   }
-
-   update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
-}
-
-
-static void
-cmd_state_texture(const struct cell_command_texture *texture)
-{
-   const uint unit = texture->unit;
-   uint i;
-
-   D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
-
-   spu.texture[unit].max_level = 0;
-   spu.texture[unit].target = texture->target;
-
-   for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
-      uint width = texture->width[i];
-      uint height = texture->height[i];
-      uint depth = texture->depth[i];
-
-      D_PRINTF(CELL_DEBUG_CMD, "  LEVEL %u: at %p  size[0] %u x %u\n", i,
-             texture->start[i], texture->width[i], texture->height[i]);
-
-      spu.texture[unit].level[i].start = texture->start[i];
-      spu.texture[unit].level[i].width = width;
-      spu.texture[unit].level[i].height = height;
-      spu.texture[unit].level[i].depth = depth;
-
-      spu.texture[unit].level[i].tiles_per_row =
-         (width + TILE_SIZE - 1) / TILE_SIZE;
-
-      spu.texture[unit].level[i].bytes_per_image =
-         4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
-
-      spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
-      spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
-
-      if (texture->start[i])
-         spu.texture[unit].max_level = i;
-   }
-
-   update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
-}
-
-
-static void
-cmd_state_vertex_info(const struct vertex_info *vinfo)
-{
-   D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
-   ASSERT(vinfo->num_attribs >= 1);
-   ASSERT(vinfo->num_attribs <= 8);
-   memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
-}
-
-
-static void
-cmd_state_vs_array_info(const struct cell_array_info *vs_info)
-{
-   const unsigned attr = vs_info->attr;
-
-   ASSERT(attr < PIPE_MAX_ATTRIBS);
-   draw.vertex_fetch.src_ptr[attr] = vs_info->base;
-   draw.vertex_fetch.pitch[attr] = vs_info->pitch;
-   draw.vertex_fetch.size[attr] = vs_info->size;
-   draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
-   draw.vertex_fetch.dirty = 1;
-}
-
-
-static void
-cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
-{
-   mfc_get(attribute_fetch_code_buffer,
-           (unsigned int) code->base,  /* src */
-           code->size,
-           TAG_BATCH_BUFFER,
-           0, /* tid */
-           0  /* rid */);
-   wait_on_mask(1 << TAG_BATCH_BUFFER);
-
-   draw.vertex_fetch.code = attribute_fetch_code_buffer;
-}
-
-
-static void
-cmd_finish(void)
-{
-   D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
-   really_clear_tiles(0);
-   /* wait for all outstanding DMAs to finish */
-   mfc_write_tag_mask(~0);
-   mfc_read_tag_status_all();
-   /* send mbox message to PPU */
-   spu_write_out_mbox(CELL_CMD_FINISH);
-}
-
-
-/**
- * Execute a batch of commands which was sent to us by the PPU.
- * See the cell_emit_state.c code to see where the commands come from.
- *
- * The opcode param encodes the location of the buffer and its size.
- */
-static void
-cmd_batch(uint opcode)
-{
-   const uint buf = (opcode >> 8) & 0xff;
-   uint size = (opcode >> 16);
-   PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16];
-   const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
-   uint pos;
-
-   D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
-             buf, size, spu.init.buffers[buf]);
-
-   ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
-
-   ASSERT_ALIGN16(spu.init.buffers[buf]);
-
-   size = ROUNDUP16(size);
-
-   ASSERT_ALIGN16(spu.init.buffers[buf]);
-
-   mfc_get(buffer,  /* dest */
-           (unsigned int) spu.init.buffers[buf],  /* src */
-           size,
-           TAG_BATCH_BUFFER,
-           0, /* tid */
-           0  /* rid */);
-   wait_on_mask(1 << TAG_BATCH_BUFFER);
-
-   /* Tell PPU we're done copying the buffer to local store */
-   D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
-   release_buffer(buf);
-
-   /*
-    * Loop over commands in the batch buffer
-    */
-   for (pos = 0; pos < usize; /* no incr */) {
-      switch (si_to_uint(buffer[pos])) {
-      /*
-       * rendering commands
-       */
-      case CELL_CMD_CLEAR_SURFACE:
-         {
-            struct cell_command_clear_surface *clr
-               = (struct cell_command_clear_surface *) &buffer[pos];
-            cmd_clear_surface(clr);
-            pos += sizeof(*clr) / 16;
-         }
-         break;
-      case CELL_CMD_RENDER:
-         {
-            struct cell_command_render *render
-               = (struct cell_command_render *) &buffer[pos];
-            uint pos_incr;
-            cmd_render(render, &pos_incr);
-            pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return
-         }
-         break;
-      /*
-       * state-update commands
-       */
-      case CELL_CMD_STATE_FRAMEBUFFER:
-         {
-            struct cell_command_framebuffer *fb
-               = (struct cell_command_framebuffer *) &buffer[pos];
-            cmd_state_framebuffer(fb);
-            pos += sizeof(*fb) / 16;
-         }
-         break;
-      case CELL_CMD_STATE_FRAGMENT_OPS:
-         {
-            struct cell_command_fragment_ops *fops
-               = (struct cell_command_fragment_ops *) &buffer[pos];
-            cmd_state_fragment_ops(fops);
-            /* This is a variant-sized command */
-            pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16;
-         }
-         break;
-      case CELL_CMD_STATE_FRAGMENT_PROGRAM:
-         {
-            struct cell_command_fragment_program *fp
-               = (struct cell_command_fragment_program *) &buffer[pos];
-            cmd_state_fragment_program(fp);
-            pos += sizeof(*fp) / 16;
-         }
-         break;
-      case CELL_CMD_STATE_FS_CONSTANTS:
-         pos = cmd_state_fs_constants(buffer, pos);
-         break;
-      case CELL_CMD_STATE_RASTERIZER:
-         {
-            struct cell_command_rasterizer *rast =
-               (struct cell_command_rasterizer *) &buffer[pos];
-            spu.rasterizer = rast->rasterizer;
-            pos += sizeof(*rast) / 16;
-         }
-         break;
-      case CELL_CMD_STATE_SAMPLER:
-         {
-            struct cell_command_sampler *sampler
-               = (struct cell_command_sampler *) &buffer[pos];
-            cmd_state_sampler(sampler);
-            pos += sizeof(*sampler) / 16;
-         }
-         break;
-      case CELL_CMD_STATE_TEXTURE:
-         {
-            struct cell_command_texture *texture
-               = (struct cell_command_texture *) &buffer[pos];
-            cmd_state_texture(texture);
-            pos += sizeof(*texture) / 16;
-         }
-         break;
-      case CELL_CMD_STATE_VERTEX_INFO:
-         cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
-         pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16;
-         break;
-      case CELL_CMD_STATE_VIEWPORT:
-         (void) memcpy(& draw.viewport, &buffer[pos+1],
-                       sizeof(struct pipe_viewport_state));
-         pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16;
-         break;
-      case CELL_CMD_STATE_UNIFORMS:
-         draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0);
-         pos += 2;
-         break;
-      case CELL_CMD_STATE_VS_ARRAY_INFO:
-         cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
-         pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16;
-         break;
-      case CELL_CMD_STATE_BIND_VS:
-#if 0
-         spu_bind_vertex_shader(&draw,
-                                (struct cell_shader_info *) &buffer[pos+1]);
-#endif
-         pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16;
-         break;
-      case CELL_CMD_STATE_ATTRIB_FETCH:
-         cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
-                                &buffer[pos+1]);
-         pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16;
-         break;
-      /*
-       * misc commands
-       */
-      case CELL_CMD_FINISH:
-         cmd_finish();
-         pos += 1;
-         break;
-      case CELL_CMD_FENCE:
-         {
-            struct cell_command_fence *fence_cmd =
-               (struct cell_command_fence *) &buffer[pos];
-            cmd_fence(fence_cmd);
-            pos += sizeof(*fence_cmd) / 16;
-         }
-         break;
-      case CELL_CMD_RELEASE_VERTS:
-         {
-            struct cell_command_release_verts *release
-               = (struct cell_command_release_verts *) &buffer[pos];
-            cmd_release_verts(release);
-            pos += sizeof(*release) / 16;
-         }
-         break;
-      case CELL_CMD_FLUSH_BUFFER_RANGE: {
-	 struct cell_buffer_range *br = (struct cell_buffer_range *)
-	     &buffer[pos+1];
-
-	 spu_dcache_mark_dirty((unsigned) br->base, br->size);
-         pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16;
-	 break;
-      }
-      default:
-         printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos]));
-         ASSERT(0);
-         break;
-      }
-   }
-
-   D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
-}
-
-
-#define PERF 0
-
-
-/**
- * Main loop for SPEs: Get a command, execute it, repeat.
- */
-void
-command_loop(void)
-{
-   int exitFlag = 0;
-   uint t0, t1;
-
-   D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
-
-   while (!exitFlag) {
-      unsigned opcode;
-
-      D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
-
-      if (PERF)
-         spu_write_decrementer(~0);
-
-      /* read/wait from mailbox */
-      opcode = (unsigned int) spu_read_in_mbox();
-      D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
-
-      if (PERF)
-         t0 = spu_read_decrementer();
-
-      switch (opcode & CELL_CMD_OPCODE_MASK) {
-      case CELL_CMD_EXIT:
-         D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
-         exitFlag = 1;
-         break;
-      case CELL_CMD_VS_EXECUTE:
-#if 0
-         spu_execute_vertex_shader(&draw, &cmd.vs);
-#endif
-         break;
-      case CELL_CMD_BATCH:
-         cmd_batch(opcode);
-         break;
-      default:
-         printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
-      }
-
-      if (PERF) {
-         t1 = spu_read_decrementer();
-         printf("wait mbox time: %gms   batch time: %gms\n",
-                (~0u - t0) * spu.init.inv_timebase,
-                (t0 - t1) * spu.init.inv_timebase);
-      }
-   }
-
-   D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
-
-   if (spu.init.debug_flags & CELL_DEBUG_CACHE)
-      spu_dcache_report();
-}
-
-/* Initialize this module; we manage the fragment ops buffer here. */
-void
-spu_command_init(void)
-{
-   /* Install default/fallback fragment processing function.
-    * This will normally be overriden by a code-gen'd function
-    * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
-    */
-   spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
-   spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
-
-   /* Set up the basic empty buffer for code-gen'ed fragment ops */
-   spu.fragment_ops_code = NULL;
-   spu.fragment_ops_code_size = 0;
-}
-
-void
-spu_command_close(void)
-{
-   /* Deallocate the code-gen buffer for fragment ops, and reset the
-    * fragment ops functions to their initial setting (just to leave
-    * things in a good state).
-    */
-   if (spu.fragment_ops_code != NULL) {
-      free(spu.fragment_ops_code);
-   }
-   spu_command_init();
-}
diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h
deleted file mode 100644
index 83dcdade288..00000000000
--- a/src/gallium/drivers/cell/spu/spu_command.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-extern void
-command_loop(void);
-
-extern void
-spu_command_init(void);
-
-extern void
-spu_command_close(void);
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
deleted file mode 100644
index a6d67634fd8..00000000000
--- a/src/gallium/drivers/cell/spu/spu_dcache.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "cell/common.h"
-#include "spu_main.h"
-#include "spu_dcache.h"
-
-#define CACHELINE_LOG2SIZE    7
-#define LINE_SIZE             (1U << 7)
-#define ALIGN_MASK            (~(LINE_SIZE - 1))
-
-#define CACHE_NAME            data
-#define CACHED_TYPE           qword
-#define CACHE_TYPE            CACHE_TYPE_RO
-#define CACHE_SET_TAGID(set)  (((set) & 0x03) + TAG_DCACHE0)
-#define CACHE_LOG2NNWAY       2
-#define CACHE_LOG2NSETS       6
-#ifdef DEBUG
-#define CACHE_STATS           1
-#endif
-#include <cache-api.h>
-
-/* Yes folks, this is ugly.
- */
-#undef CACHE_NWAY
-#undef CACHE_NSETS
-#define CACHE_NAME            data
-#define CACHE_NWAY            4
-#define CACHE_NSETS           (1U << 6)
-
-
-/**
- * Fetch between arbitrary number of bytes from an unaligned address
- *
- * \param dst   Destination data buffer
- * \param ea    Main memory effective address of source data
- * \param size  Number of bytes to read
- *
- * \warning
- * As is hinted by the type of the \c dst pointer, this function writes
- * multiples of 16-bytes.
- */
-void
-spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size)
-{
-   const int shift = ea & 0x0f;
-   const unsigned read_size = ROUNDUP16(size + shift);
-   const unsigned last_read = ROUNDUP16(ea + size);
-   const qword *const last_write = dst + (ROUNDUP16(size) / 16);
-   unsigned i;
-
-
-   if (shift == 0) {
-      /* Data is already aligned.  Fetch directly into the destination buffer.
-       */
-      for (i = 0; i < size; i += 16) {
-         *(dst++) = cache_rd(data, ea + i);
-      }
-   } else {
-      qword hi;
-
-
-      /* Please exercise extreme caution when modifying this code.  This code
-       * must not read past the end of the page containing the source data,
-       * and it must not write more than ((size + 15) / 16) qwords to the
-       * destination buffer.
-       */
-      ea &= ~0x0f;
-      hi = cache_rd(data, ea);
-      for (i = 16; i < read_size; i += 16) {
-         qword lo = cache_rd(data, ea + i);
-
-         *(dst++) = si_or((qword) spu_slqwbyte(hi, shift),
-                          (qword) spu_rlmaskqwbyte(lo, shift - 16));
-         hi = lo;
-      }
-
-      if (dst != last_write) {
-         *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0));
-      }
-   }
-   
-   ASSERT((ea + i) == last_read);
-   ASSERT(dst == last_write);
-}
-
-
-/**
- * Notify the cache that a range of main memory may have been modified
- */
-void
-spu_dcache_mark_dirty(unsigned ea, unsigned size)
-{
-   unsigned i;
-   const unsigned aligned_start = (ea & ALIGN_MASK);
-   const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) 
-       & ALIGN_MASK;
-
-
-   for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
-      const unsigned entry = __cache_dir[i];
-      const unsigned addr = entry & ~0x0f;
-
-      __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end))
-          ? (entry & ~CACHELINE_VALID) : entry;
-   }
-}
-
-
-/**
- * Print cache utilization report
- */
-void
-spu_dcache_report(void)
-{
-#ifdef CACHE_STATS
-   if (spu.init.id == 0) {
-      printf("SPU 0: Texture cache report:\n");
-      cache_pr_stats(data);
-   }
-#endif
-}
-
-
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h
deleted file mode 100644
index 39a19eb31b5..00000000000
--- a/src/gallium/drivers/cell/spu/spu_dcache.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SPU_DCACHE_H
-#define SPU_DCACHE_H
-
-extern void
-spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size);
-
-extern void
-spu_dcache_mark_dirty(unsigned ea, unsigned size);
-
-extern void
-spu_dcache_report(void);
-
-#endif /* SPU_DCACHE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
deleted file mode 100644
index e4ebeb595ce..00000000000
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ /dev/null
@@ -1,1870 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * TGSI interpretor/executor.
- *
- * Flow control information:
- *
- * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
- * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
- * care since a condition may be true for some quad components but false
- * for other components.
- *
- * We basically execute all statements (even if they're in the part of
- * an IF/ELSE clause that's "not taken") and use a special mask to
- * control writing to destination registers.  This is the ExecMask.
- * See store_dest().
- *
- * The ExecMask is computed from three other masks (CondMask, LoopMask and
- * ContMask) which are controlled by the flow control instructions (namely:
- * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
- *
- *
- * Authors:
- *   Michal Krol
- *   Brian Paul
- */
-
-#include <transpose_matrix4x4.h>
-#include <simdmath/ceilf4.h>
-#include <simdmath/cosf4.h>
-#include <simdmath/divf4.h>
-#include <simdmath/floorf4.h>
-#include <simdmath/log2f4.h>
-#include <simdmath/powf4.h>
-#include <simdmath/sinf4.h>
-#include <simdmath/sqrtf4.h>
-#include <simdmath/truncf4.h>
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "spu_exec.h"
-#include "spu_main.h"
-#include "spu_vertex_shader.h"
-#include "spu_dcache.h"
-#include "cell/common.h"
-
-#define TILE_TOP_LEFT     0
-#define TILE_TOP_RIGHT    1
-#define TILE_BOTTOM_LEFT  2
-#define TILE_BOTTOM_RIGHT 3
-
-/*
- * Shorthand locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I
-#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C
-#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I
-#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C
-#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I
-#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C
-#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I
-#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C
-#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I
-#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C
-#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I
-#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C
-#define TEMP_128_I         TGSI_EXEC_TEMP_128_I
-#define TEMP_128_C         TGSI_EXEC_TEMP_128_C
-#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I
-#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C
-#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
-#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
-#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
-#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
-#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
-#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
-#define TEMP_R0            TGSI_EXEC_TEMP_R0
-
-#define FOR_EACH_CHANNEL(CHAN)\
-   for (CHAN = 0; CHAN < 4; CHAN++)
-
-#define IS_CHANNEL_ENABLED(INST, CHAN)\
-   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
-
-#define IS_CHANNEL_ENABLED2(INST, CHAN)\
-   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
-
-#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
-   FOR_EACH_CHANNEL( CHAN )\
-      if (IS_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
-   FOR_EACH_CHANNEL( CHAN )\
-      if (IS_CHANNEL_ENABLED2( INST, CHAN ))
-
-
-/** The execution mask depends on the conditional mask and the loop mask */
-#define UPDATE_EXEC_MASK(MACH) \
-      MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
-
-
-#define CHAN_X  0
-#define CHAN_Y  1
-#define CHAN_Z  2
-#define CHAN_W  3
-
-
-
-/**
- * Initialize machine state by expanding tokens to full instructions,
- * allocating temporary storage, setting up constants, etc.
- * After this, we can call spu_exec_machine_run() many times.
- */
-void
-spu_exec_machine_init(struct spu_exec_machine *mach,
-                      uint numSamplers,
-                      struct spu_sampler *samplers,
-                      unsigned processor)
-{
-   const qword zero = si_il(0);
-   const qword not_zero = si_il(~0);
-
-   (void) numSamplers;
-   mach->Samplers = samplers;
-   mach->Processor = processor;
-   mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
-
-   /* Setup constants. */
-   mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
-   mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
-   mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1);
-   mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
-
-   mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
-   mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
-   mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
-   mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
-}
-
-
-static INLINE qword
-micro_abs(qword src)
-{
-   return si_rotmi(si_shli(src, 1), -1);
-}
-
-static INLINE qword
-micro_ceil(qword src)
-{
-   return (qword) _ceilf4((vec_float4) src);
-}
-
-static INLINE qword
-micro_cos(qword src)
-{
-   return (qword) _cosf4((vec_float4) src);
-}
-
-static const qword br_shuf = {
-   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
-   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
-   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
-   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
-   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
-   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
-   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
-   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
-};
-
-static const qword bl_shuf = {
-   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
-   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
-   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
-   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
-   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
-   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
-   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
-   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
-};
-
-static const qword tl_shuf = {
-   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
-   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
-   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
-   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
-   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
-   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
-   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
-   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
-};
-
-static qword
-micro_ddx(qword src)
-{
-   qword bottom_right = si_shufb(src, src, br_shuf);
-   qword bottom_left = si_shufb(src, src, bl_shuf);
-
-   return si_fs(bottom_right, bottom_left);
-}
-
-static qword
-micro_ddy(qword src)
-{
-   qword top_left = si_shufb(src, src, tl_shuf);
-   qword bottom_left = si_shufb(src, src, bl_shuf);
-
-   return si_fs(top_left, bottom_left);
-}
-
-static INLINE qword
-micro_div(qword src0, qword src1)
-{
-   return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
-}
-
-static qword
-micro_flr(qword src)
-{
-   return (qword) _floorf4((vec_float4) src);
-}
-
-static qword
-micro_frc(qword src)
-{
-   return si_fs(src, (qword) _floorf4((vec_float4) src));
-}
-
-static INLINE qword
-micro_ge(qword src0, qword src1)
-{
-   return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
-}
-
-static qword
-micro_lg2(qword src)
-{
-   return (qword) _log2f4((vec_float4) src);
-}
-
-static INLINE qword
-micro_lt(qword src0, qword src1)
-{
-   const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
-
-   return si_xori(tmp, 0xff);
-}
-
-static INLINE qword
-micro_max(qword src0, qword src1)
-{
-   return si_selb(src1, src0, si_fcgt(src0, src1));
-}
-
-static INLINE qword
-micro_min(qword src0, qword src1)
-{
-   return si_selb(src0, src1, si_fcgt(src0, src1));
-}
-
-static qword
-micro_neg(qword src)
-{
-   return si_xor(src, (qword) spu_splats(0x80000000));
-}
-
-static qword
-micro_set_sign(qword src)
-{
-   return si_or(src, (qword) spu_splats(0x80000000));
-}
-
-static qword
-micro_pow(qword src0, qword src1)
-{
-   return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
-}
-
-static qword
-micro_rnd(qword src)
-{
-   const qword half = (qword) spu_splats(0.5f);
-
-   /* May be able to use _roundf4.  There may be some difference, though.
-    */
-   return (qword) _floorf4((vec_float4) si_fa(src, half));
-}
-
-static INLINE qword
-micro_ishr(qword src0, qword src1)
-{
-   return si_rotma(src0, si_sfi(src1, 0));
-}
-
-static qword
-micro_trunc(qword src)
-{
-   return (qword) _truncf4((vec_float4) src);
-}
-
-static qword
-micro_sin(qword src)
-{
-   return (qword) _sinf4((vec_float4) src);
-}
-
-static INLINE qword
-micro_sqrt(qword src)
-{
-   return (qword) _sqrtf4((vec_float4) src);
-}
-
-static void
-fetch_src_file_channel(
-   const struct spu_exec_machine *mach,
-   const uint file,
-   const uint swizzle,
-   const union spu_exec_channel *index,
-   union spu_exec_channel *chan )
-{
-   switch( swizzle ) {
-   case TGSI_SWIZZLE_X:
-   case TGSI_SWIZZLE_Y:
-   case TGSI_SWIZZLE_Z:
-   case TGSI_SWIZZLE_W:
-      switch( file ) {
-      case TGSI_FILE_CONSTANT: {
-         unsigned i;
-
-         for (i = 0; i < 4; i++) {
-            const float *ptr = mach->Consts[index->i[i]];
-            float tmp[4];
-
-            spu_dcache_fetch_unaligned((qword *) tmp,
-                                       (uintptr_t)(ptr + swizzle),
-                                       sizeof(float));
-
-            chan->f[i] = tmp[0];
-         }
-         break;
-      }
-
-      case TGSI_FILE_INPUT:
-         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      case TGSI_FILE_TEMPORARY:
-         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      case TGSI_FILE_IMMEDIATE:
-         ASSERT( index->i[0] < (int) mach->ImmLimit );
-         ASSERT( index->i[1] < (int) mach->ImmLimit );
-         ASSERT( index->i[2] < (int) mach->ImmLimit );
-         ASSERT( index->i[3] < (int) mach->ImmLimit );
-
-         chan->f[0] = mach->Imms[index->i[0]][swizzle];
-         chan->f[1] = mach->Imms[index->i[1]][swizzle];
-         chan->f[2] = mach->Imms[index->i[2]][swizzle];
-         chan->f[3] = mach->Imms[index->i[3]][swizzle];
-         break;
-
-      case TGSI_FILE_ADDRESS:
-         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      case TGSI_FILE_OUTPUT:
-         /* vertex/fragment output vars can be read too */
-         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
-         break;
-
-      default:
-         ASSERT( 0 );
-      }
-      break;
-
-   default:
-      ASSERT( 0 );
-   }
-}
-
-static void
-fetch_source(
-   const struct spu_exec_machine *mach,
-   union spu_exec_channel *chan,
-   const struct tgsi_full_src_register *reg,
-   const uint chan_index )
-{
-   union spu_exec_channel index;
-   uint swizzle;
-
-   index.i[0] =
-   index.i[1] =
-   index.i[2] =
-   index.i[3] = reg->Register.Index;
-
-   if (reg->Register.Indirect) {
-      union spu_exec_channel index2;
-      union spu_exec_channel indir_index;
-
-      index2.i[0] =
-      index2.i[1] =
-      index2.i[2] =
-      index2.i[3] = reg->Indirect.Index;
-
-      swizzle = tgsi_util_get_src_register_swizzle(&reg->Indirect,
-                                                   CHAN_X);
-      fetch_src_file_channel(
-         mach,
-         reg->Indirect.File,
-         swizzle,
-         &index2,
-         &indir_index );
-
-      index.q = si_a(index.q, indir_index.q);
-   }
-
-   if( reg->Register.Dimension ) {
-      switch( reg->Register.File ) {
-      case TGSI_FILE_INPUT:
-         index.q = si_mpyi(index.q, 17);
-         break;
-      case TGSI_FILE_CONSTANT:
-         index.q = si_shli(index.q, 12);
-         break;
-      default:
-         ASSERT( 0 );
-      }
-
-      index.i[0] += reg->Dimension.Index;
-      index.i[1] += reg->Dimension.Index;
-      index.i[2] += reg->Dimension.Index;
-      index.i[3] += reg->Dimension.Index;
-
-      if (reg->Dimension.Indirect) {
-         union spu_exec_channel index2;
-         union spu_exec_channel indir_index;
-
-         index2.i[0] =
-         index2.i[1] =
-         index2.i[2] =
-         index2.i[3] = reg->DimIndirect.Index;
-
-         swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
-         fetch_src_file_channel(
-            mach,
-            reg->DimIndirect.File,
-            swizzle,
-            &index2,
-            &indir_index );
-
-         index.q = si_a(index.q, indir_index.q);
-      }
-   }
-
-   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
-   fetch_src_file_channel(
-      mach,
-      reg->Register.File,
-      swizzle,
-      &index,
-      chan );
-
-   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
-   case TGSI_UTIL_SIGN_CLEAR:
-      chan->q = micro_abs(chan->q);
-      break;
-
-   case TGSI_UTIL_SIGN_SET:
-      chan->q = micro_set_sign(chan->q);
-      break;
-
-   case TGSI_UTIL_SIGN_TOGGLE:
-      chan->q = micro_neg(chan->q);
-      break;
-
-   case TGSI_UTIL_SIGN_KEEP:
-      break;
-   }
-
-   if (reg->RegisterExtMod.Complement) {
-      chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
-   }
-}
-
-static void
-store_dest(
-   struct spu_exec_machine *mach,
-   const union spu_exec_channel *chan,
-   const struct tgsi_full_dst_register *reg,
-   const struct tgsi_full_instruction *inst,
-   uint chan_index )
-{
-   union spu_exec_channel *dst;
-
-   switch( reg->Register.File ) {
-   case TGSI_FILE_NULL:
-      return;
-
-   case TGSI_FILE_OUTPUT:
-      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
-                           + reg->Register.Index].xyzw[chan_index];
-      break;
-
-   case TGSI_FILE_TEMPORARY:
-      dst = &mach->Temps[reg->Register.Index].xyzw[chan_index];
-      break;
-
-   case TGSI_FILE_ADDRESS:
-      dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index];
-      break;
-
-   default:
-      ASSERT( 0 );
-      return;
-   }
-
-   switch (inst->Instruction.Saturate)
-   {
-   case TGSI_SAT_NONE:
-      if (mach->ExecMask & 0x1)
-         dst->i[0] = chan->i[0];
-      if (mach->ExecMask & 0x2)
-         dst->i[1] = chan->i[1];
-      if (mach->ExecMask & 0x4)
-         dst->i[2] = chan->i[2];
-      if (mach->ExecMask & 0x8)
-         dst->i[3] = chan->i[3];
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
-      /* XXX need to obey ExecMask here */
-      dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
-      dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      ASSERT( 0 );
-      break;
-
-   default:
-      ASSERT( 0 );
-   }
-}
-
-#define FETCH(VAL,INDEX,CHAN)\
-    fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
-
-#define STORE(VAL,INDEX,CHAN)\
-    store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
-
-
-/**
- * Execute ARB-style KIL which is predicated by a src register.
- * Kill fragment if any of the four values is less than zero.
- */
-static void
-exec_kil(struct spu_exec_machine *mach,
-         const struct tgsi_full_instruction *inst)
-{
-   uint uniquemask;
-   uint chan_index;
-   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
-   union spu_exec_channel r[1];
-
-   /* This mask stores component bits that were already tested. */
-   uniquemask = 0;
-
-   for (chan_index = 0; chan_index < 4; chan_index++)
-   {
-      uint swizzle;
-      uint i;
-
-      /* unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_swizzle (
-                        &inst->Src[0],
-                        chan_index);
-
-      /* check if the component has not been already tested */
-      if (uniquemask & (1 << swizzle))
-         continue;
-      uniquemask |= 1 << swizzle;
-
-      FETCH(&r[0], 0, chan_index);
-      for (i = 0; i < 4; i++)
-         if (r[0].f[i] < 0.0f)
-            kilmask |= 1 << i;
-   }
-
-   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
-}
-
-/**
- * Execute NVIDIA-style KIL which is predicated by a condition code.
- * Kill fragment if the condition code is TRUE.
- */
-static void
-exec_kilp(struct spu_exec_machine *mach,
-          const struct tgsi_full_instruction *inst)
-{
-   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
-
-   /* TODO: build kilmask from CC mask */
-
-   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
-}
-
-/*
- * Fetch a texel using STR texture coordinates.
- */
-static void
-fetch_texel( struct spu_sampler *sampler,
-             const union spu_exec_channel *s,
-             const union spu_exec_channel *t,
-             const union spu_exec_channel *p,
-             float lodbias,  /* XXX should be float[4] */
-             union spu_exec_channel *r,
-             union spu_exec_channel *g,
-             union spu_exec_channel *b,
-             union spu_exec_channel *a )
-{
-   qword rgba[4];
-   qword out[4];
-
-   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, 
-			(float (*)[4]) rgba);
-
-   _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
-   r->q = out[0];
-   g->q = out[1];
-   b->q = out[2];
-   a->q = out[3];
-}
-
-
-static void
-exec_tex(struct spu_exec_machine *mach,
-         const struct tgsi_full_instruction *inst,
-         boolean biasLod, boolean projected)
-{
-   const uint unit = inst->Src[1].Register.Index;
-   union spu_exec_channel r[8];
-   uint chan_index;
-   float lodBias;
-
-   /*   printf("Sampler %u unit %u\n", sampler, unit); */
-
-   switch (inst->InstructionExtTexture.Texture) {
-   case TGSI_TEXTURE_1D:
-
-      FETCH(&r[0], 0, CHAN_X);
-
-      if (projected) {
-         FETCH(&r[1], 0, CHAN_W);
-         r[0].q = micro_div(r[0].q, r[1].q);
-      }
-
-      if (biasLod) {
-         FETCH(&r[1], 0, CHAN_W);
-         lodBias = r[2].f[0];
-      }
-      else
-         lodBias = 0.0;
-
-      fetch_texel(&mach->Samplers[unit],
-                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
-                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
-      break;
-
-   case TGSI_TEXTURE_2D:
-   case TGSI_TEXTURE_RECT:
-
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 0, CHAN_Y);
-      FETCH(&r[2], 0, CHAN_Z);
-
-      if (projected) {
-         FETCH(&r[3], 0, CHAN_W);
-         r[0].q = micro_div(r[0].q, r[3].q);
-         r[1].q = micro_div(r[1].q, r[3].q);
-         r[2].q = micro_div(r[2].q, r[3].q);
-      }
-
-      if (biasLod) {
-         FETCH(&r[3], 0, CHAN_W);
-         lodBias = r[3].f[0];
-      }
-      else
-         lodBias = 0.0;
-
-      fetch_texel(&mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
-                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
-      break;
-
-   case TGSI_TEXTURE_3D:
-   case TGSI_TEXTURE_CUBE:
-
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 0, CHAN_Y);
-      FETCH(&r[2], 0, CHAN_Z);
-
-      if (projected) {
-         FETCH(&r[3], 0, CHAN_W);
-         r[0].q = micro_div(r[0].q, r[3].q);
-         r[1].q = micro_div(r[1].q, r[3].q);
-         r[2].q = micro_div(r[2].q, r[3].q);
-      }
-
-      if (biasLod) {
-         FETCH(&r[3], 0, CHAN_W);
-         lodBias = r[3].f[0];
-      }
-      else
-         lodBias = 0.0;
-
-      fetch_texel(&mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], lodBias,
-                  &r[0], &r[1], &r[2], &r[3]);
-      break;
-
-   default:
-      ASSERT (0);
-   }
-
-   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-      STORE( &r[chan_index], 0, chan_index );
-   }
-}
-
-
-
-static void
-constant_interpolation(
-   struct spu_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   unsigned i;
-
-   for( i = 0; i < QUAD_SIZE; i++ ) {
-      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
-   }
-}
-
-static void
-linear_interpolation(
-   struct spu_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   const float x = mach->QuadPos.xyzw[0].f[0];
-   const float y = mach->QuadPos.xyzw[1].f[0];
-   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
-   const float dady = mach->InterpCoefs[attrib].dady[chan];
-   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
-   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
-   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
-   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
-   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
-}
-
-static void
-perspective_interpolation(
-   struct spu_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   const float x = mach->QuadPos.xyzw[0].f[0];
-   const float y = mach->QuadPos.xyzw[1].f[0];
-   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
-   const float dady = mach->InterpCoefs[attrib].dady[chan];
-   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
-   const float *w = mach->QuadPos.xyzw[3].f;
-   /* divide by W here */
-   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
-   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
-   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
-   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
-}
-
-
-typedef void (* interpolation_func)(
-   struct spu_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan );
-
-static void
-exec_declaration(struct spu_exec_machine *mach,
-                 const struct tgsi_full_declaration *decl)
-{
-   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
-      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-         unsigned first, last, mask;
-         interpolation_func interp;
-
-         first = decl->Range.First;
-         last = decl->Range.Last;
-         mask = decl->Declaration.UsageMask;
-
-         switch( decl->Declaration.Interpolate ) {
-         case TGSI_INTERPOLATE_CONSTANT:
-            interp = constant_interpolation;
-            break;
-
-         case TGSI_INTERPOLATE_LINEAR:
-            interp = linear_interpolation;
-            break;
-
-         case TGSI_INTERPOLATE_PERSPECTIVE:
-            interp = perspective_interpolation;
-            break;
-
-         default:
-            ASSERT( 0 );
-         }
-
-         if( mask == TGSI_WRITEMASK_XYZW ) {
-            unsigned i, j;
-
-            for( i = first; i <= last; i++ ) {
-               for( j = 0; j < NUM_CHANNELS; j++ ) {
-                  interp( mach, i, j );
-               }
-            }
-         }
-         else {
-            unsigned i, j;
-
-            for( j = 0; j < NUM_CHANNELS; j++ ) {
-               if( mask & (1 << j) ) {
-                  for( i = first; i <= last; i++ ) {
-                     interp( mach, i, j );
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-static void
-exec_instruction(
-   struct spu_exec_machine *mach,
-   const struct tgsi_full_instruction *inst,
-   int *pc )
-{
-   uint chan_index;
-   union spu_exec_channel r[8];
-
-   (*pc)++;
-
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ARL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 FETCH( &r[0], 0, chan_index );
-         r[0].q = si_cflts(r[0].q, 0);
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MOV:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LIT:
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-	 FETCH( &r[0], 0, CHAN_X );
-         if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
-	    STORE( &r[0], 0, CHAN_Y );
-	 }
-
-         if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-            FETCH( &r[1], 0, CHAN_Y );
-            r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
-
-            FETCH( &r[2], 0, CHAN_W );
-            r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
-            r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
-            r[1].q = micro_pow(r[1].q, r[2].q);
-
-            /* r0 = (r0 > 0.0) ? r1 : 0.0
-             */
-            r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
-            r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
-                             r[0].q);
-            STORE( &r[0], 0, CHAN_Z );
-         }
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_RCP:
-      FETCH( &r[0], 0, CHAN_X );
-      r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_RSQ:
-      FETCH( &r[0], 0, CHAN_X );
-      r[0].q = micro_sqrt(r[0].q);
-      r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_EXP:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_LOG:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_MUL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
-      {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         r[0].q = si_fm(r[0].q, r[1].q);
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_ADD:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         r[0].q = si_fa(r[0].q, r[1].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DP3:
-   /* TGSI_OPCODE_DOT3 */
-      FETCH( &r[0], 0, CHAN_X );
-      FETCH( &r[1], 1, CHAN_X );
-      r[0].q = si_fm(r[0].q, r[1].q);
-
-      FETCH( &r[1], 0, CHAN_Y );
-      FETCH( &r[2], 1, CHAN_Y );
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-
-      FETCH( &r[1], 0, CHAN_Z );
-      FETCH( &r[2], 1, CHAN_Z );
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-    case TGSI_OPCODE_DP4:
-    /* TGSI_OPCODE_DOT4 */
-       FETCH(&r[0], 0, CHAN_X);
-       FETCH(&r[1], 1, CHAN_X);
-
-      r[0].q = si_fm(r[0].q, r[1].q);
-
-       FETCH(&r[1], 0, CHAN_Y);
-       FETCH(&r[2], 1, CHAN_Y);
-
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-       FETCH(&r[1], 0, CHAN_Z);
-       FETCH(&r[2], 1, CHAN_Z);
-
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-       FETCH(&r[1], 0, CHAN_W);
-       FETCH(&r[2], 1, CHAN_W);
-
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DST:
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-	 FETCH( &r[0], 0, CHAN_Y );
-	 FETCH( &r[1], 1, CHAN_Y);
-      r[0].q = si_fm(r[0].q, r[1].q);
-	 STORE( &r[0], 0, CHAN_Y );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-	 FETCH( &r[0], 0, CHAN_Z );
-	 STORE( &r[0], 0, CHAN_Z );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-	 FETCH( &r[0], 1, CHAN_W );
-	 STORE( &r[0], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_MIN:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         r[0].q = micro_min(r[0].q, r[1].q);
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_MAX:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         r[0].q = micro_max(r[0].q, r[1].q);
-
-         STORE(&r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLT:
-   /* TGSI_OPCODE_SETLT */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-
-         r[0].q = micro_ge(r[0].q, r[1].q);
-         r[0].q = si_xori(r[0].q, 0xff);
-
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SGE:
-   /* TGSI_OPCODE_SETGE */
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         r[0].q = micro_ge(r[0].q, r[1].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MAD:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         FETCH( &r[2], 2, chan_index );
-         r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SUB:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-
-         r[0].q = si_fs(r[0].q, r[1].q);
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_LRP:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-         FETCH(&r[2], 2, chan_index);
-
-         r[1].q = si_fs(r[1].q, r[2].q);
-         r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_CND:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_DP2A:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_FRC:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = micro_frc(r[0].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_CLAMP:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_FLR:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = micro_flr(r[0].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_ROUND:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = micro_rnd(r[0].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_EX2:
-      FETCH(&r[0], 0, CHAN_X);
-
-      r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_LG2:
-      FETCH( &r[0], 0, CHAN_X );
-      r[0].q = micro_lg2(r[0].q);
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_POW:
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 1, CHAN_X);
-
-      r[0].q = micro_pow(r[0].q, r[1].q);
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_XPD:
-      /* TGSI_OPCODE_XPD */
-      FETCH(&r[0], 0, CHAN_Y);
-      FETCH(&r[1], 1, CHAN_Z);
-      FETCH(&r[3], 0, CHAN_Z);
-      FETCH(&r[4], 1, CHAN_Y);
-
-      /* r2 = (r0 * r1) - (r3 * r5)
-       */
-      r[2].q = si_fm(r[3].q, r[5].q);
-      r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-         STORE( &r[2], 0, CHAN_X );
-      }
-
-      FETCH(&r[2], 1, CHAN_X);
-      FETCH(&r[5], 0, CHAN_X);
-
-      /* r3 = (r3 * r2) - (r1 * r5)
-       */
-      r[1].q = si_fm(r[1].q, r[5].q);
-      r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-         STORE( &r[3], 0, CHAN_Y );
-      }
-
-      /* r5 = (r5 * r4) - (r0 * r2)
-       */
-      r[0].q = si_fm(r[0].q, r[2].q);
-      r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         STORE( &r[5], 0, CHAN_Z );
-      }
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-    case TGSI_OPCODE_ABS:
-       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-          FETCH(&r[0], 0, chan_index);
-
-          r[0].q = micro_abs(r[0].q);
-
-          STORE(&r[0], 0, chan_index);
-       }
-       break;
-
-   case TGSI_OPCODE_RCC:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_DPH:
-      FETCH(&r[0], 0, CHAN_X);
-      FETCH(&r[1], 1, CHAN_X);
-
-      r[0].q = si_fm(r[0].q, r[1].q);
-
-      FETCH(&r[1], 0, CHAN_Y);
-      FETCH(&r[2], 1, CHAN_Y);
-
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-      FETCH(&r[1], 0, CHAN_Z);
-      FETCH(&r[2], 1, CHAN_Z);
-
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-      FETCH(&r[1], 1, CHAN_W);
-
-      r[0].q = si_fa(r[0].q, r[1].q);
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_COS:
-      FETCH(&r[0], 0, CHAN_X);
-
-      r[0].q = micro_cos(r[0].q);
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-	 STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DDX:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = micro_ddx(r[0].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_DDY:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = micro_ddy(r[0].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_KILP:
-      exec_kilp (mach, inst);
-      break;
-
-   case TGSI_OPCODE_KIL:
-      exec_kil (mach, inst);
-      break;
-
-   case TGSI_OPCODE_PK2H:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_PK2US:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_PK4B:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_PK4UB:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_RFL:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_SEQ:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-
-         r[0].q = si_fceq(r[0].q, r[1].q);
-
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SFL:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_SGT:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         r[0].q = si_fcgt(r[0].q, r[1].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SIN:
-      FETCH( &r[0], 0, CHAN_X );
-      r[0].q = micro_sin(r[0].q);
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SLE:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-
-         r[0].q = si_fcgt(r[0].q, r[1].q);
-         r[0].q = si_xori(r[0].q, 0xff);
-
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SNE:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-
-         r[0].q = si_fceq(r[0].q, r[1].q);
-         r[0].q = si_xori(r[0].q, 0xff);
-
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_STR:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_TEX:
-      /* simple texture lookup */
-      /* src[0] = texcoord */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, FALSE, FALSE);
-      break;
-
-   case TGSI_OPCODE_TXB:
-      /* Texture lookup with lod bias */
-      /* src[0] = texcoord (src[0].w = load bias) */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, TRUE, FALSE);
-      break;
-
-   case TGSI_OPCODE_TXD:
-      /* Texture lookup with explict partial derivatives */
-      /* src[0] = texcoord */
-      /* src[1] = d[strq]/dx */
-      /* src[2] = d[strq]/dy */
-      /* src[3] = sampler unit */
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_TXL:
-      /* Texture lookup with explit LOD */
-      /* src[0] = texcoord (src[0].w = load bias) */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, TRUE, FALSE);
-      break;
-
-   case TGSI_OPCODE_TXP:
-      /* Texture lookup with projection */
-      /* src[0] = texcoord (src[0].w = projection) */
-      /* src[1] = sampler unit */
-      exec_tex(mach, inst, TRUE, TRUE);
-      break;
-
-   case TGSI_OPCODE_UP2H:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_UP2US:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_UP4B:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_UP4UB:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_X2D:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_ARA:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_ARR:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_BRA:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_CAL:
-      /* skip the call if no execution channels are enabled */
-      if (mach->ExecMask) {
-         /* do the call */
-
-         /* push the Cond, Loop, Cont stacks */
-         ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
-         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
-         ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
-         ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-
-         ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
-         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
-
-         /* note that PC was already incremented above */
-         mach->CallStack[mach->CallStackTop++] = *pc;
-         *pc = inst->InstructionExtLabel.Label;
-      }
-      break;
-
-   case TGSI_OPCODE_RET:
-      mach->FuncMask &= ~mach->ExecMask;
-      UPDATE_EXEC_MASK(mach);
-
-      if (mach->ExecMask == 0x0) {
-         /* really return now (otherwise, keep executing */
-
-         if (mach->CallStackTop == 0) {
-            /* returning from main() */
-            *pc = -1;
-            return;
-         }
-         *pc = mach->CallStack[--mach->CallStackTop];
-
-         /* pop the Cond, Loop, Cont stacks */
-         ASSERT(mach->CondStackTop > 0);
-         mach->CondMask = mach->CondStack[--mach->CondStackTop];
-         ASSERT(mach->LoopStackTop > 0);
-         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
-         ASSERT(mach->ContStackTop > 0);
-         mach->ContMask = mach->ContStack[--mach->ContStackTop];
-         ASSERT(mach->FuncStackTop > 0);
-         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
-
-         UPDATE_EXEC_MASK(mach);
-      }
-      break;
-
-   case TGSI_OPCODE_SSG:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_CMP:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH(&r[0], 0, chan_index);
-         FETCH(&r[1], 1, chan_index);
-         FETCH(&r[2], 2, chan_index);
-
-         /* r0 = (r0 < 0.0) ? r1 : r2
-          */
-         r[3].q = si_xor(r[3].q, r[3].q);
-         r[0].q = micro_lt(r[0].q, r[3].q);
-         r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
-
-         STORE(&r[0], 0, chan_index);
-      }
-      break;
-
-   case TGSI_OPCODE_SCS:
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-         FETCH( &r[0], 0, CHAN_X );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
-         r[1].q = micro_cos(r[0].q);
-         STORE( &r[1], 0, CHAN_X );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
-         r[1].q = micro_sin(r[0].q);
-         STORE( &r[1], 0, CHAN_Y );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
-         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
-      }
-      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
-         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
-      }
-      break;
-
-   case TGSI_OPCODE_NRM:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_DIV:
-      ASSERT( 0 );
-      break;
-
-   case TGSI_OPCODE_DP2:
-      FETCH( &r[0], 0, CHAN_X );
-      FETCH( &r[1], 1, CHAN_X );
-      r[0].q = si_fm(r[0].q, r[1].q);
-
-      FETCH( &r[1], 0, CHAN_Y );
-      FETCH( &r[2], 1, CHAN_Y );
-      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_IF:
-      /* push CondMask */
-      ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
-      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
-      FETCH( &r[0], 0, CHAN_X );
-      /* update CondMask */
-      if( ! r[0].u[0] ) {
-         mach->CondMask &= ~0x1;
-      }
-      if( ! r[0].u[1] ) {
-         mach->CondMask &= ~0x2;
-      }
-      if( ! r[0].u[2] ) {
-         mach->CondMask &= ~0x4;
-      }
-      if( ! r[0].u[3] ) {
-         mach->CondMask &= ~0x8;
-      }
-      UPDATE_EXEC_MASK(mach);
-      /* Todo: If CondMask==0, jump to ELSE */
-      break;
-
-   case TGSI_OPCODE_ELSE:
-      /* invert CondMask wrt previous mask */
-      {
-         uint prevMask;
-         ASSERT(mach->CondStackTop > 0);
-         prevMask = mach->CondStack[mach->CondStackTop - 1];
-         mach->CondMask = ~mach->CondMask & prevMask;
-         UPDATE_EXEC_MASK(mach);
-         /* Todo: If CondMask==0, jump to ENDIF */
-      }
-      break;
-
-   case TGSI_OPCODE_ENDIF:
-      /* pop CondMask */
-      ASSERT(mach->CondStackTop > 0);
-      mach->CondMask = mach->CondStack[--mach->CondStackTop];
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_END:
-      /* halt execution */
-      *pc = -1;
-      break;
-
-   case TGSI_OPCODE_PUSHA:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_POPA:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_CEIL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = micro_ceil(r[0].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_I2F:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = si_csflt(r[0].q, 0);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_NOT:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = si_xorbi(r[0].q, 0xff);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_TRUNC:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         r[0].q = micro_trunc(r[0].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SHL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-
-         r[0].q = si_shl(r[0].q, r[1].q);
-
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_ISHR:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         r[0].q = micro_ishr(r[0].q, r[1].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_AND:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         r[0].q = si_and(r[0].q, r[1].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_OR:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         r[0].q = si_or(r[0].q, r[1].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_MOD:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_XOR:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-         FETCH( &r[0], 0, chan_index );
-         FETCH( &r[1], 1, chan_index );
-         r[0].q = si_xor(r[0].q, r[1].q);
-         STORE( &r[0], 0, chan_index );
-      }
-      break;
-
-   case TGSI_OPCODE_SAD:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_TXF:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_TXQ:
-      ASSERT (0);
-      break;
-
-   case TGSI_OPCODE_EMIT:
-      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
-      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
-      break;
-
-   case TGSI_OPCODE_ENDPRIM:
-      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
-      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
-      break;
-
-   case TGSI_OPCODE_BGNLOOP:
-      /* push LoopMask and ContMasks */
-      ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
-      ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
-      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-      break;
-
-   case TGSI_OPCODE_ENDLOOP:
-      /* Restore ContMask, but don't pop */
-      ASSERT(mach->ContStackTop > 0);
-      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
-      if (mach->LoopMask) {
-         /* repeat loop: jump to instruction just past BGNLOOP */
-         *pc = inst->InstructionExtLabel.Label + 1;
-      }
-      else {
-         /* exit loop: pop LoopMask */
-         ASSERT(mach->LoopStackTop > 0);
-         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
-         /* pop ContMask */
-         ASSERT(mach->ContStackTop > 0);
-         mach->ContMask = mach->ContStack[--mach->ContStackTop];
-      }
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_BRK:
-      /* turn off loop channels for each enabled exec channel */
-      mach->LoopMask &= ~mach->ExecMask;
-      /* Todo: if mach->LoopMask == 0, jump to end of loop */
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_CONT:
-      /* turn off cont channels for each enabled exec channel */
-      mach->ContMask &= ~mach->ExecMask;
-      /* Todo: if mach->LoopMask == 0, jump to end of loop */
-      UPDATE_EXEC_MASK(mach);
-      break;
-
-   case TGSI_OPCODE_BGNSUB:
-      /* no-op */
-      break;
-
-   case TGSI_OPCODE_ENDSUB:
-      /* no-op */
-      break;
-
-   case TGSI_OPCODE_NOP:
-      break;
-
-   default:
-      ASSERT( 0 );
-   }
-}
-
-
-/**
- * Run TGSI interpreter.
- * \return bitmask of "alive" quad components
- */
-uint
-spu_exec_machine_run( struct spu_exec_machine *mach )
-{
-   uint i;
-   int pc = 0;
-
-   mach->CondMask = 0xf;
-   mach->LoopMask = 0xf;
-   mach->ContMask = 0xf;
-   mach->FuncMask = 0xf;
-   mach->ExecMask = 0xf;
-
-   mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
-   ASSERT(mach->CondStackTop == 0);
-   ASSERT(mach->LoopStackTop == 0);
-   ASSERT(mach->ContStackTop == 0);
-   ASSERT(mach->CallStackTop == 0);
-
-   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
-   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
-
-   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
-      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
-      mach->Primitives[0] = 0;
-   }
-
-
-   /* execute declarations (interpolants) */
-   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
-      for (i = 0; i < mach->NumDeclarations; i++) {
-         PIPE_ALIGN_VAR(16)
-         union {
-            struct tgsi_full_declaration decl;
-            qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
-         } d;
-         unsigned ea = (unsigned) (mach->Declarations + pc);
-
-         spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
-
-         exec_declaration( mach, &d.decl );
-      }
-   }
-
-   /* execute instructions, until pc is set to -1 */
-   while (pc != -1) {
-      PIPE_ALIGN_VAR(16)
-      union {
-         struct tgsi_full_instruction inst;
-         qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
-      } i;
-      unsigned ea = (unsigned) (mach->Instructions + pc);
-
-      spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
-      exec_instruction( mach, & i.inst, &pc );
-   }
-
-#if 0
-   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
-   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
-      /*
-       * Scale back depth component.
-       */
-      for (i = 0; i < 4; i++)
-         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
-   }
-#endif
-
-   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
-}
-
-
diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h
deleted file mode 100644
index 68f4479e53d..00000000000
--- a/src/gallium/drivers/cell/spu/spu_exec.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#if !defined SPU_EXEC_H
-#define SPU_EXEC_H
-
-#include "pipe/p_compiler.h"
-
-#include "spu_tgsi_exec.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-/**
-  * Registers may be treated as float, signed int or unsigned int.
-  */
-union spu_exec_channel
-{
-   float    f[QUAD_SIZE];
-   int      i[QUAD_SIZE];
-   unsigned u[QUAD_SIZE];
-   qword    q;
-};
-
-/**
-  * A vector[RGBA] of channels[4 pixels]
-  */
-struct spu_exec_vector
-{
-   union spu_exec_channel xyzw[NUM_CHANNELS];
-};
-
-/**
- * For fragment programs, information for computing fragment input
- * values from plane equation of the triangle/line.
- */
-struct spu_interp_coef
-{
-   float a0[NUM_CHANNELS];	/* in an xyzw layout */
-   float dadx[NUM_CHANNELS];
-   float dady[NUM_CHANNELS];
-};
-
-
-struct softpipe_tile_cache;  /**< Opaque to TGSI */
-
-/**
- * Information for sampling textures, which must be implemented
- * by code outside the TGSI executor.
- */
-struct spu_sampler
-{
-   const struct pipe_sampler_state *state;
-   struct pipe_resource *texture;
-   /** Get samples for four fragments in a quad */
-   void (*get_samples)(struct spu_sampler *sampler,
-                       const float s[QUAD_SIZE],
-                       const float t[QUAD_SIZE],
-                       const float p[QUAD_SIZE],
-                       float lodbias,
-                       float rgba[NUM_CHANNELS][QUAD_SIZE]);
-   void *pipe; /*XXX temporary*/
-   struct softpipe_tile_cache *cache;
-};
-
-
-/**
- * Run-time virtual machine state for executing TGSI shader.
- */
-struct spu_exec_machine
-{
-   /*
-    * 32 program temporaries
-    * 4  internal temporaries
-    * 1  address
-    */
-   PIPE_ALIGN_VAR(16)
-   struct spu_exec_vector       Temps[TGSI_EXEC_NUM_TEMPS 
-                                      + TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
-
-   struct spu_exec_vector       *Addrs;
-
-   struct spu_sampler           *Samplers;
-
-   float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
-   unsigned                      ImmLimit;
-   float                         (*Consts)[4];
-   struct spu_exec_vector       *Inputs;
-   struct spu_exec_vector       *Outputs;
-   unsigned                      Processor;
-
-   /* GEOMETRY processor only. */
-   unsigned                      *Primitives;
-
-   /* FRAGMENT processor only. */
-   const struct spu_interp_coef *InterpCoefs;
-   struct spu_exec_vector       QuadPos;
-
-   /* Conditional execution masks */
-   uint CondMask;  /**< For IF/ELSE/ENDIF */
-   uint LoopMask;  /**< For BGNLOOP/ENDLOOP */
-   uint ContMask;  /**< For loop CONT statements */
-   uint FuncMask;  /**< For function calls */
-   uint ExecMask;  /**< = CondMask & LoopMask */
-
-   /** Condition mask stack (for nested conditionals) */
-   uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
-   int CondStackTop;
-
-   /** Loop mask stack (for nested loops) */
-   uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
-   int LoopStackTop;
-
-   /** Loop continue mask stack (see comments in tgsi_exec.c) */
-   uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
-   int ContStackTop;
-
-   /** Function execution mask stack (for executing subroutine code) */
-   uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
-   int FuncStackTop;
-
-   /** Function call stack for saving/restoring the program counter */
-   uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
-   int CallStackTop;
-
-   struct tgsi_full_instruction *Instructions;
-   uint NumInstructions;
-
-   struct tgsi_full_declaration *Declarations;
-   uint NumDeclarations;
-};
-
-
-extern void
-spu_exec_machine_init(struct spu_exec_machine *mach,
-                      uint numSamplers,
-                      struct spu_sampler *samplers,
-                      unsigned processor);
-
-extern uint
-spu_exec_machine_run( struct spu_exec_machine *mach );
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* SPU_EXEC_H */
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
deleted file mode 100644
index 98919c43ffc..00000000000
--- a/src/gallium/drivers/cell/spu/spu_funcs.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-/**
- * SPU functions accessed by shaders.
- *
- * Authors: Brian Paul
- */
-
-
-#include <string.h>
-#include <libmisc.h>
-#include <math.h>
-#include <cos14_v.h>
-#include <sin14_v.h>
-#include <simdmath/exp2f4.h>
-#include <simdmath/log2f4.h>
-#include <simdmath/powf4.h>
-
-#include "cell/common.h"
-#include "spu_main.h"
-#include "spu_funcs.h"
-#include "spu_texture.h"
-
-
-/** For "return"-ing four vectors */
-struct vec_4x4
-{
-   vector float v[4];
-};
-
-
-static vector float
-spu_cos(vector float x)
-{
-   return _cos14_v(x);
-}
-
-static vector float
-spu_sin(vector float x)
-{
-   return _sin14_v(x);
-}
-
-static vector float
-spu_pow(vector float x, vector float y)
-{
-   return _powf4(x, y);
-}
-
-static vector float
-spu_exp2(vector float x)
-{
-   return _exp2f4(x);
-}
-
-static vector float
-spu_log2(vector float x)
-{
-   return _log2f4(x);
-}
-
-
-static struct vec_4x4
-spu_tex_2d(vector float s, vector float t, vector float r, vector float q,
-           unsigned unit)
-{
-   struct vec_4x4 colors;
-   (void) r;
-   (void) q;
-   spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
-   return colors;
-}
-
-static struct vec_4x4
-spu_tex_3d(vector float s, vector float t, vector float r, vector float q,
-           unsigned unit)
-{
-   struct vec_4x4 colors;
-   (void) r;
-   (void) q;
-   spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
-   return colors;
-}
-
-static struct vec_4x4
-spu_tex_cube(vector float s, vector float t, vector float r, vector float q,
-           unsigned unit)
-{
-   struct vec_4x4 colors;
-   (void) q;
-   sample_texture_cube(s, t, r, unit, colors.v);
-   return colors;
-}
-
-
-/**
- * Add named function to list of "exported" functions that will be
- * made available to the PPU-hosted code generator.
- */
-static void
-export_func(struct cell_spu_function_info *spu_functions,
-            const char *name, void *addr)
-{
-   uint n = spu_functions->num;
-   ASSERT(strlen(name) < 16);
-   strcpy(spu_functions->names[n], name);
-   spu_functions->addrs[n] = (uint) addr;
-   spu_functions->num++;
-   ASSERT(spu_functions->num <= 16);
-}
-
-
-/**
- * Return info about the SPU's function to the PPU / main memory.
- * The PPU needs to know the address of some SPU-side functions so
- * that we can generate shader code with function calls.
- */
-void
-return_function_info(void)
-{
-   PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs;
-   int tag = TAG_MISC;
-
-   ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
-
-   funcs.num = 0;
-   export_func(&funcs, "spu_cos", &spu_cos);
-   export_func(&funcs, "spu_sin", &spu_sin);
-   export_func(&funcs, "spu_pow", &spu_pow);
-   export_func(&funcs, "spu_exp2", &spu_exp2);
-   export_func(&funcs, "spu_log2", &spu_log2);
-   export_func(&funcs, "spu_tex_2d", &spu_tex_2d);
-   export_func(&funcs, "spu_tex_3d", &spu_tex_3d);
-   export_func(&funcs, "spu_tex_cube", &spu_tex_cube);
-
-   /* Send the function info back to the PPU / main memory */
-   mfc_put((void *) &funcs,  /* src in local store */
-           (unsigned int) spu.init.spu_functions, /* dst in main memory */
-           sizeof(funcs),  /* bytes */
-           tag,
-           0, /* tid */
-           0  /* rid */);
-   wait_on_mask(1 << tag);
-}
-
-
-
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h
deleted file mode 100644
index 3adb6ae99f9..00000000000
--- a/src/gallium/drivers/cell/spu/spu_funcs.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef SPU_FUNCS_H
-#define SPU_FUNCS_H
-
-extern void
-return_function_info(void);
-
-#endif
-
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
deleted file mode 100644
index 97c86d194da..00000000000
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-/* main() for Cell SPU code */
-
-
-#include <stdio.h>
-#include <libmisc.h>
-
-#include "pipe/p_defines.h"
-
-#include "spu_funcs.h"
-#include "spu_command.h"
-#include "spu_main.h"
-#include "spu_per_fragment_op.h"
-#include "spu_texture.h"
-//#include "spu_test.h"
-#include "cell/common.h"
-
-
-/*
-helpful headers:
-/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
-/opt/cell/sdk/usr/include/libmisc.h
-*/
-
-struct spu_global spu;
-
-
-static void
-one_time_init(void)
-{
-   memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
-   memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
-   invalidate_tex_cache();
-}
-
-/* In some versions of the SDK the SPE main takes 'unsigned long' as a
- * parameter.  In others it takes 'unsigned long long'.  Use a define to
- * select between the two.
- */
-#ifdef SPU_MAIN_PARAM_LONG_LONG
-typedef unsigned long long main_param_t;
-#else
-typedef unsigned long main_param_t;
-#endif
-
-/**
- * SPE entrypoint.
- */
-int
-main(main_param_t speid, main_param_t argp)
-{
-   int tag = 0;
-
-   (void) speid;
-
-   ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
-   ASSERT(sizeof(struct cell_command_render) % 8 == 0);
-   ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0);
-   ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
-
-   one_time_init();
-   spu_command_init();
-
-   D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
-   D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
-
-   /* get initialization data */
-   mfc_get(&spu.init,  /* dest */
-           (unsigned int) argp, /* src */
-           sizeof(struct cell_init_info), /* bytes */
-           tag,
-           0, /* tid */
-           0  /* rid */);
-   wait_on_mask( 1 << tag );
-
-   if (spu.init.id == 0) {
-      return_function_info();
-   }
-
-#if 0
-   if (spu.init.id==0)
-      spu_test_misc(spu.init.id);
-#endif
-
-   command_loop();
-
-   spu_command_close();
-
-   return 0;
-}
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
deleted file mode 100644
index a9d72f84d56..00000000000
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ /dev/null
@@ -1,269 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef SPU_MAIN_H
-#define SPU_MAIN_H
-
-
-#include <spu_mfcio.h>
-
-#include "cell/common.h"
-#include "draw/draw_vertex.h"
-#include "pipe/p_state.h"
-
-
-#if DEBUG
-/* These debug macros use the unusual construction ", ##__VA_ARGS__"
- * which expands to the expected comma + args if variadic arguments
- * are supplied, but swallows the comma if there are no variadic
- * arguments (which avoids syntax errors that would otherwise occur).
- */
-#define D_PRINTF(flag, format,...) \
-   if (spu.init.debug_flags & (flag)) \
-      printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
-#else
-#define D_PRINTF(...)
-#endif
-
-
-/**
- * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
- * The data may be addressed through several different types.
- */
-typedef union {
-   ushort us[TILE_SIZE][TILE_SIZE];
-   uint   ui[TILE_SIZE][TILE_SIZE];
-   vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4];
-   vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2];
-} tile_t;
-
-
-#define TILE_STATUS_CLEAR   1
-#define TILE_STATUS_DEFINED 2  /**< defined in FB, but not in local store */
-#define TILE_STATUS_CLEAN   3  /**< in local store, but not changed */
-#define TILE_STATUS_DIRTY   4  /**< modified locally, but not put back yet */
-#define TILE_STATUS_GETTING 5  /**< mfc_get() called but not yet arrived */
-
-
-/** Function for sampling textures */
-typedef void (*spu_sample_texture_2d_func)(vector float s,
-                                           vector float t,
-                                           uint unit, uint level, uint face,
-                                           vector float colors[4]);
-
-
-/** Function for performing per-fragment ops */
-typedef void (*spu_fragment_ops_func)(uint x, uint y,
-                                      tile_t *colorTile,
-                                      tile_t *depthStencilTile,
-                                      vector float fragZ,
-                                      vector float fragRed,
-                                      vector float fragGreen,
-                                      vector float fragBlue,
-                                      vector float fragAlpha,
-                                      vector unsigned int mask);
-
-/** Function for running fragment program */
-typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
-                                                         vector float *outputs,
-                                                         vector float *constants);
-
-
-PIPE_ALIGN_TYPE(16,
-struct spu_framebuffer
-{
-   void *color_start;              /**< addr of color surface in main memory */
-   void *depth_start;              /**< addr of depth surface in main memory */
-   enum pipe_format color_format;
-   enum pipe_format depth_format;
-   uint width;                     /**< width in pixels */
-   uint height;                    /**< height in pixels */
-   uint width_tiles;               /**< width in tiles */
-   uint height_tiles;              /**< width in tiles */
-
-   uint color_clear_value;
-   uint depth_clear_value;
-
-   uint zsize;                     /**< 0, 2 or 4 bytes per Z */
-   float zscale;                   /**< 65535.0, 2^24-1 or 2^32-1 */
-});
-
-
-/** per-texture level info */
-PIPE_ALIGN_TYPE(16,
-struct spu_texture_level
-{
-   void *start;
-   ushort width;
-   ushort height;
-   ushort depth;
-   ushort tiles_per_row;
-   uint bytes_per_image;
-   /** texcoord scale factors */
-   vector float scale_s;
-   vector float scale_t;
-   vector float scale_r;
-   /** texcoord masks (if REPEAT then size-1, else ~0) */
-   vector signed int mask_s;
-   vector signed int mask_t;
-   vector signed int mask_r;
-   /** texcoord clamp limits */
-   vector signed int max_s;
-   vector signed int max_t;
-   vector signed int max_r;
-});
-
-
-PIPE_ALIGN_TYPE(16,
-struct spu_texture
-{
-   struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
-   uint max_level;
-   uint target;  /**< PIPE_TEXTURE_x */
-});
-
-
-/**
- * All SPU global/context state will be in a singleton object of this type:
- */
-PIPE_ALIGN_TYPE(16,
-struct spu_global
-{
-   /** One-time init/constant info */
-   struct cell_init_info init;
-
-   /*
-    * Current state
-    */
-   struct spu_framebuffer fb;
-   struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
-   struct pipe_blend_state blend;
-   struct pipe_blend_color blend_color;
-   struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
-   struct pipe_rasterizer_state rasterizer;
-   struct spu_texture texture[PIPE_MAX_SAMPLERS];
-   struct vertex_info vertex_info;
-
-   /** Current color and Z tiles */
-   PIPE_ALIGN_VAR(16) tile_t ctile;
-   PIPE_ALIGN_VAR(16) tile_t ztile;
-
-   /** Read depth/stencil tiles? */
-   boolean read_depth_stencil;
-
-   /** Current tiles' status */
-   ubyte cur_ctile_status;
-   ubyte cur_ztile_status;
-
-   /** Status of all tiles in framebuffer */
-   PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
-   PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
-
-   /** Current fragment ops machine code, at 8-byte boundary */
-   uint *fragment_ops_code;
-   uint fragment_ops_code_size;
-   /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
-   spu_fragment_ops_func fragment_ops[2];
-
-   /** Current fragment program machine code, at 8-byte boundary */
-   PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
-   /** Current fragment ops function */
-   spu_fragment_program_func fragment_program;
-
-   /** Current texture sampler function */
-   spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS];
-   spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS];
-   spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS];
-
-   /** Fragment program constants */
-   vector float constants[4 * CELL_MAX_CONSTANTS];
-
-});
-
-
-extern struct spu_global spu;
-
-
-
-/* DMA TAGS */
-
-#define TAG_SURFACE_CLEAR     10
-#define TAG_VERTEX_BUFFER     11
-#define TAG_READ_TILE_COLOR   12
-#define TAG_READ_TILE_Z       13
-#define TAG_WRITE_TILE_COLOR  14
-#define TAG_WRITE_TILE_Z      15
-#define TAG_INDEX_BUFFER      16
-#define TAG_BATCH_BUFFER      17
-#define TAG_MISC              18
-#define TAG_DCACHE0           20
-#define TAG_DCACHE1           21
-#define TAG_DCACHE2           22
-#define TAG_DCACHE3           23
-#define TAG_FENCE             24
-
-
-static INLINE void
-wait_on_mask(unsigned tagMask)
-{
-   mfc_write_tag_mask( tagMask );
-   /* wait for completion of _any_ DMAs specified by tagMask */
-   mfc_read_tag_status_any();
-}
-
-
-static INLINE void
-wait_on_mask_all(unsigned tagMask)
-{
-   mfc_write_tag_mask( tagMask );
-   /* wait for completion of _any_ DMAs specified by tagMask */
-   mfc_read_tag_status_all();
-}
-
-
-
-
-
-static INLINE void
-memset16(ushort *d, ushort value, uint count)
-{
-   uint i;
-   for (i = 0; i < count; i++)
-      d[i] = value;
-}
-
-
-static INLINE void
-memset32(uint *d, uint value, uint count)
-{
-   uint i;
-   for (i = 0; i < count; i++)
-      d[i] = value;
-}
-
-
-#endif /* SPU_MAIN_H */
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
deleted file mode 100644
index 2415226a244..00000000000
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ /dev/null
@@ -1,631 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * \author Brian Paul
- */
-
-
-#include <transpose_matrix4x4.h>
-#include "pipe/p_format.h"
-#include "spu_main.h"
-#include "spu_colorpack.h"
-#include "spu_per_fragment_op.h"
-
-
-#define LINEAR_QUAD_LAYOUT 1
-
-
-static INLINE vector float
-spu_min(vector float a, vector float b)
-{
-   vector unsigned int m;
-   m = spu_cmpgt(a, b);    /* m = a > b ? ~0 : 0 */
-   return spu_sel(a, b, m);
-}
-
-
-static INLINE vector float
-spu_max(vector float a, vector float b)
-{
-   vector unsigned int m;
-   m = spu_cmpgt(a, b);    /* m = a > b ? ~0 : 0 */
-   return spu_sel(b, a, m);
-}
-
-
-/**
- * Called by rasterizer for each quad after the shader has run.  Do
- * all the per-fragment operations including alpha test, z test,
- * stencil test, blend, colormask and logicops.  This is a
- * fallback/debug function.  In reality we'll use a generated function
- * produced by the PPU.  But this function is useful for
- * debug/validation.
- */
-void
-spu_fallback_fragment_ops(uint x, uint y,
-                          tile_t *colorTile,
-                          tile_t *depthStencilTile,
-                          vector float fragZ,
-                          vector float fragR,
-                          vector float fragG,
-                          vector float fragB,
-                          vector float fragA,
-                          vector unsigned int mask)
-{
-   vector float frag_aos[4];
-   unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
-   unsigned int fragc0, fragc1, fragc2, fragc3;  /* fragment colors */
-
-   /*
-    * Do alpha test
-    */
-   if (spu.depth_stencil_alpha.alpha.enabled) {
-      vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value);
-      vector unsigned int amask;
-
-      switch (spu.depth_stencil_alpha.alpha.func) {
-      case PIPE_FUNC_LESS:
-         amask = spu_cmpgt(ref, fragA);  /* mask = (fragA < ref) */
-         break;
-      case PIPE_FUNC_GREATER:
-         amask = spu_cmpgt(fragA, ref);  /* mask = (fragA > ref) */
-         break;
-      case PIPE_FUNC_GEQUAL:
-         amask = spu_cmpgt(ref, fragA);
-         amask = spu_nor(amask, amask);
-         break;
-      case PIPE_FUNC_LEQUAL:
-         amask = spu_cmpgt(fragA, ref);
-         amask = spu_nor(amask, amask);
-         break;
-      case PIPE_FUNC_EQUAL:
-         amask = spu_cmpeq(ref, fragA);
-         break;
-      case PIPE_FUNC_NOTEQUAL:
-         amask = spu_cmpeq(ref, fragA);
-         amask = spu_nor(amask, amask);
-         break;
-      case PIPE_FUNC_ALWAYS:
-         amask = spu_splats(0xffffffffU);
-         break;
-      case PIPE_FUNC_NEVER:
-         amask = spu_splats( 0x0U);
-         break;
-      default:
-         ;
-      }
-
-      mask = spu_and(mask, amask);
-   }
-
-
-   /*
-    * Z and/or stencil testing...
-    */
-   if (spu.depth_stencil_alpha.depth.enabled ||
-       spu.depth_stencil_alpha.stencil[0].enabled) {
-
-      /* get four Z/Stencil values from tile */
-      vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
-      vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
-      vector unsigned int ifbZ = spu_and(ifbZS, mask24);
-      vector unsigned int ifbS = spu_andc(ifbZS, mask24);
-
-      if (spu.depth_stencil_alpha.stencil[0].enabled) {
-         /* do stencil test */
-         ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT);
-
-      }
-      else if (spu.depth_stencil_alpha.depth.enabled) {
-         /* do depth test */
-
-         ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
-                spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM);
-
-         vector unsigned int ifragZ;
-         vector unsigned int zmask;
-
-         /* convert four fragZ from float to uint */
-         fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
-         ifragZ = spu_convtu(fragZ, 0);
-
-         /* do depth comparison, setting zmask with results */
-         switch (spu.depth_stencil_alpha.depth.func) {
-         case PIPE_FUNC_LESS:
-            zmask = spu_cmpgt(ifbZ, ifragZ);  /* mask = (ifragZ < ifbZ) */
-            break;
-         case PIPE_FUNC_GREATER:
-            zmask = spu_cmpgt(ifragZ, ifbZ);  /* mask = (ifbZ > ifragZ) */
-            break;
-         case PIPE_FUNC_GEQUAL:
-            zmask = spu_cmpgt(ifbZ, ifragZ);
-            zmask = spu_nor(zmask, zmask);
-            break;
-         case PIPE_FUNC_LEQUAL:
-            zmask = spu_cmpgt(ifragZ, ifbZ);
-            zmask = spu_nor(zmask, zmask);
-            break;
-         case PIPE_FUNC_EQUAL:
-            zmask = spu_cmpeq(ifbZ, ifragZ);
-            break;
-         case PIPE_FUNC_NOTEQUAL:
-            zmask = spu_cmpeq(ifbZ, ifragZ);
-            zmask = spu_nor(zmask, zmask);
-            break;
-         case PIPE_FUNC_ALWAYS:
-            zmask = spu_splats(0xffffffffU);
-            break;
-         case PIPE_FUNC_NEVER:
-            zmask = spu_splats( 0x0U);
-            break;
-         default:
-            ;
-         }
-
-         mask = spu_and(mask, zmask);
-
-         /* merge framebuffer Z and fragment Z according to the mask */
-         ifbZ = spu_or(spu_and(ifragZ, mask),
-                       spu_andc(ifbZ, mask));
-      }
-
-      if (spu_extract(spu_orx(mask), 0)) {
-         /* put new fragment Z/Stencil values back into Z/Stencil tile */
-         depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
-
-         spu.cur_ztile_status = TILE_STATUS_DIRTY;
-      }
-   }
-
-
-   /*
-    * If we'll need the current framebuffer/tile colors for blending
-    * or logicop or colormask, fetch them now.
-    */
-   if (spu.blend.rt[0].blend_enable ||
-       spu.blend.logicop_enable ||
-       spu.blend.rt[0].colormask != 0xf) {
-
-#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
-      fbc0 = colorTile->ui[y][x*2+0];
-      fbc1 = colorTile->ui[y][x*2+1];
-      fbc2 = colorTile->ui[y][x*2+2];
-      fbc3 = colorTile->ui[y][x*2+3];
-#else
-      fbc0 = colorTile->ui[y+0][x+0];
-      fbc1 = colorTile->ui[y+0][x+1];
-      fbc2 = colorTile->ui[y+1][x+0];
-      fbc3 = colorTile->ui[y+1][x+1];
-#endif
-   }
-
-
-   /*
-    * Do blending
-    */
-   if (spu.blend.rt[0].blend_enable) {
-      /* blending terms, misc regs */
-      vector float term1r, term1g, term1b, term1a;
-      vector float term2r, term2g, term2b, term2a;
-      vector float one, tmp;
-
-      vector float fbRGBA[4];  /* current framebuffer colors */
-
-      /* convert framebuffer colors from packed int to vector float */
-      {
-         vector float temp[4]; /* float colors in AOS form */
-         switch (spu.fb.color_format) {
-         case PIPE_FORMAT_A8R8G8B8_UNORM:
-            temp[0] = spu_unpack_B8G8R8A8(fbc0);
-            temp[1] = spu_unpack_B8G8R8A8(fbc1);
-            temp[2] = spu_unpack_B8G8R8A8(fbc2);
-            temp[3] = spu_unpack_B8G8R8A8(fbc3);
-            break;
-         case PIPE_FORMAT_B8G8R8A8_UNORM:
-            temp[0] = spu_unpack_A8R8G8B8(fbc0);
-            temp[1] = spu_unpack_A8R8G8B8(fbc1);
-            temp[2] = spu_unpack_A8R8G8B8(fbc2);
-            temp[3] = spu_unpack_A8R8G8B8(fbc3);
-            break;
-         default:
-            ASSERT(0);
-         }
-         _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
-      }
-
-      /*
-       * Compute Src RGB terms (fragment color * factor)
-       */
-      switch (spu.blend.rt[0].rgb_src_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term1r = fragR;
-         term1g = fragG;
-         term1b = fragB;
-         break;
-      case PIPE_BLENDFACTOR_ZERO:
-         term1r =
-         term1g =
-         term1b = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         term1r = spu_mul(fragR, fragR);
-         term1g = spu_mul(fragG, fragG);
-         term1b = spu_mul(fragB, fragB);
-         break;
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term1r = spu_mul(fragR, fragA);
-         term1g = spu_mul(fragG, fragA);
-         term1b = spu_mul(fragB, fragA);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         term1r = spu_mul(fragR, fbRGBA[0]);
-         term1g = spu_mul(fragG, fbRGBA[1]);
-         term1b = spu_mul(fragB, fbRGBA[1]);
-         break;
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term1r = spu_mul(fragR, fbRGBA[3]);
-         term1g = spu_mul(fragG, fbRGBA[3]);
-         term1b = spu_mul(fragB, fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
-         term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
-         term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
-         break;
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
-         term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
-         term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
-         break;
-      /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Compute Src Alpha term (fragment alpha * factor)
-       */
-      switch (spu.blend.rt[0].alpha_src_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term1a = fragA;
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         term1a = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term1a = spu_mul(fragA, fragA);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term1a = spu_mul(fragA, fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
-         break;
-      /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Compute Dest RGB terms (framebuffer color * factor)
-       */
-      switch (spu.blend.rt[0].rgb_dst_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term2r = fbRGBA[0];
-         term2g = fbRGBA[1];
-         term2b = fbRGBA[2];
-         break;
-      case PIPE_BLENDFACTOR_ZERO:
-         term2r =
-         term2g =
-         term2b = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         term2r = spu_mul(fbRGBA[0], fragR);
-         term2g = spu_mul(fbRGBA[1], fragG);
-         term2b = spu_mul(fbRGBA[2], fragB);
-         break;
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term2r = spu_mul(fbRGBA[0], fragA);
-         term2g = spu_mul(fbRGBA[1], fragA);
-         term2b = spu_mul(fbRGBA[2], fragA);
-         break;
-      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         one = spu_splats(1.0f);
-         tmp = spu_sub(one, fragA);
-         term2r = spu_mul(fbRGBA[0], tmp);
-         term2g = spu_mul(fbRGBA[1], tmp);
-         term2b = spu_mul(fbRGBA[2], tmp);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
-         term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
-         term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
-         break;
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
-         term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
-         term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
-         term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
-         term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
-         break;
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
-         term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
-         term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
-         break;
-       /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Compute Dest Alpha term (framebuffer alpha * factor)
-       */
-      switch (spu.blend.rt[0].alpha_dst_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term2a = fbRGBA[3];
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         term2a = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term2a = spu_mul(fbRGBA[3], fragA);
-         break;
-      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         one = spu_splats(1.0f);
-         tmp = spu_sub(one, fragA);
-         term2a = spu_mul(fbRGBA[3], tmp);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
-         break;
-      /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Combine Src/Dest RGB terms
-       */
-      switch (spu.blend.rt[0].rgb_func) {
-      case PIPE_BLEND_ADD:
-         fragR = spu_add(term1r, term2r);
-         fragG = spu_add(term1g, term2g);
-         fragB = spu_add(term1b, term2b);
-         break;
-      case PIPE_BLEND_SUBTRACT:
-         fragR = spu_sub(term1r, term2r);
-         fragG = spu_sub(term1g, term2g);
-         fragB = spu_sub(term1b, term2b);
-         break;
-      case PIPE_BLEND_REVERSE_SUBTRACT:
-         fragR = spu_sub(term2r, term1r);
-         fragG = spu_sub(term2g, term1g);
-         fragB = spu_sub(term2b, term1b);
-         break;
-      case PIPE_BLEND_MIN:
-         fragR = spu_min(term1r, term2r);
-         fragG = spu_min(term1g, term2g);
-         fragB = spu_min(term1b, term2b);
-         break;
-      case PIPE_BLEND_MAX:
-         fragR = spu_max(term1r, term2r);
-         fragG = spu_max(term1g, term2g);
-         fragB = spu_max(term1b, term2b);
-         break;
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Combine Src/Dest A term
-       */
-      switch (spu.blend.rt[0].alpha_func) {
-      case PIPE_BLEND_ADD:
-         fragA = spu_add(term1a, term2a);
-         break;
-      case PIPE_BLEND_SUBTRACT:
-         fragA = spu_sub(term1a, term2a);
-         break;
-      case PIPE_BLEND_REVERSE_SUBTRACT:
-         fragA = spu_sub(term2a, term1a);
-         break;
-      case PIPE_BLEND_MIN:
-         fragA = spu_min(term1a, term2a);
-         break;
-      case PIPE_BLEND_MAX:
-         fragA = spu_max(term1a, term2a);
-         break;
-      default:
-         ASSERT(0);
-      }
-   }
-
-
-   /*
-    * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
-    */
-#if 0
-   /* original code */
-   {
-      vector float frag_soa[4];
-      frag_soa[0] = fragR;
-      frag_soa[1] = fragG;
-      frag_soa[2] = fragB;
-      frag_soa[3] = fragA;
-      _transpose_matrix4x4(frag_aos, frag_soa);
-   }
-#else
-   /* short-cut relying on function parameter layout: */
-   _transpose_matrix4x4(frag_aos, &fragR);
-   (void) fragG;
-   (void) fragB;
-#endif
-
-   /*
-    * Pack fragment float colors into 32-bit RGBA words.
-    */
-   switch (spu.fb.color_format) {
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
-      fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
-      fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
-      fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
-      break;
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
-      fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
-      fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
-      fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
-      fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
-      break;
-   default:
-      fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
-      ASSERT(0);
-   }
-
-
-   /*
-    * Do color masking
-    */
-   if (spu.blend.rt[0].colormask != 0xf) {
-      uint cmask = 0x0; /* each byte corresponds to a color channel */
-
-      /* Form bitmask depending on color buffer format and colormask bits */
-      switch (spu.fb.color_format) {
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
-         if (spu.blend.rt[0].colormask & PIPE_MASK_R)
-            cmask |= 0x00ff0000; /* red */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_G)
-            cmask |= 0x0000ff00; /* green */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_B)
-            cmask |= 0x000000ff; /* blue */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_A)
-            cmask |= 0xff000000; /* alpha */
-         break;
-      case PIPE_FORMAT_A8R8G8B8_UNORM:
-         if (spu.blend.rt[0].colormask & PIPE_MASK_R)
-            cmask |= 0x0000ff00; /* red */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_G)
-            cmask |= 0x00ff0000; /* green */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_B)
-            cmask |= 0xff000000; /* blue */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_A)
-            cmask |= 0x000000ff; /* alpha */
-         break;
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Apply color mask to the 32-bit packed colors.
-       * if (cmask[i])
-       *    frag color[i] = frag color[i];
-       * else
-       *    frag color[i] = framebuffer color[i];
-       */
-      fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
-      fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
-      fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
-      fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
-   }
-
-
-   /*
-    * Do logic ops
-    */
-   if (spu.blend.logicop_enable) {
-      /* XXX to do */
-      /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
-   }
-
-
-   /*
-    * If mask is non-zero, mark tile as dirty.
-    */
-   if (spu_extract(spu_orx(mask), 0)) {
-      spu.cur_ctile_status = TILE_STATUS_DIRTY;
-   }
-   else {
-      /* write no fragments */
-      return;
-   }
-
-
-   /*
-    * Write new fragment/quad colors to the framebuffer/tile.
-    * Only write pixels where the corresponding mask word is set.
-    */
-#if LINEAR_QUAD_LAYOUT
-   /*
-    * Quad layout:
-    *  +--+--+--+--+
-    *  |p0|p1|p2|p3|...
-    *  +--+--+--+--+
-    */
-   if (spu_extract(mask, 0))
-      colorTile->ui[y][x*2] = fragc0;
-   if (spu_extract(mask, 1))
-      colorTile->ui[y][x*2+1] = fragc1;
-   if (spu_extract(mask, 2))
-      colorTile->ui[y][x*2+2] = fragc2;
-   if (spu_extract(mask, 3))
-      colorTile->ui[y][x*2+3] = fragc3;
-#else
-   /*
-    * Quad layout:
-    *  +--+--+
-    *  |p0|p1|...
-    *  +--+--+
-    *  |p2|p3|...
-    *  +--+--+
-    */
-   if (spu_extract(mask, 0))
-      colorTile->ui[y+0][x+0] = fragc0;
-   if (spu_extract(mask, 1))
-      colorTile->ui[y+0][x+1] = fragc1;
-   if (spu_extract(mask, 2))
-      colorTile->ui[y+1][x+0] = fragc2;
-   if (spu_extract(mask, 3))
-      colorTile->ui[y+1][x+1] = fragc3;
-#endif
-}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
deleted file mode 100644
index f817abf0463..00000000000
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef SPU_PER_FRAGMENT_OP
-#define SPU_PER_FRAGMENT_OP
-
-
-extern void
-spu_fallback_fragment_ops(uint x, uint y,
-                          tile_t *colorTile,
-                          tile_t *depthStencilTile,
-                          vector float fragZ,
-                          vector float fragRed,
-                          vector float fragGreen,
-                          vector float fragBlue,
-                          vector float fragAlpha,
-                          vector unsigned int mask);
-
-
-#endif /* SPU_PER_FRAGMENT_OP */
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
deleted file mode 100644
index 14987e3c3a2..00000000000
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <libmisc.h>
-#include <spu_mfcio.h>
-
-#include "spu_main.h"
-#include "spu_render.h"
-#include "spu_shuffle.h"
-#include "spu_tri.h"
-#include "spu_tile.h"
-#include "cell/common.h"
-#include "util/u_memory.h"
-
-
-/**
- * Given a rendering command's bounding box (in pixels) compute the
- * location of the corresponding screen tile bounding box.
- */
-static INLINE void
-tile_bounding_box(const struct cell_command_render *render,
-                  uint *txmin, uint *tymin,
-                  uint *box_num_tiles, uint *box_width_tiles)
-{
-#if 0
-   /* Debug: full-window bounding box */
-   uint txmax = spu.fb.width_tiles - 1;
-   uint tymax = spu.fb.height_tiles - 1;
-   *txmin = 0;
-   *tymin = 0;
-   *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
-   *box_width_tiles = spu.fb.width_tiles;
-   (void) render;
-   (void) txmax;
-   (void) tymax;
-#else
-   uint txmax, tymax, box_height_tiles;
-
-   *txmin = (uint) render->xmin / TILE_SIZE;
-   *tymin = (uint) render->ymin / TILE_SIZE;
-   txmax = (uint) render->xmax / TILE_SIZE;
-   tymax = (uint) render->ymax / TILE_SIZE;
-   if (txmax >= spu.fb.width_tiles)
-      txmax = spu.fb.width_tiles-1;
-   if (tymax >= spu.fb.height_tiles)
-      tymax = spu.fb.height_tiles-1;
-   *box_width_tiles = txmax - *txmin + 1;
-   box_height_tiles = tymax - *tymin + 1;
-   *box_num_tiles = *box_width_tiles * box_height_tiles;
-#endif
-#if 0
-   printf("SPU %u: bounds: %g, %g  ...  %g, %g\n", spu.init.id,
-          render->xmin, render->ymin, render->xmax, render->ymax);
-   printf("SPU %u: tiles:  %u, %u .. %u, %u\n",
-           spu.init.id, *txmin, *tymin, txmax, tymax);
-   ASSERT(render->xmin <= render->xmax);
-   ASSERT(render->ymin <= render->ymax);
-#endif
-}
-
-
-/** Check if the tile at (tx,ty) belongs to this SPU */
-static INLINE boolean
-my_tile(uint tx, uint ty)
-{
-   return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id;
-}
-
-
-/**
- * Start fetching non-clear color/Z tiles from main memory
- */
-static INLINE void
-get_cz_tiles(uint tx, uint ty)
-{
-   if (spu.read_depth_stencil) {
-      if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
-         //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
-         get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
-         spu.cur_ztile_status = TILE_STATUS_GETTING;
-      }
-   }
-
-   if (spu.cur_ctile_status != TILE_STATUS_CLEAR) {
-      //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty);
-      get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
-      spu.cur_ctile_status = TILE_STATUS_GETTING;
-   }
-}
-
-
-/**
- * Start putting dirty color/Z tiles back to main memory
- */
-static INLINE void
-put_cz_tiles(uint tx, uint ty)
-{
-   if (spu.cur_ztile_status == TILE_STATUS_DIRTY) {
-      /* tile was modified and needs to be written back */
-      //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty);
-      put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1);
-      spu.cur_ztile_status = TILE_STATUS_DEFINED;
-   }
-   else if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
-      /* tile was never used */
-      spu.cur_ztile_status = TILE_STATUS_DEFINED;
-      //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty);
-   }
-
-   if (spu.cur_ctile_status == TILE_STATUS_DIRTY) {
-      /* tile was modified and needs to be written back */
-      //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty);
-      put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0);
-      spu.cur_ctile_status = TILE_STATUS_DEFINED;
-   }
-   else if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
-      /* tile was never used */
-      spu.cur_ctile_status = TILE_STATUS_DEFINED;
-      //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty);
-   }
-}
-
-
-/**
- * Wait for 'put' of color/z tiles to complete.
- */
-static INLINE void
-wait_put_cz_tiles(void)
-{
-   wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
-   if (spu.read_depth_stencil) {
-      wait_on_mask(1 << TAG_WRITE_TILE_Z);
-   }
-}
-
-
-/**
- * Render primitives
- * \param pos_incr  returns value indicating how may words to skip after
- *                  this command in the batch buffer
- */
-void
-cmd_render(const struct cell_command_render *render, uint *pos_incr)
-{
-   /* we'll DMA into these buffers */
-   PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE];
-   const uint vertex_size = render->vertex_size; /* in bytes */
-   /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
-   uint index_bytes;
-   const ubyte *vertices;
-   const ushort *indexes;
-   uint i, j;
-   uint num_tiles;
-
-   D_PRINTF(CELL_DEBUG_CMD,
-            "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
-            render->prim_type,
-            render->num_verts,
-            render->num_indexes,
-            render->inline_verts);
-
-   ASSERT(sizeof(*render) % 4 == 0);
-   ASSERT(total_vertex_bytes % 16 == 0);
-   ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
-   ASSERT(render->num_indexes % 3 == 0);
-
-
-   /* indexes are right after the render command in the batch buffer */
-   indexes = (const ushort *) (render + 1);
-   index_bytes = ROUNDUP8(render->num_indexes * 2);
-   *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
-
-
-   if (render->inline_verts) {
-      /* Vertices are after indexes in batch buffer at next 16-byte addr */
-      vertices = (const ubyte *) render + (*pos_incr * 8);
-      vertices = (const ubyte *) align_pointer((void *) vertices, 16);
-      ASSERT_ALIGN16(vertices);
-      *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
-   }
-   else {
-      /* Begin DMA fetch of vertex buffer */
-      ubyte *src = spu.init.buffers[render->vertex_buf];
-      ubyte *dest = vertex_data;
-
-      /* skip vertex data we won't use */
-#if 01
-      src += render->min_index * vertex_size;
-      dest += render->min_index * vertex_size;
-      total_vertex_bytes -= render->min_index * vertex_size;
-#endif
-      ASSERT(total_vertex_bytes % 16 == 0);
-      ASSERT_ALIGN16(dest);
-      ASSERT_ALIGN16(src);
-
-      mfc_get(dest,   /* in vertex_data[] array */
-              (unsigned int) src,  /* src in main memory */
-              total_vertex_bytes,  /* size */
-              TAG_VERTEX_BUFFER,
-              0, /* tid */
-              0  /* rid */);
-
-      vertices = vertex_data;
-
-      wait_on_mask(1 << TAG_VERTEX_BUFFER);
-   }
-
-
-   /**
-    ** find tiles which intersect the prim bounding box
-    **/
-   uint txmin, tymin, box_width_tiles, box_num_tiles;
-   tile_bounding_box(render, &txmin, &tymin,
-                     &box_num_tiles, &box_width_tiles);
-
-
-   /* make sure any pending clears have completed */
-   wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
-
-
-   num_tiles = 0;
-
-   /**
-    ** loop over tiles, rendering tris
-    **/
-   for (i = 0; i < box_num_tiles; i++) {
-      const uint tx = txmin + i % box_width_tiles;
-      const uint ty = tymin + i / box_width_tiles;
-
-      ASSERT(tx < spu.fb.width_tiles);
-      ASSERT(ty < spu.fb.height_tiles);
-
-      if (!my_tile(tx, ty))
-         continue;
-
-      num_tiles++;
-
-      spu.cur_ctile_status = spu.ctile_status[ty][tx];
-      spu.cur_ztile_status = spu.ztile_status[ty][tx];
-
-      get_cz_tiles(tx, ty);
-
-      uint drawn = 0;
-
-      const qword vertex_sizes = (qword)spu_splats(vertex_size);
-      const qword verticess = (qword)spu_splats((uint)vertices);
-
-      ASSERT_ALIGN16(&indexes[0]);
-
-      const uint num_indexes = render->num_indexes;
-
-      /* loop over tris
-	   * &indexes[0] will be 16 byte aligned.  This loop is heavily unrolled
-	   * avoiding variable rotates when extracting vertex indices.
-	   */
-      for (j = 0; j < num_indexes; j += 24) {
-         /* Load three vectors, containing 24 ushort indices */
-         const qword* lower_qword = (qword*)&indexes[j];
-         const qword indices0 = lower_qword[0];
-         const qword indices1 = lower_qword[1];
-         const qword indices2 = lower_qword[2];
-
-         /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */
-		 /* Straightforward rotates for these */
-         qword vs0 = indices0;
-         qword vs1 = si_shlqbyi(indices0, 6);
-         qword vs3 = si_shlqbyi(indices1, 2);
-         qword vs4 = si_shlqbyi(indices1, 8);
-         qword vs6 = si_shlqbyi(indices2, 4);
-         qword vs7 = si_shlqbyi(indices2, 10);
-
-         /* For tri 2 and 5, the three indices are split across two machine
-		  * words - rotate and combine */
-         const qword tmp2a = si_shlqbyi(indices0, 12);
-         const qword tmp2b = si_rotqmbyi(indices1, 12|16);
-         qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20)));
-
-         const qword tmp5a = si_shlqbyi(indices1, 14);
-         const qword tmp5b = si_rotqmbyi(indices2, 14|16);
-         qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60)));
-
-         /* unpack indices from halfword slots to word slots */
-         vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0));
-         vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0));
-         vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0));
-         vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0));
-         vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0));
-         vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0));
-         vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0));
-         vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0));
-
-         /* Calculate address of vertex in vertices[] */
-         vs0 = si_mpya(vs0, vertex_sizes, verticess);
-         vs1 = si_mpya(vs1, vertex_sizes, verticess);
-         vs2 = si_mpya(vs2, vertex_sizes, verticess);
-         vs3 = si_mpya(vs3, vertex_sizes, verticess);
-         vs4 = si_mpya(vs4, vertex_sizes, verticess);
-         vs5 = si_mpya(vs5, vertex_sizes, verticess);
-         vs6 = si_mpya(vs6, vertex_sizes, verticess);
-         vs7 = si_mpya(vs7, vertex_sizes, verticess);
-
-         /* Select the appropriate call based on the number of vertices 
-		  * remaining */
-         switch(num_indexes - j) {
-            default: drawn += tri_draw(vs7, tx, ty);
-            case 21: drawn += tri_draw(vs6, tx, ty);
-            case 18: drawn += tri_draw(vs5, tx, ty);
-            case 15: drawn += tri_draw(vs4, tx, ty);
-            case 12: drawn += tri_draw(vs3, tx, ty);
-            case 9:  drawn += tri_draw(vs2, tx, ty);
-            case 6:  drawn += tri_draw(vs1, tx, ty);
-            case 3:  drawn += tri_draw(vs0, tx, ty);
-         }
-      }
-
-      //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
-
-      /* write color/z tiles back to main framebuffer, if dirtied */
-      put_cz_tiles(tx, ty);
-
-      wait_put_cz_tiles(); /* XXX seems unnecessary... */
-
-      spu.ctile_status[ty][tx] = spu.cur_ctile_status;
-      spu.ztile_status[ty][tx] = spu.cur_ztile_status;
-   }
-
-   D_PRINTF(CELL_DEBUG_CMD,
-            "RENDER done (%u tiles hit)\n",
-            num_tiles);
-}
diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h
deleted file mode 100644
index 493434f0878..00000000000
--- a/src/gallium/drivers/cell/spu/spu_render.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef SPU_RENDER_H
-#define SPU_RENDER_H
-
-#include "cell/common.h"
-
-extern void
-cmd_render(const struct cell_command_render *render, uint *pos_incr);
-
-#endif /* SPU_RENDER_H */
-
diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h
deleted file mode 100644
index 74f2a0b6d2e..00000000000
--- a/src/gallium/drivers/cell/spu/spu_shuffle.h
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef SPU_SHUFFLE_H
-#define SPU_SHUFFLE_H
-
-/*
- * Generate shuffle patterns with minimal fuss.
- *
- * Based on ideas from 
- * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf
- *
- * A-P indicates 0-15th position in first vector
- * a-p indicates 0-15th position in second vector
- *
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f|
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- * |          A|          B|          C|          D|
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |    A|    B|    C|    D|    E|    F|    G|    H|
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P|
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- *
- * x or X indicates 0xff
- * 8 indicates 0x80
- * 0 indicates 0x00
- *
- * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector 
- * unsigned char literal suitable for use with spu_shuffle().
- *
- * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector 
- * literal suitable for use with si_shufb().
- *
- *
- * For example :
- * SHUFB4(A,A,A,A)
- * expands to :
- * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3})
- * 
- * SHUFFLE8(A,B,a,b,C,c,8,8)
- * expands to :
- * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,
- *				 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0})
- *
- */
-
-#include <spu_intrinsics.h>
-
-#define SHUFFLE_PATTERN_4_A__  0x00, 0x01, 0x02, 0x03
-#define SHUFFLE_PATTERN_4_B__  0x04, 0x05, 0x06, 0x07
-#define SHUFFLE_PATTERN_4_C__  0x08, 0x09, 0x0a, 0x0b
-#define SHUFFLE_PATTERN_4_D__  0x0c, 0x0d, 0x0e, 0x0f
-#define SHUFFLE_PATTERN_4_a__  0x10, 0x11, 0x12, 0x13
-#define SHUFFLE_PATTERN_4_b__  0x14, 0x15, 0x16, 0x17
-#define SHUFFLE_PATTERN_4_c__  0x18, 0x19, 0x1a, 0x1b
-#define SHUFFLE_PATTERN_4_d__  0x1c, 0x1d, 0x1e, 0x1f
-#define SHUFFLE_PATTERN_4_X__  0xc0, 0xc0, 0xc0, 0xc0
-#define SHUFFLE_PATTERN_4_x__  0xc0, 0xc0, 0xc0, 0xc0
-#define SHUFFLE_PATTERN_4_0__  0x80, 0x80, 0x80, 0x80
-#define SHUFFLE_PATTERN_4_8__  0xe0, 0xe0, 0xe0, 0xe0
-
-#define SHUFFLE_VECTOR_4__(A, B, C, D) \
-   SHUFFLE_PATTERN_4_##A##__, \
-   SHUFFLE_PATTERN_4_##B##__, \
-   SHUFFLE_PATTERN_4_##C##__, \
-   SHUFFLE_PATTERN_4_##D##__
-
-#define SHUFFLE4(A, B, C, D) \
-   ((const vector unsigned char){ \
-      SHUFFLE_VECTOR_4__(A, B, C, D) \
-   })
-
-#define SHUFB4(A, B, C, D) \
-   ((const qword){ \
-      SHUFFLE_VECTOR_4__(A, B, C, D) \
-   })
-
-
-#define SHUFFLE_PATTERN_8_A__  0x00, 0x01
-#define SHUFFLE_PATTERN_8_B__  0x02, 0x03
-#define SHUFFLE_PATTERN_8_C__  0x04, 0x05
-#define SHUFFLE_PATTERN_8_D__  0x06, 0x07
-#define SHUFFLE_PATTERN_8_E__  0x08, 0x09
-#define SHUFFLE_PATTERN_8_F__  0x0a, 0x0b
-#define SHUFFLE_PATTERN_8_G__  0x0c, 0x0d
-#define SHUFFLE_PATTERN_8_H__  0x0e, 0x0f
-#define SHUFFLE_PATTERN_8_a__  0x10, 0x11
-#define SHUFFLE_PATTERN_8_b__  0x12, 0x13
-#define SHUFFLE_PATTERN_8_c__  0x14, 0x15
-#define SHUFFLE_PATTERN_8_d__  0x16, 0x17
-#define SHUFFLE_PATTERN_8_e__  0x18, 0x19
-#define SHUFFLE_PATTERN_8_f__  0x1a, 0x1b
-#define SHUFFLE_PATTERN_8_g__  0x1c, 0x1d
-#define SHUFFLE_PATTERN_8_h__  0x1e, 0x1f
-#define SHUFFLE_PATTERN_8_X__  0xc0, 0xc0
-#define SHUFFLE_PATTERN_8_x__  0xc0, 0xc0
-#define SHUFFLE_PATTERN_8_0__  0x80, 0x80
-#define SHUFFLE_PATTERN_8_8__  0xe0, 0xe0
-
-
-#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
-   SHUFFLE_PATTERN_8_##A##__, \
-   SHUFFLE_PATTERN_8_##B##__, \
-   SHUFFLE_PATTERN_8_##C##__, \
-   SHUFFLE_PATTERN_8_##D##__, \
-   SHUFFLE_PATTERN_8_##E##__, \
-   SHUFFLE_PATTERN_8_##F##__, \
-   SHUFFLE_PATTERN_8_##G##__, \
-   SHUFFLE_PATTERN_8_##H##__
-
-#define SHUFFLE8(A, B, C, D, E, F, G, H) \
-   ((const vector unsigned char){ \
-      SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
-   })
-
-#define SHUFB8(A, B, C, D, E, F, G, H) \
-   ((const qword){ \
-      SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
-   })
-
-
-#define SHUFFLE_PATTERN_16_A__  0x00
-#define SHUFFLE_PATTERN_16_B__  0x01
-#define SHUFFLE_PATTERN_16_C__  0x02
-#define SHUFFLE_PATTERN_16_D__  0x03
-#define SHUFFLE_PATTERN_16_E__  0x04
-#define SHUFFLE_PATTERN_16_F__  0x05
-#define SHUFFLE_PATTERN_16_G__  0x06
-#define SHUFFLE_PATTERN_16_H__  0x07
-#define SHUFFLE_PATTERN_16_I__  0x08
-#define SHUFFLE_PATTERN_16_J__  0x09
-#define SHUFFLE_PATTERN_16_K__  0x0a
-#define SHUFFLE_PATTERN_16_L__  0x0b
-#define SHUFFLE_PATTERN_16_M__  0x0c
-#define SHUFFLE_PATTERN_16_N__  0x0d
-#define SHUFFLE_PATTERN_16_O__  0x0e
-#define SHUFFLE_PATTERN_16_P__  0x0f
-#define SHUFFLE_PATTERN_16_a__  0x10
-#define SHUFFLE_PATTERN_16_b__  0x11
-#define SHUFFLE_PATTERN_16_c__  0x12
-#define SHUFFLE_PATTERN_16_d__  0x13
-#define SHUFFLE_PATTERN_16_e__  0x14
-#define SHUFFLE_PATTERN_16_f__  0x15
-#define SHUFFLE_PATTERN_16_g__  0x16
-#define SHUFFLE_PATTERN_16_h__  0x17
-#define SHUFFLE_PATTERN_16_i__  0x18
-#define SHUFFLE_PATTERN_16_j__  0x19
-#define SHUFFLE_PATTERN_16_k__  0x1a
-#define SHUFFLE_PATTERN_16_l__  0x1b
-#define SHUFFLE_PATTERN_16_m__  0x1c
-#define SHUFFLE_PATTERN_16_n__  0x1d
-#define SHUFFLE_PATTERN_16_o__  0x1e
-#define SHUFFLE_PATTERN_16_p__  0x1f
-#define SHUFFLE_PATTERN_16_X__  0xc0
-#define SHUFFLE_PATTERN_16_x__  0xc0
-#define SHUFFLE_PATTERN_16_0__  0x80
-#define SHUFFLE_PATTERN_16_8__  0xe0
-
-#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
-   SHUFFLE_PATTERN_16_##A##__, \
-   SHUFFLE_PATTERN_16_##B##__, \
-   SHUFFLE_PATTERN_16_##C##__, \
-   SHUFFLE_PATTERN_16_##D##__, \
-   SHUFFLE_PATTERN_16_##E##__, \
-   SHUFFLE_PATTERN_16_##F##__, \
-   SHUFFLE_PATTERN_16_##G##__, \
-   SHUFFLE_PATTERN_16_##H##__, \
-   SHUFFLE_PATTERN_16_##I##__, \
-   SHUFFLE_PATTERN_16_##J##__, \
-   SHUFFLE_PATTERN_16_##K##__, \
-   SHUFFLE_PATTERN_16_##L##__, \
-   SHUFFLE_PATTERN_16_##M##__, \
-   SHUFFLE_PATTERN_16_##N##__, \
-   SHUFFLE_PATTERN_16_##O##__, \
-   SHUFFLE_PATTERN_16_##P##__
-
-#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
-   ((const vector unsigned char){ \
-      SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
-   })
-
-#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
-   ((const qword){ \
-      SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
-   })
-
-#endif
diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c
deleted file mode 100644
index 69784c89788..00000000000
--- a/src/gallium/drivers/cell/spu/spu_texture.c
+++ /dev/null
@@ -1,641 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include <math.h>
-
-#include "pipe/p_compiler.h"
-#include "spu_main.h"
-#include "spu_texture.h"
-#include "spu_tile.h"
-#include "spu_colorpack.h"
-#include "spu_dcache.h"
-
-
-/**
- * Mark all tex cache entries as invalid.
- */
-void
-invalidate_tex_cache(void)
-{
-   uint lvl;
-   for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
-      uint unit = 0;
-      uint bytes = 4 * spu.texture[unit].level[lvl].width
-         * spu.texture[unit].level[lvl].height;
-
-      if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
-         bytes *= 6;
-      else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
-         bytes *= spu.texture[unit].level[lvl].depth;
-
-      spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
-   }
-}
-
-
-/**
- * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
- *
- * NOTE: in the typical case of bilinear filtering, the four texels
- * are in a 2x2 group so we could get by with just two dcache fetches
- * (two side-by-side texels per fetch).  But when bilinear filtering
- * wraps around a texture edge, we'll probably need code like we have
- * now.
- * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
- * it's quite likely that the four pixels in a quad will need some of the
- * same texels.  So look into doing texture fetches for four pixels at
- * a time.
- */
-static void
-get_four_texels(const struct spu_texture_level *tlevel, uint face,
-                vec_int4 x, vec_int4 y,
-                vec_uint4 *texels)
-{
-   unsigned texture_ea = (uintptr_t) tlevel->start;
-   const vec_int4 tile_x = spu_rlmask(x, -5);  /* tile_x = x / 32 */
-   const vec_int4 tile_y = spu_rlmask(y, -5);  /* tile_y = y / 32 */
-   const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
-   const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
-
-   const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
-   const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
-
-   qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
-   tile_offset = si_mpy((qword) tile_offset, tile_size);
-
-   qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
-   texel_offset = si_mpyui(texel_offset, 4);
-   
-   vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
-   
-   texture_ea = texture_ea + face * tlevel->bytes_per_image;
-
-   spu_dcache_fetch_unaligned((qword *) & texels[0],
-                              texture_ea + spu_extract(offset, 0), 4);
-   spu_dcache_fetch_unaligned((qword *) & texels[1],
-                              texture_ea + spu_extract(offset, 1), 4);
-   spu_dcache_fetch_unaligned((qword *) & texels[2],
-                              texture_ea + spu_extract(offset, 2), 4);
-   spu_dcache_fetch_unaligned((qword *) & texels[3],
-                              texture_ea + spu_extract(offset, 3), 4);
-}
-
-
-/** clamp vec to [0, max] */
-static INLINE vector signed int
-spu_clamp(vector signed int vec, vector signed int max)
-{
-   static const vector signed int zero = {0,0,0,0};
-   vector unsigned int c;
-   c = spu_cmpgt(vec, zero);    /* c = vec > zero ? ~0 : 0 */
-   vec = spu_sel(zero, vec, c);
-   c = spu_cmpgt(vec, max);    /* c = vec > max ? ~0 : 0 */
-   vec = spu_sel(vec, max, c);
-   return vec;
-}
-
-
-
-/**
- * Do nearest texture sampling for four pixels.
- * \param colors  returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
- */
-void
-sample_texture_2d_nearest(vector float s, vector float t,
-                          uint unit, uint level, uint face,
-                          vector float colors[4])
-{
-   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
-   vector float ss = spu_mul(s, tlevel->scale_s);
-   vector float tt = spu_mul(t, tlevel->scale_t);
-   vector signed int is = spu_convts(ss, 0);
-   vector signed int it = spu_convts(tt, 0);
-   vec_uint4 texels[4];
-
-   /* PIPE_TEX_WRAP_REPEAT */
-   is = spu_and(is, tlevel->mask_s);
-   it = spu_and(it, tlevel->mask_t);
-
-   /* PIPE_TEX_WRAP_CLAMP */
-   is = spu_clamp(is, tlevel->max_s);
-   it = spu_clamp(it, tlevel->max_t);
-
-   get_four_texels(tlevel, face, is, it, texels);
-
-   /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
-   spu_unpack_A8R8G8B8_transpose4(texels, colors);
-}
-
-
-/**
- * Do bilinear texture sampling for four pixels.
- * \param colors  returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
- */
-void
-sample_texture_2d_bilinear(vector float s, vector float t,
-                           uint unit, uint level, uint face,
-                           vector float colors[4])
-{
-   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
-   static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
-
-   vector float ss = spu_madd(s, tlevel->scale_s, half);
-   vector float tt = spu_madd(t, tlevel->scale_t, half);
-
-   vector signed int is0 = spu_convts(ss, 0);
-   vector signed int it0 = spu_convts(tt, 0);
-
-   /* is + 1, it + 1 */
-   vector signed int is1 = spu_add(is0, 1);
-   vector signed int it1 = spu_add(it0, 1);
-
-   /* PIPE_TEX_WRAP_REPEAT */
-   is0 = spu_and(is0, tlevel->mask_s);
-   it0 = spu_and(it0, tlevel->mask_t);
-   is1 = spu_and(is1, tlevel->mask_s);
-   it1 = spu_and(it1, tlevel->mask_t);
-
-   /* PIPE_TEX_WRAP_CLAMP */
-   is0 = spu_clamp(is0, tlevel->max_s);
-   it0 = spu_clamp(it0, tlevel->max_t);
-   is1 = spu_clamp(is1, tlevel->max_s);
-   it1 = spu_clamp(it1, tlevel->max_t);
-
-   /* get packed int texels */
-   vector unsigned int texels[16];
-   get_four_texels(tlevel, face, is0, it0, texels + 0);  /* upper-left */
-   get_four_texels(tlevel, face, is1, it0, texels + 4);  /* upper-right */
-   get_four_texels(tlevel, face, is0, it1, texels + 8);  /* lower-left */
-   get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
-
-   /* convert packed int texels to float colors */
-   vector float ftexels[16];
-   spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
-   spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
-   spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
-   spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
-
-   /* Compute weighting factors in [0,1]
-    * Multiply texcoord by 1024, AND with 1023, convert back to float.
-    */
-   vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
-   vector signed int iss1024 = spu_convts(ss1024, 0);
-   iss1024 = spu_and(iss1024, 1023);
-   vector float sWeights0 = spu_convtf(iss1024, 10);
-
-   vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
-   vector signed int itt1024 = spu_convts(tt1024, 0);
-   itt1024 = spu_and(itt1024, 1023);
-   vector float tWeights0 = spu_convtf(itt1024, 10);
-
-   /* 1 - sWeight and 1 - tWeight */
-   vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
-   vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
-
-   /* reds, for four pixels */
-   ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
-   ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
-   ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
-   ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
-   colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
-                       spu_add(ftexels[8], ftexels[12]));
-
-   /* greens, for four pixels */
-   ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
-   ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
-   ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
-   ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
-   colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
-                       spu_add(ftexels[9], ftexels[13]));
-
-   /* blues, for four pixels */
-   ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
-   ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
-   ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
-   ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
-   colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
-                       spu_add(ftexels[10], ftexels[14]));
-
-   /* alphas, for four pixels */
-   ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
-   ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
-   ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
-   ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
-   colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
-                       spu_add(ftexels[11], ftexels[15]));
-}
-
-
-
-/**
- * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
- */
-static INLINE void
-transpose(vector unsigned int *mOut0,
-          vector unsigned int *mOut1,
-          vector unsigned int *mOut2,
-          vector unsigned int *mOut3,
-          vector unsigned int *mIn)
-{
-  vector unsigned int abcd, efgh, ijkl, mnop;	/* input vectors */
-  vector unsigned int aeim, bfjn, cgko, dhlp;	/* output vectors */
-  vector unsigned int aibj, ckdl, emfn, gohp;	/* intermediate vectors */
-
-  vector unsigned char shufflehi = ((vector unsigned char) {
-					       0x00, 0x01, 0x02, 0x03,
-					       0x10, 0x11, 0x12, 0x13,
-					       0x04, 0x05, 0x06, 0x07,
-					       0x14, 0x15, 0x16, 0x17});
-  vector unsigned char shufflelo = ((vector unsigned char) {
-					       0x08, 0x09, 0x0A, 0x0B,
-					       0x18, 0x19, 0x1A, 0x1B,
-					       0x0C, 0x0D, 0x0E, 0x0F,
-					       0x1C, 0x1D, 0x1E, 0x1F});
-  abcd = *(mIn+0);
-  efgh = *(mIn+1);
-  ijkl = *(mIn+2);
-  mnop = *(mIn+3);
-
-  aibj = spu_shuffle(abcd, ijkl, shufflehi);
-  ckdl = spu_shuffle(abcd, ijkl, shufflelo);
-  emfn = spu_shuffle(efgh, mnop, shufflehi);
-  gohp = spu_shuffle(efgh, mnop, shufflelo);
-
-  aeim = spu_shuffle(aibj, emfn, shufflehi);
-  bfjn = spu_shuffle(aibj, emfn, shufflelo);
-  cgko = spu_shuffle(ckdl, gohp, shufflehi);
-  dhlp = spu_shuffle(ckdl, gohp, shufflelo);
-
-  *mOut0 = aeim;
-  *mOut1 = bfjn;
-  *mOut2 = cgko;
-  *mOut3 = dhlp;
-}
-
-
-/**
- * Bilinear filtering, using int instead of float arithmetic for computing
- * sample weights.
- */
-void
-sample_texture_2d_bilinear_int(vector float s, vector float t,
-                               uint unit, uint level, uint face,
-                               vector float colors[4])
-{
-   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
-   static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
-
-   /* Scale texcoords by size of texture, and add half pixel bias */
-   vector float ss = spu_madd(s, tlevel->scale_s, half);
-   vector float tt = spu_madd(t, tlevel->scale_t, half);
-
-   /* convert float coords to fixed-pt coords with 7 fraction bits */
-   vector signed int is = spu_convts(ss, 7);  /* XXX really need floor() here */
-   vector signed int it = spu_convts(tt, 7);  /* XXX really need floor() here */
-
-   /* compute integer texel weights in [0, 127] */
-   vector signed int sWeights0 = spu_and(is, 127);
-   vector signed int tWeights0 = spu_and(it, 127);
-   vector signed int sWeights1 = spu_sub(127, sWeights0);
-   vector signed int tWeights1 = spu_sub(127, tWeights0);
-
-   /* texel coords: is0 = is / 128, it0 = is / 128 */
-   vector signed int is0 = spu_rlmask(is, -7);
-   vector signed int it0 = spu_rlmask(it, -7);
-
-   /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
-   vector signed int is1 = spu_add(is0, 1);
-   vector signed int it1 = spu_add(it0, 1);
-
-   /* PIPE_TEX_WRAP_REPEAT */
-   is0 = spu_and(is0, tlevel->mask_s);
-   it0 = spu_and(it0, tlevel->mask_t);
-   is1 = spu_and(is1, tlevel->mask_s);
-   it1 = spu_and(it1, tlevel->mask_t);
-
-   /* PIPE_TEX_WRAP_CLAMP */
-   is0 = spu_clamp(is0, tlevel->max_s);
-   it0 = spu_clamp(it0, tlevel->max_t);
-   is1 = spu_clamp(is1, tlevel->max_s);
-   it1 = spu_clamp(it1, tlevel->max_t);
-
-   /* get packed int texels */
-   vector unsigned int texels[16];
-   get_four_texels(tlevel, face, is0, it0, texels + 0);  /* upper-left */
-   get_four_texels(tlevel, face, is1, it0, texels + 4);  /* upper-right */
-   get_four_texels(tlevel, face, is0, it1, texels + 8);  /* lower-left */
-   get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
-
-   /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
-   {
-      static const unsigned char ZERO = 0x80;
-      int i;
-      for (i = 0; i < 16; i++) {
-         texels[i] = spu_shuffle(texels[i], texels[i],
-                                 ((vector unsigned char) {
-                                    ZERO, ZERO, ZERO, 1,
-                                    ZERO, ZERO, ZERO, 2,
-                                    ZERO, ZERO, ZERO, 3,
-                                    ZERO, ZERO, ZERO, 0}));
-      }
-   }
-
-   /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
-   vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
-      texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
-   transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
-   transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
-   transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
-   transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
-
-   /* computed weighted colors */
-   vector unsigned int c0, c1, c2, c3, cSum;
-
-   /* red */
-   c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
-   c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
-   c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
-   c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
-   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
-   colors[0] = spu_convtf(cSum, 22);
-
-   /* green */
-   c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
-   c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
-   c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
-   c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
-   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
-   colors[1] = spu_convtf(cSum, 22);
-
-   /* blue */
-   c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
-   c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
-   c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
-   c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
-   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
-   colors[2] = spu_convtf(cSum, 22);
-
-   /* alpha */
-   c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
-   c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
-   c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
-   c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
-   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
-   colors[3] = spu_convtf(cSum, 22);
-}
-
-
-
-/**
- * Compute level of detail factor from texcoords.
- */
-static INLINE float
-compute_lambda_2d(uint unit, vector float s, vector float t)
-{
-   uint baseLevel = 0;
-   float width = spu.texture[unit].level[baseLevel].width;
-   float height = spu.texture[unit].level[baseLevel].width;
-   float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
-   float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
-   float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
-   float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
-#if 0
-   /* ideal value */
-   float x = dsdx * dsdx + dtdx * dtdx;
-   float y = dsdy * dsdy + dtdy * dtdy;
-   float rho = x > y ? x : y;
-   rho = sqrtf(rho);
-#else
-   /* approximation */
-   dsdx = fabsf(dsdx);
-   dsdy = fabsf(dsdy);
-   dtdx = fabsf(dtdx);
-   dtdy = fabsf(dtdy);
-   float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
-#endif
-   float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
-   return lambda;
-}
-
-
-/**
- * Blend two sets of colors according to weight.
- */
-static void
-blend_colors(vector float c0[4], const vector float c1[4], float weight)
-{
-   vector float t = spu_splats(weight);
-   vector float dc0 = spu_sub(c1[0], c0[0]);
-   vector float dc1 = spu_sub(c1[1], c0[1]);
-   vector float dc2 = spu_sub(c1[2], c0[2]);
-   vector float dc3 = spu_sub(c1[3], c0[3]);
-   c0[0] = spu_madd(dc0, t, c0[0]);
-   c0[1] = spu_madd(dc1, t, c0[1]);
-   c0[2] = spu_madd(dc2, t, c0[2]);
-   c0[3] = spu_madd(dc3, t, c0[3]);
-}
-
-
-/**
- * Texture sampling with level of detail selection and possibly mipmap
- * interpolation.
- */
-void
-sample_texture_2d_lod(vector float s, vector float t,
-                      uint unit, uint level_ignored, uint face,
-                      vector float colors[4])
-{
-   /*
-    * Note that we're computing a lambda/lod here that's used for all
-    * four pixels in the quad.
-    */
-   float lambda = compute_lambda_2d(unit, s, t);
-
-   (void) face;
-   (void) level_ignored;
-
-   /* apply lod bias */
-   lambda += spu.sampler[unit].lod_bias;
-
-   /* clamp */
-   if (lambda < spu.sampler[unit].min_lod)
-      lambda = spu.sampler[unit].min_lod;
-   else if (lambda > spu.sampler[unit].max_lod)
-      lambda = spu.sampler[unit].max_lod;
-
-   if (lambda <= 0.0f) {
-      /* magnify */
-      spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
-   }
-   else {
-      /* minify */
-      if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
-         /* sample two mipmap levels and interpolate */
-         int level = (int) lambda;
-         if (level > (int) spu.texture[unit].max_level)
-            level = spu.texture[unit].max_level;
-         spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
-         if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
-            /* sample second mipmap level */
-            float weight = lambda - (float) level;
-            level++;
-            if (level <= (int) spu.texture[unit].max_level) {
-               vector float colors2[4];
-               spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
-               blend_colors(colors, colors2, weight);
-            }
-         }
-      }
-      else {
-         /* sample one mipmap level */
-         int level = (int) (lambda + 0.5f);
-         if (level > (int) spu.texture[unit].max_level)
-            level = spu.texture[unit].max_level;
-         spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
-      }
-   }
-}
-
-
-/** XXX need a SIMD version of this */
-static unsigned
-choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
-{
-   /*
-      major axis
-      direction     target                             sc     tc    ma
-      ----------    -------------------------------    ---    ---   ---
-       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
-       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
-       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
-       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
-       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
-       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
-   */
-   const float arx = fabsf(rx);
-   const float ary = fabsf(ry);
-   const float arz = fabsf(rz);
-   unsigned face;
-   float sc, tc, ma;
-
-   if (arx > ary && arx > arz) {
-      if (rx >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_X;
-         sc = -rz;
-         tc = -ry;
-         ma = arx;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_X;
-         sc = rz;
-         tc = -ry;
-         ma = arx;
-      }
-   }
-   else if (ary > arx && ary > arz) {
-      if (ry >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_Y;
-         sc = rx;
-         tc = rz;
-         ma = ary;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Y;
-         sc = rx;
-         tc = -rz;
-         ma = ary;
-      }
-   }
-   else {
-      if (rz > 0.0F) {
-         face = PIPE_TEX_FACE_POS_Z;
-         sc = rx;
-         tc = -ry;
-         ma = arz;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Z;
-         sc = -rx;
-         tc = -ry;
-         ma = arz;
-      }
-   }
-
-   *newS = (sc / ma + 1.0F) * 0.5F;
-   *newT = (tc / ma + 1.0F) * 0.5F;
-
-   return face;
-}
-
-
-
-void
-sample_texture_cube(vector float s, vector float t, vector float r,
-                    uint unit, vector float colors[4])
-{
-   uint p, faces[4], level = 0;
-   float newS[4], newT[4];
-
-   /* Compute cube faces referenced by the four sets of texcoords.
-    * XXX we should SIMD-ize this.
-    */
-   for (p = 0; p < 4; p++) {      
-      float rx = spu_extract(s, p);
-      float ry = spu_extract(t, p);
-      float rz = spu_extract(r, p);
-      faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
-   }
-
-   if (faces[0] == faces[1] &&
-       faces[0] == faces[2] &&
-       faces[0] == faces[3]) {
-      /* GOOD!  All four texcoords refer to the same cube face */
-      s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
-      t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
-      spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
-   }
-   else {
-      /* BAD!  The four texcoords refer to different faces */
-      for (p = 0; p < 4; p++) {      
-         vector float c[4];
-
-         spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
-                                     unit, level, faces[p], c);
-
-         float red = spu_extract(c[0], p);
-         float green = spu_extract(c[1], p);
-         float blue = spu_extract(c[2], p);
-         float alpha = spu_extract(c[3], p);
-
-         colors[0] = spu_insert(red,   colors[0], p);
-         colors[1] = spu_insert(green, colors[1], p);
-         colors[2] = spu_insert(blue,  colors[2], p);
-         colors[3] = spu_insert(alpha, colors[3], p);
-      }
-   }
-}
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h
deleted file mode 100644
index 7b75b007b5a..00000000000
--- a/src/gallium/drivers/cell/spu/spu_texture.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef SPU_TEXTURE_H
-#define SPU_TEXTURE_H
-
-
-#include "pipe/p_compiler.h"
-
-
-extern void
-invalidate_tex_cache(void);
-
-
-extern void
-sample_texture_2d_nearest(vector float s, vector float t,
-                          uint unit, uint level, uint face,
-                          vector float colors[4]);
-
-
-extern void
-sample_texture_2d_bilinear(vector float s, vector float t,
-                           uint unit, uint level, uint face,
-                           vector float colors[4]);
-
-extern void
-sample_texture_2d_bilinear_int(vector float s, vector float t,
-                               uint unit, uint level, uint face,
-                               vector float colors[4]);
-
-
-extern void
-sample_texture_2d_lod(vector float s, vector float t,
-                      uint unit, uint level, uint face,
-                      vector float colors[4]);
-
-
-extern void
-sample_texture_cube(vector float s, vector float t, vector float r,
-                    uint unit, vector float colors[4]);
-
-
-#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h b/src/gallium/drivers/cell/spu/spu_tgsi_exec.h
deleted file mode 100644
index 6f2a3d30b91..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef SPU_TGSI_EXEC_H
-#define SPU_TGSI_EXEC_H
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-
-#define NUM_CHANNELS 4  /* R,G,B,A */
-#define QUAD_SIZE    4  /* 4 pixel/quad */
-
-
-
-#define TGSI_EXEC_NUM_TEMPS       128
-#define TGSI_EXEC_NUM_IMMEDIATES  256
-
-/*
- * Locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TGSI_EXEC_TEMP_00000000_IDX    (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_00000000_CHAN   0
-
-#define TGSI_EXEC_TEMP_7FFFFFFF_IDX    (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_7FFFFFFF_CHAN   1
-
-#define TGSI_EXEC_TEMP_80000000_IDX    (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_80000000_CHAN   2
-
-#define TGSI_EXEC_TEMP_FFFFFFFF_IDX    (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_FFFFFFFF_CHAN   3
-
-#define TGSI_EXEC_TEMP_ONE_IDX         (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_ONE_CHAN        0
-
-#define TGSI_EXEC_TEMP_TWO_IDX         (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_TWO_CHAN        1
-
-#define TGSI_EXEC_TEMP_128_IDX         (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_128_CHAN        2
-
-#define TGSI_EXEC_TEMP_MINUS_128_IDX   (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_MINUS_128_CHAN  3
-
-#define TGSI_EXEC_TEMP_KILMASK_IDX     (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_KILMASK_CHAN    0
-
-#define TGSI_EXEC_TEMP_OUTPUT_IDX      (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_OUTPUT_CHAN     1
-
-#define TGSI_EXEC_TEMP_PRIMITIVE_IDX   (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_PRIMITIVE_CHAN  2
-
-/* NVIDIA condition code (CC) vector
- */
-#define TGSI_EXEC_CC_GT       0x01
-#define TGSI_EXEC_CC_EQ       0x02
-#define TGSI_EXEC_CC_LT       0x04
-#define TGSI_EXEC_CC_UN       0x08
-
-#define TGSI_EXEC_CC_X_MASK   0x000000ff
-#define TGSI_EXEC_CC_X_SHIFT  0
-#define TGSI_EXEC_CC_Y_MASK   0x0000ff00
-#define TGSI_EXEC_CC_Y_SHIFT  8
-#define TGSI_EXEC_CC_Z_MASK   0x00ff0000
-#define TGSI_EXEC_CC_Z_SHIFT  16
-#define TGSI_EXEC_CC_W_MASK   0xff000000
-#define TGSI_EXEC_CC_W_SHIFT  24
-
-#define TGSI_EXEC_TEMP_CC_IDX         (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_CC_CHAN         3
-
-#define TGSI_EXEC_TEMP_THREE_IDX      (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_THREE_CHAN      0
-
-#define TGSI_EXEC_TEMP_HALF_IDX       (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_CHAN       1
-
-/* execution mask, each value is either 0 or ~0 */
-#define TGSI_EXEC_MASK_IDX            (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_MASK_CHAN            2
-
-/* 4 register buffer for various purposes */
-#define TGSI_EXEC_TEMP_R0           (TGSI_EXEC_NUM_TEMPS + 4)
-#define TGSI_EXEC_NUM_TEMP_R        4
-
-#define TGSI_EXEC_TEMP_ADDR         (TGSI_EXEC_NUM_TEMPS + 8)
-#define TGSI_EXEC_NUM_ADDRS         1
-
-/* predicate register */
-#define TGSI_EXEC_TEMP_P0           (TGSI_EXEC_NUM_TEMPS + 9)
-#define TGSI_EXEC_NUM_PREDS         1
-
-#define TGSI_EXEC_NUM_TEMP_EXTRAS   10
-
-
-
-#define TGSI_EXEC_MAX_NESTING  32
-#define TGSI_EXEC_MAX_COND_NESTING  TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_LOOP_NESTING  TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_CALL_NESTING  TGSI_EXEC_MAX_NESTING
-
-/* The maximum number of input attributes per vertex. For 2D
- * input register files, this is the stride between two 1D
- * arrays.
- */
-#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
-
-/* The maximum number of constant vectors per constant buffer.
- */
-#define TGSI_EXEC_MAX_CONST_BUFFER  4096
-
-/* The maximum number of vertices per primitive */
-#define TGSI_MAX_PRIM_VERTICES 6
-
-/* The maximum number of primitives to be generated */
-#define TGSI_MAX_PRIMITIVES 64
-
-/* The maximum total number of vertices */
-#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* TGSI_EXEC_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
deleted file mode 100644
index 6905015a483..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tile.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-
-#include "spu_tile.h"
-#include "spu_main.h"
-
-
-/**
- * Get tile of color or Z values from main memory, put into SPU memory.
- */
-void
-get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
-{
-   const uint offset = ty * spu.fb.width_tiles + tx;
-   const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
-   const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start;
-
-   src += offset * bytesPerTile;
-
-   ASSERT(tx < spu.fb.width_tiles);
-   ASSERT(ty < spu.fb.height_tiles);
-   ASSERT_ALIGN16(tile);
-   /*
-   printf("get_tile:  dest: %p  src: 0x%x  size: %d\n",
-          tile, (unsigned int) src, bytesPerTile);
-   */
-   mfc_get(tile->ui,  /* dest in local memory */
-           (unsigned int) src, /* src in main memory */
-           bytesPerTile,
-           tag,
-           0, /* tid */
-           0  /* rid */);
-}
-
-
-/**
- * Move tile of color or Z values from SPU memory to main memory.
- */
-void
-put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
-{
-   const uint offset = ty * spu.fb.width_tiles + tx;
-   const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
-   ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start;
-
-   dst += offset * bytesPerTile;
-
-   ASSERT(tx < spu.fb.width_tiles);
-   ASSERT(ty < spu.fb.height_tiles);
-   ASSERT_ALIGN16(tile);
-   /*
-   printf("SPU %u: put_tile:  src: %p  dst: 0x%x  size: %d\n",
-          spu.init.id,
-          tile, (unsigned int) dst, bytesPerTile);
-   */
-   mfc_put((void *) tile->ui,  /* src in local memory */
-           (unsigned int) dst,  /* dst in main memory */
-           bytesPerTile,
-           tag,
-           0, /* tid */
-           0  /* rid */);
-}
-
-
-/**
- * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
- * tiles back to the main framebuffer.
- */
-void
-really_clear_tiles(uint surfaceIndex)
-{
-   const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
-   uint i;
-
-   if (surfaceIndex == 0) {
-      clear_c_tile(&spu.ctile);
-
-      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
-         uint tx = i % spu.fb.width_tiles;
-         uint ty = i / spu.fb.width_tiles;
-         if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
-            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
-         }
-      }
-   }
-   else {
-      clear_z_tile(&spu.ztile);
-
-      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
-         uint tx = i % spu.fb.width_tiles;
-         uint ty = i / spu.fb.width_tiles;
-         if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
-            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1);
-      }
-   }
-
-#if 0
-   wait_on_mask(1 << TAG_SURFACE_CLEAR);
-#endif
-}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
deleted file mode 100644
index 7bfb52be8f3..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tile.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef SPU_TILE_H
-#define SPU_TILE_H
-
-
-#include <libmisc.h>
-#include <spu_mfcio.h>
-#include "spu_main.h"
-#include "cell/common.h"
-
-
-
-extern void
-get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
-
-extern void
-put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
-
-extern void
-really_clear_tiles(uint surfaceIndex);
-
-
-static INLINE void
-clear_c_tile(tile_t *ctile)
-{
-   memset32((uint*) ctile->ui,
-            spu.fb.color_clear_value,
-            TILE_SIZE * TILE_SIZE);
-}
-
-
-static INLINE void
-clear_z_tile(tile_t *ztile)
-{
-   if (spu.fb.zsize == 2) {
-      memset16((ushort*) ztile->us,
-               spu.fb.depth_clear_value,
-               TILE_SIZE * TILE_SIZE);
-   }
-   else {
-      ASSERT(spu.fb.zsize != 0);
-      memset32((uint*) ztile->ui,
-               spu.fb.depth_clear_value,
-               TILE_SIZE * TILE_SIZE);
-   }
-}
-
-
-#endif /* SPU_TILE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
deleted file mode 100644
index efeebca27bb..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ /dev/null
@@ -1,843 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Triangle rendering within a tile.
- */
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_format.h"
-#include "util/u_math.h"
-#include "spu_colorpack.h"
-#include "spu_main.h"
-#include "spu_shuffle.h"
-#include "spu_texture.h"
-#include "spu_tile.h"
-#include "spu_tri.h"
-
-
-/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
-typedef vector unsigned int mask_t;
-
-
-
-/**
- * Simplified types taken from other parts of Gallium
- */
-struct vertex_header {
-   vector float data[1];
-};
-
-
-
-/* XXX fix this */
-#undef CEILF
-#define CEILF(X) ((float) (int) ((X) + 0.99999f))
-
-
-#define QUAD_TOP_LEFT     0
-#define QUAD_TOP_RIGHT    1
-#define QUAD_BOTTOM_LEFT  2
-#define QUAD_BOTTOM_RIGHT 3
-#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
-#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
-#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
-#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
-#define MASK_ALL          0xf
-
-
-#define CHAN0 0
-#define CHAN1 1
-#define CHAN2 2
-#define CHAN3 3
-
-
-#define DEBUG_VERTS 0
-
-/**
- * Triangle edge info
- */
-struct edge {
-   union {
-      struct {
-         float dx;	/**< X(v1) - X(v0), used only during setup */
-         float dy;	/**< Y(v1) - Y(v0), used only during setup */
-      };
-      vec_float4 ds;    /**< vector accessor for dx and dy */
-   };
-   float dxdy;		/**< dx/dy */
-   float sx, sy;	/**< first sample point coord */
-   int lines;		/**< number of lines on this edge */
-};
-
-
-struct interp_coef
-{
-   vector float a0;
-   vector float dadx;
-   vector float dady;
-};
-
-
-/**
- * Triangle setup info (derived from draw_stage).
- * Also used for line drawing (taking some liberties).
- */
-struct setup_stage {
-
-   /* Vertices are just an array of floats making up each attribute in
-    * turn.  Currently fixed at 4 floats, but should change in time.
-    * Codegen will help cope with this.
-    */
-   union {
-      struct {
-         const struct vertex_header *vmin;
-         const struct vertex_header *vmid;
-         const struct vertex_header *vmax;
-         const struct vertex_header *vprovoke;
-      };
-      qword vertex_headers;
-   };
-
-   struct edge ebot;
-   struct edge etop;
-   struct edge emaj;
-
-   float oneOverArea;  /* XXX maybe make into vector? */
-
-   uint facing;
-
-   uint tx, ty;  /**< position of current tile (x, y) */
-
-   union {
-      struct {
-         int cliprect_minx;
-         int cliprect_miny;
-         int cliprect_maxx;
-         int cliprect_maxy;
-      };
-      qword cliprect;
-   };
-
-   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
-
-   struct {
-      vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
-      int y;
-      unsigned y_flags;
-      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
-   } span;
-};
-
-
-static struct setup_stage setup;
-
-
-static INLINE vector float
-splatx(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN0));
-}
-
-static INLINE vector float
-splaty(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN1));
-}
-
-static INLINE vector float
-splatz(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN2));
-}
-
-static INLINE vector float
-splatw(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN3));
-}
-
-
-/**
- * Setup fragment shader inputs by evaluating triangle's vertex
- * attribute coefficient info.
- * \param x  quad x pos
- * \param y  quad y pos
- * \param fragZ  returns quad Z values
- * \param fragInputs  returns fragment program inputs
- * Note: this code could be incorporated into the fragment program
- * itself to avoid the loop and switch.
- */
-static void
-eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
-{
-   static const vector float deltaX = (const vector float) {0, 1, 0, 1};
-   static const vector float deltaY = (const vector float) {0, 0, 1, 1};
-
-   const uint posSlot = 0;
-   const vector float pos = setup.coef[posSlot].a0;
-   const vector float dposdx = setup.coef[posSlot].dadx;
-   const vector float dposdy = setup.coef[posSlot].dady;
-   const vector float fragX = spu_splats(x) + deltaX;
-   const vector float fragY = spu_splats(y) + deltaY;
-   vector float fragW, wInv;
-   uint i;
-
-   *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy);
-   fragW =  splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy);
-   wInv = spu_re(fragW);  /* 1 / w */
-
-   /* loop over fragment program inputs */
-   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
-      uint attr = i + 1;
-      enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode;
-
-      /* constant term */
-      vector float a0 = setup.coef[attr].a0;
-      vector float r0 = splatx(a0);
-      vector float r1 = splaty(a0);
-      vector float r2 = splatz(a0);
-      vector float r3 = splatw(a0);
-
-      if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) {
-         /* linear term */
-         vector float dadx = setup.coef[attr].dadx;
-         vector float dady = setup.coef[attr].dady;
-         /* Use SPU intrinsics here to get slightly better code.
-          * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
-          */
-         r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
-         r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
-         r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
-         r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
-         if (interp == INTERP_PERSPECTIVE) {
-            /* perspective term */
-            r0 *= wInv;
-            r1 *= wInv;
-            r2 *= wInv;
-            r3 *= wInv;
-         }
-      }
-      fragInputs[CHAN0] = r0;
-      fragInputs[CHAN1] = r1;
-      fragInputs[CHAN2] = r2;
-      fragInputs[CHAN3] = r3;
-      fragInputs += 4;
-   }
-}
-
-
-/**
- * Emit a quad (pass to next stage).  No clipping is done.
- * Note: about 1/5 to 1/7 of the time, mask is zero and this function
- * should be skipped.  But adding the test for that slows things down
- * overall.
- */
-static INLINE void
-emit_quad( int x, int y, mask_t mask)
-{
-   /* If any bits in mask are set... */
-   if (spu_extract(spu_orx(mask), 0)) {
-      const int ix = x - setup.cliprect_minx;
-      const int iy = y - setup.cliprect_miny;
-
-      spu.cur_ctile_status = TILE_STATUS_DIRTY;
-      spu.cur_ztile_status = TILE_STATUS_DIRTY;
-
-      {
-         /*
-          * Run fragment shader, execute per-fragment ops, update fb/tile.
-          */
-         vector float inputs[4*4], outputs[2*4];
-         vector unsigned int kill_mask;
-         vector float fragZ;
-
-         eval_inputs((float) x, (float) y, &fragZ, inputs);
-
-         ASSERT(spu.fragment_program);
-         ASSERT(spu.fragment_ops);
-
-         /* Execute the current fragment program */
-         kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
-
-         mask = spu_andc(mask, kill_mask);
-
-         /* Execute per-fragment/quad operations, including:
-          * alpha test, z test, stencil test, blend and framebuffer writing.
-          * Note that there are two different fragment operations functions
-          * that can be called, one for front-facing fragments, and one
-          * for back-facing fragments.  (Often the two are the same;
-          * but in some cases, like two-sided stenciling, they can be
-          * very different.)  So choose the correct function depending
-          * on the calculated facing.
-          */
-         spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
-                          fragZ,
-                          outputs[0*4+0],
-                          outputs[0*4+1],
-                          outputs[0*4+2],
-                          outputs[0*4+3],
-                          mask);
-      }
-   }
-}
-
-
-/**
- * Given an X or Y coordinate, return the block/quad coordinate that it
- * belongs to.
- */
-static INLINE int
-block(int x)
-{
-   return x & ~1;
-}
-
-
-/**
- * Render a horizontal span of quads
- */
-static void
-flush_spans(void)
-{
-   int minleft, maxright;
-
-   const int l0 = spu_extract(setup.span.quad, 0);
-   const int l1 = spu_extract(setup.span.quad, 1);
-   const int r0 = spu_extract(setup.span.quad, 2);
-   const int r1 = spu_extract(setup.span.quad, 3);
-
-   switch (setup.span.y_flags) {
-   case 0x3:
-      /* both odd and even lines written (both quad rows) */
-      minleft = MIN2(l0, l1);
-      maxright = MAX2(r0, r1);
-      break;
-
-   case 0x1:
-      /* only even line written (quad top row) */
-      minleft = l0;
-      maxright = r0;
-      break;
-
-   case 0x2:
-      /* only odd line written (quad bottom row) */
-      minleft = l1;
-      maxright = r1;
-      break;
-
-   default:
-      return;
-   }
-
-   /* OK, we're very likely to need the tile data now.
-    * clear or finish waiting if needed.
-    */
-   if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
-      /* wait for mfc_get() to complete */
-      //printf("SPU: %u: waiting for ctile\n", spu.init.id);
-      wait_on_mask(1 << TAG_READ_TILE_COLOR);
-      spu.cur_ctile_status = TILE_STATUS_CLEAN;
-   }
-   else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
-      //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
-      clear_c_tile(&spu.ctile);
-      spu.cur_ctile_status = TILE_STATUS_DIRTY;
-   }
-   ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
-
-   if (spu.read_depth_stencil) {
-      if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
-         /* wait for mfc_get() to complete */
-         //printf("SPU: %u: waiting for ztile\n", spu.init.id);
-         wait_on_mask(1 << TAG_READ_TILE_Z);
-         spu.cur_ztile_status = TILE_STATUS_CLEAN;
-      }
-      else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
-         //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
-         clear_z_tile(&spu.ztile);
-         spu.cur_ztile_status = TILE_STATUS_DIRTY;
-      }
-      ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
-   }
-
-   /* XXX this loop could be moved into the above switch cases... */
-   
-   /* Setup for mask calculation */
-   const vec_int4 quad_LlRr = setup.span.quad;
-   const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
-   const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
-   const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
-
-   const vec_int4 twos = spu_splats(2);
-
-   const int x = block(minleft);
-   vec_int4 xs = {x, x+1, x, x+1};
-
-   for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
-      /**
-       * Computes mask to indicate which pixels in the 2x2 quad are actually
-       * inside the triangle's bounds.
-       */
-      
-      /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
-      const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
-      const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs); 
-      
-      /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
-      const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
-
-      /* Combine results to create mask */
-      const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
-
-      emit_quad(spu_extract(xs, 0), setup.span.y, mask);
-   }
-
-   setup.span.y = 0;
-   setup.span.y_flags = 0;
-   /* Zero right elements */
-   setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
-}
-
-
-#if DEBUG_VERTS
-static void
-print_vertex(const struct vertex_header *v)
-{
-   uint i;
-   fprintf(stderr, "  Vertex: (%p)\n", v);
-   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
-      fprintf(stderr, "    %d: %f %f %f %f\n",  i, 
-              spu_extract(v->data[i], 0),
-              spu_extract(v->data[i], 1),
-              spu_extract(v->data[i], 2),
-              spu_extract(v->data[i], 3));
-   }
-}
-#endif
-
-/* Returns the minimum of each slot of two vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n]);
- */
-static qword
-minfq(qword q0, qword q1)
-{
-   const qword q0q1m = si_fcgt(q0, q1);
-   return si_selb(q0, q1, q0q1m);
-}
-
-/* Returns the minimum of each slot of three vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n],q2[n]);
- */
-static qword
-min3fq(qword q0, qword q1, qword q2)
-{
-   return minfq(minfq(q0, q1), q2);
-}
-
-/* Returns the maximum of each slot of two vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n],q2[n]);
- */
-static qword
-maxfq(qword q0, qword q1) {
-   const qword q0q1m = si_fcgt(q0, q1);
-   return si_selb(q1, q0, q0q1m);
-}
-
-/* Returns the maximum of each slot of three vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n],q2[n]);
- */
-static qword
-max3fq(qword q0, qword q1, qword q2) {
-   return maxfq(maxfq(q0, q1), q2);
-}
-
-/**
- * Sort vertices from top to bottom.
- * Compute area and determine front vs. back facing.
- * Do coarse clip test against tile bounds
- * \return  FALSE if tri is totally outside tile, TRUE otherwise
- */
-static boolean
-setup_sort_vertices(const qword vs)
-{
-   float area, sign;
-
-#if DEBUG_VERTS
-   if (spu.init.id==0) {
-      fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
-      print_vertex(v0);
-      print_vertex(v1);
-      print_vertex(v2);
-   }
-#endif
-
-   {
-      /* Load the float values for various processing... */
-      const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]);
-      const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]);
-      const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]);
-
-      /* Check if triangle is completely outside the tile bounds
-       * Find the min and max x and y positions of the three poits */
-      const qword minf = min3fq(f0, f1, f2);
-      const qword maxf = max3fq(f0, f1, f2);
-
-      /* Compare min and max against cliprect vals */
-      const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b));
-      const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0));
-
-      /* Use a little magic to work out of the tri is visible or not */
-      if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE;
-
-      /* determine bottom to top order of vertices */
-      /* A table of shuffle patterns for putting vertex_header pointers into
-         correct order.  Quite magical. */
-      const qword sort_order_patterns[] = {
-         SHUFB4(A,B,C,C),
-         SHUFB4(C,A,B,C),
-         SHUFB4(A,C,B,C),
-         SHUFB4(B,C,A,C),
-         SHUFB4(B,A,C,C),
-         SHUFB4(C,B,A,C) };
-
-      /* Collate y values into two vectors for comparison.
-         Using only one shuffle constant! ;) */
-      const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C));
-      const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C));
-      const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C));
-      const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C));
-
-      /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
-      const qword compare = si_fcgt(y_012, y_120);
-      /* Compress the result of the comparison into 4 bits */
-      const qword gather = si_gb(compare);
-      /* Subtract one to attain the index into the LUT.  Magical. */
-      const unsigned int index = si_to_uint(gather) - 1;
-
-      /* Load the appropriate pattern and construct the desired vector. */
-      setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);
-
-      /* Using the result of the comparison, set sign.
-         Very magical. */
-      sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f);
-   }
-
-   setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
-   setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
-   setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
-
-   /*
-    * Compute triangle's area.  Use 1/area to compute partial
-    * derivatives of attributes later.
-    */
-   area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
-
-   setup.oneOverArea = 1.0f / area;
-
-   /* The product of area * sign indicates front/back orientation (0/1).
-    * Just in case someone gets the bright idea of switching the front
-    * and back constants without noticing that we're assuming their
-    * values in this operation, also assert that the values are
-    * what we think they are.
-    */
-   ASSERT(CELL_FACING_FRONT == 0);
-   ASSERT(CELL_FACING_BACK == 1);
-   setup.facing = (area * sign > 0.0f)
-      ^ (!spu.rasterizer.front_ccw);
-
-   return TRUE;
-}
-
-
-/**
- * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- * The value value comes from vertex->data[slot].
- * The result will be put into setup.coef[slot].a0.
- * \param slot  which attribute slot 
- */
-static INLINE void
-const_coeff4(uint slot)
-{
-   setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
-   setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
-   setup.coef[slot].a0 = setup.vprovoke->data[slot];
-}
-
-
-/**
- * As above, but interp setup all four vector components.
- */
-static INLINE void
-tri_linear_coeff4(uint slot)
-{
-   const vector float vmin_d = setup.vmin->data[slot];
-   const vector float vmid_d = setup.vmid->data[slot];
-   const vector float vmax_d = setup.vmax->data[slot];
-   const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
-   const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
-
-   vector float botda = vmid_d - vmin_d;
-   vector float majda = vmax_d - vmin_d;
-
-   vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
-                            spu_mul(botda, spu_splats(setup.emaj.dy)));
-   vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
-                            spu_mul(majda, spu_splats(setup.ebot.dx)));
-
-   setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
-   setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
-
-   vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
-   vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
-                         
-   setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
-}
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void
-tri_persp_coeff4(uint slot)
-{
-   const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
-   const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
-
-   const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
-   const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
-   const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
-
-   vector float vmin_d = setup.vmin->data[slot];
-   vector float vmid_d = setup.vmid->data[slot];
-   vector float vmax_d = setup.vmax->data[slot];
-
-   vmin_d = spu_mul(vmin_d, vmin_w);
-   vmid_d = spu_mul(vmid_d, vmid_w);
-   vmax_d = spu_mul(vmax_d, vmax_w);
-
-   vector float botda = vmid_d - vmin_d;
-   vector float majda = vmax_d - vmin_d;
-
-   vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
-                            spu_mul(botda, spu_splats(setup.emaj.dy)));
-   vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
-                            spu_mul(majda, spu_splats(setup.ebot.dx)));
-
-   setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
-   setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
-
-   vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
-   vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
-                         
-   setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
-}
-
-
-
-/**
- * Compute the setup.coef[] array dadx, dady, a0 values.
- * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
- */
-static void
-setup_tri_coefficients(void)
-{
-   uint i;
-
-   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
-      switch (spu.vertex_info.attrib[i].interp_mode) {
-      case INTERP_NONE:
-         break;
-      case INTERP_CONSTANT:
-         const_coeff4(i);
-         break;
-      case INTERP_POS:
-         /* fall-through */
-      case INTERP_LINEAR:
-         tri_linear_coeff4(i);
-         break;
-      case INTERP_PERSPECTIVE:
-         tri_persp_coeff4(i);
-         break;
-      default:
-         ASSERT(0);
-      }
-   }
-}
-
-
-static void
-setup_tri_edges(void)
-{
-   float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
-   float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
-
-   float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
-   float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
-   float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
-
-   setup.emaj.sy = CEILF(vmin_y);
-   setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
-   setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
-   setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
-
-   setup.etop.sy = CEILF(vmid_y);
-   setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
-   setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
-   setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
-
-   setup.ebot.sy = CEILF(vmin_y);
-   setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
-   setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
-   setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
-}
-
-
-/**
- * Render the upper or lower half of a triangle.
- * Scissoring/cliprect is applied here too.
- */
-static void
-subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
-{
-   const int minx = setup.cliprect_minx;
-   const int maxx = setup.cliprect_maxx;
-   const int miny = setup.cliprect_miny;
-   const int maxy = setup.cliprect_maxy;
-   int y, start_y, finish_y;
-   int sy = (int)eleft->sy;
-
-   ASSERT((int)eleft->sy == (int) eright->sy);
-
-   /* clip top/bottom */
-   start_y = sy;
-   finish_y = sy + lines;
-
-   if (start_y < miny)
-      start_y = miny;
-
-   if (finish_y > maxy)
-      finish_y = maxy;
-
-   start_y -= sy;
-   finish_y -= sy;
-
-   /*
-   printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);  
-   */
-
-   for (y = start_y; y < finish_y; y++) {
-
-      /* avoid accumulating adds as floats don't have the precision to
-       * accurately iterate large triangle edges that way.  luckily we
-       * can just multiply these days.
-       *
-       * this is all drowned out by the attribute interpolation anyway.
-       */
-      int left = (int)(eleft->sx + y * eleft->dxdy);
-      int right = (int)(eright->sx + y * eright->dxdy);
-
-      /* clip left/right */
-      if (left < minx)
-         left = minx;
-      if (right > maxx)
-         right = maxx;
-
-      if (left < right) {
-         int _y = sy + y;
-         if (block(_y) != setup.span.y) {
-            flush_spans();
-            setup.span.y = block(_y);
-         }
-
-         int offset = _y&1;
-         vec_int4 quad_LlRr = {left, left, right, right};
-         /* Store left and right in 0 or 1 row of quad based on offset */
-         setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
-         setup.span.y_flags |= 1<<offset;
-      }
-   }
-
-
-   /* save the values so that emaj can be restarted:
-    */
-   eleft->sx += lines * eleft->dxdy;
-   eright->sx += lines * eright->dxdy;
-   eleft->sy += lines;
-   eright->sy += lines;
-}
-
-
-/**
- * Draw triangle into tile at (tx, ty) (tile coords)
- * The tile data should have already been fetched.
- */
-boolean
-tri_draw(const qword vs,
-         uint tx, uint ty)
-{
-   setup.tx = tx;
-   setup.ty = ty;
-
-   /* set clipping bounds to tile bounds */
-   const qword clipbase = (qword)((vec_uint4){tx, ty});
-   const qword clipmin = si_mpyui(clipbase, TILE_SIZE);
-   const qword clipmax = si_ai(clipmin, TILE_SIZE);
-   setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b));
-
-   if(!setup_sort_vertices(vs)) {
-      return FALSE; /* totally clipped */
-   }
-
-   setup_tri_coefficients();
-   setup_tri_edges();
-
-   setup.span.y = 0;
-   setup.span.y_flags = 0;
-   /* Zero right elements */
-   setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
-
-   if (setup.oneOverArea < 0.0) {
-      /* emaj on left */
-      subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
-      subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
-   }
-   else {
-      /* emaj on right */
-      subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
-      subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
-   }
-
-   flush_spans();
-
-   return TRUE;
-}
diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h
deleted file mode 100644
index 82e3b19ad7e..00000000000
--- a/src/gallium/drivers/cell/spu/spu_tri.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef SPU_TRI_H
-#define SPU_TRI_H
-
-
-extern boolean
-tri_draw(const qword vs, uint tx, uint ty);
-
-
-#endif /* SPU_TRI_H */
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
deleted file mode 100644
index 24057e29e36..00000000000
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ /dev/null
@@ -1,77 +0,0 @@
-
-#include "cell/common.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_debug.h"
-#include "tgsi/tgsi_parse.h"
-//#include "tgsi_build.h"
-#include "tgsi/tgsi_util.h"
-
-unsigned
-tgsi_util_get_src_register_swizzle(
-   const struct tgsi_src_register *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->SwizzleX;
-   case 1:
-      return reg->SwizzleY;
-   case 2:
-      return reg->SwizzleZ;
-   case 3:
-      return reg->SwizzleW;
-   default:
-      ASSERT( 0 );
-   }
-   return 0;
-}
-
-
-unsigned
-tgsi_util_get_full_src_register_swizzle(
-   const struct tgsi_full_src_register  *reg,
-   unsigned component )
-{
-   return tgsi_util_get_src_register_swizzle(
-      reg->Register,
-      component );
-}
-
-
-unsigned
-tgsi_util_get_full_src_register_sign_mode(
-   const struct  tgsi_full_src_register *reg,
-   unsigned component )
-{
-   unsigned sign_mode;
-
-   if( reg->RegisterExtMod.Absolute ) {
-      /* Consider only the post-abs negation. */
-
-      if( reg->RegisterExtMod.Negate ) {
-         sign_mode = TGSI_UTIL_SIGN_SET;
-      }
-      else {
-         sign_mode = TGSI_UTIL_SIGN_CLEAR;
-      }
-   }
-   else {
-      /* Accumulate the three negations. */
-
-      unsigned negate;
-
-      negate = reg->Register.Negate;
-      if( reg->RegisterExtMod.Negate ) {
-         negate = !negate;
-      }
-
-      if( negate ) {
-         sign_mode = TGSI_UTIL_SIGN_TOGGLE;
-      }
-      else {
-         sign_mode = TGSI_UTIL_SIGN_KEEP;
-      }
-   }
-
-   return sign_mode;
-}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
deleted file mode 100644
index 087963960df..00000000000
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  *   Ian Romanick <idr@us.ibm.com>
-  */
-
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "spu_exec.h"
-#include "spu_vertex_shader.h"
-#include "spu_main.h"
-#include "spu_dcache.h"
-
-typedef void (*spu_fetch_func)(qword *out, const qword *in,
-			       const qword *shuffle_data);
-
-
-PIPE_ALIGN_VAR(16) static const qword
-fetch_shuffle_data[5] = {
-   /* Shuffle used by CVT_64_FLOAT
-    */
-   {
-      0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-   },
-
-   /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
-    */
-   {
-      0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
-      0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
-   },
-   
-   /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
-    */
-   {
-      0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
-      0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
-   },
-   
-   /* High value shuffle used by trans4x4.
-    */
-   {
-      0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-      0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
-   },
-
-   /* Low value shuffle used by trans4x4.
-    */
-   {
-      0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-      0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
-   }
-};
-
-
-/**
- * Fetch vertex attributes for 'count' vertices.
- */
-static void generic_vertex_fetch(struct spu_vs_context *draw,
-                                 struct spu_exec_machine *machine,
-                                 const unsigned *elts,
-                                 unsigned count)
-{
-   unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
-   unsigned attr;
-
-   ASSERT(count <= 4);
-
-#if DRAW_DBG
-   printf("SPU: %s count = %u, nr_attrs = %u\n", 
-          __FUNCTION__, count, nr_attrs);
-#endif
-
-   /* loop over vertex attributes (vertex shader inputs)
-    */
-   for (attr = 0; attr < nr_attrs; attr++) {
-      const unsigned pitch = draw->vertex_fetch.pitch[attr];
-      const uint64_t src = draw->vertex_fetch.src_ptr[attr];
-      const spu_fetch_func fetch = (spu_fetch_func)
-	  (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]);
-      unsigned i;
-      unsigned idx;
-      const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
-      const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
-      PIPE_ALIGN_VAR(16) qword in[2 * 4];
-
-
-      /* Fetch four attributes for four vertices.  
-       */
-      idx = 0;
-      for (i = 0; i < count; i++) {
-         const uint64_t addr = src + (elts[i] * pitch);
-
-#if DRAW_DBG
-         printf("SPU: fetching = 0x%llx\n", addr);
-#endif
-
-         spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry);
-         idx += quads_per_entry;
-      }
-
-      /* Be nice and zero out any missing vertices.
-       */
-      (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword));
-
-
-      /* Convert all 4 vertices to vectors of float.
-       */
-      (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data);
-   }
-}
-
-
-void spu_update_vertex_fetch( struct spu_vs_context *draw )
-{
-   draw->vertex_fetch.fetch_func = generic_vertex_fetch;
-}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c
deleted file mode 100644
index d6febd36f41..00000000000
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  *   Brian Paul
-  *   Ian Romanick <idr@us.ibm.com>
-  */
-
-#include <spu_mfcio.h>
-
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_math.h"
-#include "draw/draw_private.h"
-#include "draw/draw_context.h"
-#include "cell/common.h"
-#include "spu_vertex_shader.h"
-#include "spu_exec.h"
-#include "spu_main.h"
-
-
-#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
-
-
-#define CLIP_RIGHT_BIT 0x01
-#define CLIP_LEFT_BIT 0x02
-#define CLIP_TOP_BIT 0x04
-#define CLIP_BOTTOM_BIT 0x08
-#define CLIP_FAR_BIT 0x10
-#define CLIP_NEAR_BIT 0x20
-
-
-static INLINE float
-dot4(const float *a, const float *b)
-{
-   return (a[0]*b[0] +
-           a[1]*b[1] +
-           a[2]*b[2] +
-           a[3]*b[3]);
-}
-
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
-   unsigned mask = 0;
-   unsigned i;
-
-   /* Do the hardwired planes first:
-    */
-   if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
-   if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
-   if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
-   if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
-   if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
-   if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
-   /* Followed by any remaining ones:
-    */
-   for (i = 6; i < nr; i++) {
-      if (dot4(clip, plane[i]) < 0) 
-         mask |= (1<<i);
-   }
-
-   return mask;
-}
-
-
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer  the input vertex data
- * \param elts  indexes of four input vertices
- * \param count  number of vertices to shade [1..4]
- * \param vOut  array of pointers to four output vertices
- */
-static void
-run_vertex_program(struct spu_vs_context *draw,
-                   unsigned elts[4], unsigned count,
-                   const uint64_t *vOut)
-{
-   struct spu_exec_machine *machine = &draw->machine;
-   unsigned int j;
-
-   PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS];
-   PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS];
-   const float *scale = draw->viewport.scale;
-   const float *trans = draw->viewport.translate;
-
-   ASSERT(count <= 4);
-
-   machine->Processor = TGSI_PROCESSOR_VERTEX;
-
-   ASSERT_ALIGN16(draw->constants);
-   machine->Consts = (float (*)[4]) draw->constants;
-
-   machine->Inputs = inputs;
-   machine->Outputs = outputs;
-
-   spu_vertex_fetch( draw, machine, elts, count );
-
-   /* run shader */
-   spu_exec_machine_run( machine );
-
-
-   /* store machine results */
-   for (j = 0; j < count; j++) {
-      unsigned slot;
-      float x, y, z, w;
-      PIPE_ALIGN_VAR(16)
-      unsigned char buffer[sizeof(struct vertex_header)
-          + MAX_VERTEX_SIZE];
-      struct vertex_header *const tmpOut =
-          (struct vertex_header *) buffer;
-      const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
-                                           + (sizeof(float) * 4 
-                                              * draw->num_vs_outputs));
-
-      mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
-      wait_on_mask(1 << TAG_VERTEX_BUFFER);
-
-
-      /* Handle attr[0] (position) specially:
-       *
-       * XXX: Computing the clipmask should be done in the vertex
-       * program as a set of DP4 instructions appended to the
-       * user-provided code.
-       */
-      x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j];
-      y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j];
-      z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j];
-      w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
-      tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane,
-					   draw->nr_planes);
-      tmpOut->edgeflag = 1;
-
-      /* divide by w */
-      w = 1.0f / w;
-      x *= w;
-      y *= w;
-      z *= w;
-
-      /* Viewport mapping */
-      tmpOut->data[0][0] = x * scale[0] + trans[0];
-      tmpOut->data[0][1] = y * scale[1] + trans[1];
-      tmpOut->data[0][2] = z * scale[2] + trans[2];
-      tmpOut->data[0][3] = w;
-
-      /* Remaining attributes are packed into sequential post-transform
-       * vertex attrib slots.
-       */
-      for (slot = 1; slot < draw->num_vs_outputs; slot++) {
-         tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
-         tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
-         tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
-         tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
-      }
-
-      mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
-   } /* loop over vertices */
-}
-
-
-PIPE_ALIGN_VAR(16) unsigned char
-immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32];
-
-
-void
-spu_bind_vertex_shader(struct spu_vs_context *draw,
-		       struct cell_shader_info *vs)
-{
-   const unsigned immediate_addr = vs->immediates;
-   const unsigned immediate_size = 
-       ROUNDUP16((sizeof(float) * 4 * vs->num_immediates)
-		 + (immediate_addr & 0x0f));
- 
-
-   mfc_get(immediates, immediate_addr & ~0x0f, immediate_size,
-           TAG_VERTEX_BUFFER, 0, 0);
-
-   draw->machine.Instructions = (struct tgsi_full_instruction *)
-       vs->instructions;
-   draw->machine.NumInstructions = vs->num_instructions;
-
-   draw->machine.Declarations = (struct tgsi_full_declaration *)
-       vs->declarations;
-   draw->machine.NumDeclarations = vs->num_declarations;
-
-   draw->num_vs_outputs = vs->num_outputs;
-
-   /* specify the shader to interpret/execute */
-   spu_exec_machine_init(&draw->machine,
-			 PIPE_MAX_SAMPLERS,
-			 NULL /*samplers*/,
-			 PIPE_SHADER_VERTEX);
-
-   wait_on_mask(1 << TAG_VERTEX_BUFFER);
-
-   (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f],
-                 sizeof(float) * 4 * vs->num_immediates);
-}
-
-
-void
-spu_execute_vertex_shader(struct spu_vs_context *draw,
-                          const struct cell_command_vs *vs)
-{
-   unsigned i;
-
-   (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes);
-   draw->nr_planes = vs->nr_planes;
-   draw->vertex_fetch.nr_attrs = vs->nr_attrs;
-
-   for (i = 0; i < vs->num_elts; i += 4) {
-      const unsigned batch_size = MIN2(vs->num_elts - i, 4);
-
-      run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]);
-   }
-}
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h
deleted file mode 100644
index 4c74f5e74d5..00000000000
--- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef SPU_VERTEX_SHADER_H
-#define SPU_VERTEX_SHADER_H
-
-#include "cell/common.h"
-#include "pipe/p_format.h"
-#include "spu_exec.h"
-
-struct spu_vs_context;
-
-typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
-				     struct spu_exec_machine *machine,
-				     const unsigned *elts,
-				     unsigned count );
-
-struct spu_vs_context {
-   struct pipe_viewport_state viewport;
-
-   struct {
-      uint64_t src_ptr[PIPE_MAX_ATTRIBS];
-      unsigned pitch[PIPE_MAX_ATTRIBS];
-      unsigned size[PIPE_MAX_ATTRIBS];
-      unsigned code_offset[PIPE_MAX_ATTRIBS];
-      unsigned nr_attrs;
-      boolean dirty;
-
-      spu_full_fetch_func fetch_func;
-      void *code;
-   } vertex_fetch;
-   
-   /* Clip derived state:
-    */
-   float plane[12][4];
-   unsigned nr_planes;
-
-   struct spu_exec_machine machine;
-   const float (*constants)[4];
-
-   unsigned num_vs_outputs;
-};
-
-extern void spu_update_vertex_fetch(struct spu_vs_context *draw);
-
-static INLINE void spu_vertex_fetch(struct spu_vs_context *draw,
-				    struct spu_exec_machine *machine,
-				    const unsigned *elts,
-				    unsigned count)
-{
-   if (draw->vertex_fetch.dirty) {
-      spu_update_vertex_fetch(draw);
-      draw->vertex_fetch.dirty = 0;
-   }
-   
-   (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count);
-}
-
-struct cell_command_vs;
-
-extern void
-spu_bind_vertex_shader(struct spu_vs_context *draw,
-		       struct cell_shader_info *vs);
-
-extern void
-spu_execute_vertex_shader(struct spu_vs_context *draw,
-			  const struct cell_command_vs *vs);
-
-#endif /* SPU_VERTEX_SHADER_H */
diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile
index f8f6c81b3f2..867b2da323b 100644
--- a/src/gallium/targets/libgl-xlib/Makefile
+++ b/src/gallium/targets/libgl-xlib/Makefile
@@ -29,7 +29,6 @@ DEFINES += \
 	-DGALLIUM_RBUG \
 	-DGALLIUM_TRACE \
 	-DGALLIUM_GALAHAD
-#-DGALLIUM_CELL will be defined by the config */
 
 XLIB_TARGET_SOURCES = \
 	xlib.c
@@ -38,7 +37,6 @@ XLIB_TARGET_SOURCES = \
 XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o)
 
 
-# Note: CELL_SPU_LIB is only defined for cell configs
 
 LIBS = \
 	$(GALLIUM_DRIVERS) \
@@ -50,7 +48,6 @@ LIBS = \
 	$(TOP)/src/mapi/glapi/libglapi.a \
 	$(TOP)/src/mesa/libmesagallium.a \
 	$(GALLIUM_AUXILIARIES) \
-	$(CELL_SPU_LIB) \
 
 
 # LLVM
diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript
index ad8b0992e46..25a4582d7a3 100644
--- a/src/gallium/targets/libgl-xlib/SConscript
+++ b/src/gallium/targets/libgl-xlib/SConscript
@@ -42,11 +42,6 @@ if True:
 if env['llvm']:
     env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE'])
     env.Prepend(LIBS = [llvmpipe])
-    
-if False:
-    # TODO: Detect Cell SDK
-    env.Append(CPPDEFINES = 'GALLIUM_CELL')
-    env.Prepend(LIBS = [cell])
 
 # libGL.so.1.5
 libgl_1_5 = env.SharedLibrary(
diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c
index 1a5892b94a0..0ede7e6096b 100644
--- a/src/gallium/targets/libgl-xlib/xlib.c
+++ b/src/gallium/targets/libgl-xlib/xlib.c
@@ -42,7 +42,7 @@
 
 
 /* Helper function to build a subset of a driver stack consisting of
- * one of the software rasterizers (cell, llvmpipe, softpipe) and the
+ * one of the software rasterizers (llvmpipe, softpipe) and the
  * xlib winsys.
  */
 static struct pipe_screen *