diff options
author | Kai Wasserbäch <kai@dev.carbon-project.org> | 2011-11-29 18:17:47 +0100 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2011-11-29 20:26:53 +0000 |
commit | ccd4d4367f2b4e5aebfc59b832599812a4a1c7d8 (patch) | |
tree | b5e510235d4be8dd4644a72c721693eaae02c1b5 /src/gallium | |
parent | 09e67706e9a74600e16fe012ecfd192b0d31960a (diff) |
gallium/cell: Remove the driver.
Complicates Gallium3D development and doesn't seem to have active users.
Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Signed-off-by: José Fonseca <jfonseca@vmware.com>
Diffstat (limited to 'src/gallium')
80 files changed, 3 insertions, 20782 deletions
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 15de20cb3a..5c65533308 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -63,7 +63,6 @@ C_SOURCES := \ rtasm/rtasm_cpu.c \ rtasm/rtasm_execmem.c \ rtasm/rtasm_ppc.c \ - rtasm/rtasm_ppc_spe.c \ rtasm/rtasm_x86sse.c \ tgsi/tgsi_build.c \ tgsi/tgsi_dump.c \ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c deleted file mode 100644 index 53a0e722cf..0000000000 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ /dev/null @@ -1,1067 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Real-time assembly generation interface for Cell B.E. SPEs. 
- * - * \author Ian Romanick <idr@us.ibm.com> - * \author Brian Paul - */ - - -#include <stdio.h> -#include "pipe/p_compiler.h" -#include "util/u_memory.h" -#include "rtasm_ppc_spe.h" - - -#ifdef GALLIUM_CELL -/** - * SPE instruction types - * - * There are 6 primary instruction encodings used on the Cell's SPEs. Each of - * the following unions encodes one type. - * - * \bug - * If, at some point, we start generating SPE code from a little-endian host - * these unions will not work. - */ -/*@{*/ -/** - * Encode one output register with two input registers - */ -union spe_inst_RR { - uint32_t bits; - struct { - unsigned op:11; - unsigned rB:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with three input registers - */ -union spe_inst_RRR { - uint32_t bits; - struct { - unsigned op:4; - unsigned rT:7; - unsigned rB:7; - unsigned rA:7; - unsigned rC:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and a 7-bit signed immed - */ -union spe_inst_RI7 { - uint32_t bits; - struct { - unsigned op:11; - unsigned i7:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and an 8-bit signed immed - */ -union spe_inst_RI8 { - uint32_t bits; - struct { - unsigned op:10; - unsigned i8:8; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. 
and a 10-bit signed immed - */ -union spe_inst_RI10 { - uint32_t bits; - struct { - unsigned op:8; - unsigned i10:10; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 16-bit signed immediate - */ -union spe_inst_RI16 { - uint32_t bits; - struct { - unsigned op:9; - unsigned i16:16; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 18-bit signed immediate - */ -union spe_inst_RI18 { - uint32_t bits; - struct { - unsigned op:7; - unsigned i18:18; - unsigned rT:7; - } inst; -}; -/*@}*/ - - -static void -indent(const struct spe_function *p) -{ - int i; - for (i = 0; i < p->indent; i++) { - putchar(' '); - } -} - - -static const char * -rem_prefix(const char *longname) -{ - return longname + 4; -} - - -static const char * -reg_name(int reg) -{ - switch (reg) { - case SPE_REG_SP: - return "$sp"; - case SPE_REG_RA: - return "$lr"; - default: - { - /* cycle through four buffers to handle multiple calls per printf */ - static char buf[4][10]; - static int b = 0; - b = (b + 1) % 4; - sprintf(buf[b], "$%d", reg); - return buf[b]; - } - } -} - - -static void -emit_instruction(struct spe_function *p, uint32_t inst_bits) -{ - if (!p->store) - return; /* out of memory, drop the instruction */ - - if (p->num_inst == p->max_inst) { - /* allocate larger buffer */ - uint32_t *newbuf; - p->max_inst *= 2; /* 2x larger */ - newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16); - if (newbuf) { - memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE); - } - align_free(p->store); - p->store = newbuf; - if (!p->store) { - /* out of memory */ - p->num_inst = 0; - return; - } - } - - p->store[p->num_inst++] = inst_bits; -} - - - -static void emit_RR(struct spe_function *p, unsigned op, int rT, - int rA, int rB, const char *name) -{ - union spe_inst_RR inst; - inst.inst.op = op; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - 
printf("%s\t%s, %s, %s\n", - rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB)); - } -} - - -static void emit_RRR(struct spe_function *p, unsigned op, int rT, - int rA, int rB, int rC, const char *name) -{ - union spe_inst_RRR inst; - inst.inst.op = op; - inst.inst.rT = rT; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rC = rC; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), - reg_name(rA), reg_name(rB), reg_name(rC)); - } -} - - -static void emit_RI7(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - union spe_inst_RI7 inst; - inst.inst.op = op; - inst.inst.i7 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } -} - - - -static void emit_RI8(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - union spe_inst_RI8 inst; - inst.inst.op = op; - inst.inst.i8 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } -} - - - -static void emit_RI10(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - union spe_inst_RI10 inst; - inst.inst.op = op; - inst.inst.i10 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } -} - - -/** As above, but do range checking on signed immediate value */ -static void emit_RI10s(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - assert(imm <= 511); - assert(imm >= -512); - emit_RI10(p, op, rT, rA, imm, name); -} - - -static void emit_RI16(struct spe_function *p, 
unsigned op, int rT, - int imm, const char *name) -{ - union spe_inst_RI16 inst; - inst.inst.op = op; - inst.inst.i16 = imm; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); - } -} - - -static void emit_RI18(struct spe_function *p, unsigned op, int rT, - int imm, const char *name) -{ - union spe_inst_RI18 inst; - inst.inst.op = op; - inst.inst.i18 = imm; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); - } -} - - -#define EMIT(_name, _op) \ -void _name (struct spe_function *p) \ -{ \ - emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \ -} - -#define EMIT_(_name, _op) \ -void _name (struct spe_function *p, int rT) \ -{ \ - emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ -} - -#define EMIT_R(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA) \ -{ \ - emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ -} - -#define EMIT_RR(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int rB) \ -{ \ - emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ -} - -#define EMIT_RRR(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \ -{ \ - emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ -} - -#define EMIT_RI7(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ -} - -#define EMIT_RI8(_name, _op, bias) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ -} - -#define EMIT_RI10(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ -} - -#define EMIT_RI10s(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ -} - -#define EMIT_RI16(_name, _op) 
\ -void _name (struct spe_function *p, int rT, int imm) \ -{ \ - emit_RI16(p, _op, rT, imm, __FUNCTION__); \ -} - -#define EMIT_RI18(_name, _op) \ -void _name (struct spe_function *p, int rT, int imm) \ -{ \ - emit_RI18(p, _op, rT, imm, __FUNCTION__); \ -} - -#define EMIT_I16(_name, _op) \ -void _name (struct spe_function *p, int imm) \ -{ \ - emit_RI16(p, _op, 0, imm, __FUNCTION__); \ -} - -#include "rtasm_ppc_spe.h" - - - -/** - * Initialize an spe_function. - * \param code_size initial size of instruction buffer to allocate, in bytes. - * If zero, use a default. - */ -void spe_init_func(struct spe_function *p, unsigned code_size) -{ - uint i; - - if (!code_size) - code_size = 64; - - p->num_inst = 0; - p->max_inst = code_size / SPE_INST_SIZE; - p->store = align_malloc(code_size, 16); - - p->set_count = 0; - memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); - - /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. - */ - p->regs[0] = p->regs[1] = p->regs[2] = 1; - for (i = 80; i <= 127; i++) { - p->regs[i] = 1; - } - - p->print = FALSE; - p->indent = 0; -} - - -void spe_release_func(struct spe_function *p) -{ - assert(p->num_inst <= p->max_inst); - if (p->store != NULL) { - align_free(p->store); - } - p->store = NULL; -} - - -/** Return current code size in bytes. */ -unsigned spe_code_size(const struct spe_function *p) -{ - return p->num_inst * SPE_INST_SIZE; -} - - -/** - * Allocate a SPE register. - * \return register index or -1 if none left. - */ -int spe_allocate_available_register(struct spe_function *p) -{ - unsigned i; - for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] == 0) { - p->regs[i] = 1; - return i; - } - } - - return -1; -} - - -/** - * Mark the given SPE register as "allocated". - */ -int spe_allocate_register(struct spe_function *p, int reg) -{ - assert(reg < SPE_NUM_REGS); - assert(p->regs[reg] == 0); - p->regs[reg] = 1; - return reg; -} - - -/** - * Mark the given SPE register as "unallocated". 
Note that this should - * only be used on registers allocated in the current register set; an - * assertion will fail if an attempt is made to deallocate a register - * allocated in an earlier register set. - */ -void spe_release_register(struct spe_function *p, int reg) -{ - assert(reg >= 0); - assert(reg < SPE_NUM_REGS); - assert(p->regs[reg] == 1); - - p->regs[reg] = 0; -} - -/** - * Start a new set of registers. This can be called if - * it will be difficult later to determine exactly what - * registers were actually allocated during a code generation - * sequence, and you really just want to deallocate all of them. - */ -void spe_allocate_register_set(struct spe_function *p) -{ - uint i; - - /* Keep track of the set count. If it ever wraps around to 0, - * we're in trouble. - */ - p->set_count++; - assert(p->set_count > 0); - - /* Increment the allocation count of all registers currently - * allocated. Then any registers that are allocated in this set - * will be the only ones with a count of 1; they'll all be released - * when the register set is released. - */ - for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] > 0) - p->regs[i]++; - } -} - -void spe_release_register_set(struct spe_function *p) -{ - uint i; - - /* If the set count drops below zero, we're in trouble. */ - assert(p->set_count > 0); - p->set_count--; - - /* Drop the allocation level of all registers. Any allocated - * during this register set will drop to 0 and then become - * available. 
- */ - for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] > 0) - p->regs[i]--; - } -} - - -unsigned -spe_get_registers_used(const struct spe_function *p, ubyte used[]) -{ - unsigned i, num = 0; - /* only count registers in the range available to callers */ - for (i = 2; i < 80; i++) { - if (p->regs[i]) { - used[num++] = i; - } - } - return num; -} - - -void -spe_print_code(struct spe_function *p, boolean enable) -{ - p->print = enable; -} - - -void -spe_indent(struct spe_function *p, int spaces) -{ - p->indent += spaces; -} - - -void -spe_comment(struct spe_function *p, int rel_indent, const char *s) -{ - if (p->print) { - p->indent += rel_indent; - indent(p); - p->indent -= rel_indent; - printf("# %s\n", s); - } -} - - -/** - * Load quad word. - * NOTE: offset is in bytes and the least significant 4 bits must be zero! - */ -void spe_lqd(struct spe_function *p, int rT, int rA, int offset) -{ - const boolean pSave = p->print; - - /* offset must be a multiple of 16 */ - assert(offset % 16 == 0); - /* offset must fit in 10-bit signed int field, after shifting */ - assert((offset >> 4) <= 511); - assert((offset >> 4) >= -512); - - p->print = FALSE; - emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); - p->print = pSave; - - if (p->print) { - indent(p); - printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); - } -} - - -/** - * Store quad word. - * NOTE: offset is in bytes and the least significant 4 bits must be zero! 
- */ -void spe_stqd(struct spe_function *p, int rT, int rA, int offset) -{ - const boolean pSave = p->print; - - /* offset must be a multiple of 16 */ - assert(offset % 16 == 0); - /* offset must fit in 10-bit signed int field, after shifting */ - assert((offset >> 4) <= 511); - assert((offset >> 4) >= -512); - - p->print = FALSE; - emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); - p->print = pSave; - - if (p->print) { - indent(p); - printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); - } -} - - -/** - * For branch instructions: - * \param d if 1, disable interupts if branch is taken - * \param e if 1, enable interupts if branch is taken - * If d and e are both zero, don't change interupt status (right?) - */ - -/** Branch Indirect to address in rA */ -void spe_bi(struct spe_function *p, int rA, int d, int e) -{ - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Interupt Return */ -void spe_iret(struct spe_function *p, int rA, int d, int e) -{ - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect and set link on external data */ -void spe_bisled(struct spe_function *p, int rT, int rA, int d, - int e) -{ - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect and set link. Save PC in rT, jump to rA. */ -void spe_bisl(struct spe_function *p, int rT, int rA, int d, - int e) -{ - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ -void spe_biz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ -void spe_binz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. 
*/ -void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ -void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - - -/* Hint-for-branch instructions - */ -#if 0 -hbr; -hbra; -hbrr; -#endif - - -/* Control instructions - */ -#if 0 -stop; -EMIT_RR (spe_stopd, 0x140); -EMIT_ (spe_nop, 0x201); -sync; -EMIT_ (spe_dsync, 0x003); -EMIT_R (spe_mfspr, 0x00c); -EMIT_R (spe_mtspr, 0x10c); -#endif - - -/** - ** Helper / "macro" instructions. - ** Use somewhat verbose names as a reminder that these aren't native - ** SPE instructions. - **/ - - -void -spe_load_float(struct spe_function *p, int rT, float x) -{ - if (x == 0.0f) { - spe_il(p, rT, 0x0); - } - else if (x == 0.5f) { - spe_ilhu(p, rT, 0x3f00); - } - else if (x == 1.0f) { - spe_ilhu(p, rT, 0x3f80); - } - else if (x == -1.0f) { - spe_ilhu(p, rT, 0xbf80); - } - else { - union { - float f; - unsigned u; - } bits; - bits.f = x; - spe_ilhu(p, rT, bits.u >> 16); - spe_iohl(p, rT, bits.u & 0xffff); - } -} - - -void -spe_load_int(struct spe_function *p, int rT, int i) -{ - if (-32768 <= i && i <= 32767) { - spe_il(p, rT, i); - } - else { - spe_ilhu(p, rT, i >> 16); - if (i & 0xffff) - spe_iohl(p, rT, i & 0xffff); - } -} - -void spe_load_uint(struct spe_function *p, int rT, uint ui) -{ - /* If the whole value is in the lower 18 bits, use ila, which - * doesn't sign-extend. Otherwise, if the two halfwords of - * the constant are identical, use ilh. Otherwise, if every byte of - * the desired value is 0x00 or 0xff, we can use Form Select Mask for - * Bytes Immediate (fsmbi) to load the value in a single instruction. - * Otherwise, in the general case, we have to use ilhu followed by iohl. 
- */ - if ((ui & 0x0003ffff) == ui) { - spe_ila(p, rT, ui); - } - else if ((ui >> 16) == (ui & 0xffff)) { - spe_ilh(p, rT, ui & 0xffff); - } - else if ( - ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && - ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && - ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && - ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) - ) { - uint mask = 0; - /* fsmbi duplicates each bit in the given mask eight times, - * using a 16-bit value to initialize a 16-byte quadword. - * Each 4-bit nybble of the mask corresponds to a full word - * of the result; look at the value and figure out the mask - * (replicated for each word in the quadword), and then - * form the "select mask" to get the value. - */ - if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; - if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; - if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; - if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; - spe_fsmbi(p, rT, mask); - } - else { - /* The general case: this usually uses two instructions, but - * may use only one if the low-order 16 bits of each word are 0. - */ - spe_ilhu(p, rT, ui >> 16); - if (ui & 0xffff) - spe_iohl(p, rT, ui & 0xffff); - } -} - -/** - * This function is constructed identically to spe_xor_uint() below. - * Changes to one should be made in the other. - */ -void -spe_and_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If we can, emit a single instruction, either And Byte Immediate - * (which uses the same constant across each byte), And Halfword Immediate - * (which sign-extends a 10-bit immediate to 16 bits and uses that - * across each halfword), or And Word Immediate (which sign-extends - * a 10-bit immediate to 32 bits). - * - * Otherwise, we'll need to use a temporary register. 
- */ - uint tmp; - - /* If the upper 23 bits are all 0s or all 1s, sign extension - * will work and we can use And Word Immediate - */ - tmp = ui & 0xfffffe00; - if (tmp == 0xfffffe00 || tmp == 0) { - spe_andi(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric along halfword boundaries and - * the upper 7 bits of each halfword are all 0s or 1s, we - * can use And Halfword Immediate - */ - tmp = ui & 0xfe00fe00; - if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { - spe_andhi(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric in each byte, then we can use - * the And Byte Immediate instruction. - */ - tmp = ui & 0x000000ff; - if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { - spe_andbi(p, rT, rA, tmp); - return; - } - - /* Otherwise, we'll have to use a temporary register. */ - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_and(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); -} - - -/** - * This function is constructed identically to spe_and_uint() above. - * Changes to one should be made in the other. - */ -void -spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If we can, emit a single instruction, either Exclusive Or Byte - * Immediate (which uses the same constant across each byte), Exclusive - * Or Halfword Immediate (which sign-extends a 10-bit immediate to - * 16 bits and uses that across each halfword), or Exclusive Or Word - * Immediate (which sign-extends a 10-bit immediate to 32 bits). - * - * Otherwise, we'll need to use a temporary register. 
- */ - uint tmp; - - /* If the upper 23 bits are all 0s or all 1s, sign extension - * will work and we can use Exclusive Or Word Immediate - */ - tmp = ui & 0xfffffe00; - if (tmp == 0xfffffe00 || tmp == 0) { - spe_xori(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric along halfword boundaries and - * the upper 7 bits of each halfword are all 0s or 1s, we - * can use Exclusive Or Halfword Immediate - */ - tmp = ui & 0xfe00fe00; - if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { - spe_xorhi(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric in each byte, then we can use - * the Exclusive Or Byte Immediate instruction. - */ - tmp = ui & 0x000000ff; - if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { - spe_xorbi(p, rT, rA, tmp); - return; - } - - /* Otherwise, we'll have to use a temporary register. */ - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_xor(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); -} - -void -spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If the comparison value is 9 bits or less, it fits inside a - * Compare Equal Word Immediate instruction. - */ - if ((ui & 0x000001ff) == ui) { - spe_ceqi(p, rT, rA, ui); - } - /* Otherwise, we're going to have to load a word first. */ - else { - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_ceq(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); - } -} - -void -spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If the comparison value is 10 bits or less, it fits inside a - * Compare Logical Greater Than Word Immediate instruction. - */ - if ((ui & 0x000003ff) == ui) { - spe_clgti(p, rT, rA, ui); - } - /* Otherwise, we're going to have to load a word first. 
*/ - else { - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_clgt(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); - } -} - -void -spe_splat(struct spe_function *p, int rT, int rA) -{ - /* Use a temporary, just in case rT == rA */ - int tmp_reg = spe_allocate_available_register(p); - /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ - spe_ila(p, tmp_reg, 0x00010203); - spe_shufb(p, rT, rA, rA, tmp_reg); - spe_release_register(p, tmp_reg); -} - - -void -spe_complement(struct spe_function *p, int rT, int rA) -{ - spe_nor(p, rT, rA, rA); -} - - -void -spe_move(struct spe_function *p, int rT, int rA) -{ - /* Use different instructions depending on the instruction address - * to take advantage of the dual pipelines. - */ - if (p->num_inst & 1) - spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ - else - spe_ori(p, rT, rA, 0); /* even pipe */ -} - - -void -spe_zero(struct spe_function *p, int rT) -{ - spe_xor(p, rT, rT, rT); -} - - -void -spe_splat_word(struct spe_function *p, int rT, int rA, int word) -{ - assert(word >= 0); - assert(word <= 3); - - if (word == 0) { - int tmp1 = rT; - spe_ila(p, tmp1, 66051); - spe_shufb(p, rT, rA, rA, tmp1); - } - else { - /* XXX review this, we may not need the rotqbyi instruction */ - int tmp1 = rT; - int tmp2 = spe_allocate_available_register(p); - - spe_ila(p, tmp1, 66051); - spe_rotqbyi(p, tmp2, rA, 4 * word); - spe_shufb(p, rT, tmp2, tmp2, tmp1); - - spe_release_register(p, tmp2); - } -} - -/** - * For each 32-bit float element of rA and rB, choose the smaller of the - * two, compositing them into the rT register. - * - * The Float Compare Greater Than (fcgt) instruction will put 1s into - * compare_reg where rA > rB, and 0s where rA <= rB. - * - * Then the Select Bits (selb) instruction will take bits from rA where - * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA - * where rA <= rB and from rB where rB > rA, which is exactly the - * "min" operation. 
- * - * The compare_reg could in many cases be the same as rT, unless - * rT == rA || rt == rB. But since this is common in constructions - * like "x = min(x, a)", we always allocate a new register to be safe. - */ -void -spe_float_min(struct spe_function *p, int rT, int rA, int rB) -{ - int compare_reg = spe_allocate_available_register(p); - spe_fcgt(p, compare_reg, rA, rB); - spe_selb(p, rT, rA, rB, compare_reg); - spe_release_register(p, compare_reg); -} - -/** - * For each 32-bit float element of rA and rB, choose the greater of the - * two, compositing them into the rT register. - * - * The logic is similar to that of spe_float_min() above; the only - * difference is that the registers on spe_selb() have been reversed, - * so that the larger of the two is selected instead of the smaller. - */ -void -spe_float_max(struct spe_function *p, int rT, int rA, int rB) -{ - int compare_reg = spe_allocate_available_register(p); - spe_fcgt(p, compare_reg, rA, rB); - spe_selb(p, rT, rB, rA, compare_reg); - spe_release_register(p, compare_reg); -} - -#endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h deleted file mode 100644 index 65d9c77415..0000000000 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ /dev/null @@ -1,433 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Real-time assembly generation interface for Cell B.E. SPEs. - * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf - * - * \author Ian Romanick <idr@us.ibm.com> - * \author Brian Paul - */ - -#ifndef RTASM_PPC_SPE_H -#define RTASM_PPC_SPE_H - -/** 4 bytes per instruction */ -#define SPE_INST_SIZE 4 - -/** number of general-purpose SIMD registers */ -#define SPE_NUM_REGS 128 - -/** Return Address register (aka $lr / Link Register) */ -#define SPE_REG_RA 0 - -/** Stack Pointer register (aka $sp) */ -#define SPE_REG_SP 1 - - -struct spe_function -{ - uint32_t *store; /**< instruction buffer */ - uint num_inst; - uint max_inst; - - /** - * The "set count" reflects the number of nested register sets - * are allowed. 
In the unlikely case that we exceed the set count, - * register allocation will start to be confused, which is critical - * enough that we check for it. - */ - unsigned char set_count; - - /** - * Flags for used and unused registers. Each byte corresponds to a - * register; a 0 in that byte means that the register is available. - * A value of 1 means that the register was allocated in the current - * register set. Any other value N means that the register was allocated - * N register sets ago. - * - * \sa - * spe_allocate_register, spe_allocate_available_register, - * spe_allocate_register_set, spe_release_register_set, spe_release_register, - */ - unsigned char regs[SPE_NUM_REGS]; - - boolean print; /**< print/dump instructions as they're emitted? */ - int indent; /**< number of spaces to indent */ -}; - - -extern void spe_init_func(struct spe_function *p, uint code_size); -extern void spe_release_func(struct spe_function *p); -extern uint spe_code_size(const struct spe_function *p); - -extern int spe_allocate_available_register(struct spe_function *p); -extern int spe_allocate_register(struct spe_function *p, int reg); -extern void spe_release_register(struct spe_function *p, int reg); -extern void spe_allocate_register_set(struct spe_function *p); -extern void spe_release_register_set(struct spe_function *p); - -extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]); - -extern void spe_print_code(struct spe_function *p, boolean enable); -extern void spe_indent(struct spe_function *p, int spaces); -extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); - - -#endif /* RTASM_PPC_SPE_H */ - -#ifndef EMIT -#define EMIT(_name, _op) \ - extern void _name (struct spe_function *p); -#define EMIT_(_name, _op) \ - extern void _name (struct spe_function *p, int rT); -#define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA); -#define EMIT_RR(_name, _op) \ - extern void _name (struct 
spe_function *p, int rT, int rA, int rB); -#define EMIT_RRR(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC); -#define EMIT_RI7(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI8(_name, _op, bias) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI10(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI10s(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int imm); -#define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int imm); -#define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm); -#define UNDEF_EMIT_MACROS -#endif /* EMIT */ - - -/* Memory load / store instructions - */ -EMIT_RR (spe_lqx, 0x1c4) -EMIT_RI16(spe_lqa, 0x061) -EMIT_RI16(spe_lqr, 0x067) -EMIT_RR (spe_stqx, 0x144) -EMIT_RI16(spe_stqa, 0x041) -EMIT_RI16(spe_stqr, 0x047) -EMIT_RI7 (spe_cbd, 0x1f4) -EMIT_RR (spe_cbx, 0x1d4) -EMIT_RI7 (spe_chd, 0x1f5) -EMIT_RI7 (spe_chx, 0x1d5) -EMIT_RI7 (spe_cwd, 0x1f6) -EMIT_RI7 (spe_cwx, 0x1d6) -EMIT_RI7 (spe_cdd, 0x1f7) -EMIT_RI7 (spe_cdx, 0x1d7) - - -/* Constant formation instructions - */ -EMIT_RI16(spe_ilh, 0x083) -EMIT_RI16(spe_ilhu, 0x082) -EMIT_RI16(spe_il, 0x081) -EMIT_RI18(spe_ila, 0x021) -EMIT_RI16(spe_iohl, 0x0c1) -EMIT_RI16(spe_fsmbi, 0x065) - - - -/* Integer and logical instructions - */ -EMIT_RR (spe_ah, 0x0c8) -EMIT_RI10(spe_ahi, 0x01d) -EMIT_RR (spe_a, 0x0c0) -EMIT_RI10s(spe_ai, 0x01c) -EMIT_RR (spe_sfh, 0x048) -EMIT_RI10(spe_sfhi, 0x00d) -EMIT_RR (spe_sf, 0x040) -EMIT_RI10(spe_sfi, 0x00c) -EMIT_RR (spe_addx, 0x340) -EMIT_RR (spe_cg, 0x0c2) -EMIT_RR (spe_cgx, 0x342) -EMIT_RR (spe_sfx, 0x341) -EMIT_RR (spe_bg, 0x042) -EMIT_RR (spe_bgx, 0x343) -EMIT_RR (spe_mpy, 0x3c4) -EMIT_RR (spe_mpyu, 0x3cc) 
-EMIT_RI10(spe_mpyi, 0x074) -EMIT_RI10(spe_mpyui, 0x075) -EMIT_RRR (spe_mpya, 0x00c) -EMIT_RR (spe_mpyh, 0x3c5) -EMIT_RR (spe_mpys, 0x3c7) -EMIT_RR (spe_mpyhh, 0x3c6) -EMIT_RR (spe_mpyhha, 0x346) -EMIT_RR (spe_mpyhhu, 0x3ce) -EMIT_RR (spe_mpyhhau, 0x34e) -EMIT_R (spe_clz, 0x2a5) -EMIT_R (spe_cntb, 0x2b4) -EMIT_R (spe_fsmb, 0x1b6) -EMIT_R (spe_fsmh, 0x1b5) -EMIT_R (spe_fsm, 0x1b4) -EMIT_R (spe_gbb, 0x1b2) -EMIT_R (spe_gbh, 0x1b1) -EMIT_R (spe_gb, 0x1b0) -EMIT_RR (spe_avgb, 0x0d3) -EMIT_RR (spe_absdb, 0x053) -EMIT_RR (spe_sumb, 0x253) -EMIT_R (spe_xsbh, 0x2b6) -EMIT_R (spe_xshw, 0x2ae) -EMIT_R (spe_xswd, 0x2a6) -EMIT_RR (spe_and, 0x0c1) -EMIT_RR (spe_andc, 0x2c1) -EMIT_RI10s(spe_andbi, 0x016) -EMIT_RI10s(spe_andhi, 0x015) -EMIT_RI10s(spe_andi, 0x014) -EMIT_RR (spe_or, 0x041) -EMIT_RR (spe_orc, 0x2c9) -EMIT_RI10s(spe_orbi, 0x006) -EMIT_RI10s(spe_orhi, 0x005) -EMIT_RI10s(spe_ori, 0x004) -EMIT_R (spe_orx, 0x1f0) -EMIT_RR (spe_xor, 0x241) -EMIT_RI10s(spe_xorbi, 0x046) -EMIT_RI10s(spe_xorhi, 0x045) -EMIT_RI10s(spe_xori, 0x044) -EMIT_RR (spe_nand, 0x0c9) -EMIT_RR (spe_nor, 0x049) -EMIT_RR (spe_eqv, 0x249) -EMIT_RRR (spe_selb, 0x008) -EMIT_RRR (spe_shufb, 0x00b) - - -/* Shift and rotate instructions - */ -EMIT_RR (spe_shlh, 0x05f) -EMIT_RI7 (spe_shlhi, 0x07f) -EMIT_RR (spe_shl, 0x05b) -EMIT_RI7 (spe_shli, 0x07b) -EMIT_RR (spe_shlqbi, 0x1db) -EMIT_RI7 (spe_shlqbii, 0x1fb) -EMIT_RR (spe_shlqby, 0x1df) -EMIT_RI7 (spe_shlqbyi, 0x1ff) -EMIT_RR (spe_shlqbybi, 0x1cf) -EMIT_RR (spe_roth, 0x05c) -EMIT_RI7 (spe_rothi, 0x07c) -EMIT_RR (spe_rot, 0x058) -EMIT_RI7 (spe_roti, 0x078) -EMIT_RR (spe_rotqby, 0x1dc) -EMIT_RI7 (spe_rotqbyi, 0x1fc) -EMIT_RR (spe_rotqbybi, 0x1cc) -EMIT_RR (spe_rotqbi, 0x1d8) -EMIT_RI7 (spe_rotqbii, 0x1f8) -EMIT_RR (spe_rothm, 0x05d) -EMIT_RI7 (spe_rothmi, 0x07d) -EMIT_RR (spe_rotm, 0x059) -EMIT_RI7 (spe_rotmi, 0x079) -EMIT_RR (spe_rotqmby, 0x1dd) -EMIT_RI7 (spe_rotqmbyi, 0x1fd) -EMIT_RR (spe_rotqmbybi, 0x1cd) -EMIT_RR (spe_rotqmbi, 0x1c9) -EMIT_RI7 (spe_rotqmbii, 
0x1f9) -EMIT_RR (spe_rotmah, 0x05e) -EMIT_RI7 (spe_rotmahi, 0x07e) -EMIT_RR (spe_rotma, 0x05a) -EMIT_RI7 (spe_rotmai, 0x07a) - - -/* Compare, branch, and halt instructions - */ -EMIT_RR (spe_heq, 0x3d8) -EMIT_RI10(spe_heqi, 0x07f) -EMIT_RR (spe_hgt, 0x258) -EMIT_RI10(spe_hgti, 0x04f) -EMIT_RR (spe_hlgt, 0x2d8) -EMIT_RI10(spe_hlgti, 0x05f) -EMIT_RR (spe_ceqb, 0x3d0) -EMIT_RI10(spe_ceqbi, 0x07e) -EMIT_RR (spe_ceqh, 0x3c8) -EMIT_RI10(spe_ceqhi, 0x07d) -EMIT_RR (spe_ceq, 0x3c0) -EMIT_RI10(spe_ceqi, 0x07c) -EMIT_RR (spe_cgtb, 0x250) -EMIT_RI10(spe_cgtbi, 0x04e) -EMIT_RR (spe_cgth, 0x248) -EMIT_RI10(spe_cgthi, 0x04d) -EMIT_RR (spe_cgt, 0x240) -EMIT_RI10(spe_cgti, 0x04c) -EMIT_RR (spe_clgtb, 0x2d0) -EMIT_RI10(spe_clgtbi, 0x05e) -EMIT_RR (spe_clgth, 0x2c8) -EMIT_RI10(spe_clgthi, 0x05d) -EMIT_RR (spe_clgt, 0x2c0) -EMIT_RI10(spe_clgti, 0x05c) -EMIT_I16 (spe_br, 0x064) -EMIT_I16 (spe_bra, 0x060) -EMIT_RI16(spe_brsl, 0x066) -EMIT_RI16(spe_brasl, 0x062) -EMIT_RI16(spe_brnz, 0x042) -EMIT_RI16(spe_brz, 0x040) -EMIT_RI16(spe_brhnz, 0x046) -EMIT_RI16(spe_brhz, 0x044) - -/* Control instructions - */ -EMIT (spe_lnop, 0x001) - -extern void -spe_lqd(struct spe_function *p, int rT, int rA, int offset); - -extern void -spe_stqd(struct spe_function *p, int rT, int rA, int offset); - -extern void spe_bi(struct spe_function *p, int rA, int d, int e); -extern void spe_iret(struct spe_function *p, int rA, int d, int e); -extern void spe_bisled(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_bisl(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_biz(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_binz(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_bihz(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_bihnz(struct spe_function *p, int rT, int rA, - int d, int e); - - -/** Load/splat immediate float into rT. 
*/ -extern void -spe_load_float(struct spe_function *p, int rT, float x); - -/** Load/splat immediate int into rT. */ -extern void -spe_load_int(struct spe_function *p, int rT, int i); - -/** Load/splat immediate unsigned int into rT. */ -extern void -spe_load_uint(struct spe_function *p, int rT, uint ui); - -/** And immediate value into rT. */ -extern void -spe_and_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Xor immediate value into rT. */ -extern void -spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Compare equal with immediate value. */ -extern void -spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Compare greater with immediate value. */ -extern void -spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Replicate word 0 of rA across rT. */ -extern void -spe_splat(struct spe_function *p, int rT, int rA); - -/** rT = complement_all_bits(rA). */ -extern void -spe_complement(struct spe_function *p, int rT, int rA); - -/** rT = rA. */ -extern void -spe_move(struct spe_function *p, int rT, int rA); - -/** rT = {0,0,0,0}. 
*/ -extern void -spe_zero(struct spe_function *p, int rT); - -/** rT = splat(rA, word) */ -extern void -spe_splat_word(struct spe_function *p, int rT, int rA, int word); - -/** rT = float min(rA, rB) */ -extern void -spe_float_min(struct spe_function *p, int rT, int rA, int rB); - -/** rT = float max(rA, rB) */ -extern void -spe_float_max(struct spe_function *p, int rT, int rA, int rB); - - -/* Floating-point instructions - */ -EMIT_RR (spe_fa, 0x2c4) -EMIT_RR (spe_dfa, 0x2cc) -EMIT_RR (spe_fs, 0x2c5) -EMIT_RR (spe_dfs, 0x2cd) -EMIT_RR (spe_fm, 0x2c6) -EMIT_RR (spe_dfm, 0x2ce) -EMIT_RRR (spe_fma, 0x00e) -EMIT_RR (spe_dfma, 0x35c) -EMIT_RRR (spe_fnms, 0x00d) -EMIT_RR (spe_dfnms, 0x35e) -EMIT_RRR (spe_fms, 0x00f) -EMIT_RR (spe_dfms, 0x35d) -EMIT_RR (spe_dfnma, 0x35f) -EMIT_R (spe_frest, 0x1b8) -EMIT_R (spe_frsqest, 0x1b9) -EMIT_RR (spe_fi, 0x3d4) -EMIT_RI8 (spe_csflt, 0x1da, 155) -EMIT_RI8 (spe_cflts, 0x1d8, 173) -EMIT_RI8 (spe_cuflt, 0x1db, 155) -EMIT_RI8 (spe_cfltu, 0x1d9, 173) -EMIT_R (spe_frds, 0x3b9) -EMIT_R (spe_fesd, 0x3b8) -EMIT_RR (spe_dfceq, 0x3c3) -EMIT_RR (spe_dfcmeq, 0x3cb) -EMIT_RR (spe_dfcgt, 0x2c3) -EMIT_RR (spe_dfcmgt, 0x2cb) -EMIT_RI7 (spe_dftsv, 0x3bf) -EMIT_RR (spe_fceq, 0x3c2) -EMIT_RR (spe_fcmeq, 0x3ca) -EMIT_RR (spe_fcgt, 0x2c2) -EMIT_RR (spe_fcmgt, 0x2ca) -EMIT_R (spe_fscrwr, 0x3ba) -EMIT_ (spe_fscrrd, 0x398) - - -/* Channel instructions - */ -EMIT_R (spe_rdch, 0x00d) -EMIT_R (spe_rdchcnt, 0x00f) -EMIT_R (spe_wrch, 0x10d) - - -#ifdef UNDEF_EMIT_MACROS -#undef EMIT -#undef EMIT_ -#undef EMIT_R -#undef EMIT_RR -#undef EMIT_RRR -#undef EMIT_RI7 -#undef EMIT_RI8 -#undef EMIT_RI10 -#undef EMIT_RI10s -#undef EMIT_RI16 -#undef EMIT_RI18 -#undef EMIT_I16 -#undef UNDEF_EMIT_MACROS -#endif /* EMIT_ */ diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 34bfa527db..596c691e9c 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ 
b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -8,7 +8,7 @@ /* Helper function to choose and instantiate one of the software rasterizers: - * cell, llvmpipe, softpipe. + * llvmpipe, softpipe. */ #ifdef GALLIUM_SOFTPIPE @@ -19,21 +19,12 @@ #include "llvmpipe/lp_public.h" #endif -#ifdef GALLIUM_CELL -#include "cell/ppu/cell_public.h" -#endif - static INLINE struct pipe_screen * sw_screen_create_named(struct sw_winsys *winsys, const char *driver) { struct pipe_screen *screen = NULL; -#if defined(GALLIUM_CELL) - if (screen == NULL && strcmp(driver, "cell") == 0) - screen = cell_create_screen(winsys); -#endif - #if defined(GALLIUM_LLVMPIPE) if (screen == NULL && strcmp(driver, "llvmpipe") == 0) screen = llvmpipe_create_screen(winsys); @@ -54,9 +45,7 @@ sw_screen_create(struct sw_winsys *winsys) const char *default_driver; const char *driver; -#if defined(GALLIUM_CELL) - default_driver = "cell"; -#elif defined(GALLIUM_LLVMPIPE) +#if defined(GALLIUM_LLVMPIPE) default_driver = "llvmpipe"; #elif defined(GALLIUM_SOFTPIPE) default_driver = "softpipe"; diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile deleted file mode 100644 index 47aef7b05f..0000000000 --- a/src/gallium/drivers/cell/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# Cell Gallium driver Makefile - - -default: - ( cd spu ; make ) - ( cd ppu ; make ) - - - -clean: - ( cd spu ; make clean ) - ( cd ppu ; make clean ) diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h deleted file mode 100644 index a8cdde34aa..0000000000 --- a/src/gallium/drivers/cell/common.h +++ /dev/null @@ -1,377 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Types and tokens which are common to the SPU and PPU code. - */ - - -#ifndef CELL_COMMON_H -#define CELL_COMMON_H - -#include "pipe/p_compiler.h" -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include <stdio.h> - -/** The standard assert macro doesn't seem to work reliably */ -#define ASSERT(x) \ - if (!(x)) { \ - ubyte *p = NULL; \ - fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \ - __FILE__, __LINE__, __FUNCTION__, #x); \ - *p = 0; \ - exit(1); \ - } - - -#define JOIN(x, y) JOIN_AGAIN(x, y) -#define JOIN_AGAIN(x, y) x ## y - -#define STATIC_ASSERT(e) \ -{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 
1 : -1];} - - - -/** for sanity checking */ -#define ASSERT_ALIGN16(ptr) \ - ASSERT((((unsigned long) (ptr)) & 0xf) == 0); - - -/** round up value to next multiple of 4 */ -#define ROUNDUP4(k) (((k) + 0x3) & ~0x3) - -/** round up value to next multiple of 8 */ -#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) - -/** round up value to next multiple of 16 */ -#define ROUNDUP16(k) (((k) + 0xf) & ~0xf) - - -#define CELL_MAX_SPUS 8 - -#define CELL_MAX_SAMPLERS 4 -#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ -#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ -#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */ -#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */ - -#define TILE_SIZE 32 - - -/** - * The low byte of a mailbox word contains the command opcode. - * Remaining higher bytes are command specific. - */ -#define CELL_CMD_OPCODE_MASK 0xff - -#define CELL_CMD_EXIT 1 -#define CELL_CMD_CLEAR_SURFACE 2 -#define CELL_CMD_FINISH 3 -#define CELL_CMD_RENDER 4 -#define CELL_CMD_BATCH 5 -#define CELL_CMD_RELEASE_VERTS 6 -#define CELL_CMD_STATE_FRAMEBUFFER 10 -#define CELL_CMD_STATE_FRAGMENT_OPS 11 -#define CELL_CMD_STATE_SAMPLER 12 -#define CELL_CMD_STATE_TEXTURE 13 -#define CELL_CMD_STATE_VERTEX_INFO 14 -#define CELL_CMD_STATE_VIEWPORT 15 -#define CELL_CMD_STATE_UNIFORMS 16 -#define CELL_CMD_STATE_VS_ARRAY_INFO 17 -#define CELL_CMD_STATE_BIND_VS 18 -#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 -#define CELL_CMD_STATE_ATTRIB_FETCH 20 -#define CELL_CMD_STATE_FS_CONSTANTS 21 -#define CELL_CMD_STATE_RASTERIZER 22 -#define CELL_CMD_VS_EXECUTE 23 -#define CELL_CMD_FLUSH_BUFFER_RANGE 24 -#define CELL_CMD_FENCE 25 - - -/** Command/batch buffers */ -#define CELL_NUM_BUFFERS 4 -#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ - -#define CELL_BUFFER_STATUS_FREE 10 -#define CELL_BUFFER_STATUS_USED 20 - -/** Debug flags */ -#define CELL_DEBUG_CHECKER (1 << 0) -#define CELL_DEBUG_ASM (1 << 1) -#define CELL_DEBUG_SYNC (1 << 2) -#define 
CELL_DEBUG_FRAGMENT_OPS (1 << 3) -#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) -#define CELL_DEBUG_CMD (1 << 5) -#define CELL_DEBUG_CACHE (1 << 6) - -#define CELL_FENCE_IDLE 0 -#define CELL_FENCE_EMITTED 1 -#define CELL_FENCE_SIGNALLED 2 - -#define CELL_FACING_FRONT 0 -#define CELL_FACING_BACK 1 - -struct cell_fence -{ - /** There's a 16-byte status qword per SPU */ - volatile uint status[CELL_MAX_SPUS][4]; -}; - -#ifdef __SPU__ -typedef vector unsigned int opcode_t; -#else -typedef unsigned int opcode_t[4]; -#endif - -/** - * Fence command sent to SPUs. In response, the SPUs will write - * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory. - */ -struct cell_command_fence -{ - opcode_t opcode; /**< CELL_CMD_FENCE */ - struct cell_fence *fence; - uint32_t pad_[3]; -}; - - -/** - * Command to specify per-fragment operations state and generated code. - * Note that this is a variant-length structure, allocated with as - * much memory as needed to hold the generated code; the "code" - * field *must* be the last field in the structure. Also, the entire - * length of the structure (including the variant code field) must be - * a multiple of 8 bytes; we require that this structure itself be - * a multiple of 8 bytes, and that the generated code also be a multiple - * of 8 bytes. - * - * Also note that the dsa, blend, blend_color fields are really only needed - * for the fallback/C per-pixel code. They're not used when we generate - * dynamic SPU fragment code (which is the normal case), and will eventually - * be removed from this structure. 
- */ -struct cell_command_fragment_ops -{ - opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ - - /* Fields for the fallback case */ - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - - /* Fields for the generated SPU code */ - unsigned total_code_size; - unsigned front_code_index; - unsigned back_code_index; - /* this field has variant length, and must be the last field in - * the structure - */ - unsigned code[0]; -}; - - -/** Max instructions for fragment programs */ -#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 - -/** - * Command to send a fragment program to SPUs. - */ -struct cell_command_fragment_program -{ - opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ - uint num_inst; /**< Number of instructions */ - uint32_t pad[3]; - unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; -}; - - -/** - * Tell SPUs about the framebuffer size, location - */ -struct cell_command_framebuffer -{ - opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ - int width, height; - void *color_start, *depth_start; - enum pipe_format color_format, depth_format; - uint32_t pad_[2]; -}; - - -/** - * Tell SPUs about rasterizer state. - */ -struct cell_command_rasterizer -{ - opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ - struct pipe_rasterizer_state rasterizer; - /*uint32_t pad[1];*/ -}; - - -/** - * Clear framebuffer to the given value/color. - */ -struct cell_command_clear_surface -{ - opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ - uint surface; /**< Temporary: 0=color, 1=Z */ - uint value; - uint32_t pad[2]; -}; - - -/** - * Array info used by the vertex shader's vertex puller. - */ -struct cell_array_info -{ - uint64_t base; /**< Base address of the 0th element. */ - uint attr; /**< Attribute that this state is for. */ - uint pitch; /**< Byte pitch from one entry to the next. 
*/ - uint size; - uint function_offset; -}; - - -struct cell_attribute_fetch_code -{ - uint64_t base; - uint size; -}; - - -struct cell_buffer_range -{ - uint64_t base; - unsigned size; -}; - - -struct cell_shader_info -{ - uint64_t declarations; - uint64_t instructions; - uint64_t immediates; - - unsigned num_outputs; - unsigned num_declarations; - unsigned num_instructions; - unsigned num_immediates; -}; - - -#define SPU_VERTS_PER_BATCH 64 -struct cell_command_vs -{ - opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */ - uint64_t vOut[SPU_VERTS_PER_BATCH]; - unsigned num_elts; - unsigned elts[SPU_VERTS_PER_BATCH]; - float plane[12][4]; - unsigned nr_planes; - unsigned nr_attrs; -}; - - -struct cell_command_render -{ - opcode_t opcode; /**< CELL_CMD_RENDER */ - uint prim_type; /**< PIPE_PRIM_x */ - uint num_verts; - uint vertex_size; /**< bytes per vertex */ - uint num_indexes; - uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ - float xmin, ymin, xmax, ymax; /* XXX another dummy field */ - uint min_index; - boolean inline_verts; - uint32_t pad_[1]; -}; - - -struct cell_command_release_verts -{ - opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */ - uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ - uint32_t pad_[3]; -}; - - -struct cell_command_sampler -{ - opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */ - uint unit; - struct pipe_sampler_state state; - uint32_t pad_[3]; -}; - - -struct cell_command_texture -{ - opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */ - uint target; /**< PIPE_TEXTURE_x */ - uint unit; - void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ - ushort width[CELL_MAX_TEXTURE_LEVELS]; - ushort height[CELL_MAX_TEXTURE_LEVELS]; - ushort depth[CELL_MAX_TEXTURE_LEVELS]; -}; - - -#define MAX_SPU_FUNCTIONS 12 -/** - * Used to tell the PPU about the address of particular functions in the - * SPU's address space. 
- */ -struct cell_spu_function_info -{ - uint num; - char names[MAX_SPU_FUNCTIONS][16]; - uint addrs[MAX_SPU_FUNCTIONS]; - char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */ -}; - - -/** This is the object passed to spe_create_thread() */ -PIPE_ALIGN_TYPE(16, -struct cell_init_info -{ - unsigned id; - unsigned num_spus; - unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ - float inv_timebase; /**< 1.0/timebase, for perf measurement */ - - /** Buffers for command batches, vertex/index data */ - ubyte *buffers[CELL_NUM_BUFFERS]; - uint *buffer_status; /**< points at cell_context->buffer_status */ - - struct cell_spu_function_info *spu_functions; -}); - - -#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile deleted file mode 100644 index c92f8e5cba..0000000000 --- a/src/gallium/drivers/cell/ppu/Makefile +++ /dev/null @@ -1,86 +0,0 @@ -# Gallium3D Cell driver: PPU code - -# This makefile builds the libcell.a library which gets pulled into -# the main libGL.so library - - -TOP = ../../../../.. -include $(TOP)/configs/current - - -# This is the "top-level" cell PPU driver code, will get pulled into libGL.so -# by the winsys Makefile. -CELL_LIB = ../libcell.a - - -# This is the SPU code. We'd like to be able to put this into the libcell.a -# archive with the PPU code, but nesting .a libs doesn't seem to work. 
-# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile -SPU_CODE_MODULE = ../spu/g3d_spu.a - - -SOURCES = \ - cell_batch.c \ - cell_clear.c \ - cell_context.c \ - cell_draw_arrays.c \ - cell_fence.c \ - cell_flush.c \ - cell_gen_fragment.c \ - cell_gen_fp.c \ - cell_state_derived.c \ - cell_state_emit.c \ - cell_state_shader.c \ - cell_pipe_state.c \ - cell_screen.c \ - cell_state_vertex.c \ - cell_spu.c \ - cell_surface.c \ - cell_texture.c \ - cell_vbuf.c \ - cell_vertex_fetch.c \ - cell_vertex_shader.c - - -OBJECTS = $(SOURCES:.c=.o) \ - -INCLUDE_DIRS = \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers - -.c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ - - -.c.s: - $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ - - -default: $(CELL_LIB) - - -$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) -# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work - ar -ru $(CELL_LIB) $(OBJECTS) - -#$(PROG): $(PPU_OBJECTS) -# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) - - - -clean: - rm -f *.o *~ $(CELL_LIB) - - - -depend: $(SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null - -include depend - - - diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c deleted file mode 100644 index fe144f8b84..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ /dev/null @@ -1,260 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_fence.h" -#include "cell_spu.h" - - - -/** - * Search the buffer pool for an empty/free buffer and return its index. - * Buffers are used for storing vertex data, state and commands which - * will be sent to the SPUs. - * If no empty buffers are available, wait for one. 
- * \return buffer index in [0, CELL_NUM_BUFFERS-1] - */ -uint -cell_get_empty_buffer(struct cell_context *cell) -{ - static uint prev_buffer = 0; - uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS; - uint tries = 0; - - /* Find a buffer that's marked as free by all SPUs */ - while (1) { - uint spu, num_free = 0; - - for (spu = 0; spu < cell->num_spus; spu++) { - if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { - num_free++; - - if (num_free == cell->num_spus) { - /* found a free buffer, now mark status as used */ - for (spu = 0; spu < cell->num_spus; spu++) { - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - } - /* - printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); - */ - prev_buffer = buf; - - /* release tex buffer associated w/ prev use of this batch buf */ - cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); - - return buf; - } - } - else { - break; - } - } - - /* try next buf */ - buf = (buf + 1) % CELL_NUM_BUFFERS; - - tries++; - if (tries == 100) { - /* - printf("PPU WAITING for buffer...\n"); - */ - } - } -} - - -/** - * Append a fence command to the current batch buffer. - * Note that we're sure there's always room for this because of the - * adjusted size check in cell_batch_free_space(). 
- */ -static void -emit_fence(struct cell_context *cell) -{ - const uint batch = cell->cur_batch; - const uint size = cell->buffer_size[batch]; - struct cell_command_fence *fence_cmd; - struct cell_fence *fence = &cell->fenced_buffers[batch].fence; - uint i; - - /* set fence status to emitted, not yet signalled */ - for (i = 0; i < cell->num_spus; i++) { - fence->status[i][0] = CELL_FENCE_EMITTED; - } - - STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0); - ASSERT(size % 16 == 0); - ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); - - fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); - fence_cmd->opcode[0] = CELL_CMD_FENCE; - fence_cmd->fence = fence; - - /* update batch buffer size */ - cell->buffer_size[batch] = size + sizeof(struct cell_command_fence); -} - - -/** - * Flush the current batch buffer to the SPUs. - * An empty buffer will be found and set as the new current batch buffer - * for subsequent commands/data. - */ -void -cell_batch_flush(struct cell_context *cell) -{ - static boolean flushing = FALSE; - uint batch = cell->cur_batch; - uint size = cell->buffer_size[batch]; - uint spu, cmd_word; - - assert(!flushing); - - if (size == 0) - return; - - /* Before we use this batch buffer, make sure any fenced texture buffers - * are released. - */ - if (cell->fenced_buffers[batch].head) { - emit_fence(cell); - size = cell->buffer_size[batch]; - } - - flushing = TRUE; - - assert(batch < CELL_NUM_BUFFERS); - - /* - printf("cell_batch_dispatch: buf %u at %p, size %u\n", - batch, &cell->buffer[batch][0], size); - */ - - /* - * Build "BATCH" command and send to all SPUs. 
- */ - cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); - - for (spu = 0; spu < cell->num_spus; spu++) { - assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); - send_mbox_message(cell_global.spe_contexts[spu], cmd_word); - } - - /* When the SPUs are done copying the buffer into their locals stores - * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] - * array indicating that the PPU can re-use the buffer. - */ - - batch = cell_get_empty_buffer(cell); - - cell->buffer_size[batch] = 0; /* empty */ - cell->cur_batch = batch; - - flushing = FALSE; -} - - -/** - * Return the number of bytes free in the current batch buffer. - */ -uint -cell_batch_free_space(const struct cell_context *cell) -{ - uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; - free -= sizeof(struct cell_command_fence); - return free; -} - - -/** - * Allocate space in the current batch buffer for 'bytes' space. - * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned. - * \return address in batch buffer to put data - */ -void * -cell_batch_alloc16(struct cell_context *cell, uint bytes) -{ - void *pos; - uint size; - - ASSERT(bytes % 16 == 0); - ASSERT(bytes <= CELL_BUFFER_SIZE); - ASSERT(cell->cur_batch >= 0); - -#ifdef ASSERT - { - uint spu; - for (spu = 0; spu < cell->num_spus; spu++) { - ASSERT(cell->buffer_status[spu][cell->cur_batch][0] - == CELL_BUFFER_STATUS_USED); - } - } -#endif - - size = cell->buffer_size[cell->cur_batch]; - - if (bytes > cell_batch_free_space(cell)) { - cell_batch_flush(cell); - size = 0; - } - - ASSERT(size % 16 == 0); - ASSERT(size + bytes <= CELL_BUFFER_SIZE); - - pos = (void *) (cell->buffer[cell->cur_batch] + size); - - cell->buffer_size[cell->cur_batch] = size + bytes; - - return pos; -} - - -/** - * One-time init of batch buffers. 
- */ -void -cell_init_batch_buffers(struct cell_context *cell) -{ - uint spu, buf; - - /* init command, vertex/index buffer info */ - for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { - cell->buffer_size[buf] = 0; - - /* init batch buffer status values, - * mark 0th buffer as used, rest as free. - */ - for (spu = 0; spu < cell->num_spus; spu++) { - if (buf == 0) - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - else - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; - } - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h deleted file mode 100644 index 290136031a..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ /dev/null @@ -1,54 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_BATCH_H -#define CELL_BATCH_H - -#include "pipe/p_compiler.h" - - -struct cell_context; - - -extern uint -cell_get_empty_buffer(struct cell_context *cell); - -extern void -cell_batch_flush(struct cell_context *cell); - -extern uint -cell_batch_free_space(const struct cell_context *cell); - -extern void * -cell_batch_alloc16(struct cell_context *cell, uint bytes); - -extern void -cell_init_batch_buffers(struct cell_context *cell); - - -#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c deleted file mode 100644 index 6a525ef4e4..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ /dev/null @@ -1,93 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Authors - * Brian Paul - */ - -#include <stdio.h> -#include <assert.h> -#include <stdint.h> -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" -#include "cell/common.h" -#include "cell_clear.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_state.h" - - -/** - * Called via pipe->clear() - */ -void -cell_clear(struct pipe_context *pipe, unsigned buffers, - const pipe_color_union *color, - double depth, unsigned stencil) -{ - struct cell_context *cell = cell_context(pipe); - - if (cell->dirty) - cell_update_derived(cell); - - if (buffers & PIPE_CLEAR_COLOR) { - uint surfIndex = 0; - union util_color uc; - - util_pack_color(color->f, cell->framebuffer.cbufs[0]->format, &uc); - - /* Build a CLEAR command and place it in the current batch buffer */ - STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) - cell_batch_alloc16(cell, sizeof(*clr)); - clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; - clr->surface = surfIndex; - clr->value = uc.ui; - } - - if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { - uint surfIndex = 1; - uint clearValue; - - clearValue = util_pack_z_stencil(cell->framebuffer.zsbuf->format, - depth, stencil); - - /* Build a CLEAR command and place it in the current batch buffer */ - 
STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) - cell_batch_alloc16(cell, sizeof(*clr)); - clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; - clr->surface = surfIndex; - clr->value = clearValue; - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h deleted file mode 100644 index a365feb0f0..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_CLEAR_H -#define CELL_CLEAR_H - - -struct pipe_context; - - -extern void -cell_clear(struct pipe_context *pipe, unsigned buffers, - const union pipe_color_union *color, - double depth, unsigned stencil); - - -#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c deleted file mode 100644 index 58e647a39f..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * Authors - * Brian Paul - */ - - -#include <stdio.h> - -#include "pipe/p_defines.h" -#include "pipe/p_format.h" -#include "util/u_memory.h" -#include "pipe/p_screen.h" -#include "util/u_inlines.h" - -#include "draw/draw_context.h" -#include "draw/draw_private.h" - -#include "cell/common.h" -#include "cell_batch.h" -#include "cell_clear.h" -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_fence.h" -#include "cell_flush.h" -#include "cell_state.h" -#include "cell_surface.h" -#include "cell_spu.h" -#include "cell_pipe_state.h" -#include "cell_texture.h" -#include "cell_vbuf.h" - - - -static void -cell_destroy_context( struct pipe_context *pipe ) -{ - struct cell_context *cell = cell_context(pipe); - unsigned i; - - for (i = 0; i < cell->num_vertex_buffers; i++) { - pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL); - } - - util_delete_keymap(cell->fragment_ops_cache, NULL); - - cell_spu_exit(cell); - - align_free(cell); -} - - -static struct draw_context * -cell_draw_create(struct cell_context *cell) -{ - struct draw_context *draw = draw_create(&cell->pipe); - -#if 0 /* broken */ - if (getenv("GALLIUM_CELL_VS")) { - /* plug in SPU-based vertex transformation code */ - draw->shader_queue_flush = cell_vertex_shader_queue_flush; - draw->driver_private = cell; - } -#endif - - return draw; -} - - -static const struct debug_named_value cell_debug_flags[] = { - {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */ - {"asm", CELL_DEBUG_ASM, NULL}, /**< dump SPU asm code */ - {"sync", CELL_DEBUG_SYNC, NULL}, /**< SPUs do synchronous DMA */ - {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/ - {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/ - {"cmd", CELL_DEBUG_CMD, NULL}, /**< SPUs dump command buffer info */ - 
{"cache", CELL_DEBUG_CACHE, NULL}, /**< report texture cache stats on exit */ - DEBUG_NAMED_VALUE_END -}; - - -struct pipe_context * -cell_create_context(struct pipe_screen *screen, - void *priv ) -{ - struct cell_context *cell; - uint i; - - /* some fields need to be 16-byte aligned, so align the whole object */ - cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); - if (!cell) - return NULL; - - memset(cell, 0, sizeof(*cell)); - - cell->winsys = NULL; /* XXX: fixme - get this from screen? */ - cell->pipe.winsys = NULL; - cell->pipe.screen = screen; - cell->pipe.priv = priv; - cell->pipe.destroy = cell_destroy_context; - - cell->pipe.clear = cell_clear; - cell->pipe.flush = cell_flush; - -#if 0 - cell->pipe.begin_query = cell_begin_query; - cell->pipe.end_query = cell_end_query; - cell->pipe.wait_query = cell_wait_query; -#endif - - cell_init_draw_functions(cell); - cell_init_state_functions(cell); - cell_init_shader_functions(cell); - cell_init_surface_functions(cell); - cell_init_vertex_functions(cell); - cell_init_texture_transfer_funcs(cell); - - cell->draw = cell_draw_create(cell); - - /* Create cache of fragment ops generated code */ - cell->fragment_ops_cache = - util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); - - cell_init_vbuf(cell); - - draw_set_rasterize_stage(cell->draw, cell->vbuf); - - /* convert all points/lines to tris for the time being */ - draw_wide_point_threshold(cell->draw, 0.0); - draw_wide_line_threshold(cell->draw, 0.0); - - /* get env vars or read config file to get debug flags */ - cell->debug_flags = debug_get_flags_option("CELL_DEBUG", - cell_debug_flags, - 0 ); - - for (i = 0; i < CELL_NUM_BUFFERS; i++) - cell_fence_init(&cell->fenced_buffers[i].fence); - - - /* - * SPU stuff - */ - /* This call only works with SDK 3.0. Anyone still using 2.1??? 
*/ - cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); - cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); - if (cell->debug_flags) { - printf("Cell: found %d Cell(s) with %u SPUs\n", - cell->num_cells, cell->num_spus); - } - if (getenv("CELL_NUM_SPUS")) { - cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); - assert(cell->num_spus > 0); - } - - cell_start_spus(cell); - - cell_init_batch_buffers(cell); - - /* make sure SPU initializations are done before proceeding */ - cell_flush_int(cell, CELL_FLUSH_WAIT); - - return &cell->pipe; -} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h deleted file mode 100644 index d1aee62ba1..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ /dev/null @@ -1,210 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_CONTEXT_H -#define CELL_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "draw/draw_vertex.h" -#include "draw/draw_vbuf.h" -/*#include "cell_winsys.h"*/ -#include "cell/common.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "tgsi/tgsi_scan.h" -#include "util/u_keymap.h" - - -struct cell_vbuf_render; - - -/** - * Cell vertex shader state, subclass of pipe_shader_state. - */ -struct cell_vertex_shader_state -{ - struct pipe_shader_state shader; - struct tgsi_shader_info info; - void *draw_data; -}; - - -/** - * Cell fragment shader state, subclass of pipe_shader_state. - */ -struct cell_fragment_shader_state -{ - struct pipe_shader_state shader; - struct tgsi_shader_info info; - struct spe_function code; - void *data; -}; - - -/** - * Key for mapping per-fragment state to cached SPU machine code. - * keymap(cell_fragment_ops_key) => cell_command_fragment_ops - */ -struct cell_fragment_ops_key -{ - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_depth_stencil_alpha_state dsa; - enum pipe_format color_format; - enum pipe_format zs_format; -}; - - -struct cell_buffer_node; - -/** - * Fenced buffer list. List of buffers which can be unreferenced after - * the fence has been executed/signalled. - */ -struct cell_buffer_list -{ - PIPE_ALIGN_VAR(16) struct cell_fence fence; - struct cell_buffer_node *head; -}; - -struct cell_velems_state -{ - unsigned count; - struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; -}; - -/** - * Per-context state, subclass of pipe_context. 
- */ -struct cell_context -{ - struct pipe_context pipe; - - struct cell_winsys *winsys; - - const struct pipe_blend_state *blend; - const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - uint num_samplers; - const struct pipe_depth_stencil_alpha_state *depth_stencil; - const struct pipe_rasterizer_state *rasterizer; - const struct cell_vertex_shader_state *vs; - const struct cell_fragment_shader_state *fs; - const struct cell_velems_state *velems; - - struct spe_function logic_op; - - struct pipe_blend_color blend_color; - struct pipe_stencil_ref stencil_ref; - struct pipe_clip_state clip; - struct pipe_resource *constants[2]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct cell_resource *texture[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; - uint num_textures; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - uint num_vertex_buffers; - struct pipe_index_buffer index_buffer; - - ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; - ubyte *zsbuf_map; - - uint dirty; - uint dirty_textures; /* bitmask of texture units */ - uint dirty_samplers; /* bitmask of sampler units */ - - /** Cache of code generated for per-fragment ops */ - struct keymap *fragment_ops_cache; - - /** The primitive drawing context */ - struct draw_context *draw; - struct draw_stage *render_stage; - - /** For post-transformed vertex buffering: */ - struct cell_vbuf_render *vbuf_render; - struct draw_stage *vbuf; - - struct vertex_info vertex_info; - - /** Mapped constant buffers */ - const void *mapped_constants[PIPE_SHADER_TYPES]; - - PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; - - uint num_cells, num_spus; - - /** Buffers for command batches, vertex/index data */ - uint buffer_size[CELL_NUM_BUFFERS]; - PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; - - int cur_batch; /**< which 
buffer is being filled w/ commands */ - - /** [4] to ensure 16-byte alignment for each status word */ - PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; - - - /** Associated with each command/batch buffer is a list of pipe_buffers - * that are fenced. When the last command in a buffer is executed, the - * fence will be signalled, indicating that any pipe_buffers preceeding - * that fence can be unreferenced (and probably freed). - */ - struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; - - - struct spe_function attrib_fetch; - unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; - - unsigned debug_flags; -}; - - - - -static INLINE struct cell_context * -cell_context(struct pipe_context *pipe) -{ - return (struct cell_context *) pipe; -} - - -struct pipe_context * -cell_create_context(struct pipe_screen *screen, - void *priv ); - -extern void -cell_vertex_shader_queue_flush(struct draw_context *draw); - - -/* XXX find a better home for this */ -extern void cell_update_vertex_fetch(struct draw_context *draw); - - -#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c deleted file mode 100644 index a367fa3fe1..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Brian Paul - * Keith Whitwell - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "util/u_inlines.h" - -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_state.h" -#include "cell_flush.h" -#include "cell_texture.h" - -#include "draw/draw_context.h" - - - - - - -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - * - * XXX should the element buffer be specified/bound with a separate function? 
- */ -static void -cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) -{ - struct cell_context *cell = cell_context(pipe); - struct draw_context *draw = cell->draw; - void *mapped_indices = NULL; - unsigned i; - - if (cell->dirty) - cell_update_derived( cell ); - -#if 0 - cell_map_surfaces(cell); -#endif - - /* - * Map vertex buffers - */ - for (i = 0; i < cell->num_vertex_buffers; i++) { - void *buf = cell_resource(cell->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } - /* Map index buffer, if present */ - if (info->indexed && cell->index_buffer.buffer) - mapped_indices = cell_resource(cell->index_buffer.buffer)->data; - - draw_set_mapped_index_buffer(draw, mapped_indices); - - /* draw! */ - draw_vbo(draw, info); - - /* - * unmap vertex/index buffers - will cause draw module to flush - */ - for (i = 0; i < cell->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (mapped_indices) { - draw_set_mapped_index_buffer(draw, NULL); - } - - /* - * TODO: Flush only when a user vertex/index buffer is present - * (or even better, modify draw module to do this - * internally when this condition is seen?) - */ - draw_flush(draw); -} - - -void -cell_init_draw_functions(struct cell_context *cell) -{ - cell->pipe.draw_vbo = cell_draw_vbo; -} - diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h deleted file mode 100644 index 148873aa67..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_DRAW_ARRAYS_H -#define CELL_DRAW_ARRAYS_H - - -extern void -cell_init_draw_functions(struct cell_context *cell); - - -#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c deleted file mode 100644 index 181fef44f4..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ /dev/null @@ -1,172 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include <unistd.h> -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_fence.h" -#include "cell_texture.h" - - -void -cell_fence_init(struct cell_fence *fence) -{ - uint i; - ASSERT_ALIGN16(fence->status); - for (i = 0; i < CELL_MAX_SPUS; i++) { - fence->status[i][0] = CELL_FENCE_IDLE; - } -} - - -boolean -cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence) -{ - uint i; - for (i = 0; i < cell->num_spus; i++) { - if (fence->status[i][0] != CELL_FENCE_SIGNALLED) - return FALSE; - /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/ - } - return TRUE; -} - - -boolean -cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence, - uint64_t timeout) -{ - while (!cell_fence_signalled(cell, fence)) { - usleep(10); - } - -#ifdef DEBUG - { - uint i; - for (i = 0; i < cell->num_spus; i++) { - assert(fence->status[i][0] == CELL_FENCE_SIGNALLED); - } - } -#endif - return TRUE; -} - - - - -struct cell_buffer_node -{ - struct pipe_resource *buffer; - struct cell_buffer_node *next; -}; - - -#if 0 -static void -cell_add_buffer_to_list(struct cell_context *cell, - struct cell_buffer_list *list, - struct pipe_resource *buffer) -{ - struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); - /* create new list node which references the buffer, insert at head */ - if (node) { - pipe_resource_reference(&node->buffer, buffer); - node->next = list->head; - list->head = node; - } -} -#endif - - -/** - * Wait for completion of the given fence, then unreference any buffers - * on the list. - * This typically unrefs/frees texture buffers after any rendering which uses - * them has completed. 
- */ -void -cell_free_fenced_buffers(struct cell_context *cell, - struct cell_buffer_list *list) -{ - if (list->head) { - /*struct pipe_screen *ps = cell->pipe.screen;*/ - struct cell_buffer_node *node; - - cell_fence_finish(cell, &list->fence); - - /* traverse the list, unreferencing buffers, freeing nodes */ - node = list->head; - while (node) { - struct cell_buffer_node *next = node->next; - assert(node->buffer); - /* XXX need this? pipe_buffer_unmap(ps, node->buffer);*/ -#if 0 - printf("Unref buffer %p\n", node->buffer); - if (node->buffer->reference.count == 1) - printf(" Delete!\n"); -#endif - pipe_resource_reference(&node->buffer, NULL); - FREE(node); - node = next; - } - list->head = NULL; - } -} - - -/** - * This should be called for each render command. - * Any texture buffers that are current bound will be added to a fenced - * list to be freed later when the fence is executed/signalled. - */ -void -cell_add_fenced_textures(struct cell_context *cell) -{ - /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/ - uint i; - - for (i = 0; i < cell->num_textures; i++) { - struct cell_resource *ct = cell->texture[i]; - if (ct) { -#if 0 - printf("Adding texture %p buffer %p to list\n", - ct, ct->tiled_buffer[level]); -#endif -#if 00 - /* XXX this needs to be fixed/restored! - * Maybe keep pointers to textures, not buffers. - */ - if (ct->base.buffer) - cell_add_buffer_to_list(cell, list, ct->buffer); -#endif - } - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h deleted file mode 100644 index 3568230b1c..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.h +++ /dev/null @@ -1,60 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_FENCE_H -#define CELL_FENCE_H - - -extern void -cell_fence_init(struct cell_fence *fence); - - -extern boolean -cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence, - unsigned flags); - - -extern boolean -cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence, - unsigned flags, - uint64_t timeout); - - - -extern void -cell_free_fenced_buffers(struct cell_context *cell, - struct cell_buffer_list *list); - - -extern void -cell_add_fenced_textures(struct cell_context *cell); - - -#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c deleted file mode 100644 index 463f4d03eb..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_render.h" -#include "draw/draw_context.h" - - -/** - * Called via pipe->flush() - */ -void -cell_flush(struct pipe_context *pipe, - struct pipe_fence_handle **fence) -{ - struct cell_context *cell = cell_context(pipe); - - if (fence) { - *fence = NULL; - } - - flags |= CELL_FLUSH_WAIT; - - draw_flush( cell->draw ); - cell_flush_int(cell, flags); -} - - -/** - * Cell internal flush function. Send the current batch buffer to all SPUs. - * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. - * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero - */ -void -cell_flush_int(struct cell_context *cell, unsigned flags) -{ - static boolean flushing = FALSE; /* recursion catcher */ - uint i; - - ASSERT(!flushing); - flushing = TRUE; - - if (flags & CELL_FLUSH_WAIT) { - STATIC_ASSERT(sizeof(opcode_t) % 16 == 0); - opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t)); - *cmd[0] = CELL_CMD_FINISH; - } - - cell_batch_flush(cell); - -#if 0 - /* Send CMD_FINISH to all SPUs */ - for (i = 0; i < cell->num_spus; i++) { - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); - } -#endif - - if (flags & CELL_FLUSH_WAIT) { - /* Wait for ack */ - for (i = 0; i < cell->num_spus; i++) { - uint k = wait_mbox_message(cell_global.spe_contexts[i]); - assert(k == CELL_CMD_FINISH); - } - } - - flushing = FALSE; -} - - -void -cell_flush_buffer_range(struct cell_context *cell, void *ptr, - unsigned size) -{ - STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 
16 == 0); - uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell, - sizeof(opcode_t) + sizeof(struct cell_buffer_range)); - struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4]; - batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; - br->base = (uintptr_t) ptr; - br->size = size; -} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h deleted file mode 100644 index 509ae6239a..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ /dev/null @@ -1,45 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_FLUSH -#define CELL_FLUSH - -#define CELL_FLUSH_WAIT 0x80000000 - -extern void -cell_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); - -extern void -cell_flush_int(struct cell_context *cell, unsigned flags); - -extern void -cell_flush_buffer_range(struct cell_context *cell, void *ptr, - unsigned size); - -#endif diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c deleted file mode 100644 index 1d8a11a4ac..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ /dev/null @@ -1,2036 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - - -/** - * Generate SPU fragment program/shader code. - * - * Note that we generate SOA-style code here. So each TGSI instruction - * operates on four pixels (and is translated into four SPU instructions, - * generally speaking). - * - * \author Brian Paul - */ - -#include <math.h> -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_dump.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "util/u_memory.h" -#include "cell_context.h" -#include "cell_gen_fp.h" - - -#define MAX_TEMPS 16 -#define MAX_IMMED 8 - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - -/** - * Context needed during code generation. 
- */ -struct codegen -{ - struct cell_context *cell; - int inputs_reg; /**< 1st function parameter */ - int outputs_reg; /**< 2nd function parameter */ - int constants_reg; /**< 3rd function parameter */ - int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ - int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */ - - int num_imm; /**< number of immediates */ - - int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ - - int addr_reg; /**< address register, integer values */ - - /** Per-instruction temps / intermediate temps */ - int num_itemps; - int itemps[12]; - - /** Current IF/ELSE/ENDIF nesting level */ - int if_nesting; - /** Current BGNLOOP/ENDLOOP nesting level */ - int loop_nesting; - /** Location of start of current loop */ - int loop_start; - - /** Index of if/conditional mask register */ - int cond_mask_reg; - /** Index of loop mask register */ - int loop_mask_reg; - - /** Index of master execution mask register */ - int exec_mask_reg; - - /** KIL mask: indicates which fragments have been killed */ - int kill_mask_reg; - - int frame_size; /**< Stack frame size, in words */ - - struct spe_function *f; - boolean error; -}; - - -/** - * Allocate an intermediate temporary register. - */ -static int -get_itemp(struct codegen *gen) -{ - int t = spe_allocate_available_register(gen->f); - assert(gen->num_itemps < Elements(gen->itemps)); - gen->itemps[gen->num_itemps++] = t; - return t; -} - -/** - * Free all intermediate temporary registers. To be called after each - * instruction has been emitted. - */ -static void -free_itemps(struct codegen *gen) -{ - int i; - for (i = 0; i < gen->num_itemps; i++) { - spe_release_register(gen->f, gen->itemps[i]); - } - gen->num_itemps = 0; -} - - -/** - * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. - * The register is allocated and initialized upon the first call. 
- */ -static int -get_const_one_reg(struct codegen *gen) -{ - if (gen->one_reg <= 0) { - gen->one_reg = spe_allocate_available_register(gen->f); - - spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "init constant reg = 1.0:"); - - /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(gen->f, gen->one_reg, 1.0f); - - spe_indent(gen->f, -4); - } - - return gen->one_reg; -} - - -/** - * Return index of the address register. - * Used for indirect register loads/stores. - */ -static int -get_address_reg(struct codegen *gen) -{ - if (gen->addr_reg <= 0) { - gen->addr_reg = spe_allocate_available_register(gen->f); - - spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "init address reg = 0:"); - - /* init addr = {0, 0, 0, 0} */ - spe_zero(gen->f, gen->addr_reg); - - spe_indent(gen->f, -4); - } - - return gen->addr_reg; -} - - -/** - * Return index of the master execution mask. - * The register is allocated an initialized upon the first call. - * - * The master execution mask controls which pixels in a quad are - * modified, according to surrounding conditionals, loops, etc. 
- */ -static int -get_exec_mask_reg(struct codegen *gen) -{ - if (gen->exec_mask_reg <= 0) { - gen->exec_mask_reg = spe_allocate_available_register(gen->f); - - /* XXX this may not be needed */ - spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); - spe_load_int(gen->f, gen->exec_mask_reg, ~0); - } - - return gen->exec_mask_reg; -} - - -/** Return index of the conditional (if/else) execution mask register */ -static int -get_cond_mask_reg(struct codegen *gen) -{ - if (gen->cond_mask_reg <= 0) { - gen->cond_mask_reg = spe_allocate_available_register(gen->f); - } - - return gen->cond_mask_reg; -} - - -/** Return index of the loop execution mask register */ -static int -get_loop_mask_reg(struct codegen *gen) -{ - if (gen->loop_mask_reg <= 0) { - gen->loop_mask_reg = spe_allocate_available_register(gen->f); - } - - return gen->loop_mask_reg; -} - - - -static boolean -is_register_src(struct codegen *gen, int channel, - const struct tgsi_full_src_register *src) -{ - int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); - int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); - - if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { - return FALSE; - } - if (src->Register.File == TGSI_FILE_TEMPORARY || - src->Register.File == TGSI_FILE_IMMEDIATE) { - return TRUE; - } - return FALSE; -} - - -static boolean -is_memory_dst(struct codegen *gen, int channel, - const struct tgsi_full_dst_register *dst) -{ - if (dst->Register.File == TGSI_FILE_OUTPUT) { - return TRUE; - } - else { - return FALSE; - } -} - - -/** - * Return the index of the SPU temporary containing the named TGSI - * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we - * just return the corresponding SPE register. If the TGIS register - * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register - * and emit an SPE load instruction. 
- */ -static int -get_src_reg(struct codegen *gen, - int channel, - const struct tgsi_full_src_register *src) -{ - int reg = -1; - int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); - boolean reg_is_itemp = FALSE; - uint sign_op; - - assert(swizzle >= TGSI_SWIZZLE_X); - assert(swizzle <= TGSI_SWIZZLE_W); - - { - int index = src->Register.Index; - - assert(swizzle < 4); - - if (src->Register.Indirect) { - /* XXX unfinished */ - } - - switch (src->Register.File) { - case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[index][swizzle]; - break; - case TGSI_FILE_INPUT: - { - /* offset is measured in quadwords, not bytes */ - int offset = index * 4 + swizzle; - reg = get_itemp(gen); - reg_is_itemp = TRUE; - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); - } - break; - case TGSI_FILE_IMMEDIATE: - reg = gen->imm_regs[index][swizzle]; - break; - case TGSI_FILE_CONSTANT: - { - /* offset is measured in quadwords, not bytes */ - int offset = index * 4 + swizzle; - reg = get_itemp(gen); - reg_is_itemp = TRUE; - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); - } - break; - default: - assert(0); - } - } - - /* - * Handle absolute value, negate or set-negative of src register. - */ - sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); - if (sign_op != TGSI_UTIL_SIGN_KEEP) { - /* - * All sign ops are done by manipulating bit 31, the IEEE float sign bit. 
- */ - const int bit31mask_reg = get_itemp(gen); - int result_reg; - - if (reg_is_itemp) { - /* re-use 'reg' for the result */ - result_reg = reg; - } - else { - /* alloc a new reg for the result */ - result_reg = get_itemp(gen); - } - - /* mask with bit 31 set, the rest cleared */ - spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - - if (sign_op == TGSI_UTIL_SIGN_CLEAR) { - spe_andc(gen->f, result_reg, reg, bit31mask_reg); - } - else if (sign_op == TGSI_UTIL_SIGN_SET) { - spe_and(gen->f, result_reg, reg, bit31mask_reg); - } - else { - assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); - spe_xor(gen->f, result_reg, reg, bit31mask_reg); - } - - reg = result_reg; - } - - return reg; -} - - -/** - * Return the index of an SPE register to use for the given TGSI register. - * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the - * corresponding SPE register is returned. If the TGSI register is - * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. - * See store_dest_reg() below... - */ -static int -get_dst_reg(struct codegen *gen, - int channel, - const struct tgsi_full_dst_register *dest) -{ - int reg = -1; - - switch (dest->Register.File) { - case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0 || gen->loop_nesting > 0) - reg = get_itemp(gen); - else - reg = gen->temp_regs[dest->Register.Index][channel]; - break; - case TGSI_FILE_OUTPUT: - reg = get_itemp(gen); - break; - default: - assert(0); - } - - return reg; -} - - -/** - * When a TGSI instruction is writing to an output register, this - * function emits the SPE store instruction to store the value_reg. - * \param value_reg the SPE register containing the value to store. - * This would have been returned by get_dst_reg(). 
- */ -static void -store_dest_reg(struct codegen *gen, - int value_reg, int channel, - const struct tgsi_full_dst_register *dest) -{ - /* - * XXX need to implement dst reg clamping/saturation - */ -#if 0 - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - break; - case TGSI_SAT_MINUS_PLUS_ONE: - break; - default: - assert( 0 ); - } -#endif - - switch (dest->Register.File) { - case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int d_reg = gen->temp_regs[dest->Register.Index][channel]; - int exec_reg = get_exec_mask_reg(gen); - /* Mix d with new value according to exec mask: - * d[i] = mask_reg[i] ? value_reg : d_reg - */ - spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); - } - else { - /* we're not inside a condition or loop: do nothing special */ - - } - break; - case TGSI_FILE_OUTPUT: - { - /* offset is measured in quadwords, not bytes */ - int offset = dest->Register.Index * 4 + channel; - if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int exec_reg = get_exec_mask_reg(gen); - int curval_reg = get_itemp(gen); - /* First read the current value from memory: - * Load: curval = memory[(machine_reg) + offset] - */ - spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); - /* Mix curval with newvalue according to exec mask: - * d[i] = mask_reg[i] ? 
value_reg : d_reg - */ - spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); - /* Store: memory[(machine_reg) + offset] = curval */ - spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); - } - else { - /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); - } - } - break; - default: - assert(0); - } -} - - - -static void -emit_prologue(struct codegen *gen) -{ - gen->frame_size = 1024; /* XXX temporary, should be dynamic */ - - spe_comment(gen->f, 0, "Function prologue:"); - - /* save $lr on stack # stqd $lr,16($sp) */ - spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - if (gen->frame_size >= 512) { - /* offset is too large for ai instruction */ - int offset_reg = spe_allocate_available_register(gen->f); - int sp_reg = spe_allocate_available_register(gen->f); - /* offset = -framesize */ - spe_load_int(gen->f, offset_reg, -gen->frame_size); - /* sp = $sp */ - spe_move(gen->f, sp_reg, SPE_REG_SP); - /* $sp = $sp + offset_reg */ - spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); - /* save $sp in stack frame */ - spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); - /* clean up */ - spe_release_register(gen->f, offset_reg); - spe_release_register(gen->f, sp_reg); - } - else { - /* save stack pointer # stqd $sp,-frameSize($sp) */ - spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); - - /* adjust stack pointer # ai $sp,$sp,-frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); - } -} - - -static void -emit_epilogue(struct codegen *gen) -{ - const int return_reg = 3; - - spe_comment(gen->f, 0, "Function epilogue:"); - - spe_comment(gen->f, 0, "return the killed mask"); - if (gen->kill_mask_reg > 0) { - /* shader called KIL, return the "alive" mask */ - spe_move(gen->f, return_reg, gen->kill_mask_reg); - } - else { - /* return {0,0,0,0} */ - spe_load_uint(gen->f, return_reg, 0); - } - - spe_comment(gen->f, 0, "restore stack and return"); - if (gen->frame_size >= 512) { - /* 
offset is too large for ai instruction */ - int offset_reg = spe_allocate_available_register(gen->f); - /* offset = framesize */ - spe_load_int(gen->f, offset_reg, gen->frame_size); - /* $sp = $sp + offset */ - spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); - /* clean up */ - spe_release_register(gen->f, offset_reg); - } - else { - /* restore stack pointer # ai $sp,$sp,frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); - } - - /* restore $lr # lqd $lr,16($sp) */ - spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - /* return from function call */ - spe_bi(gen->f, SPE_REG_RA, 0, 0); -} - - -#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ - for (ch = 0; ch < 4; ch++) \ - if (inst->Dst[0].Register.WriteMask & (1 << ch)) - - -static boolean -emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch = 0, src_reg, addr_reg; - - src_reg = get_src_reg(gen, ch, &inst->Src[0]); - addr_reg = get_address_reg(gen); - - /* convert float to int */ - spe_cflts(gen->f, addr_reg, src_reg, 0); - - free_itemps(gen); - - return TRUE; -} - - -static boolean -emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, src_reg[4], dst_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (is_register_src(gen, ch, &inst->Src[0]) && - is_memory_dst(gen, ch, &inst->Dst[0])) { - /* special-case: register to memory store */ - store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]); - } - else { - spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]); - } - } - - free_itemps(gen); - - return TRUE; -} - -/** - * Emit binary operation - */ -static boolean -emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], d_reg[4]; - - /* Loop over Red/Green/Blue/Alpha channels, fetch src operands 
*/ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* Emit actual SPE instruction: d = s1 + s2 */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SUB: - spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_MUL: - spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - default: - ; - } - } - - /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - /* Free any intermediate temps we allocated */ - free_itemps(gen); - - return TRUE; -} - - -/** - * Emit multiply add. See emit_ADD for comments. - */ -static boolean -emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - free_itemps(gen); - return TRUE; -} - - -/** - * Emit linear interpolate. See emit_ADD for comments. 
- */ -static boolean -emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; - - /* setup/get src/dst/temp regs */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* d = s3 + s1(s2 - s3) */ - /* do all subtracts, then all fma, then all stores to better pipeline */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - free_itemps(gen); - return TRUE; -} - - - -/** - * Emit reciprocal or recip sqrt. - */ -static boolean -emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { - /* tmp = 1/s1 */ - spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); - } - else { - /* tmp = 1/sqrt(s1) */ - spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); - } - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* d = float_interp(s1, tmp) */ - spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit absolute value. See emit_ADD for comments. 
- */ -static boolean -emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4]; - const int bit31mask_reg = get_itemp(gen); - - /* mask with bit 31 set, the rest cleared */ - spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* d = sign bit cleared in s1 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 3 component dot product. See emit_ADD for comments. - */ -static boolean -emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s1x_reg, s1y_reg, s1z_reg; - int s2x_reg, s2y_reg, s2z_reg; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - - /* t0 = x0 * x1 */ - spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); - - /* t1 = y0 * y1 */ - spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); - - /* t0 = z0 * z1 + t0 */ - spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 4 component dot product. See emit_ADD for comments. 
- */ -static boolean -emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s0x_reg, s0y_reg, s0z_reg, s0w_reg; - int s1x_reg, s1y_reg, s1z_reg, s1w_reg; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]); - s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); - - /* t0 = x0 * x1 */ - spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); - - /* t1 = y0 * y1 */ - spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); - - /* t0 = z0 * z1 + t0 */ - spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); - - /* t1 = w0 * w1 + t1 */ - spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit homogeneous dot product. See emit_ADD for comments. 
- */ -static boolean -emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - /* XXX rewrite this function to look more like DP3/DP4 */ - int ch; - int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - int tmp_reg = get_itemp(gen); - - /* t = x0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - /* t = y0 * y1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = z0 * z1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); - /* t = w1 + t */ - spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 3-component vector normalize. 
- */ -static boolean -emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int src_reg[3]; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]); - src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - - /* t0 = x * x */ - spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); - - /* t1 = y * y */ - spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]); - - /* t0 = z * z + t0 */ - spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - /* t1 = 1.0 / sqrt(t0) */ - spe_frsqest(gen->f, t1_reg, t0_reg); - spe_fi(gen->f, t1_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - /* dst = src[ch] * t1 */ - spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit cross product. See emit_ADD for comments. 
- */ -static boolean -emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - int tmp_reg = get_itemp(gen); - - /* t = z0 * y1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = y0 * z1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) { - store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); - } - - s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = x0 * z1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - /* t = z0 * x1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) { - store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); - } - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - /* t = y0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - /* t = x0 * y1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) { - store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit inequality instruction. - * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as - * the result but OpenGL/TGSI needs 0.0 and 1.0 results. - * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. 
- */ -static boolean -emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; - boolean complement = FALSE; - - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SGT: - spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SLT: - spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); - break; - case TGSI_OPCODE_SGE: - spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); - complement = TRUE; - break; - case TGSI_OPCODE_SLE: - spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - complement = TRUE; - break; - case TGSI_OPCODE_SEQ: - spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SNE: - spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - complement = TRUE; - break; - default: - assert(0); - } - } - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* d = d & one_reg */ - if (complement) - spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); - else - spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit compare. - */ -static boolean -emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s1_reg = get_src_reg(gen, ch, &inst->Src[0]); - int s2_reg = get_src_reg(gen, ch, &inst->Src[1]); - int s3_reg = get_src_reg(gen, ch, &inst->Src[2]); - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - int zero_reg = get_itemp(gen); - - spe_zero(gen->f, zero_reg); - - /* d = (s1 < 0) ? 
s2 : s3 */ - spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); - spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); - - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - free_itemps(gen); - } - - return TRUE; -} - -/** - * Emit trunc. - * Convert float to signed int - * Convert signed int to float - */ -static boolean -emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); - } - - /* Convert int to float */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit floor. - * If negative int subtract one - * Convert float to signed int - * Convert signed int to float - */ -static boolean -emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* If negative, subtract 1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* Convert int to float */ 
- FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Compute frac = Input - FLR(Input) - */ -static boolean -emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* If negative, subtract 1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* Convert int to float */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* d = s1 - FLR(s1) */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* store result */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -#if 0 -static void -print_functions(struct cell_context *cell) -{ - struct cell_spu_function_info *funcs = &cell->spu_functions; - uint i; - for (i = 0; i < funcs->num; i++) { - printf("SPU func %u: %s at %u\n", - i, funcs->names[i], funcs->addrs[i]); - } -} -#endif - - -static uint -lookup_function(struct cell_context *cell, const char *funcname) -{ - const struct cell_spu_function_info *funcs = 
&cell->spu_functions; - uint i, addr = 0; - for (i = 0; i < funcs->num; i++) { - if (strcmp(funcs->names[i], funcname) == 0) { - addr = funcs->addrs[i]; - } - } - assert(addr && "spu function not found"); - return addr / 4; /* discard 2 least significant bits */ -} - - -/** - * Emit code to call a SPU function. - * Used to implement instructions like SIN/COS/POW/TEX/etc. - * If scalar, only the X components of the src regs are used, and the - * result is replicated across the dest register's XYZW components. - */ -static boolean -emit_function_call(struct codegen *gen, - const struct tgsi_full_instruction *inst, - char *funcname, uint num_args, boolean scalar) -{ - const uint addr = lookup_function(gen->cell, funcname); - char comment[100]; - int s_regs[3]; - int func_called = FALSE; - uint a, ch; - int retval_reg = -1; - - assert(num_args <= 3); - - snprintf(comment, sizeof(comment), "CALL %s:", funcname); - spe_comment(gen->f, -4, comment); - - if (scalar) { - for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]); - } - /* we'll call the function, put the return value in this register, - * then replicate it across all write-enabled components in d_reg. 
- */ - retval_reg = spe_allocate_available_register(gen->f); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg; - ubyte usedRegs[SPE_NUM_REGS]; - uint i, numUsed; - - if (!scalar) { - for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]); - } - } - - d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - - if (!scalar || !func_called) { - /* for a scalar function, we'll really only call the function once */ - - numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 2); - - /* save registers to stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - int offset = 2 + i; - spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - - /* setup function arguments */ - for (a = 0; a < num_args; a++) { - spe_move(gen->f, 3 + a, s_regs[a]); - } - - /* branch to function, save return addr */ - spe_brasl(gen->f, SPE_REG_RA, addr); - - /* save function's return value */ - if (scalar) - spe_move(gen->f, retval_reg, 3); - else - spe_move(gen->f, d_reg, 3); - - /* restore registers from stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - if (reg != d_reg && reg != retval_reg) { - int offset = 2 + i; - spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - } - - func_called = TRUE; - } - - if (scalar) { - spe_move(gen->f, d_reg, retval_reg); - } - - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - free_itemps(gen); - } - - if (scalar) { - spe_release_register(gen->f, retval_reg); - } - - return TRUE; -} - - -static boolean -emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const uint target = inst->Texture.Texture; - const uint unit = inst->Src[1].Register.Index; - uint addr; - int ch; - int coord_regs[4], d_regs[4]; - - switch (target) { - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_2D: - addr = lookup_function(gen->cell, "spu_tex_2d"); - break; - case TGSI_TEXTURE_3D: - addr = lookup_function(gen->cell, "spu_tex_3d"); - break; - case TGSI_TEXTURE_CUBE: - addr = 
lookup_function(gen->cell, "spu_tex_cube"); - break; - default: - ASSERT(0 && "unsupported texture target"); - return FALSE; - } - - assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER); - - spe_comment(gen->f, -4, "CALL tex:"); - - /* get src/dst reg info */ - for (ch = 0; ch < 4; ch++) { - coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - { - ubyte usedRegs[SPE_NUM_REGS]; - uint i, numUsed; - - numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 2); - - /* save registers to stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - int offset = 2 + i; - spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - - /* setup function arguments (XXX depends on target) */ - for (i = 0; i < 4; i++) { - spe_move(gen->f, 3 + i, coord_regs[i]); - } - spe_load_uint(gen->f, 7, unit); /* sampler unit */ - - /* branch to function, save return addr */ - spe_brasl(gen->f, SPE_REG_RA, addr); - - /* save function's return values (four pixel's colors) */ - for (i = 0; i < 4; i++) { - spe_move(gen->f, d_regs[i], 3 + i); - } - - /* restore registers from stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - if (reg != d_regs[0] && - reg != d_regs[1] && - reg != d_regs[2] && - reg != d_regs[3]) { - int offset = 2 + i; - spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - } - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]); - free_itemps(gen); - } - - return TRUE; -} - - -/** - * KILL if any of src reg values are less than zero. 
- */ -static boolean -emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; - - spe_comment(gen->f, -4, "CALL kil:"); - - /* zero = {0,0,0,0} */ - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - - cmp_reg = get_itemp(gen); - - /* get src regs */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); - } - - /* test if any src regs are < 0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (kil_reg >= 0) { - /* cmp = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); - /* kil = kil | cmp */ - spe_or(gen->f, kil_reg, kil_reg, cmp_reg); - } - else { - kil_reg = get_itemp(gen); - /* kil = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); - } - } - - if (gen->if_nesting || gen->loop_nesting) { - /* may have been a conditional kil */ - spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); - } - - /* allocate the kill mask reg if needed */ - if (gen->kill_mask_reg <= 0) { - gen->kill_mask_reg = spe_allocate_available_register(gen->f); - spe_move(gen->f, gen->kill_mask_reg, kil_reg); - } - else { - spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); - } - - free_itemps(gen); - - return TRUE; -} - - - -/** - * Emit min or max. - */ -static boolean -emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* d = (s0 > s1) ? 
s0 : s1 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) - spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); - else - spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit code to update the execution mask. - * This needs to be done whenever the execution status of a conditional - * or loop is changed. - */ -static void -emit_update_exec_mask(struct codegen *gen) -{ - const int exec_reg = get_exec_mask_reg(gen); - const int cond_reg = gen->cond_mask_reg; - const int loop_reg = gen->loop_mask_reg; - - spe_comment(gen->f, 0, "Update master execution mask"); - - if (gen->if_nesting > 0 && gen->loop_nesting > 0) { - /* exec_mask = cond_mask & loop_mask */ - assert(cond_reg > 0); - assert(loop_reg > 0); - spe_and(gen->f, exec_reg, cond_reg, loop_reg); - } - else if (gen->if_nesting > 0) { - assert(cond_reg > 0); - spe_move(gen->f, exec_reg, cond_reg); - } - else if (gen->loop_nesting > 0) { - assert(loop_reg > 0); - spe_move(gen->f, exec_reg, loop_reg); - } - else { - spe_load_int(gen->f, exec_reg, ~0x0); - } -} - - -static boolean -emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int channel = 0; - int cond_reg; - - cond_reg = get_cond_mask_reg(gen); - - /* XXX push cond exec mask */ - - spe_comment(gen->f, 0, "init conditional exec mask = ~0:"); - spe_load_int(gen->f, cond_reg, ~0); - - /* update conditional execution mask with the predicate register */ - int tmp_reg = get_itemp(gen); - int s1_reg = get_src_reg(gen, channel, &inst->Src[0]); - - /* tmp = (s1_reg == 0) */ - spe_ceqi(gen->f, tmp_reg, s1_reg, 0); - /* tmp = !tmp */ - spe_complement(gen->f, tmp_reg, tmp_reg); - /* cond_mask = cond_mask & tmp */ - 
spe_and(gen->f, cond_reg, cond_reg, tmp_reg); - - gen->if_nesting++; - - /* update the master execution mask */ - emit_update_exec_mask(gen); - - free_itemps(gen); - - return TRUE; -} - - -static boolean -emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int cond_reg = get_cond_mask_reg(gen); - - spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); - spe_complement(gen->f, cond_reg, cond_reg); - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - /* XXX todo: pop cond exec mask */ - - gen->if_nesting--; - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int exec_reg, loop_reg; - - exec_reg = get_exec_mask_reg(gen); - loop_reg = get_loop_mask_reg(gen); - - /* XXX push loop_exec mask */ - - spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0"); - spe_load_int(gen->f, loop_reg, ~0x0); - - gen->loop_nesting++; - gen->loop_start = spe_code_size(gen->f); /* in bytes */ - - return TRUE; -} - - -static boolean -emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int loop_reg = get_loop_mask_reg(gen); - const int tmp_reg = get_itemp(gen); - int offset; - - /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ - spe_orx(gen->f, tmp_reg, loop_reg); - - offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ - - /* branch back to top of loop if tmp_reg != 0 */ - spe_brnz(gen->f, tmp_reg, offset / 4); - - /* XXX pop loop_exec mask */ - - gen->loop_nesting--; - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int exec_reg = get_exec_mask_reg(gen); - const int loop_reg = get_loop_mask_reg(gen); - - assert(gen->loop_nesting > 0); - - spe_comment(gen->f, 0, "loop exec mask &= ~master exec 
mask"); - spe_andc(gen->f, loop_reg, loop_reg, exec_reg); - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - assert(gen->loop_nesting > 0); - - return TRUE; -} - - -static boolean -emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, - boolean ddx) -{ - int ch; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s_reg = get_src_reg(gen, ch, &inst->Src[0]); - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - - int t1_reg = get_itemp(gen); - int t2_reg = get_itemp(gen); - - spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ - if (ddx) { - spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ - } - else { - spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ - } - spe_fs(gen->f, d_reg, t2_reg, t1_reg); - - free_itemps(gen); - } - - return TRUE; -} - - - - -/** - * Emit END instruction. - * We just return from the shader function at this point. - * - * Note that there may be more code after this that would be - * called by TGSI_OPCODE_CALL. - */ -static boolean -emit_END(struct codegen *gen) -{ - emit_epilogue(gen); - return TRUE; -} - - -/** - * Emit code for the given instruction. Just a big switch stmt. 
- */ -static boolean -emit_instruction(struct codegen *gen, - const struct tgsi_full_instruction *inst) -{ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - return emit_ARL(gen, inst); - case TGSI_OPCODE_MOV: - return emit_MOV(gen, inst); - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_MUL: - return emit_binop(gen, inst); - case TGSI_OPCODE_MAD: - return emit_MAD(gen, inst); - case TGSI_OPCODE_LRP: - return emit_LRP(gen, inst); - case TGSI_OPCODE_DP3: - return emit_DP3(gen, inst); - case TGSI_OPCODE_DP4: - return emit_DP4(gen, inst); - case TGSI_OPCODE_DPH: - return emit_DPH(gen, inst); - case TGSI_OPCODE_NRM: - return emit_NRM3(gen, inst); - case TGSI_OPCODE_XPD: - return emit_XPD(gen, inst); - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - return emit_RCP_RSQ(gen, inst); - case TGSI_OPCODE_ABS: - return emit_ABS(gen, inst); - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - return emit_inequality(gen, inst); - case TGSI_OPCODE_CMP: - return emit_CMP(gen, inst); - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MAX: - return emit_MIN_MAX(gen, inst); - case TGSI_OPCODE_TRUNC: - return emit_TRUNC(gen, inst); - case TGSI_OPCODE_FLR: - return emit_FLR(gen, inst); - case TGSI_OPCODE_FRC: - return emit_FRC(gen, inst); - case TGSI_OPCODE_END: - return emit_END(gen); - - case TGSI_OPCODE_COS: - return emit_function_call(gen, inst, "spu_cos", 1, TRUE); - case TGSI_OPCODE_SIN: - return emit_function_call(gen, inst, "spu_sin", 1, TRUE); - case TGSI_OPCODE_POW: - return emit_function_call(gen, inst, "spu_pow", 2, TRUE); - case TGSI_OPCODE_EX2: - return emit_function_call(gen, inst, "spu_exp2", 1, TRUE); - case TGSI_OPCODE_LG2: - return emit_function_call(gen, inst, "spu_log2", 1, TRUE); - case TGSI_OPCODE_TEX: - /* fall-through for now */ - case TGSI_OPCODE_TXD: - /* fall-through for now */ - case TGSI_OPCODE_TXB: - /* fall-through for now */ - case 
TGSI_OPCODE_TXL: - /* fall-through for now */ - case TGSI_OPCODE_TXP: - return emit_TEX(gen, inst); - case TGSI_OPCODE_KIL: - return emit_KIL(gen, inst); - - case TGSI_OPCODE_IF: - return emit_IF(gen, inst); - case TGSI_OPCODE_ELSE: - return emit_ELSE(gen, inst); - case TGSI_OPCODE_ENDIF: - return emit_ENDIF(gen, inst); - - case TGSI_OPCODE_BGNLOOP: - return emit_BGNLOOP(gen, inst); - case TGSI_OPCODE_ENDLOOP: - return emit_ENDLOOP(gen, inst); - case TGSI_OPCODE_BRK: - return emit_BRK(gen, inst); - case TGSI_OPCODE_CONT: - return emit_CONT(gen, inst); - - case TGSI_OPCODE_DDX: - return emit_DDX_DDY(gen, inst, TRUE); - case TGSI_OPCODE_DDY: - return emit_DDX_DDY(gen, inst, FALSE); - - /* XXX lots more cases to do... */ - - default: - fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", - inst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - - - -/** - * Emit code for a TGSI immediate value (vector of four floats). - * This involves register allocation and initialization. - * XXX the initialization should be done by a "prepare" stage, not - * per quad execution! - */ -static boolean -emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) -{ - int ch; - - assert(gen->num_imm < MAX_TEMPS); - - for (ch = 0; ch < 4; ch++) { - float val = immed->u[ch].Float; - - if (ch > 0 && val == immed->u[ch - 1].Float) { - /* re-use previous register */ - gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; - } - else { - char str[100]; - int reg = spe_allocate_available_register(gen->f); - - if (reg < 0) - return FALSE; - - sprintf(str, "init $%d = %f", reg, val); - spe_comment(gen->f, 0, str); - - /* update immediate map */ - gen->imm_regs[gen->num_imm][ch] = reg; - - /* emit initializer instruction */ - spe_load_float(gen->f, reg, val); - } - } - - gen->num_imm++; - - return TRUE; -} - - - -/** - * Emit "code" for a TGSI declaration. - * We only care about TGSI TEMPORARY register declarations at this time. 
- * For each TGSI TEMPORARY we allocate four SPE registers. - */ -static boolean -emit_declaration(struct cell_context *cell, - struct codegen *gen, const struct tgsi_full_declaration *decl) -{ - int i, ch; - - switch (decl->Declaration.File) { - case TGSI_FILE_TEMPORARY: - for (i = decl->Range.First; - i <= decl->Range.Last; - i++) { - assert(i < MAX_TEMPS); - for (ch = 0; ch < 4; ch++) { - gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); - if (gen->temp_regs[i][ch] < 0) - return FALSE; /* out of regs */ - } - - /* XXX if we run out of SPE registers, we need to spill - * to SPU memory. someday... - */ - - { - char buf[100]; - sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, - gen->temp_regs[i][0], gen->temp_regs[i][1], - gen->temp_regs[i][2], gen->temp_regs[i][3]); - spe_comment(gen->f, 0, buf); - } - } - break; - default: - ; /* ignore */ - } - - return TRUE; -} - - - -/** - * Translate TGSI shader code to SPE instructions. This is done when - * the state tracker gives us a new shader (via pipe->create_fs_state()). - * - * \param cell the rendering context (in) - * \param tokens the TGSI shader (in) - * \param f the generated function (out) - */ -boolean -cell_gen_fragment_program(struct cell_context *cell, - const struct tgsi_token *tokens, - struct spe_function *f) -{ - struct tgsi_parse_context parse; - struct codegen gen; - uint ic = 0; - - memset(&gen, 0, sizeof(gen)); - gen.cell = cell; - gen.f = f; - - /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
*/ - gen.inputs_reg = 3; /* pointer to inputs array */ - gen.outputs_reg = 4; /* pointer to outputs array */ - gen.constants_reg = 5; /* pointer to constants array */ - - spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); - spe_allocate_register(f, gen.inputs_reg); - spe_allocate_register(f, gen.outputs_reg); - spe_allocate_register(f, gen.constants_reg); - - if (cell->debug_flags & CELL_DEBUG_ASM) { - spe_print_code(f, TRUE); - spe_indent(f, 2*8); - printf("Begin %s\n", __FUNCTION__); - tgsi_dump(tokens, 0); - } - - tgsi_parse_init(&parse, tokens); - - emit_prologue(&gen); - - while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - if (f->print) { - _debug_printf(" # "); - tgsi_dump_immediate(&parse.FullToken.FullImmediate); - } - if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) - gen.error = TRUE; - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - if (f->print) { - _debug_printf(" # "); - tgsi_dump_declaration(&parse.FullToken.FullDeclaration); - } - if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) - gen.error = TRUE; - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (f->print) { - _debug_printf(" # "); - ic++; - tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); - } - if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) - gen.error = TRUE; - break; - - default: - assert(0); - } - } - - if (gen.error) { - /* terminate the SPE code */ - return emit_END(&gen); - } - - if (cell->debug_flags & CELL_DEBUG_ASM) { - printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); - printf("End %s\n", __FUNCTION__); - } - - tgsi_parse_free( &parse ); - - return !gen.error; -} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h deleted file mode 100644 index 99faea7046..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.h +++ /dev/null @@ 
-1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - - -#ifndef CELL_GEN_FP_H -#define CELL_GEN_FP_H - - - -extern boolean -cell_gen_fragment_program(struct cell_context *cell, - const struct tgsi_token *tokens, - struct spe_function *f); - - -#endif /* CELL_GEN_FP_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c deleted file mode 100644 index 76a8517800..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ /dev/null @@ -1,2189 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * Generate SPU per-fragment code (actually per-quad code). - * \author Brian Paul - * \author Bob Ellison - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "cell_context.h" -#include "cell_gen_fragment.h" - - - -/** Do extra optimizations? */ -#define OPTIMIZATIONS 1 - - -/** - * Generate SPE code to perform Z/depth testing. - * - * \param dsa Gallium depth/stencil/alpha state to gen code for - * \param f SPE function to append instruction onto. - * \param mask_reg register containing quad/pixel "alive" mask (in/out) - * \param ifragZ_reg register containing integer fragment Z values (in) - * \param ifbZ_reg register containing integer frame buffer Z values (in/out) - * \param zmask_reg register containing result of Z test/comparison (out) - * - * Returns TRUE if the Z-buffer needs to be updated. - */ -static boolean -gen_depth_test(struct spe_function *f, - const struct pipe_depth_stencil_alpha_state *dsa, - int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) -{ - /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ - * quantities. This only makes a difference for 32-bit Z values though. 
- */ - ASSERT(dsa->depth.enabled); - - switch (dsa->depth.func) { - case PIPE_FUNC_EQUAL: - /* zmask = (ifragZ == ref) */ - spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_NOTEQUAL: - /* zmask = (ifragZ == ref) */ - spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_GREATER: - /* zmask = (ifragZ > ref) */ - spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_LESS: - /* zmask = (ref > ifragZ) */ - spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_LEQUAL: - /* zmask = (ifragZ > ref) */ - spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_GEQUAL: - /* zmask = (ref > ifragZ) */ - spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_NEVER: - spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ - spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ - break; - - case PIPE_FUNC_ALWAYS: - /* mask unchanged */ - spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ - break; - - default: - ASSERT(0); - break; - } - - if (dsa->depth.writemask) { - /* - * If (ztest passed) { - * framebufferZ = fragmentZ; - * } - * OR, - * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; - */ - spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); - return TRUE; - } - - return FALSE; -} - - -/** - * Generate SPE code to perform alpha testing. - * - * \param dsa Gallium depth/stencil/alpha state to gen code for - * \param f SPE function to append instruction onto. 
- * \param mask_reg register containing quad/pixel "alive" mask (in/out) - * \param fragA_reg register containing four fragment alpha values (in) - */ -static void -gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask_reg, int fragA_reg) -{ - int ref_reg = spe_allocate_available_register(f); - int amask_reg = spe_allocate_available_register(f); - - ASSERT(dsa->alpha.enabled); - - if ((dsa->alpha.func != PIPE_FUNC_NEVER) && - (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { - /* load/splat the alpha reference float value */ - spe_load_float(f, ref_reg, dsa->alpha.ref_value); - } - - /* emit code to do the alpha comparison, updating 'mask' */ - switch (dsa->alpha.func) { - case PIPE_FUNC_EQUAL: - /* amask = (fragA == ref) */ - spe_fceq(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_NOTEQUAL: - /* amask = (fragA == ref) */ - spe_fceq(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_GREATER: - /* amask = (fragA > ref) */ - spe_fcgt(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_LESS: - /* amask = (ref > fragA) */ - spe_fcgt(f, amask_reg, ref_reg, fragA_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_LEQUAL: - /* amask = (fragA > ref) */ - spe_fcgt(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_GEQUAL: - /* amask = (ref > fragA) */ - spe_fcgt(f, amask_reg, ref_reg, fragA_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_NEVER: - spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ - break; - - case PIPE_FUNC_ALWAYS: - /* no-op, mask unchanged */ - break; - - 
default: - ASSERT(0); - break; - } - -#if OPTIMIZATIONS - /* if mask == {0,0,0,0} we're all done, return */ - { - /* re-use amask reg here */ - int tmp_reg = amask_reg; - /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ - spe_orx(f, tmp_reg, mask_reg); - /* if tmp[0] == 0 then return from function call */ - spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); - } -#endif - - spe_release_register(f, ref_reg); - spe_release_register(f, amask_reg); -} - - -/** - * This pair of functions is used inline to allocate and deallocate - * optional constant registers. Once a constant is discovered to be - * needed, we will likely need it again, so we don't want to deallocate - * it and have to allocate and load it again unnecessarily. - */ -static INLINE void -setup_optional_register(struct spe_function *f, - int *r) -{ - if (*r < 0) - *r = spe_allocate_available_register(f); -} - -static INLINE void -release_optional_register(struct spe_function *f, - int r) -{ - if (r >= 0) - spe_release_register(f, r); -} - -static INLINE void -setup_const_register(struct spe_function *f, - int *r, - float value) -{ - if (*r >= 0) - return; - setup_optional_register(f, r); - spe_load_float(f, *r, value); -} - -static INLINE void -release_const_register(struct spe_function *f, - int r) -{ - release_optional_register(f, r); -} - - - -/** - * Unpack/convert framebuffer colors from four 32-bit packed colors - * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). - * Each 8-bit color component is expanded into a float in [0.0, 1.0]. 
- */ -static void -unpack_colors(struct spe_function *f, - enum pipe_format color_format, - int fbRGBA_reg, - int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg) -{ - int mask0_reg = spe_allocate_available_register(f); - int mask1_reg = spe_allocate_available_register(f); - int mask2_reg = spe_allocate_available_register(f); - int mask3_reg = spe_allocate_available_register(f); - - spe_load_int(f, mask0_reg, 0xff); - spe_load_int(f, mask1_reg, 0xff00); - spe_load_int(f, mask2_reg, 0xff0000); - spe_load_int(f, mask3_reg, 0xff000000); - - spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); - - switch (color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - /* fbB = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg); - - /* fbA = fbRGBA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg); - - /* fbG = fbG >> 8 */ - spe_roti(f, fbG_reg, fbG_reg, -8); - - /* fbR = fbR >> 16 */ - spe_roti(f, fbR_reg, fbR_reg, -16); - - /* fbA = fbA >> 24 */ - spe_roti(f, fbA_reg, fbA_reg, -24); - break; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - /* fbA = fbRGBA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg); - - /* fbB = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg); - - /* fbR = fbR >> 8 */ - spe_roti(f, fbR_reg, fbR_reg, -8); - - /* fbG = fbG >> 16 */ - spe_roti(f, fbG_reg, fbG_reg, -16); - - /* fbB = fbB >> 24 */ - spe_roti(f, fbB_reg, fbB_reg, -24); - break; - - default: - ASSERT(0); - } - - /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ - spe_cuflt(f, fbR_reg, fbR_reg, 8); - spe_cuflt(f, fbG_reg, fbG_reg, 8); - spe_cuflt(f, fbB_reg, fbB_reg, 8); - spe_cuflt(f, fbA_reg, fbA_reg, 8); - - spe_release_register(f, mask0_reg); - 
spe_release_register(f, mask1_reg); - spe_release_register(f, mask2_reg); - spe_release_register(f, mask3_reg); -} - - -/** - * Generate SPE code to implement the given blend mode for a quad of pixels. - * \param f SPE function to append instruction onto. - * \param fragR_reg register with fragment red values (float) (in/out) - * \param fragG_reg register with fragment green values (float) (in/out) - * \param fragB_reg register with fragment blue values (float) (in/out) - * \param fragA_reg register with fragment alpha values (float) (in/out) - * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) - */ -static void -gen_blend(const struct pipe_blend_state *blend, - const struct pipe_blend_color *blend_color, - struct spe_function *f, - enum pipe_format color_format, - int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, - int fbRGBA_reg) -{ - int term1R_reg = spe_allocate_available_register(f); - int term1G_reg = spe_allocate_available_register(f); - int term1B_reg = spe_allocate_available_register(f); - int term1A_reg = spe_allocate_available_register(f); - - int term2R_reg = spe_allocate_available_register(f); - int term2G_reg = spe_allocate_available_register(f); - int term2B_reg = spe_allocate_available_register(f); - int term2A_reg = spe_allocate_available_register(f); - - int fbR_reg = spe_allocate_available_register(f); - int fbG_reg = spe_allocate_available_register(f); - int fbB_reg = spe_allocate_available_register(f); - int fbA_reg = spe_allocate_available_register(f); - - int tmp_reg = spe_allocate_available_register(f); - - /* Optional constant registers we might or might not end up using; - * if we do use them, make sure we only allocate them once by - * keeping a flag on each one. 
- */ - int one_reg = -1; - int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; - - ASSERT(blend->rt[0].blend_enable); - - /* packed RGBA -> float colors */ - unpack_colors(f, color_format, fbRGBA_reg, - fbR_reg, fbG_reg, fbB_reg, fbA_reg); - - /* - * Compute Src RGB terms. We're actually looking for the value - * of (the appropriate RGB factors) * (the incoming source RGB color), - * because in some cases (like PIPE_BLENDFACTOR_ONE and - * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. - */ - switch (blend->rt[0].rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factors = (1,1,1), so term = (R,G,B) */ - spe_move(f, term1R_reg, fragR_reg); - spe_move(f, term1G_reg, fragG_reg); - spe_move(f, term1B_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factors = (0,0,0), so term = (0,0,0) */ - spe_load_float(f, term1R_reg, 0.0f); - spe_load_float(f, term1G_reg, 0.0f); - spe_load_float(f, term1B_reg, 0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ - spe_fm(f, term1R_reg, fragR_reg, fragR_reg); - spe_fm(f, term1G_reg, fragG_reg, fragG_reg); - spe_fm(f, term1B_reg, fragB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ - spe_fm(f, term1R_reg, fragR_reg, fragA_reg); - spe_fm(f, term1G_reg, fragG_reg, fragA_reg); - spe_fm(f, term1B_reg, fragB_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) - * or in other words term = (R-R*R, G-G*G, B-B*B) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ - spe_fm(f, term1R_reg, fragR_reg, fbR_reg); - spe_fm(f, 
term1G_reg, fragG_reg, fbG_reg); - spe_fm(f, term1B_reg, fragB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) - * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) - * or term = (R-R*A,G-G*A,B-B*A) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ - spe_fm(f, term1R_reg, fragR_reg, fbA_reg); - spe_fm(f, term1G_reg, fragG_reg, fbA_reg); - spe_fm(f, term1B_reg, fragB_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) - * or term = (R-R*Afb,G-G*Afb,b-B*Afb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ - spe_fm(f, term1R_reg, fragR_reg, constR_reg); - spe_fm(f, term1G_reg, fragG_reg, constG_reg); - spe_fm(f, term1B_reg, fragB_reg, constB_reg); - break; - case 
PIPE_BLENDFACTOR_CONST_ALPHA: - /* we'll need the optional constant alpha register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ - spe_fm(f, term1R_reg, fragR_reg, constA_reg); - spe_fm(f, term1G_reg, fragG_reg, constA_reg); - spe_fm(f, term1B_reg, fragB_reg, constA_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) - * or term = (R-R*Rc, G-G*Gc, B-B*Bc) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) - * or term = (R-R*Ac,G-G*Ac,B-B*Ac) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* We'll need the optional {1,1,1,1} register */ - setup_const_register(f, &one_reg, 1.0f); - /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so - * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) - * We could expand the term (as a*min(b,c) == min(a*b,a*c) - * as long as a is positive), but then we'd have to do three - * 
spe_float_min() functions instead of one, so this is simpler. - */ - /* tmp = 1 - Afb */ - spe_fs(f, tmp_reg, one_reg, fbA_reg); - /* tmp = min(A,tmp) */ - spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); - /* term = R*tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - - default: - ASSERT(0); - } - - /* - * Compute Src Alpha term. Like the above, we're looking for - * the full term A*factor, not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. - */ - switch (blend->rt[0].alpha_src_factor) { - case PIPE_BLENDFACTOR_ZERO: - /* factor = 0, so term = 0 */ - spe_load_float(f, term1A_reg, 0.0f); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ - case PIPE_BLENDFACTOR_ONE: - /* factor = 1, so term = A */ - spe_move(f, term1A_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factor = A, so term = A*A */ - spe_fm(f, term1A_reg, fragA_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - spe_fm(f, term1A_reg, fragA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factor = 1-A, so term = A*(1-A) = A-A*A */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_DST_COLOR: - /* factor = Afb, so term = A*Afb */ - spe_fm(f, term1A_reg, fragA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factor = 
1-Afb, so term = A*(1-Afb) = A - A*Afb */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = Ac, so term = A*Ac */ - spe_fm(f, term1A_reg, fragA_reg, constA_reg); - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - ASSERT(0); - } - - /* - * Compute Dest RGB term. Like the above, we're looking for - * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. 
- */ - switch (blend->rt[0].rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ - spe_move(f, term2R_reg, fbR_reg); - spe_move(f, term2G_reg, fbG_reg); - spe_move(f, term2B_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factor s= (0,0,0), so term = (0,0,0) */ - spe_load_float(f, term2R_reg, 0.0f); - spe_load_float(f, term2G_reg, 0.0f); - spe_load_float(f, term2B_reg, 0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ - spe_fm(f, term2R_reg, fbR_reg, fragR_reg); - spe_fm(f, term2G_reg, fbG_reg, fragG_reg); - spe_fm(f, term2B_reg, fbB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) - * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ - spe_fm(f, term2R_reg, fbR_reg, fragA_reg); - spe_fm(f, term2G_reg, fbG_reg, fragA_reg); - spe_fm(f, term2B_reg, fbB_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ - spe_fm(f, term2R_reg, fbR_reg, fbR_reg); - spe_fm(f, term2G_reg, fbG_reg, fbG_reg); - spe_fm(f, term2B_reg, fbB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = 
(Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) - * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ - spe_fm(f, term2R_reg, fbR_reg, fbA_reg); - spe_fm(f, term2G_reg, fbG_reg, fbA_reg); - spe_fm(f, term2B_reg, fbB_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) - * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ - spe_fm(f, term2R_reg, fbR_reg, constR_reg); - spe_fm(f, term2G_reg, fbG_reg, constG_reg); - spe_fm(f, term2B_reg, fbB_reg, constB_reg); - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - /* we'll need the optional constant alpha register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ - spe_fm(f, term2R_reg, fbR_reg, constA_reg); - spe_fm(f, term2G_reg, fbG_reg, constA_reg); - spe_fm(f, term2B_reg, fbB_reg, constA_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, 
&constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) - * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) - * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ - ASSERT(0); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - - default: - ASSERT(0); - } - - /* - * Compute Dest Alpha term. Like the above, we're looking for - * the full term Afb*factor, not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. 
- */ - switch (blend->rt[0].alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factor = 1, so term = Afb */ - spe_move(f, term2A_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factor = 0, so term = 0 */ - spe_load_float(f, term2A_reg, 0.0f); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factor = A, so term = Afb*A */ - spe_fm(f, term2A_reg, fbA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_DST_COLOR: - /* factor = Afb, so term = Afb*Afb */ - spe_fm(f, term2A_reg, fbA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = Ac, so term = Afb*Ac */ - spe_fm(f, term2A_reg, fbA_reg, constA_reg); - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ - ASSERT(0); - break; - - /* These are special D3D cases involving a second 
color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - ASSERT(0); - } - - /* - * Combine Src/Dest RGB terms as per the blend equation. - */ - switch (blend->rt[0].rgb_func) { - case PIPE_BLEND_ADD: - spe_fa(f, fragR_reg, term1R_reg, term2R_reg); - spe_fa(f, fragG_reg, term1G_reg, term2G_reg); - spe_fa(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_SUBTRACT: - spe_fs(f, fragR_reg, term1R_reg, term2R_reg); - spe_fs(f, fragG_reg, term1G_reg, term2G_reg); - spe_fs(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - spe_fs(f, fragR_reg, term2R_reg, term1R_reg); - spe_fs(f, fragG_reg, term2G_reg, term1G_reg); - spe_fs(f, fragB_reg, term2B_reg, term1B_reg); - break; - case PIPE_BLEND_MIN: - spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); - spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); - spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_MAX: - spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); - spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); - spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); - break; - default: - ASSERT(0); - } - - /* - * Combine Src/Dest A term - */ - switch (blend->rt[0].alpha_func) { - case PIPE_BLEND_ADD: - spe_fa(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_SUBTRACT: - spe_fs(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - spe_fs(f, fragA_reg, term2A_reg, term1A_reg); - break; - case PIPE_BLEND_MIN: - spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_MAX: - spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); - break; - default: - ASSERT(0); - } - - spe_release_register(f, term1R_reg); - spe_release_register(f, term1G_reg); - spe_release_register(f, 
term1B_reg); - spe_release_register(f, term1A_reg); - - spe_release_register(f, term2R_reg); - spe_release_register(f, term2G_reg); - spe_release_register(f, term2B_reg); - spe_release_register(f, term2A_reg); - - spe_release_register(f, fbR_reg); - spe_release_register(f, fbG_reg); - spe_release_register(f, fbB_reg); - spe_release_register(f, fbA_reg); - - spe_release_register(f, tmp_reg); - - /* Free any optional registers that actually got used */ - release_const_register(f, one_reg); - release_const_register(f, constR_reg); - release_const_register(f, constG_reg); - release_const_register(f, constB_reg); - release_const_register(f, constA_reg); -} - - -static void -gen_logicop(const struct pipe_blend_state *blend, - struct spe_function *f, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* We've got four 32-bit RGBA packed pixels in each of - * fragRGBA_reg and fbRGBA_reg, not sets of floating-point - * reds, greens, blues, and alphas. - * */ - ASSERT(blend->logicop_enable); - - switch(blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: /* 0 */ - spe_zero(f, fragRGBA_reg); - break; - case PIPE_LOGICOP_NOR: /* ~(s | d) */ - spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ - /* andc R, A, B computes R = A & ~B */ - spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ - spe_complement(f, fragRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ - /* andc R, A, B computes R = A & ~B */ - spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_INVERT: /* ~d */ - /* Note that (A nor A) == ~(A|A) == ~A */ - spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_XOR: /* s ^ d */ - spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_NAND: /* ~(s & d) */ - spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_AND: /* s & d */ - spe_and(f, 
fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ - spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - spe_complement(f, fragRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_NOOP: /* d */ - spe_move(f, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ - /* orc R, A, B computes R = A | ~B */ - spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_COPY: /* s */ - break; - case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ - /* orc R, A, B computes R = A | ~B */ - spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_OR: /* s | d */ - spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_SET: /* 1 */ - spe_load_int(f, fragRGBA_reg, 0xffffffff); - break; - default: - ASSERT(0); - } -} - - -/** - * Generate code to pack a quad of float colors into four 32-bit integers. - * - * \param f SPE function to append instruction onto. - * \param color_format the dest color packing format - * \param r_reg register containing four red values (in/clobbered) - * \param g_reg register containing four green values (in/clobbered) - * \param b_reg register containing four blue values (in/clobbered) - * \param a_reg register containing four alpha values (in/clobbered) - * \param rgba_reg register to store the packed RGBA colors (out) - */ -static void -gen_pack_colors(struct spe_function *f, - enum pipe_format color_format, - int r_reg, int g_reg, int b_reg, int a_reg, - int rgba_reg) -{ - int rg_reg = spe_allocate_available_register(f); - int ba_reg = spe_allocate_available_register(f); - - /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ - spe_cfltu(f, r_reg, r_reg, 32); - spe_cfltu(f, g_reg, g_reg, 32); - spe_cfltu(f, b_reg, b_reg, 32); - spe_cfltu(f, a_reg, a_reg, 32); - - /* Shift the most significant bytes to the least significant positions. 
- * I.e.: reg = reg >> 24 - */ - spe_rotmi(f, r_reg, r_reg, -24); - spe_rotmi(f, g_reg, g_reg, -24); - spe_rotmi(f, b_reg, b_reg, -24); - spe_rotmi(f, a_reg, a_reg, -24); - - /* Shift the color bytes according to the surface format */ - if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { - spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ - spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ - spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ - } - else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { - spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ - spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ - spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ - } - else { - ASSERT(0); - } - - /* Merge red, green, blue, alpha registers to make packed RGBA colors. - * Eg: after shifting according to color_format we might have: - * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} - * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} - * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} - * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} - * OR-ing all those together gives us four packed colors: - * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} - */ - spe_or(f, rg_reg, r_reg, g_reg); - spe_or(f, ba_reg, a_reg, b_reg); - spe_or(f, rgba_reg, rg_reg, ba_reg); - - spe_release_register(f, rg_reg); - spe_release_register(f, ba_reg); -} - - -static void -gen_colormask(struct spe_function *f, - uint colormask, - enum pipe_format color_format, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* We've got four 32-bit RGBA packed pixels in each of - * fragRGBA_reg and fbRGBA_reg, not sets of floating-point - * reds, greens, blues, and alphas. Further, the pixels - * are packed according to the given color format, not - * necessarily RGBA... - */ - uint r_mask; - uint g_mask; - uint b_mask; - uint a_mask; - - /* Calculate exactly where the bits for any particular color - * end up, so we can mask them correctly. 
- */ - switch(color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - /* ARGB */ - a_mask = 0xff000000; - r_mask = 0x00ff0000; - g_mask = 0x0000ff00; - b_mask = 0x000000ff; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - /* BGRA */ - b_mask = 0xff000000; - g_mask = 0x00ff0000; - r_mask = 0x0000ff00; - a_mask = 0x000000ff; - break; - default: - ASSERT(0); - } - - /* For each R, G, B, and A component we're supposed to mask out, - * clear its bits. Then our mask operation later will work - * as expected. - */ - if (!(colormask & PIPE_MASK_R)) { - r_mask = 0; - } - if (!(colormask & PIPE_MASK_G)) { - g_mask = 0; - } - if (!(colormask & PIPE_MASK_B)) { - b_mask = 0; - } - if (!(colormask & PIPE_MASK_A)) { - a_mask = 0; - } - - /* Get a temporary register to hold the mask that will be applied - * to the fragment - */ - int colormask_reg = spe_allocate_available_register(f); - - /* The actual mask we're going to use is an OR of the remaining R, G, B, - * and A masks. Load the result value into our temporary register. - */ - spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); - - /* Use the mask register to select between the fragment color - * values and the frame buffer color values. Wherever the - * mask has a 0 bit, the current frame buffer color should override - * the fragment color. Wherever the mask has a 1 bit, the - * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) - * instruction will select bits from its first operand rA wherever the - * the mask bits rM are 0, and from its second operand rB wherever the - * mask bits rM are 1. That means that the frame buffer color is the - * first operand, and the fragment color the second. 
- */ - spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); - - /* Release the temporary register and we're done */ - spe_release_register(f, colormask_reg); -} - - -/** - * This function is annoyingly similar to gen_depth_test(), above, except - * that instead of comparing two varying values (i.e. fragment and buffer), - * we're comparing a varying value with a static value. As such, we have - * access to the Compare Immediate instructions where we don't in - * gen_depth_test(), which is what makes us very different. - * - * There's some added complexity if there's a non-trivial state->mask - * value; then stencil and reference both must be masked - * - * The return value in the stencil_pass_reg is a bitmask of valid - * fragments that also passed the stencil test. The bitmask of valid - * fragments that failed would be found in - * (fragment_mask_reg & ~stencil_pass_reg). - */ -static void -gen_stencil_test(struct spe_function *f, - const struct pipe_stencil_state *state, - const unsigned ref_value, - uint stencil_max_value, - int fragment_mask_reg, - int fbS_reg, - int stencil_pass_reg) -{ - /* Generate code that puts the set of passing fragments into the - * stencil_pass_reg register, taking into account whether each fragment - * was active to begin with. 
- */ - switch (state->func) { - case PIPE_FUNC_EQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (s == reference) */ - spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */ - uint tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_NOTEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & ~(s == reference) */ - spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_LESS: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference < s) */ - spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, - 
state->valuemask & ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_GREATER: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference > s) */ - /* There's no convenient Compare Less Than Immediate instruction, so - * we'll have to do this one the harder way, by loading a register and - * comparing directly. Compare Logical Greater Than Word (clgt) - * treats its operands as unsigned - no sign extension. - */ - int tmp_reg = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value); - spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - } - else { - /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */ - int tmp_reg = spe_allocate_available_register(f); - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, state->valuemask & ref_value); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_GEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference >= s) - * = fragment_mask & ~(s > reference) */ - spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, - ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_andc(f, 
stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_LEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference <= s) ] - * = fragment_mask & ~(reference > s) */ - /* As above, we have to do this by loading a register */ - int tmp_reg = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value); - spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - } - else { - /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */ - int tmp_reg = spe_allocate_available_register(f); - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value & state->valuemask); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_NEVER: - /* stencil_pass = fragment_mask & 0 = 0 */ - spe_load_uint(f, stencil_pass_reg, 0); - break; - - case PIPE_FUNC_ALWAYS: - /* stencil_pass = fragment_mask & 1 = fragment_mask */ - spe_move(f, stencil_pass_reg, fragment_mask_reg); - break; - } - - /* The fragments that passed the stencil test are now in stencil_pass_reg. - * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg). - */ -} - - -/** - * This function generates code that calculates a set of new stencil values - * given the earlier values and the operation to apply. It does not - * apply any tests. 
It is intended to be called up to 3 times - * (for the stencil fail operation, for the stencil pass-z fail operation, - * and for the stencil pass-z pass operation) to collect up to three - * possible sets of values, and for the caller to combine them based - * on the result of the tests. - * - * stencil_max_value should be (2^n - 1) where n is the number of bits - * in the stencil buffer - in other words, it should be usable as a mask. - */ -static void -gen_stencil_values(struct spe_function *f, - uint stencil_op, - uint stencil_ref_value, - uint stencil_max_value, - int fbS_reg, - int newS_reg) -{ - /* The code below assumes that newS_reg and fbS_reg are not the same - * register; if they can be, the calculations below will have to use - * an additional temporary register. For now, mark the assumption - * with an assertion that will fail if they are the same. - */ - ASSERT(fbS_reg != newS_reg); - - /* The code also assumes that the stencil_max_value is of the form - * 2^n-1 and can therefore be used as a mask for the valid bits in - * addition to a maximum. Make sure this is the case as well. - * The clever math below exploits the fact that incrementing a - * binary number serves to flip all the bits of a number starting at - * the LSB and continuing to (and including) the first zero bit - * found. That means that a number and its increment will always - * have at least one bit in common (the high order bit, if nothing - * else) *unless* the number is zero, *or* the number is of a form - * consisting of some number of 1s in the low-order bits followed - * by nothing but 0s in the high-order bits. The latter case - * implies it's of the form 2^n-1. 
- */ - ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); - - switch(stencil_op) { - case PIPE_STENCIL_OP_KEEP: - /* newS = S */ - spe_move(f, newS_reg, fbS_reg); - break; - - case PIPE_STENCIL_OP_ZERO: - /* newS = 0 */ - spe_zero(f, newS_reg); - break; - - case PIPE_STENCIL_OP_REPLACE: - /* newS = stencil reference value */ - spe_load_uint(f, newS_reg, stencil_ref_value); - break; - - case PIPE_STENCIL_OP_INCR: { - /* newS = (s == max ? max : s + 1) */ - int equals_reg = spe_allocate_available_register(f); - - spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); - /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ - spe_ai(f, newS_reg, fbS_reg, 1); - /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); - - spe_release_register(f, equals_reg); - break; - } - case PIPE_STENCIL_OP_DECR: { - /* newS = (s == 0 ? 0 : s - 1) */ - int equals_reg = spe_allocate_available_register(f); - - spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); - /* Add Word Immediate with a (-1) value works */ - spe_ai(f, newS_reg, fbS_reg, -1); - /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); - - spe_release_register(f, equals_reg); - break; - } - case PIPE_STENCIL_OP_INCR_WRAP: - /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can - * do a normal add and mask off the correct bits - */ - spe_ai(f, newS_reg, fbS_reg, 1); - spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); - break; - - case PIPE_STENCIL_OP_DECR_WRAP: - /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ - spe_ai(f, newS_reg, fbS_reg, -1); - spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); - break; - - case PIPE_STENCIL_OP_INVERT: - /* newS = ~s. 
We take advantage of the mask/max value to invert only - * the valid bits for the field so we don't have to do an extra "and". - */ - spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); - break; - - default: - ASSERT(0); - } -} - - -/** - * This function generates code to get all the necessary possible - * stencil values. For each of the output registers (fail_reg, - * zfail_reg, and zpass_reg), it either allocates a new register - * and calculates a new set of values based on the stencil operation, - * or it reuses a register allocation and calculation done for an - * earlier (matching) operation, or it reuses the fbS_reg register - * (if the stencil operation is KEEP, which doesn't change the - * stencil buffer). - * - * Since this function allocates a variable number of registers, - * to avoid incurring complex logic to free them, they should - * be allocated after a spe_allocate_register_set() call - * and released by the corresponding spe_release_register_set() call. - */ -static void -gen_get_stencil_values(struct spe_function *f, - const struct pipe_stencil_state *stencil, - const unsigned ref_value, - const uint depth_enabled, - int fbS_reg, - int *fail_reg, - int *zfail_reg, - int *zpass_reg) -{ - uint zfail_op; - - /* Stenciling had better be enabled here */ - ASSERT(stencil->enabled); - - /* If the depth test is not enabled, it is treated as though it always - * passes, which means that the zfail_op is not considered - a - * failing stencil test triggers the fail_op, and a passing one - * triggers the zpass_op - * - * As an optimization, override calculation of the zfail_op values - * if they aren't going to be used. By setting the value of - * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed - * to match the incoming stencil values, and no calculation will - * be done. 
- */ - if (depth_enabled) { - zfail_op = stencil->zfail_op; - } - else { - zfail_op = PIPE_STENCIL_OP_KEEP; - } - - /* One-sided or front-facing stencil */ - if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { - *fail_reg = fbS_reg; - } - else { - *fail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->fail_op, ref_value, - 0xff, fbS_reg, *fail_reg); - } - - /* Check the possibly overridden value, not the structure value */ - if (zfail_op == PIPE_STENCIL_OP_KEEP) { - *zfail_reg = fbS_reg; - } - else if (zfail_op == stencil->fail_op) { - *zfail_reg = *fail_reg; - } - else { - *zfail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->zfail_op, ref_value, - 0xff, fbS_reg, *zfail_reg); - } - - if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { - *zpass_reg = fbS_reg; - } - else if (stencil->zpass_op == stencil->fail_op) { - *zpass_reg = *fail_reg; - } - else if (stencil->zpass_op == zfail_op) { - *zpass_reg = *zfail_reg; - } - else { - *zpass_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->zpass_op, ref_value, - 0xff, fbS_reg, *zpass_reg); - } -} - -/** - * Note that fbZ_reg may *not* be set on entry, if in fact - * the depth test is not enabled. This function must not use - * the register if depth is not enabled. - */ -static boolean -gen_stencil_depth_test(struct spe_function *f, - const struct pipe_depth_stencil_alpha_state *dsa, - const struct pipe_stencil_ref *stencil_ref, - const uint facing, - const int mask_reg, const int fragZ_reg, - const int fbZ_reg, const int fbS_reg) -{ - /* True if we've generated code that could require writeback to the - * depth and/or stencil buffers - */ - boolean modified_buffers = FALSE; - - boolean need_to_calculate_stencil_values; - boolean need_to_writemask_stencil_values; - - struct pipe_stencil_state *stencil; - - /* Registers. We may or may not actually allocate these, depending - * on whether the state values indicate that we need them. 
- */ - int stencil_pass_reg, stencil_fail_reg; - int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; - int stencil_writemask_reg; - int zmask_reg; - int newS_reg; - unsigned ref_value; - - /* Stenciling is quite complex: up to six different configurable stencil - * operations/calculations can be required (three each for front-facing - * and back-facing fragments). Many of those operations will likely - * be identical, so there's good reason to try to avoid calculating - * the same values more than once (which unfortunately makes the code less - * straightforward). - * - * To make register management easier, we start a new - * register set; we can release all the registers in the set at - * once, and avoid having to keep track of exactly which registers - * we allocate. We can still allocate and free registers as - * desired (if we know we no longer need a register), but we don't - * have to spend the complexity to track the more difficult variant - * register usage scenarios. - */ - spe_comment(f, 0, "Allocating stencil register set"); - spe_allocate_register_set(f); - - /* The facing we're given is the fragment facing; it doesn't - * exactly match the stencil facing. If stencil is enabled, - * but two-sided stencil is *not* enabled, we use the same - * stencil settings for both front- and back-facing fragments. - * We only use the "back-facing" stencil for backfacing fragments - * if two-sided stenciling is enabled. - */ - if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { - stencil = &dsa->stencil[1]; - ref_value = stencil_ref->ref_value[1]; - } - else { - stencil = &dsa->stencil[0]; - ref_value = stencil_ref->ref_value[0]; - } - - /* Calculate the writemask. 
If the writemask is trivial (either - * all 0s, meaning that we don't need to calculate any stencil values - * because they're not going to change the stencil anyway, or all 1s, - * meaning that we have to calculate the stencil values but do not - * need to mask them), we can avoid generating code. Don't forget - * that we need to consider backfacing stencil, if enabled. - * - * Note that if the backface stencil is *not* enabled, the backface - * stencil will have the same values as the frontface stencil. - */ - if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && - stencil->zfail_op == PIPE_STENCIL_OP_KEEP && - stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { - need_to_calculate_stencil_values = FALSE; - need_to_writemask_stencil_values = FALSE; - } - else if (stencil->writemask == 0x0) { - /* All changes are writemasked out, so no need to calculate - * what those changes might be, and no need to write anything back. - */ - need_to_calculate_stencil_values = FALSE; - need_to_writemask_stencil_values = FALSE; - } - else if (stencil->writemask == 0xff) { - /* Still trivial, but a little less so. We need to write the stencil - * values, but we don't need to mask them. - */ - need_to_calculate_stencil_values = TRUE; - need_to_writemask_stencil_values = FALSE; - } - else { - /* The general case: calculate, mask, and write */ - need_to_calculate_stencil_values = TRUE; - need_to_writemask_stencil_values = TRUE; - - /* While we're here, generate code that calculates what the - * writemask should be. If backface stenciling is enabled, - * and the backface writemask is not the same as the frontface - * writemask, we'll have to generate code that merges the - * two masks into a single effective mask based on fragment facing. - */ - spe_comment(f, 0, "Computing stencil writemask"); - stencil_writemask_reg = spe_allocate_available_register(f); - spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask); - } - - /* At least one-sided stenciling must be on. 
Generate code that - * runs the stencil test on the basic/front-facing stencil, leaving - * the mask of passing stencil bits in stencil_pass_reg. This mask will - * be used both to mask the set of active pixels, and also to - * determine how the stencil buffer changes. - * - * This test will *not* change the value in mask_reg (because we don't - * yet know whether to apply the two-sided stencil or one-sided stencil). - */ - spe_comment(f, 0, "Running basic stencil test"); - stencil_pass_reg = spe_allocate_available_register(f); - gen_stencil_test(f, stencil, ref_value, 0xff, mask_reg, fbS_reg, stencil_pass_reg); - - /* Generate code that, given the mask of valid fragments and the - * mask of valid fragments that passed the stencil test, computes - * the mask of valid fragments that failed the stencil test. We - * have to do this before we run a depth test (because the - * depth test should not be performed on fragments that failed the - * stencil test, and because the depth test will update the - * mask of valid fragments based on the results of the depth test). - */ - spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); - stencil_fail_reg = spe_allocate_available_register(f); - spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); - /* Now remove the stenciled-out pixels from the valid fragment mask, - * so we can later use the valid fragment mask in the depth test. - */ - spe_and(f, mask_reg, mask_reg, stencil_pass_reg); - - /* We may not need to calculate stencil values, if the writemask is off */ - if (need_to_calculate_stencil_values) { - /* Generate code that calculates exactly which stencil values we need, - * without calculating the same value twice (say, if two different - * stencil ops have the same value). 
This code will work for one-sided - * and two-sided stenciling (so that we take into account that operations - * may match between front and back stencils), and will also take into - * account whether the depth test is enabled (if the depth test is off, - * we don't need any of the zfail results, because the depth test always - * is considered to pass if it is disabled). Any register value that - * does not need to be calculated will come back with the same value - * that's in fbS_reg. - * - * This function will allocate a variant number of registers that - * will be released as part of the register set. - */ - spe_comment(f, 0, facing == CELL_FACING_FRONT - ? "Computing front-facing stencil values" - : "Computing back-facing stencil values"); - gen_get_stencil_values(f, stencil, ref_value, dsa->depth.enabled, fbS_reg, - &stencil_fail_values, &stencil_pass_depth_fail_values, - &stencil_pass_depth_pass_values); - } - - /* We now have all the stencil values we need. We also need - * the results of the depth test to figure out which - * stencil values will become the new stencil values. (Even if - * we aren't actually calculating stencil values, we need to apply - * the depth test if it's enabled.) - * - * The code generated by gen_depth_test() returns the results of the - * test in the given register, but also alters the mask_reg based - * on the results of the test. - */ - if (dsa->depth.enabled) { - spe_comment(f, 0, "Running stencil depth test"); - zmask_reg = spe_allocate_available_register(f); - modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, - fbZ_reg, zmask_reg); - } - - if (need_to_calculate_stencil_values) { - - /* If we need to writemask the stencil values before going into - * the stencil buffer, we'll have to use a new register to - * hold the new values. If not, we can just keep using the - * current register. 
- */ - if (need_to_writemask_stencil_values) { - newS_reg = spe_allocate_available_register(f); - spe_comment(f, 0, "Saving current stencil values for writemasking"); - spe_move(f, newS_reg, fbS_reg); - } - else { - newS_reg = fbS_reg; - } - - /* Merge in the selected stencil fail values */ - if (stencil_fail_values != fbS_reg) { - spe_comment(f, 0, "Loading stencil fail values"); - spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); - modified_buffers = TRUE; - } - - /* Same for the stencil pass/depth fail values. If this calculation - * is not needed (say, if depth test is off), then the - * stencil_pass_depth_fail_values register will be equal to fbS_reg - * and we'll skip the calculation. - */ - if (stencil_pass_depth_fail_values != fbS_reg) { - /* We don't actually have a stencil pass/depth fail mask yet. - * Calculate it here from the stencil passing mask and the - * depth passing mask. Note that zmask_reg *must* have been - * set above if we're here. - */ - uint stencil_pass_depth_fail_mask = - spe_allocate_available_register(f); - - spe_comment(f, 0, "Loading stencil pass/depth fail values"); - spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); - - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, - stencil_pass_depth_fail_mask); - - spe_release_register(f, stencil_pass_depth_fail_mask); - modified_buffers = TRUE; - } - - /* Same for the stencil pass/depth pass mask. Note that we - * *can* get here with zmask_reg being unset (if the depth - * test is off but the stencil test is on). In this case, - * we assume the depth test passes, and don't need to mask - * the stencil pass mask with the Z mask. 
- */ - if (stencil_pass_depth_pass_values != fbS_reg) { - if (dsa->depth.enabled) { - uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f); - /* We'll need a separate register */ - spe_comment(f, 0, "Loading stencil pass/depth pass values"); - spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); - spe_release_register(f, stencil_pass_depth_pass_mask); - } - else { - /* We can use the same stencil-pass register */ - spe_comment(f, 0, "Loading stencil pass values"); - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); - } - modified_buffers = TRUE; - } - - /* Almost done. If we need to writemask, do it now, leaving the - * results in the fbS_reg register passed in. If we don't need - * to writemask, then the results are *already* in the fbS_reg, - * so there's nothing more to do. - */ - - if (need_to_writemask_stencil_values && modified_buffers) { - /* The Select Bytes command makes a fine writemask. Where - * the mask is 0, the first (original) values are retained, - * effectively masking out changes. Where the mask is 1, the - * second (new) values are retained, incorporating changes. - */ - spe_comment(f, 0, "Writemasking new stencil values"); - spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); - } - - } /* done calculating stencil values */ - - /* The stencil and/or depth values have been applied, and the - * mask_reg, fbS_reg, and fbZ_reg values have been updated. - * We're all done, except that we've allocated a fair number - * of registers that we didn't bother tracking. Release all - * those registers as part of the register set, and go home. - */ - spe_comment(f, 0, "Releasing stencil register set"); - spe_release_register_set(f); - - /* Return TRUE if we could have modified the stencil and/or - * depth buffers. 
- */ - return modified_buffers; -} - - -/** - * Generate depth and/or stencil test code. - * \param cell context - * \param dsa depth/stencil/alpha state - * \param f spe function to emit - * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK - * \param mask_reg register containing the pixel alive/dead mask - * \param depth_tile_reg register containing address of z/stencil tile - * \param quad_offset_reg offset to quad from start of tile - * \param fragZ_reg register containg fragment Z values - */ -static void -gen_depth_stencil(struct cell_context *cell, - const struct pipe_depth_stencil_alpha_state *dsa, - const struct pipe_stencil_ref *stencil_ref, - struct spe_function *f, - uint facing, - int mask_reg, - int depth_tile_reg, - int quad_offset_reg, - int fragZ_reg) - -{ - const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; - boolean write_depth_stencil; - - /* framebuffer's combined z/stencil values register */ - int fbZS_reg = spe_allocate_available_register(f); - - /* Framebufer Z values register */ - int fbZ_reg = spe_allocate_available_register(f); - - /* Framebuffer stencil values register (may not be used) */ - int fbS_reg = spe_allocate_available_register(f); - - /* 24-bit mask register (may not be used) */ - int zmask_reg = spe_allocate_available_register(f); - - /** - * The following code: - * 1. fetch quad of packed Z/S values from the framebuffer tile. - * 2. extract the separate the Z and S values from packed values - * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints - * - * The instructions for doing this are interleaved for better performance. 
- */ - spe_comment(f, 0, "Fetch Z/stencil quad from tile"); - - switch(zs_format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: /* fall through */ - case PIPE_FORMAT_Z24X8_UNORM: - /* prepare mask to extract Z vals from ZS vals */ - spe_load_uint(f, zmask_reg, 0x00ffffff); - - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* right shift 32-bit fragment Z to 24 bits */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - - /* extract 24-bit Z values from ZS values by masking */ - spe_and(f, fbZ_reg, fbZS_reg, zmask_reg); - - /* extract 8-bit stencil values by shifting */ - spe_rotmi(f, fbS_reg, fbZS_reg, -24); - break; - - case PIPE_FORMAT_S8_UINT_Z24_UNORM: /* fall through */ - case PIPE_FORMAT_X8Z24_UNORM: - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* right shift 32-bit fragment Z to 24 bits */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - - /* extract 24-bit Z values from ZS values by shifting */ - spe_rotmi(f, fbZ_reg, fbZS_reg, -8); - - /* extract 8-bit stencil values by masking */ - spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); - break; - - case PIPE_FORMAT_Z32_UNORM: - /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg); - - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* No stencil, so can't do anything there */ - break; - - case PIPE_FORMAT_Z16_UNORM: - /* XXX This code for 16bpp Z is broken! 
*/ - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* Copy over 4 32-bit values */ - spe_move(f, fbZ_reg, fbZS_reg); - - /* convert Z from [0,1] to 16-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - spe_rotmi(f, fragZ_reg, fragZ_reg, -16); - /* No stencil */ - break; - - default: - ASSERT(0); /* invalid format */ - } - - /* If stencil is enabled, use the stencil-specific code - * generator to generate both the stencil and depth (if needed) - * tests. Otherwise, if only depth is enabled, generate - * a quick depth test. The test generators themselves will - * report back whether the depth/stencil buffer has to be - * written back. - */ - if (dsa->stencil[0].enabled) { - /* This will perform the stencil and depth tests, and update - * the mask_reg, fbZ_reg, and fbS_reg as required by the - * tests. - */ - ASSERT(fbS_reg >= 0); - spe_comment(f, 0, "Perform stencil test"); - - /* Note that fbZ_reg may not be set on entry, if stenciling - * is enabled but there's no Z-buffer. The - * gen_stencil_depth_test() function must ignore the - * fbZ_reg register if depth is not enabled. - */ - write_depth_stencil = gen_stencil_depth_test(f, dsa, stencil_ref, facing, - mask_reg, fragZ_reg, - fbZ_reg, fbS_reg); - } - else if (dsa->depth.enabled) { - int zmask_reg = spe_allocate_available_register(f); - ASSERT(fbZ_reg >= 0); - spe_comment(f, 0, "Perform depth test"); - write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, - fbZ_reg, zmask_reg); - spe_release_register(f, zmask_reg); - } - else { - write_depth_stencil = FALSE; - } - - if (write_depth_stencil) { - /* Merge latest Z and Stencil values into fbZS_reg. - * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. - * fbS_reg has four 8-bit Z values in bits [7..0]. 
- */ - spe_comment(f, 0, "Store quad's depth/stencil values in tile"); - if (zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ - } - else if (zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ - } - else if (zs_format == PIPE_FORMAT_S8_UINT) { - ASSERT(0); /* XXX to do */ - } - else { - ASSERT(0); /* bad zs_format */ - } - - /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ - spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - } - - /* Don't need these any more */ - spe_release_register(f, fbZS_reg); - spe_release_register(f, fbZ_reg); - spe_release_register(f, fbS_reg); - spe_release_register(f, zmask_reg); -} - - - -/** - * Generate SPE code to implement the fragment operations (alpha test, - * depth test, stencil test, blending, colormask, and final - * framebuffer write) as specified by the current context state. - * - * Logically, this code will be called after running the fragment - * shader. But under some circumstances we could run some of this - * code before the fragment shader to cull fragments/quads that are - * totally occluded/discarded. - * - * XXX we only support PIPE_FORMAT_S8_UINT_Z24_UNORM z/stencil buffer right now. - * - * See the spu_default_fragment_ops() function to see how the per-fragment - * operations would be done with ordinary C code. - * The code we generate here though has no branches, is SIMD, etc and - * should be much faster. 
- * - * \param cell the rendering context (in) - * \param facing whether the generated code is for front-facing or - * back-facing fragments - * \param f the generated function (in/out); on input, the function - * must already have been initialized. On exit, whatever - * instructions within the generated function have had - * the fragment ops appended. - */ -void -cell_gen_fragment_function(struct cell_context *cell, - const uint facing, - struct spe_function *f) -{ - const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; - const struct pipe_stencil_ref *stencil_ref = &cell->stencil_ref; - const struct pipe_blend_state *blend = cell->blend; - const struct pipe_blend_color *blend_color = &cell->blend_color; - const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; - - /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ - const int x_reg = 3; /* uint */ - const int y_reg = 4; /* uint */ - const int color_tile_reg = 5; /* tile_t * */ - const int depth_tile_reg = 6; /* tile_t * */ - const int fragZ_reg = 7; /* vector float */ - const int fragR_reg = 8; /* vector float */ - const int fragG_reg = 9; /* vector float */ - const int fragB_reg = 10; /* vector float */ - const int fragA_reg = 11; /* vector float */ - const int mask_reg = 12; /* vector uint */ - - ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); - - /* offset of quad from start of tile - * XXX assuming 4-byte pixels for color AND Z/stencil!!!! - */ - int quad_offset_reg; - - int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ - - if (cell->debug_flags & CELL_DEBUG_ASM) { - spe_print_code(f, TRUE); - spe_indent(f, 8); - spe_comment(f, -4, facing == CELL_FACING_FRONT - ? 
"Begin front-facing per-fragment ops" - : "Begin back-facing per-fragment ops"); - } - - spe_allocate_register(f, x_reg); - spe_allocate_register(f, y_reg); - spe_allocate_register(f, color_tile_reg); - spe_allocate_register(f, depth_tile_reg); - spe_allocate_register(f, fragZ_reg); - spe_allocate_register(f, fragR_reg); - spe_allocate_register(f, fragG_reg); - spe_allocate_register(f, fragB_reg); - spe_allocate_register(f, fragA_reg); - spe_allocate_register(f, mask_reg); - - quad_offset_reg = spe_allocate_available_register(f); - fbRGBA_reg = spe_allocate_available_register(f); - - /* compute offset of quad from start of tile, in bytes */ - { - int x2_reg = spe_allocate_available_register(f); - int y2_reg = spe_allocate_available_register(f); - - ASSERT(TILE_SIZE == 32); - - spe_comment(f, 0, "Compute quad offset within tile"); - spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ - spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ - spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ - spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ - spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ - - spe_release_register(f, x2_reg); - spe_release_register(f, y2_reg); - } - - /* Generate the alpha test, if needed. */ - if (dsa->alpha.enabled) { - gen_alpha_test(dsa, f, mask_reg, fragA_reg); - } - - /* generate depth and/or stencil test code */ - if (dsa->depth.enabled || dsa->stencil[0].enabled) { - gen_depth_stencil(cell, dsa, stencil_ref, f, - facing, - mask_reg, - depth_tile_reg, - quad_offset_reg, - fragZ_reg); - } - - /* Get framebuffer quad/colors. We'll need these for blending, - * color masking, and to obey the quad/pixel mask. - * Load: fbRGBA_reg = memory[color_tile + quad_offset] - * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking - * we could skip this load. 
- */ - spe_comment(f, 0, "Fetch quad colors from tile"); - spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - - if (blend->rt[0].blend_enable) { - spe_comment(f, 0, "Perform blending"); - gen_blend(blend, blend_color, f, color_format, - fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); - } - - /* - * Write fragment colors to framebuffer/tile. - * This involves converting the fragment colors from float[4] to the - * tile's specific format and obeying the quad/pixel mask. - */ - { - int rgba_reg = spe_allocate_available_register(f); - - /* Pack four float colors as four 32-bit int colors */ - spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); - gen_pack_colors(f, color_format, - fragR_reg, fragG_reg, fragB_reg, fragA_reg, - rgba_reg); - - if (blend->logicop_enable) { - spe_comment(f, 0, "Compute logic op"); - gen_logicop(blend, f, rgba_reg, fbRGBA_reg); - } - - if (blend->rt[0].colormask != PIPE_MASK_RGBA) { - spe_comment(f, 0, "Compute color mask"); - gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg); - } - - /* Mix fragment colors with framebuffer colors using the quad/pixel mask: - * if (mask[i]) - * rgba[i] = rgba[i]; - * else - * rgba[i] = framebuffer[i]; - */ - spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); - - /* Store updated quad in tile: - * memory[color_tile + quad_offset] = rgba_reg; - */ - spe_comment(f, 0, "Store quad colors into color tile"); - spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); - - spe_release_register(f, rgba_reg); - } - - //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); - - spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ - - spe_release_register(f, fbRGBA_reg); - spe_release_register(f, quad_offset_reg); - - if (cell->debug_flags & CELL_DEBUG_ASM) { - char buffer[1024]; - sprintf(buffer, "End %s-facing per-fragment ops: %d instructions", - facing == CELL_FACING_FRONT ? 
"front" : "back", f->num_inst); - spe_comment(f, -4, buffer); - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h deleted file mode 100644 index 21b35d1faf..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_GEN_FRAGMENT_H -#define CELL_GEN_FRAGMENT_H - - -extern void -cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f); - - -#endif /* CELL_GEN_FRAGMENT_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c deleted file mode 100644 index 223adda48f..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ /dev/null @@ -1,473 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Brian Paul - */ - -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_flush.h" -#include "cell_pipe_state.h" -#include "cell_state.h" -#include "cell_texture.h" - - - -static void * -cell_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - return mem_dup(blend, sizeof(*blend)); -} - - -static void -cell_bind_blend_state(struct pipe_context *pipe, void *blend) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend = (struct pipe_blend_state *) blend; - cell->dirty |= CELL_NEW_BLEND; -} - - -static void -cell_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - FREE(blend); -} - - -static void -cell_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *blend_color) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend_color = *blend_color; - - cell->dirty |= CELL_NEW_BLEND; -} - - - - -static void * -cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *dsa) -{ - return mem_dup(dsa, sizeof(*dsa)); -} - - -static void -cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, - void *dsa) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; - cell->dirty |= CELL_NEW_DEPTH_STENCIL; -} - - -static void -cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) -{ - FREE(dsa); -} - - -static void -cell_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->stencil_ref = *stencil_ref; - - 
cell->dirty |= CELL_NEW_DEPTH_STENCIL; -} - - -static void -cell_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ - struct cell_context *cell = cell_context(pipe); - - /* pass the clip state to the draw module */ - draw_set_clip_state(cell->draw, clip); -} - - -static void -cell_set_sample_mask(struct pipe_context *pipe, - unsigned sample_mask) -{ -} - - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ -static void -cell_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct cell_context *cell = cell_context(pipe); - - cell->viewport = *viewport; /* struct copy */ - cell->dirty |= CELL_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - draw_set_viewport_state(cell->draw, viewport); - - /* Using tnl/ and vf/ modules is temporary while getting started. - * Full pipe will have vertex shader, vertex fetch of its own. - */ -} - - -static void -cell_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->scissor, scissor, sizeof(*scissor) ); - cell->dirty |= CELL_NEW_SCISSOR; -} - - -static void -cell_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); - cell->dirty |= CELL_NEW_STIPPLE; -} - - - -static void * -cell_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - return mem_dup(rasterizer, sizeof(*rasterizer)); -} - - -static void -cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast) -{ - struct pipe_rasterizer_state *rasterizer = - (struct pipe_rasterizer_state *) rast; - struct cell_context *cell = cell_context(pipe); - - /* pass-through to draw module */ - draw_set_rasterizer_state(cell->draw, rasterizer, 
rast); - - cell->rasterizer = rasterizer; - - cell->dirty |= CELL_NEW_RASTERIZER; -} - - -static void -cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) -{ - FREE(rasterizer); -} - - - -static void * -cell_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - return mem_dup(sampler, sizeof(*sampler)); -} - - -static void -cell_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **samplers) -{ - struct cell_context *cell = cell_context(pipe); - uint i, changed = 0x0; - - assert(num <= CELL_MAX_SAMPLERS); - - draw_flush(cell->draw); - - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL; - if (cell->sampler[i] != new_samp) { - cell->sampler[i] = new_samp; - changed |= (1 << i); - } - } - - if (changed) { - cell->dirty |= CELL_NEW_SAMPLER; - cell->dirty_samplers |= changed; - } -} - - -static void -cell_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - FREE( sampler ); -} - - - -static void -cell_set_fragment_sampler_views(struct pipe_context *pipe, - unsigned num, - struct pipe_sampler_view **views) -{ - struct cell_context *cell = cell_context(pipe); - uint i, changed = 0x0; - - assert(num <= CELL_MAX_SAMPLERS); - - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - struct pipe_sampler_view *new_view = i < num ? views[i] : NULL; - struct pipe_sampler_view *old_view = cell->fragment_sampler_views[i]; - - if (old_view != new_view) { - struct pipe_resource *new_tex = new_view ? 
new_view->texture : NULL; - - pipe_sampler_view_reference(&cell->fragment_sampler_views[i], - new_view); - pipe_resource_reference((struct pipe_resource **) &cell->texture[i], - (struct pipe_resource *) new_tex); - - changed |= (1 << i); - } - } - - cell->num_textures = num; - - if (changed) { - cell->dirty |= CELL_NEW_TEXTURE; - cell->dirty_textures |= changed; - } -} - - -static struct pipe_sampler_view * -cell_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ) -{ - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); - - if (view) { - *view = *templ; - view->reference.count = 1; - view->texture = NULL; - pipe_resource_reference(&view->texture, texture); - view->context = pipe; - } - - return view; -} - - -static void -cell_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view) -{ - pipe_resource_reference(&view->texture, NULL); - FREE(view); -} - - -/** - * Map color and z/stencil framebuffer surfaces. - */ -static void -cell_map_surfaces(struct cell_context *cell) -{ -#if 0 - struct pipe_screen *screen = cell->pipe.screen; -#endif - uint i; - - for (i = 0; i < 1; i++) { - struct pipe_surface *ps = cell->framebuffer.cbufs[i]; - if (ps) { - struct cell_resource *ct = cell_resource(ps->texture); -#if 0 - cell->cbuf_map[i] = screen->buffer_map(screen, - ct->buffer, - (PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE)); -#else - cell->cbuf_map[i] = ct->data; -#endif - } - } - - { - struct pipe_surface *ps = cell->framebuffer.zsbuf; - if (ps) { - struct cell_resource *ct = cell_resource(ps->texture); -#if 0 - cell->zsbuf_map = screen->buffer_map(screen, - ct->buffer, - (PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE)); -#else - cell->zsbuf_map = ct->data; -#endif - } - } -} - - -/** - * Unmap color and z/stencil framebuffer surfaces. 
- */ -static void -cell_unmap_surfaces(struct cell_context *cell) -{ - /*struct pipe_screen *screen = cell->pipe.screen;*/ - uint i; - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - struct pipe_surface *ps = cell->framebuffer.cbufs[i]; - if (ps && cell->cbuf_map[i]) { - /*struct cell_resource *ct = cell_resource(ps->texture);*/ - assert(ps->texture); - /*assert(ct->buffer);*/ - - /*screen->buffer_unmap(screen, ct->buffer);*/ - cell->cbuf_map[i] = NULL; - } - } - - { - struct pipe_surface *ps = cell->framebuffer.zsbuf; - if (ps && cell->zsbuf_map) { - /*struct cell_resource *ct = cell_resource(ps->texture);*/ - /*screen->buffer_unmap(screen, ct->buffer);*/ - cell->zsbuf_map = NULL; - } - } -} - - -static void -cell_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct cell_context *cell = cell_context(pipe); - - if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { - uint i; - - /* unmap old surfaces */ - cell_unmap_surfaces(cell); - - /* Finish any pending rendering to the current surface before - * installing a new surface! 
- */ - cell_flush_int(cell, CELL_FLUSH_WAIT); - - /* update my state - * (this is also where old surfaces will finally get freed) - */ - cell->framebuffer.width = fb->width; - cell->framebuffer.height = fb->height; - cell->framebuffer.nr_cbufs = fb->nr_cbufs; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); - } - pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); - - /* map new surfaces */ - cell_map_surfaces(cell); - - cell->dirty |= CELL_NEW_FRAMEBUFFER; - } -} - - -void -cell_init_state_functions(struct cell_context *cell) -{ - cell->pipe.create_blend_state = cell_create_blend_state; - cell->pipe.bind_blend_state = cell_bind_blend_state; - cell->pipe.delete_blend_state = cell_delete_blend_state; - - cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states; - cell->pipe.delete_sampler_state = cell_delete_sampler_state; - - cell->pipe.set_fragment_sampler_views = cell_set_fragment_sampler_views; - cell->pipe.create_sampler_view = cell_create_sampler_view; - cell->pipe.sampler_view_destroy = cell_sampler_view_destroy; - - cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; - cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; - cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; - - cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; - cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; - cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; - - cell->pipe.set_blend_color = cell_set_blend_color; - cell->pipe.set_stencil_ref = cell_set_stencil_ref; - cell->pipe.set_clip_state = cell_set_clip_state; - cell->pipe.set_sample_mask = cell_set_sample_mask; - - cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; - - cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; - 
cell->pipe.set_scissor_state = cell_set_scissor_state; - cell->pipe.set_viewport_state = cell_set_viewport_state; -} diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h deleted file mode 100644 index 1889bd52ff..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.h +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_PIPE_STATE_H -#define CELL_PIPE_STATE_H - - -struct cell_context; - -extern void -cell_init_state_functions(struct cell_context *cell); - - -#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_public.h b/src/gallium/drivers/cell/ppu/cell_public.h deleted file mode 100644 index 7e2e093565..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_public.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef CELL_PUBLIC_H -#define CELL_PUBLIC_H - -struct pipe_screen; -struct sw_winsys; - -struct pipe_screen * -cell_create_screen(struct sw_winsys *winsys); - -#endif diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c deleted file mode 100644 index f648482c55..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ /dev/null @@ -1,211 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Last stage of 'draw' pipeline: send tris to SPUs. - * \author Brian Paul - */ - -#include "cell_context.h" -#include "cell_render.h" -#include "cell_spu.h" -#include "util/u_memory.h" -#include "draw/draw_private.h" - - -struct render_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct cell_context *cell; -}; - - -static INLINE struct render_stage * -render_stage(struct draw_stage *stage) -{ - return (struct render_stage *) stage; -} - - -static void render_begin( struct draw_stage *stage ) -{ -#if 0 - struct render_stage *render = render_stage(stage); - struct cell_context *sp = render->cell; - const struct pipe_shader_state *fs = &render->cell->fs->shader; - render->quad.nr_attrs = render->cell->nr_frag_attrs; - - render->firstFpInput = fs->input_semantic_name[0]; - - sp->quad.first->begin(sp->quad.first); -#endif -} - - -static void render_end( struct draw_stage *stage ) -{ -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ - struct render_stage *render = render_stage(stage); - /*render->cell->line_stipple_counter = 0;*/ -} - - -static void -render_point(struct draw_stage *stage, struct prim_header *prim) -{ -} - - -static void -render_line(struct draw_stage *stage, struct prim_header *prim) -{ -} - - -/** Write a vertex into the prim buffer */ -static void -save_vertex(struct cell_prim_buffer *buf, uint pos, - const struct vertex_header *vert) -{ - uint attr, j; - - for (attr = 0; attr < 2; attr++) { - for (j = 0; j < 4; j++) { - buf->vertex[pos][attr][j] = vert->data[attr][j]; - } - } - - /* update bounding box */ - if 
(vert->data[0][0] < buf->xmin) - buf->xmin = vert->data[0][0]; - if (vert->data[0][0] > buf->xmax) - buf->xmax = vert->data[0][0]; - if (vert->data[0][1] < buf->ymin) - buf->ymin = vert->data[0][1]; - if (vert->data[0][1] > buf->ymax) - buf->ymax = vert->data[0][1]; -} - - -static void -render_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct render_stage *rs = render_stage(stage); - struct cell_context *cell = rs->cell; - struct cell_prim_buffer *buf = &cell->prim_buffer; - uint i; - - if (buf->num_verts + 3 > CELL_MAX_VERTS) { - cell_flush_prim_buffer(cell); - } - - i = buf->num_verts; - assert(i+2 <= CELL_MAX_VERTS); - save_vertex(buf, i+0, prim->v[0]); - save_vertex(buf, i+1, prim->v[1]); - save_vertex(buf, i+2, prim->v[2]); - buf->num_verts += 3; -} - - -/** - * Send the a RENDER command to all SPUs to have them render the prims - * in the current prim_buffer. - */ -void -cell_flush_prim_buffer(struct cell_context *cell) -{ - uint i; - - if (cell->prim_buffer.num_verts == 0) - return; - - for (i = 0; i < cell->num_spus; i++) { - struct cell_command_render *render = &cell_global.command[i].render; - render->prim_type = PIPE_PRIM_TRIANGLES; - render->num_verts = cell->prim_buffer.num_verts; - render->front_ccw = cell->rasterizer->front_ccw; - render->vertex_size = cell->vertex_info->size * 4; - render->xmin = cell->prim_buffer.xmin; - render->ymin = cell->prim_buffer.ymin; - render->xmax = cell->prim_buffer.xmax; - render->ymax = cell->prim_buffer.ymax; - render->vertex_data = &cell->prim_buffer.vertex; - ASSERT_ALIGN16(render->vertex_data); - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); - } - - cell->prim_buffer.num_verts = 0; - - cell->prim_buffer.xmin = 1e100; - cell->prim_buffer.ymin = 1e100; - cell->prim_buffer.xmax = -1e100; - cell->prim_buffer.ymax = -1e100; - - /* XXX temporary, need to double-buffer the prim buffer until we get - * a real command buffer/list system. 
- */ - cell_flush(&cell->pipe, 0x0); -} - - - -static void render_destroy( struct draw_stage *stage ) -{ - FREE( stage ); -} - - -/** - * Create a new draw/render stage. This will be plugged into the - * draw module as the last pipeline stage. - */ -struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) -{ - struct render_stage *render = CALLOC_STRUCT(render_stage); - - render->cell = cell; - render->stage.draw = cell->draw; - render->stage.begin = render_begin; - render->stage.point = render_point; - render->stage.line = render_line; - render->stage.tri = render_tri; - render->stage.end = render_end; - render->stage.reset_stipple_counter = reset_stipple_counter; - render->stage.destroy = render_destroy; - - /* - render->quad.coef = render->coef; - render->quad.posCoef = &render->posCoef; - */ - - return &render->stage; -} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h deleted file mode 100644 index 826dcbafeb..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.h +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_RENDER_H -#define CELL_RENDER_H - -struct cell_context; -struct draw_stage; - -extern void -cell_flush_prim_buffer(struct cell_context *cell); - -extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); - -#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c deleted file mode 100644 index 7ffdcc51bb..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ /dev/null @@ -1,221 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/p_defines.h" -#include "pipe/p_screen.h" - -#include "cell/common.h" -#include "cell_context.h" -#include "cell_screen.h" -#include "cell_texture.h" -#include "cell_public.h" - -#include "state_tracker/sw_winsys.h" - - -static const char * -cell_get_vendor(struct pipe_screen *screen) -{ - return "VMware, Inc."; -} - - -static const char * -cell_get_name(struct pipe_screen *screen) -{ - return "Cell"; -} - - -static int -cell_get_param(struct pipe_screen *screen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return CELL_MAX_SAMPLERS; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TIMER_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 10; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return CELL_MAX_TEXTURE_LEVELS; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return CELL_MAX_TEXTURE_LEVELS; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 0; /* XXX to do */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - 
return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; - default: - return 0; - } -} - -static int -cell_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) -{ - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - switch (param) { - case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: - return CELL_MAX_SAMPLERS; - default: - return tgsi_exec_get_shader_param(param); - } - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: - return draw_get_shader_param(shader, param); - default: - return 0; - } -} - -static float -cell_get_paramf(struct pipe_screen *screen, enum pipe_capf param) -{ - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAPF_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 0.0; - - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ - - default: - return 0; - } -} - - -static boolean -cell_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned tex_usage) -{ - struct sw_winsys *winsys = cell_screen(screen)->winsys; - - if (sample_count > 1) - return FALSE; - - if (tex_usage & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) - return FALSE; - } - - /* only a few formats are known to work at this time */ - switch (format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return TRUE; - default: - return FALSE; - } -} - - -static void -cell_destroy_screen( struct pipe_screen *screen ) -{ - struct cell_screen 
*sp_screen = cell_screen(screen); - struct sw_winsys *winsys = sp_screen->winsys; - - if(winsys->destroy) - winsys->destroy(winsys); - - FREE(screen); -} - - - -/** - * Create a new pipe_screen object - * Note: we're not presently subclassing pipe_screen (no cell_screen) but - * that would be the place to put SPU thread/context info... - */ -struct pipe_screen * -cell_create_screen(struct sw_winsys *winsys) -{ - struct cell_screen *screen = CALLOC_STRUCT(cell_screen); - - if (!screen) - return NULL; - - screen->winsys = winsys; - screen->base.winsys = NULL; - - screen->base.destroy = cell_destroy_screen; - - screen->base.get_name = cell_get_name; - screen->base.get_vendor = cell_get_vendor; - screen->base.get_param = cell_get_param; - screen->base.get_shader_param = cell_get_shader_param; - screen->base.get_paramf = cell_get_paramf; - screen->base.is_format_supported = cell_is_format_supported; - screen->base.context_create = cell_create_context; - - cell_init_screen_texture_funcs(&screen->base); - - return &screen->base; -} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h deleted file mode 100644 index baff9d3b7d..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_SCREEN_H -#define CELL_SCREEN_H - - -#include "pipe/p_screen.h" - -struct sw_winsys; - -struct cell_screen { - struct pipe_screen base; - - struct sw_winsys *winsys; - - /* Increments whenever textures are modified. Contexts can track - * this. 
- */ - unsigned timestamp; -}; - -static INLINE struct cell_screen * -cell_screen( struct pipe_screen *pipe ) -{ - return (struct cell_screen *)pipe; -} - - -#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c deleted file mode 100644 index 39284f3a5d..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ /dev/null @@ -1,219 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Utility/wrappers for communicating with the SPUs. 
- */ - - -#include <pthread.h> - -#include "cell_spu.h" -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include "util/u_memory.h" -#include "cell/common.h" - - -/* -helpful headers: -/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h -*/ - - -/** - * Cell/SPU info that's not per-context. - */ -struct cell_global_info cell_global; - - -/** - * Scan /proc/cpuinfo to determine the timebase for the system. - * This is used by the SPUs to convert 'decrementer' ticks to seconds. - * There may be a better way to get this value... - */ -static unsigned -get_timebase(void) -{ - FILE *f = fopen("/proc/cpuinfo", "r"); - unsigned timebase; - - assert(f); - while (!feof(f)) { - char line[80]; - fgets(line, sizeof(line), f); - if (strncmp(line, "timebase", 8) == 0) { - char *colon = strchr(line, ':'); - if (colon) { - timebase = atoi(colon + 2); - break; - } - } - } - fclose(f); - - return timebase; -} - - -/** - * Write a 1-word message to the given SPE mailbox. - */ -void -send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) -{ - spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); -} - - -/** - * Wait for a 1-word message to arrive in given mailbox. - */ -uint -wait_mbox_message(spe_context_ptr_t ctx) -{ - do { - unsigned data; - int count = spe_out_mbox_read(ctx, &data, 1); - - if (count == 1) { - return data; - } - - if (count < 0) { - /* error */ ; - } - } while (1); -} - - -/** - * Called by pthread_create() to spawn an SPU thread. - */ -static void * -cell_thread_function(void *arg) -{ - struct cell_init_info *init = (struct cell_init_info *) arg; - unsigned entry = SPE_DEFAULT_ENTRY; - - ASSERT_ALIGN16(init); - - if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, - init, NULL, NULL) < 0) { - fprintf(stderr, "spe_context_run() failed\n"); - exit(1); - } - - pthread_exit(NULL); -} - - -/** - * Create the SPU threads. This is done once during driver initialization. - * This involves setting the "init" message which is sent to each SPU. 
- * The init message specifies an SPU id, total number of SPUs, location - * and number of batch buffers, etc. - */ -void -cell_start_spus(struct cell_context *cell) -{ - static boolean one_time_init = FALSE; - uint i, j; - uint timebase = get_timebase(); - - if (one_time_init) { - fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " - "on Cell.\n"); - abort(); - } - - one_time_init = TRUE; - - assert(cell->num_spus <= CELL_MAX_SPUS); - - ASSERT_ALIGN16(&cell_global.inits[0]); - ASSERT_ALIGN16(&cell_global.inits[1]); - - /* - * Initialize the global 'inits' structure for each SPU. - * A pointer to the init struct will be passed to each SPU. - * The SPUs will then each grab their init info with mfc_get(). - */ - for (i = 0; i < cell->num_spus; i++) { - cell_global.inits[i].id = i; - cell_global.inits[i].num_spus = cell->num_spus; - cell_global.inits[i].debug_flags = cell->debug_flags; - cell_global.inits[i].inv_timebase = 1000.0f / timebase; - - for (j = 0; j < CELL_NUM_BUFFERS; j++) { - cell_global.inits[i].buffers[j] = cell->buffer[j]; - } - cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; - - cell_global.inits[i].spu_functions = &cell->spu_functions; - - cell_global.spe_contexts[i] = spe_context_create(0, NULL); - if (!cell_global.spe_contexts[i]) { - fprintf(stderr, "spe_context_create() failed\n"); - exit(1); - } - - if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { - fprintf(stderr, "spe_program_load() failed\n"); - exit(1); - } - - pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ - NULL, /* pthread attribs */ - &cell_thread_function, /* start routine */ - &cell_global.inits[i]); /* thread argument */ - } -} - - -/** - * Tell all the SPUs to stop/exit. - * This is done when the driver's exiting / cleaning up. 
- */ -void -cell_spu_exit(struct cell_context *cell) -{ - uint i; - - for (i = 0; i < cell->num_spus; i++) { - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); - } - - /* wait for threads to exit */ - for (i = 0; i < cell->num_spus; i++) { - void *value; - pthread_join(cell_global.spe_threads[i], &value); - cell_global.spe_threads[i] = 0; - cell_global.spe_contexts[i] = 0; - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h deleted file mode 100644 index c93958a9ed..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ /dev/null @@ -1,79 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef CELL_SPU -#define CELL_SPU - - -#include <libspe2.h> -#include <pthread.h> -#include "cell/common.h" - -#include "cell_context.h" - - -/** - * Global vars, for now anyway. - */ -struct cell_global_info -{ - /** - * SPU/SPE handles, etc - */ - spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; - pthread_t spe_threads[CELL_MAX_SPUS]; - - /** - * Data sent to SPUs at start-up - */ - struct cell_init_info inits[CELL_MAX_SPUS]; -}; - - -extern struct cell_global_info cell_global; - - -/** This is the handle for the actual SPE code */ -extern spe_program_handle_t g3d_spu; - - -extern void -send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); - -extern uint -wait_mbox_message(spe_context_ptr_t ctx); - - -extern void -cell_start_spus(struct cell_context *cell); - - -extern void -cell_spu_exit(struct cell_context *cell); - - -#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h deleted file mode 100644 index 7adedcde57..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ /dev/null @@ -1,65 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_STATE_H -#define CELL_STATE_H - - -#define CELL_NEW_VIEWPORT 0x1 -#define CELL_NEW_RASTERIZER 0x2 -#define CELL_NEW_FS 0x4 -#define CELL_NEW_BLEND 0x8 -#define CELL_NEW_CLIP 0x10 -#define CELL_NEW_SCISSOR 0x20 -#define CELL_NEW_STIPPLE 0x40 -#define CELL_NEW_FRAMEBUFFER 0x80 -#define CELL_NEW_ALPHA_TEST 0x100 -#define CELL_NEW_DEPTH_STENCIL 0x200 -#define CELL_NEW_SAMPLER 0x400 -#define CELL_NEW_TEXTURE 0x800 -#define CELL_NEW_VERTEX 0x1000 -#define CELL_NEW_VS 0x2000 -#define CELL_NEW_VS_CONSTANTS 0x4000 -#define CELL_NEW_FS_CONSTANTS 0x8000 -#define CELL_NEW_VERTEX_INFO 0x10000 - - -extern void -cell_update_derived( struct cell_context *cell ); - - -extern void -cell_init_shader_functions(struct cell_context *cell); - - -extern void -cell_init_vertex_functions(struct cell_context *cell); - - -#endif /* CELL_STATE_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c deleted file mode 100644 index b723e794e7..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ /dev/null @@ -1,170 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_state.h" -#include "cell_state_emit.h" - - -/** - * Determine how to map vertex program outputs to fragment program inputs. - * Basically, this will be used when computing the triangle interpolation - * coefficients from the post-transform vertex attributes. - */ -static void -calculate_vertex_layout( struct cell_context *cell ) -{ - const struct cell_fragment_shader_state *fs = cell->fs; - const enum interp_mode colorInterp - = cell->rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; - struct vertex_info *vinfo = &cell->vertex_info; - uint i; - int src; - -#if 0 - if (cell->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; - vinfo_vbuf->num_attribs = 0; - draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; - } -#endif - - /* reset vinfo */ - vinfo->num_attribs = 0; - - /* we always want to emit vertex pos */ - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); - - - /* - * Loop over fragment shader inputs, searching for the matching output - * from the vertex shader. - */ - for (i = 0; i < fs->info.num_inputs; i++) { - switch (fs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - /* already done above */ - break; - - case TGSI_SEMANTIC_COLOR: - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, - fs->info.input_semantic_index[i]); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); -#if 1 - if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ - src = 0; -#endif - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); - break; - - case TGSI_SEMANTIC_GENERIC: - /* this includes texcoords and varying vars */ - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, - fs->info.input_semantic_index[i]); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - break; - - default: - assert(0); - } - } - - draw_compute_vertex_size(vinfo); - - /* XXX only signal this if format really changes */ - cell->dirty |= CELL_NEW_VERTEX_INFO; -} - - -#if 0 -/** - * Recompute 
cliprect from scissor bounds, scissor enable and surface size. - */ -static void -compute_cliprect(struct cell_context *sp) -{ - uint surfWidth = sp->framebuffer.width; - uint surfHeight = sp->framebuffer.height; - - if (sp->rasterizer->scissor) { - /* clip to scissor rect */ - sp->cliprect.minx = MAX2(sp->scissor.minx, 0); - sp->cliprect.miny = MAX2(sp->scissor.miny, 0); - sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); - sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); - } - else { - /* clip to surface bounds */ - sp->cliprect.minx = 0; - sp->cliprect.miny = 0; - sp->cliprect.maxx = surfWidth; - sp->cliprect.maxy = surfHeight; - } -} -#endif - - - -/** - * Update derived state, send current state to SPUs prior to rendering. - */ -void cell_update_derived( struct cell_context *cell ) -{ - if (cell->dirty & (CELL_NEW_RASTERIZER | - CELL_NEW_FS | - CELL_NEW_VS)) - calculate_vertex_layout( cell ); - -#if 0 - if (cell->dirty & (CELL_NEW_SCISSOR | - CELL_NEW_DEPTH_STENCIL_ALPHA | - CELL_NEW_FRAMEBUFFER)) - compute_cliprect(cell); -#endif - - cell_emit_state(cell); - - cell->dirty = 0; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c deleted file mode 100644 index bb11c68fa2..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ /dev/null @@ -1,343 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_format.h" -#include "cell_context.h" -#include "cell_gen_fragment.h" -#include "cell_state.h" -#include "cell_state_emit.h" -#include "cell_batch.h" -#include "cell_texture.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" - - -/** - * Find/create a cell_command_fragment_ops object corresponding to the - * current blend/stencil/z/colormask/etc. state. 
- */ -static struct cell_command_fragment_ops * -lookup_fragment_ops(struct cell_context *cell) -{ - struct cell_fragment_ops_key key; - struct cell_command_fragment_ops *ops; - - /* - * Build key - */ - memset(&key, 0, sizeof(key)); - key.blend = *cell->blend; - key.blend_color = cell->blend_color; - key.dsa = *cell->depth_stencil; - - if (cell->framebuffer.cbufs[0]) - key.color_format = cell->framebuffer.cbufs[0]->format; - else - key.color_format = PIPE_FORMAT_NONE; - - if (cell->framebuffer.zsbuf) - key.zs_format = cell->framebuffer.zsbuf->format; - else - key.zs_format = PIPE_FORMAT_NONE; - - /* - * Look up key in cache. - */ - ops = (struct cell_command_fragment_ops *) - util_keymap_lookup(cell->fragment_ops_cache, &key); - - /* - * If not found, create/save new fragment ops command. - */ - if (!ops) { - struct spe_function spe_code_front, spe_code_back; - unsigned int facing_dependent, total_code_size; - - if (0) - debug_printf("**** Create New Fragment Ops\n"); - - /* Prepare the buffer that will hold the generated code. The - * "0" passed in for the size means that the SPE code will - * use a default size. - */ - spe_init_func(&spe_code_front, 0); - spe_init_func(&spe_code_back, 0); - - /* Generate new code. Always generate new code for both front-facing - * and back-facing fragments, even if it's the same code in both - * cases. - */ - cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); - cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); - - /* Make sure the code is a multiple of 8 bytes long; this is - * required to ensure that the dual pipe instruction alignment - * is correct. It's also important for the SPU unpacking, - * which assumes 8-byte boundaries. 
- */ - unsigned int front_code_size = spe_code_size(&spe_code_front); - while (front_code_size % 8 != 0) { - spe_lnop(&spe_code_front); - front_code_size = spe_code_size(&spe_code_front); - } - unsigned int back_code_size = spe_code_size(&spe_code_back); - while (back_code_size % 8 != 0) { - spe_lnop(&spe_code_back); - back_code_size = spe_code_size(&spe_code_back); - } - - /* Determine whether the code we generated is facing-dependent, by - * determining whether the generated code is different for the front- - * and back-facing fragments. - */ - if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { - /* Code is identical; only need one copy. */ - facing_dependent = 0; - total_code_size = front_code_size; - } - else { - /* Code is different for front-facing and back-facing fragments. - * Need to send both copies. - */ - facing_dependent = 1; - total_code_size = front_code_size + back_code_size; - } - - /* alloc new fragment ops command. Note that this structure - * has variant length based on the total code size required. - */ - ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); - /* populate the new cell_command_fragment_ops object */ - ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS; - ops->total_code_size = total_code_size; - ops->front_code_index = 0; - memcpy(ops->code, spe_code_front.store, front_code_size); - if (facing_dependent) { - /* We have separate front- and back-facing code. Append the - * back-facing code to the buffer. Be careful because the code - * size is in bytes, but the buffer is of unsigned elements. - */ - ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); - memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); - } - else { - /* Use the same code for front- and back-facing fragments */ - ops->back_code_index = ops->front_code_index; - } - - /* Set the fields for the fallback case. 
Note that these fields - * (and the whole fallback case) will eventually go away. - */ - ops->dsa = *cell->depth_stencil; - ops->blend = *cell->blend; - ops->blend_color = cell->blend_color; - - /* insert cell_command_fragment_ops object into keymap/cache */ - util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); - - /* release rtasm buffer */ - spe_release_func(&spe_code_front); - spe_release_func(&spe_code_back); - } - else { - if (0) - debug_printf("**** Re-use Fragment Ops\n"); - } - - return ops; -} - - - -static void -emit_state_cmd(struct cell_context *cell, uint cmd, - const void *state, uint state_size) -{ - uint32_t *dst = (uint32_t *) - cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size)); - *dst = cmd; - memcpy(dst + 4, state, state_size); -} - - -/** - * For state marked as 'dirty', construct a state-update command block - * and insert it into the current batch buffer. - */ -void -cell_emit_state(struct cell_context *cell) -{ - if (cell->dirty & CELL_NEW_FRAMEBUFFER) { - struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; - struct pipe_surface *zbuf = cell->framebuffer.zsbuf; - STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0); - struct cell_command_framebuffer *fb - = cell_batch_alloc16(cell, sizeof(*fb)); - fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER; - fb->color_start = cell->cbuf_map[0]; - fb->color_format = cbuf->format; - fb->depth_start = cell->zsbuf_map; - fb->depth_format = zbuf ? 
zbuf->format : PIPE_FORMAT_NONE; - fb->width = cell->framebuffer.width; - fb->height = cell->framebuffer.height; -#if 0 - printf("EMIT color format %s\n", util_format_name(fb->color_format)); - printf("EMIT depth format %s\n", util_format_name(fb->depth_format)); -#endif - } - - if (cell->dirty & (CELL_NEW_RASTERIZER)) { - STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0); - struct cell_command_rasterizer *rast = - cell_batch_alloc16(cell, sizeof(*rast)); - rast->opcode[0] = CELL_CMD_STATE_RASTERIZER; - rast->rasterizer = *cell->rasterizer; - } - - if (cell->dirty & (CELL_NEW_FS)) { - /* Send new fragment program to SPUs */ - STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0); - struct cell_command_fragment_program *fp - = cell_batch_alloc16(cell, sizeof(*fp)); - fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM; - fp->num_inst = cell->fs->code.num_inst; - memcpy(&fp->code, cell->fs->code.store, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); - if (0) { - int i; - printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); - for (i = 0; i < fp->num_inst; i++) { - printf(" %3d: 0x%08x\n", i, fp->code[i]); - } - } - } - - if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { - const uint shader = PIPE_SHADER_FRAGMENT; - const uint num_const = cell->constants[shader]->width0 / sizeof(float); - uint i, j; - float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); - uint32_t *ibuf = (uint32_t *) buf; - const float *constants = cell->mapped_constants[shader]; - ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; - ibuf[4] = num_const; - j = 8; - for (i = 0; i < num_const; i++) { - buf[j++] = constants[i]; - } - } - - if (cell->dirty & (CELL_NEW_FRAMEBUFFER | - CELL_NEW_DEPTH_STENCIL | - CELL_NEW_BLEND)) { - struct cell_command_fragment_ops *fops, *fops_cmd; - /* Note that cell_command_fragment_ops is a variant-sized record */ - fops = lookup_fragment_ops(cell); - fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + 
fops->total_code_size)); - memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); - } - - if (cell->dirty & CELL_NEW_SAMPLER) { - uint i; - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - if (cell->dirty_samplers & (1 << i)) { - if (cell->sampler[i]) { - STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0); - struct cell_command_sampler *sampler - = cell_batch_alloc16(cell, sizeof(*sampler)); - sampler->opcode[0] = CELL_CMD_STATE_SAMPLER; - sampler->unit = i; - sampler->state = *cell->sampler[i]; - } - } - } - cell->dirty_samplers = 0x0; - } - - if (cell->dirty & CELL_NEW_TEXTURE) { - uint i; - for (i = 0;i < CELL_MAX_SAMPLERS; i++) { - if (cell->dirty_textures & (1 << i)) { - STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0); - struct cell_command_texture *texture = - (struct cell_command_texture *) - cell_batch_alloc16(cell, sizeof(*texture)); - - texture->opcode[0] = CELL_CMD_STATE_TEXTURE; - texture->unit = i; - if (cell->texture[i]) { - struct cell_resource *ct = cell->texture[i]; - uint level; - for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = (ct->mapped + - ct->level_offset[level]); - texture->width[level] = u_minify(ct->base.width0, level); - texture->height[level] = u_minify(ct->base.height0, level); - texture->depth[level] = u_minify(ct->base.depth0, level); - } - texture->target = ct->base.target; - } - else { - uint level; - for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = NULL; - texture->width[level] = 0; - texture->height[level] = 0; - texture->depth[level] = 0; - } - texture->target = 0; - } - } - } - cell->dirty_textures = 0x0; - } - - if (cell->dirty & CELL_NEW_VERTEX_INFO) { - emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, - &cell->vertex_info, sizeof(struct vertex_info)); - } - -#if 0 - if (cell->dirty & CELL_NEW_VS) { - const struct draw_context *const draw = cell->draw; - struct cell_shader_info info; - - info.num_outputs = 
draw_num_shader_outputs(draw); - info.declarations = (uintptr_t) draw->vs.machine.Declarations; - info.num_declarations = draw->vs.machine.NumDeclarations; - info.instructions = (uintptr_t) draw->vs.machine.Instructions; - info.num_instructions = draw->vs.machine.NumInstructions; - info.immediates = (uintptr_t) draw->vs.machine.Imms; - info.num_immediates = draw->vs.machine.ImmLimit / 4; - - emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); - } -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h deleted file mode 100644 index 59f8affe8d..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_STATE_EMIT_H -#define CELL_STATE_EMIT_H - - -extern void -cell_emit_state(struct cell_context *cell); - - -#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c deleted file mode 100644 index dc33e7ccc2..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ /dev/null @@ -1,1432 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Generate code to perform all per-fragment operations. - * - * Code generated by these functions perform both alpha, depth, and stencil - * testing as well as alpha blending. - * - * \note - * Occlusion query is not supported, but this is the right place to add that - * support. - * - * \author Ian Romanick <idr@us.ibm.com> - */ - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "cell_context.h" - -#include "rtasm/rtasm_ppc_spe.h" - - -/** - * Generate code to perform alpha testing. - * - * The code generated by this function uses the register specificed by - * \c mask as both an input and an output. - * - * \param dsa Current alpha-test state - * \param f Function to which code should be appended - * \param mask Index of register containing active fragment mask - * \param alphas Index of register containing per-fragment alpha values - * - * \note Emits a maximum of 6 instructions. - */ -static void -emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int alphas) -{ - /* If the alpha function is either NEVER or ALWAYS, there is no need to - * load the reference value into a register. ALWAYS is a fairly common - * case, and this optimization saves 2 instructions. 
- */ - if (dsa->alpha.enabled - && (dsa->alpha.func != PIPE_FUNC_NEVER) - && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { - int ref = spe_allocate_available_register(f); - int tmp_a = spe_allocate_available_register(f); - int tmp_b = spe_allocate_available_register(f); - union { - float f; - unsigned u; - } ref_val; - boolean complement = FALSE; - - ref_val.f = dsa->alpha.ref; - - spe_il(f, ref, ref_val.u & 0x0000ffff); - spe_ilh(f, ref, ref_val.u >> 16); - - switch (dsa->alpha.func) { - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_EQUAL: - spe_fceq(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GREATER: - spe_fcgt(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GEQUAL: - spe_fcgt(f, tmp_a, ref, alphas); - spe_fceq(f, tmp_b, ref, alphas); - spe_or(f, tmp_a, tmp_b, tmp_a); - break; - - case PIPE_FUNC_ALWAYS: - case PIPE_FUNC_NEVER: - default: - assert(0); - break; - } - - if (complement) { - spe_andc(f, mask, mask, tmp_a); - } else { - spe_and(f, mask, mask, tmp_a); - } - - spe_release_register(f, ref); - spe_release_register(f, tmp_a); - spe_release_register(f, tmp_b); - } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { - spe_il(f, mask, 0); - } -} - - -/** - * Generate code to perform Z testing. Four Z values are tested at once. - * \param dsa Current depth-test state - * \param f Function to which code should be appended - * \param mask Index of register to contain depth-pass mask - * \param stored Index of register containing values from depth buffer - * \param calculated Index of register containing per-fragment depth values - * - * \return - * If the calculated depth comparison mask is the actual mask, \c FALSE is - * returned. If the calculated depth comparison mask is the compliment of - * the actual mask, \c TRUE is returned. 
- * - * \note Emits a maximum of 3 instructions. - */ -static boolean -emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int stored, int calculated) -{ - unsigned func = (dsa->depth.enabled) - ? dsa->depth.func : PIPE_FUNC_ALWAYS; - int tmp = spe_allocate_available_register(f); - boolean compliment = FALSE; - - switch (func) { - case PIPE_FUNC_NEVER: - spe_il(f, mask, 0); - break; - - case PIPE_FUNC_NOTEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - spe_ceq(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - spe_clgt(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LESS: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - spe_clgt(f, mask, calculated, stored); - spe_ceq(f, tmp, calculated, stored); - spe_or(f, mask, mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - spe_il(f, mask, ~0); - break; - - default: - assert(0); - break; - } - - spe_release_register(f, tmp); - return compliment; -} - - -/** - * Generate code to apply the stencil operation (after testing). - * \note Emits a maximum of 5 instructions. - * - * \warning - * Since \c out and \c in might be the same register, this routine cannot - * generate code that uses \c out as a temporary. 
- */ -static void -emit_stencil_op(struct spe_function *f, - int out, int in, int mask, unsigned op, unsigned ref) -{ - const int clamp = spe_allocate_available_register(f); - const int clamp_mask = spe_allocate_available_register(f); - const int result = spe_allocate_available_register(f); - - switch(op) { - case PIPE_STENCIL_OP_KEEP: - assert(0); - case PIPE_STENCIL_OP_ZERO: - spe_il(f, result, 0); - break; - case PIPE_STENCIL_OP_REPLACE: - spe_il(f, result, ref); - break; - case PIPE_STENCIL_OP_INCR: - /* clamp = [0xff, 0xff, 0xff, 0xff] */ - spe_il(f, clamp, 0x0ff); - /* result[i] = in[i] + 1 */ - spe_ai(f, result, in, 1); - /* clamp_mask[i] = (result[i] > 0xff) */ - spe_clgti(f, clamp_mask, result, 0x0ff); - /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_DECR: - spe_il(f, clamp, 0); - spe_ai(f, result, in, -1); - - /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned - * arithmetic. - */ - spe_clgti(f, clamp_mask, result, 0x0ff); - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_INCR_WRAP: - spe_ai(f, result, in, 1); - break; - case PIPE_STENCIL_OP_DECR_WRAP: - spe_ai(f, result, in, -1); - break; - case PIPE_STENCIL_OP_INVERT: - spe_nor(f, result, in, in); - break; - default: - assert(0); - } - - spe_selb(f, out, in, result, mask); - - spe_release_register(f, result); - spe_release_register(f, clamp_mask); - spe_release_register(f, clamp); -} - - -/** - * Generate code to do stencil test. Four pixels are tested at once. - * \param dsa Depth / stencil test state - * \param face 0 for front face, 1 for back face - * \param f Function to append instructions to - * \param mask Register containing mask of fragments passing the - * alpha test - * \param depth_mask Register containing mask of fragments passing the - * depth test - * \param depth_compliment Is \c depth_mask the compliment of the actual mask? 
- * \param stencil Register containing values from stencil buffer - * \param depth_pass Register to store mask of fragments passing stencil test - * and depth test - * - * \note - * Emits a maximum of 10 + (3 * 5) = 25 instructions. - */ -static int -emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, - struct pipe_stencil_ref *sr, - unsigned face, - struct spe_function *f, - int mask, - int depth_mask, - boolean depth_complement, - int stencil, - int depth_pass) -{ - int stencil_fail = spe_allocate_available_register(f); - int depth_fail = spe_allocate_available_register(f); - int stencil_mask = spe_allocate_available_register(f); - int stencil_pass = spe_allocate_available_register(f); - int face_stencil = spe_allocate_available_register(f); - int stencil_src = stencil; - const unsigned ref = (sr->ref_value[face] - & dsa->stencil[face].valuemask); - boolean complement = FALSE; - int stored; - int tmp = spe_allocate_available_register(f); - - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].valuemask != 0x0ff)) { - stored = spe_allocate_available_register(f); - spe_andi(f, stored, stencil, dsa->stencil[face].valuemask); - } else { - stored = stencil; - } - - - switch (dsa->stencil[face].func) { - case PIPE_FUNC_NEVER: - spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ - break; - - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - /* stencil_mask[i] = (stored[i] == ref) */ - spe_ceqi(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - complement = TRUE; - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - /* tmp[i] = 
(stored[i] == ref) */ - spe_ceqi(f, tmp, stored, ref); - /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ - spe_or(f, stencil_mask, stencil_mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - /* See comment below. */ - break; - - default: - assert(0); - break; - } - - if (stored != stencil) { - spe_release_register(f, stored); - } - spe_release_register(f, tmp); - - - /* ALWAYS is a very common stencil-test, so some effort is applied to - * optimize that case. The stencil-pass mask is the same as the input - * fragment mask. This makes the stencil-test (above) a no-op, and the - * input fragment mask can be "renamed" the stencil-pass mask. - */ - if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { - spe_release_register(f, stencil_pass); - stencil_pass = mask; - } else { - if (complement) { - spe_andc(f, stencil_pass, mask, stencil_mask); - } else { - spe_and(f, stencil_pass, mask, stencil_mask); - } - } - - if (depth_complement) { - spe_andc(f, depth_pass, stencil_pass, depth_mask); - } else { - spe_and(f, depth_pass, stencil_pass, depth_mask); - } - - - /* Conditionally emit code to update the stencil value under various - * condititons. Note that there is no need to generate code under the - * following circumstances: - * - * - Stencil write mask is zero. 
- * - For stencil-fail if the stencil test is ALWAYS - * - For depth-fail if the stencil test is NEVER - * - For depth-pass if the stencil test is NEVER - * - Any of the 3 conditions if the operation is KEEP - */ - if (dsa->stencil[face].writemask != 0) { - if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { - if (complement) { - spe_and(f, stencil_fail, mask, stencil_mask); - } else { - spe_andc(f, stencil_fail, mask, stencil_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, - dsa->stencil[face].fail_op, - sr->ref_value[face]); - - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { - if (depth_complement) { - spe_and(f, depth_fail, stencil_pass, depth_mask); - } else { - spe_andc(f, depth_fail, stencil_pass, depth_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, depth_fail, - dsa->stencil[face].zfail_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { - emit_stencil_op(f, face_stencil, stencil_src, depth_pass, - dsa->stencil[face].zpass_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - } - - spe_release_register(f, stencil_fail); - spe_release_register(f, depth_fail); - spe_release_register(f, stencil_mask); - if (stencil_pass != mask) { - spe_release_register(f, stencil_pass); - } - - /* If all of the stencil operations were KEEP or the stencil write mask was - * zero, "stencil_src" will still be set to "stencil". In this case - * release the "face_stencil" register. Otherwise apply the stencil write - * mask to select bits from the calculated stencil value and the previous - * stencil value. 
- */ - if (stencil_src == stencil) { - spe_release_register(f, face_stencil); - } else if (dsa->stencil[face].writemask != 0x0ff) { - int tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, dsa->stencil[face].writemask); - spe_selb(f, stencil_src, stencil, stencil_src, tmp); - - spe_release_register(f, tmp); - } - - return stencil_src; -} - - -void -cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa, - struct pipe_stencil_ref *sr) -{ - struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; - struct spe_function *const f = &cdsa->code; - - /* This code generates a maximum of 6 (alpha test) + 3 (depth test) - * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round - * up to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Allocate registers for the function's input parameters. Cleverly (and - * clever code is usually dangerous, but I couldn't resist) the generated - * function returns a structure. Returned structures start with register - * 3, and the structure fields are ordered to match up exactly with the - * input parameters. 
- */ - int mask = spe_allocate_register(f, 3); - int depth = spe_allocate_register(f, 4); - int stencil = spe_allocate_register(f, 5); - int zvals = spe_allocate_register(f, 6); - int frag_a = spe_allocate_register(f, 7); - int facing = spe_allocate_register(f, 8); - - int depth_mask = spe_allocate_available_register(f); - - boolean depth_complement; - - - emit_alpha_test(dsa, f, mask, frag_a); - - depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); - - if (dsa->stencil[0].enabled) { - const int front_depth_pass = spe_allocate_available_register(f); - int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask, - depth_mask, depth_complement, - stencil, front_depth_pass); - - if (dsa->stencil[1].enabled) { - const int back_depth_pass = spe_allocate_available_register(f); - int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask, - depth_mask, depth_complement, - stencil, back_depth_pass); - - /* If the front facing stencil value and the back facing stencil - * value are stored in the same register, there is no need to select - * a value based on the facing. This can happen if the stencil value - * was not modified due to the write masks being zero, the stencil - * operations being KEEP, etc. 
- */ - if (front_stencil != back_stencil) { - spe_selb(f, stencil, back_stencil, front_stencil, facing); - } - - if (back_stencil != stencil) { - spe_release_register(f, back_stencil); - } - - if (front_stencil != stencil) { - spe_release_register(f, front_stencil); - } - - spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); - - spe_release_register(f, back_depth_pass); - } else { - if (front_stencil != stencil) { - spe_or(f, stencil, front_stencil, front_stencil); - spe_release_register(f, front_stencil); - } - spe_or(f, mask, front_depth_pass, front_depth_pass); - } - - spe_release_register(f, front_depth_pass); - } else if (dsa->depth.enabled) { - if (depth_complement) { - spe_andc(f, mask, mask, depth_mask); - } else { - spe_and(f, mask, mask, depth_mask); - } - } - - if (dsa->depth.writemask) { - spe_selb(f, depth, depth, zvals, mask); - } - - spe_bi(f, 0, 0, 0); /* return from function call */ - - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# alpha (%sabled)\n", - (dsa->alpha.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->alpha.func); - printf("# ref: %.2f\n", dsa->alpha.ref); - - printf("# depth (%sabled)\n", - (dsa->depth.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->depth.func); - - for (i = 0; i < 2; i++) { - printf("# %s stencil (%sabled)\n", - (i == 0) ? "front" : "back", - (dsa->stencil[i].enabled) ? 
"en" : "dis"); - - printf("# func: %u\n", dsa->stencil[i].func); - printf("# op (sf, zf, zp): %u %u %u\n", - dsa->stencil[i].fail_op, - dsa->stencil[i].zfail_op, - dsa->stencil[i].zpass_op); - printf("# ref value / value mask / write mask: %02x %02x %02x\n", - sr->ref_value[i], - dsa->stencil[i].valuemask, - dsa->stencil[i].writemask); - } - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -/** - * \note Emits a maximum of 3 instructions - */ -static int -emit_alpha_factor_calculation(struct spe_function *f, - unsigned factor, - int src_alpha, int dst_alpha, int const_alpha) -{ - int factor_reg; - int tmp; - - - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor_reg = spe_allocate_available_register(f); - - spe_or(f, factor_reg, src_alpha, src_alpha); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor_reg = dst_alpha; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor_reg = spe_allocate_available_register(f); - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, const_alpha); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor_reg = const_alpha; - break; - - case PIPE_BLENDFACTOR_ZERO: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, src_alpha); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, dst_alpha); - - 
spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - factor_reg = -1; - break; - } - - return factor_reg; -} - - -/** - * \note Emits a maximum of 6 instructions - */ -static void -emit_color_factor_calculation(struct spe_function *f, - unsigned sF, unsigned mask, - const int *src, - const int *dst, - const int *const_color, - int *factor) -{ - int tmp; - unsigned i; - - - factor[0] = -1; - factor[1] = -1; - factor[2] = -1; - factor[3] = -1; - - switch (sF) { - case PIPE_BLENDFACTOR_ONE: - break; - - case PIPE_BLENDFACTOR_SRC_COLOR: - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_or(f, factor[i], src[i], src[i]); - } - } - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_or(f, factor[0], src[3], src[3]); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor[0] = dst[3]; - factor[1] = dst[3]; - factor[2] = dst[3]; - break; - - case PIPE_BLENDFACTOR_DST_COLOR: - factor[0] = dst[0]; - factor[1] = dst[1]; - factor[2] = dst[2]; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - /* Alpha saturate means min(As, 1-Ad). 
- */ - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, tmp, tmp, dst[3]); - spe_fcgt(f, factor[0], tmp, src[3]); - spe_selb(f, factor[0], src[3], tmp, factor[0]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; i++) { - factor[i] = spe_allocate_available_register(f); - - spe_fs(f, factor[i], tmp, const_color[i]); - } - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_COLOR: - for (i = 0; i < 3; i++) { - factor[i] = const_color[i]; - } - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, const_color[3]); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor[0] = const_color[3]; - factor[1] = factor[0]; - factor[2] = factor[0]; - break; - - case PIPE_BLENDFACTOR_ZERO: - break; - - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, src[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, src[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - 
spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, dst[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, dst[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - } -} - - -static void -emit_blend_calculation(struct spe_function *f, - unsigned func, unsigned sF, unsigned dF, - int src, int src_factor, int dst, int dst_factor) -{ - int tmp = spe_allocate_available_register(f); - - switch (func) { - case PIPE_BLEND_ADD: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. */ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fa(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fma(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. 
*/ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fms(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_REVERSE_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, src); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, dst, src); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, src, src_factor); - spe_fms(f, src, src, dst_factor, tmp); - } - break; - - case PIPE_BLEND_MIN: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, src, dst, tmp); - break; - - case PIPE_BLEND_MAX: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, dst, src, tmp); - break; - - default: - assert(0); - } - - spe_release_register(f, tmp); -} - - -/** - * Generate code to perform alpha blending on the SPE - */ -void -cell_generate_alpha_blend(struct cell_blend_state *cb) -{ - struct pipe_blend_state *const b = &cb->base; - struct spe_function *const f = &cb->code; - - /* This code generates a maximum of 3 (source alpha factor) - * + 3 (destination alpha factor) + (3 * 6) (source color factor) - * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) - * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to - * make it a happy power-of-two. 
- */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - const int frag[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - const int pixel[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - const int const_color[4] = { - spe_allocate_register(f, 11), - spe_allocate_register(f, 12), - spe_allocate_register(f, 13), - spe_allocate_register(f, 14), - }; - unsigned func[4]; - unsigned sF[4]; - unsigned dF[4]; - unsigned i; - int src_factor[4]; - int dst_factor[4]; - - - /* Does the selected blend mode make use of the source / destination - * color (RGB) blend factors? - */ - boolean need_color_factor = b->rt[0].blend_enable - && (b->rt[0].rgb_func != PIPE_BLEND_MIN) - && (b->rt[0].rgb_func != PIPE_BLEND_MAX); - - /* Does the selected blend mode make use of the source / destination - * alpha blend factors? - */ - boolean need_alpha_factor = b->rt[0].blend_enable - && (b->rt[0].alpha_func != PIPE_BLEND_MIN) - && (b->rt[0].alpha_func != PIPE_BLEND_MAX); - - - if (b->rt[0].blend_enable) { - sF[0] = b->rt[0].rgb_src_factor; - sF[1] = sF[0]; - sF[2] = sF[0]; - switch (b->rt[0].alpha_src_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - sF[3] = PIPE_BLENDFACTOR_ONE; - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - sF[3] = b->rt[0].alpha_src_factor + 1; - break; - default: - sF[3] = b->rt[0].alpha_src_factor; - } - - dF[0] = b->rt[0].rgb_dst_factor; - dF[1] = dF[0]; - dF[2] = dF[0]; - switch (b->rt[0].alpha_dst_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - dF[3] = b->rt[0].alpha_dst_factor + 1; - break; - default: - dF[3] = b->rt[0].alpha_dst_factor; - } - - func[0] = 
b->rt[0].rgb_func; - func[1] = func[0]; - func[2] = func[0]; - func[3] = b->rt[0].alpha_func; - } else { - sF[0] = PIPE_BLENDFACTOR_ONE; - sF[1] = PIPE_BLENDFACTOR_ONE; - sF[2] = PIPE_BLENDFACTOR_ONE; - sF[3] = PIPE_BLENDFACTOR_ONE; - dF[0] = PIPE_BLENDFACTOR_ZERO; - dF[1] = PIPE_BLENDFACTOR_ZERO; - dF[2] = PIPE_BLENDFACTOR_ZERO; - dF[3] = PIPE_BLENDFACTOR_ZERO; - - func[0] = PIPE_BLEND_ADD; - func[1] = PIPE_BLEND_ADD; - func[2] = PIPE_BLEND_ADD; - func[3] = PIPE_BLEND_ADD; - } - - - /* If alpha writing is enabled and the alpha blend mode requires use of - * the alpha factor, calculate the alpha factor. - */ - if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) { - src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], - frag[3], pixel[3]); - - /* If the alpha destination blend factor is the same as the alpha source - * blend factor, re-use the previously calculated value. - */ - dst_factor[3] = (dF[3] == sF[3]) - ? src_factor[3] - : emit_alpha_factor_calculation(f, dF[3], const_color[3], - frag[3], pixel[3]); - } - - - if (sF[0] == sF[3]) { - src_factor[0] = src_factor[3]; - src_factor[1] = src_factor[3]; - src_factor[2] = src_factor[3]; - } else if (sF[0] == dF[3]) { - src_factor[0] = dst_factor[3]; - src_factor[1] = dst_factor[3]; - src_factor[2] = dst_factor[3]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_src_factor, - b->rt[0].colormask, - frag, pixel, const_color, src_factor); - } - - - if (dF[0] == sF[3]) { - dst_factor[0] = src_factor[3]; - dst_factor[1] = src_factor[3]; - dst_factor[2] = src_factor[3]; - } else if (dF[0] == dF[3]) { - dst_factor[0] = dst_factor[3]; - dst_factor[1] = dst_factor[3]; - dst_factor[2] = dst_factor[3]; - } else if (dF[0] == sF[0]) { - dst_factor[0] = src_factor[0]; - dst_factor[1] = src_factor[1]; - dst_factor[2] = src_factor[2]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_dst_factor, - b->rt[0].colormask, - frag, pixel, 
const_color, dst_factor); - } - - - - for (i = 0; i < 4; ++i) { - if ((b->rt[0].colormask & (1U << i)) != 0) { - emit_blend_calculation(f, - func[i], sF[i], dF[i], - frag[i], src_factor[i], - pixel[i], dst_factor[i]); - } - } - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - - printf("# %u instructions\n", f->csr - f->store); - printf("# blend (%sabled)\n", - (cb->base.blend_enable) ? "en" : "dis"); - printf("# RGB func / sf / df: %u %u %u\n", - cb->base.rgb_func, - cb->base.rgb_src_factor, - cb->base.rgb_dst_factor); - printf("# ALP func / sf / df: %u %u %u\n", - cb->base.alpha_func, - cb->base.alpha_src_factor, - cb->base.alpha_dst_factor); - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -static int -PC_OFFSET(const struct spe_function *f, const void *d) -{ - const intptr_t pc = (intptr_t) &f->store[f->num_inst]; - const intptr_t ea = ~0x0f & (intptr_t) d; - - return (ea - pc) >> 2; -} - - -/** - * Generate code to perform color conversion and logic op - * - * \bug - * The code generated by this function should also perform dithering. - * - * \bug - * The code generated by this function should also perform color-write - * masking. - * - * \bug - * Only two framebuffer formats are supported at this time. - */ -void -cell_generate_logic_op(struct spe_function *f, - const struct pipe_blend_state *blend, - struct pipe_surface *surf) -{ - const unsigned logic_op = (blend->logicop_enable) - ? blend->logicop_func : PIPE_LOGICOP_COPY; - - /* This code generates a maximum of 37 instructions. An additional 32 - * bytes (equiv. to 8 instructions) are needed for data storage. Round up - * to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Pixel colors in framebuffer format in AoS layout. 
- */ - const int pixel[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - - /* Fragment colors stored as floats in SoA layout. - */ - const int frag[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - - const int mask = spe_allocate_register(f, 11); - - - /* Short-circuit the noop and invert cases. - */ - if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) { - spe_bi(f, 0, 0, 0); - return; - } else if (logic_op == PIPE_LOGICOP_INVERT) { - spe_nor(f, pixel[0], pixel[0], pixel[0]); - spe_nor(f, pixel[1], pixel[1], pixel[1]); - spe_nor(f, pixel[2], pixel[2], pixel[2]); - spe_nor(f, pixel[3], pixel[3], pixel[3]); - spe_bi(f, 0, 0, 0); - return; - } - - - const int tmp[4] = { - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - }; - - const int shuf_xpose_hi = spe_allocate_available_register(f); - const int shuf_xpose_lo = spe_allocate_available_register(f); - const int shuf_color = spe_allocate_available_register(f); - - - /* Pointer to the begining of the function's private data area. - */ - uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); - - - /* Convert fragment colors to framebuffer format in AoS layout. 
- */ - switch (surf->format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - data[0] = 0x00010203; - data[1] = 0x10111213; - data[2] = 0x04050607; - data[3] = 0x14151617; - data[4] = 0x0c000408; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - data[0] = 0x03020100; - data[1] = 0x13121110; - data[2] = 0x07060504; - data[3] = 0x17161514; - data[4] = 0x0804000c; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - default: - fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); - ASSERT(0); - } - - spe_ilh(f, tmp[0], 0x0808); - spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); - spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); - spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); - - spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); - spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); - spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); - spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); - - spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); - spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); - spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); - spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); - - spe_cfltu(f, frag[0], frag[0], 32); - spe_cfltu(f, frag[1], frag[1], 32); - spe_cfltu(f, frag[2], frag[2], 32); - spe_cfltu(f, frag[3], frag[3], 32); - - spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); - spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); - spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); - spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); - - - /* If logic op is enabled, perform the requested logical operation on the - * converted fragment colors and the pixel colors. 
- */ - switch (logic_op) { - case PIPE_LOGICOP_CLEAR: - spe_il(f, frag[0], 0); - spe_il(f, frag[1], 0); - spe_il(f, frag[2], 0); - spe_il(f, frag[3], 0); - break; - case PIPE_LOGICOP_NOR: - spe_nor(f, frag[0], frag[0], pixel[0]); - spe_nor(f, frag[1], frag[1], pixel[1]); - spe_nor(f, frag[2], frag[2], pixel[2]); - spe_nor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND_INVERTED: - spe_andc(f, frag[0], pixel[0], frag[0]); - spe_andc(f, frag[1], pixel[1], frag[1]); - spe_andc(f, frag[2], pixel[2], frag[2]); - spe_andc(f, frag[3], pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY_INVERTED: - spe_nor(f, frag[0], frag[0], frag[0]); - spe_nor(f, frag[1], frag[1], frag[1]); - spe_nor(f, frag[2], frag[2], frag[2]); - spe_nor(f, frag[3], frag[3], frag[3]); - break; - case PIPE_LOGICOP_AND_REVERSE: - spe_andc(f, frag[0], frag[0], pixel[0]); - spe_andc(f, frag[1], frag[1], pixel[1]); - spe_andc(f, frag[2], frag[2], pixel[2]); - spe_andc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_XOR: - spe_xor(f, frag[0], frag[0], pixel[0]); - spe_xor(f, frag[1], frag[1], pixel[1]); - spe_xor(f, frag[2], frag[2], pixel[2]); - spe_xor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_NAND: - spe_nand(f, frag[0], frag[0], pixel[0]); - spe_nand(f, frag[1], frag[1], pixel[1]); - spe_nand(f, frag[2], frag[2], pixel[2]); - spe_nand(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND: - spe_and(f, frag[0], frag[0], pixel[0]); - spe_and(f, frag[1], frag[1], pixel[1]); - spe_and(f, frag[2], frag[2], pixel[2]); - spe_and(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_EQUIV: - spe_eqv(f, frag[0], frag[0], pixel[0]); - spe_eqv(f, frag[1], frag[1], pixel[1]); - spe_eqv(f, frag[2], frag[2], pixel[2]); - spe_eqv(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR_INVERTED: - spe_orc(f, frag[0], pixel[0], frag[0]); - spe_orc(f, frag[1], pixel[1], frag[1]); - spe_orc(f, frag[2], pixel[2], frag[2]); - spe_orc(f, frag[3], 
pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY: - break; - case PIPE_LOGICOP_OR_REVERSE: - spe_orc(f, frag[0], frag[0], pixel[0]); - spe_orc(f, frag[1], frag[1], pixel[1]); - spe_orc(f, frag[2], frag[2], pixel[2]); - spe_orc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR: - spe_or(f, frag[0], frag[0], pixel[0]); - spe_or(f, frag[1], frag[1], pixel[1]); - spe_or(f, frag[2], frag[2], pixel[2]); - spe_or(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_SET: - spe_il(f, frag[0], ~0); - spe_il(f, frag[1], ~0); - spe_il(f, frag[2], ~0); - spe_il(f, frag[3], ~0); - break; - - /* These two cases are short-circuited above. - */ - case PIPE_LOGICOP_INVERT: - case PIPE_LOGICOP_NOOP: - default: - assert(0); - } - - - /* Apply fragment mask. - */ - spe_ilh(f, tmp[0], 0x0000); - spe_ilh(f, tmp[1], 0x0404); - spe_ilh(f, tmp[2], 0x0808); - spe_ilh(f, tmp[3], 0x0c0c); - - spe_shufb(f, tmp[0], mask, mask, tmp[0]); - spe_shufb(f, tmp[1], mask, mask, tmp[1]); - spe_shufb(f, tmp[2], mask, mask, tmp[2]); - spe_shufb(f, tmp[3], mask, mask, tmp[3]); - - spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); - spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); - spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); - spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# %u instructions\n", f->csr - f->store); - - printf("\t.text\n"); - for (i = 0; i < 64; i++) { - printf("\t.long\t0x%04x\n", p[i]); - } - fflush(stdout); - } -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h deleted file mode 100644 index a8267a5133..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef CELL_STATE_PER_FRAGMENT_H -#define CELL_STATE_PER_FRAGMENT_H - -extern void -cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); - -extern void -cell_generate_alpha_blend(struct cell_blend_state *cb); - -extern void -cell_generate_logic_op(struct spe_function *f, - const struct pipe_blend_state *blend, - struct pipe_surface *surf); - -#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c deleted file mode 100644 index ddf1477268..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ /dev/null @@ -1,229 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "draw/draw_context.h" -#include "tgsi/tgsi_parse.h" - -#include "cell_context.h" -#include "cell_state.h" -#include "cell_gen_fp.h" -#include "cell_texture.h" - - -/** cast wrapper */ -static INLINE struct cell_fragment_shader_state * -cell_fragment_shader_state(void *shader) -{ - return (struct cell_fragment_shader_state *) shader; -} - - -/** cast wrapper */ -static INLINE struct cell_vertex_shader_state * -cell_vertex_shader_state(void *shader) -{ - return (struct cell_vertex_shader_state *) shader; -} - - -/** - * Create fragment shader state. 
- * Called via pipe->create_fs_state() - */ -static void * -cell_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_fragment_shader_state *cfs; - - cfs = CALLOC_STRUCT(cell_fragment_shader_state); - if (!cfs) - return NULL; - - cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); - if (!cfs->shader.tokens) { - FREE(cfs); - return NULL; - } - - tgsi_scan_shader(templ->tokens, &cfs->info); - - cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); - - return cfs; -} - - -/** - * Called via pipe->bind_fs_state() - */ -static void -cell_bind_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->fs = cell_fragment_shader_state(fs); - - cell->dirty |= CELL_NEW_FS; -} - - -/** - * Called via pipe->delete_fs_state() - */ -static void -cell_delete_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); - - spe_release_func(&cfs->code); - - FREE((void *) cfs->shader.tokens); - FREE(cfs); -} - - -/** - * Create vertex shader state. 
- * Called via pipe->create_vs_state() - */ -static void * -cell_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *cvs; - - cvs = CALLOC_STRUCT(cell_vertex_shader_state); - if (!cvs) - return NULL; - - cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); - if (!cvs->shader.tokens) { - FREE(cvs); - return NULL; - } - - tgsi_scan_shader(templ->tokens, &cvs->info); - - cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); - if (cvs->draw_data == NULL) { - FREE( (void *) cvs->shader.tokens ); - FREE( cvs ); - return NULL; - } - - return cvs; -} - - -/** - * Called via pipe->bind_vs_state() - */ -static void -cell_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->vs = cell_vertex_shader_state(vs); - - draw_bind_vertex_shader(cell->draw, - (cell->vs ? cell->vs->draw_data : NULL)); - - cell->dirty |= CELL_NEW_VS; -} - - -/** - * Called via pipe->delete_vs_state() - */ -static void -cell_delete_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); - - draw_delete_vertex_shader(cell->draw, cvs->draw_data); - FREE( (void *) cvs->shader.tokens ); - FREE( cvs ); -} - - -/** - * Called via pipe->set_constant_buffer() - */ -static void -cell_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - struct pipe_resource *constants) -{ - struct cell_context *cell = cell_context(pipe); - unsigned size = constants ? constants->width0 : 0; - const void *data = constants ? 
cell_resource(constants)->data : NULL; - - assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); - - if (cell->constants[shader] == constants) - return; - - draw_flush(cell->draw); - - /* note: reference counting */ - pipe_resource_reference(&cell->constants[shader], constants); - - if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0, - data, size); - } - - cell->mapped_constants[shader] = data; - - if (shader == PIPE_SHADER_VERTEX) - cell->dirty |= CELL_NEW_VS_CONSTANTS; - else if (shader == PIPE_SHADER_FRAGMENT) - cell->dirty |= CELL_NEW_FS_CONSTANTS; -} - - -void -cell_init_shader_functions(struct cell_context *cell) -{ - cell->pipe.create_fs_state = cell_create_fs_state; - cell->pipe.bind_fs_state = cell_bind_fs_state; - cell->pipe.delete_fs_state = cell_delete_fs_state; - - cell->pipe.create_vs_state = cell_create_vs_state; - cell->pipe.bind_vs_state = cell_bind_vs_state; - cell->pipe.delete_vs_state = cell_delete_vs_state; - - cell->pipe.set_constant_buffer = cell_set_constant_buffer; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c deleted file mode 100644 index 7f65b82619..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ /dev/null @@ -1,120 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "cell_context.h" -#include "cell_state.h" - -#include "util/u_memory.h" -#include "util/u_transfer.h" -#include "draw/draw_context.h" - - -static void * -cell_create_vertex_elements_state(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *attribs) -{ - struct cell_velems_state *velems; - assert(count <= PIPE_MAX_ATTRIBS); - velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state)); - if (velems) { - velems->count = count; - memcpy(velems->velem, attribs, sizeof(*attribs) * count); - } - return velems; -} - -static void -cell_bind_vertex_elements_state(struct pipe_context *pipe, - void *velems) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems; - - cell->velems = cell_velems; - - cell->dirty |= CELL_NEW_VERTEX; - - if (cell_velems) - draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem); -} - -static void -cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) -{ - FREE( velems ); -} - - -static void -cell_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct cell_context *cell = cell_context(pipe); - - assert(count <= PIPE_MAX_ATTRIBS); - - util_copy_vertex_buffers(cell->vertex_buffer, - &cell->num_vertex_buffers, - buffers, count); - - cell->dirty |= CELL_NEW_VERTEX; - - draw_set_vertex_buffers(cell->draw, count, buffers); -} - - -static void -cell_set_index_buffer(struct pipe_context *pipe, - const struct pipe_index_buffer *ib) -{ - struct cell_context *cell = cell_context(pipe); - - if (ib) - memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer)); - else - memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); - - draw_set_index_buffer(cell->draw, ib); -} - - 
-void -cell_init_vertex_functions(struct cell_context *cell) -{ - cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; - cell->pipe.set_index_buffer = cell_set_index_buffer; - cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; - cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; - cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; - cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; -} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c deleted file mode 100644 index 777454479b..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_surface.h" -#include "cell_context.h" -#include "cell_surface.h" - - -void -cell_init_surface_functions(struct cell_context *cell) -{ - cell->pipe.resource_copy_region = util_resource_copy_region; -} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h deleted file mode 100644 index 9e58f32944..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef CELL_SURFACE_H -#define CELL_SURFACE_H - - -struct cell_context; - - -extern void -cell_init_surface_functions(struct cell_context *cell); - - -#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c deleted file mode 100644 index 946a7050e5..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ /dev/null @@ -1,644 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Michel Dänzer <michel@tungstengraphics.com> - * Brian Paul - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_transfer.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "cell_context.h" -#include "cell_screen.h" -#include "cell_state.h" -#include "cell_texture.h" - -#include "state_tracker/sw_winsys.h" - - - -static boolean -cell_resource_layout(struct pipe_screen *screen, - struct cell_resource *ct) -{ - struct pipe_resource *pt = &ct->base; - unsigned level; - unsigned width = pt->width0; - unsigned height = pt->height0; - unsigned depth = pt->depth0; - - ct->buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - unsigned size; - unsigned w_tile, h_tile; - - assert(level < CELL_MAX_TEXTURE_LEVELS); - - /* width, height, rounded up to tile size */ - w_tile = align(width, TILE_SIZE); - h_tile = align(height, TILE_SIZE); - - ct->stride[level] = util_format_get_stride(pt->format, w_tile); - - ct->level_offset[level] = ct->buffer_size; - - size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile); - if (pt->target == PIPE_TEXTURE_CUBE) - size *= 6; - else - size *= depth; - - ct->buffer_size += size; - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - ct->data = align_malloc(ct->buffer_size, 16); - - return ct->data != NULL; -} - - -/** - * Texture layout for simple color buffers. 
- */ -static boolean -cell_displaytarget_layout(struct pipe_screen *screen, - struct cell_resource * ct) -{ - struct sw_winsys *winsys = cell_screen(screen)->winsys; - - /* Round up the surface size to a multiple of the tile size? - */ - ct->dt = winsys->displaytarget_create(winsys, - ct->base.bind, - ct->base.format, - ct->base.width0, - ct->base.height0, - 16, - &ct->dt_stride ); - - return ct->dt != NULL; -} - -static struct pipe_resource * -cell_resource_create(struct pipe_screen *screen, - const struct pipe_resource *templat) -{ - struct cell_resource *ct = CALLOC_STRUCT(cell_resource); - if (!ct) - return NULL; - - ct->base = *templat; - pipe_reference_init(&ct->base.reference, 1); - ct->base.screen = screen; - - /* Create both a displaytarget (linear) and regular texture - * (twiddled). Convert twiddled->linear at flush_frontbuffer time. - */ - if (ct->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if (!cell_displaytarget_layout(screen, ct)) - goto fail; - } - - if (!cell_resource_layout(screen, ct)) - goto fail; - - return &ct->base; - -fail: - if (ct->dt) { - struct sw_winsys *winsys = cell_screen(screen)->winsys; - winsys->displaytarget_destroy(winsys, ct->dt); - } - - FREE(ct); - - return NULL; -} - - -static void -cell_resource_destroy(struct pipe_screen *scrn, struct pipe_resource *pt) -{ - struct cell_screen *screen = cell_screen(scrn); - struct sw_winsys *winsys = screen->winsys; - struct cell_resource *ct = cell_resource(pt); - - if (ct->dt) { - /* display target */ - winsys->displaytarget_destroy(winsys, ct->dt); - } - else if (!ct->userBuffer) { - align_free(ct->data); - } - - FREE(ct); -} - - - -/** - * Convert image from linear layout to tiled layout. 4-byte pixels. 
- */ -static void -twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, - uint src_stride, const uint *src) -{ - const uint tile_size2 = tile_size * tile_size; - const uint h_t = (h + tile_size - 1) / tile_size; - const uint w_t = (w + tile_size - 1) / tile_size; - - uint it, jt; /* tile counters */ - uint i, j; /* intra-tile counters */ - - src_stride /= 4; /* convert from bytes to pixels */ - - /* loop over dest tiles */ - for (it = 0; it < h_t; it++) { - for (jt = 0; jt < w_t; jt++) { - /* start of dest tile: */ - uint *tdst = dst + (it * w_t + jt) * tile_size2; - - /* compute size of this tile (may be smaller than tile_size) */ - /* XXX note: a compiler bug was found here. That's why the code - * looks as it does. - */ - uint tile_width = w - jt * tile_size; - tile_width = MIN2(tile_width, tile_size); - uint tile_height = h - it * tile_size; - tile_height = MIN2(tile_height, tile_size); - - /* loop over texels in the tile */ - for (i = 0; i < tile_height; i++) { - for (j = 0; j < tile_width; j++) { - const uint srci = it * tile_size + i; - const uint srcj = jt * tile_size + j; - ASSERT(srci < h); - ASSERT(srcj < w); - tdst[i * tile_size + j] = src[srci * src_stride + srcj]; - } - } - } - } -} - - -/** - * For Cell. Basically, rearrange the pixels/quads from this layout: - * +--+--+--+--+ - * |p0|p1|p2|p3|.... - * +--+--+--+--+ - * - * to this layout: - * +--+--+ - * |p0|p1|.... - * +--+--+ - * |p2|p3| - * +--+--+ - */ -static void -twiddle_tile(const uint *tileIn, uint *tileOut) -{ - int y, x; - - for (y = 0; y < TILE_SIZE; y+=2) { - for (x = 0; x < TILE_SIZE; x+=2) { - int k = 4 * (y/2 * TILE_SIZE/2 + x/2); - tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; - tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; - tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; - tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; - } - } -} - - -/** - * Convert image from tiled layout to linear layout. 4-byte pixels. 
- */ -static void -untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, - uint dst_stride, const uint *src) -{ - const uint tile_size2 = tile_size * tile_size; - const uint h_t = (h + tile_size - 1) / tile_size; - const uint w_t = (w + tile_size - 1) / tile_size; - uint *tile_buf; - uint it, jt; /* tile counters */ - uint i, j; /* intra-tile counters */ - - dst_stride /= 4; /* convert from bytes to pixels */ - - tile_buf = align_malloc(tile_size * tile_size * 4, 16); - - /* loop over src tiles */ - for (it = 0; it < h_t; it++) { - for (jt = 0; jt < w_t; jt++) { - /* start of src tile: */ - const uint *tsrc = src + (it * w_t + jt) * tile_size2; - - twiddle_tile(tsrc, tile_buf); - tsrc = tile_buf; - - /* compute size of this tile (may be smaller than tile_size) */ - /* XXX note: a compiler bug was found here. That's why the code - * looks as it does. - */ - uint tile_width = w - jt * tile_size; - tile_width = MIN2(tile_width, tile_size); - uint tile_height = h - it * tile_size; - tile_height = MIN2(tile_height, tile_size); - - /* loop over texels in the tile */ - for (i = 0; i < tile_height; i++) { - for (j = 0; j < tile_width; j++) { - uint dsti = it * tile_size + i; - uint dstj = jt * tile_size + j; - ASSERT(dsti < h); - ASSERT(dstj < w); - dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j]; - } - } - } - } - - align_free(tile_buf); -} - - -static struct pipe_surface * -cell_create_surface(struct pipe_context *ctx, - struct pipe_resource *pt, - const struct pipe_surface *surf_tmpl) -{ - struct cell_resource *ct = cell_resource(pt); - struct pipe_surface *ps; - - assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); - ps = CALLOC_STRUCT(pipe_surface); - if (ps) { - pipe_reference_init(&ps->reference, 1); - pipe_resource_reference(&ps->texture, pt); - ps->format = surf_tmpl->format; - ps->context = ctx; - ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); - ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); - /* XXX may 
need to override usage flags (see sp_texture.c) */ - ps->usage = surf_tmpl->usage; - ps->u.tex.level = surf_tmpl->u.tex.level; - ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; - ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; - } - return ps; -} - - -static void -cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf) -{ - pipe_resource_reference(&surf->texture, NULL); - FREE(surf); -} - - -/** - * Create new pipe_transfer object. - * This is used by the user to put tex data into a texture (and get it - * back out for glGetTexImage). - */ -static struct pipe_transfer * -cell_get_transfer(struct pipe_context *ctx, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box) -{ - struct cell_resource *ct = cell_resource(resource); - struct cell_transfer *ctrans; - enum pipe_format format = resource->format; - - assert(resource); - assert(level <= resource->last_level); - - /* make sure the requested region is in the image bounds */ - assert(box->x + box->width <= u_minify(resource->width0, level)); - assert(box->y + box->height <= u_minify(resource->height0, level)); - assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); - - ctrans = CALLOC_STRUCT(cell_transfer); - if (ctrans) { - struct pipe_transfer *pt = &ctrans->base; - pipe_resource_reference(&pt->resource, resource); - pt->level = level; - pt->usage = usage; - pt->box = *box; - pt->stride = ct->stride[level]; - - ctrans->offset = ct->level_offset[level]; - - if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) { - unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE); - ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride; - } - else { - assert(box->z == 0); - } - - return pt; - } - return NULL; -} - - -static void -cell_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t) -{ - struct cell_transfer *transfer = 
cell_transfer(t); - /* Effectively do the texture_update work here - if texture images - * needed post-processing to put them into hardware layout, this is - * where it would happen. For cell, nothing to do. - */ - assert (transfer->base.resource); - pipe_resource_reference(&transfer->base.resource, NULL); - FREE(transfer); -} - - -/** - * Return pointer to texture image data in linear layout. - */ -static void * -cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) -{ - struct cell_transfer *ctrans = cell_transfer(transfer); - struct pipe_resource *pt = transfer->resource; - struct cell_resource *ct = cell_resource(pt); - - assert(transfer->resource); - - if (ct->mapped == NULL) { - ct->mapped = ct->data; - } - - - /* Better test would be resource->is_linear - */ - if (transfer->resource->target != PIPE_BUFFER) { - const uint level = ctrans->base.level; - const uint texWidth = u_minify(pt->width0, level); - const uint texHeight = u_minify(pt->height0, level); - unsigned size; - - - /* - * Create a buffer of ordinary memory for the linear texture. - * This is the memory that the user will read/write. - */ - size = (util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) * - util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE))); - - ctrans->map = align_malloc(size, 16); - if (!ctrans->map) - return NULL; /* out of memory */ - - if (transfer->usage & PIPE_TRANSFER_READ) { - /* Textures always stored twiddled, need to untwiddle the - * texture to make a linear version. 
- */ - const uint bpp = util_format_get_blocksize(ct->base.format); - if (bpp == 4) { - const uint *src = (uint *) (ct->mapped + ctrans->offset); - uint *dst = ctrans->map; - untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, - dst, transfer->stride, src); - } - else { - // xxx fix - } - } - } - else { - unsigned stride = transfer->stride; - enum pipe_format format = pt->format; - unsigned blocksize = util_format_get_blocksize(format); - - ctrans->map = (ct->mapped + - ctrans->offset + - ctrans->base.box.y / util_format_get_blockheight(format) * stride + - ctrans->base.box.x / util_format_get_blockwidth(format) * blocksize); - } - - - return ctrans->map; -} - - -/** - * Called when user is done reading/writing texture data. - * If new data was written, this is where we convert the linear data - * to tiled data. - */ -static void -cell_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer *transfer) -{ - struct cell_transfer *ctrans = cell_transfer(transfer); - struct pipe_resource *pt = transfer->resource; - struct cell_resource *ct = cell_resource(pt); - const uint level = ctrans->base.level; - const uint texWidth = u_minify(pt->width0, level); - const uint texHeight = u_minify(pt->height0, level); - const uint stride = ct->stride[level]; - - if (!ct->mapped) { - assert(0); - return; - } - - if (pt->target != PIPE_BUFFER) { - if (transfer->usage & PIPE_TRANSFER_WRITE) { - /* The user wrote new texture data into the mapped buffer. - * We need to convert the new linear data into the twiddled/tiled format. 
- */ - const uint bpp = util_format_get_blocksize(ct->base.format); - if (bpp == 4) { - const uint *src = ctrans->map; - uint *dst = (uint *) (ct->mapped + ctrans->offset); - twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, stride, src); - } - else { - // xxx fix - } - } - - align_free(ctrans->map); - } - else { - /* nothing to do */ - } - - ctrans->map = NULL; -} - - - -/* This used to be overriden by the co-state tracker, but really needs - * to be active with sw_winsys. - * - * Contrasting with llvmpipe and softpipe, this is the only place - * where we use the ct->dt display target in any real sense. - * - * Basically just untwiddle our local data into the linear - * displaytarget. - */ -static void -cell_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_resource *resource, - unsigned level, unsigned layer, - void *context_private) -{ - struct cell_screen *screen = cell_screen(_screen); - struct sw_winsys *winsys = screen->winsys; - struct cell_resource *ct = cell_resource(resource); - - if (!ct->dt) - return; - - /* Need to untwiddle from our internal representation here: - */ - { - unsigned *map = winsys->displaytarget_map(winsys, ct->dt, - (PIPE_TRANSFER_READ | - PIPE_TRANSFER_WRITE)); - unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]); - - untwiddle_image_uint(u_minify(resource->width0, level), - u_minify(resource->height0, level), - TILE_SIZE, - map, - ct->dt_stride, - src); - - winsys->displaytarget_unmap(winsys, ct->dt); - } - - winsys->displaytarget_display(winsys, ct->dt, context_private); -} - - - -/** - * Create buffer which wraps user-space data. 
- */ -static struct pipe_resource * -cell_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned bind_flags) -{ - struct cell_resource *buffer; - - buffer = CALLOC_STRUCT(cell_resource); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ - buffer->base.bind = PIPE_BIND_TRANSFER_READ | bind_flags; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.flags = 0; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - buffer->base.array_size = 1; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -static struct pipe_resource * -cell_resource_from_handle(struct pipe_screen *screen, - const struct pipe_resource *templat, - struct winsys_handle *handle) -{ - /* XXX todo */ - return NULL; -} - - -static boolean -cell_resource_get_handle(struct pipe_screen *scree, - struct pipe_resource *tex, - struct winsys_handle *handle) -{ - /* XXX todo */ - return FALSE; -} - - -void -cell_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->resource_create = cell_resource_create; - screen->resource_destroy = cell_resource_destroy; - screen->resource_from_handle = cell_resource_from_handle; - screen->resource_get_handle = cell_resource_get_handle; - screen->user_buffer_create = cell_user_buffer_create; - - screen->flush_frontbuffer = cell_flush_frontbuffer; -} - -void -cell_init_texture_transfer_funcs(struct cell_context *cell) -{ - cell->pipe.get_transfer = cell_get_transfer; - cell->pipe.transfer_destroy = cell_transfer_destroy; - cell->pipe.transfer_map = cell_transfer_map; - cell->pipe.transfer_unmap = cell_transfer_unmap; - - cell->pipe.transfer_flush_region = u_default_transfer_flush_region; - cell->pipe.transfer_inline_write = u_default_transfer_inline_write; - - cell->pipe.create_surface = cell_create_surface; - cell->pipe.surface_destroy = 
cell_surface_destroy; -} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h deleted file mode 100644 index bd8224b3b7..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ /dev/null @@ -1,102 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef CELL_TEXTURE_H -#define CELL_TEXTURE_H - -#include "cell/common.h" - -struct cell_context; -struct pipe_resource; - - -/** - * Subclass of pipe_resource - */ -struct cell_resource -{ - struct pipe_resource base; - - unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; - unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; - - /** - * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET - * usage. - */ - struct sw_displaytarget *dt; - unsigned dt_stride; - - /** - * Malloc'ed data for regular textures, or a mapping to dt above. - */ - void *data; - boolean userBuffer; - - /* Size of the linear buffer?? - */ - unsigned long buffer_size; - - /** The buffer above, mapped. This is the memory from which the - * SPUs will fetch texels. This texture data is in the tiled layout. - */ - ubyte *mapped; -}; - - -struct cell_transfer -{ - struct pipe_transfer base; - - unsigned long offset; - void *map; -}; - - -/** cast wrapper */ -static INLINE struct cell_resource * -cell_resource(struct pipe_resource *pt) -{ - return (struct cell_resource *) pt; -} - - -/** cast wrapper */ -static INLINE struct cell_transfer * -cell_transfer(struct pipe_transfer *pt) -{ - return (struct cell_transfer *) pt; -} - - -extern void -cell_init_screen_texture_funcs(struct pipe_screen *screen); - -extern void -cell_init_texture_transfer_funcs(struct cell_context *cell); - -#endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c deleted file mode 100644 index 37b7195648..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Vertex buffer code. The draw module transforms vertices to window - * coords, etc. and emits the vertices into buffer supplied by this module. - * When a vertex buffer is full, or we flush, we'll send the vertex data - * to the SPUs. - * - * Authors - * Brian Paul - */ - - -#include "cell_batch.h" -#include "cell_context.h" -#include "cell_fence.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_vbuf.h" -#include "draw/draw_vbuf.h" -#include "util/u_memory.h" - - -/** Allow vertex data to be inlined after RENDER command */ -#define ALLOW_INLINE_VERTS 1 - - -/** - * Subclass of vbuf_render because we need a cell_context pointer in - * a few places. 
- */ -struct cell_vbuf_render -{ - struct vbuf_render base; - struct cell_context *cell; - uint prim; /**< PIPE_PRIM_x */ - uint vertex_size; /**< in bytes */ - void *vertex_buffer; /**< just for debug, really */ - uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ - uint vertex_buffer_size; /**< size in bytes */ -}; - - -/** cast wrapper */ -static struct cell_vbuf_render * -cell_vbuf_render(struct vbuf_render *vbr) -{ - return (struct cell_vbuf_render *) vbr; -} - - - -static const struct vertex_info * -cell_vbuf_get_vertex_info(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - return &cvbr->cell->vertex_info; -} - - -static boolean -cell_vbuf_allocate_vertices(struct vbuf_render *vbr, - ushort vertex_size, ushort nr_vertices) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - unsigned size = vertex_size * nr_vertices; - /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ - - assert(cvbr->vertex_buf == ~0); - cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); - cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; - cvbr->vertex_buffer_size = size; - cvbr->vertex_size = vertex_size; - - return cvbr->vertex_buffer != NULL; -} - - -static void -cell_vbuf_release_vertices(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - struct cell_context *cell = cvbr->cell; - - /* - printf("%s vertex_buf = %u count = %u\n", - __FUNCTION__, cvbr->vertex_buf, vertices_used); - */ - - /* Make sure texture buffers aren't released until we're done rendering - * with them. 
- */ - cell_add_fenced_textures(cell); - - /* Tell SPUs they can release the vert buf */ - if (cvbr->vertex_buf != ~0U) { - STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0); - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) - cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts)); - release->opcode[0] = CELL_CMD_RELEASE_VERTS; - release->vertex_buf = cvbr->vertex_buf; - } - - cvbr->vertex_buf = ~0; - cell_flush_int(cell, 0x0); - - cvbr->vertex_buffer = NULL; -} - - -static void * -cell_vbuf_map_vertices(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - return cvbr->vertex_buffer; -} - - -static void -cell_vbuf_unmap_vertices(struct vbuf_render *vbr, - ushort min_index, - ushort max_index ) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); - /* do nothing */ -} - - - -static boolean -cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - cvbr->prim = prim; - /*printf("cell_set_prim %u\n", prim);*/ - return TRUE; -} - - -static void -cell_vbuf_draw_elements(struct vbuf_render *vbr, - const ushort *indices, - uint nr_indices) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - struct cell_context *cell = cvbr->cell; - float xmin, ymin, xmax, ymax; - uint i; - uint nr_vertices = 0, min_index = ~0; - const void *vertices = cvbr->vertex_buffer; - const uint vertex_size = cvbr->vertex_size; - - for (i = 0; i < nr_indices; i++) { - if (indices[i] > nr_vertices) - nr_vertices = indices[i]; - if (indices[i] < min_index) - min_index = indices[i]; - } - nr_vertices++; - -#if 0 - /*if (min_index > 0)*/ - printf("%s min_index = %u\n", __FUNCTION__, min_index); -#endif - -#if 0 - printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", - nr_indices, nr_vertices); - printf(" "); - for (i = 0; i < nr_indices; i += 
3) { - printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); - } - printf("\n"); -#elif 0 - printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", - nr_indices, nr_vertices, - indices[0], indices[1], indices[2]); - printf("ind space = %u, vert space = %u, space = %u\n", - nr_indices * 2, - nr_vertices * 4 * cell->vertex_info.size, - cell_batch_free_space(cell)); -#endif - - /* compute x/y bounding box */ - xmin = ymin = 1e50; - xmax = ymax = -1e50; - for (i = min_index; i < nr_vertices; i++) { - const float *v = (float *) ((ubyte *) vertices + i * vertex_size); - if (v[0] < xmin) - xmin = v[0]; - if (v[0] > xmax) - xmax = v[0]; - if (v[1] < ymin) - ymin = v[1]; - if (v[1] > ymax) - ymax = v[1]; - } -#if 0 - printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); - fflush(stdout); -#endif - - if (cvbr->prim != PIPE_PRIM_TRIANGLES) - return; /* only render tris for now */ - - /* build/insert batch RENDER command */ - { - const uint index_bytes = ROUNDUP16(nr_indices * 2); - const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size); - STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0); - const uint batch_size = sizeof(struct cell_command_render) + index_bytes; - - struct cell_command_render *render - = (struct cell_command_render *) - cell_batch_alloc16(cell, batch_size); - - render->opcode[0] = CELL_CMD_RENDER; - render->prim_type = cvbr->prim; - - render->num_indexes = nr_indices; - render->min_index = min_index; - - /* append indices after render command */ - memcpy(render + 1, indices, nr_indices * 2); - - /* if there's room, append vertices after the indices, else leave - * vertices in the original/separate buffer. 
- */ - render->vertex_size = 4 * cell->vertex_info.size; - render->num_verts = nr_vertices; - if (ALLOW_INLINE_VERTS && - min_index == 0 && - vertex_bytes + 16 <= cell_batch_free_space(cell)) { - /* vertex data inlined, after indices, at 16-byte boundary */ - void *dst = cell_batch_alloc16(cell, vertex_bytes); - memcpy(dst, vertices, vertex_bytes); - render->inline_verts = TRUE; - render->vertex_buf = ~0; - } - else { - /* vertex data in separate buffer */ - render->inline_verts = FALSE; - ASSERT(cvbr->vertex_buf >= 0); - render->vertex_buf = cvbr->vertex_buf; - } - - render->xmin = xmin; - render->ymin = ymin; - render->xmax = xmax; - render->ymax = ymax; - } - -#if 0 - /* helpful for debug */ - cell_flush_int(cell, CELL_FLUSH_WAIT); -#endif -} - - -static void -cell_vbuf_destroy(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - cvbr->cell->vbuf_render = NULL; - FREE(cvbr); -} - - -/** - * Initialize the post-transform vertex buffer information for the given - * context. - */ -void -cell_init_vbuf(struct cell_context *cell) -{ - assert(cell->draw); - - cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); - - /* The max number of indexes is what can fix into a batch buffer, - * minus the render and release-verts commands. 
- */ - cell->vbuf_render->base.max_indices - = (CELL_BUFFER_SIZE - - sizeof(struct cell_command_render) - - sizeof(struct cell_command_release_verts)) - / sizeof(ushort); - cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; - - cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; - cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; - cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices; - cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices; - cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; - cell->vbuf_render->base.draw_elements = cell_vbuf_draw_elements; - cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; - cell->vbuf_render->base.destroy = cell_vbuf_destroy; - - cell->vbuf_render->cell = cell; -#if 1 - cell->vbuf_render->vertex_buf = ~0; -#endif - - cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); -} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h deleted file mode 100644 index d265cbf770..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_VBUF_H -#define CELL_VBUF_H - - -struct cell_context; - -extern void -cell_init_vbuf(struct cell_context *cell); - - -#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c deleted file mode 100644 index 9cba537d9e..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <inttypes.h> -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "../auxiliary/draw/draw_context.h" -#include "../auxiliary/draw/draw_private.h" - -#include "cell_context.h" -#include "rtasm/rtasm_ppc_spe.h" - - -/** - * Emit a 4x4 matrix transpose operation - * - * \param p Function that the transpose operation is to be appended to - * \param row0 Register containing row 0 of the source matrix - * \param row1 Register containing row 1 of the source matrix - * \param row2 Register containing row 2 of the source matrix - * \param row3 Register containing row 3 of the source matrix - * \param dest_ptr Register containing the address of the destination matrix - * \param shuf_ptr Register containing the address of the shuffled data - * \param count Number of colums to actually be written to the destination - * - * \note - * This function assumes that the registers named by \c row0, \c row1, - * \c row2, and \c row3 are scratch and can be modified by the generated code. - * Furthermore, these registers will be released, via calls to - * \c release_register, by this function. - * - * \note - * This function requires that four temporary are available on entry. 
- */ -static void -emit_matrix_transpose(struct spe_function *p, - unsigned row0, unsigned row1, unsigned row2, - unsigned row3, unsigned dest_ptr, - unsigned shuf_ptr, unsigned count) -{ - int shuf_hi = spe_allocate_available_register(p); - int shuf_lo = spe_allocate_available_register(p); - int t1 = spe_allocate_available_register(p); - int t2 = spe_allocate_available_register(p); - int t3; - int t4; - int col0; - int col1; - int col2; - int col3; - - - spe_lqd(p, shuf_hi, shuf_ptr, 3*16); - spe_lqd(p, shuf_lo, shuf_ptr, 4*16); - spe_shufb(p, t1, row0, row2, shuf_hi); - spe_shufb(p, t2, row0, row2, shuf_lo); - - - /* row0 and row2 are now no longer needed. Re-use those registers as - * temporaries. - */ - t3 = row0; - t4 = row2; - - spe_shufb(p, t3, row1, row3, shuf_hi); - spe_shufb(p, t4, row1, row3, shuf_lo); - - - /* row1 and row3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col0 = row1; - col1 = row3; - - spe_shufb(p, col0, t1, t3, shuf_hi); - if (count > 1) { - spe_shufb(p, col1, t1, t3, shuf_lo); - } - - /* t1 and t3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col2 = t1; - col3 = t3; - - if (count > 2) { - spe_shufb(p, col2, t2, t4, shuf_hi); - } - - if (count > 3) { - spe_shufb(p, col3, t2, t4, shuf_lo); - } - - - /* Store the results. Remember that the stqd instruction is encoded using - * the qword offset (stand-alone assemblers to the byte-offset to - * qword-offset conversion for you), so the byte-offset needs be divided by - * 16. - */ - switch (count) { - case 4: - spe_stqd(p, col3, dest_ptr, 3 * 16); - case 3: - spe_stqd(p, col2, dest_ptr, 2 * 16); - case 2: - spe_stqd(p, col1, dest_ptr, 1 * 16); - case 1: - spe_stqd(p, col0, dest_ptr, 0 * 16); - } - - - /* Release all of the temporary registers used. 
- */ - spe_release_register(p, col0); - spe_release_register(p, col1); - spe_release_register(p, col2); - spe_release_register(p, col3); - spe_release_register(p, shuf_hi); - spe_release_register(p, shuf_lo); - spe_release_register(p, t2); - spe_release_register(p, t4); -} - - -#if 0 -/* This appears to not be used currently */ -static void -emit_fetch(struct spe_function *p, - unsigned in_ptr, unsigned *offset, - unsigned out_ptr, unsigned shuf_ptr, - enum pipe_format format) -{ - const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); - const unsigned type = pf_type(format); - const unsigned bytes = pf_size_x(format); - - int v0 = spe_allocate_available_register(p); - int v1 = spe_allocate_available_register(p); - int v2 = spe_allocate_available_register(p); - int v3 = spe_allocate_available_register(p); - int tmp = spe_allocate_available_register(p); - int float_zero = -1; - int float_one = -1; - float scale_signed = 0.0; - float scale_unsigned = 0.0; - - spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); - spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); - spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); - spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); - offset[0] += 4; - - switch (bytes) { - case 1: - scale_signed = 1.0f / 127.0f; - scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1 * 16); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 2: - scale_signed = 1.0f / 32767.0f; - scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2 * 16); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 4: - scale_signed = 1.0f / 2147483647.0f; - scale_unsigned = 1.0f / 4294967295.0f; - break; - default: - assert(0); - break; - } - - switch (type) { - case PIPE_FORMAT_TYPE_FLOAT: - break; - case 
PIPE_FORMAT_TYPE_UNORM: - spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); - spe_cuflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_SNORM: - spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); - spe_csflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_USCALED: - spe_cuflt(p, v0, v0, 0); - break; - case PIPE_FORMAT_TYPE_SSCALED: - spe_csflt(p, v0, v0, 0); - break; - } - - - if (count < 4) { - float_one = spe_allocate_available_register(p); - spe_il(p, float_one, 1); - spe_cuflt(p, float_one, float_one, 0); - - if (count < 3) { - float_zero = spe_allocate_available_register(p); - spe_il(p, float_zero, 0); - } - } - - spe_release_register(p, tmp); - - emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); - - switch (count) { - case 1: - spe_stqd(p, float_zero, out_ptr, 1 * 16); - case 2: - spe_stqd(p, float_zero, out_ptr, 2 * 16); - case 3: - spe_stqd(p, float_one, out_ptr, 3 * 16); - } - - if (float_zero != -1) { - spe_release_register(p, float_zero); - } - - if (float_one != -1) { - spe_release_register(p, float_one); - } -} -#endif - - -void cell_update_vertex_fetch(struct draw_context *draw) -{ -#if 0 - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_MAX_ATTRIBS]; - unsigned unique_attr_formats; - int out_ptr; - int in_ptr; - int shuf_ptr; - unsigned i; - unsigned j; - - - /* Determine how many unique input attribute formats there are. At the - * same time, store the index of the lowest numbered attribute that has - * the same format as any non-unique format. 
- */ - unique_attr_formats = 1; - function_index[0] = 0; - for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; - - for (j = 0; j < i; j++) { - if (curr_fmt == draw->vertex_element[j].src_format) { - break; - } - } - - if (j == i) { - unique_attr_formats++; - } - - function_index[i] = j; - } - - - /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). - */ - spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); - - - /* Allocate registers for the function's input parameters. - */ - out_ptr = spe_allocate_register(p, 3); - in_ptr = spe_allocate_register(p, 4); - shuf_ptr = spe_allocate_register(p, 5); - - - /* Generate code for the individual attribute fetch functions. - */ - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - unsigned offset; - - if (function_index[i] == i) { - cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr - - (void *) p->store); - - offset = 0; - emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, - draw->vertex_element[i].src_format); - spe_bi(p, 0, 0, 0); - - /* Round up to the next 16-byte boundary. - */ - if ((((unsigned) p->store) & 0x0f) != 0) { - const unsigned align = ((unsigned) p->store) & 0x0f; - p->store = (uint32_t *) (((void *) p->store) + align); - } - } else { - /* Use the same function entry-point as a previously seen attribute - * with the same format. - */ - cell->attrib_fetch_offsets[i] = - cell->attrib_fetch_offsets[function_index[i]]; - } - } -#else - assert(0); -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c deleted file mode 100644 index 3d389d6ea3..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file cell_vertex_shader.c - * Vertex shader interface routines for Cell. - * - * \author Ian Romanick <idr@us.ibm.com> - */ - -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "util/u_math.h" - -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_batch.h" - -#include "cell/common.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" - -/** - * Run the vertex shader on all vertices in the vertex queue. - * Called by the draw module when the vertx cache needs to be flushed. 
- */ -void -cell_vertex_shader_queue_flush(struct draw_context *draw) -{ -#if 0 - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - struct cell_command_vs *const vs = &cell_global.command[0].vs; - uint64_t *batch; - struct cell_array_info *array_info; - unsigned i, j; - struct cell_attribute_fetch_code *cf; - - assert(draw->vs.queue_nr != 0); - - /* XXX: do this on statechange: - */ - draw_update_vertex_fetch(draw); - cell_update_vertex_fetch(draw); - - - batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); - batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; - cf = (struct cell_attribute_fetch_code *) (&batch[1]); - cf->base = (uint64_t) cell->attrib_fetch.store; - cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr - - (void *) cell->attrib_fetch.store)); - - - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format format = draw->vertex_element[i].src_format; - const unsigned count = ((pf_size_x(format) != 0) - + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) - + (pf_size_w(format) != 0)); - const unsigned size = pf_size_x(format) * count; - - batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); - - batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; - - array_info = (struct cell_array_info *) &batch[1]; - assert(draw->vertex_fetch.src_ptr[i] != NULL); - array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; - array_info->attr = i; - array_info->pitch = draw->vertex_fetch.pitch[i]; - array_info->size = size; - array_info->function_offset = cell->attrib_fetch_offsets[i]; - } - - batch = cell_batch_alloc(cell, sizeof(batch[0]) - + sizeof(struct pipe_viewport_state)); - batch[0] = CELL_CMD_STATE_VIEWPORT; - (void) memcpy(&batch[1], &draw->viewport, - sizeof(struct pipe_viewport_state)); - - { - uint64_t uniforms = (uintptr_t) draw->user.constants; - - batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); - batch[0] = CELL_CMD_STATE_UNIFORMS; - batch[1] = uniforms; - } - - 
cell_batch_flush(cell); - - vs->opcode = CELL_CMD_VS_EXECUTE; - vs->nr_attrs = draw->vertex_fetch.nr_attrs; - - (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); - vs->nr_planes = draw->nr_planes; - - for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { - const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); - - for (j = 0; j < n; j++) { - vs->elts[j] = draw->vs.queue[i + j].elt; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; - } - - for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { - vs->elts[j] = vs->elts[0]; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; - } - - vs->num_elts = n; - send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - - cell_flush_int(cell, CELL_FLUSH_WAIT); - } - - draw->vs.post_nr = draw->vs.queue_nr; - draw->vs.queue_nr = 0; -#else - assert(0); -#endif -} diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore deleted file mode 100644 index 2be9a2d324..0000000000 --- a/src/gallium/drivers/cell/spu/.gitignore +++ /dev/null @@ -1 +0,0 @@ -g3d_spu diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile deleted file mode 100644 index 3cc52301da..0000000000 --- a/src/gallium/drivers/cell/spu/Makefile +++ /dev/null @@ -1,83 +0,0 @@ -# Gallium3D Cell driver: SPU code - -# This makefile builds the g3d_spu.a file that's linked into the -# PPU code/library. - - -TOP = ../../../../.. 
-include $(TOP)/configs/current - - -PROG = g3d - -PROG_SPU = $(PROG)_spu -PROG_SPU_A = $(PROG)_spu.a -PROG_SPU_EMBED_O = $(PROG)_spu-embed.o - - -SOURCES = \ - spu_command.c \ - spu_dcache.c \ - spu_funcs.c \ - spu_main.c \ - spu_per_fragment_op.c \ - spu_render.c \ - spu_texture.c \ - spu_tile.c \ - spu_tri.c - -OLD_SOURCES = \ - spu_exec.c \ - spu_util.c \ - spu_vertex_fetch.c \ - spu_vertex_shader.c - - -SPU_OBJECTS = $(SOURCES:.c=.o) - -SPU_ASM_OUT = $(SOURCES:.c=.s) - - -INCLUDE_DIRS = \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers - - -.c.o: - $(SPU_CC) $(SPU_CFLAGS) -c $< - -.c.s: - $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< - - -# The .a file will be linked into the main/PPU executable -default: $(PROG_SPU_A) - -$(PROG_SPU_A): $(PROG_SPU_EMBED_O) - $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) - -$(PROG_SPU_EMBED_O): $(PROG_SPU) - $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) - -$(PROG_SPU): $(SPU_OBJECTS) - $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) - - - -asmfiles: $(SPU_ASM_OUT) - - -clean: - rm -f *~ *.o *.a *.d *.s $(PROG_SPU) - - - -depend: $(SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null - -include depend - diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h deleted file mode 100644 index d7ce005524..0000000000 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ /dev/null @@ -1,145 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - - -#ifndef SPU_COLORPACK_H -#define SPU_COLORPACK_H - - -#include <transpose_matrix4x4.h> -#include <spu_intrinsics.h> - - -static INLINE unsigned int -spu_pack_R8G8B8A8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - - out = spu_shuffle(out, out, ((vector unsigned char) { - 0, 4, 8, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }) ); - - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_A8R8G8B8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, ((vector unsigned char) { - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}) ); - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_B8G8R8A8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, ((vector unsigned char) { - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}) ); - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, shuffle); - return spu_extract(out, 0); -} - - -static INLINE vector float -spu_unpack_B8G8R8A8(uint color) -{ - vector unsigned int color_u4 = spu_splats(color); - color_u4 = spu_shuffle(color_u4, color_u4, - ((vector unsigned char) { - 2, 2, 2, 2, - 1, 1, 1, 1, - 0, 0, 0, 0, - 3, 3, 3, 3}) ); - return spu_convtf(color_u4, 32); -} - - -static INLINE vector float -spu_unpack_A8R8G8B8(uint color) -{ - vector unsigned int color_u4 = spu_splats(color); - color_u4 = spu_shuffle(color_u4, color_u4, - ((vector unsigned char) { - 1, 1, 1, 1, - 2, 2, 2, 2, - 3, 3, 3, 3, - 0, 0, 0, 0}) ); - return spu_convtf(color_u4, 32); -} - - -/** - * \param color_in - array of 32-bit packed ARGB colors - * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order - */ -static INLINE void 
-spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4], - vector float color_out[4]) -{ - vector unsigned int c0; - - c0 = spu_shuffle(color_in[0], color_in[0], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[0] = spu_convtf(c0, 32); - - c0 = spu_shuffle(color_in[1], color_in[1], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[1] = spu_convtf(c0, 32); - - c0 = spu_shuffle(color_in[2], color_in[2], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[2] = spu_convtf(c0, 32); - - c0 = spu_shuffle(color_in[3], color_in[3], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[3] = spu_convtf(c0, 32); - - _transpose_matrix4x4(color_out, color_out); -} - - - -#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c deleted file mode 100644 index 6f8ba9562d..0000000000 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ /dev/null @@ -1,810 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * SPU command processing code - */ - - -#include <stdio.h> -#include <libmisc.h> - -#include "pipe/p_defines.h" - -#include "spu_command.h" -#include "spu_main.h" -#include "spu_render.h" -#include "spu_per_fragment_op.h" -#include "spu_texture.h" -#include "spu_tile.h" -#include "spu_vertex_shader.h" -#include "spu_dcache.h" -#include "cell/common.h" - - -struct spu_vs_context draw; - - -/** - * Buffers containing dynamically generated SPU code: - */ -PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; - - - -static INLINE int -align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - - - -/** - * Tell the PPU that this SPU has finished copying a buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_context->buffer_status[]). 
- */ -static void -release_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, - CELL_BUFFER_STATUS_FREE, - CELL_BUFFER_STATUS_FREE, - CELL_BUFFER_STATUS_FREE}; - const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BUFFERS); - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -/** - * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. - * There's a qword of status per SPU. - */ -static void -cmd_fence(struct cell_command_fence *fence_cmd) -{ - static const vector unsigned int status = {CELL_FENCE_SIGNALLED, - CELL_FENCE_SIGNALLED, - CELL_FENCE_SIGNALLED, - CELL_FENCE_SIGNALLED}; - uint *dst = (uint *) fence_cmd->fence; - dst += 4 * spu.init.id; /* main store/memory address, not local store */ - ASSERT_ALIGN16(dst); - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_FENCE, /* tag */ - 0, /* tid */ - 0 /* rid */); -} - - -static void -cmd_clear_surface(const struct cell_command_clear_surface *clear) -{ - D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); - - if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { - uint x = (spu.init.id << 4) | (spu.init.id << 12) | - (spu.init.id << 20) | (spu.init.id << 28); - spu.fb.color_clear_value ^= x; - } - } - else { - spu.fb.depth_clear_value = clear->value; - } - -#define CLEAR_OPT 1 -#if CLEAR_OPT - - /* Simply set all tiles' status to CLEAR. - * When we actually begin rendering into a tile, we'll initialize it to - * the clear value. 
If any tiles go untouched during the frame, - * really_clear_tiles() will set them to the clear value. - */ - if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); - } - else { - memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); - } - -#else - - /* - * This path clears the whole framebuffer to the clear color right now. - */ - - /* - printf("SPU: %s num=%d w=%d h=%d\n", - __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); - */ - - /* init a single tile to the clear value */ - if (clear->surface == 0) { - clear_c_tile(&spu.ctile); - } - else { - clear_z_tile(&spu.ztile); - } - - /* walk over my tiles, writing the 'clear' tile's data */ - { - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - } - } - - if (spu.init.debug_flags & CELL_DEBUG_SYNC) { - wait_on_mask(1 << TAG_SURFACE_CLEAR); - } - -#endif /* CLEAR_OPT */ - - D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); -} - - -static void -cmd_release_verts(const struct cell_command_release_verts *release) -{ - D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); - ASSERT(release->vertex_buf != ~0U); - release_buffer(release->vertex_buf); -} - - -/** - * Process a CELL_CMD_STATE_FRAGMENT_OPS command. - * This involves installing new fragment ops SPU code. - * If this function is never called, we'll use a regular C fallback function - * for fragment processing. 
- */ -static void -cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) -{ - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); - - /* Copy state info (for fallback case only - this will eventually - * go away when the fallback case goes away) - */ - memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); - memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); - - /* Make sure the SPU knows which buffers it's expected to read when - * it's told to pull tiles. - */ - spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); - - /* If we're forcing the fallback code to be used (for debug purposes), - * install that. Otherwise install the incoming SPU code. - */ - if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { - static unsigned int warned = 0; - if (!warned) { - fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); - warned = 1; - } - /* The following two lines aren't really necessary if you - * know the debug flags won't change during a run, and if you - * know that the function pointers are initialized correctly. - * We set them here to allow a person to change the debug - * flags during a run (from inside a debugger). - */ - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; - return; - } - - /* Make sure the SPU code buffer is large enough to hold the incoming code. - * Note that we *don't* use align_malloc() and align_free(), because - * those utility functions are *not* available in SPU code. - * */ - if (spu.fragment_ops_code_size < fops->total_code_size) { - if (spu.fragment_ops_code != NULL) { - free(spu.fragment_ops_code); - } - spu.fragment_ops_code_size = fops->total_code_size; - spu.fragment_ops_code = malloc(fops->total_code_size); - if (spu.fragment_ops_code == NULL) { - /* Whoops. 
*/ - fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); - spu.fragment_ops_code = NULL; - spu.fragment_ops_code_size = 0; - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; - return; - } - } - - /* Copy the SPU code from the command buffer to the spu buffer */ - memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); - - /* Set the pointers for the front-facing and back-facing fragments - * to the specified offsets within the code. Note that if the - * front-facing and back-facing code are the same, they'll have - * the same offset. - */ - spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; - spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; -} - -static void -cmd_state_fragment_program(const struct cell_command_fragment_program *fp) -{ - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_program_code, fp->code, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); -#if 01 - /* Point function pointer at new code */ - spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; -#endif -} - - -static uint -cmd_state_fs_constants(const qword *buffer, uint pos) -{ - const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0); - const float *constants = (const float *) &buffer[pos+2]; - uint i; - - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); - - /* Expand each float to float[4] for SOA execution */ - for (i = 0; i < num_const; i++) { - D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); - spu.constants[i] = spu_splats(constants[i]); - } - - /* return new buffer pos (in 16-byte words) */ - return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16); -} - - 
-static void -cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) -{ - D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - cmd->width, - cmd->height, - cmd->color_start, - cmd->color_format, - cmd->depth_format); - - ASSERT_ALIGN16(cmd->color_start); - ASSERT_ALIGN16(cmd->depth_start); - - spu.fb.color_start = cmd->color_start; - spu.fb.depth_start = cmd->depth_start; - spu.fb.color_format = cmd->color_format; - spu.fb.depth_format = cmd->depth_format; - spu.fb.width = cmd->width; - spu.fb.height = cmd->height; - spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; - spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z32_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0xffffffffu; - break; - case PIPE_FORMAT_S8_UINT_Z24_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0x00ffffffu; - break; - case PIPE_FORMAT_Z16_UNORM: - spu.fb.zsize = 2; - spu.fb.zscale = (float) 0xffffu; - break; - default: - spu.fb.zsize = 0; - break; - } -} - - -/** - * Tex texture mask_s/t and scale_s/t fields depend on the texture size and - * sampler wrap modes. 
- */ -static void -update_tex_masks(struct spu_texture *texture, - const struct pipe_sampler_state *sampler) -{ - uint i; - - for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { - int width = texture->level[i].width; - int height = texture->level[i].height; - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) - texture->level[i].mask_s = spu_splats(width - 1); - else - texture->level[i].mask_s = spu_splats(~0); - - if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) - texture->level[i].mask_t = spu_splats(height - 1); - else - texture->level[i].mask_t = spu_splats(~0); - - if (sampler->normalized_coords) { - texture->level[i].scale_s = spu_splats((float) width); - texture->level[i].scale_t = spu_splats((float) height); - } - else { - texture->level[i].scale_s = spu_splats(1.0f); - texture->level[i].scale_t = spu_splats(1.0f); - } - } -} - - -static void -cmd_state_sampler(const struct cell_command_sampler *sampler) -{ - uint unit = sampler->unit; - - D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); - - spu.sampler[unit] = sampler->state; - - switch (spu.sampler[unit].min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; - break; - case PIPE_TEX_FILTER_NEAREST: - spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; - break; - default: - ASSERT(0); - } - - switch (spu.sampler[sampler->unit].mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; - break; - case PIPE_TEX_FILTER_NEAREST: - spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; - break; - default: - ASSERT(0); - } - - switch (spu.sampler[sampler->unit].min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - case PIPE_TEX_MIPFILTER_LINEAR: - spu.sample_texture_2d[unit] = sample_texture_2d_lod; - break; - case PIPE_TEX_MIPFILTER_NONE: - spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; - break; - default: - ASSERT(0); - } - - update_tex_masks(&spu.texture[unit], 
&spu.sampler[unit]); -} - - -static void -cmd_state_texture(const struct cell_command_texture *texture) -{ - const uint unit = texture->unit; - uint i; - - D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); - - spu.texture[unit].max_level = 0; - spu.texture[unit].target = texture->target; - - for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { - uint width = texture->width[i]; - uint height = texture->height[i]; - uint depth = texture->depth[i]; - - D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, - texture->start[i], texture->width[i], texture->height[i]); - - spu.texture[unit].level[i].start = texture->start[i]; - spu.texture[unit].level[i].width = width; - spu.texture[unit].level[i].height = height; - spu.texture[unit].level[i].depth = depth; - - spu.texture[unit].level[i].tiles_per_row = - (width + TILE_SIZE - 1) / TILE_SIZE; - - spu.texture[unit].level[i].bytes_per_image = - 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; - - spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); - spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); - - if (texture->start[i]) - spu.texture[unit].max_level = i; - } - - update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); -} - - -static void -cmd_state_vertex_info(const struct vertex_info *vinfo) -{ - D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); - ASSERT(vinfo->num_attribs >= 1); - ASSERT(vinfo->num_attribs <= 8); - memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); -} - - -static void -cmd_state_vs_array_info(const struct cell_array_info *vs_info) -{ - const unsigned attr = vs_info->attr; - - ASSERT(attr < PIPE_MAX_ATTRIBS); - draw.vertex_fetch.src_ptr[attr] = vs_info->base; - draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.size[attr] = vs_info->size; - draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; - draw.vertex_fetch.dirty = 1; -} - - -static void -cmd_state_attrib_fetch(const struct 
cell_attribute_fetch_code *code) -{ - mfc_get(attribute_fetch_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - draw.vertex_fetch.code = attribute_fetch_code_buffer; -} - - -static void -cmd_finish(void) -{ - D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); - really_clear_tiles(0); - /* wait for all outstanding DMAs to finish */ - mfc_write_tag_mask(~0); - mfc_read_tag_status_all(); - /* send mbox message to PPU */ - spu_write_out_mbox(CELL_CMD_FINISH); -} - - -/** - * Execute a batch of commands which was sent to us by the PPU. - * See the cell_emit_state.c code to see where the commands come from. - * - * The opcode param encodes the location of the buffer and its size. - */ -static void -cmd_batch(uint opcode) -{ - const uint buf = (opcode >> 8) & 0xff; - uint size = (opcode >> 16); - PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; - const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); - uint pos; - - D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", - buf, size, spu.init.buffers[buf]); - - ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - size = ROUNDUP16(size); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - mfc_get(buffer, /* dest */ - (unsigned int) spu.init.buffers[buf], /* src */ - size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - /* Tell PPU we're done copying the buffer to local store */ - D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); - release_buffer(buf); - - /* - * Loop over commands in the batch buffer - */ - for (pos = 0; pos < usize; /* no incr */) { - switch (si_to_uint(buffer[pos])) { - /* - * rendering commands - */ - case CELL_CMD_CLEAR_SURFACE: - { - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) &buffer[pos]; - cmd_clear_surface(clr); - pos += sizeof(*clr) / 16; - } - 
break; - case CELL_CMD_RENDER: - { - struct cell_command_render *render - = (struct cell_command_render *) &buffer[pos]; - uint pos_incr; - cmd_render(render, &pos_incr); - pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return - } - break; - /* - * state-update commands - */ - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 16; - } - break; - case CELL_CMD_STATE_FRAGMENT_OPS: - { - struct cell_command_fragment_ops *fops - = (struct cell_command_fragment_ops *) &buffer[pos]; - cmd_state_fragment_ops(fops); - /* This is a variant-sized command */ - pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16; - } - break; - case CELL_CMD_STATE_FRAGMENT_PROGRAM: - { - struct cell_command_fragment_program *fp - = (struct cell_command_fragment_program *) &buffer[pos]; - cmd_state_fragment_program(fp); - pos += sizeof(*fp) / 16; - } - break; - case CELL_CMD_STATE_FS_CONSTANTS: - pos = cmd_state_fs_constants(buffer, pos); - break; - case CELL_CMD_STATE_RASTERIZER: - { - struct cell_command_rasterizer *rast = - (struct cell_command_rasterizer *) &buffer[pos]; - spu.rasterizer = rast->rasterizer; - pos += sizeof(*rast) / 16; - } - break; - case CELL_CMD_STATE_SAMPLER: - { - struct cell_command_sampler *sampler - = (struct cell_command_sampler *) &buffer[pos]; - cmd_state_sampler(sampler); - pos += sizeof(*sampler) / 16; - } - break; - case CELL_CMD_STATE_TEXTURE: - { - struct cell_command_texture *texture - = (struct cell_command_texture *) &buffer[pos]; - cmd_state_texture(texture); - pos += sizeof(*texture) / 16; - } - break; - case CELL_CMD_STATE_VERTEX_INFO: - cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16; - break; - case CELL_CMD_STATE_VIEWPORT: - (void) memcpy(& draw.viewport, &buffer[pos+1], - sizeof(struct pipe_viewport_state)); - pos += 1 + 
ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16; - break; - case CELL_CMD_STATE_UNIFORMS: - draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0); - pos += 2; - break; - case CELL_CMD_STATE_VS_ARRAY_INFO: - cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); - pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16; - break; - case CELL_CMD_STATE_BIND_VS: -#if 0 - spu_bind_vertex_shader(&draw, - (struct cell_shader_info *) &buffer[pos+1]); -#endif - pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16; - break; - case CELL_CMD_STATE_ATTRIB_FETCH: - cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) - &buffer[pos+1]); - pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16; - break; - /* - * misc commands - */ - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_FENCE: - { - struct cell_command_fence *fence_cmd = - (struct cell_command_fence *) &buffer[pos]; - cmd_fence(fence_cmd); - pos += sizeof(*fence_cmd) / 16; - } - break; - case CELL_CMD_RELEASE_VERTS: - { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 16; - } - break; - case CELL_CMD_FLUSH_BUFFER_RANGE: { - struct cell_buffer_range *br = (struct cell_buffer_range *) - &buffer[pos+1]; - - spu_dcache_mark_dirty((unsigned) br->base, br->size); - pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16; - break; - } - default: - printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos])); - ASSERT(0); - break; - } - } - - D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); -} - - -#define PERF 0 - - -/** - * Main loop for SPEs: Get a command, execute it, repeat. 
- */ -void -command_loop(void) -{ - int exitFlag = 0; - uint t0, t1; - - D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); - - while (!exitFlag) { - unsigned opcode; - - D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); - - if (PERF) - spu_write_decrementer(~0); - - /* read/wait from mailbox */ - opcode = (unsigned int) spu_read_in_mbox(); - D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); - - if (PERF) - t0 = spu_read_decrementer(); - - switch (opcode & CELL_CMD_OPCODE_MASK) { - case CELL_CMD_EXIT: - D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); - exitFlag = 1; - break; - case CELL_CMD_VS_EXECUTE: -#if 0 - spu_execute_vertex_shader(&draw, &cmd.vs); -#endif - break; - case CELL_CMD_BATCH: - cmd_batch(opcode); - break; - default: - printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); - } - - if (PERF) { - t1 = spu_read_decrementer(); - printf("wait mbox time: %gms batch time: %gms\n", - (~0u - t0) * spu.init.inv_timebase, - (t0 - t1) * spu.init.inv_timebase); - } - } - - D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); - - if (spu.init.debug_flags & CELL_DEBUG_CACHE) - spu_dcache_report(); -} - -/* Initialize this module; we manage the fragment ops buffer here. */ -void -spu_command_init(void) -{ - /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function - * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. - */ - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; - - /* Set up the basic empty buffer for code-gen'ed fragment ops */ - spu.fragment_ops_code = NULL; - spu.fragment_ops_code_size = 0; -} - -void -spu_command_close(void) -{ - /* Deallocate the code-gen buffer for fragment ops, and reset the - * fragment ops functions to their initial setting (just to leave - * things in a good state). 
- */ - if (spu.fragment_ops_code != NULL) { - free(spu.fragment_ops_code); - } - spu_command_init(); -} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h deleted file mode 100644 index 83dcdade28..0000000000 --- a/src/gallium/drivers/cell/spu/spu_command.h +++ /dev/null @@ -1,35 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -extern void -command_loop(void); - -extern void -spu_command_init(void); - -extern void -spu_command_close(void); diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c deleted file mode 100644 index a6d67634fd..0000000000 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "cell/common.h" -#include "spu_main.h" -#include "spu_dcache.h" - -#define CACHELINE_LOG2SIZE 7 -#define LINE_SIZE (1U << 7) -#define ALIGN_MASK (~(LINE_SIZE - 1)) - -#define CACHE_NAME data -#define CACHED_TYPE qword -#define CACHE_TYPE CACHE_TYPE_RO -#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) -#define CACHE_LOG2NNWAY 2 -#define CACHE_LOG2NSETS 6 -#ifdef DEBUG -#define CACHE_STATS 1 -#endif -#include <cache-api.h> - -/* Yes folks, this is ugly. - */ -#undef CACHE_NWAY -#undef CACHE_NSETS -#define CACHE_NAME data -#define CACHE_NWAY 4 -#define CACHE_NSETS (1U << 6) - - -/** - * Fetch between arbitrary number of bytes from an unaligned address - * - * \param dst Destination data buffer - * \param ea Main memory effective address of source data - * \param size Number of bytes to read - * - * \warning - * As is hinted by the type of the \c dst pointer, this function writes - * multiples of 16-bytes. - */ -void -spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) -{ - const int shift = ea & 0x0f; - const unsigned read_size = ROUNDUP16(size + shift); - const unsigned last_read = ROUNDUP16(ea + size); - const qword *const last_write = dst + (ROUNDUP16(size) / 16); - unsigned i; - - - if (shift == 0) { - /* Data is already aligned. Fetch directly into the destination buffer. - */ - for (i = 0; i < size; i += 16) { - *(dst++) = cache_rd(data, ea + i); - } - } else { - qword hi; - - - /* Please exercise extreme caution when modifying this code. This code - * must not read past the end of the page containing the source data, - * and it must not write more than ((size + 15) / 16) qwords to the - * destination buffer. 
- */ - ea &= ~0x0f; - hi = cache_rd(data, ea); - for (i = 16; i < read_size; i += 16) { - qword lo = cache_rd(data, ea + i); - - *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), - (qword) spu_rlmaskqwbyte(lo, shift - 16)); - hi = lo; - } - - if (dst != last_write) { - *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); - } - } - - ASSERT((ea + i) == last_read); - ASSERT(dst == last_write); -} - - -/** - * Notify the cache that a range of main memory may have been modified - */ -void -spu_dcache_mark_dirty(unsigned ea, unsigned size) -{ - unsigned i; - const unsigned aligned_start = (ea & ALIGN_MASK); - const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) - & ALIGN_MASK; - - - for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { - const unsigned entry = __cache_dir[i]; - const unsigned addr = entry & ~0x0f; - - __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) - ? (entry & ~CACHELINE_VALID) : entry; - } -} - - -/** - * Print cache utilization report - */ -void -spu_dcache_report(void) -{ -#ifdef CACHE_STATS - if (spu.init.id == 0) { - printf("SPU 0: Texture cache report:\n"); - cache_pr_stats(data); - } -#endif -} - - diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h deleted file mode 100644 index 39a19eb31b..0000000000 --- a/src/gallium/drivers/cell/spu/spu_dcache.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SPU_DCACHE_H -#define SPU_DCACHE_H - -extern void -spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); - -extern void -spu_dcache_mark_dirty(unsigned ea, unsigned size); - -extern void -spu_dcache_report(void); - -#endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c deleted file mode 100644 index e4ebeb595c..0000000000 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ /dev/null @@ -1,1870 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI interpreter/executor. - * - * Flow control information: - * - * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) - * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special - * care since a condition may be true for some quad components but false - * for other components. - * - * We basically execute all statements (even if they're in the part of - * an IF/ELSE clause that's "not taken") and use a special mask to - * control writing to destination registers. This is the ExecMask. - * See store_dest(). - * - * The ExecMask is computed from three other masks (CondMask, LoopMask and - * ContMask) which are controlled by the flow control instructions (namely: - * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 
- * - * - * Authors: - * Michal Krol - * Brian Paul - */ - -#include <transpose_matrix4x4.h> -#include <simdmath/ceilf4.h> -#include <simdmath/cosf4.h> -#include <simdmath/divf4.h> -#include <simdmath/floorf4.h> -#include <simdmath/log2f4.h> -#include <simdmath/powf4.h> -#include <simdmath/sinf4.h> -#include <simdmath/sqrtf4.h> -#include <simdmath/truncf4.h> - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "spu_exec.h" -#include "spu_main.h" -#include "spu_vertex_shader.h" -#include "spu_dcache.h" -#include "cell/common.h" - -#define TILE_TOP_LEFT 0 -#define TILE_TOP_RIGHT 1 -#define TILE_BOTTOM_LEFT 2 -#define TILE_BOTTOM_RIGHT 3 - -/* - * Shorthand locations of various utility registers (_I = Index, _C = Channel) - */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C -#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I -#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C -#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I -#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C -#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I -#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -#define FOR_EACH_CHANNEL(CHAN)\ - for (CHAN = 0; CHAN < 4; CHAN++) - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - 
((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) - - -/** The execution mask depends on the conditional mask and the loop mask */ -#define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask - - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - - - -/** - * Initialize machine state by expanding tokens to full instructions, - * allocating temporary storage, setting up constants, etc. - * After this, we can call spu_exec_machine_run() many times. - */ -void -spu_exec_machine_init(struct spu_exec_machine *mach, - uint numSamplers, - struct spu_sampler *samplers, - unsigned processor) -{ - const qword zero = si_il(0); - const qword not_zero = si_il(~0); - - (void) numSamplers; - mach->Samplers = samplers; - mach->Processor = processor; - mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; - - /* Setup constants. 
*/ - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); - - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); -} - - -static INLINE qword -micro_abs(qword src) -{ - return si_rotmi(si_shli(src, 1), -1); -} - -static INLINE qword -micro_ceil(qword src) -{ - return (qword) _ceilf4((vec_float4) src); -} - -static INLINE qword -micro_cos(qword src) -{ - return (qword) _cosf4((vec_float4) src); -} - -static const qword br_shuf = { - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, -}; - -static const qword bl_shuf = { - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, -}; - -static const qword tl_shuf = { - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT 
+ 3, -}; - -static qword -micro_ddx(qword src) -{ - qword bottom_right = si_shufb(src, src, br_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(bottom_right, bottom_left); -} - -static qword -micro_ddy(qword src) -{ - qword top_left = si_shufb(src, src, tl_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(top_left, bottom_left); -} - -static INLINE qword -micro_div(qword src0, qword src1) -{ - return (qword) _divf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_flr(qword src) -{ - return (qword) _floorf4((vec_float4) src); -} - -static qword -micro_frc(qword src) -{ - return si_fs(src, (qword) _floorf4((vec_float4) src)); -} - -static INLINE qword -micro_ge(qword src0, qword src1) -{ - return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); -} - -static qword -micro_lg2(qword src) -{ - return (qword) _log2f4((vec_float4) src); -} - -static INLINE qword -micro_lt(qword src0, qword src1) -{ - const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); - - return si_xori(tmp, 0xff); -} - -static INLINE qword -micro_max(qword src0, qword src1) -{ - return si_selb(src1, src0, si_fcgt(src0, src1)); -} - -static INLINE qword -micro_min(qword src0, qword src1) -{ - return si_selb(src0, src1, si_fcgt(src0, src1)); -} - -static qword -micro_neg(qword src) -{ - return si_xor(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_set_sign(qword src) -{ - return si_or(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_pow(qword src0, qword src1) -{ - return (qword) _powf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_rnd(qword src) -{ - const qword half = (qword) spu_splats(0.5f); - - /* May be able to use _roundf4. There may be some difference, though. 
- */ - return (qword) _floorf4((vec_float4) si_fa(src, half)); -} - -static INLINE qword -micro_ishr(qword src0, qword src1) -{ - return si_rotma(src0, si_sfi(src1, 0)); -} - -static qword -micro_trunc(qword src) -{ - return (qword) _truncf4((vec_float4) src); -} - -static qword -micro_sin(qword src) -{ - return (qword) _sinf4((vec_float4) src); -} - -static INLINE qword -micro_sqrt(qword src) -{ - return (qword) _sqrtf4((vec_float4) src); -} - -static void -fetch_src_file_channel( - const struct spu_exec_machine *mach, - const uint file, - const uint swizzle, - const union spu_exec_channel *index, - union spu_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: { - unsigned i; - - for (i = 0; i < 4; i++) { - const float *ptr = mach->Consts[index->i[i]]; - float tmp[4]; - - spu_dcache_fetch_unaligned((qword *) tmp, - (uintptr_t)(ptr + swizzle), - sizeof(float)); - - chan->f[i] = tmp[0]; - } - break; - } - - case TGSI_FILE_INPUT: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_TEMPORARY: - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_IMMEDIATE: - ASSERT( index->i[0] < (int) mach->ImmLimit ); - ASSERT( index->i[1] < (int) mach->ImmLimit ); - ASSERT( index->i[2] < (int) mach->ImmLimit ); - ASSERT( index->i[3] < (int) mach->ImmLimit ); - - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - chan->f[3] = 
mach->Imms[index->i[3]][swizzle]; - break; - - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - default: - ASSERT( 0 ); - } - break; - - default: - ASSERT( 0 ); - } -} - -static void -fetch_source( - const struct spu_exec_machine *mach, - union spu_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) -{ - union spu_exec_channel index; - uint swizzle; - - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->Register.Index; - - if (reg->Register.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->Indirect.Index; - - swizzle = tgsi_util_get_src_register_swizzle(®->Indirect, - CHAN_X); - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - - if( reg->Register.Dimension ) { - switch( reg->Register.File ) { - case TGSI_FILE_INPUT: - index.q = si_mpyi(index.q, 17); - break; - case TGSI_FILE_CONSTANT: - index.q = si_shli(index.q, 12); - break; - default: - ASSERT( 0 ); - } - - index.i[0] += reg->Dimension.Index; - index.i[1] += reg->Dimension.Index; - index.i[2] += reg->Dimension.Index; - index.i[3] += reg->Dimension.Index; - - if (reg->Dimension.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - 
index2.i[3] = reg->DimIndirect.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->DimIndirect.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - } - - swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->Register.File, - swizzle, - &index, - chan ); - - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - chan->q = micro_abs(chan->q); - break; - - case TGSI_UTIL_SIGN_SET: - chan->q = micro_set_sign(chan->q); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - chan->q = micro_neg(chan->q); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } - - if (reg->RegisterExtMod.Complement) { - chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); - } -} - -static void -store_dest( - struct spu_exec_machine *mach, - const union spu_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) -{ - union spu_exec_channel *dst; - - switch( reg->Register.File ) { - case TGSI_FILE_NULL: - return; - - case TGSI_FILE_OUTPUT: - dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->Register.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - dst = &mach->Temps[reg->Register.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index]; - break; - - default: - ASSERT( 0 ); - return; - } - - switch (inst->Instruction.Saturate) - { - case TGSI_SAT_NONE: - if (mach->ExecMask & 0x1) - dst->i[0] = chan->i[0]; - if (mach->ExecMask & 0x2) - dst->i[1] = chan->i[1]; - if (mach->ExecMask & 0x4) - dst->i[2] = chan->i[2]; - if (mach->ExecMask & 0x8) - dst->i[3] = chan->i[3]; - break; - - case TGSI_SAT_ZERO_ONE: - /* XXX need to obey ExecMask here */ - dst->q = micro_max(chan->q, 
mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - ASSERT( 0 ); - break; - - default: - ASSERT( 0 ); - } -} - -#define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) - -#define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) - - -/** - * Execute ARB-style KIL which is predicated by a src register. - * Kill fragment if any of the four values is less than zero. - */ -static void -exec_kil(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint uniquemask; - uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - union spu_exec_channel r[1]; - - /* This mask stores component bits that were already tested. */ - uniquemask = 0; - - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint swizzle; - uint i; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->Src[0], - chan_index); - - /* check if the component has not been already tested */ - if (uniquemask & (1 << swizzle)) - continue; - uniquemask |= 1 << swizzle; - - FETCH(&r[0], 0, chan_index); - for (i = 0; i < 4; i++) - if (r[0].f[i] < 0.0f) - kilmask |= 1 << i; - } - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - -/** - * Execute NVIDIA-style KIL which is predicated by a condition code. - * Kill fragment if the condition code is TRUE. - */ -static void -exec_kilp(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - - /* TODO: build kilmask from CC mask */ - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - -/* - * Fetch a texel using STR texture coordinates. 
- */ -static void -fetch_texel( struct spu_sampler *sampler, - const union spu_exec_channel *s, - const union spu_exec_channel *t, - const union spu_exec_channel *p, - float lodbias, /* XXX should be float[4] */ - union spu_exec_channel *r, - union spu_exec_channel *g, - union spu_exec_channel *b, - union spu_exec_channel *a ) -{ - qword rgba[4]; - qword out[4]; - - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, - (float (*)[4]) rgba); - - _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); - r->q = out[0]; - g->q = out[1]; - b->q = out[2]; - a->q = out[3]; -} - - -static void -exec_tex(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - boolean biasLod, boolean projected) -{ - const uint unit = inst->Src[1].Register.Index; - union spu_exec_channel r[8]; - uint chan_index; - float lodBias; - - /* printf("Sampler %u unit %u\n", sampler, unit); */ - - switch (inst->InstructionExtTexture.Texture) { - case TGSI_TEXTURE_1D: - - FETCH(&r[0], 0, CHAN_X); - - if (projected) { - FETCH(&r[1], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[1].q); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; - - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; - - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 
0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, - &r[0], &r[1], &r[2], &r[3]); - break; - - default: - ASSERT (0); - } - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } -} - - - -static void -constant_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; - } -} - -static void -linear_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; -} - -static void -perspective_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - 
mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; -} - - -typedef void (* interpolation_func)( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ); - -static void -exec_declaration(struct spu_exec_machine *mach, - const struct tgsi_full_declaration *decl) -{ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - interpolation_func interp; - - first = decl->Range.First; - last = decl->Range.Last; - mask = decl->Declaration.UsageMask; - - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - interp = constant_interpolation; - break; - - case TGSI_INTERPOLATE_LINEAR: - interp = linear_interpolation; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = perspective_interpolation; - break; - - default: - ASSERT( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - interp( mach, i, j ); - } - } - } - else { - unsigned i, j; - - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - interp( mach, i, j ); - } - } - } - } - } - } -} - -static void -exec_instruction( - struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - int *pc ) -{ - uint chan_index; - union spu_exec_channel r[8]; - - (*pc)++; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_cflts(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if 
(IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - - FETCH( &r[2], 0, CHAN_W ); - r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); - r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); - r[1].q = micro_pow(r[1].q, r[2].q); - - /* r0 = (r0 > 0.0) ? r1 : 0.0 - */ - r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, - r[0].q); - STORE( &r[0], 0, CHAN_Z ); - } - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_RCP: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sqrt(r[0].q); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - ASSERT (0); - break; - - case TGSI_OPCODE_LOG: - ASSERT (0); - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fm(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fa(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case 
TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - r[0].q = si_fm(r[0].q, r[1].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_min(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_max(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* 
TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = micro_ge(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = micro_ge(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - FETCH( &r[2], 2, chan_index ); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fs(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - r[1].q = si_fs(r[1].q, r[2].q); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_CND: - ASSERT (0); - break; - - case TGSI_OPCODE_DP2A: - ASSERT (0); - break; - - case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_frc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - ASSERT (0); - break; - - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_flr(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_rnd(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - 
case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_lg2(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_POW: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = micro_pow(r[0].q, r[1].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_XPD: - /* TGSI_OPCODE_XPD */ - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - /* r2 = (r0 * r1) - (r3 * r5) - */ - r[2].q = si_fm(r[3].q, r[5].q); - r[2].q = si_fms(r[0].q, r[1].q, r[2].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } - - FETCH(&r[2], 1, CHAN_X); - FETCH(&r[5], 0, CHAN_X); - - /* r3 = (r3 * r2) - (r1 * r5) - */ - r[1].q = si_fm(r[1].q, r[5].q); - r[3].q = si_fms(r[3].q, r[2].q, r[1].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } - - /* r5 = (r5 * r4) - (r0 * r2) - */ - r[0].q = si_fm(r[0].q, r[2].q); - r[5].q = si_fms(r[5].q, r[4].q, r[0].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - - r[0].q = micro_abs(r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_RCC: - ASSERT (0); - break; - - case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - 
FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 1, CHAN_W); - - r[0].q = si_fa(r[0].q, r[1].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_cos(r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddx(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddy(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_KILP: - exec_kilp (mach, inst); - break; - - case TGSI_OPCODE_KIL: - exec_kil (mach, inst); - break; - - case TGSI_OPCODE_PK2H: - ASSERT (0); - break; - - case TGSI_OPCODE_PK2US: - ASSERT (0); - break; - - case TGSI_OPCODE_PK4B: - ASSERT (0); - break; - - case TGSI_OPCODE_PK4UB: - ASSERT (0); - break; - - case TGSI_OPCODE_RFL: - ASSERT (0); - break; - - case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SFL: - ASSERT (0); - break; - - case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fcgt(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sin(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, 
chan_index ); - - r[0].q = si_fcgt(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_STR: - ASSERT (0); - break; - - case TGSI_OPCODE_TEX: - /* simple texture lookup */ - /* src[0] = texcoord */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); - break; - - case TGSI_OPCODE_TXB: - /* Texture lookup with lod bias */ - /* src[0] = texcoord (src[0].w = lod bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXD: - /* Texture lookup with explicit partial derivatives */ - /* src[0] = texcoord */ - /* src[1] = d[strq]/dx */ - /* src[2] = d[strq]/dy */ - /* src[3] = sampler unit */ - ASSERT (0); - break; - - case TGSI_OPCODE_TXL: - /* Texture lookup with explicit LOD */ - /* src[0] = texcoord (src[0].w = lod bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXP: - /* Texture lookup with projection */ - /* src[0] = texcoord (src[0].w = projection) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, TRUE); - break; - - case TGSI_OPCODE_UP2H: - ASSERT (0); - break; - - case TGSI_OPCODE_UP2US: - ASSERT (0); - break; - - case TGSI_OPCODE_UP4B: - ASSERT (0); - break; - - case TGSI_OPCODE_UP4UB: - ASSERT (0); - break; - - case TGSI_OPCODE_X2D: - ASSERT (0); - break; - - case TGSI_OPCODE_ARA: - ASSERT (0); - break; - - case TGSI_OPCODE_ARR: - ASSERT (0); - break; - - case TGSI_OPCODE_BRA: - ASSERT (0); - break; - - case TGSI_OPCODE_CAL: - /* skip the call if no execution channels are enabled */ - if (mach->ExecMask) { - /* do the call */ - - /* push the Cond, Loop, Cont stacks */ - ASSERT(mach->CondStackTop <
TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - - ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); - mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; - *pc = inst->InstructionExtLabel.Label; - } - break; - - case TGSI_OPCODE_RET: - mach->FuncMask &= ~mach->ExecMask; - UPDATE_EXEC_MASK(mach); - - if (mach->ExecMask == 0x0) { - /* really return now (otherwise, keep executing */ - - if (mach->CallStackTop == 0) { - /* returning from main() */ - *pc = -1; - return; - } - *pc = mach->CallStack[--mach->CallStackTop]; - - /* pop the Cond, Loop, Cont stacks */ - ASSERT(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - ASSERT(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - ASSERT(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - ASSERT(mach->FuncStackTop > 0); - mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; - - UPDATE_EXEC_MASK(mach); - } - break; - - case TGSI_OPCODE_SSG: - ASSERT (0); - break; - - case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - /* r0 = (r0 < 0.0) ? 
r1 : r2 - */ - r[3].q = si_xor(r[3].q, r[3].q); - r[0].q = micro_lt(r[0].q, r[3].q); - r[0].q = si_selb(r[1].q, r[2].q, r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - r[1].q = micro_cos(r[0].q); - STORE( &r[1], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - r[1].q = micro_sin(r[0].q); - STORE( &r[1], 0, CHAN_Y ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_NRM: - ASSERT (0); - break; - - case TGSI_OPCODE_DIV: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_IF: - /* push CondMask */ - ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - FETCH( &r[0], 0, CHAN_X ); - /* update CondMask */ - if( ! r[0].u[0] ) { - mach->CondMask &= ~0x1; - } - if( ! r[0].u[1] ) { - mach->CondMask &= ~0x2; - } - if( ! r[0].u[2] ) { - mach->CondMask &= ~0x4; - } - if( ! 
r[0].u[3] ) { - mach->CondMask &= ~0x8; - } - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ELSE */ - break; - - case TGSI_OPCODE_ELSE: - /* invert CondMask wrt previous mask */ - { - uint prevMask; - ASSERT(mach->CondStackTop > 0); - prevMask = mach->CondStack[mach->CondStackTop - 1]; - mach->CondMask = ~mach->CondMask & prevMask; - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ENDIF */ - } - break; - - case TGSI_OPCODE_ENDIF: - /* pop CondMask */ - ASSERT(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_END: - /* halt execution */ - *pc = -1; - break; - - case TGSI_OPCODE_PUSHA: - ASSERT (0); - break; - - case TGSI_OPCODE_POPA: - ASSERT (0); - break; - - case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ceil(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_csflt(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_xorbi(r[0].q, 0xff); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_trunc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_shl(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ISHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = micro_ishr(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case 
TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_and(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_or(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOD: - ASSERT (0); - break; - - case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_xor(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SAD: - ASSERT (0); - break; - - case TGSI_OPCODE_TXF: - ASSERT (0); - break; - - case TGSI_OPCODE_TXQ: - ASSERT (0); - break; - - case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; - break; - - case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; - break; - - case TGSI_OPCODE_BGNLOOP: - /* push LoopMask and ContMasks */ - ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - break; - - case TGSI_OPCODE_ENDLOOP: - /* Restore ContMask, but don't pop */ - ASSERT(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - if (mach->LoopMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; - } - else { - /* exit loop: pop LoopMask */ - ASSERT(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - 
ASSERT(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_CONT: - /* turn off cont channels for each enabled exec channel */ - mach->ContMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BGNSUB: - /* no-op */ - break; - - case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; - - case TGSI_OPCODE_NOP: - break; - - default: - ASSERT( 0 ); - } -} - - -/** - * Run TGSI interpreter. - * \return bitmask of "alive" quad components - */ -uint -spu_exec_machine_run( struct spu_exec_machine *mach ) -{ - uint i; - int pc = 0; - - mach->CondMask = 0xf; - mach->LoopMask = 0xf; - mach->ContMask = 0xf; - mach->FuncMask = 0xf; - mach->ExecMask = 0xf; - - mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ - ASSERT(mach->CondStackTop == 0); - ASSERT(mach->LoopStackTop == 0); - ASSERT(mach->ContStackTop == 0); - ASSERT(mach->CallStackTop == 0); - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - - if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; - mach->Primitives[0] = 0; - } - - - /* execute declarations (interpolants) */ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - for (i = 0; i < mach->NumDeclarations; i++) { - PIPE_ALIGN_VAR(16) - union { - struct tgsi_full_declaration decl; - qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d; - unsigned ea = (unsigned) (mach->Declarations + pc); - - spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); - - exec_declaration( mach, &d.decl ); - } - } - - /* execute 
instructions, until pc is set to -1 */ - while (pc != -1) { - PIPE_ALIGN_VAR(16) - union { - struct tgsi_full_instruction inst; - qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i; - unsigned ea = (unsigned) (mach->Instructions + pc); - - spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); - exec_instruction( mach, & i.inst, &pc ); - } - -#if 0 - /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - /* - * Scale back depth component. - */ - for (i = 0; i < 4; i++) - mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; - } -#endif - - return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; -} - - diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h deleted file mode 100644 index 68f4479e53..0000000000 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ /dev/null @@ -1,173 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#if !defined SPU_EXEC_H -#define SPU_EXEC_H - -#include "pipe/p_compiler.h" - -#include "spu_tgsi_exec.h" - -#if defined __cplusplus -extern "C" { -#endif - -/** - * Registers may be treated as float, signed int or unsigned int. - */ -union spu_exec_channel -{ - float f[QUAD_SIZE]; - int i[QUAD_SIZE]; - unsigned u[QUAD_SIZE]; - qword q; -}; - -/** - * A vector[RGBA] of channels[4 pixels] - */ -struct spu_exec_vector -{ - union spu_exec_channel xyzw[NUM_CHANNELS]; -}; - -/** - * For fragment programs, information for computing fragment input - * values from plane equation of the triangle/line. - */ -struct spu_interp_coef -{ - float a0[NUM_CHANNELS]; /* in an xyzw layout */ - float dadx[NUM_CHANNELS]; - float dady[NUM_CHANNELS]; -}; - - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - -/** - * Information for sampling textures, which must be implemented - * by code outside the TGSI executor. - */ -struct spu_sampler -{ - const struct pipe_sampler_state *state; - struct pipe_resource *texture; - /** Get samples for four fragments in a quad */ - void (*get_samples)(struct spu_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; -}; - - -/** - * Run-time virtual machine state for executing TGSI shader. 
- */ -struct spu_exec_machine -{ - /* - * 32 program temporaries - * 4 internal temporaries - * 1 address - */ - PIPE_ALIGN_VAR(16) - struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; - - struct spu_exec_vector *Addrs; - - struct spu_sampler *Samplers; - - float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - unsigned ImmLimit; - float (*Consts)[4]; - struct spu_exec_vector *Inputs; - struct spu_exec_vector *Outputs; - unsigned Processor; - - /* GEOMETRY processor only. */ - unsigned *Primitives; - - /* FRAGMENT processor only. */ - const struct spu_interp_coef *InterpCoefs; - struct spu_exec_vector QuadPos; - - /* Conditional execution masks */ - uint CondMask; /**< For IF/ELSE/ENDIF */ - uint LoopMask; /**< For BGNLOOP/ENDLOOP */ - uint ContMask; /**< For loop CONT statements */ - uint FuncMask; /**< For function calls */ - uint ExecMask; /**< = CondMask & LoopMask */ - - /** Condition mask stack (for nested conditionals) */ - uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; - int CondStackTop; - - /** Loop mask stack (for nested loops) */ - uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int LoopStackTop; - - /** Loop continue mask stack (see comments in tgsi_exec.c) */ - uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int ContStackTop; - - /** Function execution mask stack (for executing subroutine code) */ - uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; - int FuncStackTop; - - /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; - int CallStackTop; - - struct tgsi_full_instruction *Instructions; - uint NumInstructions; - - struct tgsi_full_declaration *Declarations; - uint NumDeclarations; -}; - - -extern void -spu_exec_machine_init(struct spu_exec_machine *mach, - uint numSamplers, - struct spu_sampler *samplers, - unsigned processor); - -extern uint -spu_exec_machine_run( struct spu_exec_machine *mach ); - - -#if defined __cplusplus -} /* extern "C" */ -#endif - -#endif /* SPU_EXEC_H 
*/ diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c deleted file mode 100644 index 98919c43ff..0000000000 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ /dev/null @@ -1,173 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * SPU functions accessed by shaders. 
- * - * Authors: Brian Paul - */ - - -#include <string.h> -#include <libmisc.h> -#include <math.h> -#include <cos14_v.h> -#include <sin14_v.h> -#include <simdmath/exp2f4.h> -#include <simdmath/log2f4.h> -#include <simdmath/powf4.h> - -#include "cell/common.h" -#include "spu_main.h" -#include "spu_funcs.h" -#include "spu_texture.h" - - -/** For "return"-ing four vectors */ -struct vec_4x4 -{ - vector float v[4]; -}; - - -static vector float -spu_cos(vector float x) -{ - return _cos14_v(x); -} - -static vector float -spu_sin(vector float x) -{ - return _sin14_v(x); -} - -static vector float -spu_pow(vector float x, vector float y) -{ - return _powf4(x, y); -} - -static vector float -spu_exp2(vector float x) -{ - return _exp2f4(x); -} - -static vector float -spu_log2(vector float x) -{ - return _log2f4(x); -} - - -static struct vec_4x4 -spu_tex_2d(vector float s, vector float t, vector float r, vector float q, - unsigned unit) -{ - struct vec_4x4 colors; - (void) r; - (void) q; - spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); - return colors; -} - -static struct vec_4x4 -spu_tex_3d(vector float s, vector float t, vector float r, vector float q, - unsigned unit) -{ - struct vec_4x4 colors; - (void) r; - (void) q; - spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); - return colors; -} - -static struct vec_4x4 -spu_tex_cube(vector float s, vector float t, vector float r, vector float q, - unsigned unit) -{ - struct vec_4x4 colors; - (void) q; - sample_texture_cube(s, t, r, unit, colors.v); - return colors; -} - - -/** - * Add named function to list of "exported" functions that will be - * made available to the PPU-hosted code generator. 
- */ -static void -export_func(struct cell_spu_function_info *spu_functions, - const char *name, void *addr) -{ - uint n = spu_functions->num; - ASSERT(strlen(name) < 16); - strcpy(spu_functions->names[n], name); - spu_functions->addrs[n] = (uint) addr; - spu_functions->num++; - ASSERT(spu_functions->num <= 16); -} - - -/** - * Return info about the SPU's function to the PPU / main memory. - * The PPU needs to know the address of some SPU-side functions so - * that we can generate shader code with function calls. - */ -void -return_function_info(void) -{ - PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; - int tag = TAG_MISC; - - ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ - - funcs.num = 0; - export_func(&funcs, "spu_cos", &spu_cos); - export_func(&funcs, "spu_sin", &spu_sin); - export_func(&funcs, "spu_pow", &spu_pow); - export_func(&funcs, "spu_exp2", &spu_exp2); - export_func(&funcs, "spu_log2", &spu_log2); - export_func(&funcs, "spu_tex_2d", &spu_tex_2d); - export_func(&funcs, "spu_tex_3d", &spu_tex_3d); - export_func(&funcs, "spu_tex_cube", &spu_tex_cube); - - /* Send the function info back to the PPU / main memory */ - mfc_put((void *) &funcs, /* src in local store */ - (unsigned int) spu.init.spu_functions, /* dst in main memory */ - sizeof(funcs), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << tag); -} - - - diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h deleted file mode 100644 index 3adb6ae99f..0000000000 --- a/src/gallium/drivers/cell/spu/spu_funcs.h +++ /dev/null @@ -1,35 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_FUNCS_H -#define SPU_FUNCS_H - -extern void -return_function_info(void); - -#endif - diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c deleted file mode 100644 index 97c86d194d..0000000000 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ /dev/null @@ -1,117 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* main() for Cell SPU code */ - - -#include <stdio.h> -#include <libmisc.h> - -#include "pipe/p_defines.h" - -#include "spu_funcs.h" -#include "spu_command.h" -#include "spu_main.h" -#include "spu_per_fragment_op.h" -#include "spu_texture.h" -//#include "spu_test.h" -#include "cell/common.h" - - -/* -helpful headers: -/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h -/opt/cell/sdk/usr/include/libmisc.h -*/ - -struct spu_global spu; - - -static void -one_time_init(void) -{ - memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); - memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); - invalidate_tex_cache(); -} - -/* In some versions of the SDK the SPE main takes 'unsigned long' as a - * parameter. 
In others it takes 'unsigned long long'. Use a define to - * select between the two. - */ -#ifdef SPU_MAIN_PARAM_LONG_LONG -typedef unsigned long long main_param_t; -#else -typedef unsigned long main_param_t; -#endif - -/** - * SPE entrypoint. - */ -int -main(main_param_t speid, main_param_t argp) -{ - int tag = 0; - - (void) speid; - - ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); - ASSERT(sizeof(struct cell_command_render) % 8 == 0); - ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); - - one_time_init(); - spu_command_init(); - - D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); - D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); - - /* get initialization data */ - mfc_get(&spu.init, /* dest */ - (unsigned int) argp, /* src */ - sizeof(struct cell_init_info), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - if (spu.init.id == 0) { - return_function_info(); - } - -#if 0 - if (spu.init.id==0) - spu_test_misc(spu.init.id); -#endif - - command_loop(); - - spu_command_close(); - - return 0; -} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h deleted file mode 100644 index a9d72f84d5..0000000000 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ /dev/null @@ -1,269 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_MAIN_H -#define SPU_MAIN_H - - -#include <spu_mfcio.h> - -#include "cell/common.h" -#include "draw/draw_vertex.h" -#include "pipe/p_state.h" - - -#if DEBUG -/* These debug macros use the unusual construction ", ##__VA_ARGS__" - * which expands to the expected comma + args if variadic arguments - * are supplied, but swallows the comma if there are no variadic - * arguments (which avoids syntax errors that would otherwise occur). - */ -#define D_PRINTF(flag, format,...) \ - if (spu.init.debug_flags & (flag)) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) -#else -#define D_PRINTF(...) -#endif - - -/** - * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. - * The data may be addressed through several different types. 
- */ -typedef union { - ushort us[TILE_SIZE][TILE_SIZE]; - uint ui[TILE_SIZE][TILE_SIZE]; - vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; - vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; -} tile_t; - - -#define TILE_STATUS_CLEAR 1 -#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ -#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ -#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ -#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ - - -/** Function for sampling textures */ -typedef void (*spu_sample_texture_2d_func)(vector float s, - vector float t, - uint unit, uint level, uint face, - vector float colors[4]); - - -/** Function for performing per-fragment ops */ -typedef void (*spu_fragment_ops_func)(uint x, uint y, - tile_t *colorTile, - tile_t *depthStencilTile, - vector float fragZ, - vector float fragRed, - vector float fragGreen, - vector float fragBlue, - vector float fragAlpha, - vector unsigned int mask); - -/** Function for running fragment program */ -typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, - vector float *outputs, - vector float *constants); - - -PIPE_ALIGN_TYPE(16, -struct spu_framebuffer -{ - void *color_start; /**< addr of color surface in main memory */ - void *depth_start; /**< addr of depth surface in main memory */ - enum pipe_format color_format; - enum pipe_format depth_format; - uint width; /**< width in pixels */ - uint height; /**< height in pixels */ - uint width_tiles; /**< width in tiles */ - uint height_tiles; /**< height in tiles */ - - uint color_clear_value; - uint depth_clear_value; - - uint zsize; /**< 0, 2 or 4 bytes per Z */ - float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -}); - - -/** per-texture level info */ -PIPE_ALIGN_TYPE(16, -struct spu_texture_level -{ - void *start; - ushort width; - ushort height; - ushort depth; - ushort tiles_per_row; - uint bytes_per_image; - /** texcoord
scale factors */ - vector float scale_s; - vector float scale_t; - vector float scale_r; - /** texcoord masks (if REPEAT then size-1, else ~0) */ - vector signed int mask_s; - vector signed int mask_t; - vector signed int mask_r; - /** texcoord clamp limits */ - vector signed int max_s; - vector signed int max_t; - vector signed int max_r; -}); - - -PIPE_ALIGN_TYPE(16, -struct spu_texture -{ - struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; - uint max_level; - uint target; /**< PIPE_TEXTURE_x */ -}); - - -/** - * All SPU global/context state will be in a singleton object of this type: - */ -PIPE_ALIGN_TYPE(16, -struct spu_global -{ - /** One-time init/constant info */ - struct cell_init_info init; - - /* - * Current state - */ - struct spu_framebuffer fb; - struct pipe_depth_stencil_alpha_state depth_stencil_alpha; - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; - struct pipe_rasterizer_state rasterizer; - struct spu_texture texture[PIPE_MAX_SAMPLERS]; - struct vertex_info vertex_info; - - /** Current color and Z tiles */ - PIPE_ALIGN_VAR(16) tile_t ctile; - PIPE_ALIGN_VAR(16) tile_t ztile; - - /** Read depth/stencil tiles? 
*/ - boolean read_depth_stencil; - - /** Current tiles' status */ - ubyte cur_ctile_status; - ubyte cur_ztile_status; - - /** Status of all tiles in framebuffer */ - PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; - PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; - - /** Current fragment ops machine code, at 8-byte boundary */ - uint *fragment_ops_code; - uint fragment_ops_code_size; - /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ - spu_fragment_ops_func fragment_ops[2]; - - /** Current fragment program machine code, at 8-byte boundary */ - PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; - /** Current fragment ops function */ - spu_fragment_program_func fragment_program; - - /** Current texture sampler function */ - spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; - spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; - spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; - - /** Fragment program constants */ - vector float constants[4 * CELL_MAX_CONSTANTS]; - -}); - - -extern struct spu_global spu; - - - -/* DMA TAGS */ - -#define TAG_SURFACE_CLEAR 10 -#define TAG_VERTEX_BUFFER 11 -#define TAG_READ_TILE_COLOR 12 -#define TAG_READ_TILE_Z 13 -#define TAG_WRITE_TILE_COLOR 14 -#define TAG_WRITE_TILE_Z 15 -#define TAG_INDEX_BUFFER 16 -#define TAG_BATCH_BUFFER 17 -#define TAG_MISC 18 -#define TAG_DCACHE0 20 -#define TAG_DCACHE1 21 -#define TAG_DCACHE2 22 -#define TAG_DCACHE3 23 -#define TAG_FENCE 24 - - -static INLINE void -wait_on_mask(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_any(); -} - - -static INLINE void -wait_on_mask_all(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_all(); -} - - - - - 
-/** Store 'value' into each of the first 'count' 16-bit elements of d. */
-static INLINE void
-memset16(ushort *d, ushort value, uint count)
-{
-   uint i;
-   for (i = 0; i < count; i++)
-      d[i] = value;
-}
-
-
-/** Store 'value' into each of the first 'count' 32-bit elements of d. */
-static INLINE void
-memset32(uint *d, uint value, uint count)
-{
-   uint i;
-   for (i = 0; i < count; i++)
-      d[i] = value;
-}
-
-
-#endif /* SPU_MAIN_H */
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
deleted file mode 100644
index 2415226a24..0000000000
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c
+++ /dev/null
@@ -1,631 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \author  Brian Paul
- */
-
-
-#include <transpose_matrix4x4.h>
-#include "pipe/p_format.h"
-#include "spu_main.h"
-#include "spu_colorpack.h"
-#include "spu_per_fragment_op.h"
-
-
-#define LINEAR_QUAD_LAYOUT 1
-
-
-/** Per-element minimum of a and b: b is selected wherever a > b. */
-static INLINE vector float
-spu_min(vector float a, vector float b)
-{
-   vector unsigned int m;
-   m = spu_cmpgt(a, b);    /* m = a > b ? ~0 : 0 */
-   return spu_sel(a, b, m);
-}
-
-
-/** Per-element maximum of a and b: a is selected wherever a > b. */
-static INLINE vector float
-spu_max(vector float a, vector float b)
-{
-   vector unsigned int m;
-   m = spu_cmpgt(a, b);    /* m = a > b ? ~0 : 0 */
-   return spu_sel(b, a, m);
-}
-
-
-/**
- * Called by rasterizer for each quad after the shader has run.  Do
- * all the per-fragment operations including alpha test, z test,
- * stencil test, blend, colormask and logicops.  This is a
- * fallback/debug function.  In reality we'll use a generated function
- * produced by the PPU.  But this function is useful for
- * debug/validation.
- */
-void
-spu_fallback_fragment_ops(uint x, uint y,
-                          tile_t *colorTile,
-                          tile_t *depthStencilTile,
-                          vector float fragZ,
-                          vector float fragR,
-                          vector float fragG,
-                          vector float fragB,
-                          vector float fragA,
-                          vector unsigned int mask)
-{
-   vector float frag_aos[4];
-   unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
-   unsigned int fragc0, fragc1, fragc2, fragc3;  /* fragment colors */
-
-   /*
-    * Do alpha test
-    */
-   if (spu.depth_stencil_alpha.alpha.enabled) {
-      vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value);
-      vector unsigned int amask;
-
-      switch (spu.depth_stencil_alpha.alpha.func) {
-      case PIPE_FUNC_LESS:
-         amask = spu_cmpgt(ref, fragA);  /* mask = (fragA < ref) */
-         break;
-      case PIPE_FUNC_GREATER:
-         amask = spu_cmpgt(fragA, ref);  /* mask = (fragA > ref) */
-         break;
-      case PIPE_FUNC_GEQUAL:
-         amask = spu_cmpgt(ref, fragA);
-         amask = spu_nor(amask, amask);
-         break;
-      case PIPE_FUNC_LEQUAL:
-         amask = spu_cmpgt(fragA, ref);
-         amask = spu_nor(amask, amask);
-         break;
-      case PIPE_FUNC_EQUAL:
-         amask = spu_cmpeq(ref, fragA);
-         break;
-      case PIPE_FUNC_NOTEQUAL:
-         amask = spu_cmpeq(ref, fragA);
-         amask = spu_nor(amask, amask);
-         break;
-      case PIPE_FUNC_ALWAYS:
-         amask = spu_splats(0xffffffffU);
-         break;
-      case PIPE_FUNC_NEVER:
-         amask = spu_splats( 0x0U);
-         break;
-      default:
-         ;
-      }
-
-      mask = spu_and(mask, amask);
-   }
-
-
-   /*
-    * Z and/or stencil testing...
-    */
-   if (spu.depth_stencil_alpha.depth.enabled ||
-       spu.depth_stencil_alpha.stencil[0].enabled) {
-
-      /* get four Z/Stencil values from tile */
-      vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
-      vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
-      vector unsigned int ifbZ = spu_and(ifbZS, mask24);
-      vector unsigned int ifbS = spu_andc(ifbZS, mask24);
-
-      if (spu.depth_stencil_alpha.stencil[0].enabled) {
-         /* do stencil test */
-         ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT);
-
-      }
-      else if (spu.depth_stencil_alpha.depth.enabled) {
-         /* do depth test */
-
-         ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
-                spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM);
-
-         vector unsigned int ifragZ;
-         vector unsigned int zmask;
-
-         /* convert four fragZ from float to uint */
-         fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
-         ifragZ = spu_convtu(fragZ, 0);
-
-         /* do depth comparison, setting zmask with results */
-         switch (spu.depth_stencil_alpha.depth.func) {
-         case PIPE_FUNC_LESS:
-            zmask = spu_cmpgt(ifbZ, ifragZ);  /* mask = (ifragZ < ifbZ) */
-            break;
-         case PIPE_FUNC_GREATER:
-            zmask = spu_cmpgt(ifragZ, ifbZ);  /* mask = (ifragZ > ifbZ) */
-            break;
-         case PIPE_FUNC_GEQUAL:
-            zmask = spu_cmpgt(ifbZ, ifragZ);
-            zmask = spu_nor(zmask, zmask);
-            break;
-         case PIPE_FUNC_LEQUAL:
-            zmask = spu_cmpgt(ifragZ, ifbZ);
-            zmask = spu_nor(zmask, zmask);
-            break;
-         case PIPE_FUNC_EQUAL:
-            zmask = spu_cmpeq(ifbZ, ifragZ);
-            break;
-         case PIPE_FUNC_NOTEQUAL:
-            zmask = spu_cmpeq(ifbZ, ifragZ);
-            zmask = spu_nor(zmask, zmask);
-            break;
-         case PIPE_FUNC_ALWAYS:
-            zmask = spu_splats(0xffffffffU);
-            break;
-         case PIPE_FUNC_NEVER:
-            zmask = spu_splats( 0x0U);
-            break;
-         default:
-            ;
-         }
-
-         mask = spu_and(mask, zmask);
-
-         /* merge framebuffer Z and fragment Z according to the mask */
-         ifbZ = spu_or(spu_and(ifragZ, mask),
-                       spu_andc(ifbZ, mask));
-      }
-
-      if (spu_extract(spu_orx(mask), 0)) {
-         /* put new fragment Z/Stencil values back into Z/Stencil tile */
-         depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
-
-         spu.cur_ztile_status = TILE_STATUS_DIRTY;
-      }
-   }
-
-
-   /*
-    * If we'll need the current framebuffer/tile colors for blending
-    * or logicop or colormask, fetch them now.
-    */
-   if (spu.blend.rt[0].blend_enable ||
-       spu.blend.logicop_enable ||
-       spu.blend.rt[0].colormask != 0xf) {
-
-#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
-      fbc0 = colorTile->ui[y][x*2+0];
-      fbc1 = colorTile->ui[y][x*2+1];
-      fbc2 = colorTile->ui[y][x*2+2];
-      fbc3 = colorTile->ui[y][x*2+3];
-#else
-      fbc0 = colorTile->ui[y+0][x+0];
-      fbc1 = colorTile->ui[y+0][x+1];
-      fbc2 = colorTile->ui[y+1][x+0];
-      fbc3 = colorTile->ui[y+1][x+1];
-#endif
-   }
-
-
-   /*
-    * Do blending
-    */
-   if (spu.blend.rt[0].blend_enable) {
-      /* blending terms, misc regs */
-      vector float term1r, term1g, term1b, term1a;
-      vector float term2r, term2g, term2b, term2a;
-      vector float one, tmp;
-
-      vector float fbRGBA[4];  /* current framebuffer colors */
-
-      /* convert framebuffer colors from packed int to vector float */
-      {
-         vector float temp[4]; /* float colors in AOS form */
-         switch (spu.fb.color_format) {
-         case PIPE_FORMAT_A8R8G8B8_UNORM:
-            temp[0] = spu_unpack_B8G8R8A8(fbc0);
-            temp[1] = spu_unpack_B8G8R8A8(fbc1);
-            temp[2] = spu_unpack_B8G8R8A8(fbc2);
-            temp[3] = spu_unpack_B8G8R8A8(fbc3);
-            break;
-         case PIPE_FORMAT_B8G8R8A8_UNORM:
-            temp[0] = spu_unpack_A8R8G8B8(fbc0);
-            temp[1] = spu_unpack_A8R8G8B8(fbc1);
-            temp[2] = spu_unpack_A8R8G8B8(fbc2);
-            temp[3] = spu_unpack_A8R8G8B8(fbc3);
-            break;
-         default:
-            ASSERT(0);
-         }
-         _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
-      }
-
-      /*
-       * Compute Src RGB terms (fragment color * factor)
-       */
-      switch (spu.blend.rt[0].rgb_src_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term1r = fragR;
-         term1g = fragG;
-         term1b = fragB;
-         break;
-      case PIPE_BLENDFACTOR_ZERO:
-         term1r =
-         term1g =
-         term1b = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         term1r = spu_mul(fragR, fragR);
-         term1g = spu_mul(fragG, fragG);
-         term1b = spu_mul(fragB, fragB);
-         break;
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term1r = spu_mul(fragR, fragA);
-         term1g = spu_mul(fragG, fragA);
-         term1b = spu_mul(fragB, fragA);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         term1r = spu_mul(fragR, fbRGBA[0]);
-         term1g = spu_mul(fragG, fbRGBA[1]);
-         /* blue uses the framebuffer blue channel (index 2), not green */
-         term1b = spu_mul(fragB, fbRGBA[2]);
-         break;
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term1r = spu_mul(fragR, fbRGBA[3]);
-         term1g = spu_mul(fragG, fbRGBA[3]);
-         term1b = spu_mul(fragB, fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
-         term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
-         term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
-         break;
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
-         term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
-         term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
-         break;
-      /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Compute Src Alpha term (fragment alpha * factor)
-       */
-      switch (spu.blend.rt[0].alpha_src_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term1a = fragA;
-         break;
-      case PIPE_BLENDFACTOR_ZERO:
-         term1a = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         /* fall-through: for the alpha channel the "color" factor is alpha */
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term1a = spu_mul(fragA, fragA);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term1a = spu_mul(fragA, fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         /* source alpha term: scale the fragment alpha, not red */
-         term1a = spu_mul(fragA, spu_splats(spu.blend_color.color[3]));
-         break;
-      /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Compute Dest RGB terms (framebuffer color * factor)
-       */
-      switch (spu.blend.rt[0].rgb_dst_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term2r = fbRGBA[0];
-         term2g = fbRGBA[1];
-         term2b = fbRGBA[2];
-         break;
-      case PIPE_BLENDFACTOR_ZERO:
-         term2r =
-         term2g =
-         term2b = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         term2r = spu_mul(fbRGBA[0], fragR);
-         term2g = spu_mul(fbRGBA[1], fragG);
-         term2b = spu_mul(fbRGBA[2], fragB);
-         break;
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term2r = spu_mul(fbRGBA[0], fragA);
-         term2g = spu_mul(fbRGBA[1], fragA);
-         term2b = spu_mul(fbRGBA[2], fragA);
-         break;
-      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         one = spu_splats(1.0f);
-         tmp = spu_sub(one, fragA);
-         term2r = spu_mul(fbRGBA[0], tmp);
-         term2g = spu_mul(fbRGBA[1], tmp);
-         term2b = spu_mul(fbRGBA[2], tmp);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
-         term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
-         term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
-         break;
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
-         term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
-         term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
-         term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
-         term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
-         break;
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
-         term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
-         term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
-         break;
-      /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Compute Dest Alpha term (framebuffer alpha * factor)
-       */
-      switch (spu.blend.rt[0].alpha_dst_factor) {
-      case PIPE_BLENDFACTOR_ONE:
-         term2a = fbRGBA[3];
-         break;
-      case PIPE_BLENDFACTOR_ZERO:
-         term2a = spu_splats(0.0f);
-         break;
-      case PIPE_BLENDFACTOR_SRC_COLOR:
-         /* fall-through: for the alpha channel the "color" factor is alpha */
-      case PIPE_BLENDFACTOR_SRC_ALPHA:
-         term2a = spu_mul(fbRGBA[3], fragA);
-         break;
-      case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-         one = spu_splats(1.0f);
-         tmp = spu_sub(one, fragA);
-         term2a = spu_mul(fbRGBA[3], tmp);
-         break;
-      case PIPE_BLENDFACTOR_DST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_DST_ALPHA:
-         term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
-         break;
-      case PIPE_BLENDFACTOR_CONST_COLOR:
-         /* fall-through */
-      case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
-         break;
-      /* XXX more cases */
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Combine Src/Dest RGB terms
-       */
-      switch (spu.blend.rt[0].rgb_func) {
-      case PIPE_BLEND_ADD:
-         fragR = spu_add(term1r, term2r);
-         fragG = spu_add(term1g, term2g);
-         fragB = spu_add(term1b, term2b);
-         break;
-      case PIPE_BLEND_SUBTRACT:
-         fragR = spu_sub(term1r, term2r);
-         fragG = spu_sub(term1g, term2g);
-         fragB = spu_sub(term1b, term2b);
-         break;
-      case PIPE_BLEND_REVERSE_SUBTRACT:
-         fragR = spu_sub(term2r, term1r);
-         fragG = spu_sub(term2g, term1g);
-         fragB = spu_sub(term2b, term1b);
-         break;
-      case PIPE_BLEND_MIN:
-         fragR = spu_min(term1r, term2r);
-         fragG = spu_min(term1g, term2g);
-         fragB = spu_min(term1b, term2b);
-         break;
-      case PIPE_BLEND_MAX:
-         fragR = spu_max(term1r, term2r);
-         fragG = spu_max(term1g, term2g);
-         fragB = spu_max(term1b, term2b);
-         break;
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Combine Src/Dest A term
-       */
-      switch (spu.blend.rt[0].alpha_func) {
-      case PIPE_BLEND_ADD:
-         fragA = spu_add(term1a, term2a);
-         break;
-      case PIPE_BLEND_SUBTRACT:
-         fragA = spu_sub(term1a, term2a);
-         break;
-      case PIPE_BLEND_REVERSE_SUBTRACT:
-         fragA = spu_sub(term2a, term1a);
-         break;
-      case PIPE_BLEND_MIN:
-         fragA = spu_min(term1a, term2a);
-         break;
-      case PIPE_BLEND_MAX:
-         fragA = spu_max(term1a, term2a);
-         break;
-      default:
-         ASSERT(0);
-      }
-   }
-
-
-   /*
-    * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
-    */
-#if 0
-   /* original code */
-   {
-      vector float frag_soa[4];
-      frag_soa[0] = fragR;
-      frag_soa[1] = fragG;
-      frag_soa[2] = fragB;
-      frag_soa[3] = fragA;
-      _transpose_matrix4x4(frag_aos, frag_soa);
-   }
-#else
-   /* short-cut relying on function parameter layout: */
-   /* NOTE(review): assumes fragR, fragG, fragB, fragA are contiguous in
-    * memory in that order; the "#if 0" variant above does not depend on it.
-    */
-   _transpose_matrix4x4(frag_aos, &fragR);
-   (void) fragG;
-   (void) fragB;
-#endif
-
-   /*
-    * Pack fragment float colors into 32-bit RGBA words.
-    */
-   switch (spu.fb.color_format) {
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
-      fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
-      fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
-      fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
-      break;
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
-      fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
-      fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
-      fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
-      fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
-      break;
-   default:
-      fprintf(stderr, "SPU: Bad pixel format in spu_fallback_fragment_ops\n");
-      ASSERT(0);
-   }
-
-
-   /*
-    * Do color masking
-    */
-   if (spu.blend.rt[0].colormask != 0xf) {
-      uint cmask = 0x0; /* each byte corresponds to a color channel */
-
-      /* Form bitmask depending on color buffer format and colormask bits */
-      switch (spu.fb.color_format) {
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
-         if (spu.blend.rt[0].colormask & PIPE_MASK_R)
-            cmask |= 0x00ff0000; /* red */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_G)
-            cmask |= 0x0000ff00; /* green */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_B)
-            cmask |= 0x000000ff; /* blue */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_A)
-            cmask |= 0xff000000; /* alpha */
-         break;
-      case PIPE_FORMAT_A8R8G8B8_UNORM:
-         if (spu.blend.rt[0].colormask & PIPE_MASK_R)
-            cmask |= 0x0000ff00; /* red */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_G)
-            cmask |= 0x00ff0000; /* green */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_B)
-            cmask |= 0xff000000; /* blue */
-         if (spu.blend.rt[0].colormask & PIPE_MASK_A)
-            cmask |= 0x000000ff; /* alpha */
-         break;
-      default:
-         ASSERT(0);
-      }
-
-      /*
-       * Apply color mask to the 32-bit packed colors.
-       * if (cmask[i])
-       *    frag color[i] = frag color[i];
-       * else
-       *    frag color[i] = framebuffer color[i];
-       */
-      fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
-      fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
-      fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
-      fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
-   }
-
-
-   /*
-    * Do logic ops
-    */
-   if (spu.blend.logicop_enable) {
-      /* XXX to do */
-      /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
-   }
-
-
-   /*
-    * If mask is non-zero, mark tile as dirty.
-    */
-   if (spu_extract(spu_orx(mask), 0)) {
-      spu.cur_ctile_status = TILE_STATUS_DIRTY;
-   }
-   else {
-      /* write no fragments */
-      return;
-   }
-
-
-   /*
-    * Write new fragment/quad colors to the framebuffer/tile.
-    * Only write pixels where the corresponding mask word is set.
-    */
-#if LINEAR_QUAD_LAYOUT
-   /*
-    * Quad layout:
-    *  +--+--+--+--+
-    *  |p0|p1|p2|p3|...
-    *  +--+--+--+--+
-    */
-   if (spu_extract(mask, 0))
-      colorTile->ui[y][x*2] = fragc0;
-   if (spu_extract(mask, 1))
-      colorTile->ui[y][x*2+1] = fragc1;
-   if (spu_extract(mask, 2))
-      colorTile->ui[y][x*2+2] = fragc2;
-   if (spu_extract(mask, 3))
-      colorTile->ui[y][x*2+3] = fragc3;
-#else
-   /*
-    * Quad layout:
-    *  +--+--+
-    *  |p0|p1|...
-    *  +--+--+
-    *  |p2|p3|...
-    *  +--+--+
-    */
-   if (spu_extract(mask, 0))
-      colorTile->ui[y+0][x+0] = fragc0;
-   if (spu_extract(mask, 1))
-      colorTile->ui[y+0][x+1] = fragc1;
-   if (spu_extract(mask, 2))
-      colorTile->ui[y+1][x+0] = fragc2;
-   if (spu_extract(mask, 3))
-      colorTile->ui[y+1][x+1] = fragc3;
-#endif
-}
diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
deleted file mode 100644
index f817abf046..0000000000
--- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_PER_FRAGMENT_OP -#define SPU_PER_FRAGMENT_OP - - -extern void -spu_fallback_fragment_ops(uint x, uint y, - tile_t *colorTile, - tile_t *depthStencilTile, - vector float fragZ, - vector float fragRed, - vector float fragGreen, - vector float fragBlue, - vector float fragAlpha, - vector unsigned int mask); - - -#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c deleted file mode 100644 index 14987e3c3a..0000000000 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ /dev/null @@ -1,356 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <libmisc.h>
-#include <spu_mfcio.h>
-
-#include "spu_main.h"
-#include "spu_render.h"
-#include "spu_shuffle.h"
-#include "spu_tri.h"
-#include "spu_tile.h"
-#include "cell/common.h"
-#include "util/u_memory.h"
-
-
-/**
- * Given a rendering command's bounding box (in pixels) compute the
- * location of the corresponding screen tile bounding box.
- * \param txmin, tymin  return the tile coordinates of the box's minimum corner
- * \param box_num_tiles  returns the number of tiles in the box
- * \param box_width_tiles  returns the width of the box in tiles
- * The maximum tile coordinates are clamped to the framebuffer size in tiles.
- */
-static INLINE void
-tile_bounding_box(const struct cell_command_render *render,
-                  uint *txmin, uint *tymin,
-                  uint *box_num_tiles, uint *box_width_tiles)
-{
-#if 0
-   /* Debug: full-window bounding box */
-   uint txmax = spu.fb.width_tiles - 1;
-   uint tymax = spu.fb.height_tiles - 1;
-   *txmin = 0;
-   *tymin = 0;
-   *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
-   *box_width_tiles = spu.fb.width_tiles;
-   (void) render;
-   (void) txmax;
-   (void) tymax;
-#else
-   uint txmax, tymax, box_height_tiles;
-
-   *txmin = (uint) render->xmin / TILE_SIZE;
-   *tymin = (uint) render->ymin / TILE_SIZE;
-   txmax = (uint) render->xmax / TILE_SIZE;
-   tymax = (uint) render->ymax / TILE_SIZE;
-   if (txmax >= spu.fb.width_tiles)
-      txmax = spu.fb.width_tiles-1;
-   if (tymax >= spu.fb.height_tiles)
-      tymax = spu.fb.height_tiles-1;
-   *box_width_tiles = txmax - *txmin + 1;
-   box_height_tiles = tymax - *tymin + 1;
-   *box_num_tiles = *box_width_tiles * box_height_tiles;
-#endif
-#if 0
-   printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id,
-          render->xmin, render->ymin, render->xmax, render->ymax);
-   printf("SPU %u: tiles: %u, %u .. %u, %u\n",
-          spu.init.id, *txmin, *tymin, txmax, tymax);
-   ASSERT(render->xmin <= render->xmax);
-   ASSERT(render->ymin <= render->ymax);
-#endif
-}
-
-
-/** Check if the tile at (tx,ty) belongs to this SPU: the tile's linear index modulo the number of SPUs must equal this SPU's id */
-static INLINE boolean
-my_tile(uint tx, uint ty)
-{
-   return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id;
-}
-
-
-/**
- * Start fetching non-clear color/Z tiles from main memory.
- * The Z tile is only fetched when spu.read_depth_stencil is set.
- * Each tile that is fetched is marked TILE_STATUS_GETTING.
- */
-static INLINE void
-get_cz_tiles(uint tx, uint ty)
-{
-   if (spu.read_depth_stencil) {
-      if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
-         //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
-         get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
-         spu.cur_ztile_status = TILE_STATUS_GETTING;
-      }
-   }
-
-   if (spu.cur_ctile_status != TILE_STATUS_CLEAR) {
-      //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty);
-      get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
-      spu.cur_ctile_status = TILE_STATUS_GETTING;
-   }
-}
-
-
-/**
- * Start putting dirty color/Z tiles back to main memory.
- * A tile still marked TILE_STATUS_GETTING is not written; its status is
- * only reset to TILE_STATUS_DEFINED.
- */
-static INLINE void
-put_cz_tiles(uint tx, uint ty)
-{
-   if (spu.cur_ztile_status == TILE_STATUS_DIRTY) {
-      /* tile was modified and needs to be written back */
-      //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty);
-      put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1);
-      spu.cur_ztile_status = TILE_STATUS_DEFINED;
-   }
-   else if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
-      /* tile was never used */
-      spu.cur_ztile_status = TILE_STATUS_DEFINED;
-      //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty);
-   }
-
-   if (spu.cur_ctile_status == TILE_STATUS_DIRTY) {
-      /* tile was modified and needs to be written back */
-      //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty);
-      put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0);
-      spu.cur_ctile_status = TILE_STATUS_DEFINED;
-   }
-   else if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
-      /* tile was never used */
-      spu.cur_ctile_status = TILE_STATUS_DEFINED;
-      //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty);
-   }
-}
-
-
-/**
- * Wait for 'put' of color/z tiles to complete.
- * The Z tile write is only waited on when spu.read_depth_stencil is set.
- */
-static INLINE void
-wait_put_cz_tiles(void)
-{
-   wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
-   if (spu.read_depth_stencil) {
-      wait_on_mask(1 << TAG_WRITE_TILE_Z);
-   }
-}
-
-
-/**
- * Render primitives
- * \param pos_incr  returns value indicating how many words to skip after
- *                  this command in the batch buffer
- */
-void
-cmd_render(const struct cell_command_render *render, uint *pos_incr)
-{
-   /* we'll DMA into these buffers */
-   PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE];
-   const uint vertex_size = render->vertex_size; /* in bytes */
-   /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
-   uint index_bytes;
-   const ubyte *vertices;
-   const ushort *indexes;
-   uint i, j;
-   uint num_tiles;
-
-   D_PRINTF(CELL_DEBUG_CMD,
-            "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
-            render->prim_type,
-            render->num_verts,
-            render->num_indexes,
-            render->inline_verts);
-
-   ASSERT(sizeof(*render) % 4 == 0);
-   ASSERT(total_vertex_bytes % 16 == 0);
-   ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
-   ASSERT(render->num_indexes % 3 == 0);
-
-
-   /* indexes are right after the render command in the batch buffer */
-   indexes = (const ushort *) (render + 1);
-   index_bytes = ROUNDUP8(render->num_indexes * 2);
-   *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
-
-
-   if (render->inline_verts) {
-      /* Vertices are after indexes in batch buffer at next 16-byte addr */
-      vertices = (const ubyte *) render + (*pos_incr * 8);
-      vertices = (const ubyte *) align_pointer((void *) vertices, 16);
-      ASSERT_ALIGN16(vertices);
-      *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
-   }
-   else {
-      /* Begin DMA fetch of vertex buffer */
-      ubyte *src = spu.init.buffers[render->vertex_buf];
-      ubyte *dest = vertex_data;
-
-      /* skip vertex data we won't use */
-#if 01
-      src += render->min_index * vertex_size;
-      dest +=
render->min_index * vertex_size; - total_vertex_bytes -= render->min_index * vertex_size; -#endif - ASSERT(total_vertex_bytes % 16 == 0); - ASSERT_ALIGN16(dest); - ASSERT_ALIGN16(src); - - mfc_get(dest, /* in vertex_data[] array */ - (unsigned int) src, /* src in main memory */ - total_vertex_bytes, /* size */ - TAG_VERTEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - - vertices = vertex_data; - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - } - - - /** - ** find tiles which intersect the prim bounding box - **/ - uint txmin, tymin, box_width_tiles, box_num_tiles; - tile_bounding_box(render, &txmin, &tymin, - &box_num_tiles, &box_width_tiles); - - - /* make sure any pending clears have completed */ - wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ - - - num_tiles = 0; - - /** - ** loop over tiles, rendering tris - **/ - for (i = 0; i < box_num_tiles; i++) { - const uint tx = txmin + i % box_width_tiles; - const uint ty = tymin + i / box_width_tiles; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - - if (!my_tile(tx, ty)) - continue; - - num_tiles++; - - spu.cur_ctile_status = spu.ctile_status[ty][tx]; - spu.cur_ztile_status = spu.ztile_status[ty][tx]; - - get_cz_tiles(tx, ty); - - uint drawn = 0; - - const qword vertex_sizes = (qword)spu_splats(vertex_size); - const qword verticess = (qword)spu_splats((uint)vertices); - - ASSERT_ALIGN16(&indexes[0]); - - const uint num_indexes = render->num_indexes; - - /* loop over tris - * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled - * avoiding variable rotates when extracting vertex indices. 
- */ - for (j = 0; j < num_indexes; j += 24) { - /* Load three vectors, containing 24 ushort indices */ - const qword* lower_qword = (qword*)&indexes[j]; - const qword indices0 = lower_qword[0]; - const qword indices1 = lower_qword[1]; - const qword indices2 = lower_qword[2]; - - /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */ - /* Straightforward rotates for these */ - qword vs0 = indices0; - qword vs1 = si_shlqbyi(indices0, 6); - qword vs3 = si_shlqbyi(indices1, 2); - qword vs4 = si_shlqbyi(indices1, 8); - qword vs6 = si_shlqbyi(indices2, 4); - qword vs7 = si_shlqbyi(indices2, 10); - - /* For tri 2 and 5, the three indices are split across two machine - * words - rotate and combine */ - const qword tmp2a = si_shlqbyi(indices0, 12); - const qword tmp2b = si_rotqmbyi(indices1, 12|16); - qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20))); - - const qword tmp5a = si_shlqbyi(indices1, 14); - const qword tmp5b = si_rotqmbyi(indices2, 14|16); - qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60))); - - /* unpack indices from halfword slots to word slots */ - vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0)); - vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0)); - vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0)); - vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0)); - vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0)); - vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0)); - vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0)); - vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0)); - - /* Calculate address of vertex in vertices[] */ - vs0 = si_mpya(vs0, vertex_sizes, verticess); - vs1 = si_mpya(vs1, vertex_sizes, verticess); - vs2 = si_mpya(vs2, vertex_sizes, verticess); - vs3 = si_mpya(vs3, vertex_sizes, verticess); - vs4 = si_mpya(vs4, vertex_sizes, verticess); - vs5 = si_mpya(vs5, vertex_sizes, verticess); - vs6 = si_mpya(vs6, vertex_sizes, verticess); - vs7 = si_mpya(vs7, vertex_sizes, verticess); - - /* Select the appropriate call 
based on the number of vertices - * remaining */ - switch(num_indexes - j) { - default: drawn += tri_draw(vs7, tx, ty); - case 21: drawn += tri_draw(vs6, tx, ty); - case 18: drawn += tri_draw(vs5, tx, ty); - case 15: drawn += tri_draw(vs4, tx, ty); - case 12: drawn += tri_draw(vs3, tx, ty); - case 9: drawn += tri_draw(vs2, tx, ty); - case 6: drawn += tri_draw(vs1, tx, ty); - case 3: drawn += tri_draw(vs0, tx, ty); - } - } - - //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); - - /* write color/z tiles back to main framebuffer, if dirtied */ - put_cz_tiles(tx, ty); - - wait_put_cz_tiles(); /* XXX seems unnecessary... */ - - spu.ctile_status[ty][tx] = spu.cur_ctile_status; - spu.ztile_status[ty][tx] = spu.cur_ztile_status; - } - - D_PRINTF(CELL_DEBUG_CMD, - "RENDER done (%u tiles hit)\n", - num_tiles); -} diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h deleted file mode 100644 index 493434f087..0000000000 --- a/src/gallium/drivers/cell/spu/spu_render.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SPU_RENDER_H -#define SPU_RENDER_H - -#include "cell/common.h" - -extern void -cmd_render(const struct cell_command_render *render, uint *pos_incr); - -#endif /* SPU_RENDER_H */ - diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h deleted file mode 100644 index 74f2a0b6d2..0000000000 --- a/src/gallium/drivers/cell/spu/spu_shuffle.h +++ /dev/null @@ -1,186 +0,0 @@ -#ifndef SPU_SHUFFLE_H -#define SPU_SHUFFLE_H - -/* - * Generate shuffle patterns with minimal fuss. - * - * Based on ideas from - * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf - * - * A-P indicates 0-15th position in first vector - * a-p indicates 0-15th position in second vector - * - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f| - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * | A| B| C| D| - * +-----+-----+-----+-----+-----+-----+-----+-----+ - * | A| B| C| D| E| F| G| H| - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P| - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * - * x or X indicates 0xff - * 8 indicates 0x80 - * 0 indicates 0x00 - * - * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector - * unsigned char literal suitable for use with spu_shuffle(). 
- * - * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector - * literal suitable for use with si_shufb(). - * - * - * For example : - * SHUFB4(A,A,A,A) - * expands to : - * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}) - * - * SHUFFLE8(A,B,a,b,C,c,8,8) - * expands to : - * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13, - * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0}) - * - */ - -#include <spu_intrinsics.h> - -#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03 -#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07 -#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b -#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f -#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13 -#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17 -#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b -#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f -#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0 -#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0 -#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80 -#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0 - -#define SHUFFLE_VECTOR_4__(A, B, C, D) \ - SHUFFLE_PATTERN_4_##A##__, \ - SHUFFLE_PATTERN_4_##B##__, \ - SHUFFLE_PATTERN_4_##C##__, \ - SHUFFLE_PATTERN_4_##D##__ - -#define SHUFFLE4(A, B, C, D) \ - ((const vector unsigned char){ \ - SHUFFLE_VECTOR_4__(A, B, C, D) \ - }) - -#define SHUFB4(A, B, C, D) \ - ((const qword){ \ - SHUFFLE_VECTOR_4__(A, B, C, D) \ - }) - - -#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01 -#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03 -#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05 -#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07 -#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09 -#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b -#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d -#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f -#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11 -#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13 -#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15 -#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17 -#define 
SHUFFLE_PATTERN_8_e__ 0x18, 0x19 -#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b -#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d -#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f -#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0 -#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0 -#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80 -#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0 - - -#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ - SHUFFLE_PATTERN_8_##A##__, \ - SHUFFLE_PATTERN_8_##B##__, \ - SHUFFLE_PATTERN_8_##C##__, \ - SHUFFLE_PATTERN_8_##D##__, \ - SHUFFLE_PATTERN_8_##E##__, \ - SHUFFLE_PATTERN_8_##F##__, \ - SHUFFLE_PATTERN_8_##G##__, \ - SHUFFLE_PATTERN_8_##H##__ - -#define SHUFFLE8(A, B, C, D, E, F, G, H) \ - ((const vector unsigned char){ \ - SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ - }) - -#define SHUFB8(A, B, C, D, E, F, G, H) \ - ((const qword){ \ - SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ - }) - - -#define SHUFFLE_PATTERN_16_A__ 0x00 -#define SHUFFLE_PATTERN_16_B__ 0x01 -#define SHUFFLE_PATTERN_16_C__ 0x02 -#define SHUFFLE_PATTERN_16_D__ 0x03 -#define SHUFFLE_PATTERN_16_E__ 0x04 -#define SHUFFLE_PATTERN_16_F__ 0x05 -#define SHUFFLE_PATTERN_16_G__ 0x06 -#define SHUFFLE_PATTERN_16_H__ 0x07 -#define SHUFFLE_PATTERN_16_I__ 0x08 -#define SHUFFLE_PATTERN_16_J__ 0x09 -#define SHUFFLE_PATTERN_16_K__ 0x0a -#define SHUFFLE_PATTERN_16_L__ 0x0b -#define SHUFFLE_PATTERN_16_M__ 0x0c -#define SHUFFLE_PATTERN_16_N__ 0x0d -#define SHUFFLE_PATTERN_16_O__ 0x0e -#define SHUFFLE_PATTERN_16_P__ 0x0f -#define SHUFFLE_PATTERN_16_a__ 0x10 -#define SHUFFLE_PATTERN_16_b__ 0x11 -#define SHUFFLE_PATTERN_16_c__ 0x12 -#define SHUFFLE_PATTERN_16_d__ 0x13 -#define SHUFFLE_PATTERN_16_e__ 0x14 -#define SHUFFLE_PATTERN_16_f__ 0x15 -#define SHUFFLE_PATTERN_16_g__ 0x16 -#define SHUFFLE_PATTERN_16_h__ 0x17 -#define SHUFFLE_PATTERN_16_i__ 0x18 -#define SHUFFLE_PATTERN_16_j__ 0x19 -#define SHUFFLE_PATTERN_16_k__ 0x1a -#define SHUFFLE_PATTERN_16_l__ 0x1b -#define SHUFFLE_PATTERN_16_m__ 0x1c -#define SHUFFLE_PATTERN_16_n__ 0x1d 
-#define SHUFFLE_PATTERN_16_o__ 0x1e -#define SHUFFLE_PATTERN_16_p__ 0x1f -#define SHUFFLE_PATTERN_16_X__ 0xc0 -#define SHUFFLE_PATTERN_16_x__ 0xc0 -#define SHUFFLE_PATTERN_16_0__ 0x80 -#define SHUFFLE_PATTERN_16_8__ 0xe0 - -#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - SHUFFLE_PATTERN_16_##A##__, \ - SHUFFLE_PATTERN_16_##B##__, \ - SHUFFLE_PATTERN_16_##C##__, \ - SHUFFLE_PATTERN_16_##D##__, \ - SHUFFLE_PATTERN_16_##E##__, \ - SHUFFLE_PATTERN_16_##F##__, \ - SHUFFLE_PATTERN_16_##G##__, \ - SHUFFLE_PATTERN_16_##H##__, \ - SHUFFLE_PATTERN_16_##I##__, \ - SHUFFLE_PATTERN_16_##J##__, \ - SHUFFLE_PATTERN_16_##K##__, \ - SHUFFLE_PATTERN_16_##L##__, \ - SHUFFLE_PATTERN_16_##M##__, \ - SHUFFLE_PATTERN_16_##N##__, \ - SHUFFLE_PATTERN_16_##O##__, \ - SHUFFLE_PATTERN_16_##P##__ - -#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - ((const vector unsigned char){ \ - SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - }) - -#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - ((const qword){ \ - SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - }) - -#endif diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c deleted file mode 100644 index 69784c8978..0000000000 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ /dev/null @@ -1,641 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <math.h> - -#include "pipe/p_compiler.h" -#include "spu_main.h" -#include "spu_texture.h" -#include "spu_tile.h" -#include "spu_colorpack.h" -#include "spu_dcache.h" - - -/** - * Mark all tex cache entries as invalid. 
- */ -void -invalidate_tex_cache(void) -{ - uint lvl; - for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { - uint unit = 0; - uint bytes = 4 * spu.texture[unit].level[lvl].width - * spu.texture[unit].level[lvl].height; - - if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) - bytes *= 6; - else if (spu.texture[unit].target == PIPE_TEXTURE_3D) - bytes *= spu.texture[unit].level[lvl].depth; - - spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); - } -} - - -/** - * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... - * - * NOTE: in the typical case of bilinear filtering, the four texels - * are in a 2x2 group so we could get by with just two dcache fetches - * (two side-by-side texels per fetch). But when bilinear filtering - * wraps around a texture edge, we'll probably need code like we have - * now. - * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, - * it's quite likely that the four pixels in a quad will need some of the - * same texels. So look into doing texture fetches for four pixels at - * a time. 
- */ -static void -get_four_texels(const struct spu_texture_level *tlevel, uint face, - vec_int4 x, vec_int4 y, - vec_uint4 *texels) -{ - unsigned texture_ea = (uintptr_t) tlevel->start; - const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ - const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ - const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ - const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ - - const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); - const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); - - qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); - tile_offset = si_mpy((qword) tile_offset, tile_size); - - qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); - texel_offset = si_mpyui(texel_offset, 4); - - vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); - - texture_ea = texture_ea + face * tlevel->bytes_per_image; - - spu_dcache_fetch_unaligned((qword *) & texels[0], - texture_ea + spu_extract(offset, 0), 4); - spu_dcache_fetch_unaligned((qword *) & texels[1], - texture_ea + spu_extract(offset, 1), 4); - spu_dcache_fetch_unaligned((qword *) & texels[2], - texture_ea + spu_extract(offset, 2), 4); - spu_dcache_fetch_unaligned((qword *) & texels[3], - texture_ea + spu_extract(offset, 3), 4); -} - - -/** clamp vec to [0, max] */ -static INLINE vector signed int -spu_clamp(vector signed int vec, vector signed int max) -{ - static const vector signed int zero = {0,0,0,0}; - vector unsigned int c; - c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ - vec = spu_sel(zero, vec, c); - c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ - vec = spu_sel(vec, max, c); - return vec; -} - - - -/** - * Do nearest texture sampling for four pixels. - * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
- */ -void -sample_texture_2d_nearest(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]) -{ - const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - vector float ss = spu_mul(s, tlevel->scale_s); - vector float tt = spu_mul(t, tlevel->scale_t); - vector signed int is = spu_convts(ss, 0); - vector signed int it = spu_convts(tt, 0); - vec_uint4 texels[4]; - - /* PIPE_TEX_WRAP_REPEAT */ - is = spu_and(is, tlevel->mask_s); - it = spu_and(it, tlevel->mask_t); - - /* PIPE_TEX_WRAP_CLAMP */ - is = spu_clamp(is, tlevel->max_s); - it = spu_clamp(it, tlevel->max_t); - - get_four_texels(tlevel, face, is, it, texels); - - /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ - spu_unpack_A8R8G8B8_transpose4(texels, colors); -} - - -/** - * Do bilinear texture sampling for four pixels. - * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). - */ -void -sample_texture_2d_bilinear(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]) -{ - const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; - - vector float ss = spu_madd(s, tlevel->scale_s, half); - vector float tt = spu_madd(t, tlevel->scale_t, half); - - vector signed int is0 = spu_convts(ss, 0); - vector signed int it0 = spu_convts(tt, 0); - - /* is + 1, it + 1 */ - vector signed int is1 = spu_add(is0, 1); - vector signed int it1 = spu_add(it0, 1); - - /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, tlevel->mask_s); - it0 = spu_and(it0, tlevel->mask_t); - is1 = spu_and(is1, tlevel->mask_s); - it1 = spu_and(it1, tlevel->mask_t); - - /* PIPE_TEX_WRAP_CLAMP */ - is0 = spu_clamp(is0, tlevel->max_s); - it0 = spu_clamp(it0, tlevel->max_t); - is1 = spu_clamp(is1, tlevel->max_s); - it1 = spu_clamp(it1, tlevel->max_t); - - /* get packed int texels */ - vector unsigned int texels[16]; - get_four_texels(tlevel, face, 
is0, it0, texels + 0); /* upper-left */ - get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ - get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ - get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ - - /* convert packed int texels to float colors */ - vector float ftexels[16]; - spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); - spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); - spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); - spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); - - /* Compute weighting factors in [0,1] - * Multiply texcoord by 1024, AND with 1023, convert back to float. - */ - vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); - vector signed int iss1024 = spu_convts(ss1024, 0); - iss1024 = spu_and(iss1024, 1023); - vector float sWeights0 = spu_convtf(iss1024, 10); - - vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); - vector signed int itt1024 = spu_convts(tt1024, 0); - itt1024 = spu_and(itt1024, 1023); - vector float tWeights0 = spu_convtf(itt1024, 10); - - /* 1 - sWeight and 1 - tWeight */ - vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); - vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); - - /* reds, for four pixels */ - ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), - spu_add(ftexels[8], ftexels[12])); - - /* greens, for four pixels */ - ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[13] = spu_mul(ftexels[13], 
spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), - spu_add(ftexels[9], ftexels[13])); - - /* blues, for four pixels */ - ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), - spu_add(ftexels[10], ftexels[14])); - - /* alphas, for four pixels */ - ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), - spu_add(ftexels[11], ftexels[15])); -} - - - -/** - * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h - */ -static INLINE void -transpose(vector unsigned int *mOut0, - vector unsigned int *mOut1, - vector unsigned int *mOut2, - vector unsigned int *mOut3, - vector unsigned int *mIn) -{ - vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */ - vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */ - vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */ - - vector unsigned char shufflehi = ((vector unsigned char) { - 0x00, 0x01, 0x02, 0x03, - 0x10, 0x11, 0x12, 0x13, - 0x04, 0x05, 0x06, 0x07, - 0x14, 0x15, 0x16, 0x17}); - vector unsigned char shufflelo = ((vector unsigned char) { - 0x08, 0x09, 0x0A, 0x0B, - 0x18, 0x19, 0x1A, 0x1B, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x1C, 0x1D, 0x1E, 0x1F}); - abcd = *(mIn+0); - efgh = *(mIn+1); - ijkl = *(mIn+2); - mnop = *(mIn+3); - - aibj = spu_shuffle(abcd, ijkl, shufflehi); - ckdl = spu_shuffle(abcd, ijkl, shufflelo); - emfn = spu_shuffle(efgh, 
mnop, shufflehi); - gohp = spu_shuffle(efgh, mnop, shufflelo); - - aeim = spu_shuffle(aibj, emfn, shufflehi); - bfjn = spu_shuffle(aibj, emfn, shufflelo); - cgko = spu_shuffle(ckdl, gohp, shufflehi); - dhlp = spu_shuffle(ckdl, gohp, shufflelo); - - *mOut0 = aeim; - *mOut1 = bfjn; - *mOut2 = cgko; - *mOut3 = dhlp; -} - - -/** - * Bilinear filtering, using int instead of float arithmetic for computing - * sample weights. - */ -void -sample_texture_2d_bilinear_int(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]) -{ - const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; - - /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, tlevel->scale_s, half); - vector float tt = spu_madd(t, tlevel->scale_t, half); - - /* convert float coords to fixed-pt coords with 7 fraction bits */ - vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */ - vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */ - - /* compute integer texel weights in [0, 127] */ - vector signed int sWeights0 = spu_and(is, 127); - vector signed int tWeights0 = spu_and(it, 127); - vector signed int sWeights1 = spu_sub(127, sWeights0); - vector signed int tWeights1 = spu_sub(127, tWeights0); - - /* texel coords: is0 = is / 128, it0 = is / 128 */ - vector signed int is0 = spu_rlmask(is, -7); - vector signed int it0 = spu_rlmask(it, -7); - - /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ - vector signed int is1 = spu_add(is0, 1); - vector signed int it1 = spu_add(it0, 1); - - /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, tlevel->mask_s); - it0 = spu_and(it0, tlevel->mask_t); - is1 = spu_and(is1, tlevel->mask_s); - it1 = spu_and(it1, tlevel->mask_t); - - /* PIPE_TEX_WRAP_CLAMP */ - is0 = spu_clamp(is0, tlevel->max_s); - it0 = spu_clamp(it0, tlevel->max_t); - is1 = spu_clamp(is1, tlevel->max_s); - 
it1 = spu_clamp(it1, tlevel->max_t); - - /* get packed int texels */ - vector unsigned int texels[16]; - get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ - get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ - get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ - get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ - - /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ - { - static const unsigned char ZERO = 0x80; - int i; - for (i = 0; i < 16; i++) { - texels[i] = spu_shuffle(texels[i], texels[i], - ((vector unsigned char) { - ZERO, ZERO, ZERO, 1, - ZERO, ZERO, ZERO, 2, - ZERO, ZERO, ZERO, 3, - ZERO, ZERO, ZERO, 0})); - } - } - - /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ - vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7, - texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15; - transpose(&texel0, &texel1, &texel2, &texel3, texels + 0); - transpose(&texel4, &texel5, &texel6, &texel7, texels + 4); - transpose(&texel8, &texel9, &texel10, &texel11, texels + 8); - transpose(&texel12, &texel13, &texel14, &texel15, texels + 12); - - /* computed weighted colors */ - vector unsigned int c0, c1, c2, c3, cSum; - - /* red */ - c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[0] = spu_convtf(cSum, 22); - - /* green */ - c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) 
sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[1] = spu_convtf(cSum, 22); - - /* blue */ - c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[2] = spu_convtf(cSum, 22); - - /* alpha */ - c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[3] = spu_convtf(cSum, 22); -} - - - -/** - * Compute level of detail factor from texcoords. 
- */ -static INLINE float -compute_lambda_2d(uint unit, vector float s, vector float t) -{ - uint baseLevel = 0; - float width = spu.texture[unit].level[baseLevel].width; - float height = spu.texture[unit].level[baseLevel].width; - float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); - float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); - float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); - float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); -#if 0 - /* ideal value */ - float x = dsdx * dsdx + dtdx * dtdx; - float y = dsdy * dsdy + dtdy * dtdy; - float rho = x > y ? x : y; - rho = sqrtf(rho); -#else - /* approximation */ - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5; -#endif - float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */ - return lambda; -} - - -/** - * Blend two sets of colors according to weight. - */ -static void -blend_colors(vector float c0[4], const vector float c1[4], float weight) -{ - vector float t = spu_splats(weight); - vector float dc0 = spu_sub(c1[0], c0[0]); - vector float dc1 = spu_sub(c1[1], c0[1]); - vector float dc2 = spu_sub(c1[2], c0[2]); - vector float dc3 = spu_sub(c1[3], c0[3]); - c0[0] = spu_madd(dc0, t, c0[0]); - c0[1] = spu_madd(dc1, t, c0[1]); - c0[2] = spu_madd(dc2, t, c0[2]); - c0[3] = spu_madd(dc3, t, c0[3]); -} - - -/** - * Texture sampling with level of detail selection and possibly mipmap - * interpolation. - */ -void -sample_texture_2d_lod(vector float s, vector float t, - uint unit, uint level_ignored, uint face, - vector float colors[4]) -{ - /* - * Note that we're computing a lambda/lod here that's used for all - * four pixels in the quad. 
- */ - float lambda = compute_lambda_2d(unit, s, t); - - (void) face; - (void) level_ignored; - - /* apply lod bias */ - lambda += spu.sampler[unit].lod_bias; - - /* clamp */ - if (lambda < spu.sampler[unit].min_lod) - lambda = spu.sampler[unit].min_lod; - else if (lambda > spu.sampler[unit].max_lod) - lambda = spu.sampler[unit].max_lod; - - if (lambda <= 0.0f) { - /* magnify */ - spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors); - } - else { - /* minify */ - if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { - /* sample two mipmap levels and interpolate */ - int level = (int) lambda; - if (level > (int) spu.texture[unit].max_level) - level = spu.texture[unit].max_level; - spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); - if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { - /* sample second mipmap level */ - float weight = lambda - (float) level; - level++; - if (level <= (int) spu.texture[unit].max_level) { - vector float colors2[4]; - spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2); - blend_colors(colors, colors2, weight); - } - } - } - else { - /* sample one mipmap level */ - int level = (int) (lambda + 0.5f); - if (level > (int) spu.texture[unit].max_level) - level = spu.texture[unit].max_level; - spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); - } - } -} - - -/** XXX need a SIMD version of this */ -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx); - const float ary = fabsf(ry); - const float arz = 
fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = (sc / ma + 1.0F) * 0.5F; - *newT = (tc / ma + 1.0F) * 0.5F; - - return face; -} - - - -void -sample_texture_cube(vector float s, vector float t, vector float r, - uint unit, vector float colors[4]) -{ - uint p, faces[4], level = 0; - float newS[4], newT[4]; - - /* Compute cube faces referenced by the four sets of texcoords. - * XXX we should SIMD-ize this. - */ - for (p = 0; p < 4; p++) { - float rx = spu_extract(s, p); - float ry = spu_extract(t, p); - float rz = spu_extract(r, p); - faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); - } - - if (faces[0] == faces[1] && - faces[0] == faces[2] && - faces[0] == faces[3]) { - /* GOOD! All four texcoords refer to the same cube face */ - s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; - t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; - spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors); - } - else { - /* BAD! 
The four texcoords refer to different faces */ - for (p = 0; p < 4; p++) { - vector float c[4]; - - spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]), - unit, level, faces[p], c); - - float red = spu_extract(c[0], p); - float green = spu_extract(c[1], p); - float blue = spu_extract(c[2], p); - float alpha = spu_extract(c[3], p); - - colors[0] = spu_insert(red, colors[0], p); - colors[1] = spu_insert(green, colors[1], p); - colors[2] = spu_insert(blue, colors[2], p); - colors[3] = spu_insert(alpha, colors[3], p); - } - } -} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h deleted file mode 100644 index 7b75b007b5..0000000000 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ /dev/null @@ -1,67 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SPU_TEXTURE_H
-#define SPU_TEXTURE_H
-
-
-#include "pipe/p_compiler.h"
-
-/* Texture sampling entry points.  The samplers take texcoord vectors
- * (one element per pixel of a quad) and return colors in colors[4].
- */
-extern void
-invalidate_tex_cache(void);
-
-
-extern void
-sample_texture_2d_nearest(vector float s, vector float t,
-                          uint unit, uint level, uint face,
-                          vector float colors[4]);
-
-
-extern void
-sample_texture_2d_bilinear(vector float s, vector float t,
-                           uint unit, uint level, uint face,
-                           vector float colors[4]);
-
-extern void
-sample_texture_2d_bilinear_int(vector float s, vector float t,
-                               uint unit, uint level, uint face,
-                               vector float colors[4]);
-
-/** Sample with the mipmap level derived from the texcoords; the level
- *  argument is ignored by the implementation. */
-extern void
-sample_texture_2d_lod(vector float s, vector float t,
-                      uint unit, uint level, uint face,
-                      vector float colors[4]);
-
-/** Sample a cube map; (s, t, r) is the per-pixel direction and the face is
- *  chosen per pixel by the implementation. */
-extern void
-sample_texture_cube(vector float s, vector float t, vector float r,
-                    uint unit, vector float colors[4]);
-
-
-#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h b/src/gallium/drivers/cell/spu/spu_tgsi_exec.h
deleted file mode 100644
index 6f2a3d30b9..0000000000
--- a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *
- **************************************************************************/
-
-#ifndef SPU_TGSI_EXEC_H
-#define SPU_TGSI_EXEC_H
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-
-#define NUM_CHANNELS 4  /* R,G,B,A */
-#define QUAD_SIZE    4  /* 4 pixel/quad */
-
-
-
-#define TGSI_EXEC_NUM_TEMPS       128
-#define TGSI_EXEC_NUM_IMMEDIATES  256
-
-/*
- * Locations of various utility registers (_IDX = Index, _CHAN = Channel).
- * Every index below is an offset past the TGSI_EXEC_NUM_TEMPS ordinary
- * temporaries; offsets 0 through 9 are used, which is what
- * TGSI_EXEC_NUM_TEMP_EXTRAS counts.
- */
-#define TGSI_EXEC_TEMP_00000000_IDX   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_00000000_CHAN  0
-
-#define TGSI_EXEC_TEMP_7FFFFFFF_IDX   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_7FFFFFFF_CHAN  1
-
-#define TGSI_EXEC_TEMP_80000000_IDX   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_80000000_CHAN  2
-
-#define TGSI_EXEC_TEMP_FFFFFFFF_IDX   (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_FFFFFFFF_CHAN  3
-
-#define TGSI_EXEC_TEMP_ONE_IDX        (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_ONE_CHAN       0
-
-#define TGSI_EXEC_TEMP_TWO_IDX        (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_TWO_CHAN       1
-
-#define TGSI_EXEC_TEMP_128_IDX        (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_128_CHAN       2
-
-#define TGSI_EXEC_TEMP_MINUS_128_IDX  (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_MINUS_128_CHAN 3
-
-#define TGSI_EXEC_TEMP_KILMASK_IDX    (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_KILMASK_CHAN   0
-
-#define TGSI_EXEC_TEMP_OUTPUT_IDX     (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_OUTPUT_CHAN    1
-
-#define TGSI_EXEC_TEMP_PRIMITIVE_IDX  (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_PRIMITIVE_CHAN 2
-
-/* NVIDIA condition code (CC) vector: four flag bits per component, one
- * byte per X/Y/Z/W component as laid out by the masks and shifts below.
- */
-#define TGSI_EXEC_CC_GT       0x01
-#define TGSI_EXEC_CC_EQ       0x02
-#define TGSI_EXEC_CC_LT       0x04
-#define TGSI_EXEC_CC_UN       0x08
-
-#define TGSI_EXEC_CC_X_MASK   0x000000ff
-#define TGSI_EXEC_CC_X_SHIFT  0
-#define TGSI_EXEC_CC_Y_MASK   0x0000ff00
-#define TGSI_EXEC_CC_Y_SHIFT  8
-#define TGSI_EXEC_CC_Z_MASK   0x00ff0000
-#define TGSI_EXEC_CC_Z_SHIFT  16
-#define TGSI_EXEC_CC_W_MASK   0xff000000
-#define TGSI_EXEC_CC_W_SHIFT  24
-
-#define TGSI_EXEC_TEMP_CC_IDX         (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_CC_CHAN        3
-
-#define TGSI_EXEC_TEMP_THREE_IDX      (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_THREE_CHAN     0
-
-#define TGSI_EXEC_TEMP_HALF_IDX       (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_CHAN      1
-
-/* execution mask, each value is either 0 or ~0 */
-#define TGSI_EXEC_MASK_IDX            (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_MASK_CHAN           2
-
-/* 4 register buffer for various purposes */
-#define TGSI_EXEC_TEMP_R0             (TGSI_EXEC_NUM_TEMPS + 4)
-#define TGSI_EXEC_NUM_TEMP_R          4
-
-#define TGSI_EXEC_TEMP_ADDR           (TGSI_EXEC_NUM_TEMPS + 8)
-#define TGSI_EXEC_NUM_ADDRS           1
-
-/* predicate register */
-#define TGSI_EXEC_TEMP_P0             (TGSI_EXEC_NUM_TEMPS + 9)
-#define TGSI_EXEC_NUM_PREDS           1
-
-#define TGSI_EXEC_NUM_TEMP_EXTRAS     10
-
-
-
-#define TGSI_EXEC_MAX_NESTING  32
-#define TGSI_EXEC_MAX_COND_NESTING  TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_LOOP_NESTING  TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_CALL_NESTING  TGSI_EXEC_MAX_NESTING
-
-/* The maximum number of input attributes per vertex. For 2D
- * input register files, this is the stride between two 1D
- * arrays.
- */
-#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
-
-/* The maximum number of constant vectors per constant buffer.
- */
-#define TGSI_EXEC_MAX_CONST_BUFFER  4096
-
-/* The maximum number of vertices per primitive */
-#define TGSI_MAX_PRIM_VERTICES 6
-
-/* The maximum number of primitives to be generated */
-#define TGSI_MAX_PRIMITIVES 64
-
-/* The maximum total number of vertices */
-#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* SPU_TGSI_EXEC_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
deleted file mode 100644
index 6905015a48..0000000000
--- a/src/gallium/drivers/cell/spu/spu_tile.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * - **************************************************************************/ - - - -#include "spu_tile.h" -#include "spu_main.h" - - -/** - * Get tile of color or Z values from main memory, put into SPU memory. - */ -void -get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) -{ - const uint offset = ty * spu.fb.width_tiles + tx; - const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); - const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start; - - src += offset * bytesPerTile; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - ASSERT_ALIGN16(tile); - /* - printf("get_tile: dest: %p src: 0x%x size: %d\n", - tile, (unsigned int) src, bytesPerTile); - */ - mfc_get(tile->ui, /* dest in local memory */ - (unsigned int) src, /* src in main memory */ - bytesPerTile, - tag, - 0, /* tid */ - 0 /* rid */); -} - - -/** - * Move tile of color or Z values from SPU memory to main memory. - */ -void -put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) -{ - const uint offset = ty * spu.fb.width_tiles + tx; - const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); - ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start; - - dst += offset * bytesPerTile; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - ASSERT_ALIGN16(tile); - /* - printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n", - spu.init.id, - tile, (unsigned int) dst, bytesPerTile); - */ - mfc_put((void *) tile->ui, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - bytesPerTile, - tag, - 0, /* tid */ - 0 /* rid */); -} - - -/** - * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled - * tiles back to the main framebuffer. 
- */ -void -really_clear_tiles(uint surfaceIndex) -{ - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - - if (surfaceIndex == 0) { - clear_c_tile(&spu.ctile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - } - } - } - else { - clear_z_tile(&spu.ztile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); - } - } - -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif -} diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h deleted file mode 100644 index 7bfb52be8f..0000000000 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ /dev/null @@ -1,75 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_TILE_H -#define SPU_TILE_H - - -#include <libmisc.h> -#include <spu_mfcio.h> -#include "spu_main.h" -#include "cell/common.h" - - - -extern void -get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); - -extern void -put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); - -extern void -really_clear_tiles(uint surfaceIndex); - - -static INLINE void -clear_c_tile(tile_t *ctile) -{ - memset32((uint*) ctile->ui, - spu.fb.color_clear_value, - TILE_SIZE * TILE_SIZE); -} - - -static INLINE void -clear_z_tile(tile_t *ztile) -{ - if (spu.fb.zsize == 2) { - memset16((ushort*) ztile->us, - spu.fb.depth_clear_value, - TILE_SIZE * TILE_SIZE); - } - else { - ASSERT(spu.fb.zsize != 0); - memset32((uint*) ztile->ui, - spu.fb.depth_clear_value, - TILE_SIZE * TILE_SIZE); - } -} - - -#endif /* SPU_TILE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c deleted file mode 100644 index efeebca27b..0000000000 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ /dev/null @@ -1,843 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Triangle rendering within a tile. 
- */
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_format.h"
-#include "util/u_math.h"
-#include "spu_colorpack.h"
-#include "spu_main.h"
-#include "spu_shuffle.h"
-#include "spu_texture.h"
-#include "spu_tile.h"
-#include "spu_tri.h"
-
-
-/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
-typedef vector unsigned int mask_t;
-
-
-
-/**
- * Simplified types taken from other parts of Gallium
- */
-struct vertex_header {
-   vector float data[1];   /* indexed by attribute slot; slot 0 is used as the position */
-};
-
-
-
-/* XXX fix this
- * Adds 0.99999 and truncates toward zero.  For negative non-integer X the
- * truncation does not give the true ceiling (e.g. -1.5 yields 0, not -1).
- */
-#undef CEILF
-#define CEILF(X) ((float) (int) ((X) + 0.99999f))
-
-/* Element positions of the four pixels within a 2x2 quad, and the
- * corresponding single-bit masks. */
-#define QUAD_TOP_LEFT     0
-#define QUAD_TOP_RIGHT    1
-#define QUAD_BOTTOM_LEFT  2
-#define QUAD_BOTTOM_RIGHT 3
-#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
-#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
-#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
-#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
-#define MASK_ALL          0xf
-
-/* Vector element indices used by the splat helpers and eval_inputs(). */
-#define CHAN0 0
-#define CHAN1 1
-#define CHAN2 2
-#define CHAN3 3
-
-/* Set to 1 to compile the vertex debug printing code. */
-#define DEBUG_VERTS 0
-
-/**
- * Triangle edge info
- */
-struct edge {
-   union {
-      struct {
-         float dx;  /**< X(v1) - X(v0), used only during setup */
-         float dy;  /**< Y(v1) - Y(v0), used only during setup */
-      };
-      vec_float4 ds;  /**< vector accessor for dx and dy */
-   };
-   float dxdy;  /**< dx/dy */
-   float sx, sy;  /**< first sample point coord */
-   int lines;  /**< number of lines on this edge */
-};
-
-/* Plane equation for one attribute; eval_inputs() computes
- * a0 + x * dadx + y * dady for each vector component. */
-struct interp_coef
-{
-   vector float a0;
-   vector float dadx;
-   vector float dady;
-};
-
-
-/**
- * Triangle setup info (derived from draw_stage).
- * Also used for line drawing (taking some liberties).
- */
-struct setup_stage {
-
-   /* Vertices are just an array of floats making up each attribute in
-    * turn.  Currently fixed at 4 floats, but should change in time.
-    * Codegen will help cope with this.
-    */
-   union {
-      struct {
-         const struct vertex_header *vmin;
-         const struct vertex_header *vmid;
-         const struct vertex_header *vmax;
-         const struct vertex_header *vprovoke;
-      };
-      qword vertex_headers;   /* the four pointers above viewed as one quadword */
-   };
-
-   struct edge ebot;   /* vmin -> vmid, see setup_sort_vertices() */
-   struct edge etop;   /* vmid -> vmax */
-   struct edge emaj;   /* vmin -> vmax */
-
-   float oneOverArea;   /* XXX maybe make into vector? */
-
-   uint facing;   /* index into spu.fragment_ops[] */
-
-   uint tx, ty;     /**< position of current tile (x, y) */
-
-   union {
-      struct {
-         int cliprect_minx;
-         int cliprect_miny;
-         int cliprect_maxx;
-         int cliprect_maxy;
-      };
-      qword cliprect;   /* the four ints above viewed as one quadword */
-   };
-
-   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
-
-   struct {
-      vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
-      int y;
-      unsigned y_flags;
-      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
-   } span;
-};
-
-/* Single file-scope setup state shared by all functions in this file. */
-static struct setup_stage setup;
-
-/* splatx/y/z/w: return a vector with element 0/1/2/3 of v in every element. */
-static INLINE vector float
-splatx(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN0));
-}
-
-static INLINE vector float
-splaty(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN1));
-}
-
-static INLINE vector float
-splatz(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN2));
-}
-
-static INLINE vector float
-splatw(vector float v)
-{
-   return spu_splats(spu_extract(v, CHAN3));
-}
-
-
-/**
- * Setup fragment shader inputs by evaluating triangle's vertex
- * attribute coefficient info.
- * \param x  quad x pos
- * \param y  quad y pos
- * \param fragZ  returns quad Z values
- * \param fragInputs  returns fragment program inputs
- * Note: this code could be incorporated into the fragment program
- * itself to avoid the loop and switch.
- */
-static void
-eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
-{
-   /* Per-pixel offsets of the quad's four pixels from (x, y). */
-   static const vector float deltaX = (const vector float) {0, 1, 0, 1};
-   static const vector float deltaY = (const vector float) {0, 0, 1, 1};
-
-   const uint posSlot = 0;
-   const vector float pos = setup.coef[posSlot].a0;
-   const vector float dposdx = setup.coef[posSlot].dadx;
-   const vector float dposdy = setup.coef[posSlot].dady;
-   const vector float fragX = spu_splats(x) + deltaX;
-   const vector float fragY = spu_splats(y) + deltaY;
-   vector float fragW, wInv;
-   uint i;
-
-   *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy);
-   fragW =  splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy);
-   wInv = spu_re(fragW);  /* 1 / w */
-
-   /* loop over fragment program inputs */
-   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
-      uint attr = i + 1;   /* slot 0 is the position handled above */
-      enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode;
-
-      /* constant term */
-      vector float a0 = setup.coef[attr].a0;
-      vector float r0 = splatx(a0);
-      vector float r1 = splaty(a0);
-      vector float r2 = splatz(a0);
-      vector float r3 = splatw(a0);
-
-      if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) {
-         /* linear term */
-         vector float dadx = setup.coef[attr].dadx;
-         vector float dady = setup.coef[attr].dady;
-         /* Use SPU intrinsics here to get slightly better code.
-          * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
-          */
-         r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
-         r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
-         r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
-         r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
-         if (interp == INTERP_PERSPECTIVE) {
-            /* perspective term */
-            r0 *= wInv;
-            r1 *= wInv;
-            r2 *= wInv;
-            r3 *= wInv;
-         }
-      }
-      fragInputs[CHAN0] = r0;
-      fragInputs[CHAN1] = r1;
-      fragInputs[CHAN2] = r2;
-      fragInputs[CHAN3] = r3;
-      fragInputs += 4;   /* four output vectors per attribute */
-   }
-}
-
-
-/**
- * Emit a quad (pass to next stage).  No clipping is done.
- * Note: about 1/5 to 1/7 of the time, mask is zero and this function
- * should be skipped.  But adding the test for that slows things down
- * overall.
- */
-static INLINE void
-emit_quad( int x, int y, mask_t mask)
-{
-   /* If any bits in mask are set... */
-   if (spu_extract(spu_orx(mask), 0)) {
-      /* position relative to the cliprect origin */
-      const int ix = x - setup.cliprect_minx;
-      const int iy = y - setup.cliprect_miny;
-
-      spu.cur_ctile_status = TILE_STATUS_DIRTY;
-      spu.cur_ztile_status = TILE_STATUS_DIRTY;
-
-      {
-         /*
-          * Run fragment shader, execute per-fragment ops, update fb/tile.
-          */
-         vector float inputs[4*4], outputs[2*4];
-         vector unsigned int kill_mask;
-         vector float fragZ;
-
-         eval_inputs((float) x, (float) y, &fragZ, inputs);
-
-         ASSERT(spu.fragment_program);
-         ASSERT(spu.fragment_ops);
-
-         /* Execute the current fragment program */
-         kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
-
-         mask = spu_andc(mask, kill_mask);
-
-         /* Execute per-fragment/quad operations, including:
-          * alpha test, z test, stencil test, blend and framebuffer writing.
-          * Note that there are two different fragment operations functions
-          * that can be called, one for front-facing fragments, and one
-          * for back-facing fragments.  (Often the two are the same;
-          * but in some cases, like two-sided stenciling, they can be
-          * very different.)  So choose the correct function depending
-          * on the calculated facing.
-          */
-         spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
-                          fragZ,
-                          outputs[0*4+0],
-                          outputs[0*4+1],
-                          outputs[0*4+2],
-                          outputs[0*4+3],
-                          mask);
-      }
-   }
-}
-
-
-/**
- * Given an X or Y coordinate, return the block/quad coordinate that it
- * belongs to.
- */
-static INLINE int
-block(int x)
-{
-   return x & ~1;   /* clear the low bit */
-}
-
-
-/**
- * Render a horizontal span of quads
- */
-static void
-flush_spans(void)
-{
-   int minleft, maxright;
-
-   const int l0 = spu_extract(setup.span.quad, 0);
-   const int l1 = spu_extract(setup.span.quad, 1);
-   const int r0 = spu_extract(setup.span.quad, 2);
-   const int r1 = spu_extract(setup.span.quad, 3);
-
-   switch (setup.span.y_flags) {
-   case 0x3:
-      /* both odd and even lines written (both quad rows) */
-      minleft = MIN2(l0, l1);
-      maxright = MAX2(r0, r1);
-      break;
-
-   case 0x1:
-      /* only even line written (quad top row) */
-      minleft = l0;
-      maxright = r0;
-      break;
-
-   case 0x2:
-      /* only odd line written (quad bottom row) */
-      minleft = l1;
-      maxright = r1;
-      break;
-
-   default:
-      return;
-   }
-
-   /* OK, we're very likely to need the tile data now.
-    * clear or finish waiting if needed.
-    */
-   if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
-      /* wait for mfc_get() to complete */
-      //printf("SPU: %u: waiting for ctile\n", spu.init.id);
-      wait_on_mask(1 << TAG_READ_TILE_COLOR);
-      spu.cur_ctile_status = TILE_STATUS_CLEAN;
-   }
-   else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
-      //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
-      clear_c_tile(&spu.ctile);
-      spu.cur_ctile_status = TILE_STATUS_DIRTY;
-   }
-   ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
-
-   if (spu.read_depth_stencil) {
-      if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
-         /* wait for mfc_get() to complete */
-         //printf("SPU: %u: waiting for ztile\n", spu.init.id);
-         wait_on_mask(1 << TAG_READ_TILE_Z);
-         spu.cur_ztile_status = TILE_STATUS_CLEAN;
-      }
-      else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
-         //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
-         clear_z_tile(&spu.ztile);
-         spu.cur_ztile_status = TILE_STATUS_DIRTY;
-      }
-      ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
-   }
-
-   /* XXX this loop could be moved into the above switch cases... */
-   
-   /* Setup for mask calculation */
-   const vec_int4 quad_LlRr = setup.span.quad;
-   const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
-   const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
-   const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
-
-   const vec_int4 twos = spu_splats(2);
-
-   const int x = block(minleft);
-   vec_int4 xs = {x, x+1, x, x+1};
-
-   for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
-      /**
-       * Computes mask to indicate which pixels in the 2x2 quad are actually
-       * inside the triangle's bounds.
-       */
-
-      /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
-      const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
-      const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);
-
-      /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
-      const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
-
-      /* Combine results to create mask */
-      const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
-
-      emit_quad(spu_extract(xs, 0), setup.span.y, mask);
-   }
-
-   setup.span.y = 0;
-   setup.span.y_flags = 0;
-   /* Zero right elements */
-   setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
-}
-
-
-#if DEBUG_VERTS
-static void
-print_vertex(const struct vertex_header *v)
-{
-   uint i;
-   fprintf(stderr, "  Vertex: (%p)\n", v);
-   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
-      fprintf(stderr, "    %d: %f %f %f %f\n",  i, 
-              spu_extract(v->data[i], 0),
-              spu_extract(v->data[i], 1),
-              spu_extract(v->data[i], 2),
-              spu_extract(v->data[i], 3));
-   }
-}
-#endif
-
-/* Returns the minimum of each slot of two vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n]);
- */
-static qword
-minfq(qword q0, qword q1)
-{
-   const qword q0q1m = si_fcgt(q0, q1);
-   return si_selb(q0, q1, q0q1m);
-}
-
-/* Returns the minimum of each slot of three vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n],q2[n]);
- */
-static qword
-min3fq(qword q0, qword q1, qword q2)
-{
-   return minfq(minfq(q0, q1), q2);
-}
-
-/* Returns the maximum of each slot of two vec_float4s as qwords.
- * i.e. return[n] = max(q0[n],q1[n]);
- */
-static qword
-maxfq(qword q0, qword q1) {
-   const qword q0q1m = si_fcgt(q0, q1);
-   return si_selb(q1, q0, q0q1m);
-}
-
-/* Returns the maximum of each slot of three vec_float4s as qwords.
- * i.e. return[n] = max(q0[n],q1[n],q2[n]);
- */
-static qword
-max3fq(qword q0, qword q1, qword q2) {
-   return maxfq(maxfq(q0, q1), q2);
-}
-
-/**
- * Sort vertices from top to bottom.
- * Compute area and determine front vs. back facing.
- * Do coarse clip test against tile bounds
- * \param vs  quadword whose first three words are read as pointers to the
- *            triangle's vertex headers
- * \return  FALSE if tri is totally outside tile, TRUE otherwise
- */
-static boolean
-setup_sort_vertices(const qword vs)
-{
-   float area, sign;
-   /* NOTE(review): the debug block below refers to v0, v1 and v2, which
-    * are not declared in this function, so it presumably no longer
-    * compiles when DEBUG_VERTS is set to 1 - confirm before enabling.
-    */
-#if DEBUG_VERTS
-   if (spu.init.id==0) {
-      fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
-      print_vertex(v0);
-      print_vertex(v1);
-      print_vertex(v2);
-   }
-#endif
-
-   {
-      /* Load the float values for various processing... */
-      const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]);
-      const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]);
-      const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]);
-
-      /* Check if triangle is completely outside the tile bounds
-       * Find the min and max x and y positions of the three points */
-      const qword minf = min3fq(f0, f1, f2);
-      const qword maxf = max3fq(f0, f1, f2);
-
-      /* Compare min and max against cliprect vals */
-      const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b));
-      const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0));
-
-      /* Use a little magic to work out of the tri is visible or not */
-      if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE;
-
-      /* determine bottom to top order of vertices */
-      /* A table of shuffle patterns for putting vertex_header pointers into
-         correct order.  Quite magical. */
-      const qword sort_order_patterns[] = {
-         SHUFB4(A,B,C,C),
-         SHUFB4(C,A,B,C),
-         SHUFB4(A,C,B,C),
-         SHUFB4(B,C,A,C),
-         SHUFB4(B,A,C,C),
-         SHUFB4(C,B,A,C) };
-
-      /* Collate y values into two vectors for comparison.
-         Using only one shuffle constant! ;) */
-      const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C));
-      const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C));
-      const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C));
-      const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C));
-
-      /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
-      const qword compare = si_fcgt(y_012, y_120);
-      /* Compress the result of the comparison into 4 bits */
-      const qword gather = si_gb(compare);
-      /* Subtract one to attain the index into the LUT.  Magical.
-       * NOTE(review): the index is unsigned, so a gathered value of 0
-       * would wrap and index outside the six-entry table above - confirm
-       * whether that input (e.g. all y values equal) can reach here.
-       */
-      const unsigned int index = si_to_uint(gather) - 1;
-
-      /* Load the appropriate pattern and construct the desired vector. */
-      setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);
-
-      /* Using the result of the comparison, set sign.
-         Very magical. */
-      sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f);
-   }
-
-   /* Edge deltas between the sorted vertices' positions (slot 0). */
-   setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
-   setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
-   setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
-
-   /*
-    * Compute triangle's area.  Use 1/area to compute partial
-    * derivatives of attributes later.
-    */
-   area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
-
-   setup.oneOverArea = 1.0f / area;
-
-   /* The product of area * sign indicates front/back orientation (0/1).
-    * Just in case someone gets the bright idea of switching the front
-    * and back constants without noticing that we're assuming their
-    * values in this operation, also assert that the values are
-    * what we think they are.
-    */
-   ASSERT(CELL_FACING_FRONT == 0);
-   ASSERT(CELL_FACING_BACK == 1);
-   setup.facing = (area * sign > 0.0f)
-      ^ (!spu.rasterizer.front_ccw);
-
-   return TRUE;
-}
-
-
-/**
- * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- * The value value comes from vertex->data[slot].
- * The result will be put into setup.coef[slot].a0.
- * \param slot which attribute slot - */ -static INLINE void -const_coeff4(uint slot) -{ - setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].a0 = setup.vprovoke->data[slot]; -} - - -/** - * As above, but interp setup all four vector components. - */ -static INLINE void -tri_linear_coeff4(uint slot) -{ - const vector float vmin_d = setup.vmin->data[slot]; - const vector float vmid_d = setup.vmid->data[slot]; - const vector float vmax_d = setup.vmax->data[slot]; - const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); - const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); - - vector float botda = vmid_d - vmin_d; - vector float majda = vmax_d - vmin_d; - - vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), - spu_mul(botda, spu_splats(setup.emaj.dy))); - vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), - spu_mul(majda, spu_splats(setup.ebot.dx))); - - setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); - setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - - vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - - setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. 
- */ -static void -tri_persp_coeff4(uint slot) -{ - const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); - const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); - - const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); - const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); - const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); - - vector float vmin_d = setup.vmin->data[slot]; - vector float vmid_d = setup.vmid->data[slot]; - vector float vmax_d = setup.vmax->data[slot]; - - vmin_d = spu_mul(vmin_d, vmin_w); - vmid_d = spu_mul(vmid_d, vmid_w); - vmax_d = spu_mul(vmax_d, vmax_w); - - vector float botda = vmid_d - vmin_d; - vector float majda = vmax_d - vmin_d; - - vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), - spu_mul(botda, spu_splats(setup.emaj.dy))); - vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), - spu_mul(majda, spu_splats(setup.ebot.dx))); - - setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); - setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - - vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - - setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); -} - - - -/** - * Compute the setup.coef[] array dadx, dady, a0 values. - * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. 
- */ -static void -setup_tri_coefficients(void) -{ - uint i; - - for (i = 0; i < spu.vertex_info.num_attribs; i++) { - switch (spu.vertex_info.attrib[i].interp_mode) { - case INTERP_NONE: - break; - case INTERP_CONSTANT: - const_coeff4(i); - break; - case INTERP_POS: - /* fall-through */ - case INTERP_LINEAR: - tri_linear_coeff4(i); - break; - case INTERP_PERSPECTIVE: - tri_persp_coeff4(i); - break; - default: - ASSERT(0); - } - } -} - - -static void -setup_tri_edges(void) -{ - float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; - float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; - - float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; - float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; - float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; - - setup.emaj.sy = CEILF(vmin_y); - setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); - setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; - setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; - - setup.etop.sy = CEILF(vmid_y); - setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); - setup.etop.dxdy = setup.etop.dx / setup.etop.dy; - setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; - - setup.ebot.sy = CEILF(vmin_y); - setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); - setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; - setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. 
- */ -static void -subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) -{ - const int minx = setup.cliprect_minx; - const int maxx = setup.cliprect_maxx; - const int miny = setup.cliprect_miny; - const int maxy = setup.cliprect_maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - ASSERT((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - finish_y = sy + lines; - - if (start_y < miny) - start_y = miny; - - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup.span.y) { - flush_spans(); - setup.span.y = block(_y); - } - - int offset = _y&1; - vec_int4 quad_LlRr = {left, left, right, right}; - /* Store left and right in 0 or 1 row of quad based on offset */ - setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset)); - setup.span.y_flags |= 1<<offset; - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Draw triangle into tile at (tx, ty) (tile coords) - * The tile data should have already been fetched. 
- */ -boolean -tri_draw(const qword vs, - uint tx, uint ty) -{ - setup.tx = tx; - setup.ty = ty; - - /* set clipping bounds to tile bounds */ - const qword clipbase = (qword)((vec_uint4){tx, ty}); - const qword clipmin = si_mpyui(clipbase, TILE_SIZE); - const qword clipmax = si_ai(clipmin, TILE_SIZE); - setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b)); - - if(!setup_sort_vertices(vs)) { - return FALSE; /* totally clipped */ - } - - setup_tri_coefficients(); - setup_tri_edges(); - - setup.span.y = 0; - setup.span.y_flags = 0; - /* Zero right elements */ - setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); - - if (setup.oneOverArea < 0.0) { - /* emaj on left */ - subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); - subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); - } - else { - /* emaj on right */ - subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); - subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); - } - - flush_spans(); - - return TRUE; -} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h deleted file mode 100644 index 82e3b19ad7..0000000000 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef SPU_TRI_H -#define SPU_TRI_H - - -extern boolean -tri_draw(const qword vs, uint tx, uint ty); - - -#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c deleted file mode 100644 index 24057e29e3..0000000000 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ /dev/null @@ -1,77 +0,0 @@ - -#include "cell/common.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_debug.h" -#include "tgsi/tgsi_parse.h" -//#include "tgsi_build.h" -#include "tgsi/tgsi_util.h" - -unsigned -tgsi_util_get_src_register_swizzle( - const struct tgsi_src_register *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->SwizzleX; - case 1: - return reg->SwizzleY; - case 2: - return reg->SwizzleZ; - case 3: - return reg->SwizzleW; - default: - ASSERT( 0 ); - } - return 0; -} - - -unsigned -tgsi_util_get_full_src_register_swizzle( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - return tgsi_util_get_src_register_swizzle( - reg->Register, - component ); -} - - -unsigned -tgsi_util_get_full_src_register_sign_mode( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned sign_mode; - - if( reg->RegisterExtMod.Absolute ) { - /* Consider only the post-abs negation. */ - - if( reg->RegisterExtMod.Negate ) { - sign_mode = TGSI_UTIL_SIGN_SET; - } - else { - sign_mode = TGSI_UTIL_SIGN_CLEAR; - } - } - else { - /* Accumulate the three negations. 
*/ - - unsigned negate; - - negate = reg->Register.Negate; - if( reg->RegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { - sign_mode = TGSI_UTIL_SIGN_TOGGLE; - } - else { - sign_mode = TGSI_UTIL_SIGN_KEEP; - } - } - - return sign_mode; -} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c deleted file mode 100644 index 087963960d..0000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ /dev/null @@ -1,146 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Ian Romanick <idr@us.ibm.com> - */ - -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "spu_exec.h" -#include "spu_vertex_shader.h" -#include "spu_main.h" -#include "spu_dcache.h" - -typedef void (*spu_fetch_func)(qword *out, const qword *in, - const qword *shuffle_data); - - -PIPE_ALIGN_VAR(16) static const qword -fetch_shuffle_data[5] = { - /* Shuffle used by CVT_64_FLOAT - */ - { - 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - }, - - /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED - */ - { - 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, - 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, - }, - - /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED - */ - { - 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, - 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, - }, - - /* High value shuffle used by trans4x4. - */ - { - 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, - 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 - }, - - /* Low value shuffle used by trans4x4. - */ - { - 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, - 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F - } -}; - - -/** - * Fetch vertex attributes for 'count' vertices. 
- */ -static void generic_vertex_fetch(struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count) -{ - unsigned nr_attrs = draw->vertex_fetch.nr_attrs; - unsigned attr; - - ASSERT(count <= 4); - -#if DRAW_DBG - printf("SPU: %s count = %u, nr_attrs = %u\n", - __FUNCTION__, count, nr_attrs); -#endif - - /* loop over vertex attributes (vertex shader inputs) - */ - for (attr = 0; attr < nr_attrs; attr++) { - const unsigned pitch = draw->vertex_fetch.pitch[attr]; - const uint64_t src = draw->vertex_fetch.src_ptr[attr]; - const spu_fetch_func fetch = (spu_fetch_func) - (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); - unsigned i; - unsigned idx; - const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; - const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - PIPE_ALIGN_VAR(16) qword in[2 * 4]; - - - /* Fetch four attributes for four vertices. - */ - idx = 0; - for (i = 0; i < count; i++) { - const uint64_t addr = src + (elts[i] * pitch); - -#if DRAW_DBG - printf("SPU: fetching = 0x%llx\n", addr); -#endif - - spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); - idx += quads_per_entry; - } - - /* Be nice and zero out any missing vertices. - */ - (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); - - - /* Convert all 4 vertices to vectors of float. - */ - (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); - } -} - - -void spu_update_vertex_fetch( struct spu_vs_context *draw ) -{ - draw->vertex_fetch.fetch_func = generic_vertex_fetch; -} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c deleted file mode 100644 index d6febd36f4..0000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ /dev/null @@ -1,245 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Brian Paul - * Ian Romanick <idr@us.ibm.com> - */ - -#include <spu_mfcio.h> - -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "draw/draw_private.h" -#include "draw/draw_context.h" -#include "cell/common.h" -#include "spu_vertex_shader.h" -#include "spu_exec.h" -#include "spu_main.h" - - -#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) - - -#define CLIP_RIGHT_BIT 0x01 -#define CLIP_LEFT_BIT 0x02 -#define CLIP_TOP_BIT 0x04 -#define CLIP_BOTTOM_BIT 0x08 -#define CLIP_FAR_BIT 0x10 -#define CLIP_NEAR_BIT 0x20 - - -static INLINE float -dot4(const float *a, const float *b) -{ - return (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); -} - -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } - - return mask; -} - - -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. 
- * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ -static void -run_vertex_program(struct spu_vs_context *draw, - unsigned elts[4], unsigned count, - const uint64_t *vOut) -{ - struct spu_exec_machine *machine = &draw->machine; - unsigned int j; - - PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; - PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - ASSERT(count <= 4); - - machine->Processor = TGSI_PROCESSOR_VERTEX; - - ASSERT_ALIGN16(draw->constants); - machine->Consts = (float (*)[4]) draw->constants; - - machine->Inputs = inputs; - machine->Outputs = outputs; - - spu_vertex_fetch( draw, machine, elts, count ); - - /* run shader */ - spu_exec_machine_run( machine ); - - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - PIPE_ALIGN_VAR(16) - unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE]; - struct vertex_header *const tmpOut = - (struct vertex_header *) buffer; - const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) - + (sizeof(float) * 4 - * draw->num_vs_outputs)); - - mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. 
- */ - x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, - draw->nr_planes); - tmpOut->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - tmpOut->data[0][0] = x * scale[0] + trans[0]; - tmpOut->data[0][1] = y * scale[1] + trans[1]; - tmpOut->data[0][2] = z * scale[2] + trans[2]; - tmpOut->data[0][3] = w; - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - - mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); - } /* loop over vertices */ -} - - -PIPE_ALIGN_VAR(16) unsigned char -immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]; - - -void -spu_bind_vertex_shader(struct spu_vs_context *draw, - struct cell_shader_info *vs) -{ - const unsigned immediate_addr = vs->immediates; - const unsigned immediate_size = - ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) - + (immediate_addr & 0x0f)); - - - mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, - TAG_VERTEX_BUFFER, 0, 0); - - draw->machine.Instructions = (struct tgsi_full_instruction *) - vs->instructions; - draw->machine.NumInstructions = vs->num_instructions; - - draw->machine.Declarations = (struct tgsi_full_declaration *) - vs->declarations; - draw->machine.NumDeclarations = vs->num_declarations; - - draw->num_vs_outputs = vs->num_outputs; - - /* specify the shader to interpret/execute */ - 
spu_exec_machine_init(&draw->machine, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/, - PIPE_SHADER_VERTEX); - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], - sizeof(float) * 4 * vs->num_immediates); -} - - -void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs) -{ - unsigned i; - - (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); - draw->nr_planes = vs->nr_planes; - draw->vertex_fetch.nr_attrs = vs->nr_attrs; - - for (i = 0; i < vs->num_elts; i += 4) { - const unsigned batch_size = MIN2(vs->num_elts - i, 4); - - run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); - } -} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h deleted file mode 100644 index 4c74f5e74d..0000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef SPU_VERTEX_SHADER_H -#define SPU_VERTEX_SHADER_H - -#include "cell/common.h" -#include "pipe/p_format.h" -#include "spu_exec.h" - -struct spu_vs_context; - -typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count ); - -struct spu_vs_context { - struct pipe_viewport_state viewport; - - struct { - uint64_t src_ptr[PIPE_MAX_ATTRIBS]; - unsigned pitch[PIPE_MAX_ATTRIBS]; - unsigned size[PIPE_MAX_ATTRIBS]; - unsigned code_offset[PIPE_MAX_ATTRIBS]; - unsigned nr_attrs; - boolean dirty; - - spu_full_fetch_func fetch_func; - void *code; - } vertex_fetch; - - /* Clip derived state: - */ - float plane[12][4]; - unsigned nr_planes; - - struct spu_exec_machine machine; - const float (*constants)[4]; - - unsigned num_vs_outputs; -}; - -extern void spu_update_vertex_fetch(struct spu_vs_context *draw); - -static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, 
- unsigned count) -{ - if (draw->vertex_fetch.dirty) { - spu_update_vertex_fetch(draw); - draw->vertex_fetch.dirty = 0; - } - - (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); -} - -struct cell_command_vs; - -extern void -spu_bind_vertex_shader(struct spu_vs_context *draw, - struct cell_shader_info *vs); - -extern void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs); - -#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile index f8f6c81b3f..867b2da323 100644 --- a/src/gallium/targets/libgl-xlib/Makefile +++ b/src/gallium/targets/libgl-xlib/Makefile @@ -29,7 +29,6 @@ DEFINES += \ -DGALLIUM_RBUG \ -DGALLIUM_TRACE \ -DGALLIUM_GALAHAD -#-DGALLIUM_CELL will be defined by the config */ XLIB_TARGET_SOURCES = \ xlib.c @@ -38,7 +37,6 @@ XLIB_TARGET_SOURCES = \ XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o) -# Note: CELL_SPU_LIB is only defined for cell configs LIBS = \ $(GALLIUM_DRIVERS) \ @@ -50,7 +48,6 @@ LIBS = \ $(TOP)/src/mapi/glapi/libglapi.a \ $(TOP)/src/mesa/libmesagallium.a \ $(GALLIUM_AUXILIARIES) \ - $(CELL_SPU_LIB) \ # LLVM diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript index ad8b0992e4..25a4582d7a 100644 --- a/src/gallium/targets/libgl-xlib/SConscript +++ b/src/gallium/targets/libgl-xlib/SConscript @@ -42,11 +42,6 @@ if True: if env['llvm']: env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE']) env.Prepend(LIBS = [llvmpipe]) - -if False: - # TODO: Detect Cell SDK - env.Append(CPPDEFINES = 'GALLIUM_CELL') - env.Prepend(LIBS = [cell]) # libGL.so.1.5 libgl_1_5 = env.SharedLibrary( diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c index 1a5892b94a..0ede7e6096 100644 --- a/src/gallium/targets/libgl-xlib/xlib.c +++ b/src/gallium/targets/libgl-xlib/xlib.c @@ -42,7 +42,7 @@ /* Helper function to build a subset of a driver stack consisting of - * one of 
the software rasterizers (cell, llvmpipe, softpipe) and the + * one of the software rasterizers (llvmpipe, softpipe) and the * xlib winsys. */ static struct pipe_screen * |