author     Keith Whitwell <keithw@vmware.com>    2009-12-21 19:18:41 +0000
committer  Keith Whitwell <keithw@vmware.com>    2009-12-21 19:18:41 +0000
commit     a5585cb533af3d4e5d5324d5f526447b98597402
tree       6706dbb8b4f994b919e247647c3e8853d067b45c /src/gallium/drivers
parent     d288a30610767f87e3e7c069730d4bc255246568
parent     574715d8368f99c0a5720a9676385d58d6cfdf30
Merge commit 'origin/master' into i965g-restart
Conflicts:
	SConstruct
	configs/default
	configs/linux-dri
Diffstat (limited to 'src/gallium/drivers')
223 files changed, 29691 insertions, 3060 deletions
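The hunks below are dominated by a few mechanical interface updates: the TGSI parser's FullSrcRegisters[]/SrcRegister and FullDstRegisters[]/DstRegister fields become Src[]/Register and Dst[]/Register (with DeclarationRange becoming Range), pipe_texture's per-level width/height/depth arrays are replaced by width0/height0/depth0 plus u_minify() from util/u_math.h, and the sampler hooks are split into fragment and vertex variants. The sketch below is orientation only, not part of the commit; it assumes the Gallium headers as they stand after this merge, and example_new_interface is an invented name.

/*
 * Orientation sketch only -- not part of the commit.  Field and helper
 * names are taken from the hunks below; the function name is invented.
 */
#include "tgsi/tgsi_parse.h"   /* struct tgsi_full_instruction */
#include "pipe/p_state.h"      /* struct pipe_texture */
#include "util/u_math.h"       /* u_minify() */

static unsigned
example_new_interface(const struct tgsi_full_instruction *inst,
                      const struct pipe_texture *pt, unsigned level)
{
   /* was: inst->FullSrcRegisters[0].SrcRegister.File */
   unsigned src_file = inst->Src[0].Register.File;

   /* was: inst->FullDstRegisters[0].DstRegister.WriteMask */
   unsigned write_mask = inst->Dst[0].Register.WriteMask;

   /* was: pt->width[level]; the per-level size arrays are gone, so the
    * level-0 size is minified on demand instead */
   unsigned level_width = u_minify(pt->width0, level);

   return src_file | write_mask | level_width;
}

u_minify(size, level) effectively halves the base size once per level and clamps at 1, which is why the per-level size arrays could be dropped from pipe_texture.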
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 19e3ab0844..4d43f65d29 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -237,8 +237,8 @@ is_register_src(struct codegen *gen, int channel, if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { return FALSE; } - if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || - src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + if (src->Register.File == TGSI_FILE_TEMPORARY || + src->Register.File == TGSI_FILE_IMMEDIATE) { return TRUE; } return FALSE; @@ -249,7 +249,7 @@ static boolean is_memory_dst(struct codegen *gen, int channel, const struct tgsi_full_dst_register *dst) { - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + if (dst->Register.File == TGSI_FILE_OUTPUT) { return TRUE; } else { @@ -279,15 +279,15 @@ get_src_reg(struct codegen *gen, assert(swizzle <= TGSI_SWIZZLE_W); { - int index = src->SrcRegister.Index; + int index = src->Register.Index; assert(swizzle < 4); - if (src->SrcRegister.Indirect) { + if (src->Register.Indirect) { /* XXX unfinished */ } - switch (src->SrcRegister.File) { + switch (src->Register.File) { case TGSI_FILE_TEMPORARY: reg = gen->temp_regs[index][swizzle]; break; @@ -374,12 +374,12 @@ get_dst_reg(struct codegen *gen, { int reg = -1; - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_TEMPORARY: if (gen->if_nesting > 0 || gen->loop_nesting > 0) reg = get_itemp(gen); else - reg = gen->temp_regs[dest->DstRegister.Index][channel]; + reg = gen->temp_regs[dest->Register.Index][channel]; break; case TGSI_FILE_OUTPUT: reg = get_itemp(gen); @@ -419,10 +419,10 @@ store_dest_reg(struct codegen *gen, } #endif - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_TEMPORARY: if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int d_reg = gen->temp_regs[dest->Register.Index][channel]; int exec_reg = get_exec_mask_reg(gen); /* Mix d with new value according to exec mask: * d[i] = mask_reg[i] ? 
value_reg : d_reg @@ -437,7 +437,7 @@ store_dest_reg(struct codegen *gen, case TGSI_FILE_OUTPUT: { /* offset is measured in quadwords, not bytes */ - int offset = dest->DstRegister.Index * 4 + channel; + int offset = dest->Register.Index * 4 + channel; if (gen->if_nesting > 0 || gen->loop_nesting > 0) { int exec_reg = get_exec_mask_reg(gen); int curval_reg = get_itemp(gen); @@ -544,7 +544,7 @@ emit_epilogue(struct codegen *gen) #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ for (ch = 0; ch < 4; ch++) \ - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) + if (inst->Dst[0].Register.WriteMask & (1 << ch)) static boolean @@ -552,7 +552,7 @@ emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch = 0, src_reg, addr_reg; - src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + src_reg = get_src_reg(gen, ch, &inst->Src[0]); addr_reg = get_address_reg(gen); /* convert float to int */ @@ -570,19 +570,19 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, src_reg[4], dst_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && - is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { + if (is_register_src(gen, ch, &inst->Src[0]) && + is_memory_dst(gen, ch, &inst->Dst[0])) { /* special-case: register to memory store */ - store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]); } else { spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]); } } @@ -601,9 +601,9 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ @@ -626,7 +626,7 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } /* Free any intermediate temps we allocated */ @@ -645,16 +645,16 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); + d_reg[ch] = get_dst_reg(gen, ch, 
&inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); return TRUE; @@ -671,10 +671,10 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) /* setup/get src/dst/temp regs */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -687,7 +687,7 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); return TRUE; @@ -704,8 +704,8 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], d_reg[4], tmp_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -726,7 +726,7 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -747,8 +747,8 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* d = sign bit cleared in s1 */ @@ -757,7 +757,7 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -775,12 +775,12 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s2x_reg, s2y_reg, s2z_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); + s1z_reg = 
get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t0 = x0 * x1 */ spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); @@ -795,9 +795,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fa(gen->f, t0_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -815,14 +815,14 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1x_reg, s1y_reg, s1z_reg, s1w_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); - s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); + s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); + s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); + s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]); + s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); /* t0 = x0 * x1 */ spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); @@ -840,9 +840,9 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fa(gen->f, t0_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -857,31 +857,31 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) { /* XXX rewrite this function to look more like DP3/DP4 */ int ch; - int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); int tmp_reg = get_itemp(gen); /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); /* t = y0 * y1 + t */ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = z0 * z1 + t */ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); /* t = w1 + t */ spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); 
FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -898,9 +898,9 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) int src_reg[3]; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]); + src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]); /* t0 = x * x */ spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); @@ -919,10 +919,10 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fi(gen->f, t1_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); /* dst = src[ch] * t1 */ spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -936,48 +936,48 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); int tmp_reg = get_itemp(gen); /* t = z0 * y1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = y0 * z1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { - store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) { + store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); } - s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = x0 * z1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); /* t = z0 * x1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { - store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) { + store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); } - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = 
get_src_reg(gen, CHAN_X, &inst->Src[1]); /* t = y0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); /* t = x0 * y1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { - store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) { + store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); } free_itemps(gen); @@ -1000,9 +1000,9 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { @@ -1043,7 +1043,7 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1060,10 +1060,10 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int s1_reg = get_src_reg(gen, ch, &inst->Src[0]); + int s2_reg = get_src_reg(gen, ch, &inst->Src[1]); + int s3_reg = get_src_reg(gen, ch, &inst->Src[2]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); int zero_reg = get_itemp(gen); spe_zero(gen->f, zero_reg); @@ -1072,7 +1072,7 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); free_itemps(gen); } @@ -1090,8 +1090,8 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], d_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* Convert float to int */ @@ -1105,7 +1105,7 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1129,8 +1129,8 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + 
d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1156,7 +1156,7 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1177,8 +1177,8 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1210,7 +1210,7 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) /* store result */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1272,7 +1272,7 @@ emit_function_call(struct codegen *gen, if (scalar) { for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]); + s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]); } /* we'll call the function, put the return value in this register, * then replicate it across all write-enabled components in d_reg. @@ -1287,11 +1287,11 @@ emit_function_call(struct codegen *gen, if (!scalar) { for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); + s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]); } } - d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); if (!scalar || !func_called) { /* for a scalar function, we'll really only call the function once */ @@ -1336,7 +1336,7 @@ emit_function_call(struct codegen *gen, spe_move(gen->f, d_reg, retval_reg); } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); free_itemps(gen); } @@ -1352,7 +1352,7 @@ static boolean emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { const uint target = inst->InstructionExtTexture.Texture; - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; uint addr; int ch; int coord_regs[4], d_regs[4]; @@ -1373,14 +1373,14 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) return FALSE; } - assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER); spe_comment(gen->f, -4, "CALL tex:"); /* get src/dst reg info */ for (ch = 0; ch < 4; ch++) { - coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } { @@ -1425,7 +1425,7 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]); free_itemps(gen); } @@ -1452,7 +1452,7 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) /* get src regs */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s_regs[ch] = get_src_reg(gen, ch, 
&inst->Src[0]); } /* test if any src regs are < 0 */ @@ -1500,9 +1500,9 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1518,7 +1518,7 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1575,7 +1575,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) /* update conditional execution mask with the predicate register */ int tmp_reg = get_itemp(gen); - int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + int s1_reg = get_src_reg(gen, channel, &inst->Src[0]); /* tmp = (s1_reg == 0) */ spe_ceqi(gen->f, tmp_reg, s1_reg, 0); @@ -1699,8 +1699,8 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, int ch; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int s_reg = get_src_reg(gen, ch, &inst->Src[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); int t1_reg = get_itemp(gen); int t2_reg = get_itemp(gen); @@ -1909,8 +1909,8 @@ emit_declaration(struct cell_context *cell, switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { assert(i < MAX_TEMPS); for (ch = 0; ch < 4; ch++) { diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index ccd0fef6e8..c18a5d0635 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -383,10 +383,10 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.delete_blend_state = cell_delete_blend_state; cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_sampler_states = cell_bind_sampler_states; + cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states; cell->pipe.delete_sampler_state = cell_delete_sampler_state; - cell->pipe.set_sampler_textures = cell_set_sampler_textures; + cell->pipe.set_fragment_sampler_textures = cell_set_sampler_textures; cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9479c0898f..ac5fafec1a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "cell_context.h" #include "cell_gen_fragment.h" #include "cell_state.h" @@ -299,9 +300,9 @@ cell_emit_state(struct cell_context *cell) for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { texture->start[level] = (ct->mapped 
+ ct->level_offset[level]); - texture->width[level] = ct->base.width[level]; - texture->height[level] = ct->base.height[level]; - texture->depth[level] = ct->base.depth[level]; + texture->width[level] = u_minify(ct->base.width0, level); + texture->height[level] = u_minify(ct->base.height0, level); + texture->depth[level] = u_minify(ct->base.depth0, level); } texture->target = ct->base.target; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index ae4c61efb3..e6b8a87045 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -49,9 +49,9 @@ cell_texture_layout(struct cell_texture *ct) { struct pipe_texture *pt = &ct->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; ct->buffer_size = 0; @@ -65,9 +65,6 @@ cell_texture_layout(struct cell_texture *ct) w_tile = align(width, TILE_SIZE); h_tile = align(height, TILE_SIZE); - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); @@ -83,9 +80,9 @@ cell_texture_layout(struct cell_texture *ct) ct->buffer_size += size; - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } } @@ -276,8 +273,8 @@ cell_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = ct->level_offset[level]; /* XXX may need to override usage flags (see sp_texture.c) */ ps->usage = usage; @@ -386,8 +383,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) struct pipe_texture *pt = transfer->texture; struct cell_texture *ct = cell_texture(pt); const uint level = ctrans->base.level; - const uint texWidth = pt->width[level]; - const uint texHeight = pt->height[level]; + const uint texWidth = u_minify(pt->width0, level); + const uint texHeight = u_minify(pt->height0, level); const uint stride = ct->stride[level]; unsigned size; @@ -440,8 +437,8 @@ cell_transfer_unmap(struct pipe_screen *screen, struct pipe_texture *pt = transfer->texture; struct cell_texture *ct = cell_texture(pt); const uint level = ctrans->base.level; - const uint texWidth = pt->width[level]; - const uint texHeight = pt->height[level]; + const uint texWidth = u_minify(pt->width0, level); + const uint texHeight = u_minify(pt->height0, level); const uint stride = ct->stride[level]; if (!ct->mapped) { diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 4c32b2d06d..5ed330aa6e 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -108,10 +108,10 @@ for (CHAN = 0; CHAN < 4; CHAN++) #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) #define 
FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ FOR_EACH_CHANNEL( CHAN )\ @@ -431,22 +431,22 @@ fetch_source( index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->SrcRegister.Index; + index.i[3] = reg->Register.Index; - if (reg->SrcRegister.Indirect) { + if (reg->Register.Indirect) { union spu_exec_channel index2; union spu_exec_channel indir_index; index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; + index2.i[3] = reg->Indirect.Index; - swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + swizzle = tgsi_util_get_src_register_swizzle(®->Indirect, CHAN_X); fetch_src_file_channel( mach, - reg->SrcRegisterInd.File, + reg->Indirect.File, swizzle, &index2, &indir_index ); @@ -454,8 +454,8 @@ fetch_source( index.q = si_a(index.q, indir_index.q); } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + if( reg->Register.Dimension ) { + switch( reg->Register.File ) { case TGSI_FILE_INPUT: index.q = si_mpyi(index.q, 17); break; @@ -466,24 +466,24 @@ fetch_source( ASSERT( 0 ); } - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; + index.i[0] += reg->Dimension.Index; + index.i[1] += reg->Dimension.Index; + index.i[2] += reg->Dimension.Index; + index.i[3] += reg->Dimension.Index; - if (reg->SrcRegisterDim.Indirect) { + if (reg->Dimension.Indirect) { union spu_exec_channel index2; union spu_exec_channel indir_index; index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; + index2.i[3] = reg->DimIndirect.Index; - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterDimInd.File, + reg->DimIndirect.File, swizzle, &index2, &indir_index ); @@ -495,7 +495,7 @@ fetch_source( swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, - reg->SrcRegister.File, + reg->Register.File, swizzle, &index, chan ); @@ -517,7 +517,7 @@ fetch_source( break; } - if (reg->SrcRegisterExtMod.Complement) { + if (reg->RegisterExtMod.Complement) { chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); } } @@ -532,21 +532,21 @@ store_dest( { union spu_exec_channel *dst; - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_NULL: return; case TGSI_FILE_OUTPUT: dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index].xyzw[chan_index]; + + reg->Register.Index].xyzw[chan_index]; break; case TGSI_FILE_TEMPORARY: - dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + dst = &mach->Temps[reg->Register.Index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index]; break; default: @@ -583,10 +583,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) /** @@ -612,7 +612,7 @@ exec_kil(struct spu_exec_machine *mach, /* unswizzle channel */ swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->FullSrcRegisters[0], + &inst->Src[0], chan_index); /* check if the component 
has not been already tested */ @@ -677,7 +677,7 @@ exec_tex(struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, boolean biasLod, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; union spu_exec_channel r[8]; uint chan_index; float lodBias; @@ -833,8 +833,8 @@ exec_declaration(struct spu_exec_machine *mach, unsigned first, last, mask; interpolation_func interp; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; switch( decl->Declaration.Interpolate ) { diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index c2c32b22d5..24057e29e3 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -33,7 +33,7 @@ tgsi_util_get_full_src_register_swizzle( unsigned component ) { return tgsi_util_get_src_register_swizzle( - reg->SrcRegister, + reg->Register, component ); } @@ -45,10 +45,10 @@ tgsi_util_get_full_src_register_sign_mode( { unsigned sign_mode; - if( reg->SrcRegisterExtMod.Absolute ) { + if( reg->RegisterExtMod.Absolute ) { /* Consider only the post-abs negation. */ - if( reg->SrcRegisterExtMod.Negate ) { + if( reg->RegisterExtMod.Negate ) { sign_mode = TGSI_UTIL_SIGN_SET; } else { @@ -60,8 +60,8 @@ tgsi_util_get_full_src_register_sign_mode( unsigned negate; - negate = reg->SrcRegister.Negate; - if( reg->SrcRegisterExtMod.Negate ) { + negate = reg->Register.Negate; + if( reg->RegisterExtMod.Negate ) { negate = !negate; } diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 9ba86ba866..149393712a 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -72,6 +72,7 @@ struct failover_context { */ const struct fo_state *blend; const struct fo_state *sampler[PIPE_MAX_SAMPLERS]; + const struct fo_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; const struct fo_state *depth_stencil; const struct fo_state *rasterizer; const struct fo_state *fragment_shader; @@ -83,6 +84,7 @@ struct failover_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; @@ -92,11 +94,15 @@ struct failover_context { void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + void *sw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS]; + void *hw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS]; unsigned dirty; unsigned num_samplers; + unsigned num_vertex_samplers; unsigned num_textures; + unsigned num_vertex_textures; unsigned mode; struct pipe_context *hw; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index c8eb926299..3f5f556032 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -322,8 +322,9 @@ failover_create_sampler_state(struct pipe_context *pipe, } static void -failover_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) +failover_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num, + void **sampler) { struct failover_context *failover = 
failover_context(pipe); struct fo_state *state = (struct fo_state*)sampler; @@ -339,10 +340,40 @@ failover_bind_sampler_states(struct pipe_context *pipe, } failover->dirty |= FO_NEW_SAMPLER; failover->num_samplers = num; - failover->sw->bind_sampler_states(failover->sw, num, - failover->sw_sampler_state); - failover->hw->bind_sampler_states(failover->hw, num, - failover->hw_sampler_state); + failover->sw->bind_fragment_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_fragment_sampler_states(failover->hw, num, + failover->hw_sampler_state); +} + +static void +failover_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)samplers; + uint i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == failover->num_vertex_samplers && + !memcmp(failover->vertex_samplers, samplers, num_samplers * sizeof(void *))) { + return; + } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + failover->sw_vertex_sampler_state[i] = i < num_samplers ? state[i].sw_state : NULL; + failover->hw_vertex_sampler_state[i] = i < num_samplers ? state[i].hw_state : NULL; + } + failover->dirty |= FO_NEW_SAMPLER; + failover->num_vertex_samplers = num_samplers; + failover->sw->bind_vertex_sampler_states(failover->sw, + num_samplers, + failover->sw_vertex_sampler_state); + failover->hw->bind_vertex_sampler_states(failover->hw, + num_samplers, + failover->hw_vertex_sampler_state); } static void @@ -360,9 +391,9 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) static void -failover_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) +failover_set_fragment_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct failover_context *failover = failover_context(pipe); uint i; @@ -381,8 +412,38 @@ failover_set_sampler_textures(struct pipe_context *pipe, NULL); failover->dirty |= FO_NEW_TEXTURE; failover->num_textures = num; - failover->sw->set_sampler_textures( failover->sw, num, texture ); - failover->hw->set_sampler_textures( failover->hw, num, texture ); + failover->sw->set_fragment_sampler_textures( failover->sw, num, texture ); + failover->hw->set_fragment_sampler_textures( failover->hw, num, texture ); +} + + +static void +failover_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct failover_context *failover = failover_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == failover->num_vertex_textures && + !memcmp(failover->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + for (i = 0; i < num_textures; i++) { + pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i], + textures[i]); + } + for (i = num_textures; i < failover->num_vertex_textures; i++) { + pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i], + NULL); + } + failover->dirty |= FO_NEW_TEXTURE; + failover->num_vertex_textures = num_textures; + failover->sw->set_vertex_sampler_textures(failover->sw, num_textures, textures); + failover->hw->set_vertex_sampler_textures(failover->hw, num_textures, textures); } @@ -453,7 +514,8 @@ failover_init_state_functions( struct failover_context *failover 
) failover->pipe.bind_blend_state = failover_bind_blend_state; failover->pipe.delete_blend_state = failover_delete_blend_state; failover->pipe.create_sampler_state = failover_create_sampler_state; - failover->pipe.bind_sampler_states = failover_bind_sampler_states; + failover->pipe.bind_fragment_sampler_states = failover_bind_fragment_sampler_states; + failover->pipe.bind_vertex_sampler_states = failover_bind_vertex_sampler_states; failover->pipe.delete_sampler_state = failover_delete_sampler_state; failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; @@ -473,7 +535,8 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; failover->pipe.set_scissor_state = failover_set_scissor_state; - failover->pipe.set_sampler_textures = failover_set_sampler_textures; + failover->pipe.set_fragment_sampler_textures = failover_set_fragment_sampler_textures; + failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; failover->pipe.set_vertex_elements = failover_set_vertex_elements; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index bd4fce9d20..a3341e33f8 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -92,13 +92,19 @@ failover_state_emit( struct failover_context *failover ) failover->sw->set_viewport_state( failover->sw, &failover->viewport ); if (failover->dirty & FO_NEW_SAMPLER) { - failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, - failover->sw_sampler_state ); + failover->sw->bind_fragment_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); + failover->sw->bind_vertex_sampler_states(failover->sw, + failover->num_vertex_samplers, + failover->sw_vertex_sampler_state); } if (failover->dirty & FO_NEW_TEXTURE) { - failover->sw->set_sampler_textures( failover->sw, failover->num_textures, - failover->texture ); + failover->sw->set_fragment_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); + failover->sw->set_vertex_sampler_textures(failover->sw, + failover->num_vertex_textures, + failover->vertex_textures); } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index e745f3342d..94c8aee30f 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -155,15 +155,11 @@ static unsigned int i915_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *buf) { - /** - * FIXME: Return the corrent result. We can't alays return referenced - * since it causes a double flush within the vbo module. 
+ /* + * Since we never expose hardware buffers to the state tracker + * they can never be referenced, so this isn't a lie */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else return 0; -#endif } diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index e6640e587b..c6e6d6fd31 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -851,6 +851,7 @@ static boolean i915_debug_packet( struct debug_stream *stream ) default: return debug(stream, "", 0); } + break; default: assert(0); return 0; diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 379d47e79a..25c53210be 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -143,12 +143,12 @@ static uint src_vector(struct i915_fp_compile *p, const struct tgsi_full_src_register *source) { - uint index = source->SrcRegister.Index; + uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; - switch (source->SrcRegister.File) { + switch (source->Register.File) { case TGSI_FILE_TEMPORARY: - if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + if (source->Register.Index >= I915_MAX_TEMPORARY) { i915_program_error(p, "Exceeded max temporary reg"); return 0; } @@ -215,26 +215,25 @@ src_vector(struct i915_fp_compile *p, } src = swizzle(src, - source->SrcRegister.SwizzleX, - source->SrcRegister.SwizzleY, - source->SrcRegister.SwizzleZ, - source->SrcRegister.SwizzleW); + source->Register.SwizzleX, + source->Register.SwizzleY, + source->Register.SwizzleZ, + source->Register.SwizzleW); /* There's both negate-all-components and per-component negation. * Try to handle both here. */ { - int n = source->SrcRegister.Negate; + int n = source->Register.Negate; src = negate(src, n, n, n, n); } - /* no abs() or post-abs negation */ + /* no abs() */ #if 0 /* XXX assertions disabled to allow arbfplight.c to run */ /* XXX enable these assertions, or fix things */ - assert(!source->SrcRegisterExtMod.Absolute); - assert(!source->SrcRegisterExtMod.Negate); + assert(!source->Register.Absolute); #endif return src; } @@ -247,10 +246,10 @@ static uint get_result_vector(struct i915_fp_compile *p, const struct tgsi_full_dst_register *dest) { - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_OUTPUT: { - uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; + uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; switch (sem_name) { case TGSI_SEMANTIC_POSITION: return UREG(REG_TYPE_OD, 0); @@ -262,7 +261,7 @@ get_result_vector(struct i915_fp_compile *p, } } case TGSI_FILE_TEMPORARY: - return UREG(REG_TYPE_R, dest->DstRegister.Index); + return UREG(REG_TYPE_R, dest->Register.Index); default: i915_program_error(p, "Bad inst->DstReg.File"); return 0; @@ -277,7 +276,7 @@ static uint get_result_flags(const struct tgsi_full_instruction *inst) { const uint writeMask - = inst->FullDstRegisters[0].DstRegister.WriteMask; + = inst->Dst[0].Register.WriteMask; uint flags = 0x0; if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) @@ -339,14 +338,14 @@ emit_tex(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, uint opcode) { - uint texture = inst->InstructionExtTexture.Texture; - uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint texture = inst->Texture.Texture; + uint unit = inst->Src[1].Register.Index; uint tex = translate_tex_src_target( p, 
texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); - uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + uint coord = src_vector( p, &inst->Src[0]); i915_emit_texld( p, - get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_vector( p, &inst->Dst[0] ), get_result_flags( inst ), sampler, coord, @@ -368,13 +367,13 @@ emit_simple_arith(struct i915_fp_compile *p, assert(numArgs <= 3); - arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); - arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); - arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] ); i915_emit_arith( p, opcode, - get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_vector( p, &inst->Dst[0]), get_result_flags( inst ), 0, arg1, arg2, @@ -394,8 +393,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, /* transpose first two registers */ inst2 = *inst; - inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; - inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + inst2.Src[0] = inst->Src[1]; + inst2.Src[1] = inst->Src[0]; emit_simple_arith(p, &inst2, opcode, numArgs); } @@ -424,10 +423,10 @@ i915_translate_instruction(struct i915_fp_compile *p, switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_MAX, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, negate(src0, 1, 1, 1, 1), 0); break; @@ -437,17 +436,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_CMP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); + src2 = src_vector(p, &inst->Src[2]); i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ break; case TGSI_OPCODE_COS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -490,7 +489,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(tmp, ONE, Z, Y, X), i915_emit_const4fv(p, cos_constants), 0); @@ -505,19 +504,19 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DPH: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, Y, Z, ONE), src1, 0); break; case TGSI_OPCODE_DST: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); /* result[0] = 1 * 1; * result[1] = a[1] * b[1]; @@ -526,7 +525,7 @@ 
i915_translate_instruction(struct i915_fp_compile *p, */ i915_emit_arith(p, A0_MUL, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE), swizzle(src1, ONE, Y, ONE, W), 0); @@ -537,11 +536,11 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_EX2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; @@ -556,7 +555,7 @@ i915_translate_instruction(struct i915_fp_compile *p, case TGSI_OPCODE_KIL: /* kill if src[0].x < 0 || src[0].y < 0 ... */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_texld(p, @@ -572,17 +571,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LG2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_LOG, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_LIT: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); /* tmp = max( a.xyzw, a.00zw ) @@ -606,7 +605,7 @@ i915_translate_instruction(struct i915_fp_compile *p, swizzle(tmp, Y, Y, Y, Y), 0, 0); i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), swizzle(tmp, ONE, X, ZERO, ONE), @@ -615,9 +614,9 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LRP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); + src2 = src_vector(p, &inst->Src[2]); flags = get_result_flags(inst); tmp = i915_get_utemp(p); @@ -632,7 +631,7 @@ i915_translate_instruction(struct i915_fp_compile *p, flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); i915_emit_arith(p, A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); break; @@ -645,8 +644,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -658,7 +657,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOV, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); break; @@ -671,8 +670,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_POW: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -687,7 +686,7 @@ i915_translate_instruction(struct i915_fp_compile *p, 
i915_emit_arith(p, A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, swizzle(tmp, X, X, X, X), 0, 0); break; @@ -696,27 +695,27 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_RCP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_RCP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_RSQ: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_RSQ, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_SCS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); /* @@ -739,7 +738,7 @@ i915_translate_instruction(struct i915_fp_compile *p, swizzle(tmp, X, Y, X, Y), swizzle(tmp, X, X, ONE, ONE), 0); - writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + writemask = inst->Dst[0].Register.WriteMask; if (writemask & TGSI_WRITEMASK_Y) { uint tmp1; @@ -757,7 +756,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_Y, 0, swizzle(tmp1, W, Z, Y, X), i915_emit_const4fv(p, sin_constants), 0); @@ -772,7 +771,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_X, 0, swizzle(tmp, ONE, Z, Y, X), i915_emit_const4fv(p, cos_constants), 0); @@ -789,7 +788,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -832,7 +831,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(tmp, W, Z, Y, X), i915_emit_const4fv(p, sin_constants), 0); @@ -848,12 +847,12 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); i915_emit_arith(p, A0_ADD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, negate(src1, 1, 1, 1, 1), 0); break; @@ -877,8 +876,8 @@ i915_translate_instruction(struct i915_fp_compile *p, * result.z = src0.x * src1.y - src0.y * src1.x; * result.w = undef; */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -889,7 +888,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, Y, Z, X, ONE), swizzle(src1, Z, X, Y, ONE), @@ -929,8 +928,8 @@ i915_translate_instructions(struct 
i915_fp_compile *p, if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.Range.First; + i <= parse.FullToken.FullDeclaration.Range.Last; i++) { assert(ifs->constant_flags[i] == 0x0); ifs->constant_flags[i] = I915_CONSTFLAG_USER; @@ -940,8 +939,8 @@ i915_translate_instructions(struct i915_fp_compile *p, else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.Range.First; + i <= parse.FullToken.FullDeclaration.Range.Last; i++) { assert(i < I915_MAX_TEMPORARY); /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index c66558c320..d4ee8f5339 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -271,6 +271,7 @@ i915_create_screen(struct intel_winsys *iws, uint pci_id) default: debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", __FUNCTION__, pci_id); + FREE(is); return NULL; } diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 7d48e6e84d..9103847f1c 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -58,8 +58,10 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; -// case PIPE_TEX_WRAP_MIRRORED_REPEAT: -// return TEXCOORDMODE_MIRROR; +/* + case PIPE_TEX_WRAP_MIRRORED_REPEAT: + return TEXCOORDMODE_MIRROR; +*/ default: return TEXCOORDMODE_WRAP; } @@ -765,7 +767,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.delete_blend_state = i915_delete_blend_state; i915->base.create_sampler_state = i915_create_sampler_state; - i915->base.bind_sampler_states = i915_bind_sampler_states; + i915->base.bind_fragment_sampler_states = i915_bind_sampler_states; i915->base.delete_sampler_state = i915_delete_sampler_state; i915->base.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; @@ -789,7 +791,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_polygon_stipple = i915_set_polygon_stipple; i915->base.set_scissor_state = i915_set_scissor_state; - i915->base.set_sampler_textures = i915_set_sampler_textures; + i915->base.set_fragment_sampler_textures = i915_set_sampler_textures; i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_vertex_elements = i915_set_vertex_elements; diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index c5e9084d12..cbac4175c8 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -231,7 +231,7 @@ i915_update_texture(struct i915_context *i915, { const struct pipe_texture *pt = &tex->base; uint format, pitch; - const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint width = pt->width0, height = pt->height0, depth = pt->depth0; const uint num_levels = pt->last_level; unsigned max_lod = num_levels * 4; unsigned tiled = MS3_USE_FENCE_REGS; diff --git 
a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index 286c9ace8e..c7b86dd4c5 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -105,10 +105,6 @@ i915_miptree_set_level_info(struct i915_texture *tex, assert(level < PIPE_MAX_TEXTURE_LEVELS); - pt->width[level] = w; - pt->height[level] = h; - pt->depth[level] = d; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); @@ -168,16 +164,16 @@ i915_scanout_layout(struct i915_texture *tex) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], + tex->base.width0, + tex->base.height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - if (tex->base.width[0] >= 240) { + if (tex->base.width0 >= 240) { tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); tex->hw_tiled = INTEL_TILE_X; - } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) { + } else if (tex->base.width0 == 64 && tex->base.height0 == 64) { tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); } else { @@ -185,7 +181,7 @@ i915_scanout_layout(struct i915_texture *tex) } debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, + tex->base.width0, tex->base.height0, pt->block.size, tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -203,12 +199,12 @@ i915_display_target_layout(struct i915_texture *tex) return FALSE; /* fallback to normal textures for small textures */ - if (tex->base.width[0] < 240) + if (tex->base.width0 < 240) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], + tex->base.width0, + tex->base.height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); @@ -217,7 +213,7 @@ i915_display_target_layout(struct i915_texture *tex) tex->hw_tiled = INTEL_TILE_X; debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, + tex->base.width0, tex->base.height0, pt->block.size, tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -228,8 +224,8 @@ i915_miptree_layout_2d(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; @@ -254,8 +250,8 @@ i915_miptree_layout_2d(struct i915_texture *tex) tex->total_nblocksy += nblocksy; - width = minify(width); - height = minify(height); + width = u_minify(width, 1); + height = u_minify(height, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } @@ -267,9 +263,9 @@ i915_miptree_layout_3d(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; unsigned stack_nblocksy = 0; @@ -285,36 +281,34 @@ i915_miptree_layout_3d(struct i915_texture *tex) stack_nblocksy += MAX2(2, nblocksy); - width = 
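The minify() to u_minify() conversion above assumes the usual helper semantics: mip sizes are derived on demand from the base size rather than stored per level. A minimal stand-in with the assumed behaviour (the name minify_level is illustrative, not the real util header):

/* Assumed behaviour of u_minify(value, levels): shift the base size
 * down by the given number of mip levels and clamp to one texel. */
static unsigned
minify_level(unsigned base_size, unsigned levels)
{
   unsigned size = base_size >> levels;
   return size ? size : 1;
}
/* e.g. minify_level(240, 1) == 120 and minify_level(3, 2) == 1,
 * so u_minify(width, 2) stands in for the old minify(minify(width)). */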
minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } /* Fixup depth image_offsets: */ - depth = pt->depth[0]; for (level = 0; level <= pt->last_level; level++) { unsigned i; for (i = 0; i < depth; i++) i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy); - depth = minify(depth); + depth = u_minify(depth, 1); } /* Multiply slice size by texture depth for total size. It's * remarkable how wasteful of memory the i915 texture layouts * are. They are largely fixed in the i945. */ - tex->total_nblocksy = stack_nblocksy * pt->depth[0]; + tex->total_nblocksy = stack_nblocksy * pt->depth0; } static void i915_miptree_layout_cube(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0], height = pt->height[0]; + unsigned width = pt->width0, height = pt->height0; const unsigned nblocks = pt->nblocksx[0]; unsigned level; unsigned face; @@ -383,8 +377,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) unsigned level; unsigned x = 0; unsigned y = 0; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; @@ -407,8 +401,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) - + pf_get_nblocksx(&pt->block, minify(minify(width))); + = align(pf_get_nblocksx(&pt->block, u_minify(width, 1)), align_x) + + pf_get_nblocksx(&pt->block, u_minify(width, 2)); if (mip1_nblocksx > nblocksx) tex->stride = mip1_nblocksx * pt->block.size; @@ -439,8 +433,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) y += nblocksy; } - width = minify(width); - height = minify(height); + width = u_minify(width, 1); + height = u_minify(height, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } @@ -450,9 +444,9 @@ static void i945_miptree_layout_3d(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; unsigned pack_x_pitch, pack_x_nr; @@ -495,9 +489,9 @@ i945_miptree_layout_3d(struct i915_texture *tex) pack_y_pitch >>= 1; } - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } @@ -511,11 +505,11 @@ i945_miptree_layout_cube(struct i915_texture *tex) const unsigned nblocks = pt->nblocksx[0]; unsigned face; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; /* - printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + printf("%s %i, %i\n", __FUNCTION__, pt->width0, pt->height0); */ assert(width == height); /* cubemap images are square */ @@ -651,8 +645,8 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->base.reference, 1); tex->base.screen = screen; - tex->base.nblocksx[0] = 
pf_get_nblocksx(&tex->base.block, tex->base.width[0]); - tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width0); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height0); if (is->is_i945) { if (!i945_miptree_layout(tex)) @@ -667,7 +661,7 @@ i915_texture_create(struct pipe_screen *screen, /* for scanouts and cursors, cursors arn't scanouts */ - if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64) + if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width0 != 64) buf_usage = INTEL_NEW_SCANOUT; else buf_usage = INTEL_NEW_TEXTURE; @@ -710,7 +704,7 @@ i915_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -724,7 +718,7 @@ i915_texture_blanket(struct pipe_screen * screen, tex->stride = stride[0]; - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); pipe_buffer_reference(&tex->buffer, buffer); @@ -788,8 +782,8 @@ i915_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = offset; ps->usage = flags; } @@ -919,7 +913,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -933,7 +927,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen, tex->stride = stride; - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); tex->buffer = buffer; diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h index 2c8dc63f3f..c6bf6e6f7f 100644 --- a/src/gallium/drivers/i915/intel_winsys.h +++ b/src/gallium/drivers/i915/intel_winsys.h @@ -42,21 +42,21 @@ enum intel_buffer_usage INTEL_USAGE_2D_TARGET = 0x04, INTEL_USAGE_2D_SOURCE = 0x08, /* use on vertex */ - INTEL_USAGE_VERTEX = 0x10, + INTEL_USAGE_VERTEX = 0x10 }; enum intel_buffer_type { INTEL_NEW_TEXTURE, INTEL_NEW_SCANOUT, /**< a texture used for scanning out from */ - INTEL_NEW_VERTEX, + INTEL_NEW_VERTEX }; enum intel_buffer_tile { INTEL_TILE_NONE, INTEL_TILE_X, - INTEL_TILE_Y, + INTEL_TILE_Y }; struct intel_batchbuffer { diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 4e700089e3..4509c7b1e5 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -221,16 +221,29 @@ identity_create_sampler_state(struct pipe_context *_pipe, } static void -identity_bind_sampler_states(struct pipe_context *_pipe, - unsigned num, - void **samplers) +identity_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - pipe->bind_sampler_states(pipe, - num, - samplers); + 
pipe->bind_fragment_sampler_states(pipe, + num_samplers, + samplers); +} + +static void +identity_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->bind_vertex_sampler_states(pipe, + num_samplers, + samplers); } static void @@ -480,9 +493,9 @@ identity_set_viewport_state(struct pipe_context *_pipe, } static void -identity_set_sampler_textures(struct pipe_context *_pipe, - unsigned num_textures, - struct pipe_texture **_textures) +identity_set_fragment_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **_textures) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; @@ -499,9 +512,34 @@ identity_set_sampler_textures(struct pipe_context *_pipe, textures = unwrapped_textures; } - pipe->set_sampler_textures(pipe, - num_textures, - textures); + pipe->set_fragment_sampler_textures(pipe, + num_textures, + textures); +} + +static void +identity_set_vertex_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **_textures) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_texture **textures = NULL; + unsigned i; + + if (_textures) { + for (i = 0; i < num_textures; i++) + unwrapped_textures[i] = identity_texture_unwrap(_textures[i]); + for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + unwrapped_textures[i] = NULL; + + textures = unwrapped_textures; + } + + pipe->set_vertex_sampler_textures(pipe, + num_textures, + textures); } static void @@ -682,7 +720,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.bind_blend_state = identity_bind_blend_state; id_pipe->base.delete_blend_state = identity_delete_blend_state; id_pipe->base.create_sampler_state = identity_create_sampler_state; - id_pipe->base.bind_sampler_states = identity_bind_sampler_states; + id_pipe->base.bind_fragment_sampler_states = identity_bind_fragment_sampler_states; + id_pipe->base.bind_vertex_sampler_states = identity_bind_vertex_sampler_states; id_pipe->base.delete_sampler_state = identity_delete_sampler_state; id_pipe->base.create_rasterizer_state = identity_create_rasterizer_state; id_pipe->base.bind_rasterizer_state = identity_bind_rasterizer_state; @@ -703,7 +742,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple; id_pipe->base.set_scissor_state = identity_set_scissor_state; id_pipe->base.set_viewport_state = identity_set_viewport_state; - id_pipe->base.set_sampler_textures = identity_set_sampler_textures; + id_pipe->base.set_fragment_sampler_textures = identity_set_vertex_sampler_textures; + id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; id_pipe->base.set_vertex_elements = identity_set_vertex_elements; id_pipe->base.surface_copy = identity_surface_copy; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index e893e59940..bc9bc7121d 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -180,3 +180,42 @@ identity_transfer_destroy(struct identity_transfer 
*id_transfer) screen->tex_transfer_destroy(id_transfer->transfer); FREE(id_transfer); } + +struct pipe_video_surface * +identity_video_surface_create(struct identity_screen *id_screen, + struct pipe_video_surface *video_surface) +{ + struct identity_video_surface *id_video_surface; + + if (!video_surface) { + goto error; + } + + assert(video_surface->screen == id_screen->screen); + + id_video_surface = CALLOC_STRUCT(identity_video_surface); + if (!id_video_surface) { + goto error; + } + + memcpy(&id_video_surface->base, + video_surface, + sizeof(struct pipe_video_surface)); + + pipe_reference_init(&id_video_surface->base.reference, 1); + id_video_surface->base.screen = &id_screen->base; + id_video_surface->video_surface = video_surface; + + return &id_video_surface->base; + +error: + pipe_video_surface_reference(&video_surface, NULL); + return NULL; +} + +void +identity_video_surface_destroy(struct identity_video_surface *id_video_surface) +{ + pipe_video_surface_reference(&id_video_surface->video_surface, NULL); + FREE(id_video_surface); +} diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index ce58faa3c7..77cc719079 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -31,6 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "pipe/p_video_state.h" #include "id_screen.h" @@ -67,6 +68,14 @@ struct identity_transfer }; +struct identity_video_surface +{ + struct pipe_video_surface base; + + struct pipe_video_surface *video_surface; +}; + + static INLINE struct identity_buffer * identity_buffer(struct pipe_buffer *_buffer) { @@ -103,6 +112,15 @@ identity_transfer(struct pipe_transfer *_transfer) return (struct identity_transfer *)_transfer; } +static INLINE struct identity_video_surface * +identity_video_surface(struct pipe_video_surface *_video_surface) +{ + if (!_video_surface) { + return NULL; + } + (void)identity_screen(_video_surface->screen); + return (struct identity_video_surface *)_video_surface; +} static INLINE struct pipe_buffer * identity_buffer_unwrap(struct pipe_buffer *_buffer) @@ -165,5 +183,12 @@ identity_transfer_create(struct identity_texture *id_texture, void identity_transfer_destroy(struct identity_transfer *id_transfer); +struct pipe_video_surface * +identity_video_surface_create(struct identity_screen *id_screen, + struct pipe_video_surface *video_surface); + +void +identity_video_surface_destroy(struct identity_video_surface *id_video_surface); + #endif /* ID_OBJECTS_H */ diff --git a/src/gallium/drivers/identity/id_public.h b/src/gallium/drivers/identity/id_public.h index cac14cfd60..3d2862eaa0 100644 --- a/src/gallium/drivers/identity/id_public.h +++ b/src/gallium/drivers/identity/id_public.h @@ -37,4 +37,4 @@ identity_screen_create(struct pipe_screen *screen); struct pipe_context * identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe); -#endif /* PT_PUBLIC_H */ +#endif /* ID_PUBLIC_H */ diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 26439637d0..53eae3ef54 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -379,6 +379,33 @@ identity_screen_buffer_destroy(struct pipe_buffer *_buffer) identity_buffer_destroy(identity_buffer(_buffer)); } +static struct pipe_video_surface * +identity_screen_video_surface_create(struct pipe_screen *_screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, + unsigned 
height) +{ + struct identity_screen *id_screen = identity_screen(_screen); + struct pipe_screen *screen = id_screen->screen; + struct pipe_video_surface *result; + + result = screen->video_surface_create(screen, + chroma_format, + width, + height); + + if (result) { + return identity_video_surface_create(id_screen, result); + } + return NULL; +} + +static void +identity_screen_video_surface_destroy(struct pipe_video_surface *_vsfc) +{ + identity_video_surface_destroy(identity_video_surface(_vsfc)); +} + static void identity_screen_flush_frontbuffer(struct pipe_screen *_screen, struct pipe_surface *_surface, @@ -472,6 +499,12 @@ identity_screen_create(struct pipe_screen *screen) if (screen->buffer_unmap) id_screen->base.buffer_unmap = identity_screen_buffer_unmap; id_screen->base.buffer_destroy = identity_screen_buffer_destroy; + if (screen->video_surface_create) { + id_screen->base.video_surface_create = identity_screen_video_surface_create; + } + if (screen->video_surface_destroy) { + id_screen->base.video_surface_destroy = identity_screen_video_surface_destroy; + } id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer; id_screen->base.fence_reference = identity_screen_fence_reference; id_screen->base.fence_signalled = identity_screen_fence_signalled; diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index b96ee23a99..e038a5229e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -17,11 +17,13 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_query.c \ lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ lp_bld_pack.c \ + lp_bld_sample.c \ lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ @@ -33,7 +35,6 @@ C_SOURCES = \ lp_draw_arrays.c \ lp_flush.c \ lp_jit.c \ - lp_prim_setup.c \ lp_prim_vbuf.c \ lp_setup.c \ lp_query.c \ @@ -55,6 +56,9 @@ C_SOURCES = \ lp_tile_cache.c \ lp_tile_soa.c +CPP_SOURCES = \ + lp_bld_misc.cpp + include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 89d08834a3..0c3f00fd58 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -51,21 +51,22 @@ Requirements - Linux - - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes - opcodes not yet supported by upstream. + - A x86 or amd64 processor. 64bit mode is preferred. - git clone git://people.freedesktop.org/~jrfonseca/udis86 - cd udis86 - ./configure --with-pic - make - sudo make install + Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will + yield the most efficient code. The less features the CPU has the more + likely is that you ran into underperforming, buggy, or incomplete code. + + See /proc/cpuinfo to know what your CPU supports. + + - LLVM 2.5 or greater. LLVM 2.6 is preferred. - - LLVM 2.5. On Debian based distributions do: + On Debian based distributions do: aptitude install llvm-dev - There is a typo in one of the llvm-dev 2.5 headers, that causes compilation - errors in the debug build: + There is a typo in one of the llvm 2.5 headers, that may cause compilation + errors. 
To fix it apply the change: --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 @@ -79,12 +80,17 @@ Requirements #endif return reinterpret_cast<T**>(Vals); - - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD - instructions. This is necessary because we emit several SSE intrinsics for - convenience. See /proc/cpuinfo to know what your CPU supports. - - - scons + - scons (optional) + - udis86, http://udis86.sourceforge.net/ (optional): + + git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86 + cd udis86 + ./autogen.sh + ./configure --with-pic + make + sudo make install + Building ======== diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 403e4daa43..3bd2e70013 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -30,10 +30,13 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_query.c', 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_misc.cpp', 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', @@ -46,7 +49,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_draw_arrays.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_setup.c', 'lp_prim_vbuf.c', 'lp_setup.c', 'lp_query.c', @@ -76,7 +78,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries) env.Program( target = 'lp_test_format', - source = ['lp_test_format.c'], + source = ['lp_test_format.c', 'lp_test_main.c'], ) env.Program( diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 83ca06acf8..9c59677a74 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -47,6 +47,7 @@ #include "util/u_memory.h" #include "util/u_debug.h" +#include "util/u_math.h" #include "util/u_string.h" #include "util/u_cpu_detect.h" @@ -361,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b) { const struct lp_type type = bld->type; + LLVMValueRef shift; + LLVMValueRef res; if(a == bld->zero) return bld->zero; @@ -394,10 +397,84 @@ lp_build_mul(struct lp_build_context *bld, assert(0); } - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstMul(a, b); + if(type.fixed) + shift = lp_build_int_const_scalar(type, type.width/2); + else + shift = NULL; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + res = LLVMConstMul(a, b); + if(shift) { + if(type.sign) + res = LLVMConstAShr(res, shift); + else + res = LLVMConstLShr(res, shift); + } + } + else { + res = LLVMBuildMul(bld->builder, a, b, ""); + if(shift) { + if(type.sign) + res = LLVMBuildAShr(bld->builder, res, shift, ""); + else + res = LLVMBuildLShr(bld->builder, res, shift, ""); + } + } - return LLVMBuildMul(bld->builder, a, b, ""); + return res; +} + + +/** + * Small vector x scale multiplication optimization. + */ +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b) +{ + LLVMValueRef factor; + + if(b == 0) + return bld->zero; + + if(b == 1) + return a; + + if(b == -1) + return LLVMBuildNeg(bld->builder, a, ""); + + if(b == 2 && bld->type.floating) + return lp_build_add(bld, a, a); + + if(util_is_pot(b)) { + unsigned shift = ffs(b) - 1; + + if(bld->type.floating) { +#if 0 + /* + * Power of two multiplication by directly manipulating the mantissa. 
+ * + * XXX: This might not be always faster, it will introduce a small error + * for multiplication by zero, and it will produce wrong results + * for Inf and NaN. + */ + unsigned mantissa = lp_mantissa(bld->type); + factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(bld->builder, a, factor, ""); + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + return a; +#endif + } + else { + factor = lp_build_const_scalar(bld->type, shift); + return LLVMBuildShl(bld->builder, a, factor, ""); + } + } + + factor = lp_build_const_scalar(bld->type, (double)b); + return lp_build_mul(bld, a, factor); } @@ -432,13 +509,36 @@ lp_build_div(struct lp_build_context *bld, } +/** + * Linear interpolation. + * + * This also works for integer values with a few caveats. + * + * @sa http://www.stereopsis.com/doubleblend.html + */ LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, LLVMValueRef v1) { - return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); + LLVMValueRef delta; + LLVMValueRef res; + + delta = lp_build_sub(bld, v1, v0); + + res = lp_build_mul(bld, x, delta); + + res = lp_build_add(bld, v0, res); + + if(bld->type.fixed) + /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, + * but it will be wrong for other uses. Basically we need a more + * powerful lp_type, capable of further distinguishing the values + * interpretation from the value storage. */ + res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + + return res; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 4e568c055e..62be4b9aee 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b); + +LLVMValueRef lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 98ec1cb1b9..d438c0e63d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc, if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { type.floating = TRUE; - assert(swizzle = 0); + assert(swizzle == 0); assert(format_desc->channel[swizzle].size == format_desc->block.bits); } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index c087fc986e..970bee379f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -42,62 +42,34 @@ struct util_format_description; struct lp_type; -/** - * Unpack a pixel into its RGBA components. - * - * @param packed integer. - * - * @return RGBA in a 4 floats vector. - */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +boolean +lp_format_is_rgba8(const struct util_format_description *desc); -/** - * Pack a pixel. - * - * @param rgba 4 float vector with the unpacked components. 
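The #if 0 branch of lp_build_mul_imm() above exploits the IEEE-754 layout: for a normal float, multiplying by 2^shift amounts to adding shift to the exponent field. A scalar sketch of the trick, including the failure mode the XXX comment warns about (zero, Inf and NaN are mishandled); illustration only, not part of the patch:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Multiply a normal float by 2^shift by bumping its exponent field;
 * the mantissa occupies the low 23 bits of a 32-bit float. */
static float
mul_pot_float(float a, unsigned shift)
{
   uint32_t bits;
   memcpy(&bits, &a, sizeof bits);   /* the bitcast step */
   bits += shift << 23;              /* the integer add step */
   memcpy(&a, &bits, sizeof a);
   return a;
}

int
main(void)
{
   printf("%g\n", mul_pot_float(3.25f, 4));   /* 52, as expected */
   printf("%g\n", mul_pot_float(0.0f, 4));    /* ~9.4e-38, not 0: the warned-about error */
   return 0;
}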
- */ -LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled); -/** - * Load a pixel into its RGBA components. - * - * @param ptr value with the pointer to the packed pixel. Pointer type is - * irrelevant. - * - * @return RGBA in a 4 floats vector. - */ LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed); -/** - * Store a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed); + LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef rgba); void @@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef *rgba); -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba); - #endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index b9b5d84bed..5836e0173f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -25,18 +25,39 @@ * **************************************************************************/ +/** + * @file + * AoS pixel format manipulation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "util/u_cpu_detect.h" #include "util/u_format.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" #include "lp_bld_format.h" +/** + * Unpack a single pixel into its RGBA components. + * + * @param packed integer. + * + * @return RGBA in a 4 floats vector. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef packed) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; @@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i; - desc = util_format_description(format); - /* FIXME: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); @@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, } +/** + * Take a vector with packed pixels and unpack into a rgba8 vector. + * + * Formats with bit depth smaller than 32bits are accepted, but they must be + * padded to 32bits. 
+ */ +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed) +{ + struct lp_build_context bld; + bool rgba8; + LLVMValueRef res; + unsigned i; + + lp_build_context_init(&bld, builder, type); + + /* FIXME: Support more formats */ + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + assert(desc->block.bits <= 32); + + assert(!type.floating); + assert(!type.fixed); + assert(type.norm); + assert(type.width == 8); + assert(type.length % 4 == 0); + + rgba8 = TRUE; + for(i = 0; i < 4; ++i) { + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); + if(desc->channel[0].size != 8) + rgba8 = FALSE; + } + + if(rgba8) { + /* + * The pixel is already in a rgba8 format variant. All it is necessary + * is to swizzle the channels. + */ + + unsigned char swizzles[4]; + boolean zeros[4]; /* bitwise AND mask */ + boolean ones[4]; /* bitwise OR mask */ + boolean swizzles_needed = FALSE; + boolean zeros_needed = FALSE; + boolean ones_needed = FALSE; + + for(i = 0; i < 4; ++i) { + enum util_format_swizzle swizzle = desc->swizzle[i]; + + /* Initialize with the no-op case */ + swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i; + zeros[i] = TRUE; + ones[i] = FALSE; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + if(swizzle != swizzles[i]) { + swizzles[i] = swizzle; + swizzles_needed = TRUE; + } + break; + case UTIL_FORMAT_SWIZZLE_0: + zeros[i] = FALSE; + zeros_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_1: + ones[i] = TRUE; + ones_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_NONE: + assert(0); + break; + } + } + + res = packed; + + if(swizzles_needed) + res = lp_build_swizzle1_aos(&bld, res, swizzles); + + if(zeros_needed) { + /* Mask out zero channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); + res = LLVMBuildAnd(builder, res, mask, ""); + } + + if(ones_needed) { + /* Or one channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, ones); + res = LLVMBuildOr(builder, res, mask, ""); + } + } + else { + /* FIXME */ + assert(0); + res = lp_build_undef(type); + } + + return res; +} + + +/** + * Pack a single pixel. + * + * @param rgba 4 float vector with the unpacked components. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. 
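lp_build_unpack_rgba8_aos() above reduces rgba8 variants to at most three steps: a byte swizzle, an AND mask for SWIZZLE_0 channels and an OR mask for SWIZZLE_1 channels. A scalar sketch of the same idea for one hypothetical packed pixel whose swizzle is (2, 1, 0, SWIZZLE_1), i.e. a BGRX-style layout (illustrative only, not a specific pipe_format):

#include <stdint.h>

/* One packed 32-bit pixel with channels in bytes 0..3; the wanted
 * swizzle is (2, 1, 0, SWIZZLE_1), so reorder three bytes and force
 * the fourth to 255. */
static uint32_t
unpack_bgrx8(uint32_t p)
{
   uint32_t c0 = (p >>  0) & 0xff;
   uint32_t c1 = (p >>  8) & 0xff;
   uint32_t c2 = (p >> 16) & 0xff;
   uint32_t res;

   res  = c2 | (c1 << 8) | (c0 << 16);   /* swizzle step: reorder bytes */
   res &= 0x00ffffff;                    /* AND mask: SWIZZLE_0 channels (none here) */
   res |= 0xff000000;                    /* OR mask: SWIZZLE_1 channel reads as 255 */
   return res;
}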
+ */ LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef rgba) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; @@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i, j; - desc = util_format_description(format); - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); assert(desc->block.height == 1); @@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, return packed; } - - -LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - type = LLVMIntType(desc->block.bits); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - packed = LLVMBuildLoad(builder, ptr, ""); - - return lp_build_unpack_rgba_aos(builder, format, packed); -} - - -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - - type = LLVMIntType(desc->block.bits); - - packed = lp_build_pack_rgba_aos(builder, format, rgba); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - LLVMBuildStore(builder, packed, ptr); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c new file mode 100644 index 0000000000..f3832d07ff --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @file + * Utility functions to make assertions about formats. + * + * This module centralizes most of logic used when determining what algorithm + * is most suitable (i.e., most efficient yet correct) for a given format. + * + * It might be possible to move some of these functions to u_format module, + * but since tiny differences in the format my render it more/less + * appropriate to a given algorithm it is impossible to make any long term + * guarantee about the semantics of these functions. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_format.h" + +#include "lp_bld_format.h" + + +/** + * Whether this format is a 4 rgba8 variant + */ +boolean +lp_format_is_rgba8(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 66bebdcdec..64151d169d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -34,66 +34,17 @@ #include "lp_bld_format.h" -/** - * Gather elements from scatter positions in memory into a single vector. - * - * @param src_width src element width - * @param dst_width result element width (source will be expanded to fit) - * @param length length of the offsets, - * @param base_ptr base pointer, should be a i8 pointer type. 
- * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - static LLVMValueRef -lp_build_format_swizzle(struct lp_type type, - const LLVMValueRef *inputs, - enum util_format_swizzle swizzle) +lp_build_format_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) { switch (swizzle) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_Y: case UTIL_FORMAT_SWIZZLE_Z: case UTIL_FORMAT_SWIZZLE_W: - return inputs[swizzle]; + return unswizzled[swizzle]; case UTIL_FORMAT_SWIZZLE_0: return lp_build_zero(type); case UTIL_FORMAT_SWIZZLE_1: @@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type, void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled) +{ + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + swizzled[2] = swizzled[1] = swizzled[0] = depth; + swizzled[3] = lp_build_one(type); + } + else { + unsigned chan; + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + } + } +} + + +void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, struct lp_type type, @@ -172,38 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, start = stop; } - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - enum util_format_swizzle swizzle = format_desc->swizzle[0]; - LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); - rgba[2] = rgba[1] = rgba[0] = depth; - rgba[3] = lp_build_one(type); - } - else { - for (chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); - } - } -} - - -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba) -{ - LLVMValueRef packed; - - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= 
type.width); - - packed = lp_build_gather(builder, - type.length, format_desc->block.bits, type.width, - base_ptr, offsets); - - lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); + lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1..49dab8ab61 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -109,32 +109,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, /** - * Small vector x scale multiplication optimization. - * - * TODO: Should be elsewhere. - */ -static LLVMValueRef -coeff_multiply(struct lp_build_interp_soa_context *bld, - LLVMValueRef coeff, - int step) -{ - LLVMValueRef factor; - - switch(step) { - case 0: - return bld->base.zero; - case 1: - return coeff; - case 2: - return lp_build_add(&bld->base, coeff, coeff); - default: - factor = lp_build_const_scalar(bld->base.type, (double)step); - return lp_build_mul(&bld->base, coeff, factor); - } -} - - -/** * Multiply the dadx and dady with the xstep and ystep respectively. */ static void @@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld) if (mode != TGSI_INTERPOLATE_CONSTANT) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); + bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep); + bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep); } } } @@ -329,8 +303,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, unsigned first, last, mask; unsigned attrib; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( attrib = first; attrib <= last; ++attrib ) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp new file mode 100644 index 0000000000..d3f78c06d9 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_config.h" + +#include "lp_bld_misc.h" + + +#ifndef LLVM_NATIVE_ARCH + +namespace llvm { + extern void LinkInJIT(); +} + + +void +LLVMLinkInJIT(void) +{ + llvm::LinkInJIT(); +} + + +extern "C" int X86TargetMachineModule; + + +int +LLVMInitializeNativeTarget(void) +{ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + X86TargetMachineModule = 1; +#endif + return 0; +} + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h new file mode 100644 index 0000000000..0e787e0b9c --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_BLD_MISC_H +#define LP_BLD_MISC_H + + +#include "llvm/Config/config.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef LLVM_NATIVE_ARCH + +void +LLVMLinkInJIT(void); + +int +LLVMInitializeNativeTarget(void); + +#endif /* !LLVM_NATIVE_ARCH */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* !LP_BLD_MISC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c index fe82fda039..bc360ad77a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c @@ -159,11 +159,10 @@ lp_build_unpack2(LLVMBuilderRef builder, assert(!src_type.floating); assert(!dst_type.floating); - assert(dst_type.sign == src_type.sign); assert(dst_type.width == src_type.width * 2); assert(dst_type.length * 2 == src_type.length); - if(src_type.sign) { + if(dst_type.sign && src_type.sign) { /* Replicate the sign bit in the most significant bits */ msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c new file mode 100644 index 0000000000..af70ddc6ab --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- common code. 
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width0); + state->pot_height = util_is_pot(texture->height0); + state->pot_depth = util_is_pot(texture->depth0); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +/** + * Compute the offset of a pixel. 
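lp_build_gather() above emits one scalar load per lane, with offsets taken in bytes from an i8 base pointer. A plain-C model of the generated code for the 32-bit source, 32-bit destination case (illustration only, not part of the patch):

#include <stdint.h>
#include <string.h>

/* For each lane, fetch one element at base_ptr + offsets[i]; the
 * widening to a larger destination width is a no-op in this
 * 32-bit to 32-bit model. */
static void
gather32(uint32_t *dst, const uint8_t *base_ptr,
         const uint32_t *offsets, unsigned length)
{
   unsigned i;
   for (i = 0; i < length; ++i)
      memcpy(&dst[i], base_ptr + offsets[i], sizeof dst[i]);
}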
+ * + * x, y, y_stride are vectors + */ +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); + + x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(bld, x, x_stride); + y_offset = lp_build_mul(bld, y, y_stride); + + offset = lp_build_add(bld, x_offset, y_offset); + } + + return offset; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 403d0e4836..8cb8210ca7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,9 @@ struct pipe_texture; struct pipe_sampler_state; +struct util_format_description; struct lp_type; +struct lp_build_context; /** @@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr); + + void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 1a47ca32d2..47b68b71e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -27,7 +27,7 @@ /** * @file - * Texture sampling. + * Texture sampling -- SoA. 
* * @author Jose Fonseca <jfonseca@vmware.com> */ @@ -35,54 +35,23 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_debug.h" +#include "util/u_debug_dump.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" +#include "lp_bld_pack.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" -void -lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler) -{ - memset(state, 0, sizeof *state); - - if(!texture) - return; - - if(!sampler) - return; - - state->format = texture->format; - state->target = texture->target; - state->pot_width = util_is_pot(texture->width[0]); - state->pot_height = util_is_pot(texture->height[0]); - state->pot_depth = util_is_pot(texture->depth[0]); - - state->wrap_s = sampler->wrap_s; - state->wrap_t = sampler->wrap_t; - state->wrap_r = sampler->wrap_r; - state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; - state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; - } - state->normalized_coords = sampler->normalized_coords; - state->prefilter = sampler->prefilter; -} - - - /** * Keep all information for sampling code generation in a single place. */ @@ -111,66 +80,61 @@ struct lp_build_sample_context static void -lp_build_sample_texel(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) { - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef x_stride; LLVMValueRef offset; + LLVMValueRef packed; + + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); + + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + packed = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + + lp_build_unpack_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + packed, texel); +} - x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); - - if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); - - x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 
4*bld->format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); - - x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); - - x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); - offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); - } - else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; +static LLVMValueRef +lp_build_sample_packed(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef offset; - x_offset = lp_build_mul(int_coord_bld, x, x_stride); - y_offset = lp_build_mul(int_coord_bld, y, y_stride); + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); - offset = lp_build_add(int_coord_bld, x_offset, y_offset); - } + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); - lp_build_load_rgba_soa(bld->builder, - bld->format_desc, - bld->texel_type, - data_ptr, - offset, - texel); + return lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); } @@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + _debug_printf("warning: failed to translate texture wrap mode %s\n", + debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; @@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); + lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } @@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); /* TODO: Don't interpolate missing channels */ for(chan = 0; chan < 4; ++chan) { @@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, static void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef 
builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + unsigned chan; + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + + if(stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + +static void +lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; + LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2]; + LLVMValueRef neighbors_lo[2][2]; + LLVMValueRef neighbors_hi[2][2]; + LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef unswizzled[4]; + + lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + + i32_c128 = lp_build_int_const_scalar(i32.type, -128); + s = LLVMBuildAdd(builder, s, i32_c128, ""); + t = LLVMBuildAdd(builder, t, i32_c128, ""); + + i32_c8 = lp_build_int_const_scalar(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + + i32_c255 = lp_build_int_const_scalar(i32.type, 255); + s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); + t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); + + x0 = s_ipart; + y0 = t_ipart; + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + /* + * Transform 4 x i32 in + * + * s_fpart = {s0, s1, s2, s3} + * + * into 8 x i16 + * + * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * + * into two 8 x i16 + * + * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} + * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * + * and likewise for t_fpart. There is no risk of loosing precision here + * since the fractional parts only use the lower 8bits. 
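Per SIMD lane, the coordinate handling above amounts to the following scalar computation (a sketch only, assuming the incoming coordinate is already expressed in texels; the generated code performs this on whole vectors of LLVM IR values):

#include <stdint.h>

struct fixed_coord {
   int32_t ipart;   /* texel index of the lower/left neighbor */
   int32_t fpart;   /* 8-bit interpolation weight, 0..255     */
};

static struct fixed_coord
split_8_8(float coord)
{
   struct fixed_coord r;
   int32_t fx = (int32_t)(coord * 256.0f);  /* to 24.8 fixed point       */
   fx -= 128;                               /* shift by half a texel     */
   r.ipart = fx >> 8;                       /* arithmetic shift, as AShr */
   r.fpart = fx & 0xff;                     /* fraction fits in 8 bits   */
   return r;
}

The shuffles that follow then replicate each 8-bit fraction across four 16-bit lanes, so one weight can multiply all four channels of a pixel at once.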
+ */ + + s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + + { + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle_lo; + LLVMValueRef shuffle_hi; + unsigned i, j; + + for(j = 0; j < h16.type.length; j += 4) { + unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; + LLVMValueRef index; + + index = LLVMConstInt(elem_type, j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_lo[j + i] = index; + + index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_hi[j + i] = index; + } + + shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); + shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + + s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); + t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); + s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); + t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 + * + * The higher 8 bits of the resulting elements will be zero. + */ + + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + + neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); + neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); + neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); + neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + + /* + * Linear interpolate with 8.8 fixed point. + */ + + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0], + neighbors_lo[0][1], + neighbors_lo[1][0], + neighbors_lo[1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0], + neighbors_hi[0][1], + neighbors_hi[1][0], + neighbors_hi[1][1]); + + packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); + + /* + * Convert to SoA and swizzle. 
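The conversion invoked just below, lp_build_rgba8_to_f32_soa (defined earlier in this file), is per packed 32-bit texel roughly this scalar operation (a sketch; the channel order is whatever the format stores, and the swizzle to RGBA happens afterwards):

#include <stdint.h>

static void
rgba8_to_f32(uint32_t packed, float out[4])
{
   unsigned chan;
   for (chan = 0; chan < 4; ++chan) {
      uint32_t bits = (packed >> (chan * 8)) & 0xff;   /* isolate one byte */
      out[chan] = (float)bits * (1.0f / 255.0f);       /* unorm8 -> [0, 1] */
   }
}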
+ */ + + packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); + + lp_build_rgba8_to_f32_soa(bld->builder, + bld->texel_type, + packed, unswizzled); + + lp_build_format_swizzle_soa(bld->format_desc, + bld->texel_type, unswizzled, + texel); +} + + +static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, LLVMValueRef *texel) @@ -402,7 +578,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, break; case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_ANISO: - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + if(lp_format_is_rgba8(bld.format_desc)) + lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); + else + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; default: assert(0); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 64027de6aa..fe2db04d8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -64,7 +64,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -157,7 +157,7 @@ emit_fetch( unsigned index, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index]; + const struct tgsi_full_src_register *reg = &inst->Src[index]; unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; @@ -167,9 +167,9 @@ emit_fetch( case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); @@ -177,17 +177,17 @@ emit_fetch( } case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->SrcRegister.Index][swizzle]; + res = bld->immediates[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_INPUT: - res = bld->inputs[reg->SrcRegister.Index][swizzle]; + res = bld->inputs[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_TEMPORARY: - res = bld->temps[reg->SrcRegister.Index][swizzle]; + res = bld->temps[reg->Register.Index][swizzle]; if(!res) return bld->base.undef; break; @@ -267,7 +267,7 @@ emit_store( unsigned chan_index, LLVMValueRef value) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index]; + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -287,13 +287,13 @@ emit_store( assert(0); } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - bld->outputs[reg->DstRegister.Index][chan_index] = value; + bld->outputs[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_TEMPORARY: - bld->temps[reg->DstRegister.Index][chan_index] = value; + bld->temps[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_ADDRESS: @@ -319,14 +319,14 @@ emit_tex( struct 
lp_build_tgsi_soa_context *bld, boolean projected, LLVMValueRef *texel) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; break; @@ -375,7 +375,7 @@ emit_kil( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0]; + const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; LLVMValueRef mask; unsigned chan_index; @@ -423,15 +423,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + const struct tgsi_full_src_register *reg = &inst->Src[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 46c298fa20..2fb233d335 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -43,13 +43,18 @@ /** + * Native SIMD register width. + * + * 128 for all architectures we care about. + */ +#define LP_NATIVE_VECTOR_WIDTH 128 + +/** * Several functions can only cope with vectors of length up to this value. * You may need to increase that value if you want to represent bigger vectors. 
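Every lp_type constructor added just below in this header derives the vector length from the fixed 128-bit register width, so callers only choose an element width. A usage sketch, assuming this header is included (these are the same types the new AoS sampling path builds its contexts with):

#include "lp_bld_type.h"

static void
example_types(void)
{
   struct lp_type i32 = lp_type_int(32);      /* 4 x 32-bit int per register    */
   struct lp_type h16 = lp_type_ufixed(16);   /* 8 x 16-bit fixed per register  */
   struct lp_type u8n = lp_type_unorm(8);     /* 16 x 8-bit unorm per register  */
   (void)i32; (void)h16; (void)u8n;
}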
*/ #define LP_MAX_VECTOR_LENGTH 16 -#define LP_MAX_TYPE_WIDTH 64 - /** * The LLVM type system can't conveniently express all the things we care about @@ -134,6 +139,91 @@ struct lp_build_context }; +static INLINE struct lp_type +lp_type_float(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_int(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_uint(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_unorm(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.norm = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_fixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_ufixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + LLVMTypeRef lp_build_elem_type(struct lp_type type); diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 202cb8ef43..c081f6de03 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,13 +31,13 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_setup.h" #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" @@ -180,7 +180,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; @@ -205,7 +205,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; @@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen ) (struct 
tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); - llvmpipe->setup = lp_draw_render_stage(llvmpipe); - if (!llvmpipe->setup) - goto fail; - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - if (debug_get_bool_option( "LP_NO_VBUF", FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup); - } - else { - lp_init_vbuf(llvmpipe); - } + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); + if (!llvmpipe->vbuf_backend) + goto fail; + + llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); + if (!llvmpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); + draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 7df340554e..3ad95d0bfc 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -121,9 +121,10 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct llvmpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 1126bf90b9..bce3baec16 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -39,6 +39,7 @@ #include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" +#include "lp_bld_misc.h" #include "lp_jit.h" @@ -153,13 +154,12 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif screen->module = LLVMModuleCreateWithName("llvmpipe"); @@ -168,7 +168,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); - abort(); + assert(0); } screen->target = LLVMGetExecutionEngineTargetData(screen->engine); diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c deleted file mode 100644 index b14f8fb99d..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in lp_prim_vbuf.c. - * - * \author Keith Whitwell <keith@tungstengraphics.com> - * \author Brian Paul - */ - - -#include "lp_context.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "lp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - llvmpipe_setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - llvmpipe_setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - * Create a new primitive setup/render stage. 
- */ -struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = llvmpipe_setup_create_context(llvmpipe); - sstage->stage.draw = llvmpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -lp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -lp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h deleted file mode 100644 index da6cae6375..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_PRIM_SETUP_H -#define LP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. 
- */ - - -struct draw_stage; -struct llvmpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -lp_draw_render_stage( struct llvmpipe_context *llvmpipe ); - -extern struct setup_context * -lp_draw_setup_context( struct draw_stage * ); - -extern void -lp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -lp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* LP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index c394dcb61d..4abff4eccc 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -37,10 +37,9 @@ #include "lp_context.h" +#include "lp_setup.h" #include "lp_state.h" #include "lp_prim_vbuf.h" -#include "lp_prim_setup.h" -#include "lp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "util/u_memory.h" @@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render { struct vbuf_render base; struct llvmpipe_context *llvmpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * lp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void lp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - const struct vertex_info *info = - llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -160,11 +136,7 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup); + struct setup_context *setup_ctx = cvbr->setup; llvmpipe_setup_prepare( setup_ctx ); @@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. 
- */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? If we had to call something, it - * would be a function in lp_setup.c: - */ - lp_draw_flush( setup ); } @@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -568,40 +525,38 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - cvbr->llvmpipe->vbuf_render = NULL; + llvmpipe_setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. */ -void -lp_init_vbuf(struct llvmpipe_context *lp) +struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *lp) { - assert(lp->draw); + struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render); + assert(lp->draw); - lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES; - lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info; - lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices; - lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices; - lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices; - lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive; - lp->vbuf_render->base.draw = lp_vbuf_draw; - lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays; - lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices; - lp->vbuf_render->base.destroy = lp_vbuf_destroy; + cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->llvmpipe = lp; + cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; + cvbr->base.map_vertices = lp_vbuf_map_vertices; + cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; + cvbr->base.set_primitive = lp_vbuf_set_primitive; + cvbr->base.draw = lp_vbuf_draw; + cvbr->base.draw_arrays = lp_vbuf_draw_arrays; + cvbr->base.release_vertices = lp_vbuf_release_vertices; + cvbr->base.destroy = lp_vbuf_destroy; - lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base); + cvbr->llvmpipe = lp; - draw_set_rasterize_stage(lp->draw, lp->vbuf); + cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); - draw_set_render(lp->draw, &lp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h index 
6c4e6063e6..0676e2f42a 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h @@ -31,8 +31,8 @@ struct llvmpipe_context; -extern void -lp_init_vbuf(struct llvmpipe_context *llvmpipe); +extern struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); #endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 0518927458..0fb133486a 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_format.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -131,17 +132,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_winsys *winsys = screen->winsys; + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if(!format_desc) + return FALSE; assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - if(format == PIPE_FORMAT_Z16_UNORM) - return FALSE; - if(format == PIPE_FORMAT_S8_UNORM) - return FALSE; - switch(format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -152,8 +153,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, break; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) - return winsys->is_displaytarget_format_supported(winsys, format); + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, format)) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + /* FIXME: Temporary restriction. See lp_state_fs.c. */ + if(format_desc->block.bits != 32) + return FALSE; + } + + /* FIXME: Temporary restrictions. 
See lp_bld_sample_soa.c */ + if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + } return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c43b3da450..ffcbc9a379 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -33,7 +33,6 @@ */ #include "lp_context.h" -#include "lp_prim_setup.h" #include "lp_quad.h" #include "lp_setup.h" #include "lp_state.h" @@ -90,6 +89,8 @@ struct setup_context { float oneoverarea; int facing; + float pixel_offset; + struct quad_header quad[MAX_QUADS]; struct quad_header *quad_ptrs[MAX_QUADS]; unsigned count; @@ -483,6 +484,16 @@ static boolean setup_sort_vertices( struct setup_context *setup, ((det > 0.0) ^ (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); + /* Prepare pixel offset for rasterisation: + * - pixel center (0.5, 0.5) for GL, or + * - assume (0.0, 0.0) for other APIs. + */ + if (setup->llvmpipe->rasterizer->gl_rasterization_rules) { + setup->pixel_offset = 0.5f; + } else { + setup->pixel_offset = 0.0f; + } + return TRUE; } @@ -508,7 +519,7 @@ static void tri_pos_coeff( struct setup_context *setup, /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). + * pixel centers, in other words (pixel_offset, pixel_offset). * * this is neat but unfortunately not a good way to do things for * triangles with very large values of dadx or dady as it will @@ -519,8 +530,8 @@ static void tri_pos_coeff( struct setup_context *setup, * instead - i'll switch to this later. */ setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -609,8 +620,8 @@ static void tri_linear_coeff( struct setup_context *setup, * instead - i'll switch to this later. 
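The a0 computed just below is the constant term of the interpolation plane, chosen so that evaluating at integer fragment coordinates samples the intended position. A scalar sketch of the relationship (pixel_offset is 0.5 under GL rasterization rules and 0.0 otherwise, as set in setup_sort_vertices above; names illustrative, not part of the patch):

/* With a0 = v_min - (dadx * (vmin_x - pixel_offset) +
 *                    dady * (vmin_y - pixel_offset)),
 * evaluating at integer fragment coordinates (ix, iy) yields the plane
 * value at (ix + pixel_offset, iy + pixel_offset), i.e. the pixel center
 * when GL rules are in effect.
 */
static float
attr_at_pixel(float a0, float dadx, float dady, int ix, int iy)
{
   return a0 + dadx * (float)ix + dady * (float)iy;
}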
*/ setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -661,8 +672,8 @@ static void tri_persp_coeff( struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } @@ -746,12 +757,12 @@ static void setup_tri_coefficients( struct setup_context *setup ) static void setup_tri_edges( struct setup_context *setup ) { - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; + float vmin_x = setup->vmin[0][0] + setup->pixel_offset; + float vmid_x = setup->vmid[0][0] + setup->pixel_offset; - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; + float vmin_y = setup->vmin[0][1] - setup->pixel_offset; + float vmid_y = setup->vmid[0][1] - setup->pixel_offset; + float vmax_y = setup->vmax[0][1] - setup->pixel_offset; setup->emaj.sy = ceilf(vmin_y); setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); @@ -950,8 +961,8 @@ linear_pos_coeff(struct setup_context *setup, setup->coef.dadx[0][i] = dadx; setup->coef.dady[0][i] = dady; setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -972,8 +983,8 @@ line_linear_coeff(struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } @@ -998,8 +1009,8 @@ line_persp_coeff(struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 3f03bd0057..b2e75d3b14 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -76,7 +76,7 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(blend_color->color[i]); for (j = 0; j < 16; ++j) - llvmpipe->jit_context.blend_color[i*4 + j] = c; + llvmpipe->jit_context.blend_color[i*16 + j] = c; } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 30fb41ea65..c753b183c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -65,26 +65,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ const struct lp_fragment_shader 
*lpfs = llvmpipe->fs; - const enum interp_mode colorInterp - = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - if (llvmpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output @@ -112,33 +105,21 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + interp = INTERP_POS; break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FACE: - /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); + if (llvmpipe->rasterizer->flatshade) { + interp = INTERP_CONSTANT; + } break; - - default: - assert(0); } + + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7728ba6076..ee0f69b2af 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -148,6 +148,20 @@ generate_depth(LLVMBuilderRef builder, format_desc = util_format_description(key->zsbuf_format); assert(format_desc); + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(src_type.floating) { + src_type.sign = FALSE; + src_type.norm = TRUE; + } + else { + assert(!src_type.sign); + assert(src_type.norm); + } + /* Pick the depth type. 
*/ dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); @@ -155,14 +169,11 @@ generate_depth(LLVMBuilderRef builder, assert(dst_type.width == src_type.width); assert(dst_type.length == src_type.length); -#if 1 - src = lp_build_clamped_float_to_unsigned_norm(builder, - src_type, - dst_type.width, - src); -#else lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); -#endif + + dst_ptr = LLVMBuildBitCast(builder, + dst_ptr, + LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); lp_build_depth_test(builder, &key->depth, @@ -400,6 +411,7 @@ generate_fragment(struct llvmpipe_context *lp, #ifdef DEBUG tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } @@ -419,6 +431,34 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode) + debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } + } + #endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); @@ -582,10 +622,12 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. 
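The hunk that follows wraps IR verification in DEBUG and then runs the screen's per-function pass manager; the execution engine created in lp_jit.c turns the result into callable code. A minimal sketch of that sequence with the LLVM-C API, using hypothetical engine/pass/func parameters in place of the fields kept on llvmpipe_screen:

#include <llvm-c/Core.h>
#include <llvm-c/Analysis.h>
#include <llvm-c/ExecutionEngine.h>

static void *
jit_compile(LLVMExecutionEngineRef engine,
            LLVMPassManagerRef pass,
            LLVMValueRef func)
{
   /* Nonzero return means the IR is malformed; diagnostics are printed. */
   if (LLVMVerifyFunction(func, LLVMPrintMessageAction))
      return NULL;

   /* Run the per-function optimization passes set up at screen creation. */
   LLVMRunFunctionPassManager(pass, func);

   /* Code-generate and return a callable pointer to the function. */
   return LLVMGetPointerToGlobal(engine, func);
}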
*/ +#ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); - abort(); + assert(0); } +#endif LLVMRunFunctionPassManager(screen->pass, variant->function); diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index c69d90c723..8333805a3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -102,8 +102,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; + jit_tex->width = tex->width0; + jit_tex->height = tex->height0; jit_tex->stride = lp_tex->stride[0]; if(!lp_tex->dt) jit_tex->data = lp_tex->data; diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 15c3029614..8a761648e7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -92,5 +92,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) (struct lp_vertex_shader *)vs; draw_delete_vertex_shader(llvmpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index e3af81cffb..29fff91981 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -530,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -595,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index ac2a6d05e3..968c7a2d4a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module, 
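/*
 * Aside: the ALIGN_STACK / ALIGN16_ATTRIB annotations added to the test
 * programs above presumably exist so the JIT-generated vector code sees
 * 16-byte-aligned stack buffers.  A sketch of what such macros commonly
 * expand to with GCC on x86; the exact Mesa definitions (p_compiler.h) may
 * differ, and the *_SKETCH names and example() are illustrative only:
 */
#ifdef __GNUC__
#define ALIGN_STACK_SKETCH    __attribute__((force_align_arg_pointer)) /* x86-specific */
#define ALIGN16_ATTRIB_SKETCH __attribute__((aligned(16)))

static ALIGN_STACK_SKETCH void example(void)
{
   ALIGN16_ATTRIB_SKETCH unsigned char buf[64];  /* safe for aligned SSE loads/stores */
   (void) buf;
}
#endif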
} +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -229,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 5dc8297fe9..23ea9ebbe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -39,6 +39,7 @@ #include "util/u_format.h" #include "lp_bld_format.h" +#include "lp_test.h" struct pixel_test_case @@ -89,34 +90,62 @@ struct pixel_test_case test_cases[] = }; -typedef void (*load_ptr_t)(const void *, float *); +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + const struct util_format_description *desc, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%s\n", desc->name); + + fflush(fp); +} + + +typedef void (*load_ptr_t)(const uint32_t packed, float *); static LLVMValueRef add_load_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMInt32Type(); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_load_rgba_aos(builder, format, ptr); + if(desc->block.bits < 32) + packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), ""); + + rgba = lp_build_unpack_rgba_aos(builder, desc, packed); + LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); @@ -126,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module, } -typedef void (*store_ptr_t)(void *, const float *); +typedef void (*store_ptr_t)(uint32_t *, const float *); static LLVMValueRef add_store_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed_ptr; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; + LLVMValueRef packed; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMPointerType(LLVMInt32Type(), 0); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = 
LLVMAppendBasicBlock(func, "entry"); @@ -155,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba_aos(builder, format, ptr, rgba); + packed = lp_build_pack_rgba_aos(builder, desc, rgba); + + if(desc->block.bits < 32) + packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + + LLVMBuildStore(builder, packed, packed_ptr); LLVMBuildRetVoid(builder); @@ -164,8 +199,9 @@ add_store_rgba_test(LLVMModuleRef module, } +ALIGN_STACK static boolean -test_format(const struct pixel_test_case *test) +test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { LLVMModuleRef module = NULL; LLVMValueRef load = NULL; @@ -187,8 +223,8 @@ test_format(const struct pixel_test_case *test) module = LLVMModuleCreateWithName("test"); - load = add_load_rgba_test(module, test->format); - store = add_store_rgba_test(module, test->format); + load = add_load_rgba_test(module, desc); + store = add_store_rgba_test(module, desc); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -224,7 +260,7 @@ test_format(const struct pixel_test_case *test) memset(unpacked, 0, sizeof unpacked); packed = 0; - load_ptr(&test->packed, unpacked); + load_ptr(test->packed, unpacked); store_ptr(&packed, unpacked); success = TRUE; @@ -250,25 +286,29 @@ test_format(const struct pixel_test_case *test) if(pass) LLVMDisposePassManager(pass); + if(fp) + write_tsv_row(fp, desc, success); + return success; } -int main(int argc, char **argv) +boolean +test_all(unsigned verbose, FILE *fp) { unsigned i; - int ret; + bool success = TRUE; -#ifdef LLVM_NATIVE_ARCH - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); -#endif + for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) + if(!test_format(verbose, fp, &test_cases[i])) + success = FALSE; - util_cpu_detect(); + return success; +} - for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) - if(!test_format(&test_cases[i])) - ret = 1; - return ret; +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + return test_all(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index d4767ff52b..314544aa9a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -37,9 +37,22 @@ #include "util/u_cpu_detect.h" #include "lp_bld_const.h" +#include "lp_bld_misc.h" #include "lp_test.h" +#ifdef PIPE_CC_MSVC +static INLINE double +round(double x) +{ + if (x >= 0.0) + return floor(x + 0.5); + else + return ceil(x - 0.5); +} +#endif + + void dump_type(FILE *fp, struct lp_type type) @@ -367,10 +380,8 @@ int main(int argc, char **argv) n = atoi(argv[i]); } -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif util_cpu_detect(); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 773e848242..c7c4143bc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -36,6 +36,7 @@ #include "util/u_memory.h" #include "util/u_tile.h" #include "util/u_format.h" +#include "util/u_math.h" #include "lp_context.h" #include "lp_surface.h" #include "lp_texture.h" @@ -270,8 +271,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, addr.bits.level, addr.bits.z, PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); + u_minify(tc->texture->width0, addr.bits.level), + 
u_minify(tc->texture->height0, addr.bits.level)); tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index a1365a045f..0d01c07fb5 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = fabsf(dsdx); dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; + rho = MAX2(dsdx, dsdy) * texture->width0; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dtdx = fabsf(dtdx); dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; + max = MAX2(dtdx, dtdy) * texture->height0; rho = MAX2(rho, max); } if (p) { @@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dpdx = fabsf(dpdx); dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; + max = MAX2(dpdx, dpdy) * texture->depth0; rho = MAX2(rho, max); } @@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level) || + z < 0 || z >= (int) u_minify(texture->depth0, level)) { rgba[0][j] = sampler->border_color[0]; rgba[1][j] = sampler->border_color[1]; rgba[2][j] = sampler->border_color[2]; @@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1250,9 +1250,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); assert(width > 0); assert(height > 0); @@ -1394,8 +1394,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, /* texture RECTS cannot be mipmapped */ assert(level0 == level1); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1513,8 +1513,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, /* Do this elsewhere: */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); + samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); /* Try to hook in a faster sampler. Ultimately we'll have to * code-generate these. 
Luckily most of this looks like it is diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index a00f2495df..65d62fd072 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -57,9 +57,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, { struct pipe_texture *pt = &lpt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; @@ -68,9 +68,6 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, for (level = 0; level <= pt->last_level; level++) { unsigned nblocksx, nblocksy; - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); @@ -87,9 +84,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } lpt->data = align_malloc(buffer_size, 16); @@ -104,13 +101,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, struct llvmpipe_winsys *winsys = screen->winsys; pf_get_block(lpt->base.format, &lpt->base.block); - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); + lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); + lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width[0], - lpt->base.height[0], + lpt->base.width0, + lpt->base.height0, 16, &lpt->stride[0] ); @@ -172,7 +169,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -183,8 +180,8 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, lpt->base = *base; pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = screen; - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); + lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); + lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); lpt->stride[0] = stride[0]; pipe_buffer_reference(&lpt->buffer, buffer); @@ -229,8 +226,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = lpt->level_offset[level]; ps->usage = usage; diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 21f990fd53..0cce71ad1d 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -57,8 +57,8 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit) | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER | 
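/*
 * Aside: the recurring edit in the texture hunks above and below replaces
 * the per-level pt->width[level] / height[level] / depth[level] arrays with
 * the base sizes (width0/height0/depth0) plus on-demand minification.  A
 * sketch of the helper being used; believed to match util/u_math.h of this
 * era, but treat the exact definition (and the _sketch name) as an
 * assumption:
 */
static unsigned u_minify_sketch(unsigned base_size, unsigned level)
{
   unsigned size = base_size >> level;
   return size ? size : 1;   /* mip dimensions never drop below 1 */
}

/* e.g. a 37x16 texture's level-2 mip is u_minify(37,2) x u_minify(16,2) = 9 x 4 */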
nv04_fragtex_format(pt->format) | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE ; diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 93f752faec..4fd72c82e6 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv04_context.h" #include "nv04_screen.h" @@ -9,31 +10,29 @@ static void nv04_miptree_layout(struct nv04_miptree *nv04mt) { struct pipe_texture *pt = &nv04mt->base; - uint width = pt->width[0], height = pt->height[0]; + uint width = pt->width0, height = pt->height0; uint offset = 0; int nr_faces, l; nr_faces = 1; for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - nv04mt->level[l].pitch = pt->width[0]; + nv04mt->level[l].pitch = pt->width0; nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); } for (l = 0; l <= pt->last_level; l++) { nv04mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - offset += nv04mt->level[l].pitch * pt->height[l]; + offset += nv04mt->level[l].pitch * u_minify(pt->height0, l); } nv04mt->total_size = offset; @@ -75,7 +74,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv04_miptree); @@ -120,8 +119,8 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index d356ebd8b3..ef3005db5f 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -425,9 +425,9 @@ nv04_init_state_functions(struct nv04_context *nv04) nv04->pipe.delete_blend_state = nv04_blend_state_delete; nv04->pipe.create_sampler_state = nv04_sampler_state_create; - nv04->pipe.bind_sampler_states = nv04_sampler_state_bind; + nv04->pipe.bind_fragment_sampler_states = nv04_sampler_state_bind; nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; - nv04->pipe.set_sampler_textures = nv04_set_sampler_texture; + nv04->pipe.set_fragment_sampler_textures = nv04_set_sampler_texture; nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; nv04->pipe.bind_rasterizer_state = 
nv04_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 8c7eb367e2..8be134b83d 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -42,6 +42,7 @@ nv04_rect_format(enum pipe_format format) case PIPE_FORMAT_A8L8_UNORM: case PIPE_FORMAT_Z16_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 6618660743..e6456429f4 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -2,6 +2,7 @@ #include <pipe/p_defines.h> #include <pipe/p_inlines.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv04_context.h" #include "nv04_screen.h" @@ -20,9 +21,9 @@ nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 933176fc32..65a22b175e 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -243,7 +243,7 @@ static void nv10_init_hwctx(struct nv10_context *nv10) OUT_RING (0.0); OUT_RINGf (16777216.0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); OUT_RINGf (-2048.0); OUT_RINGf (-2048.0); OUT_RINGf (16777215.0 * 0.5); diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 27f2f87584..906fdfeeb9 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -62,9 +62,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) txf = tf->format << 8; txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width0) << 20; + txf |= log2i(pt->height0) << 24; + txf |= log2i(pt->depth0) << 28; txf |= 8; switch (pt->target) { @@ -89,7 +89,7 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) OUT_RING (0x40000000); /* enable */ OUT_RING (txs); OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING ((pt->width0 << 16) | pt->height0); OUT_RING (ps->bcol); #endif } diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 34e3c2ebd7..b2a6c59b74 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv10_context.h" #include "nv10_screen.h" @@ -10,7 +11,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) { struct pipe_texture *pt = &nv10mt->base; boolean swizzled = FALSE; - uint width = pt->width[0], height = 
pt->height[0]; + uint width = pt->width0, height = pt->height0; uint offset = 0; int nr_faces, l, f; @@ -21,8 +22,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); @@ -35,15 +35,15 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) nv10mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); } for (f = 0; f < nr_faces; f++) { for (l = 0; l <= pt->last_level; l++) { nv10mt->level[l].image_offset[f] = offset; - offset += nv10mt->level[l].pitch * pt->height[l]; + offset += nv10mt->level[l].pitch * u_minify(pt->height0, l); } } @@ -58,7 +58,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv10_miptree); @@ -133,8 +133,8 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 1806d5f8cc..7ba9777a22 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -69,9 +69,9 @@ void nv10_vtxbuf_bind( struct nv10_context* nv10 ) { int i; for(i = 0; i < 8; i++) { - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1); + BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); OUT_RING(0/*nv10->vtxbuf*/); - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1); + BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1); OUT_RING(0/*XXX*/); } } diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 9b38219b99..ffc6be3c40 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -553,9 +553,9 @@ nv10_init_state_functions(struct nv10_context *nv10) nv10->pipe.delete_blend_state = nv10_blend_state_delete; nv10->pipe.create_sampler_state = nv10_sampler_state_create; - nv10->pipe.bind_sampler_states = nv10_sampler_state_bind; + nv10->pipe.bind_fragment_sampler_states = nv10_sampler_state_bind; nv10->pipe.delete_sampler_state = nv10_sampler_state_delete; - nv10->pipe.set_sampler_textures = nv10_set_sampler_texture; + nv10->pipe.set_fragment_sampler_textures = nv10_set_sampler_texture; nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create; nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index d8691ef9c6..2577ab73b5 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -129,6 +129,9 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= 
NV10TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index 8feb85e4bd..ec54297ab0 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -2,6 +2,7 @@ #include <pipe/p_defines.h> #include <pipe/p_inlines.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv10_context.h" #include "nv10_screen.h" @@ -20,9 +21,9 @@ nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index 9a48739661..276db8b57b 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -360,13 +360,13 @@ static void nv20_init_hwctx(struct nv20_context *nv20) OUT_RINGf (0.0); OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ OUT_RINGf (0.0); OUT_RINGf (16777215.0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); OUT_RINGf (0.0); /* no effect?, w/2 */ OUT_RINGf (0.0); /* no effect?, h/2 */ OUT_RINGf (16777215.0 * 0.5); diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c index 495a7be912..2db4a4015a 100644 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -62,9 +62,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) txf = tf->format << 8; txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width0) << 20; + txf |= log2i(pt->height0) << 24; + txf |= log2i(pt->depth0) << 28; txf |= 8; switch (pt->target) { @@ -89,7 +89,7 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) OUT_RING (0x40000000); /* enable */ OUT_RING (txs); OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING ((pt->width0 << 16) | pt->height0); OUT_RING (ps->bcol); #endif } diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index 185fbf53e0..554e28e47d 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv20_context.h" #include "nv20_screen.h" @@ -9,7 +10,7 @@ static void nv20_miptree_layout(struct nv20_miptree *nv20mt) { struct pipe_texture *pt = &nv20mt->base; - uint width = pt->width[0], height = pt->height[0]; + uint width = pt->width0, height = pt->height0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & 
(PIPE_TEXTURE_USAGE_SAMPLER | @@ -25,21 +26,19 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt) } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv20mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + nv20mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); else - nv20mt->level[l].pitch = pt->width[l] * pt->block.size; + nv20mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; nv20mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); } for (f = 0; f < nr_faces; f++) { @@ -47,14 +46,14 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt) nv20mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(nv20mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(nv20mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += nv20mt->level[l].pitch * pt->height[l]; + offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); } nv20mt->level[l].image_offset[f] = offset; - offset += nv20mt->level[l].pitch * pt->height[l]; + offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); } nv20mt->total_size = offset; @@ -68,7 +67,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv20_miptree); @@ -100,8 +99,8 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) mt->base.screen = screen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -167,8 +166,8 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index ed4084980f..3a82e63423 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -546,9 +546,9 @@ nv20_init_state_functions(struct nv20_context *nv20) nv20->pipe.delete_blend_state = nv20_blend_state_delete; nv20->pipe.create_sampler_state = nv20_sampler_state_create; - nv20->pipe.bind_sampler_states = nv20_sampler_state_bind; + nv20->pipe.bind_fragment_sampler_states = nv20_sampler_state_bind; nv20->pipe.delete_sampler_state = nv20_sampler_state_delete; - nv20->pipe.set_sampler_textures = nv20_set_sampler_texture; + nv20->pipe.set_fragment_sampler_textures = nv20_set_sampler_texture; nv20->pipe.create_rasterizer_state = 
nv20_rasterizer_state_create; nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 4042f46d05..0122b1c2cd 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -135,6 +135,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20; switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= NV20TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8; diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 81b4f1a917..87b5c14a3c 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -2,6 +2,7 @@ #include <pipe/p_defines.h> #include <pipe/p_inlines.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv20_context.h" #include "nv20_screen.h" @@ -20,9 +21,9 @@ nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index 388245ecb0..9e8aab9754 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -253,32 +253,32 @@ static INLINE struct nv20_sreg tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv20_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + src = nv20_sr(NV30SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + if (vpc->high_temp < fsrc->Register.Index) + vpc->high_temp = fsrc->Register.Index; + src = nv20_sr(NV30SR_TEMP, fsrc->Register.Index); break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -286,14 +286,14 @@ static INLINE struct nv20_sreg tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv20_sreg dst; - switch 
(fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: dst = nv20_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); + vpc->output_map[fdst->Register.Index]); break; case TGSI_FILE_TEMPORARY: - dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index); + dst = nv20_sr(NV30SR_TEMP, fdst->Register.Index); if (vpc->high_temp < dst.index) vpc->high_temp = dst.index; break; @@ -334,8 +334,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -343,11 +343,11 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - switch (fsrc->SrcRegister.File) { + fsrc = &finst->Src[i]; + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -360,8 +360,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, */ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -378,8 +378,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -490,15 +490,15 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_VP_INST_DEST_POS; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -506,10 +506,10 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -523,8 +523,8 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, hw = NV30_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -535,7 +535,7 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, return FALSE; } - vpc->output_map[fdec->DeclarationRange.First] = hw; + vpc->output_map[fdec->Range.First] = hw; return TRUE; } diff --git 
a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index cc0385426c..40965a9772 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -4,6 +4,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -131,7 +132,7 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); break; case NV30SR_NONE: sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); @@ -236,20 +237,20 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) { struct nv30_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: src = nv30_sr(NV30SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); + fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); + src = constant(fpc, fsrc->Register.Index, NULL); break; case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); + src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); if (fpc->high_temp < src.index) fpc->high_temp = src.index; break; @@ -257,7 +258,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) * Luckily fragprog results are just temp regs.. */ case TGSI_FILE_OUTPUT: - if (fsrc->SrcRegister.Index == fpc->colour_id) + if (fsrc->Register.Index == fpc->colour_id) return nv30_sr(NV30SR_OUTPUT, 0); else return nv30_sr(NV30SR_OUTPUT, 1); @@ -267,12 +268,12 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -280,22 +281,22 @@ static INLINE struct nv30_sreg tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { int idx; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - if (fdst->DstRegister.Index == fpc->colour_id) + if (fdst->Register.Index == fpc->colour_id) return nv30_sr(NV30SR_OUTPUT, 0); else return nv30_sr(NV30SR_OUTPUT, 1); break; case TGSI_FILE_TEMPORARY: - idx = fdst->DstRegister.Index + 1; + idx = fdst->Register.Index + 1; if (fpc->high_temp < idx) fpc->high_temp = idx; return nv30_sr(NV30SR_TEMP, idx); case TGSI_FILE_NULL: return nv30_sr(NV30SR_NONE, 0); default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); return nv30_sr(NV30SR_NONE, 0); } } @@ -362,8 +363,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if 
(fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } } @@ -371,9 +372,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -384,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { NOUVEAU_MSG("extra src attr %d\n", - fsrc->SrcRegister.Index); + fsrc->Register.Index); src[i] = temp(fpc); arith(fpc, 0, MOV, src[i], MASK_ALL, tgsi_src(fpc, fsrc), none, none); @@ -399,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -412,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, /* handled above */ break; case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; + unit = fsrc->Register.Index; break; case TGSI_FILE_OUTPUT: break; @@ -422,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, } } - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { @@ -571,15 +572,15 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_FP_OP_INPUT_SRC_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -590,9 +591,9 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, hw = NV30_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { + if (fdec->Semantic.Index <= 7) { hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. 
- SemanticIndex); + Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -603,7 +604,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->DeclarationRange.First] = hw; + fpc->attrib_map[fdec->Range.First] = hw; return TRUE; } @@ -611,12 +612,12 @@ static boolean nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, const struct tgsi_full_declaration *fdec) { - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->DeclarationRange.First; + fpc->depth_id = fdec->Range.First; break; case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->DeclarationRange.First; + fpc->colour_id = fdec->Range.First; break; default: NOUVEAU_ERR("bad output semantic\n"); @@ -652,9 +653,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) goto out_err; break; /*case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break;*/ default: @@ -666,7 +667,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) { struct tgsi_full_immediate *imm; float vals[4]; - + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -754,7 +755,7 @@ nv30_fragprog_translate(struct nv30_context *nv30, fp->insn[fpc->inst_offset + 1] = 0x00000000; fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - + fp->translated = TRUE; fp->on_hw = FALSE; out_err: @@ -838,7 +839,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) update_constants: if (fp->nr_consts) { float *map; - + map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index f5f17d4071..b3293ee700 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -21,6 +21,7 @@ struct nv30_texture_format { static struct nv30_texture_format nv30_texture_formats[] = { + _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W), _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), @@ -29,7 +30,7 @@ nv30_texture_formats[] = { _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), -// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), + _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X), _(Z24S8_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), @@ -73,9 +74,9 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txf = tf->format; txf |= ((pt->last_level>0) ? 
NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; - txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; - txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; + txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; + txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; + txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; switch (pt->target) { @@ -114,8 +115,8 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | 0x2000 /*voodoo*/); - so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | - pt->height[0]); + so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height0); so_data (so, ps->bcol); return so; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 17acca61ab..b4c306d127 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv30_context.h" @@ -8,7 +9,7 @@ static void nv30_miptree_layout(struct nv30_miptree *nv30mt) { struct pipe_texture *pt = &nv30mt->base; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint width = pt->width0, height = pt->height0, depth = pt->depth0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -21,29 +22,26 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nr_faces = 6; } else if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; + nr_faces = pt->depth0; } else { nr_faces = 1; } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + nv30mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); else - nv30mt->level[l].pitch = pt->width[l] * pt->block.size; + nv30mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; nv30mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } for (f = 0; f < nr_faces; f++) { @@ -51,14 +49,14 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nv30mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(nv30mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += nv30mt->level[l].pitch * pt->height[l]; + offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); } nv30mt->level[l].image_offset[f] = offset; - offset += nv30mt->level[l].pitch * pt->height[l]; + offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); } nv30mt->total_size = offset; @@ -79,8 +77,8 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.screen = pscreen; /* Swizzled 
textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -134,7 +132,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv30_miptree); @@ -147,6 +145,9 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].pitch = stride[0]; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + /* Assume whoever created this buffer expects it to be linear for now */ + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + pipe_buffer_reference(&mt->buffer, pb); return &mt->base; } @@ -179,8 +180,8 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index bb40e1803d..7cd36902eb 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -102,15 +102,24 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - return (format == front->format); + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + return TRUE; + default: + break; + } } else if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: - return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM); + return TRUE; case PIPE_FORMAT_Z16_UNORM: - return (front->format == PIPE_FORMAT_R5G6B5_UNORM); + if (front) { + return (front->format == PIPE_FORMAT_R5G6B5_UNORM); + } + return TRUE; default: break; } diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index b91e972c12..3f802d9241 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -690,9 +690,9 @@ nv30_init_state_functions(struct nv30_context *nv30) nv30->pipe.delete_blend_state = nv30_blend_state_delete; nv30->pipe.create_sampler_state = nv30_sampler_state_create; - nv30->pipe.bind_sampler_states = nv30_sampler_state_bind; + nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind; nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; - nv30->pipe.set_sampler_textures = nv30_set_sampler_texture; + nv30->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture; nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index f90681b0f9..6f6d1740d6 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -66,6 +66,9 @@ 
nv30_state_framebuffer_validate(struct nv30_context *nv30) } switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; @@ -92,7 +95,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) assert(0); } - if (colour_bits != zeta_bits) { + if (colour_bits > zeta_bits) { return FALSE; } diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 98011decf7..5e429b4d85 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -2,6 +2,7 @@ #include <pipe/p_defines.h> #include <pipe/p_inlines.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv30_context.h" #include "nv30_screen.h" @@ -20,9 +21,9 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 14a5c0260d..36ac8299f0 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -253,32 +253,32 @@ static INLINE struct nv30_sreg tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv30_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + if (vpc->high_temp < fsrc->Register.Index) + vpc->high_temp = fsrc->Register.Index; + src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index); break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -286,14 +286,14 @@ static INLINE struct nv30_sreg tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv30_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: dst = nv30_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); + vpc->output_map[fdst->Register.Index]); break; case TGSI_FILE_TEMPORARY: - 
dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); + dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index); if (vpc->high_temp < dst.index) vpc->high_temp = dst.index; break; @@ -334,8 +334,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -343,11 +343,11 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - switch (fsrc->SrcRegister.File) { + fsrc = &finst->Src[i]; + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -360,8 +360,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, */ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -378,8 +378,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -490,15 +490,15 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_VP_INST_DEST_POS; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -506,10 +506,10 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -523,8 +523,8 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, hw = NV30_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -535,7 +535,7 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, return FALSE; } - vpc->output_map[fdec->DeclarationRange.First] = hw; + vpc->output_map[fdec->Range.First] = hw; return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 99277506fc..1bf16726d1 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -255,50 +255,50 @@ 
tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) { struct nv40_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: src = nv40_sr(NV40SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); + fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); + src = constant(fpc, fsrc->Register.Index, NULL); break; case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = fpc->r_temp[fsrc->SrcRegister.Index]; + src = fpc->r_temp[fsrc->Register.Index]; break; /* NV40 fragprog result regs are just temps, so this is simple */ case TGSI_FILE_OUTPUT: - src = fpc->r_result[fsrc->SrcRegister.Index]; + src = fpc->r_result[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } static INLINE struct nv40_sreg tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - return fpc->r_result[fdst->DstRegister.Index]; + return fpc->r_result[fdst->Register.Index]; case TGSI_FILE_TEMPORARY: - return fpc->r_temp[fdst->DstRegister.Index]; + return fpc->r_temp[fdst->Register.Index]; case TGSI_FILE_NULL: return nv40_sr(NV40SR_NONE, 0); default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); return nv40_sr(NV40SR_NONE, 0); } } @@ -364,8 +364,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } } @@ -373,9 +373,9 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -386,10 +386,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -399,8 +399,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_FILE_CONSTANT: if ((ci == -1 && ii == -1) || - ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + ci == fsrc->Register.Index) { + ci = 
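Most of the churn in these fragprog/vertprog hunks is the TGSI interface rename: FullSrcRegisters[i].SrcRegister.* becomes Src[i].Register.*, FullDstRegisters[0].DstRegister.* becomes Dst[0].Register.*, and Absolute moves from the old ExtMod token onto Register. A cut-down sketch of the new access pattern (toy_src is an illustrative stand-in, not the real tgsi_full_src_register):

#include <stdio.h>

struct toy_src {
   struct {
      int File, Index;
      int Negate, Absolute;              /* Absolute used to live in ExtMod */
      int SwizzleX, SwizzleY, SwizzleZ, SwizzleW;
   } Register;                           /* was SrcRegister */
};

static void print_src(const struct toy_src *fsrc)
{
   printf("file %d index %d neg %d abs %d swz %d%d%d%d\n",
          fsrc->Register.File, fsrc->Register.Index,
          fsrc->Register.Negate, fsrc->Register.Absolute,
          fsrc->Register.SwizzleX, fsrc->Register.SwizzleY,
          fsrc->Register.SwizzleZ, fsrc->Register.SwizzleW);
}

int main(void)
{
   struct toy_src s = { { 0, 3, 1, 0, 0, 1, 2, 3 } };
   print_src(&s);
   return 0;
}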
fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -410,8 +410,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_FILE_IMMEDIATE: if ((ci == -1 && ii == -1) || - ii == fsrc->SrcRegister.Index) { - ii = fsrc->SrcRegister.Index; + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -423,7 +423,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, /* handled above */ break; case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; + unit = fsrc->Register.Index; break; case TGSI_FILE_OUTPUT: break; @@ -433,8 +433,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } } - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { @@ -644,15 +644,15 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV40_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_FP_OP_INPUT_SRC_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -663,9 +663,9 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, hw = NV40_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { + if (fdec->Semantic.Index <= 7) { hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. 
- SemanticIndex); + Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -676,7 +676,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->DeclarationRange.First] = hw; + fpc->attrib_map[fdec->Range.First] = hw; return TRUE; } @@ -684,15 +684,15 @@ static boolean nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->DeclarationRange.First; + unsigned idx = fdec->Range.First; unsigned hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = 1; break; case TGSI_SEMANTIC_COLOR: - switch (fdec->Semantic.SemanticIndex) { + switch (fdec->Semantic.Index) { case 0: hw = 0; break; case 1: hw = 2; break; case 2: hw = 3; break; @@ -738,9 +738,9 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) goto out_err; break; case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; default: diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index f6cdf31dfe..44abc84596 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -23,6 +23,7 @@ struct nv40_texture_format { static struct nv40_texture_format nv40_texture_formats[] = { + _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), @@ -116,11 +117,11 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); - so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | - pt->height[0]); + so_data (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) | + pt->height0); so_data (so, ps->bcol); so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); - so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); return so; } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 5a201ccf45..f73bedff6d 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv40_context.h" @@ -8,7 +9,7 @@ static void nv40_miptree_layout(struct nv40_miptree *mt) { struct pipe_texture *pt = &mt->base; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint width = pt->width0, height = pt->height0, depth = pt->depth0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -21,29 +22,26 @@ nv40_miptree_layout(struct nv40_miptree *mt) nr_faces = 6; } else if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; + nr_faces = pt->depth0; } else { nr_faces = 1; } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - mt->level[l].pitch = 
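With width0/height0 now plain scalars, the fragtex hunks pack the base size straight into one register word. A sketch of that packing (the 16-bit shift is an assumption standing in for the NV40TCL_TEX_SIZE0_W_SHIFT constant from the driver's generated headers; the texture size is made up):

#include <stdint.h>
#include <stdio.h>

#define TEX_SIZE_W_SHIFT 16   /* assumed value; the driver uses its header constant */

int main(void)
{
   unsigned width0 = 640, height0 = 480;            /* hypothetical texture */
   uint32_t size_word = ((uint32_t)width0 << TEX_SIZE_W_SHIFT) | height0;
   printf("TEX_SIZE word: 0x%08x\n", size_word);    /* 0x028001e0 */
   return 0;
}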
align(pt->width[0] * pt->block.size, 64); + mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); else - mt->level[l].pitch = pt->width[l] * pt->block.size; + mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } for (f = 0; f < nr_faces; f++) { @@ -51,14 +49,14 @@ nv40_miptree_layout(struct nv40_miptree *mt) mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += mt->level[l].pitch * pt->height[l]; + offset += mt->level[l].pitch * u_minify(pt->height0, l); } mt->level[l].image_offset[f] = offset; - offset += mt->level[l].pitch * pt->height[l]; + offset += mt->level[l].pitch * u_minify(pt->height0, l); } mt->total_size = offset; @@ -79,8 +77,8 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.screen = pscreen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -128,7 +126,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv40_miptree); @@ -141,6 +139,9 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].pitch = stride[0]; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + /* Assume whoever created this buffer expects it to be linear for now */ + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + pipe_buffer_reference(&mt->buffer, pb); return &mt->base; } @@ -173,8 +174,8 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index c3ee4d2345..bc34e32a4b 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -705,9 +705,9 @@ nv40_init_state_functions(struct nv40_context *nv40) nv40->pipe.delete_blend_state = nv40_blend_state_delete; nv40->pipe.create_sampler_state = nv40_sampler_state_create; - nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; + nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; - nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; + nv40->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture; nv40->pipe.create_rasterizer_state = 
nv40_rasterizer_state_create; nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index c2f739157a..1c7a7cd64f 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -57,6 +57,9 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= NV40TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 92caee6f38..36e253c96f 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -2,6 +2,7 @@ #include <pipe/p_defines.h> #include <pipe/p_inlines.h> #include <util/u_memory.h> +#include <util/u_math.h> #include <nouveau/nouveau_winsys.h> #include "nv40_context.h" #include "nv40_screen.h" @@ -20,9 +21,9 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 31dae2457f..55835ee644 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -295,30 +295,30 @@ static INLINE struct nv40_sreg tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv40_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); + src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = vpc->r_temp[fsrc->SrcRegister.Index]; + src = vpc->r_temp[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -326,15 +326,15 @@ static INLINE struct nv40_sreg tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv40_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - dst = vpc->r_result[fdst->DstRegister.Index]; + dst = vpc->r_result[fdst->Register.Index]; break; case TGSI_FILE_TEMPORARY: - dst = 
vpc->r_temp[fdst->DstRegister.Index]; + dst = vpc->r_temp[fdst->Register.Index]; break; case TGSI_FILE_ADDRESS: - dst = vpc->r_address[fdst->DstRegister.Index]; + dst = vpc->r_address[fdst->Register.Index]; break; default: NOUVEAU_ERR("bad dst file\n"); @@ -405,8 +405,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -414,9 +414,9 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -427,10 +427,10 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -440,8 +440,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; case TGSI_FILE_CONSTANT: if ((ci == -1 && ii == -1) || - ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -451,8 +451,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; case TGSI_FILE_IMMEDIATE: if ((ci == -1 && ii == -1) || - ii == fsrc->SrcRegister.Index) { - ii = fsrc->SrcRegister.Index; + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -469,8 +469,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -577,19 +577,19 @@ static boolean nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->DeclarationRange.First; + unsigned idx = fdec->Range.First; int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV40_VP_INST_DEST_POS; vpc->hpos_idx = idx; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -597,10 +597,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -614,8 +614,8 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, hw = 
NV40_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -652,16 +652,16 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; #if 0 /* this would be nice.. except gallium doesn't track it */ case TGSI_FILE_ADDRESS: - if (fdec->DeclarationRange.Last > high_addr) { + if (fdec->Range.Last > high_addr) { high_addr = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; #endif @@ -681,11 +681,11 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) const struct tgsi_full_dst_register *fdst; finst = &p.FullToken.FullInstruction; - fdst = &finst->FullDstRegisters[0]; + fdst = &finst->Dst[0]; - if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { - if (fdst->DstRegister.Index > high_addr) - high_addr = fdst->DstRegister.Index; + if (fdst->Register.File == TGSI_FILE_ADDRESS) { + if (fdst->Register.Index > high_addr) + high_addr = fdst->Register.Index; } } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 33667e8765..4b0f062295 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -69,6 +69,18 @@ struct nv50_sampler_stateobj { unsigned tsc[8]; }; +static INLINE unsigned +get_tile_height(uint32_t tile_mode) +{ + return 1 << ((tile_mode & 0xf) + 2); +} + +static INLINE unsigned +get_tile_depth(uint32_t tile_mode) +{ + return 1 << (tile_mode >> 4); +} + struct nv50_miptree_level { int *image_offset; unsigned pitch; @@ -196,7 +208,8 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); extern void nv50_linkage_validate(struct nv50_context *nv50); -extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); +extern void nv50_program_destroy(struct nv50_context *nv50, + struct nv50_program *p); /* nv50_state_validate.c */ extern boolean nv50_state_validate(struct nv50_context *nv50); @@ -210,4 +223,12 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50, /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); +/* nv50_transfer.c */ +extern void +nv50_upload_sifc(struct nv50_context *nv50, + struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc, + unsigned dst_format, int dst_w, int dst_h, int dst_pitch, + void *src, unsigned src_format, int src_pitch, + int x, int y, int w, int h, int cpp); + #endif diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 93479a0314..3d58746793 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -26,15 +26,44 @@ #include "nv50_context.h" +/* The restrictions in tile mode selection probably aren't necessary. 
*/ +static INLINE uint32_t +get_tile_mode(unsigned ny, unsigned d) +{ + uint32_t tile_mode = 0x00; + + if (ny > 32) tile_mode = 0x04; /* height 64 tiles */ + else + if (ny > 16) tile_mode = 0x03; /* height 32 tiles */ + else + if (ny > 8) tile_mode = 0x02; /* height 16 tiles */ + else + if (ny > 4) tile_mode = 0x01; /* height 8 tiles */ + + if (d == 1) + return tile_mode; + else + if (tile_mode > 0x02) + tile_mode = 0x02; + + if (d > 16 && tile_mode < 0x02) + return tile_mode | 0x50; /* depth 32 tiles */ + if (d > 8) return tile_mode | 0x40; /* depth 16 tiles */ + if (d > 4) return tile_mode | 0x30; /* depth 8 tiles */ + if (d > 2) return tile_mode | 0x20; /* depth 4 tiles */ + + return tile_mode | 0x10; +} + static struct pipe_texture * nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) { struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); struct pipe_texture *pt = &mt->base.base; - unsigned width = tmp->width[0], height = tmp->height[0]; - unsigned depth = tmp->depth[0]; - uint32_t tile_mode, tile_flags, tile_h; + unsigned width = tmp->width0, height = tmp->height0; + unsigned depth = tmp->depth0, image_alignment; + uint32_t tile_flags; int ret, i, l; *pt = *tmp; @@ -57,62 +86,46 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) break; } - if (pt->height[0] > 32) tile_mode = 4; - else if (pt->height[0] > 16) tile_mode = 3; - else if (pt->height[0] > 8) tile_mode = 2; - else if (pt->height[0] > 4) tile_mode = 1; - else tile_mode = 0; - tile_h = 1 << (tile_mode + 2); - - switch (pt->target) { - case PIPE_TEXTURE_3D: - mt->image_nr = pt->depth[0]; - break; - case PIPE_TEXTURE_CUBE: - mt->image_nr = 6; - break; - default: - mt->image_nr = 1; - break; - } + /* XXX: texture arrays */ + mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1; for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); - lvl->pitch = align(pt->width[l] * pt->block.size, 64); - lvl->tile_mode = tile_mode; - - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64); + lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth); - if (tile_mode && height <= (tile_h >> 1)) { - tile_mode--; - tile_h >>= 1; - } + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } + image_alignment = get_tile_height(mt->level[0].tile_mode) * 64; + image_alignment *= get_tile_depth(mt->level[0].tile_mode); + + /* NOTE the distinction between arrays of mip-mapped 2D textures and + * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip. 
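get_tile_mode() above packs the tile-height selector into the low nibble and the tile-depth selector into the high nibble; the two inlines added to nv50_context.h decode them. A round-trip sketch with an invented mode value (the helpers mirror get_tile_height()/get_tile_depth() from the header hunk):

#include <stdint.h>
#include <stdio.h>

static unsigned tile_height(uint32_t tile_mode) { return 1 << ((tile_mode & 0xf) + 2); }
static unsigned tile_depth(uint32_t tile_mode)  { return 1 << (tile_mode >> 4); }

int main(void)
{
   uint32_t tm = 0x23;   /* hypothetical mode: height bits 0x3, depth bits 0x2 */
   printf("tile: %u lines tall, %u layers deep\n",
          tile_height(tm), tile_depth(tm));         /* 32 lines, 4 layers */
   return 0;
}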
+ */ for (i = 0; i < mt->image_nr; i++) { for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; int size; - tile_h = 1 << (lvl->tile_mode + 2); + unsigned tile_h = get_tile_height(lvl->tile_mode); + unsigned tile_d = get_tile_depth(lvl->tile_mode); - size = align(pt->width[l], 8) * pt->block.size; - size = align(size, 64); - size *= align(pt->height[l], tile_h); + size = lvl->pitch; + size *= align(pt->nblocksy[l], tile_h); + size *= align(u_minify(pt->depth0, l), tile_d); lvl->image_offset[i] = mt->total_size; mt->total_size += size; } + mt->total_size = align(mt->total_size, image_alignment); } ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size, @@ -135,7 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv50_miptree); @@ -186,8 +199,8 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->usage = flags; pipe_reference_init(&ps->reference, 1); ps->face = face; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5c691877e0..f0fe7e6168 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -31,10 +31,12 @@ #include "nv50_context.h" -#define NV50_SU_MAX_TEMP 64 -#define NV50_SU_MAX_ADDR 7 +#define NV50_SU_MAX_TEMP 127 +#define NV50_SU_MAX_ADDR 4 //#define NV50_PROGRAM_DUMP +/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */ + /* ARL - gallium craps itself on progs/vp/arl.txt * * MSB - Like MAD, but MUL+SUB @@ -86,12 +88,16 @@ struct nv50_reg { int index; int hw; - int neg; + int mod; int rhw; /* result hw for FP outputs, or interpolant index */ int acc; /* instruction where this reg is last read (first insn == 1) */ }; +#define NV50_MOD_NEG 1 +#define NV50_MOD_ABS 2 +#define NV50_MOD_SAT 4 + /* arbitrary limits */ #define MAX_IF_DEPTH 4 #define MAX_LOOP_DEPTH 4 @@ -150,7 +156,7 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) reg->type = type; reg->index = index; reg->hw = hw; - reg->neg = 0; + reg->mod = 0; reg->rhw = -1; reg->acc = 0; } @@ -450,14 +456,20 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) } alloc_reg(pc, dst); + if (dst->hw > 63) + set_long(pc, e); e->inst[0] |= (dst->hw << 2); } static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { + unsigned val; float f = pc->immd_buf[imm->hw]; - unsigned val = fui(imm->neg ? -f : f); + + if (imm->mod & NV50_MOD_ABS) + f = fabsf(f); + val = fui((imm->mod & NV50_MOD_NEG) ? -f : f); set_long(pc, e); /*XXX: can't be predicated - bits overlap.. 
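The size loop above now charges each level pitch bytes per block row, rounded up to whole tiles in both height and depth. A sketch of that arithmetic with made-up numbers (align_up() plays the role of the driver's align() macro):

#include <stdio.h>

/* Round v up to a multiple of a (a must be a power of two). */
static unsigned align_up(unsigned v, unsigned a) { return (v + a - 1) & ~(a - 1); }

int main(void)
{
   unsigned pitch    = align_up(100 * 4, 64);  /* 100 blocks of 4 bytes -> 448 */
   unsigned nblocksy = 100, depth = 1;
   unsigned tile_h   = 32, tile_d = 1;         /* decoded from the level's tile_mode */
   unsigned size     = pitch * align_up(nblocksy, tile_h) * align_up(depth, tile_d);
   printf("level size: %u bytes\n", size);     /* 448 * 128 * 1 = 57344 */
   return 0;
}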
catch cases where both @@ -470,16 +482,28 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) e->inst[1] |= (val >> 6) << 2; } +static INLINE void +set_addr(struct nv50_program_exec *e, struct nv50_reg *a) +{ + assert(!(e->inst[0] & 0x0c000000)); + assert(!(e->inst[1] & 0x00000004)); + + e->inst[0] |= (a->hw & 3) << 26; + e->inst[1] |= (a->hw >> 2) << 2; +} + static void -emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val) +emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, uint16_t src1_val) { struct nv50_program_exec *e = exec(pc); - assert(val <= 0xffff); - e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9); + e->inst[0] = 0xd0000000 | (src1_val << 9); e->inst[1] = 0x20000000; - e->inst[0] |= dst->hw << 2; set_long(pc, e); + e->inst[0] |= dst->hw << 2; + if (src0) /* otherwise will add to $a0, which is always 0 */ + set_addr(e, src0); emit(pc, e); } @@ -488,9 +512,10 @@ static struct nv50_reg * alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) { int i; - struct nv50_reg *a = NULL; + struct nv50_reg *a_tgsi = NULL, *a = NULL; if (!ref) { + /* allocate for TGSI address reg */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { if (pc->r_addr[i].index >= 0) continue; @@ -506,6 +531,13 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) return NULL; } + /* Allocate and set an address reg so we can access 'ref'. + * + * If and r_addr has index < 0, it is not reserved for TGSI, + * and index will be the negative of the TGSI addr index the + * value in rhw is relative to, or -256 if rhw is an offset + * from 0. If rhw < 0, the reg has not been initialized. + */ for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) { if (pc->r_addr[i].index >= 0) /* occupied for TGSI */ continue; @@ -516,17 +548,25 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) if (!a && pc->r_addr[i].acc != pc->insn_cur) a = &pc->r_addr[i]; - if (ref->hw - pc->r_addr[i].rhw < 128) { - /* alloc'd & suitable */ + if (ref->hw - pc->r_addr[i].rhw >= 128) + continue; + + if ((ref->acc >= 0 && pc->r_addr[i].index == -256) || + (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) { pc->r_addr[i].acc = pc->insn_cur; return &pc->r_addr[i]; } } assert(a); - emit_set_addr(pc, a, ref->hw * 4); - a->rhw = ref->hw % 128; + if (ref->acc < 0) + a_tgsi = pc->addr[ref->index]; + + emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); + + a->rhw = ref->hw & ~0x7f; a->acc = pc->insn_cur; + a->index = a_tgsi ? 
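nv50_program.c replaces the per-register neg boolean with a small flag word (NV50_MOD_NEG/ABS/SAT), so set_immd() above can fold abs/neg into the constant value and negation can be toggled with a single XOR, as emit_sub() does further down. A plain-C sketch of that ordering (abs first, then negate), with flag values copied from the hunk and everything else illustrative:

#include <math.h>
#include <stdio.h>

#define MOD_NEG 1   /* matches NV50_MOD_NEG */
#define MOD_ABS 2   /* matches NV50_MOD_ABS */

static float apply_mods(float f, int mod)
{
   if (mod & MOD_ABS)
      f = fabsf(f);                  /* set_immd() applies abs first... */
   return (mod & MOD_NEG) ? -f : f;  /* ...then negate */
}

int main(void)
{
   int mod = MOD_ABS;
   mod ^= MOD_NEG;                                  /* temporary negate, emit_sub() style */
   printf("%.1f\n", apply_mods(-2.5f, mod));        /* |-2.5| negated -> -2.5 */
   mod ^= MOD_NEG;                                  /* restore the caller's flags */
   return 0;
}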
-ref->index : -256; return a; } @@ -563,23 +603,13 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv, emit(pc, e); } -static INLINE void -set_addr(struct nv50_program_exec *e, struct nv50_reg *a) -{ - assert(!(e->inst[0] & 0x0c000000)); - assert(!(e->inst[1] & 0x00000004)); - - e->inst[0] |= (a->hw & 3) << 26; - e->inst[1] |= (a->hw >> 2) << 2; -} - static void set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, struct nv50_program_exec *e) { set_long(pc, e); - e->param.index = src->hw; + e->param.index = src->hw & 127; e->param.shift = s; e->param.mask = m << (s % 32); @@ -622,6 +652,8 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -681,6 +713,8 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -699,6 +733,8 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -725,6 +761,8 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= ((src->hw & 127) << 16); } @@ -771,12 +809,12 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->neg) + if (src0->mod & NV50_MOD_NEG) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { set_src_1(pc, src1, e); - if (src0->neg ^ src1->neg) { + if ((src0->mod ^ src1->mod) & NV50_MOD_NEG) { if (is_long(e)) e->inst[1] |= 0x08000000; else @@ -793,13 +831,15 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_program_exec *e = exec(pc); - e->inst[0] |= 0xb0000000; + e->inst[0] = 0xb0000000; + alloc_reg(pc, src1); check_swap_src_0_1(pc, &src0, &src1); - if (!pc->allow32 || src0->neg || src1->neg) { + if (!pc->allow32 || (src0->mod | src1->mod) || src1->hw > 63) { set_long(pc, e); - e->inst[1] |= (src0->neg << 26) | (src1->neg << 27); + e->inst[1] |= ((src0->mod & NV50_MOD_NEG) << 26) | + ((src1->mod & NV50_MOD_NEG) << 27); } set_dst(pc, dst, e); @@ -846,6 +886,11 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, set_src_0(pc, src0, e); set_src_1(pc, src1, e); + if (src0->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + if (src1->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00080000; + emit(pc, e); } @@ -853,9 +898,47 @@ static INLINE void emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - src1->neg ^= 1; + assert(src0 != src1); + src1->mod ^= NV50_MOD_NEG; emit_add(pc, dst, src0, src1); - src1->neg ^= 1; + src1->mod ^= NV50_MOD_NEG; +} + +static void +emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, unsigned op) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + set_long(pc, e); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (op != TGSI_OPCODE_AND && op != TGSI_OPCODE_OR && + op != TGSI_OPCODE_XOR) + assert(!"invalid bit op"); + + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { + set_immd(pc, src1, e); + if (op == TGSI_OPCODE_OR) + e->inst[0] |= 0x0100; + else + if (op == TGSI_OPCODE_XOR) + e->inst[0] |= 
0x8000; + } else { + set_src_1(pc, src1, e); + e->inst[1] |= 0x04000000; /* 32 bit */ + if (op == TGSI_OPCODE_OR) + e->inst[1] |= 0x4000; + else + if (op == TGSI_OPCODE_XOR) + e->inst[1] |= 0x8000; + } + + emit(pc, e); } static void @@ -872,9 +955,9 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_src_1(pc, src1, e); set_src_2(pc, src2, e); - if (src0->neg ^ src1->neg) + if ((src0->mod ^ src1->mod) & NV50_MOD_NEG) e->inst[1] |= 0x04000000; - if (src2->neg) + if (src2->mod & NV50_MOD_NEG) e->inst[1] |= 0x08000000; emit(pc, e); @@ -884,9 +967,10 @@ static INLINE void emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { - src2->neg ^= 1; + assert(src2 != src0 && src2 != src1); + src2->mod ^= NV50_MOD_NEG; emit_mad(pc, dst, src0, src1, src2); - src2->neg ^= 1; + src2->mod ^= NV50_MOD_NEG; } static void @@ -953,7 +1037,6 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) /* 0x80 == src is float */ #define CVT_F32_F32 0xc4 #define CVT_F32_S32 0x44 -#define CVT_F32_U32 0x64 #define CVT_S32_F32 0x8c #define CVT_S32_S32 0x0c #define CVT_NEG 0x20 @@ -1161,7 +1244,7 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) const int r_pred = 1; unsigned cvn = CVT_F32_F32; - if (src->neg) + if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG; /* write predicate reg */ emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); @@ -1175,10 +1258,36 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) } static void +load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], + struct nv50_reg **src, boolean proj) +{ + int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod }; + + src[0]->mod |= NV50_MOD_ABS; + src[1]->mod |= NV50_MOD_ABS; + src[2]->mod |= NV50_MOD_ABS; + + emit_minmax(pc, 4, t[2], src[0], src[1]); + emit_minmax(pc, 4, t[2], src[2], t[2]); + + src[0]->mod = mod[0]; + src[1]->mod = mod[1]; + src[2]->mod = mod[2]; + + if (proj && 0 /* looks more correct without this */) + emit_mul(pc, t[2], t[2], src[3]); + emit_flop(pc, 0, t[2], t[2]); + + emit_mul(pc, t[0], src[0], t[2]); + emit_mul(pc, t[1], src[1], t[2]); + emit_mul(pc, t[2], src[2], t[2]); +} + +static void emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, struct nv50_reg **src, unsigned unit, unsigned type, boolean proj) { - struct nv50_reg *temp, *t[4]; + struct nv50_reg *t[4]; struct nv50_program_exec *e; unsigned c, mode, dim; @@ -1207,6 +1316,9 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, /* some cards need t[0]'s hw index to be a multiple of 4 */ alloc_temp4(pc, t, 0); + if (type == TGSI_TEXTURE_CUBE) { + load_cube_tex_coords(pc, t, src, proj); + } else if (proj) { if (src[0]->type == P_TEMP && src[0]->rhw != -1) { mode = pc->interp_mode[src[0]->index]; @@ -1231,17 +1343,8 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, */ } } else { - if (type == TGSI_TEXTURE_CUBE) { - temp = temp_temp(pc); - emit_minmax(pc, 4, temp, src[0], src[1]); - emit_minmax(pc, 4, temp, temp, src[2]); - emit_flop(pc, 0, temp, temp); - for (c = 0; c < 3; c++) - emit_mul(pc, t[c], src[c], temp); - } else { - for (c = 0; c < dim; c++) - emit_mov(pc, t[c], src[c]); - } + for (c = 0; c < dim; c++) + emit_mov(pc, t[c], src[c]); } e = exec(pc); @@ -1254,14 +1357,16 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (dim == 2) e->inst[0] |= 0x00400000; else - if (dim == 3) + if (dim == 3) { e->inst[0] |= 0x00800000; + if (type == TGSI_TEXTURE_CUBE) + e->inst[0] |= 
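load_cube_tex_coords() above scales the three cube-map coordinates by the reciprocal of their largest absolute component before the TEX, so the major axis lands on the +/-1 face (this reads emit_minmax(4, ...) as max and emit_flop(0, ...) as reciprocal, which the old inline cube path already suggested). A plain-C sketch of the same math with invented coordinates:

#include <math.h>
#include <stdio.h>

int main(void)
{
   float s = 0.3f, t = -2.0f, r = 0.5f;                    /* hypothetical coords */
   float m = fmaxf(fabsf(s), fmaxf(fabsf(t), fabsf(r)));   /* the abs + max steps */
   float inv = 1.0f / m;                                   /* the flop (reciprocal) step */
   printf("%.3f %.3f %.3f\n", s * inv, t * inv, r * inv);  /* 0.150 -1.000 0.250 */
   return 0;
}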
0x08000000; + } e->inst[0] |= (mask & 0x3) << 25; e->inst[1] |= (mask & 0xc) << 12; emit(pc, e); - #if 1 c = 0; if (mask & 1) emit_mov(pc, dst[0], t[c++]); @@ -1335,19 +1440,25 @@ emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) static void emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { + struct nv50_reg *r = src; struct nv50_program_exec *e = exec(pc); assert(src->type == P_TEMP); - if (!src->neg) /* ! double negation */ - emit_neg(pc, src, src); + if (!(src->mod & NV50_MOD_NEG)) { /* ! double negation */ + r = alloc_temp(pc, NULL); + emit_neg(pc, r, src); + } e->inst[0] = 0xc0150000; e->inst[1] = 0x8a400000; set_long(pc, e); set_dst(pc, dst, e); - set_src_0(pc, src, e); - set_src_2(pc, src, e); + set_src_0(pc, r, e); + set_src_2(pc, r, e); + + if (r != src) + free_temp(pc, r); emit(pc, e); } @@ -1430,10 +1541,10 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) { if (s == i) continue; - if ((insn->FullSrcRegisters[s].SrcRegister.Index == - insn->FullSrcRegisters[i].SrcRegister.Index) && - (insn->FullSrcRegisters[s].SrcRegister.File == - insn->FullSrcRegisters[i].SrcRegister.File)) + if ((insn->Src[s].Register.Index == + insn->Src[i].Register.Index) && + (insn->Src[s].Register.File == + insn->Src[i].Register.File)) return FALSE; } @@ -1444,7 +1555,7 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) static unsigned nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) { - unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask; + unsigned x, mask = insn->Dst[0].Register.WriteMask; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_COS: @@ -1470,10 +1581,10 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: { - const struct tgsi_instruction_ext_texture *tex; + const struct tgsi_instruction_texture *tex; - assert(insn->Instruction.Extended); - tex = &insn->InstructionExtTexture; + assert(insn->Instruction.Texture); + tex = &insn->Texture; mask = 0x7; if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) @@ -1507,17 +1618,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { - switch (dst->DstRegister.File) { + switch (dst->Register.File) { case TGSI_FILE_TEMPORARY: - return &pc->temp[dst->DstRegister.Index * 4 + c]; + return &pc->temp[dst->Register.Index * 4 + c]; case TGSI_FILE_OUTPUT: - return &pc->result[dst->DstRegister.Index * 4 + c]; + return &pc->result[dst->Register.Index * 4 + c]; case TGSI_FILE_ADDRESS: { - struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c]; + struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { r = alloc_addr(pc, NULL); - pc->addr[dst->DstRegister.Index * 4 + c] = r; + pc->addr[dst->Register.Index * 4 + c] = r; } assert(r); return r; @@ -1539,8 +1650,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, struct nv50_reg *temp; unsigned sgn, c, swz; - if (src->SrcRegister.File != TGSI_FILE_CONSTANT) - assert(!src->SrcRegister.Indirect); + if (src->Register.File != TGSI_FILE_CONSTANT) + assert(!src->Register.Indirect); sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); @@ -1550,16 +1661,16 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, case TGSI_SWIZZLE_Y: case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch 
(src->SrcRegister.File) { + switch (src->Register.File) { case TGSI_FILE_INPUT: - r = &pc->attr[src->SrcRegister.Index * 4 + c]; + r = &pc->attr[src->Register.Index * 4 + c]; break; case TGSI_FILE_TEMPORARY: - r = &pc->temp[src->SrcRegister.Index * 4 + c]; + r = &pc->temp[src->Register.Index * 4 + c]; break; case TGSI_FILE_CONSTANT: - if (!src->SrcRegister.Indirect) { - r = &pc->param[src->SrcRegister.Index * 4 + c]; + if (!src->Register.Indirect) { + r = &pc->param[src->Register.Index * 4 + c]; break; } /* Indicate indirection by setting r->acc < 0 and @@ -1567,18 +1678,19 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, */ r = MALLOC_STRUCT(nv50_reg); swz = tgsi_util_get_src_register_swizzle( - &src->SrcRegisterInd, 0); + &src->Indirect, 0); ctor_reg(r, P_CONST, - src->SrcRegisterInd.Index * 4 + swz, c); + src->Indirect.Index * 4 + swz, + src->Register.Index * 4 + c); r->acc = -1; break; case TGSI_FILE_IMMEDIATE: - r = &pc->immd[src->SrcRegister.Index * 4 + c]; + r = &pc->immd[src->Register.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: break; case TGSI_FILE_ADDRESS: - r = pc->addr[src->SrcRegister.Index * 4 + c]; + r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; default: @@ -1601,7 +1713,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; case TGSI_UTIL_SIGN_TOGGLE: if (neg) - r->neg = 1; + r->mod = NV50_MOD_NEG; else { temp = temp_temp(pc); emit_neg(pc, temp, r); @@ -1610,11 +1722,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; case TGSI_UTIL_SIGN_SET: temp = temp_temp(pc); - emit_abs(pc, temp, r); - if (neg) - temp->neg = 1; - else - emit_neg(pc, temp, temp); + emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG); r = temp; break; default: @@ -1748,29 +1856,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, unsigned mask, sat, unit; int i, c; - mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + mask = inst->Dst[0].Register.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; memset(src, 0, sizeof(src)); for (c = 0; c < 4; c++) { if ((mask & (1 << c)) && !pc->r_dst[c]) - dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + dst[c] = tgsi_dst(pc, c, &inst->Dst[0]); else dst[c] = pc->r_dst[c]; rdst[c] = dst[c]; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *fs = &inst->Src[i]; unsigned src_mask; boolean neg_supp; src_mask = nv50_tgsi_src_mask(inst, i); neg_supp = negate_supported(inst, i); - if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) - unit = fs->SrcRegister.Index; + if (fs->Register.File == TGSI_FILE_SAMPLER) + unit = fs->Register.Index; for (c = 0; c < 4; c++) if (src_mask & (1 << c)) @@ -1787,7 +1895,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; - rdst[c] = dst[c]; + /* rdst[c] = dst[c]; */ /* done above */ dst[c] = temp_temp(pc); } } @@ -1809,6 +1917,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_add(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_AND: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_OR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_bitop2(pc, dst[c], src[0][c], src[1][c], + inst->Instruction.Opcode); + } + break; case TGSI_OPCODE_ARL: assert(src[0][0]); temp = temp_temp(pc); @@ -1950,7 +2068,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_IF: /* 
emitting a join_at may not be necessary */ assert(pc->if_lvl < MAX_IF_DEPTH); - set_pred_wr(pc, 1, 0, pc->if_cond); + /* set_pred_wr(pc, 1, 0, pc->if_cond); */ + emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN, + CVT_F32_F32); emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); @@ -2067,11 +2187,11 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_TEX: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionExtTexture.Texture, FALSE); + inst->Texture.Texture, FALSE); break; case TGSI_OPCODE_TXP: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionExtTexture.Texture, TRUE); + inst->Texture.Texture, TRUE); break; case TGSI_OPCODE_TRUNC: for (c = 0; c < 4; c++) { @@ -2116,8 +2236,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - /* in this case we saturate later */ - if (dst[c]->type == P_TEMP && dst[c]->index < 0) + /* In this case we saturate later, and dst[c] won't + * be another temp_temp (and thus lost), since rdst + * already is TEMP (see above). */ + if (rdst[c]->type == P_TEMP && rdst[c]->index < 0) continue; emit_sat(pc, rdst[c], dst[c]); } @@ -2127,7 +2249,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!src[i][c]) continue; - src[i][c]->neg = 0; + src[i][c]->mod = 0; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); else @@ -2148,7 +2270,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) const struct tgsi_dst_register *dst; unsigned i, c, k, mask; - dst = &insn->FullDstRegisters[0].DstRegister; + dst = &insn->Dst[0].Register; mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) @@ -2166,12 +2288,12 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) } for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { - src = &insn->FullSrcRegisters[i]; + src = &insn->Src[i]; - if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) + if (src->Register.File == TGSI_FILE_TEMPORARY) reg = pc->temp; else - if (src->SrcRegister.File == TGSI_FILE_INPUT) + if (src->Register.File == TGSI_FILE_INPUT) reg = pc->attr; else continue; @@ -2183,7 +2305,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) continue; k = tgsi_util_get_full_src_register_swizzle(src, c); - reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; + reg[src->Register.Index * 4 + k].acc = pc->insn_nr; } } } @@ -2243,13 +2365,13 @@ static struct nv50_reg * tgsi_broadcast_dst(struct nv50_pc *pc, const struct tgsi_full_dst_register *fd, unsigned mask) { - if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { - int c = ffs(~mask & fd->DstRegister.WriteMask); + if (fd->Register.File == TGSI_FILE_TEMPORARY) { + int c = ffs(~mask & fd->Register.WriteMask); if (c) return tgsi_dst(pc, c - 1, fd); } else { - int c = ffs(fd->DstRegister.WriteMask) - 1; - if ((1 << c) == fd->DstRegister.WriteMask) + int c = ffs(fd->Register.WriteMask) - 1; + if ((1 << c) == fd->Register.WriteMask) return tgsi_dst(pc, c, fd); } @@ -2263,7 +2385,7 @@ static unsigned nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, unsigned rdep[4]) { - const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; + const struct tgsi_full_dst_register *fd = &insn->Dst[0]; const struct tgsi_full_src_register *fs; unsigned i, deqs = 0; @@ -2274,9 +2396,9 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, unsigned chn, mask = nv50_tgsi_src_mask(insn, 
i); boolean neg_supp = negate_supported(insn, i); - fs = &insn->FullSrcRegisters[i]; - if (fs->SrcRegister.File != fd->DstRegister.File || - fs->SrcRegister.Index != fd->DstRegister.Index) + fs = &insn->Src[i]; + if (fs->Register.File != fd->Register.File || + fs->Register.Index != fd->Register.Index) continue; for (chn = 0; chn < 4; ++chn) { @@ -2287,7 +2409,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, c = tgsi_util_get_full_src_register_swizzle(fs, chn); s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - if (!(fd->DstRegister.WriteMask & (1 << c))) + if (!(fd->Register.WriteMask & (1 << c))) continue; /* no danger if src is copied to TEMP first */ @@ -2311,7 +2433,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) const struct tgsi_full_dst_register *fd; unsigned i, deqs, rdep[4], m[4]; - fd = &tok->FullInstruction.FullDstRegisters[0]; + fd = &tok->FullInstruction.Dst[0]; deqs = nv50_tgsi_scan_swizzle(&insn, rdep); if (is_scalar_op(insn.Instruction.Opcode)) { @@ -2330,10 +2452,10 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (i = 0; i < 4; ++i) { assert(pc->r_dst[m[i]] == NULL); - insn.FullDstRegisters[0].DstRegister.WriteMask = - fd->DstRegister.WriteMask & (1 << m[i]); + insn.Dst[0].Register.WriteMask = + fd->Register.WriteMask & (1 << m[i]); - if (!insn.FullDstRegisters[0].DstRegister.WriteMask) + if (!insn.Dst[0].Register.WriteMask) continue; if (deqs & (1 << i)) @@ -2383,6 +2505,23 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) emit_interp(pc, reg, iv, mode); } +/* The face input is always at v[255] (varying space), with a + * value of 0 for back-facing, and 0xffffffff for front-facing. + */ +static void +load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) +{ + struct nv50_reg *one = alloc_immd(pc, 1.0f); + + assert(a->rhw == -1); + alloc_reg(pc, a); /* do this before rhw is set */ + a->rhw = 255; + load_interpolant(pc, a); + emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND); + + FREE(one); +} + static boolean nv50_program_tx_prep(struct nv50_pc *pc) { @@ -2414,8 +2553,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) unsigned si, last, first, mode; d = &tp.FullToken.FullDeclaration; - first = d->DeclarationRange.First; - last = d->DeclarationRange.Last; + first = d->Range.First; + last = d->Range.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: @@ -2425,8 +2564,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->type == PIPE_SHADER_FRAGMENT) break; - si = d->Semantic.SemanticIndex; - switch (d->Semantic.SemanticName) { + si = d->Semantic.Index; + switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; if (p->cfg.io_nr > first) @@ -2504,7 +2643,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0, rid = 0; i < pc->result_nr; ++i) { p->cfg.io[i].hw = rid; - p->cfg.io[i].id_vp = i; + p->cfg.io[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; @@ -2527,6 +2666,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) int rid, aid; unsigned n = 0, m = pc->attr_nr - flat_nr; + pc->allow32 = TRUE; + int base = (TGSI_SEMANTIC_POSITION == p->info.input_semantic_name[0]) ? 
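load_frontfacing() above reads the face varying at v[255], which holds 0x00000000 for back-facing and 0xffffffff for front-facing, and bitwise-ANDs it with the 1.0f immediate to get a clean 0.0f/1.0f shader value. A host-side sketch of why that AND works:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static float face_to_float(uint32_t face_bits)
{
   float one = 1.0f, result;
   uint32_t one_bits, out;
   memcpy(&one_bits, &one, sizeof(one_bits));   /* IEEE 1.0f is 0x3f800000 */
   out = face_bits & one_bits;                  /* the emit_bitop2(..., AND) step */
   memcpy(&result, &out, sizeof(result));
   return result;
}

int main(void)
{
   printf("front %.1f, back %.1f\n",
          face_to_float(0xffffffffu), face_to_float(0x00000000u));
   return 0;
}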
0 : 1; @@ -2534,14 +2675,12 @@ nv50_program_tx_prep(struct nv50_pc *pc) * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { - if (pc->interp_mode[i] == INTERP_FLAT) { - p->cfg.io[m].id_vp = i + base; - p->cfg.io[m++].id_fp = i; - } else { + if (pc->interp_mode[i] == INTERP_FLAT) + p->cfg.io[m++].id = i; + else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) p->cfg.io[n].linear = TRUE; - p->cfg.io[n].id_vp = i + base; - p->cfg.io[n++].id_fp = i; + p->cfg.io[n++].id = i; } } @@ -2553,7 +2692,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (n = 0; n < pc->attr_nr; ++n) { p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id_fp; + i = p->cfg.io[n].id; + + if (p->info.input_semantic_name[n] == + TGSI_SEMANTIC_FACE) { + load_frontfacing(pc, &pc->attr[i * 4]); + continue; + } for (c = 0; c < 4; ++c) { if (!pc->attr[i * 4 + c].acc) @@ -2587,8 +2732,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->attr_nr; i++) { ubyte si, sn; - sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; - si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; + sn = p->info.input_semantic_name[p->cfg.io[i].id]; + si = p->info.input_semantic_index[p->cfg.io[i].id]; if (sn == TGSI_SEMANTIC_COLOR) { p->cfg.two_side[si] = p->cfg.io[i]; @@ -2613,6 +2758,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->result[2].rhw = rid; p->cfg.high_result = rid; + + /* separate/different colour results for MRTs ? */ + if (pc->result_nr - (p->info.writes_z ? 1 : 0) > 1) + p->cfg.regs[2] |= 1; } if (pc->immd_nr) { @@ -2743,7 +2892,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return FALSE; } for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); return TRUE; } @@ -2769,7 +2918,7 @@ nv50_fp_move_results(struct nv50_pc *pc) static void nv50_program_fixup_insns(struct nv50_pc *pc) { - struct nv50_program_exec *e, *prev = NULL, **bra_list; + struct nv50_program_exec *e, **bra_list; unsigned i, n, pos; bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *)); @@ -2781,6 +2930,16 @@ nv50_program_fixup_insns(struct nv50_pc *pc) if (e->param.index >= 0 && !e->param.mask) bra_list[n++] = e; + /* last instruction must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + /* set exit bit */ + pc->p->exec_tail->inst[1] |= 1; + + /* !immd on exit insn simultaneously means !join */ + assert(!is_immd(pc->p->exec_head)); + assert(!is_immd(pc->p->exec_tail)); + /* Make sure we don't have any single 32 bit instructions. */ for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 
2 : 1; @@ -2792,23 +2951,8 @@ nv50_program_fixup_insns(struct nv50_pc *pc) convert_to_long(pc, e); ++pos; } - if (e->next) - prev = e; } - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); - - /* last instruction must be long so it can have the end bit set */ - if (!is_long(pc->p->exec_tail)) { - convert_to_long(pc, pc->p->exec_tail); - if (prev) - convert_to_long(pc, prev); - } - assert(!(pc->p->exec_tail->inst[1] & 2)); - /* set the end-bit */ - pc->p->exec_tail->inst[1] |= 1; - FREE(bra_list); } @@ -2945,11 +3089,8 @@ static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program_exec *e; - struct nouveau_stateobj *so; - const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; - unsigned start, count, *up, *ptr; + uint32_t *up, i; boolean upload = FALSE; if (!p->bo) { @@ -2964,32 +3105,37 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return; - for (e = p->exec_head; e; e = e->next) { + up = MALLOC(p->exec_size * 4); + + for (i = 0, e = p->exec_head; e; e = e->next) { unsigned ei, ci, bs; - if (e->param.index < 0) - continue; + if (e->param.index >= 0 && e->param.mask) { + bs = (e->inst[1] >> 22) & 0x07; + assert(bs < 2); + ei = e->param.shift >> 5; + ci = e->param.index; + if (bs == 0) + ci += p->data[bs]->start; - if (e->param.mask == 0) { + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); + } else + if (e->param.index >= 0) { + /* zero mask means param is a jump/branch offset */ assert(!(e->param.index & 1)); /* seem to be 8 byte steps */ ei = (e->param.index >> 1) + 0 /* START_ID */; e->inst[0] &= 0xf0000fff; e->inst[0] |= ei << 12; - continue; } - bs = (e->inst[1] >> 22) & 0x07; - assert(bs < 2); - ei = e->param.shift >> 5; - ci = e->param.index; - if (bs == 0) - ci += p->data[bs]->start; - - e->inst[ei] &= ~e->param.mask; - e->inst[ei] |= (ci << e->param.shift); + up[i++] = e->inst[0]; + if (is_long(e)) + up[i++] = e->inst[1]; } + assert(i == p->exec_size); if (p->data[0]) p->data_start[0] = p->data[0]->start; @@ -3002,45 +3148,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) NOUVEAU_ERR("0x%08x\n", e->inst[1]); } #endif - - up = ptr = MALLOC(p->exec_size * 4); - for (e = p->exec_head; e; e = e->next) { - *(ptr++) = e->inst[0]; - if (is_long(e)) - *(ptr++) = e->inst[1]; - } - - so = so_new(4,2); - so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); - - start = 0; count = p->exec_size; - while (count) { - struct nouveau_channel *chan = nv50->screen->base.channel; - unsigned nr; - - so_emit(chan, so); - - nr = MIN2(count, 2047); - nr = MIN2(chan->pushbuf->remaining, nr); - if (chan->pushbuf->remaining < (nr + 3)) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); - OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); - BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr); - OUT_RINGp (chan, up + start, nr); - - start += nr; - count -= nr; - } + nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM, + NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, + up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0, + 0, 0, p->exec_size * 4, 1, 1); FREE(up); - so_ref(NULL, &so); } void @@ -3122,15 
+3235,15 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; - /* XXX: This can't work correctly in all cases yet, we either - * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has - * to be per FP input instead of per VP output + /* XXX: this might not work correctly in all cases yet - we'll + * just assume that an FP generic input that is not written in + * the VP is PointCoord. */ memset(pntc, 0, 8 * sizeof(uint32_t)); for (i = 0; i < fp->cfg.io_nr; i++) { uint8_t sn, si; - uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; + uint8_t j, k = fp->cfg.io[i].id; unsigned n = popcnt4(fp->cfg.io[i].mask); if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { @@ -3138,10 +3251,16 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) continue; } - sn = vp->info.input_semantic_name[j]; - si = vp->info.input_semantic_index[j]; + for (j = 0; j < vp->info.num_outputs; ++j) { + sn = vp->info.output_semantic_name[j]; + si = vp->info.output_semantic_index[j]; - if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { + if (sn == fp->info.input_semantic_name[k] && + si == fp->info.input_semantic_index[k]) + break; + } + + if (j < vp->info.num_outputs) { ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[si]; @@ -3229,20 +3348,24 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - i = 0; - if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) - i = 1; - for (; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; - - n = fp->cfg.io[i].id_vp; - if (n >= vp->cfg.io_nr || - vp->info.output_semantic_name[n] != sn || - vp->info.output_semantic_index[n] != si) - vpo = &dummy; - else - vpo = &vp->cfg.io[n]; + for (i = 0; i < fp->cfg.io_nr; i++) { + ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; + ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; + + /* position must be mapped first */ + assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); + + /* maybe even remove these from cfg.io */ + if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + continue; + + /* VP outputs and vp->cfg.io are in the same order */ + for (n = 0; n < vp->info.num_outputs; ++n) { + if (vp->info.output_semantic_name[n] == sn && + vp->info.output_semantic_index[n] == si) + break; + } + vpo = (n < vp->info.num_outputs) ? 
&vp->cfg.io[n] : &dummy; m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index d78dee083f..255c7c737e 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -17,8 +17,7 @@ struct nv50_program_exec { struct nv50_sreg4 { uint8_t hw; - uint8_t id_vp; - uint8_t id_fp; + uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ uint8_t mask; boolean linear; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 63dce0f4c2..e1b2f11239 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -38,6 +38,11 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_UNORM: return TRUE; default: break; @@ -57,6 +62,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_X8R8G8B8_SRGB: case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -68,6 +75,13 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_UNORM: return TRUE; default: break; @@ -295,6 +309,12 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, 0x121c, 1); so_data (so, 1); + /* activate all 32 lanes (threads) in a warp */ + so_method(so, screen->tesla, 0x19a0, 1); + so_data (so, 0x2); + so_method(so, screen->tesla, 0x1400, 1); + so_data (so, 0xf); + so_method(so, screen->tesla, 0x13bc, 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index ffaa5e29d1..07318f2394 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -648,9 +648,9 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.delete_blend_state = nv50_blend_state_delete; nv50->pipe.create_sampler_state = nv50_sampler_state_create; - nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; + nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind; nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; - nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; + nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture; nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 012911f41b..c871acaab8 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -23,6 +23,12 @@ #include "nv50_context.h" #include "nouveau/nouveau_stateobj.h" +#define 
NV50_CBUF_FORMAT_CASE(n) \ + case PIPE_FORMAT_##n: so_data(so, NV50TCL_RT_FORMAT_##n); break + +#define NV50_ZETA_FORMAT_CASE(n) \ + case PIPE_FORMAT_##n: so_data(so, NV50TCL_ZETA_FORMAT_##n); break + static void nv50_state_validate_fb(struct nv50_context *nv50) { @@ -31,6 +37,15 @@ nv50_state_validate_fb(struct nv50_context *nv50) struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; + /* Set nr of active RTs and select RT for each colour output. + * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. + * Ambiguous assignment results in no rendering (no DATA_ERROR). + */ + so_method(so, tesla, 0x121c, 1); + so_data (so, fb->nr_cbufs | + (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | + (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); + for (i = 0; i < fb->nr_cbufs; i++) { struct pipe_texture *pt = fb->cbufs[i]->texture; struct nouveau_bo *bo = nv50_miptree(pt)->base.bo; @@ -54,15 +69,14 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->cbufs[i]->format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM); - break; + NV50_CBUF_FORMAT_CASE(A8R8G8B8_UNORM); + NV50_CBUF_FORMAT_CASE(X8R8G8B8_UNORM); + NV50_CBUF_FORMAT_CASE(R5G6B5_UNORM); + NV50_CBUF_FORMAT_CASE(R16G16B16A16_SNORM); + NV50_CBUF_FORMAT_CASE(R16G16B16A16_UNORM); + NV50_CBUF_FORMAT_CASE(R32G32B32A32_FLOAT); + NV50_CBUF_FORMAT_CASE(R16G16_SNORM); + NV50_CBUF_FORMAT_CASE(R16G16_UNORM); default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->cbufs[i]->format)); @@ -96,18 +110,10 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->zsbuf->format) { - case PIPE_FORMAT_Z32_FLOAT: - so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT); - break; - case PIPE_FORMAT_Z24S8_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM); - break; - case PIPE_FORMAT_X8Z24_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM); - break; - case PIPE_FORMAT_S8Z24_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); - break; + NV50_ZETA_FORMAT_CASE(S8Z24_UNORM); + NV50_ZETA_FORMAT_CASE(X8Z24_UNORM); + NV50_ZETA_FORMAT_CASE(Z24S8_UNORM); + NV50_ZETA_FORMAT_CASE(Z32_FLOAT); default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->zsbuf->format)); @@ -124,6 +130,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); + } else { + so_method(so, tesla, 0x1538, 1); + so_data (so, 0); } so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); @@ -192,7 +201,8 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); - if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_RASTERIZER)) so_emit(chan, nv50->state.programs); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); @@ -255,7 +265,8 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); - if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + if (nv50->dirty & 
(NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_RASTERIZER)) nv50_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) @@ -276,7 +287,7 @@ nv50_state_validate(struct nv50_context *nv50) so = so_new(33, 0); so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) - so_data(so, nv50->stipple.stipple[i]); + so_data(so, util_bswap32(nv50->stipple.stipple[i])); so_ref(so, &nv50->state.stipple); so_ref(NULL, &so); } diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index e12a6ad648..417d367942 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -25,16 +25,18 @@ #include "nouveau/nouveau_stateobj.h" -#define _(pf, tt, r, g, b, a, tf) \ +#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \ { \ PIPE_FORMAT_##pf, \ - NV50TIC_0_0_MAPR_##r | NV50TIC_0_0_TYPER_##tt | \ - NV50TIC_0_0_MAPG_##g | NV50TIC_0_0_TYPEG_##tt | \ - NV50TIC_0_0_MAPB_##b | NV50TIC_0_0_TYPEB_##tt | \ - NV50TIC_0_0_MAPA_##a | NV50TIC_0_0_TYPEA_##tt | \ - NV50TIC_0_0_FMT_##tf \ + NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \ + NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \ + NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \ + NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \ + NV50TIC_0_0_FMT_##f \ } +#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f) + struct nv50_texture_format { enum pipe_format pf; uint32_t hw; @@ -46,7 +48,9 @@ struct nv50_texture_format { static const struct nv50_texture_format nv50_tex_format_list[] = { _(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8), + _(A8R8G8B8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8), _(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8), + _(X8R8G8B8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8), _(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5), _(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4), @@ -61,16 +65,30 @@ static const struct nv50_texture_format nv50_tex_format_list[] = _(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1), _(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1), _(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3), - _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5) + _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5), + + _MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8), + + _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16), + _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16), + _(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32), + + _(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16), + _(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16), + + _MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH) + }; #undef _ +#undef _MIXED static int nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, struct nv50_miptree *mt, int unit) { unsigned i; + uint32_t mode; for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++) if (nv50_tex_format_list[i].pf == mt->base.base.format) @@ -78,17 +96,44 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, if (i == NV50_TEX_FORMAT_LIST_SIZE) return 1; + if (nv50->sampler[unit]->normalized) + mode = 0x50001000 | (1 << 31); + else { + mode = 0x50001000 | (7 << 14); + assert(mt->base.base.target == PIPE_TEXTURE_2D); + } + + mode |= ((mt->base.bo->tile_mode & 0x0f) << 22) | + ((mt->base.bo->tile_mode & 0xf0) << 21); + + if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB) + mode |= 0x0400; + + switch (mt->base.base.target) { + case PIPE_TEXTURE_1D: + break; + case PIPE_TEXTURE_2D: + mode |= (1 << 14); + break; + case PIPE_TEXTURE_3D: + mode |= (2 
<< 14); + break; + case PIPE_TEXTURE_CUBE: + mode |= (3 << 14); + break; + default: + assert(!"unsupported texture target"); + break; + } + so_data (so, nv50_tex_format_list[i].hw); so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | - NOUVEAU_BO_RD, 0, 0); - if (nv50->sampler[unit]->normalized) - so_data (so, 0xd0005000 | mt->base.bo->tile_mode << 22); - else - so_data (so, 0x5001d000 | mt->base.bo->tile_mode << 22); + NOUVEAU_BO_RD, 0, 0); + so_data (so, mode); so_data (so, 0x00300000); - so_data (so, mt->base.base.width[0]); + so_data (so, mt->base.base.width0 | (1 << 31)); so_data (so, (mt->base.base.last_level << 28) | - (mt->base.base.depth[0] << 16) | mt->base.base.height[0]); + (mt->base.base.depth0 << 16) | mt->base.base.height0); so_data (so, 0x03000000); so_data (so, mt->base.base.last_level << 4); @@ -104,7 +149,7 @@ nv50_tex_validate(struct nv50_context *nv50) unsigned i, unit, push; push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6; - so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr + 2); + so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2); nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM, nv50->miptree_nr * 8 * 4); diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index 13f74c11c6..d531e61132 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -38,18 +38,26 @@ #define NV50TIC_0_0_TYPEA_MASK 0x00038000 #define NV50TIC_0_0_TYPEA_UNORM 0x00010000 #define NV50TIC_0_0_TYPEA_SNORM 0x00008000 +#define NV50TIC_0_0_TYPEA_SINT 0x00018000 +#define NV50TIC_0_0_TYPEA_UINT 0x00020000 #define NV50TIC_0_0_TYPEA_FLOAT 0x00038000 #define NV50TIC_0_0_TYPEB_MASK 0x00007000 #define NV50TIC_0_0_TYPEB_UNORM 0x00002000 #define NV50TIC_0_0_TYPEB_SNORM 0x00001000 +#define NV50TIC_0_0_TYPEB_SINT 0x00003000 +#define NV50TIC_0_0_TYPEB_UINT 0x00004000 #define NV50TIC_0_0_TYPEB_FLOAT 0x00007000 #define NV50TIC_0_0_TYPEG_MASK 0x00000e00 #define NV50TIC_0_0_TYPEG_UNORM 0x00000400 #define NV50TIC_0_0_TYPEG_SNORM 0x00000200 +#define NV50TIC_0_0_TYPEG_SINT 0x00000600 +#define NV50TIC_0_0_TYPEG_UINT 0x00000800 #define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00 #define NV50TIC_0_0_TYPER_MASK 0x000001c0 #define NV50TIC_0_0_TYPER_UNORM 0x00000080 #define NV50TIC_0_0_TYPER_SNORM 0x00000040 +#define NV50TIC_0_0_TYPER_SINT 0x000000c0 +#define NV50TIC_0_0_TYPER_UINT 0x00000100 #define NV50TIC_0_0_TYPER_FLOAT 0x000001c0 #define NV50TIC_0_0_FMT_MASK 0x0000003f #define NV50TIC_0_0_FMT_32_32_32_32 0x00000001 @@ -57,6 +65,7 @@ #define NV50TIC_0_0_FMT_32_32 0x00000004 #define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 #define NV50TIC_0_0_FMT_2_10_10_10 0x00000009 +#define NV50TIC_0_0_FMT_16_16 0x0000000c #define NV50TIC_0_0_FMT_32 0x0000000f #define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 /* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */ @@ -65,12 +74,16 @@ #define NV50TIC_0_0_FMT_8_8 0x00000018 #define NV50TIC_0_0_FMT_16 0x0000001b #define NV50TIC_0_0_FMT_8 0x0000001d +#define NV50TIC_0_0_FMT_5_9_9_9 0x00000020 #define NV50TIC_0_0_FMT_10_11_11 0x00000021 #define NV50TIC_0_0_FMT_DXT1 0x00000024 #define NV50TIC_0_0_FMT_DXT3 0x00000025 #define NV50TIC_0_0_FMT_DXT5 0x00000026 #define NV50TIC_0_0_FMT_RGTC1 0x00000027 #define NV50TIC_0_0_FMT_RGTC2 0x00000028 +#define NV50TIC_0_0_FMT_24_8 0x00000029 +#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f +#define NV50TIC_0_0_FMT_32_8 0x00000030 #define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff #define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 diff --git 
a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 9c289026bb..39d65279fc 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -1,6 +1,7 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv50_context.h" @@ -12,6 +13,7 @@ struct nv50_transfer { int level_pitch; int level_width; int level_height; + int level_depth; int level_x; int level_y; }; @@ -20,10 +22,10 @@ static void nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, unsigned src_offset, int src_pitch, unsigned src_tile_mode, - int sx, int sy, int sw, int sh, + int sx, int sy, int sw, int sh, int sd, struct nouveau_bo *dst_bo, unsigned dst_offset, int dst_pitch, unsigned dst_tile_mode, - int dx, int dy, int dw, int dh, + int dx, int dy, int dw, int dh, int dd, int cpp, int width, int height, unsigned src_reloc, unsigned dst_reloc) { @@ -51,7 +53,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, src_tile_mode << 4); OUT_RING (chan, sw * cpp); OUT_RING (chan, sh); - OUT_RING (chan, 1); + OUT_RING (chan, sd); OUT_RING (chan, 0); } @@ -70,7 +72,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, dst_tile_mode << 4); OUT_RING (chan, dw * cpp); OUT_RING (chan, dh); - OUT_RING (chan, 1); + OUT_RING (chan, dd); OUT_RING (chan, 0); } @@ -114,6 +116,20 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, } } +static INLINE unsigned +get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny) +{ + unsigned tile_h = get_tile_height(tile_mode); + unsigned tile_d = get_tile_depth(tile_mode); + + /* pitch_2d == to next slice within this volume-tile */ + /* pitch_3d == to next slice in next 2D array of blocks */ + unsigned pitch_2d = tile_h * 64; + unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch; + + return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; +} + static struct pipe_transfer * nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, @@ -124,14 +140,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_miptree_level *lvl = &mt->level[level]; struct nv50_transfer *tx; - unsigned image = 0; + unsigned nx, ny, image = 0; int ret; if (pt->target == PIPE_TEXTURE_CUBE) image = face; - else - if (pt->target == PIPE_TEXTURE_3D) - image = zslice; tx = CALLOC_STRUCT(nv50_transfer); if (!tx) @@ -142,34 +155,52 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->base.width = w; tx->base.height = h; tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; - tx->base.stride = (w * pt->block.size); + if (!pt->nblocksx[level]) { + tx->base.nblocksx = pf_get_nblocksx(&pt->block, + u_minify(pt->width0, level)); + tx->base.nblocksy = pf_get_nblocksy(&pt->block, + u_minify(pt->height0, level)); + } else { + tx->base.nblocksx = pt->nblocksx[level]; + tx->base.nblocksy = pt->nblocksy[level]; + } + tx->base.stride = tx->base.nblocksx * pt->block.size; tx->base.usage = usage; tx->level_pitch = lvl->pitch; - tx->level_width = mt->base.base.width[level]; - tx->level_height = mt->base.base.height[level]; + tx->level_width = u_minify(mt->base.base.width0, level); + tx->level_height = u_minify(mt->base.base.height0, level); + tx->level_depth = u_minify(mt->base.base.depth0, level); tx->level_offset = 
lvl->image_offset[image]; tx->level_tiling = lvl->tile_mode; - tx->level_x = x; - tx->level_y = y; + tx->level_x = pf_get_nblocksx(&tx->base.block, x); + tx->level_y = pf_get_nblocksy(&tx->base.block, y); ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - w * pt->block.size * h, &tx->bo); + tx->base.nblocksy * tx->base.stride, &tx->bo); if (ret) { FREE(tx); return NULL; } + if (pt->target == PIPE_TEXTURE_3D) + tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice, + lvl->pitch, + tx->base.nblocksy); + if (usage & PIPE_TRANSFER_READ) { + nx = pf_get_nblocksx(&tx->base.block, tx->base.width); + ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, x, y, - tx->level_width, tx->level_height, - tx->bo, 0, tx->base.stride, - tx->bo->tile_mode, 0, 0, - tx->base.width, tx->base.height, - tx->base.block.size, w, h, + tx->base.nblocksx, tx->base.nblocksy, + tx->level_depth, + tx->bo, 0, + tx->base.stride, tx->bo->tile_mode, + 0, 0, + tx->base.nblocksx, tx->base.nblocksy, 1, + tx->base.block.size, nx, ny, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, NOUVEAU_BO_GART); } @@ -183,17 +214,22 @@ nv50_transfer_del(struct pipe_transfer *ptx) struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); + unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width); + unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + if (ptx->usage & PIPE_TRANSFER_WRITE) { struct pipe_screen *pscreen = ptx->texture->screen; - nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, - tx->bo->tile_mode, 0, 0, - tx->base.width, tx->base.height, + + nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, + tx->base.stride, tx->bo->tile_mode, + 0, 0, + tx->base.nblocksx, tx->base.nblocksy, 1, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, tx->level_x, tx->level_y, - tx->level_width, tx->level_height, - tx->base.block.size, tx->base.width, - tx->base.height, + tx->base.nblocksx, tx->base.nblocksy, + tx->level_depth, + tx->base.block.size, nx, ny, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); } @@ -237,3 +273,89 @@ nv50_transfer_init_screen_functions(struct pipe_screen *pscreen) pscreen->transfer_map = nv50_transfer_map; pscreen->transfer_unmap = nv50_transfer_unmap; } + +void +nv50_upload_sifc(struct nv50_context *nv50, + struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc, + unsigned dst_format, int dst_w, int dst_h, int dst_pitch, + void *src, unsigned src_format, int src_pitch, + int x, int y, int w, int h, int cpp) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *eng2d = nv50->screen->eng2d; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned line_dwords = (w * cpp + 3) / 4; + + reloc |= NOUVEAU_BO_WR; + + WAIT_RING (chan, 32); + + if (bo->tile_flags) { + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); + OUT_RING (chan, dst_format); + OUT_RING (chan, 0); + OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } else { + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); + OUT_RING (chan, dst_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1); + OUT_RING (chan, dst_pitch); + } + + BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4); + OUT_RING (chan, dst_w); + OUT_RING (chan, dst_h); + OUT_RELOCh(chan, bo, dst_offset, reloc); + OUT_RELOCl(chan, bo, dst_offset, reloc); + + /* NV50_2D_OPERATION_SRCCOPY assumed already set */ + + 
BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + OUT_RING (chan, 0); + OUT_RING (chan, src_format); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); + OUT_RING (chan, w); + OUT_RING (chan, h); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, x); + OUT_RING (chan, 0); + OUT_RING (chan, y); + + while (h--) { + const uint32_t *p = src; + unsigned count = line_dwords; + + while (count) { + unsigned nr = MIN2(count, 1792); + + if (chan->pushbuf->remaining <= nr) { + FIRE_RING (chan); + + BEGIN_RING(chan, eng2d, + NV50_2D_DST_ADDRESS_HIGH, 2); + OUT_RELOCh(chan, bo, dst_offset, reloc); + OUT_RELOCl(chan, bo, dst_offset, reloc); + } + assert(chan->pushbuf->remaining > nr); + + BEGIN_RING(chan, eng2d, + NV50_2D_SIFC_DATA | (2 << 29), nr); + OUT_RINGp (chan, p, nr); + + p += nr; + count -= nr; + } + + src += src_pitch; + } + + BEGIN_RING(chan, tesla, 0x1440, 1); + OUT_RING (chan, 0); +} diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index f73d80de88..d13bb7a36b 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -17,6 +17,7 @@ C_SOURCES = \ r300_state.c \ r300_state_derived.c \ r300_state_invariant.c \ + r300_vbo.c \ r300_vs.c \ r300_texture.c \ r300_tgsi_to_rc.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index b4c8ba2015..97989040d2 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -9,8 +9,6 @@ env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/sr r300 = env.ConvenienceLibrary( target = 'r300', source = [ - 'r3xx_fs.c', - 'r5xx_fs.c', 'r300_chipset.c', 'r300_clear.c', 'r300_context.c', @@ -25,7 +23,6 @@ r300 = env.ConvenienceLibrary( 'r300_state_derived.c', 'r300_state_invariant.c', 'r300_vs.c', - 'r300_surface.c', 'r300_texture.c', 'r300_tgsi_to_rc.c', ] + r300compiler) + r300compiler diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index c34fbb1123..769733b6dd 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,8 +22,6 @@ #include "draw/draw_context.h" -#include "pipe/p_inlines.h" - #include "tgsi/tgsi_scan.h" #include "util/u_hash_table.h" @@ -71,31 +69,32 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->blend_color_state); FREE(r300->rs_block); FREE(r300->scissor_state); + FREE(r300->vertex_info); FREE(r300->viewport_state); FREE(r300); } static unsigned int -r300_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) +r300_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level) { - /** - * FIXME: Optimize. - */ + struct pipe_buffer* buf = 0; + + r300_get_texture_buffer(texture, &buf, NULL); - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return pipe->is_buffer_referenced(pipe, buf); } static unsigned int -r300_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) +r300_is_buffer_referenced(struct pipe_context *pipe, + struct pipe_buffer *buf) { - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + /* This only checks to see whether actual hardware buffers are + * referenced. 
Since we use managed BOs and transfers, it's actually not + * possible for pipe_buffers to ever reference the actual hardware, so + * buffers are never referenced. */ + return 0; } static void r300_flush_cb(void *data) @@ -109,6 +108,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, struct r300_winsys* r300_winsys) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); + struct r300_screen* r300screen = r300_screen(screen); if (!r300) return NULL; @@ -124,9 +124,25 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.clear = r300_clear; - r300->context.draw_arrays = r300_draw_arrays; - r300->context.draw_elements = r300_draw_elements; - r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + if (r300screen->caps->has_tcl) { + r300->context.draw_arrays = r300_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_draw_range_elements; + } else { + r300->context.draw_arrays = r300_swtcl_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + + /* Create a Draw. This is used for SW TCL. */ + r300->draw = draw_create(); + /* Enable our renderer. */ + draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); + /* Enable Draw's clipping. */ + draw_set_driver_clipping(r300->draw, FALSE); + /* Force Draw to never do viewport transform, since we can do + * transform in hardware, always. */ + draw_set_viewport_state(r300->draw, &r300_viewport_identity); + } r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; @@ -137,21 +153,13 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); - /* Create a Draw. This is used for vert collation and SW TCL. */ - r300->draw = draw_create(); - /* Enable our renderer. */ - draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); - /* Disable Draw's clipping if TCL is present. */ - draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl); - /* Force Draw to never do viewport transform, since (again) we can do - * transform in hardware, always. */ - draw_set_viewport_state(r300->draw, &r300_viewport_identity); - /* Open up the OQ BO. 
*/ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); + make_empty_list(&r300->query_list); r300_init_flush_functions(r300); @@ -166,6 +174,5 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; - make_empty_list(&r300->query_list); return &r300->context; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 30b80fa9db..39c0914cff 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -26,6 +26,7 @@ #include "draw/draw_vertex.h" #include "pipe/p_context.h" +#include "pipe/p_inlines.h" struct r300_fragment_shader; struct r300_vertex_shader; @@ -33,6 +34,7 @@ struct r300_vertex_shader; struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t color_channel_mask; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */ uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */ }; @@ -76,6 +78,7 @@ struct r300_rs_state { uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */ + uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ }; struct r300_rs_block { @@ -89,6 +92,10 @@ struct r300_sampler_state { uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + + /* Min/max LOD must be clamped to [0, last_level], thus + * it's dependent on a currently bound texture */ + unsigned min_lod, max_lod; }; struct r300_scissor_state { @@ -119,10 +126,10 @@ struct r300_ztop_state { #define R300_NEW_BLEND 0x00000001 #define R300_NEW_BLEND_COLOR 0x00000002 #define R300_NEW_CLIP 0x00000004 -#define R300_NEW_CONSTANTS 0x00000008 -#define R300_NEW_DSA 0x00000010 -#define R300_NEW_FRAMEBUFFERS 0x00000020 -#define R300_NEW_FRAGMENT_SHADER 0x00000040 +#define R300_NEW_DSA 0x00000008 +#define R300_NEW_FRAMEBUFFERS 0x00000010 +#define R300_NEW_FRAGMENT_SHADER 0x00000020 +#define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 #define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 @@ -132,9 +139,10 @@ struct r300_ztop_state { #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 -#define R300_NEW_VIEWPORT 0x10000000 -#define R300_NEW_QUERY 0x20000000 -#define R300_NEW_KITCHEN_SINK 0x3fffffff +#define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 +#define R300_NEW_VIEWPORT 0x20000000 +#define R300_NEW_QUERY 0x40000000 +#define R300_NEW_KITCHEN_SINK 0x7fffffff /* The next several objects are not pure Radeon state; they inherit from * various Gallium classes. */ @@ -181,6 +189,12 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + /* A pitch for each mip-level */ + unsigned pitch[PIPE_MAX_TEXTURE_LEVELS]; + + /* Size of one zslice or face based on the texture target */ + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + /** * If non-zero, override the natural texture layout with * a custom stride (in bytes). @@ -194,6 +208,11 @@ struct r300_texture { /* Total size of this texture, in bytes. 
*/ unsigned size; + /* Whether this texture has non-power-of-two dimensions. + * It can be either a regular texture or a rectangle one. + */ + boolean is_npot; + /* Pipe buffer backing this texture. */ struct pipe_buffer* buffer; @@ -201,18 +220,14 @@ struct r300_texture { struct r300_texture_state state; }; -struct r300_vertex_format { +struct r300_vertex_info { /* Parent class */ struct vertex_info vinfo; + /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ uint32_t vap_prog_stream_cntl[8]; /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ uint32_t vap_prog_stream_cntl_ext[8]; - /* Map of vertex attributes into PVS memory for HW TCL, - * or GA memory for SW TCL. */ - int vs_tab[16]; - /* Map of rasterizer attributes from GB through RS to US. */ - int fs_tab[16]; }; extern struct pipe_viewport_state r300_viewport_identity; @@ -241,7 +256,7 @@ struct r300_context { * depends on the combination of both currently loaded shaders. */ struct util_hash_table* shader_hash_table; /* Vertex formatting information. */ - struct r300_vertex_format* vertex_info; + struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ /* Blend state. */ @@ -270,12 +285,6 @@ struct r300_context { /* Texture states. */ struct r300_texture* textures[8]; int texture_count; - /* Vertex buffers for Gallium. */ - struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; - int vertex_buffer_count; - /* Vertex elements for Gallium. */ - struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; - int vertex_element_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ @@ -283,6 +292,13 @@ struct r300_context { /* ZTOP state. */ struct r300_ztop_state ztop_state; + /* Vertex buffers for Gallium. */ + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + int vertex_buffer_count; + /* Vertex elements for Gallium. */ + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + int vertex_element_count; + /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ @@ -326,7 +342,7 @@ void r300_init_surface_functions(struct r300_context* r300); static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) { - return (ctx->debug & flags) ? true : false; + return (ctx->debug & flags) ? TRUE : FALSE; } static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) 
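The r300_context.h changes above split the old R300_NEW_CONSTANTS flag into separate R300_NEW_FRAGMENT_SHADER_CONSTANTS and R300_NEW_VERTEX_SHADER_CONSTANTS bits, so constant uploads can be flagged and re-emitted without also re-emitting the shader code. A minimal sketch of that dirty-mask pattern, assuming a context with a dirty_state bitmask as declared here (the toy_* names are illustrative only, not part of the patch; the emit hooks they stand in for appear further down in r300_emit.c):

/* Sketch of the R300_NEW_* dirty-mask pattern; bit values match r300_context.h. */
#include <stdint.h>

#define R300_NEW_FRAGMENT_SHADER           0x00000020
#define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040
#define R300_NEW_VERTEX_SHADER             0x08000000
#define R300_NEW_VERTEX_SHADER_CONSTANTS   0x10000000

struct toy_context {
    uint32_t dirty_state;
};

/* Binding new constants only sets the constants bit, not the program bit. */
static void toy_set_fs_constants(struct toy_context *ctx)
{
    ctx->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
}

/* The emit path uploads exactly what changed, then clears the handled bits
 * (r300_emit_fragment_program_code and r300_emit_fs_constant_buffer play
 * these roles in the r300_emit.c hunks below). */
static void toy_emit_dirty_state(struct toy_context *ctx)
{
    if (ctx->dirty_state & R300_NEW_FRAGMENT_SHADER) {
        /* re-upload fragment program code */
    }
    if (ctx->dirty_state & R300_NEW_FRAGMENT_SHADER_CONSTANTS) {
        /* re-upload fragment shader constants only */
    }
    ctx->dirty_state &= ~(R300_NEW_FRAGMENT_SHADER |
                          R300_NEW_FRAGMENT_SHADER_CONSTANTS);
}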
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 883f0a02dc..86ba91db52 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -34,8 +34,8 @@ #define MAX_CS_SIZE 64 * 1024 / 4 -#define VERY_VERBOSE_CS 0 -#define VERY_VERBOSE_REGISTERS 0 +#define VERY_VERBOSE_CS 1 +#define VERY_VERBOSE_REGISTERS 1 /* XXX stolen from radeon_drm.h */ #define RADEON_GEM_DOMAIN_CPU 0x1 diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 421253ca72..2a6ed54ac9 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -49,7 +49,7 @@ static struct debug_option debug_options[] = { void r300_init_debug(struct r300_context * ctx) { const char * options = debug_get_option("RADEON_DEBUG", 0); - boolean printhint = false; + boolean printhint = FALSE; size_t length; struct debug_option * opt; @@ -71,14 +71,14 @@ void r300_init_debug(struct r300_context * ctx) if (!opt->name) { debug_printf("Unknown debug option: %s\n", options); - printhint = true; + printhint = TRUE; } options += length; } if (!ctx->debug) - printhint = true; + printhint = TRUE; } if (printhint || ctx->debug & DBG_HELP) { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 258c38fefd..98a39390bf 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -31,16 +31,18 @@ #include "r300_screen.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" +#include "r300_texture.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend) { CS_LOCALS(r300); - BEGIN_CS(7); - OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2); + BEGIN_CS(8); + OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); OUT_CS(blend->blend_control); OUT_CS(blend->alpha_blend_control); + OUT_CS(blend->color_channel_mask); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; @@ -101,19 +103,23 @@ void r300_emit_dsa_state(struct r300_context* r300, struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 
10 : 8); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); - /* XXX figure out the r300 counterpart for this */ - if (r300screen->caps->is_r500) { - /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */ - } + + /* not needed since we use the 8bit alpha ref */ + /*if (r300screen->caps->is_r500) { + OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); + }*/ + OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); OUT_CS(dsa->z_buffer_control); OUT_CS(dsa->z_stencil_control); OUT_CS(dsa->stencil_ref_mask); OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); + + /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { - /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */ + OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); } END_CS; } @@ -123,7 +129,9 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { - static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 }; + static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; + struct pipe_texture *tex; + switch(constant->Type) { case RC_CONSTANT_EXTERNAL: return externals->constants[constant->u.External]; @@ -131,11 +139,31 @@ static const float * get_shader_constant( case RC_CONSTANT_IMMEDIATE: return constant->u.Immediate; + case RC_CONSTANT_STATE: + switch (constant->u.State[0]) { + /* Factor for converting rectangle coords to + * normalized coords. Should only show up on non-r500. */ + case RC_STATE_R300_TEXRECT_FACTOR: + tex = &r300->textures[constant->u.State[1]]->tex; + vec[0] = 1.0 / tex->width0; + vec[1] = 1.0 / tex->height0; + break; + + default: + debug_printf("r300: Implementation error: " + "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); + } + break; + default: - debug_printf("r300: Implementation error: Unhandled constant type %i\n", - constant->Type); - return zero; + debug_printf("r300: Implementation error: " + "Unhandled constant type %d\n", constant->Type); } + + /* This should either be (0, 0, 0, 1), which should be a relatively safe + * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE + * state factors. */ + return vec; } /* Convert a normal single-precision float into the 7.16 format @@ -173,18 +201,15 @@ static uint32_t pack_float24(float f) } void r300_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals) + struct rX00_fragment_program_code* generic_code) { struct r300_fragment_program_code * code = &generic_code->code.r300; - struct rc_constant_list * constants = &generic_code->constants; int i; CS_LOCALS(r300); BEGIN_CS(15 + code->alu.length * 4 + - (code->tex.length ? (1 + code->tex.length) : 0) + - (constants->Count ? (1 + constants->Count * 4) : 0)); + (code->tex.length ? 
(1 + code->tex.length) : 0)); OUT_CS_REG(R300_US_CONFIG, code->config); OUT_CS_REG(R300_US_PIXSIZE, code->pixsize); @@ -216,32 +241,41 @@ void r300_emit_fragment_program_code(struct r300_context* r300, OUT_CS(code->tex.inst[i]); } - if (constants->Count) { - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4); - for(i = 0; i < constants->Count; ++i) { - const float * data = get_shader_constant(r300, &constants->Constants[i], externals); - OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); - } - } + END_CS; +} + +void r300_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants) +{ + int i; + CS_LOCALS(r300); + + if (constants->Count == 0) + return; + BEGIN_CS(constants->Count * 4 + 1); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4); + for(i = 0; i < constants->Count; ++i) { + const float * data = get_shader_constant(r300, + &constants->Constants[i], + &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + OUT_CS(pack_float24(data[0])); + OUT_CS(pack_float24(data[1])); + OUT_CS(pack_float24(data[2])); + OUT_CS(pack_float24(data[3])); + } END_CS; } void r500_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals) + struct rX00_fragment_program_code* generic_code) { struct r500_fragment_program_code * code = &generic_code->code.r500; - struct rc_constant_list * constants = &generic_code->constants; int i; CS_LOCALS(r300); BEGIN_CS(13 + - ((code->inst_end + 1) * 6) + - (constants->Count ? (3 + (constants->Count * 4)) : 0)); + ((code->inst_end + 1) * 6)); OUT_CS_REG(R500_US_CONFIG, 0); OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CS_REG(R500_US_CODE_RANGE, @@ -261,18 +295,30 @@ void r500_emit_fragment_program_code(struct r300_context* r300, OUT_CS(code->inst[i].inst5); } - if (constants->Count) { - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4); - for (i = 0; i < constants->Count; i++) { - const float * data = get_shader_constant(r300, &constants->Constants[i], externals); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); - } - } + END_CS; +} + +void r500_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants) +{ + int i; + CS_LOCALS(r300); + + if (constants->Count == 0) + return; + BEGIN_CS(constants->Count * 4 + 3); + OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4); + for (i = 0; i < constants->Count; i++) { + const float * data = get_shader_constant(r300, + &constants->Constants[i], + &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); + } END_CS; } @@ -280,48 +326,50 @@ void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb) { struct r300_texture* tex; - unsigned pixpitch; + struct pipe_surface* surf; int i; CS_LOCALS(r300); BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 
10 : 0) + 4); + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + for (i = 0; i < fb->nr_cbufs; i++) { - tex = (struct r300_texture*)fb->cbufs[i]->texture; + surf = fb->cbufs[i]; + tex = (struct r300_texture*)surf->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, pixpitch | + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | r300_translate_colorformat(tex->tex.format), 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), - r300_translate_out_fmt(fb->cbufs[i]->format)); + r300_translate_out_fmt(surf->format)); } if (fb->zsbuf) { - tex = (struct r300_texture*)fb->zsbuf->texture; + surf = fb->zsbuf; + tex = (struct r300_texture*)surf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, pixpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, + RADEON_GEM_DOMAIN_VRAM, 0); } - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); END_CS; } @@ -457,7 +505,7 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) { CS_LOCALS(r300); - BEGIN_CS(20); + BEGIN_CS(22); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); @@ -473,6 +521,7 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); OUT_CS_REG(R300_GA_COLOR_CONTROL, rs->color_control); + OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); END_CS; } @@ -483,6 +532,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); + DBG(r300, DBG_DRAW, "r300: RS emit:\n"); + BEGIN_CS(21); if (r300screen->caps->is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, 8); @@ -491,7 +542,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->ip[i]); - /* debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); */ + DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); @@ -505,11 +556,11 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->inst[i]); - /* debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); */ + DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); } 
- /* debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count, - * rs->inst_count); */ + DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n", + rs->count, rs->inst_count); END_CS; } @@ -531,15 +582,31 @@ void r300_emit_texture(struct r300_context* r300, struct r300_texture* tex, unsigned offset) { + uint32_t filter0 = sampler->filter0; + uint32_t format0 = tex->state.format0; + unsigned min_level, max_level; CS_LOCALS(r300); + /* to emulate 1D textures through 2D ones correctly */ + if (tex->tex.target == PIPE_TEXTURE_1D) { + filter0 &= ~R300_TX_WRAP_T_MASK; + filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); + } + + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + BEGIN_CS(16); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0 | + OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | (offset << 28)); OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0); + OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0); OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); @@ -548,7 +615,52 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } -void r300_emit_vertex_buffer(struct r300_context* r300) +/* XXX I can't read this and that's not good */ +void r300_emit_aos(struct r300_context* r300, unsigned offset) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + CS_LOCALS(r300); + int i; + unsigned aos_count = r300->vertex_element_count; + + unsigned packet_size = (aos_count * 3 + 1) / 2; + BEGIN_CS(2 + packet_size + aos_count * 2); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); + OUT_CS(aos_count); + for (i = 0; i < aos_count - 1; i += 2) { + int buf_num1 = velem[i].vertex_buffer_index; + int buf_num2 = velem[i+1].vertex_buffer_index; + assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); + assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0); + OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | + (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); + OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset + + offset * vbuf[buf_num1].stride); + OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset + + offset * vbuf[buf_num2].stride); + } + if (aos_count & 1) { + int buf_num = velem[i].vertex_buffer_index; + assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); + OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); + OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset + + offset * vbuf[buf_num].stride); + } + + /* XXX bare CS reloc */ + for (i = 0; i < aos_count; i++) { + cs_winsys->write_cs_reloc(cs_winsys, + vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, + 0, + 0); + cs_count -= 2; + } + END_CS; +} +#if 0 +void r300_emit_draw_packet(struct r300_context* r300) { CS_LOCALS(r300); @@ -571,12 +683,15 @@ void r300_emit_vertex_buffer(struct r300_context* r300) 
OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; } +#endif void r300_emit_vertex_format_state(struct r300_context* r300) { int i; CS_LOCALS(r300); + DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n"); + BEGIN_CS(26); OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size); @@ -586,33 +701,42 @@ void r300_emit_vertex_format_state(struct r300_context* r300) OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(r300->vertex_info->vinfo.hwfmt[2]); OUT_CS(r300->vertex_info->vinfo.hwfmt[3]); - /* for (i = 0; i < 4; i++) { - * debug_printf("hwfmt%d: 0x%08x\n", i, - * r300->vertex_info->vinfo.hwfmt[i]); - * } */ + for (i = 0; i < 4; i++) { + DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i, + r300->vertex_info->vinfo.hwfmt[i]); + } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]); - /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i, - * r300->vertex_info->vap_prog_stream_cntl[i]); */ + DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, + r300->vertex_info->vap_prog_stream_cntl[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]); - /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, - * r300->vertex_info->vap_prog_stream_cntl_ext[i]); */ + DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, + r300->vertex_info->vap_prog_stream_cntl_ext[i]); } END_CS; } + void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code, - struct r300_constant_buffer* constants) + struct r300_vertex_program_code* code) { int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); unsigned instruction_count = code->length / 4; + + int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; + int input_count = MAX2(util_bitcount(code->InputsRead), 1); + int output_count = MAX2(util_bitcount(code->OutputsWritten), 1); + int temp_count = MAX2(code->num_temporaries, 1); + int pvs_num_slots = MIN3(vtx_mem_size / input_count, + vtx_mem_size / output_count, 10); + int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { @@ -621,17 +745,11 @@ void r300_emit_vertex_program_code(struct r300_context* r300, return; } - if (code->constants.Count) { - BEGIN_CS(14 + code->length + (code->constants.Count * 4)); - } else { - BEGIN_CS(11 + code->length); - } - + BEGIN_CS(9 + code->length); /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 - * See the r5xx docs for instructions on how to use these. - * XXX these could be optimized to select better values... */ + * See the r5xx docs for instructions on how to use these. */ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); OUT_CS(R300_PVS_FIRST_INST(0) | R300_PVS_XYZW_VALID_INST(instruction_count - 1) | @@ -644,32 +762,51 @@ void r300_emit_vertex_program_code(struct r300_context* r300, for (i = 0; i < code->length; i++) OUT_CS(code->body.d[i]); - if (code->constants.Count) { - OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300screen->caps->is_r500 ? 
- R500_PVS_CONST_START : R300_PVS_CONST_START)); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->constants.Count * 4); - for (i = 0; i < code->constants.Count; i++) { - const float * data = get_shader_constant(r300, &code->constants.Constants[i], constants); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); - } - } - - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) | - R300_PVS_NUM_CNTLRS(5) | + OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | + R300_PVS_NUM_CNTLRS(pvs_num_controllers) | R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + R300_PVS_VF_MAX_VTX_NUM(12) | + (r300screen->caps->is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); END_CS; } void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) { - r300_emit_vertex_program_code(r300, &vs->code, &r300->shader_constants[PIPE_SHADER_VERTEX]); + r300_emit_vertex_program_code(r300, &vs->code); +} + +void r300_emit_vs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants) +{ + int i; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + CS_LOCALS(r300); + + if (!r300screen->caps->has_tcl) { + debug_printf("r300: Implementation error: emit_vertex_shader called," + " but has_tcl is FALSE!\n"); + return; + } + + if (constants->Count == 0) + return; + + BEGIN_CS(constants->Count * 4 + 3); + OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300screen->caps->is_r500 ? + R500_PVS_CONST_START : R300_PVS_CONST_START)); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, constants->Count * 4); + for (i = 0; i < constants->Count; i++) { + const float * data = get_shader_constant(r300, + &constants->Constants[i], + &r300->shader_constants[PIPE_SHADER_VERTEX]); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); + } + END_CS; } void r300_emit_viewport_state(struct r300_context* r300, @@ -694,13 +831,31 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +void r300_emit_texture_count(struct r300_context* r300) +{ + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); + END_CS; + +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); - BEGIN_CS(4); + BEGIN_CS(2); OUT_CS_REG(R300_TX_INVALTAGS, 0); - OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); + END_CS; +} + +static void r300_flush_pvs(struct r300_context* r300) +{ + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); END_CS; } @@ -716,8 +871,6 @@ void r300_emit_dirty_state(struct r300_context* r300) return; } - r300_update_derived_state(r300); - /* Clean out BOs. 
*/ r300->winsys->reset_bos(r300->winsys); @@ -747,7 +900,7 @@ validate: for (i = 0; i < r300->texture_count; i++) { tex = r300->textures[i]; if (!tex) - continue; + continue; if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { r300->context.flush(&r300->context, 0, NULL); @@ -768,7 +921,7 @@ validate: goto validate; } } else { - debug_printf("No VBO while emitting dirty state!\n"); + // debug_printf("No VBO while emitting dirty state!\n"); } if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); @@ -808,13 +961,22 @@ validate: if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { if (r300screen->caps->is_r500) { - r500_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + r500_emit_fragment_program_code(r300, &r300->fs->code); } else { - r300_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + r300_emit_fragment_program_code(r300, &r300->fs->code); } r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER; } + if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER_CONSTANTS) { + if (r300screen->caps->is_r500) { + r500_emit_fs_constant_buffer(r300, &r300->fs->code.constants); + } else { + r300_emit_fs_constant_buffer(r300, &r300->fs->code.constants); + } + r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } + if (r300->dirty_state & R300_NEW_FRAMEBUFFERS) { r300_emit_fb_state(r300, &r300->framebuffer_state); r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; @@ -838,6 +1000,8 @@ validate: /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { + r300_emit_texture_count(r300); + for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { if (r300->dirty_state & ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { @@ -868,17 +1032,26 @@ validate: r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT; } + if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) { + r300_flush_pvs(r300); + } + if (r300->dirty_state & R300_NEW_VERTEX_SHADER) { r300_emit_vertex_shader(r300, r300->vs); r300->dirty_state &= ~R300_NEW_VERTEX_SHADER; } + if (r300->dirty_state & R300_NEW_VERTEX_SHADER_CONSTANTS) { + r300_emit_vs_constant_buffer(r300, &r300->vs->code.constants); + r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS; + } + /* XXX assert(r300->dirty_state == 0); */ /* Finally, emit the VBO. 
*/ - r300_emit_vertex_buffer(r300); + //r300_emit_vertex_buffer(r300); r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 02ac5bebbd..3797d3d332 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -23,9 +23,14 @@ #ifndef R300_EMIT_H #define R300_EMIT_H +#include "r300_context.h" +#include "radeon_code.h" + struct rX00_fragment_program_code; struct r300_vertex_program_code; +void r300_emit_aos(struct r300_context* r300, unsigned offset); + void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend); @@ -39,12 +44,16 @@ void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); void r300_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals); + struct rX00_fragment_program_code* generic_code); + +void r300_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants); void r500_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals); + struct rX00_fragment_program_code* generic_code); + +void r500_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants); void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); @@ -72,8 +81,10 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_format_state(struct r300_context* r300); void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code, - struct r300_constant_buffer* constants); + struct r300_vertex_program_code* code); + +void r300_emit_vs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants); void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); @@ -81,6 +92,8 @@ void r300_emit_vertex_shader(struct r300_context* r300, void r300_emit_viewport_state(struct r300_context* r300, struct r300_viewport_state* viewport); +void r300_emit_texture_count(struct r300_context* r300); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 29ddc84c41..79b01bb4dc 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> * Joakim Sindholt <opensource@zhasha.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +32,41 @@ #include "radeon_code.h" #include "radeon_compiler.h" +/* Convert info about FS input semantics to r300_shader_semantics. 
*/ +static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, + struct r300_shader_semantics* fs_inputs) +{ + int i; + unsigned index; + + r300_shader_semantics_reset(fs_inputs); + + for (i = 0; i < info->num_inputs; i++) { + index = info->input_semantic_index[i]; + + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + assert(index <= ATTR_COLOR_COUNT); + fs_inputs->color[index] = i; + break; + + case TGSI_SEMANTIC_GENERIC: + assert(index <= ATTR_GENERIC_COUNT); + fs_inputs->generic[index] = i; + break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + fs_inputs->fog = i; + break; + + default: + assert(0); + } + } +} + + static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { @@ -58,38 +94,24 @@ static void allocate_hardware_inputs( void (*allocate)(void * data, unsigned input, unsigned hwreg), void * mydata) { - struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info; - int total_colors = 0; - int colors = 0; - int total_generic = 0; - int generic = 0; - int i; - - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - total_colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - total_generic++; - break; + struct r300_shader_semantics* inputs = + &((struct r300_fragment_shader*)c->UserData)->inputs; + int i, reg = 0; + + /* Allocate input registers. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (inputs->color[i] != ATTR_UNUSED) { + allocate(mydata, inputs->color[i], reg++); } } - - for(i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - allocate(mydata, i, colors); - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - allocate(mydata, i, total_colors + generic); - generic++; - break; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (inputs->generic[i] != ATTR_UNUSED) { + allocate(mydata, inputs->generic[i], reg++); } } + if (inputs->fog != ATTR_UNUSED) { + allocate(mydata, inputs->fog, reg++); + } } void r300_translate_fragment_shader(struct r300_context* r300, @@ -98,6 +120,10 @@ void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + /* Initialize. */ + r300_shader_read_fs_inputs(&fs->info, &fs->inputs); + + /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); compiler.Base.Debug = DBG_ON(r300, DBG_FP); @@ -107,7 +133,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = fs; - /* TODO: Program compilation depends on texture compare modes, + /* XXX: Program compilation depends on texture compare modes, * which are sampler state. Therefore, programs need to be recompiled * depending on this state as in the classic Mesa driver. * @@ -133,6 +159,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, /* XXX failover maybe? */ DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", compiler.Base.ErrorMsg); + assert(0); } /* And, finally... 
*/ diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index e831c30301..630e2d0c8a 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> * Joakim Sindholt <opensource@zhasha.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,15 +26,16 @@ #define R300_FS_H #include "pipe/p_state.h" - #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" struct r300_fragment_shader { /* Parent class */ struct pipe_shader_state state; + struct tgsi_shader_info info; + struct r300_shader_semantics inputs; /* Has this shader been translated yet? */ boolean translated; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 007f11efae..ca00b043c5 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -113,7 +113,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, unsigned flags = PIPE_BUFFER_USAGE_CPU_READ; uint32_t* map; uint32_t temp = 0; - unsigned i; + unsigned i, num_results; if (q->flushed == FALSE) pipe->flush(pipe, 0, NULL); @@ -125,7 +125,13 @@ static boolean r300_get_query_result(struct pipe_context* pipe, if (!map) return FALSE; map += q->offset / 4; - for (i = 0; i < r300screen->caps->num_frag_pipes; i++) { + + if (r300screen->caps->family == CHIP_FAMILY_RV530) + num_results = r300screen->caps->num_z_pipes; + else + num_results = r300screen->caps->num_frag_pipes; + + for (i = 0; i < num_results; i++) { if (*map == ~0U) { /* Looks like our results aren't ready yet. */ if (wait) { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index e920b2a5e7..85b1ea568a 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -661,20 +661,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
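/* [Editor's sketch, not part of the commit] The r300_query.c hunk above picks
 * how many per-pipe result slots an occlusion query has: RV530 reports its
 * ZPASS counts through the Z pipes, every other chip through the fragment
 * pipes.  A simplified version of the accumulation this feeds, assuming each
 * pipe writes its count into its own dword and ~0U means "not ready yet": */
static boolean sum_query_results(const uint32_t* map, unsigned num_results,
                                 uint64_t* result)
{
    uint32_t temp = 0;
    unsigned i;
    for (i = 0; i < num_results; i++) {
        if (map[i] == ~0U)
            return FALSE;      /* this pipe has not written its result yet */
        temp += map[i];
    }
    *result = temp;
    return TRUE;
}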
# define R300_GB_SUPER_TILE_B (1 << 15) # define R300_GB_SUBPIXEL_1_12 (0 << 16) # define R300_GB_SUBPIXEL_1_16 (1 << 16) -# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) -# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) -# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) -# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) -# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) -# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) -# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) -# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) -# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) /* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ #define R300_GB_FIFO_SIZE 0x4024 @@ -700,9 +700,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ # define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 -#define GB_Z_PEQ_CONFIG 0x4028 -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) +#define R300_GB_Z_PEQ_CONFIG 0x4028 +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) /* Specifies various polygon specific selects (fog, depth, perspective). */ #define R300_GB_SELECT 0x401c @@ -725,39 +725,39 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Specifies the graphics pipeline configuration for antialiasing. */ #define R300_GB_AA_CONFIG 0x4020 -# define GB_AA_CONFIG_AA_DISABLE (0 << 0) -# define GB_AA_CONFIG_AA_ENABLE (1 << 0) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) +# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) /* Selects which of 4 pipes are active. 
*/ -#define GB_PIPE_SELECT 0x402c -# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 -# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 -# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 -# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 -# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 -# define GB_PIPE_SELECT_MAX_PIPE 12 -# define GB_PIPE_SELECT_BAD_PIPES 14 -# define GB_PIPE_SELECT_CONFIG_PIPES 18 +#define R300_GB_PIPE_SELECT 0x402c +# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define R300_GB_PIPE_SELECT_MAX_PIPE 12 +# define R300_GB_PIPE_SELECT_BAD_PIPES 14 +# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18 /* Specifies the sizes of the various FIFO`s in the sc/rs. */ -#define GB_FIFO_SIZE1 0x4070 +#define R300_GB_FIFO_SIZE1 0x4070 /* High water mark for SC input fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f /* High water mark for SC input fifo (B) */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 /* High water mark for RS colors' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 /* High water mark for RS textures' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) @@ -862,10 +862,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_POINTSIZE_X_MASK 0xffff0000 # define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) -/* Blue fill color */ +/* Red fill color */ #define R500_GA_FILL_R 0x4220 -/* Blue fill color */ +/* Green fill color */ #define R500_GA_FILL_G 0x4224 /* Blue fill color */ @@ -1293,7 +1293,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_RS_INST_TEX_ID(x) ((x) << 0) #define R500_RS_INST_TEX_CN_WRITE (1 << 4) #define R500_RS_INST_TEX_ADDR_SHIFT 5 -# define R500_RS_INST_TEX_ADDR(x) ((x) << 0) +# define R500_RS_INST_TEX_ADDR(x) ((x) << 5) #define R500_RS_INST_COL_ID_SHIFT 12 # define R500_RS_INST_COL_ID(x) ((x) << 12) #define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) @@ -1463,6 +1463,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) # define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) # define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 17 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17) # define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) # define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) # define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) @@ -1471,6 +1473,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
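/* [Editor's note, not part of the commit] The R500_RS_INST_TEX_ADDR change
 * above makes the pack macro agree with R500_RS_INST_TEX_ADDR_SHIFT (5).
 * A small illustrative use: */
uint32_t rs_inst = R500_RS_INST_TEX_ID(1) | R500_RS_INST_TEX_ADDR(3);
/* -> (1 << 0) | (3 << 5) = 0x61.  With the old macro, TEX_ADDR(3) also landed
 * at bit 0 and clobbered the TEX_ID field. */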
# define R300_TX_MAX_ANISO_MASK (7 << 21) # define R300_TX_WRAP_S(x) ((x) << 0) # define R300_TX_WRAP_T(x) ((x) << 3) +# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17) #define R300_TX_FILTER1_0 0x4440 # define R300_CHROMA_KEY_MODE_DISABLE 0 @@ -1500,8 +1503,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_HEIGHTMASK_MASK (2047 << 11) # define R300_TX_DEPTHMASK_SHIFT 22 # define R300_TX_DEPTHMASK_MASK (0xf << 22) -# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 -# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) # define R300_TX_SIZE_PROJECTED (1 << 30) # define R300_TX_PITCH_EN (1 << 31) # define R300_TX_WIDTH(x) ((x) << 0) @@ -1884,6 +1885,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RGB_ADDR0(x) ((x) << 0) # define R300_RGB_ADDR1(x) ((x) << 6) # define R300_RGB_ADDR2(x) ((x) << 12) +# define R300_RGB_TARGET(x) ((x) << 29) #define R300_US_ALU_ALPHA_ADDR_0 0x47C0 # define R300_ALU_SRC0A_SHIFT 0 @@ -1901,9 +1903,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTA_REG (1 << 23) # define R300_ALU_DSTA_OUTPUT (1 << 24) # define R300_ALU_DSTA_DEPTH (1 << 27) -# define R300_ALPHA_ADDR0(x) ((x) << 0) -# define R300_ALPHA_ADDR1(x) ((x) << 6) -# define R300_ALPHA_ADDR2(x) ((x) << 12) +# define R300_ALPHA_ADDR0(x) ((x) << 0) +# define R300_ALPHA_ADDR1(x) ((x) << 6) +# define R300_ALPHA_ADDR2(x) ((x) << 12) +# define R300_ALPHA_TARGET(x) ((x) << 25) #define R300_US_ALU_RGB_INST_0 0x48C0 # define R300_ALU_ARGC_SRC0C_XYZ 0 @@ -2416,6 +2419,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_Z_WRITE_ENABLE (1 << 2) # define R300_Z_SIGNED_COMPARE (1 << 3) # define R300_STENCIL_FRONT_BACK (1 << 4) +# define R500_STENCIL_ZSIGNED_MAGNITUDE (1 << 5) +# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) #define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 6e2bcc62da..4c5fb405c6 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -20,12 +20,16 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* r300_render: Vertex and index buffer primitive emission. Contains both + * HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. */ + #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "r300_cs.h" #include "r300_context.h" @@ -33,11 +37,12 @@ #include "r300_reg.h" #include "r300_render.h" #include "r300_state_derived.h" +#include "r300_vbo.h" /* r300_render: Vertex and index buffer primitive emission. 
*/ #define R300_MAX_VBO_SIZE (1024 * 1024) -static uint32_t r300_translate_primitive(unsigned prim) +uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { case PIPE_PRIM_POINTS: @@ -65,6 +70,93 @@ static uint32_t r300_translate_primitive(unsigned prim) } } +static void r300_emit_draw_arrays(struct r300_context *r300, + unsigned mode, + unsigned count) +{ + CS_LOCALS(r300); + + BEGIN_CS(4); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | + r300_translate_primitive(mode)); + END_CS; +} + +static void r300_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + uint32_t count_dwords; + uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); + CS_LOCALS(r300); + + /* XXX most of these are stupid */ + assert(indexSize == 4 || indexSize == 2); + assert((start * indexSize) % 4 == 0); + assert(offset_dwords == 0); + + BEGIN_CS(10); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (indexSize == 4) { + count_dwords = count + start; + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit | + r300_translate_primitive(mode)); + } else { + count_dwords = (count + start + 1) / 2; + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + r300_translate_primitive(mode)); + } + + /* INDX_BUFFER is a truly special packet3. + * Unlike most other packet3, where the offset is after the count, + * the order is reversed, so the relocation ends up carrying the + * size of the indexbuf instead of the offset. + * + * XXX Fix offset + */ + OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); + OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | + (0 << R300_INDX_BUFFER_SKIP_SHIFT)); + OUT_CS(offset_dwords); + OUT_CS_RELOC(indexBuffer, count_dwords, + RADEON_GEM_DOMAIN_GTT, 0, 0); + + END_CS; +} + + +static boolean r300_setup_vertex_buffers(struct r300_context *r300) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + +validate: + for (int i = 0; i < r300->vertex_element_count; i++) { + if (!r300->winsys->add_buffer(r300->winsys, + vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + + if (!r300->winsys->validate(r300->winsys)) { + r300->context.flush(&r300->context, 0, NULL); + return r300->winsys->validate(r300->winsys); + } + + return TRUE; +} + /* This is the fast-path drawing & emission for HW TCL. 
*/ boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, @@ -76,82 +168,29 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, unsigned count) { struct r300_context* r300 = r300_context(pipe); - CS_LOCALS(r300); - uint32_t prim = r300_translate_primitive(mode); - struct pipe_vertex_buffer* aos = r300->vertex_buffers; - unsigned aos_count = r300->vertex_buffer_count; - short* indices; - unsigned packet_size; - unsigned i; - bool invalid = FALSE; -validate: - for (i = 0; i < aos_count; i++) { - if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { - pipe->flush(pipe, 0, NULL); - goto validate; - } + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; } - if (!r300->winsys->validate(r300->winsys)) { - pipe->flush(pipe, 0, NULL); - if (invalid) { - /* Well, hell. */ - debug_printf("r300: Stuck in validation loop, gonna quit now."); - exit(1); - } - invalid = TRUE; - goto validate; + + if (count > 65535) { + return FALSE; } - r300_emit_dirty_state(r300); + r300_update_derived_state(r300); - packet_size = (aos_count >> 1) * 3 + (aos_count & 1) * 2; - - BEGIN_CS(3 + packet_size + (aos_count * 2)); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count); - for (i = 0; i < aos_count - 1; i += 2) { - OUT_CS(aos[i].stride | - (aos[i].stride << 8) | - (aos[i + 1].stride << 16) | - (aos[i + 1].stride << 24)); - OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); - OUT_CS(aos[i + 1].buffer_offset + start * 4 * aos[i + 1].stride); - } - if (aos_count & 1) { - OUT_CS(aos[i].stride | (aos[i].stride << 8)); - OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); - } - for (i = 0; i < aos_count; i++) { - OUT_CS_RELOC(aos[i].buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + if (!r300_setup_vertex_buffers(r300)) { + return FALSE; } - END_CS; - if (indexBuffer) { - indices = (short*)pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + setup_index_buffer(r300, indexBuffer, indexSize); - /* Set the starting point. 
*/ - indices += start; + r300_emit_dirty_state(r300); - BEGIN_CS(2 + (count+1)/2); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count + 1)/2); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | prim); - for (i = 0; i < count - 1; i += 2) { - OUT_CS(indices[i + 1] << 16 | indices[i]); - } - if (count % 2) { - OUT_CS(indices[count - 1]); - } - END_CS; - } else { - BEGIN_CS(2); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | - prim); - END_CS; - } + r300_emit_aos(r300, 0); + + r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, + mode, start, count); return TRUE; } @@ -169,7 +208,29 @@ boolean r300_draw_elements(struct pipe_context* pipe, boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { - return pipe->draw_elements(pipe, NULL, 0, mode, start, count); + struct r300_context* r300 = r300_context(pipe); + + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + + if (count > 65535) { + return FALSE; + } + + r300_update_derived_state(r300); + + if (!r300_setup_vertex_buffers(r300)) { + return FALSE; + } + + r300_emit_dirty_state(r300); + + r300_emit_aos(r300, start); + + r300_emit_draw_arrays(r300, mode, count); + + return TRUE; } /**************************************************************************** @@ -177,7 +238,44 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, * keep these functions separated so that they are easier to locate. ~C. * ***************************************************************************/ -/* Draw-based drawing for SW TCL chipsets. */ +/* SW TCL arrays, using Draw. */ +boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + int i; + + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + + for (i = 0; i < r300->vertex_buffer_count; i++) { + void* buf = pipe_buffer_map(pipe->screen, + r300->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } + + draw_set_mapped_element_buffer(r300->draw, 0, NULL); + + draw_set_mapped_constant_buffer(r300->draw, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * + (sizeof(float) * 4)); + + draw_arrays(r300->draw, mode, start, count); + + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } + + return TRUE; +} + +/* SW TCL elements, using Draw. 
*/ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, @@ -190,21 +288,21 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); int i; + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, - r300->vertex_buffers[i].buffer, + r300->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - if (indexBuffer) { - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, - minIndex, maxIndex, indices); - } else { - draw_set_mapped_element_buffer(r300->draw, 0, NULL); - } + void* indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(r300->draw, indexSize, + minIndex, maxIndex, indices); draw_set_mapped_constant_buffer(r300->draw, r300->shader_constants[PIPE_SHADER_VERTEX].constants, @@ -214,15 +312,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_arrays(r300->draw, mode, start, count); for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); + pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - if (indexBuffer) { - pipe_buffer_unmap(pipe->screen, indexBuffer); - draw_set_mapped_element_buffer_range(r300->draw, 0, start, - start + count - 1, NULL); - } + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(r300->draw, 0, start, + start + count - 1, NULL); return TRUE; } diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index 3d8f47ba75..da83069083 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -23,6 +23,8 @@ #ifndef R300_RENDER_H #define R300_RENDER_H +uint32_t r300_translate_primitive(unsigned prim); + boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, @@ -40,6 +42,11 @@ boolean r300_draw_elements(struct pipe_context* pipe, boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count); +boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 1d9f91d0f7..390b63007e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -84,7 +84,9 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: - return 0; + /* XXX enable now to get GL2.1 API, + * figure out later how to emulate this */ + return 1; case PIPE_CAP_TWO_SIDED_STENCIL: if (r300screen->caps->is_r500) { return 1; @@ -119,32 +121,13 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - if (r300screen->caps->is_r500) { - /* 13 == 4096x4096 */ - return 13; - } else { - /* 12 == 2048x2048 */ - return 12; - } case 
PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - /* So, technically, the limit is the same as above, but some math - * shows why this is silly. Assuming RGBA, 4cpp, we can see that - * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly - * practical. However, if at some point a game really wants this, - * then we can remove or raise this limit. */ - if (r300screen->caps->is_r500) { - /* 9 == 256x256x256 */ - return 9; - } else { - /* 8 == 128*128*128 */ - return 8; - } case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: if (r300screen->caps->is_r500) { - /* 13 == 4096x4096 */ + /* 13 == 4096 */ return 13; } else { - /* 12 == 2048x2048 */ + /* 12 == 2048 */ return 12; } case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -191,8 +174,8 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, - boolean is_r500) +static boolean check_tex_format(enum pipe_format format, uint32_t usage, + boolean is_r500) { uint32_t retval = 0; @@ -216,6 +199,8 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: retval = usage & PIPE_TEXTURE_USAGE_SAMPLER; break; @@ -286,7 +271,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, return (retval >= usage); } -/* XXX moar targets */ static boolean r300_is_format_supported(struct pipe_screen* pscreen, enum pipe_format format, enum pipe_texture_target target, @@ -294,15 +278,13 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, unsigned geom_flags) { switch (target) { + case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ case PIPE_TEXTURE_2D: - return check_tex_2d_format(format, tex_usage, - r300_screen(pscreen)->caps->is_r500); - case PIPE_TEXTURE_1D: case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - debug_printf("r300: Implementation error: Unsupported format " - "target: %d\n", target); - break; + return check_tex_format(format, tex_usage, + r300_screen(pscreen)->caps->is_r500); + default: debug_printf("r300: Fatal: This is not a format target: %d\n", target); @@ -322,22 +304,9 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; - unsigned offset = 0; /* in bytes */ + unsigned offset; - /* XXX Add support for these things */ - if (texture->target == PIPE_TEXTURE_CUBE) { - debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n"); - /* offset = tex->image_offset[level][face]; */ - } - else if (texture->target == PIPE_TEXTURE_3D) { - debug_printf("PIPE_TEXTURE_3D is not yet supported.\n"); - /* offset = tex->image_offset[level][zslice]; */ - } - else { - offset = tex->offset[level]; - assert(face == 0); - assert(zslice == 0); - } + offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -352,6 +321,12 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.nblocksy = texture->nblocksy[level]; trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; + + /* XXX not sure whether it's required to set these two, + the driver doesn't use them */ + trans->transfer.zslice = zslice; + trans->transfer.face = face; + trans->offset = offset; } return &trans->transfer; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 41df31f670..1ce5ff3904 100644 
--- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -27,6 +27,8 @@ #include "r300_chipset.h" +struct r300_winsys; + struct r300_screen { /* Parent class */ struct pipe_screen screen; diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h new file mode 100644 index 0000000000..85184e2cfd --- /dev/null +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -0,0 +1,64 @@ +/* + * Copyright 2009 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SHADER_SEMANTICS_H +#define R300_SHADER_SEMANTICS_H + +#define ATTR_UNUSED (-1) +#define ATTR_COLOR_COUNT 2 +#define ATTR_GENERIC_COUNT 16 + +/* This structure contains information about what attributes are written by VS + * or read by FS. (but not both) It's much easier to work with than + * tgsi_shader_info. + * + * The variables contain indices to tgsi_shader_info semantics and those + * indices are nothing else than input/output register numbers. 
*/ +struct r300_shader_semantics { + int pos; + int psize; + int color[ATTR_COLOR_COUNT]; + int bcolor[ATTR_COLOR_COUNT]; + int generic[ATTR_GENERIC_COUNT]; + int fog; +}; + +static INLINE void r300_shader_semantics_reset( + struct r300_shader_semantics* info) +{ + int i; + + info->pos = ATTR_UNUSED; + info->psize = ATTR_UNUSED; + info->fog = ATTR_UNUSED; + + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + info->color[i] = ATTR_UNUSED; + info->bcolor[i] = ATTR_UNUSED; + } + + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + info->generic[i] = ATTR_UNUSED; + } +} + +#endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a3e1bc621a..7505353953 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -49,46 +49,49 @@ static void* r300_create_blend_state(struct pipe_context* pipe, { struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); + if (state->blend_enable) { - unsigned eqRGB = state->rgb_func; - unsigned srcRGB = state->rgb_src_factor; - unsigned dstRGB = state->rgb_dst_factor; - - unsigned eqA = state->alpha_func; - unsigned srcA = state->alpha_src_factor; - unsigned dstA = state->alpha_dst_factor; - - if (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB) { - blend->alpha_blend_control = - r300_translate_blend_function(eqA) | - (r300_translate_blend_factor(srcA) << - R300_SRC_BLEND_SHIFT) | - (r300_translate_blend_factor(dstA) << - R300_DST_BLEND_SHIFT); - blend->blend_control |= R300_ALPHA_BLEND_ENABLE | - R300_SEPARATE_ALPHA_ENABLE; - } else { - blend->alpha_blend_control = R300_COMB_FCN_ADD_CLAMP | - (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | - (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); - } - } - if (state->blend_enable) { - /* XXX for now, always do separate alpha... - * is it faster to do it with one reg? 
*/ - blend->blend_control |= R300_READ_ENABLE | - r300_translate_blend_function(state->rgb_func) | - (r300_translate_blend_factor(state->rgb_src_factor) << - R300_SRC_BLEND_SHIFT) | - (r300_translate_blend_factor(state->rgb_dst_factor) << - R300_DST_BLEND_SHIFT); - } else { - blend->blend_control = - R300_COMB_FCN_ADD_CLAMP | - (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | - (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + unsigned eqRGB = state->rgb_func; + unsigned srcRGB = state->rgb_src_factor; + unsigned dstRGB = state->rgb_dst_factor; + + unsigned eqA = state->alpha_func; + unsigned srcA = state->alpha_src_factor; + unsigned dstA = state->alpha_dst_factor; + + /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, + * this is just the crappy D3D naming */ + blend->blend_control = R300_ALPHA_BLEND_ENABLE | + r300_translate_blend_function(eqRGB) | + ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | + ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); + + /* optimization: some operations do not require the destination color */ + if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || + eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || + dstRGB != PIPE_BLENDFACTOR_ZERO || + dstA != PIPE_BLENDFACTOR_ZERO || + srcRGB == PIPE_BLENDFACTOR_DST_COLOR || + srcRGB == PIPE_BLENDFACTOR_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_DST_COLOR || + srcA == PIPE_BLENDFACTOR_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + blend->blend_control |= R300_READ_ENABLE; + + /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ + /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + + /* separate alpha */ + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE; + blend->alpha_blend_control = + r300_translate_blend_function(eqA) | + (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | + (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); + } } /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ @@ -97,6 +100,20 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; } + /* Color Channel Mask */ + if (state->colormask & PIPE_MASK_R) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0; + } + if (state->colormask & PIPE_MASK_G) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0; + } + if (state->colormask & PIPE_MASK_B) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0; + } + if (state->colormask & PIPE_MASK_A) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0; + } + if (state->dither) { blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT; @@ -122,25 +139,29 @@ static void r300_delete_blend_state(struct pipe_context* pipe, FREE(state); } +/* Convert float to 10bit integer */ +static unsigned float_to_fixed10(float f) +{ + return CLAMP((unsigned)(f * 1023.9f), 0, 1023); +} + /* Set blend color. * Setup both R300 and R500 registers, figure out later which one to write. 
*/ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); - ubyte ur, ug, ub, ua; - - ur = float_to_ubyte(color->color[0]); - ug = float_to_ubyte(color->color[1]); - ub = float_to_ubyte(color->color[2]); - ua = float_to_ubyte(color->color[3]); util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &r300->blend_color_state->blend_color); - /* XXX this is wrong */ - r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16); - r300->blend_color_state->blend_color_green_blue = ub | (ug << 16); + /* XXX if FP16 blending is enabled, we should use the FP16 format */ + r300->blend_color_state->blend_color_red_alpha = + float_to_fixed10(color->color[0]) | + (float_to_fixed10(color->color[3]) << 16); + r300->blend_color_state->blend_color_green_blue = + float_to_fixed10(color->color[2]) | + (float_to_fixed10(color->color[1]) << 16); r300->dirty_state |= R300_NEW_BLEND_COLOR; } @@ -159,31 +180,6 @@ static void r300_set_clip_state(struct pipe_context* pipe, } } -static void - r300_set_constant_buffer(struct pipe_context* pipe, - uint shader, uint index, - const struct pipe_constant_buffer* buffer) -{ - struct r300_context* r300 = r300_context(pipe); - - /* This entire chunk of code seems ever-so-slightly baked. - * It's as if I've got pipe_buffer* matryoshkas... */ - if (buffer && buffer->buffer && buffer->buffer->size) { - void* map = pipe->winsys->buffer_map(pipe->winsys, buffer->buffer, - PIPE_BUFFER_USAGE_CPU_READ); - memcpy(r300->shader_constants[shader].constants, - map, buffer->buffer->size); - pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer); - - r300->shader_constants[shader].count = - buffer->buffer->size / (sizeof(float) * 4); - } else { - r300->shader_constants[shader].count = 0; - } - - r300->dirty_state |= R300_NEW_CONSTANTS; -} - /* Create a new depth, stencil, and alpha state based on the CSO dsa state. * * This contains the depth buffer, stencil buffer, alpha test, and such. @@ -193,6 +189,8 @@ static void* r300_create_dsa_state(struct pipe_context* pipe, const struct pipe_depth_stencil_alpha_state* state) { + struct r300_capabilities *caps = + r300_screen(r300_context(pipe)->context.screen)->caps; struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); /* Depth test setup. 
*/ @@ -237,9 +235,16 @@ static void* (r300_translate_stencil_op(state->stencil[1].zfail_op) << R300_S_BACK_ZFAIL_OP_SHIFT); - dsa->stencil_ref_bf = (state->stencil[1].ref_value) | - (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | - (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT); + /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ + if (caps->is_r500) + { + dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; + dsa->stencil_ref_bf = (state->stencil[1].ref_value) | + (state->stencil[1].valuemask << + R300_STENCILMASK_SHIFT) | + (state->stencil[1].writemask << + R300_STENCILWRITEMASK_SHIFT); + } } } @@ -248,8 +253,13 @@ static void* dsa->alpha_function = r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; - dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, - 0, 1023); + + /* XXX figure out why emitting 10bit alpha ref causes CS to dump */ + /* always use 8bit alpha ref */ + dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); + + if (caps->is_r500) + dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT; } return (void*)dsa; @@ -285,7 +295,9 @@ static void { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); + if (r300->draw) { + draw_flush(r300->draw); + } r300->framebuffer_state = *state; @@ -324,7 +336,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; - r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } /* Delete fragment shader state. */ @@ -386,25 +398,52 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; + /* XXX I think there is something wrong with the polygon mode, + * XXX re-test when r300g is in a better shape */ + + /* Enable polygon mode */ + if (state->fill_cw != PIPE_POLYGON_MODE_FILL || + state->fill_ccw != PIPE_POLYGON_MODE_FILL) { + rs->polygon_mode = R300_GA_POLY_MODE_DUAL; + } + /* Radeons don't think in "CW/CCW", they think in "front/back". 
*/ if (state->front_winding == PIPE_WINDING_CW) { rs->cull_mode = R300_FRONT_FACE_CW; + /* Polygon offset */ if (state->offset_cw) { rs->polygon_offset_enable |= R300_FRONT_ENABLE; } if (state->offset_ccw) { rs->polygon_offset_enable |= R300_BACK_ENABLE; } + + /* Polygon mode */ + if (rs->polygon_mode) { + rs->polygon_mode |= + r300_translate_polygon_mode_front(state->fill_cw); + rs->polygon_mode |= + r300_translate_polygon_mode_back(state->fill_ccw); + } } else { rs->cull_mode = R300_FRONT_FACE_CCW; + /* Polygon offset */ if (state->offset_ccw) { rs->polygon_offset_enable |= R300_FRONT_ENABLE; } if (state->offset_cw) { rs->polygon_offset_enable |= R300_BACK_ENABLE; } + + /* Polygon mode */ + if (rs->polygon_mode) { + rs->polygon_mode |= + r300_translate_polygon_mode_front(state->fill_ccw); + rs->polygon_mode |= + r300_translate_polygon_mode_back(state->fill_cw); + } } if (state->front_winding & state->cull_mode) { rs->cull_mode |= R300_CULL_FRONT; @@ -448,10 +487,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) struct r300_context* r300 = r300_context(pipe); struct r300_rs_state* rs = (struct r300_rs_state*)state; - draw_flush(r300->draw); - draw_set_rasterizer_state(r300->draw, &rs->rs); + if (r300->draw) { + draw_flush(r300->draw); + draw_set_rasterizer_state(r300->draw, &rs->rs); + } r300->rs_state = rs; + /* XXX Clean these up when we move to atom emits */ r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; r300->dirty_state |= R300_NEW_SCISSOR; @@ -481,6 +523,11 @@ static void* state->mag_img_filter, state->min_mip_filter); + /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ + /* We must pass these to the emit function to clamp them properly. */ + sampler->min_lod = MAX2((unsigned)state->min_lod, 0); + sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0); + lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1); sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; @@ -529,18 +576,28 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, struct pipe_texture** texture) { struct r300_context* r300 = r300_context(pipe); + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; int i; /* XXX magic num */ if (count > 8) { return; } + + r300->context.flush(&r300->context, 0, NULL); for (i = 0; i < count; i++) { if (r300->textures[i] != (struct r300_texture*)texture[i]) { pipe_texture_reference((struct pipe_texture**)&r300->textures[i], texture[i]); r300->dirty_state |= (R300_NEW_TEXTURE << i); + + /* R300-specific - set the texrect factor in a fragment shader */ + if (!is_r500 && r300->textures[i]->is_npot) { + /* XXX It would be nice to re-emit just 1 constant, + * XXX not all of them */ + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } } @@ -589,17 +646,14 @@ static void r300_set_viewport_state(struct pipe_context* pipe, r300->viewport_state->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - assert(state->scale[0] != 0.0f); r300->viewport_state->xscale = state->scale[0]; r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - assert(state->scale[1] != 0.0f); r300->viewport_state->yscale = state->scale[1]; r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - assert(state->scale[2] != 0.0f); r300->viewport_state->zscale = state->scale[2]; r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; } @@ -625,13 +679,16 @@ static void 
r300_set_vertex_buffers(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - memcpy(r300->vertex_buffers, buffers, + memcpy(r300->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - r300->vertex_buffer_count = count; - draw_flush(r300->draw); - draw_set_vertex_buffers(r300->draw, count, buffers); + if (r300->draw) { + draw_flush(r300->draw); + draw_set_vertex_buffers(r300->draw, count, buffers); + } + + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; } static void r300_set_vertex_elements(struct pipe_context* pipe, @@ -640,8 +697,15 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - draw_set_vertex_elements(r300->draw, count, elements); + memcpy(r300->vertex_element, + elements, + sizeof(struct pipe_vertex_element) * count); + r300->vertex_element_count = count; + + if (r300->draw) { + draw_flush(r300->draw); + draw_set_vertex_elements(r300->draw, count, elements); + } } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -657,9 +721,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe, tgsi_scan_shader(shader->tokens, &vs->info); - /* Appease Draw. */ - vs->draw = draw_create_vertex_shader(r300->draw, shader); - return (void*)vs; } else { return draw_create_vertex_shader(r300->draw, shader); @@ -670,8 +731,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - if (r300_screen(pipe->screen)->caps->has_tcl) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; @@ -682,10 +741,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300_translate_vertex_shader(r300, vs); } - draw_bind_vertex_shader(r300->draw, vs->draw); r300->vs = vs; - r300->dirty_state |= R300_NEW_VERTEX_SHADER; + r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; } else { + draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)shader); } @@ -699,7 +758,6 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; rc_constants_destroy(&vs->code.constants); - draw_delete_vertex_shader(r300->draw, vs->draw); FREE((void*)vs->state.tokens); FREE(shader); } else { @@ -708,6 +766,31 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) } } +static void r300_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct r300_context* r300 = r300_context(pipe); + void *mapped; + + if (buf == NULL || buf->buffer->size == 0 || + (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) + { + r300->shader_constants[shader].count = 0; + return; + } + + assert((buf->buffer->size % 4 * sizeof(float)) == 0); + memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size); + r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float)); + pipe_buffer_unmap(pipe->screen, buf->buffer); + + if (shader == PIPE_SHADER_VERTEX) + r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; + else if (shader == PIPE_SHADER_FRAGMENT) + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; +} + void r300_init_state_functions(struct r300_context* r300) { r300->context.create_blend_state = r300_create_blend_state; @@ -739,10 +822,10 @@ void 
r300_init_state_functions(struct r300_context* r300) r300->context.delete_rasterizer_state = r300_delete_rs_state; r300->context.create_sampler_state = r300_create_sampler_state; - r300->context.bind_sampler_states = r300_bind_sampler_states; + r300->context.bind_fragment_sampler_states = r300_bind_sampler_states; r300->context.delete_sampler_state = r300_delete_sampler_state; - r300->context.set_sampler_textures = r300_set_sampler_textures; + r300->context.set_fragment_sampler_textures = r300_set_sampler_textures; r300->context.set_scissor_state = r300_set_scissor_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7d000e9e2d..cd969d633b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +29,7 @@ #include "r300_context.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_shader_semantics.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_vs.h" @@ -47,8 +49,8 @@ struct r300_shader_derived_value { unsigned r300_shader_key_hash(void* key) { struct r300_shader_key* shader_key = (struct r300_shader_key*)key; - unsigned vs = (unsigned)shader_key->vs; - unsigned fs = (unsigned)shader_key->fs; + unsigned vs = (intptr_t)shader_key->vs; + unsigned fs = (intptr_t)shader_key->fs; return (vs << 16) | (fs & 0xffff); } @@ -61,200 +63,143 @@ int r300_shader_key_compare(void* key1, void* key2) { (shader_key1->fs == shader_key2->fs); } -/* Set up the vs_tab and routes. */ -static void r300_vs_tab_routes(struct r300_context* r300, - struct r300_vertex_format* vformat) +static void r300_draw_emit_attrib(struct r300_context* r300, + enum attrib_emit emit, + enum interp_mode interp, + int index) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; - boolean pos = FALSE, psize = FALSE, fog = FALSE; - int i, texs = 0, cols = 0; - struct tgsi_shader_info* info; + struct tgsi_shader_info* info = &r300->vs->info; + int output; - if (r300screen->caps->has_tcl) { - /* Use vertex shader to determine required routes. */ - info = &r300->vs->info; + output = draw_find_vs_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); + draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); +} + +static void r300_draw_emit_all_attribs(struct r300_context* r300) +{ + struct r300_shader_semantics* vs_outputs = &r300->vs->outputs; + int i, gen_count; + + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->pos); } else { - /* Use fragment shader to determine required routes. 
*/ - info = &r300->fs->info; + assert(0); } - assert(info->num_inputs <= 16); - - if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte) - { - for (i = 0; i < info->num_inputs; i++) { - switch (r300->vs->code.inputs[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - tab[i] = 0; - break; - case TGSI_SEMANTIC_COLOR: - tab[i] = 2 + cols; - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - assert(psize == FALSE); - psize = TRUE; - tab[i] = 15; - break; - case TGSI_SEMANTIC_FOG: - assert(fog == FALSE); - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - tab[i] = 6 + texs; - texs++; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } - } + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, + vs_outputs->psize); } - else - { - /* Just copy vert attribs over as-is. */ - for (i = 0; i < info->num_inputs; i++) { - tab[i] = i; - } - for (i = 0; i < info->num_outputs; i++) { - switch (info->output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - break; - case TGSI_SEMANTIC_COLOR: - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - psize = TRUE; - break; - case TGSI_SEMANTIC_FOG: - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - texs++; - break; - default: - debug_printf("r300: Unknown vertex output %d\n", - info->output_semantic_name[i]); - break; - } + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, + vs_outputs->color[i]); } } - /* XXX magic */ - assert(texs <= 8); + /* XXX Back-face colors. */ - /* Do the actual vertex_info setup. - * - * vertex_info has four uints of hardware-specific data in it. - * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL - * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM - * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 - * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - - vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - - /* We need to add vertex position attribute only for SW TCL case, - * for HW TCL case it could be generated by vertex shader */ - if (!pos && !r300screen->caps->has_tcl) { - debug_printf("r300: Forcing vertex position attribute emit...\n"); - /* Make room for the position attribute - * at the beginning of the tab. */ - for (i = 15; i > 0; i--) { - tab[i] = tab[i-1]; + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->generic[i]); + gen_count++; } - tab[0] = 0; - } - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); - vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS; - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - - if (psize) { - draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; } - for (i = 0; i < cols; i++) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); - vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR; - vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); + /* Fog coordinates. 
*/ + if (vs_outputs->fog != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->fog); + gen_count++; } - /* Init i right here, increment it if fog is enabled. - * This gets around a double-increment problem. */ - i = 0; + /* XXX magic */ + assert(gen_count <= 8); +} - if (fog) { - i++; - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); - } +/* Update the PSC tables. */ +static void r300_vertex_psc(struct r300_context* r300) +{ + struct r300_vertex_info *vformat = r300->vertex_info; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i; - for (; i < texs; i++) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); - } + /* Vertex shaders have no semantics on their inputs, + * so PSC should just route stuff based on the vertex elements, + * and not on attrib information. */ + DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements" + " in psc\n", + r300->vs->info.num_inputs, + r300->vertex_element_count); - /* Handle the case where the vertex shader will be generating some of - * the attribs based on its inputs. */ - if (r300screen->caps->has_tcl && - info->num_inputs < info->num_outputs) { - vinfo->num_attribs = info->num_inputs; + for (i = 0; i < r300->vertex_element_count; i++) { + format = r300->vertex_element[i].src_format; + + type = r300_translate_vertex_data_type(format) | + (i << R300_DST_VEC_LOC_SHIFT); + swizzle = r300_translate_vertex_data_swizzle(format); + + if (i % 2) { + vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; + } } - draw_compute_vertex_size(vinfo); + + assert(i <= 15); + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vformat->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Update the PSC tables. */ -static void r300_vertex_psc(struct r300_context* r300, - struct r300_vertex_format* vformat) +/* Update the PSC tables for SW TCL, using Draw. */ +static void r300_swtcl_vertex_psc(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_vertex_info *vformat = r300->vertex_info; struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; + int* vs_output_tab = r300->vs->output_stream_loc_swtcl; - /* Vertex shaders have no semantics on their inputs, - * so PSC should just route stuff based on their info, - * and not on attrib information. */ - if (r300screen->caps->has_tcl) { - attrib_count = r300->vs->info.num_inputs; - DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", - attrib_count); - } else { - attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); - for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," - " tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - tab[i]); - } + /* For each Draw attribute, route it to the fragment shader according + * to the vs_output_tab. 
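 *
 * (A rough sketch, not from the original patch: both r300_vertex_psc and
 * this SWTCL path pack one 16-bit PSC descriptor per attribute, two per
 * 32-bit VAP_PROG_STREAM_CNTL word, with even-numbered attributes in the
 * low half and odd-numbered ones in the high half.  The packing rule,
 * condensed into a hypothetical helper:
 *
 *    static void psc_pack(uint32_t cntl[8], int i, uint16_t desc)
 *    {
 *        // attribute i lands in word i >> 1, upper 16 bits when i is odd
 *        cntl[i >> 1] |= (uint32_t)desc << ((i & 1) ? 16 : 0);
 *    }
 *
 * The same rule drives both vap_prog_stream_cntl (data type) and
 * vap_prog_stream_cntl_ext (swizzle), and R300_LAST_VEC is OR'ed into
 * whichever half holds the final attribute.)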
*/ + attrib_count = vinfo->num_attribs; + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," + " vs_output_tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + vs_output_tab[i]); } for (i = 0; i < attrib_count; i++) { /* Make sure we have a proper destination for our attribute. */ - assert(tab[i] != -1); + assert(vs_output_tab[i] != -1); format = draw_translate_vinfo_format(vinfo->attrib[i].emit); /* Obtain the type of data in this attribute. */ type = r300_translate_vertex_data_type(format) | - tab[i] << R300_DST_VEC_LOC_SHIFT; + vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; /* Obtain the swizzle for this attribute. Note that the default * swizzle in the hardware is not XYZW! */ @@ -263,12 +208,10 @@ static void r300_vertex_psc(struct r300_context* r300, /* Add the attribute to the PSC table. */ if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 0; - - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0; + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; } } @@ -280,188 +223,204 @@ static void r300_vertex_psc(struct r300_context* r300, (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Set up the mappings from GB to US, for RS block. */ -static void r300_update_fs_tab(struct r300_context* r300, - struct r300_vertex_format* vformat) +static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) +{ + rs->ip[id] |= R300_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + } + rs->inst[id] |= R300_RS_INST_COL_ID(id); +} + +static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) { - struct tgsi_shader_info* info = &r300->fs->info; - int i, cols = 0, texs = 0, cols_emitted = 0; - int* tab = vformat->fs_tab; + rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | + R300_RS_INST_COL_ADDR(fp_offset); +} - for (i = 0; i < 16; i++) { - tab[i] = -1; +static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + if (swizzle_X001) { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_K0) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1); + } else { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_C2) | + R300_RS_SEL_Q(R300_RS_SEL_C3); } + rs->inst[id] |= R300_RS_INST_TEX_ID(id); +} - assert(info->num_inputs <= 16); - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - tab[i] = INTERP_LINEAR; - cols++; - break; - case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_PSIZE: - debug_printf("r300: Implementation error: Can't use " - "pos attribs in fragshader yet!\n"); - /* Pass through for now */ - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - tab[i] = INTERP_PERSPECTIVE; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } +static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE | + R300_RS_INST_TEX_ADDR(fp_offset); +} + +static 
void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) +{ + rs->ip[id] |= R500_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); } + rs->inst[id] |= R500_RS_INST_COL_ID(id); +} - /* Now that we know where everything is... */ - DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); - for (i = 0; i < info->num_inputs; i++) { - switch (tab[i]) { - case INTERP_LINEAR: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, color, tab %d\n", - i, cols_emitted); - tab[i] = cols_emitted; - cols_emitted++; - break; - case INTERP_PERSPECTIVE: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, texcoord, tab %d\n", - i, cols + texs); - tab[i] = cols + texs; - texs++; - break; - case -1: - debug_printf("r300: Implementation error: Bad fp interp!\n"); - default: - break; - } +static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); +} + +static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + int rs_tex_comp = ptr*4; + + if (swizzle_X001) { + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(R500_RS_IP_PTR_K0) | + R500_RS_SEL_R(R500_RS_IP_PTR_K0) | + R500_RS_SEL_Q(R500_RS_IP_PTR_K1); + } else { + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(rs_tex_comp + 1) | + R500_RS_SEL_R(rs_tex_comp + 2) | + R500_RS_SEL_Q(rs_tex_comp + 3); } + rs->inst[id] |= R500_RS_INST_TEX_ID(id); +} +static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_TEX_ADDR(fp_offset); } -/* Set up the RS block. This is the part of the chipset that actually does - * the rasterization of vertices into fragments. This is also the part of the - * chipset that locks up if any part of it is even slightly wrong. */ +/* Set up the RS block. + * + * This is the part of the chipset that actually does the rasterization + * of vertices into fragments. This is also the part of the chipset that + * locks up if any part of it is even slightly wrong. 
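 *
 * (A rough worked example, not from the original patch: the rewritten
 * r300_update_rs_block below picks the r300_rs_* or r500_rs_* helpers
 * through function pointers based on caps->is_r500, then walks the VS
 * outputs.  Assuming a VS that writes COLOR[0] and GENERIC[0] and an FS
 * that reads both, the routing reduces to:
 *
 *    rX00_rs_col(rs, 0, 0, FALSE);       // rasterize color 0
 *    rX00_rs_col_write(rs, 0, 0);        // feed FS input register 0
 *    rX00_rs_tex(rs, 0, 0, FALSE);       // rasterize texcoord 0
 *    rX00_rs_tex_write(rs, 0, 1);        // feed FS input register 1
 *
 *    // col_count == 1, tex_count == 1, therefore:
 *    // rs->count      = 4 | (1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN
 *    // rs->inst_count = 0
 *
 * If the VS writes neither, one dummy color is still rasterized with the
 * 0001 swizzle, since rasterizing nothing locks up the chip.)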
*/ static void r300_update_rs_block(struct r300_context* r300, - struct r300_rs_block* rs) + struct r300_shader_semantics* vs_outputs, + struct r300_shader_semantics* fs_inputs) { - struct tgsi_shader_info* info = &r300->fs->info; - int col_count = 0, fp_offset = 0, i, tex_count = 0; - int rs_tex_comp = 0; + struct r300_rs_block* rs = r300->rs_block; + int i, col_count = 0, tex_count = 0, fp_offset = 0; + void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); + void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); if (r300_screen(r300->context.screen)->caps->is_r500) { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R500_RS_COL_PTR(col_count) | - R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); - tex_count++; - rs_tex_comp += 4; - break; - default: - break; - } - } + rX00_rs_col = r500_rs_col; + rX00_rs_col_write = r500_rs_col_write; + rX00_rs_tex = r500_rs_tex; + rX00_rs_tex_write = r500_rs_tex_write; + } else { + rX00_rs_col = r300_rs_col; + rX00_rs_col_write = r300_rs_col_write; + rX00_rs_tex = r300_rs_tex; + rX00_rs_tex_write = r300_rs_tex_write; + } - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + /* Rasterize colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_col(rs, col_count, i, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + rX00_rs_col_write(rs, col_count, fp_offset); + fp_offset++; + } col_count++; - } - - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R500_RS_INST_TEX_ID(i) | - R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset); - fp_offset++; - } - - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R500_RS_INST_COL_ID(i) | - R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset); - fp_offset++; - } - } else { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R300_RS_COL_PTR(col_count) | - R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R300_RS_TEX_PTR(rs_tex_comp) | - R300_RS_SEL_S(R300_RS_SEL_C0) | - R300_RS_SEL_T(R300_RS_SEL_C1) | - R300_RS_SEL_R(R300_RS_SEL_C2) | - R300_RS_SEL_Q(R300_RS_SEL_C3); - tex_count++; - rs_tex_comp+=4; - break; - default: - break; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + fp_offset++; } } + } - if (col_count == 0) { - rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); - } - - if (tex_count == 0) { - rs->ip[0] |= - R300_RS_SEL_S(R300_RS_SEL_K0) | - R300_RS_SEL_T(R300_RS_SEL_K0) | - R300_RS_SEL_R(R300_RS_SEL_K0) | - R300_RS_SEL_Q(R300_RS_SEL_K1); + /* Rasterize texture coordinates. 
*/ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); + fp_offset++; + } + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + fp_offset++; + } } + } - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - col_count++; - } + /* Rasterize fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, TRUE); - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R300_RS_INST_TEX_ID(i) | - R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset); + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->fog != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); fp_offset++; } - - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R300_RS_INST_COL_ID(i) | - R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; } } - rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | + /* Rasterize at least one color, or bad things happen. */ + if (col_count == 0 && tex_count == 0) { + rX00_rs_col(rs, 0, 0, TRUE); + col_count++; + } + + rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); + rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0); } /* Update the vertex format. */ static void r300_update_derived_shader_state(struct r300_context* r300) { - struct r300_shader_key* key; - struct r300_vertex_format* vformat; - struct r300_rs_block* rs_block; - struct r300_shader_derived_value* value; - int i; + struct r300_screen* r300screen = r300_screen(r300->context.screen); /* + struct r300_shader_key* key; + struct r300_shader_derived_value* value; key = CALLOC_STRUCT(r300_shader_key); key->vs = r300->vs; key->fs = r300->fs; @@ -485,27 +444,22 @@ static void r300_update_derived_shader_state(struct r300_context* r300) (void*)key, (void*)value); } */ - /* XXX This will be refactored ASAP. 
*/ - vformat = CALLOC_STRUCT(r300_vertex_format); - rs_block = CALLOC_STRUCT(r300_rs_block); - - for (i = 0; i < 16; i++) { - vformat->vs_tab[i] = -1; - vformat->fs_tab[i] = -1; - } - - r300_vs_tab_routes(r300, vformat); - r300_vertex_psc(r300, vformat); - r300_update_fs_tab(r300, vformat); + /* Reset structures */ + memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); + memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); + memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4); - r300_update_rs_block(r300, rs_block); + r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); - FREE(r300->vertex_info); - FREE(r300->rs_block); + if (r300screen->caps->has_tcl) { + r300_vertex_psc(r300); + } else { + r300_draw_emit_all_attribs(r300); + draw_compute_vertex_size(&r300->vertex_info->vinfo); + r300_swtcl_vertex_psc(r300); + } - r300->vertex_info = vformat; - r300->rs_block = rs_block; - r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK); + r300->dirty_state |= R300_NEW_RS_BLOCK; } static void r300_update_ztop(struct r300_context* r300) @@ -544,7 +498,8 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_derived_state(struct r300_context* r300) { if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | + R300_NEW_VERTEX_FORMAT)) { r300_update_derived_shader_state(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index ec11a41253..e6c1cb54da 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -191,6 +191,42 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func) return 0; } +static INLINE uint32_t +r300_translate_polygon_mode_front(unsigned mode) { + switch (mode) + { + case PIPE_POLYGON_MODE_FILL: + return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + case PIPE_POLYGON_MODE_LINE: + return R300_GA_POLY_MODE_FRONT_PTYPE_LINE; + case PIPE_POLYGON_MODE_POINT: + return R300_GA_POLY_MODE_FRONT_PTYPE_POINT; + + default: + debug_printf("r300: Bad polygon mode %i in %s\n", mode, + __FUNCTION__); + return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + } +} + +static INLINE uint32_t +r300_translate_polygon_mode_back(unsigned mode) { + switch (mode) + { + case PIPE_POLYGON_MODE_FILL: + return R300_GA_POLY_MODE_BACK_PTYPE_TRI; + case PIPE_POLYGON_MODE_LINE: + return R300_GA_POLY_MODE_BACK_PTYPE_LINE; + case PIPE_POLYGON_MODE_POINT: + return R300_GA_POLY_MODE_BACK_PTYPE_POINT; + + default: + debug_printf("r300: Bad polygon mode %i in %s\n", mode, + __FUNCTION__); + return R300_GA_POLY_MODE_BACK_PTYPE_TRI; + } +} + /* Texture sampler state. */ static INLINE uint32_t r300_translate_wrap(int wrap) @@ -402,58 +438,114 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) return 0; } +/* Utility function to count the number of components in RGBAZS formats. + * XXX should go to util or p_format.h */ +static INLINE unsigned pf_component_count(enum pipe_format format) { + unsigned count = 0; + + if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + return count; + } + + if (pf_size_x(format)) { + count++; + } + if (pf_size_y(format)) { + count++; + } + if (pf_size_z(format)) { + count++; + } + if (pf_size_w(format)) { + count++; + } + + return count; +} + /* Translate pipe_formats into PSC vertex types. 
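 *
 * (Two hedged examples, not from the original patch, of what the rewritten
 * translation below should produce; they match the special cases removed
 * from the old switch:
 *
 *    r300_translate_vertex_data_type(PIPE_FORMAT_R32G32B32_FLOAT);
 *        // three 32-bit float components: R300_DATA_TYPE_FLOAT_1 + 2,
 *        // i.e. R300_DATA_TYPE_FLOAT_3, with no SIGNED or NORMALIZE bits
 *
 *    r300_translate_vertex_data_type(PIPE_FORMAT_R8G8B8A8_UNORM);
 *        // 8-bit unorm components: R300_DATA_TYPE_BYTE | R300_NORMALIZE
 *
 * This assumes the R300_DATA_TYPE_FLOAT_1 through FLOAT_4 register values
 * are consecutive, which the "FLOAT_1 + (components - 1)" arithmetic in
 * the new code already relies on.)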
*/ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_R32_FLOAT: - return R300_DATA_TYPE_FLOAT_1; - break; - case PIPE_FORMAT_R32G32_FLOAT: - return R300_DATA_TYPE_FLOAT_2; - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - return R300_DATA_TYPE_FLOAT_3; - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return R300_DATA_TYPE_FLOAT_4; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return R300_DATA_TYPE_BYTE | - R300_NORMALIZE; + uint32_t result = 0; + unsigned components = pf_component_count(format); + + if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format), + __FUNCTION__, __LINE__); + assert(0); + } + + switch (pf_type(format)) { + /* Half-floats, floats, doubles */ + case PIPE_FORMAT_TYPE_FLOAT: + switch (pf_size_x(format)) { + case 4: + result = R300_DATA_TYPE_FLOAT_1 + (components - 1); + break; + default: + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); + assert(0); + } + break; + /* Normalized unsigned ints */ + case PIPE_FORMAT_TYPE_UNORM: + /* Normalized signed ints */ + case PIPE_FORMAT_TYPE_SNORM: + /* Non-normalized unsigned ints */ + case PIPE_FORMAT_TYPE_USCALED: + /* Non-normalized signed ints */ + case PIPE_FORMAT_TYPE_SSCALED: + switch (pf_size_x(format)) { + case 1: + result = R300_DATA_TYPE_BYTE; + break; + case 2: + if (components > 2) { + result = R300_DATA_TYPE_SHORT_4; + } else { + result = R300_DATA_TYPE_SHORT_2; + } + break; + default: + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); + debug_printf("r300: pf_size_x(format) == %d\n", + pf_size_x(format)); + assert(0); + } break; default: - debug_printf("r300: Implementation error: " - "Bad vertex data format %s!\n", pf_name(format)); + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); assert(0); - break; } - return 0; + + if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED) { + result |= R300_SIGNED; + } else if (pf_type(format) == PIPE_FORMAT_TYPE_UNORM) { + result |= R300_NORMALIZE; + } else if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM) { + result |= (R300_SIGNED | R300_NORMALIZE); + } + + return result; } static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_R32_FLOAT: - return R300_VAP_SWIZZLE_X001; - break; - case PIPE_FORMAT_R32G32_FLOAT: - return R300_VAP_SWIZZLE_XY01; - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - return R300_VAP_SWIZZLE_XYZ1; - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R8G8B8A8_UNORM: - return R300_VAP_SWIZZLE_XYZW; - break; - default: - debug_printf("r300: Implementation error: " - "Bad vertex data format %s!\n", pf_name(format)); - assert(0); - break; + + if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); + return 0; } - return 0; + + return ((pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) | + (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) | + (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) | + (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT)); } #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 4865f16058..46d1cb39b5 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ 
b/src/gallium/drivers/r300/r300_state_invariant.c @@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(64 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -114,7 +114,6 @@ void r300_emit_invariant_state(struct r300_context* r300) /* XXX this big chunk should be refactored into rs_state */ OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); - OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000); OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); OUT_CS_REG(R300_GA_OFFSET, 0x00000000); OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); @@ -125,7 +124,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_CCTL, 0x00000000); - OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); if (caps->is_r500) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); @@ -137,8 +135,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); - OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); - OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); /* XXX */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 3c8ff24e17..093a21ebe2 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -27,19 +27,25 @@ #include "r300_context.h" #include "r300_texture.h" +#include "r300_screen.h" -static void r300_setup_texture_state(struct r300_texture* tex) +static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; - state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | - R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) | - R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level) | - R300_TX_PITCH_EN; + state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | + R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); + + if (tex->is_npot) { + /* rectangles love this */ + state->format0 |= R300_TX_PITCH_EN; + state->format2 = (tex->pitch[0] - 1) & 0x1fff; + } else { + /* power of two textures (3D, mipmaps, and no pitch) */ + state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf); + } - /* XXX */ state->format1 = r300_translate_texformat(pt->format); if (pt->target == PIPE_TEXTURE_CUBE) { state->format1 |= R300_TX_FORMAT_CUBIC_MAP; @@ -48,19 +54,40 @@ static void r300_setup_texture_state(struct r300_texture* tex) state->format1 |= R300_TX_FORMAT_3D; } - state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1; - - /* Don't worry about accidentally setting this bit on non-r500; - * the kernel should catch it. 
*/ - if (pt->width[0] > 2048) { - state->format2 |= R500_TXWIDTH_BIT11; - } - if (pt->height[0] > 2048) { - state->format2 |= R500_TXHEIGHT_BIT11; + /* large textures on r500 */ + if (is_r500) + { + if (pt->width0 > 2048) { + state->format2 |= R500_TXWIDTH_BIT11; + } + if (pt->height0 > 2048) { + state->format2 |= R500_TXHEIGHT_BIT11; + } } + assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width[0], pt->height[0], pt->last_level); + pt->width0, pt->height0, pt->last_level); +} + +unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, + unsigned zslice, unsigned face) +{ + unsigned offset = tex->offset[level]; + + switch (tex->tex.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * tex->layer_size[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * tex->layer_size[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } } /** @@ -78,38 +105,45 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return 0; } - return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); + return align(pf_get_stride(&tex->tex.block, u_minify(tex->tex.width0, level)), 32); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size; + int stride, size, layer_size; int i; for (i = 0; i <= base->last_level; i++) { - if (i > 0) { - base->width[i] = minify(base->width[i-1]); - base->height[i] = minify(base->height[i-1]); - base->depth[i] = minify(base->depth[i-1]); - } - - base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); - base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); + base->nblocksx[i] = pf_get_nblocksx(&base->block, u_minify(base->width0, i)); + base->nblocksy[i] = pf_get_nblocksy(&base->block, u_minify(base->height0, i)); stride = r300_texture_get_stride(tex, i); - size = stride * base->nblocksy[i] * base->depth[i]; + layer_size = stride * base->nblocksy[i]; + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * u_minify(base->depth0, i); tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; + tex->layer_size[i] = layer_size; + tex->pitch[i] = stride / base->block.size; debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", - i, base->width[i], base->height[i], base->depth[i], - stride); + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride); } } +static void r300_setup_flags(struct r300_texture* tex) +{ + tex->is_npot = !util_is_power_of_two(tex->tex.width0) || + !util_is_power_of_two(tex->tex.height0); +} + /* Create a new texture. 
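 *
 * (A small usage sketch, not from the original patch: r300_setup_miptree
 * above now records a per-level layer_size and pitch, and
 * r300_texture_get_offset turns (level, zslice, face) into a byte offset.
 * For example, face 3 of mip level 2 of a cube map starts at
 *
 *    tex->offset[2] + 3 * tex->layer_size[2]
 *
 * while a 3D texture uses zslice in place of face, and a plain 2D
 * texture ignores both and uses tex->offset[level] directly.)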
*/ static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, @@ -125,9 +159,9 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; + r300_setup_flags(tex); r300_setup_miptree(tex); - - r300_setup_texture_state(tex); + r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -161,17 +195,20 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface); unsigned offset; - /* XXX this is certainly dependent on tex target */ - offset = tex->offset[level]; + offset = r300_texture_get_offset(tex, level, zslice, face); if (surface) { pipe_reference_init(&surface->reference, 1); pipe_texture_reference(&surface->texture, texture); surface->format = texture->format; - surface->width = texture->width[level]; - surface->height = texture->height[level]; + surface->width = u_minify(texture->width0, level); + surface->height = u_minify(texture->height0, level); surface->offset = offset; surface->usage = flags; + surface->zslice = zslice; + surface->texture = texture; + surface->face = face; + surface->level = level; } return surface; @@ -191,8 +228,10 @@ static struct pipe_texture* { struct r300_texture* tex; + /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || - base->depth[0] != 1) { + base->depth0 != 1 || + base->last_level != 0) { return NULL; } @@ -206,8 +245,10 @@ static struct pipe_texture* tex->tex.screen = screen; tex->stride_override = *stride; + tex->pitch[0] = *stride / base->block.size; - r300_setup_texture_state(tex); + r300_setup_flags(tex); + r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); pipe_buffer_reference(&tex->buffer, buffer); @@ -239,9 +280,9 @@ r300_video_surface_create(struct pipe_screen *screen, template.target = PIPE_TEXTURE_2D; template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); - template.depth[0] = 1; + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); + template.depth0 = 1; pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 55d1a0ac5c..55ceb1a513 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -33,6 +33,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen); unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); +unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, + unsigned zslice, unsigned face); + /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... 
*/ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { @@ -40,6 +43,8 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* X8 */ case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case PIPE_FORMAT_L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); /* X16 */ case PIPE_FORMAT_R16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); @@ -48,6 +53,9 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) R300_TX_FORMAT_SIGNED; case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); + /* Y8X8 */ + case PIPE_FORMAT_A8L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); /* W8Z8Y8X8 */ case PIPE_FORMAT_A8R8G8B8_UNORM: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); @@ -82,6 +90,7 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); + default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 589f1984ee..9fb2de2403 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -190,10 +190,10 @@ static void transform_dstreg( struct rc_dst_register * dst, struct tgsi_full_dst_register * src) { - dst->File = translate_register_file(src->DstRegister.File); - dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index); - dst->WriteMask = src->DstRegister.WriteMask; - dst->RelAddr = src->DstRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->WriteMask = src->Register.WriteMask; + dst->RelAddr = src->Register.Indirect; } static void transform_srcreg( @@ -201,18 +201,18 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { - dst->File = translate_register_file(src->SrcRegister.File); - dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); - dst->RelAddr = src->SrcRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->RelAddr = src->Register.Indirect; dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0); dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; - dst->Abs = src->SrcRegisterExtMod.Absolute; - dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0; + dst->Abs = src->Register.Absolute; + dst->Negate = src->Register.Negate ? 
RC_MASK_XYZW : 0; } -static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) +static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src) { switch(src.Texture) { case TGSI_TEXTURE_1D: @@ -258,17 +258,18 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); if (src->Instruction.NumDstRegs) - transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]); + transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { - if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) - dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + if (src->Src[i].Register.File == TGSI_FILE_SAMPLER) + dst->U.I.TexSrcUnit = src->Src[i].Register.Index; else - transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]); + transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]); } /* Texturing. */ - transform_texture(dst, src->InstructionExtTexture); + if (src->Instruction.Texture) + transform_texture(dst, src->Texture); } static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c new file mode 100644 index 0000000000..6ebaf715dc --- /dev/null +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -0,0 +1,86 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* r300_vbo: Various helpers for emitting vertex buffers. Needs cleanup, + * refactoring, etc. 
*/ + +#include "r300_vbo.h" + +#include "pipe/p_format.h" + +#include "r300_cs.h" +#include "r300_context.h" +#include "r300_state_inlines.h" +#include "r300_reg.h" +#include "r300_winsys.h" + +static INLINE int get_buffer_offset(struct r300_context *r300, + unsigned int buf_nr, + unsigned int elem_offset) +{ + return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset; +} +#if 0 +/* XXX not called at all */ +static void setup_vertex_buffers(struct r300_context *r300) +{ + struct pipe_vertex_element *vert_elem; + int i; + + for (i = 0; i < r300->aos_count; i++) + { + vert_elem = &r300->vertex_element[i]; + /* XXX use translate module to convert the data */ + if (!format_is_supported(vert_elem->src_format, + vert_elem->nr_components)) { + assert(0); + /* + struct pipe_buffer *buf; + const unsigned int max_index = r300->vertex_buffers[vert_elem->vertex_buffer_index].max_index; + buf = pipe_buffer_create(r300->context.screen, 4, usage, vert_elem->nr_components * max_index * sizeof(float)); + */ + } + + if (get_buffer_offset(r300, + vert_elem->vertex_buffer_index, + vert_elem->src_offset) % 4) { + /* XXX need to align buffer */ + assert(0); + } + } +} +#endif +/* XXX these shouldn't be asserts since we can work around bad indexbufs */ +void setup_index_buffer(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize) +{ + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + assert(0); + } + + if (!r300->winsys->validate(r300->winsys)) { + assert(0); + } +} diff --git a/src/gallium/drivers/r300/r300_vbo.h b/src/gallium/drivers/r300/r300_vbo.h new file mode 100644 index 0000000000..7afa75899c --- /dev/null +++ b/src/gallium/drivers/r300/r300_vbo.h @@ -0,0 +1,36 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef R300_VBO_H +#define R300_VBO_H + +struct r300_context; +struct pipe_buffer; + +void setup_vertex_attributes(struct r300_context *r300); + +void setup_index_buffer(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize); + +#endif diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index eca85879a7..31248346bc 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,89 +24,226 @@ #include "r300_vs.h" #include "r300_context.h" +#include "r300_screen.h" #include "r300_tgsi_to_rc.h" +#include "r300_reg.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "radeon_compiler.h" - -static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) +/* Convert info about VS output semantics into r300_shader_semantics. */ +static void r300_shader_read_vs_outputs( + struct tgsi_shader_info* info, + struct r300_shader_semantics* vs_outputs) { - struct r300_vertex_shader * vs = c->UserData; - struct tgsi_shader_info* info = &vs->info; - struct tgsi_parse_context parser; - struct tgsi_full_declaration * decl; - boolean pointsize = false; - int out_colors = 0; - int colors = 0; - int out_generic = 0; - int generic = 0; int i; + unsigned index; - /* Fill in the input mapping */ - for (i = 0; i < info->num_inputs; i++) - c->code->inputs[i] = i; + r300_shader_semantics_reset(vs_outputs); - /* Fill in the output mapping */ for (i = 0; i < info->num_outputs; i++) { + index = info->output_semantic_index[i]; + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + vs_outputs->pos = i; + break; + case TGSI_SEMANTIC_PSIZE: - pointsize = true; + assert(index == 0); + vs_outputs->psize = i; break; + case TGSI_SEMANTIC_COLOR: - out_colors++; + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->color[index] = i; break; - case TGSI_SEMANTIC_FOG: + + case TGSI_SEMANTIC_BCOLOR: + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->bcolor[index] = i; + break; + case TGSI_SEMANTIC_GENERIC: - out_generic++; + assert(index <= ATTR_GENERIC_COUNT); + vs_outputs->generic[index] = i; break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + vs_outputs->fog = i; + break; + + default: + assert(0); } } +} - tgsi_parse_init(&parser, vs->state.tokens); +static void r300_shader_vap_output_fmt( + struct r300_shader_semantics* vs_outputs, + uint* hwfmt) +{ + int i, gen_count; - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); + /* Do the actual vertex_info setup. + * + * vertex_info has four uints of hardware-specific data in it. + * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL + * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM + * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 + * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) - continue; + hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - decl = &parser.FullToken.FullDeclaration; + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_POS; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + } else { + assert(0); + } - if (decl->Declaration.File != TGSI_FILE_OUTPUT) - continue; + /* Point size. 
*/ + if (vs_outputs->psize != ATTR_UNUSED) { + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + } - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - c->code->outputs[decl->DeclarationRange.First] = 0; - break; - case TGSI_SEMANTIC_PSIZE: - c->code->outputs[decl->DeclarationRange.First] = 1; - break; - case TGSI_SEMANTIC_COLOR: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - out_colors + - generic++; - break; - default: - debug_printf("r300: vs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + } + } + + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); +} + +/* Set VS output stream locations for SWTCL. */ +static void r300_stream_locations_swtcl( + struct r300_shader_semantics* vs_outputs, + int* output_stream_loc) +{ + int i, tabi = 0, gen_count; + + /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct. + * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */ + + /* Position. */ + output_stream_loc[tabi++] = 0; + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + output_stream_loc[tabi++] = 1; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + output_stream_loc[tabi++] = 2 + i; + } + } + + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + output_stream_loc[tabi++] = 4 + i; + } + } + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + assert(tabi < 16); + output_stream_loc[tabi++] = 6 + gen_count; + gen_count++; } } - tgsi_parse_free(&parser); + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + assert(tabi < 16); + output_stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); + + for (; tabi < 16;) { + output_stream_loc[tabi++] = -1; + } } +static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) +{ + struct r300_vertex_shader * vs = c->UserData; + struct r300_shader_semantics* outputs = &vs->outputs; + struct tgsi_shader_info* info = &vs->info; + int i, reg = 0; + + /* Fill in the input mapping */ + for (i = 0; i < info->num_inputs; i++) + c->code->inputs[i] = i; + + /* Position. */ + if (outputs->pos != ATTR_UNUSED) { + c->code->outputs[outputs->pos] = reg++; + } else { + assert(0); + } + + /* Point size. */ + if (outputs->psize != ATTR_UNUSED) { + c->code->outputs[outputs->psize] = reg++; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->color[i] != ATTR_UNUSED) { + c->code->outputs[outputs->color[i]] = reg++; + } + } + + /* XXX Back-face colors. 
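 *
 * (A hedged example, not from the original patch: the code around this
 * comment hands out VS output registers in a fixed order, position first,
 * then point size, colors, generics and fog, each taking the next "reg"
 * slot.  A shader writing position, COLOR[0], GENERIC[0] and GENERIC[1]
 * with no point size would therefore end up with
 *
 *    c->code->outputs[outputs->pos]        = 0
 *    c->code->outputs[outputs->color[0]]   = 1
 *    c->code->outputs[outputs->generic[0]] = 2
 *    c->code->outputs[outputs->generic[1]] = 3
 *
 * which matches what the removed tgsi_parse-based loop computed from its
 * pointsize and out_colors counters.)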
*/ + + /* Texture coordinates. */ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (outputs->generic[i] != ATTR_UNUSED) { + c->code->outputs[outputs->generic[i]] = reg++; + } + } + + /* Fog coordinates. */ + if (outputs->fog != ATTR_UNUSED) { + c->code->outputs[outputs->fog] = reg++; + } +} void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) @@ -113,6 +251,14 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_program_compiler compiler; struct tgsi_to_rc ttr; + /* Initialize. */ + r300_shader_read_vs_outputs(&vs->info, &vs->outputs); + r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); + + if (!r300_screen(r300->context.screen)->caps->has_tcl) { + r300_stream_locations_swtcl(&vs->outputs, vs->output_stream_loc_swtcl); + } + /* Setup the compiler */ rc_init(&compiler.Base); @@ -137,7 +283,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* Todo: Fail gracefully */ + /* XXX Fail gracefully */ fprintf(stderr, "r300 VP: Compiler error\n"); abort(); } diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 2a4ce315e3..283dd5a9e8 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,18 +26,20 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" + struct r300_context; struct r300_vertex_shader { /* Parent class */ struct pipe_shader_state state; - struct tgsi_shader_info info; - /* Fallback shader, because Draw has issues */ - struct draw_vertex_shader* draw; + struct tgsi_shader_info info; + struct r300_shader_semantics outputs; + int output_stream_loc_swtcl[16]; + uint hwfmt[4]; /* Has this shader been translated yet? 
*/ boolean translated; @@ -45,9 +48,6 @@ struct r300_vertex_shader { struct r300_vertex_program_code code; }; - -extern struct r300_vertex_program_code r300_passthrough_vertex_shader; - void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 94d000a5ac..f8bf3e9974 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -90,14 +90,15 @@ softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { sp_destroy_tile_cache(softpipe->cbuf_cache[i]); pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL); } + sp_destroy_tile_cache(softpipe->zsbuf_cache); pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL); @@ -106,6 +107,11 @@ softpipe_destroy( struct pipe_context *pipe ) pipe_texture_reference(&softpipe->texture[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + sp_destroy_tex_tile_cache(softpipe->vertex_tex_cache[i]); + pipe_texture_reference(&softpipe->vertex_textures[i], NULL); + } + for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { pipe_buffer_reference(&softpipe->constants[i].buffer, NULL); @@ -120,7 +126,7 @@ softpipe_destroy( struct pipe_context *pipe ) * if (the texture is being used as a framebuffer surface) * return PIPE_REFERENCED_FOR_WRITE * else if (the texture is a bound texture source) - * return PIPE_REFERENCED_FOR_READ XXX not done yet + * return PIPE_REFERENCED_FOR_READ * else * return PIPE_UNREFERENCED */ @@ -132,6 +138,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context( pipe ); unsigned i; + /* check if any of the bound drawing surfaces are this texture */ if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { if (softpipe->framebuffer.cbufs[i] && @@ -145,7 +152,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, } } - /* FIXME: we also need to do the same for the texture cache */ + /* check if any of the tex_cache textures are this texture */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (softpipe->tex_cache[i] && + softpipe->tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + if (softpipe->vertex_tex_cache[i] && + softpipe->vertex_tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } @@ -185,7 +202,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; softpipe->pipe.create_depth_stencil_alpha_state = 
softpipe_create_depth_stencil_state; @@ -210,7 +228,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_fragment_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_vertex_sampler_textures = softpipe_set_vertex_sampler_textures; softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; @@ -240,12 +259,14 @@ softpipe_create( struct pipe_screen *screen ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen ); - + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen); + } /* setup quad rendering stages */ - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); /* @@ -256,7 +277,7 @@ softpipe_create( struct pipe_screen *screen ) goto fail; draw_texture_samplers(softpipe->draw, - PIPE_MAX_SAMPLERS, + PIPE_MAX_VERTEX_SAMPLERS, (struct tgsi_sampler **) softpipe->tgsi.vert_samplers_list); @@ -275,7 +296,6 @@ softpipe_create( struct pipe_screen *screen ) draw_set_render(softpipe->draw, softpipe->vbuf_backend); - /* plug in AA line/point stages */ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); @@ -291,4 +311,3 @@ softpipe_create( struct pipe_screen *screen ) softpipe_destroy(&softpipe->pipe); return NULL; } - diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a735573d6f..8ce20c5744 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -53,6 +53,7 @@ struct softpipe_context { /** Constant state objects */ struct pipe_blend_state *blend; struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; @@ -66,12 +67,15 @@ struct softpipe_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_samplers; + unsigned num_vertex_textures; unsigned num_vertex_elements; unsigned num_vertex_buffers; @@ -121,7 +125,7 @@ struct softpipe_context { /** TGSI exec things */ struct { - struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; @@ -139,6 +143,7 @@ struct softpipe_context { unsigned tex_timestamp; struct softpipe_tex_tile_cache 
*tex_cache[PIPE_MAX_SAMPLERS]; + struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; unsigned use_sse : 1; unsigned dump_fs : 1; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index e38b767cf2..75dac810a1 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -55,6 +55,9 @@ softpipe_flush( struct pipe_context *pipe, for (i = 0; i < softpipe->num_textures; i++) { sp_flush_tex_tile_cache(softpipe->tex_cache[i]); } + for (i = 0; i < softpipe->num_vertex_textures; i++) { + sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]); + } } if (flags & PIPE_FLUSH_SWAPBUFFERS) { diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 4076114d39..a8999ed347 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -126,7 +126,13 @@ exec_run( const struct sp_fragment_shader *base, setup_pos_vector(quad->posCoef, (float)quad->input.x0, (float)quad->input.y0, &machine->QuadPos); - + + if (quad->input.facing) { + machine->Face = -1.0f; + } else { + machine->Face = 1.0f; + } + quad->inout.mask &= tgsi_exec_machine_run( machine ); if (quad->inout.mask == 0) return FALSE; diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 0ad0b98654..fe6b6cec35 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -478,7 +478,15 @@ blend_quad(struct quad_stage *qs, VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ + { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(dest[0], quadColor[0], temp); /* R */ + VEC4_MUL(dest[1], quadColor[1], temp); /* G */ + VEC4_MUL(dest[2], quadColor[2], temp); /* B */ + } break; case PIPE_BLENDFACTOR_CONST_COLOR: { @@ -600,7 +608,7 @@ blend_quad(struct quad_stage *qs, VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ + /* dest = dest * 1 NO-OP, leave dest as-is */ break; case PIPE_BLENDFACTOR_CONST_COLOR: /* fall-through */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 81fb7aa20c..6bf3df8e6a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -58,7 +58,9 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_SAMPLERS; + return PIPE_MAX_VERTEX_SAMPLERS; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: @@ -143,6 +145,10 @@ softpipe_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_B6UG5SR5S_NORM: + case PIPE_FORMAT_X8UB8UG8SR8S_NORM: + case PIPE_FORMAT_A8B8G8R8_SNORM: return FALSE; default: return TRUE; diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 00fb52a64f..615581b95f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -87,6 +87,8 @@ struct setup_context { 
float oneoverarea; int facing; + float pixel_offset; + struct quad_header quad[MAX_QUADS]; struct quad_header *quad_ptrs[MAX_QUADS]; unsigned count; @@ -379,6 +381,16 @@ static boolean setup_sort_vertices( struct setup_context *setup, ((det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW)); + /* Prepare pixel offset for rasterisation: + * - pixel center (0.5, 0.5) for GL, or + * - assume (0.0, 0.0) for other APIs. + */ + if (setup->softpipe->rasterizer->gl_rasterization_rules) { + setup->pixel_offset = 0.5f; + } else { + setup->pixel_offset = 0.0f; + } + return TRUE; } @@ -427,7 +439,7 @@ static void tri_linear_coeff( struct setup_context *setup, /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). + * pixel centers, in other words (pixel_offset, pixel_offset). * * this is neat but unfortunately not a good way to do things for * triangles with very large values of dadx or dady as it will @@ -438,8 +450,8 @@ static void tri_linear_coeff( struct setup_context *setup, * instead - i'll switch to this later. */ coef->a0[i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -487,8 +499,8 @@ static void tri_persp_coeff( struct setup_context *setup, coef->dadx[i] = dadx; coef->dady[i] = dady; coef->a0[i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -575,12 +587,12 @@ static void setup_tri_coefficients( struct setup_context *setup ) static void setup_tri_edges( struct setup_context *setup ) { - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; + float vmin_x = setup->vmin[0][0] + setup->pixel_offset; + float vmid_x = setup->vmid[0][0] + setup->pixel_offset; - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; + float vmin_y = setup->vmin[0][1] - setup->pixel_offset; + float vmid_y = setup->vmid[0][1] - setup->pixel_offset; + float vmax_y = setup->vmax[0][1] - setup->pixel_offset; setup->emaj.sy = ceilf(vmin_y); setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); @@ -780,8 +792,8 @@ line_linear_coeff(const struct setup_context *setup, coef->dadx[i] = dadx; coef->dady[i] = dady; coef->a0[i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -803,8 +815,8 @@ line_persp_coeff(const struct setup_context *setup, coef->dadx[i] = dadx; coef->dady[i] = dady; coef->a0[i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 77ee3c1136..d488fb8710 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -104,6 +104,10 @@ void * softpipe_create_sampler_state(struct pipe_context *, const struct pipe_sampler_state *); 
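The PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE case in the sp_quad_blend.c hunk above replaces the old assert(0) with the standard saturate factor f = min(As, 1 - Ad), applied to the RGB channels only; for the alpha channel the factor is defined as 1, which is why the second hunk becomes a no-op. A minimal scalar sketch of the same arithmetic, using hypothetical names rather than the driver's per-quad vector macros:

/* Scalar sketch of the PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE source factor:
 * f = min(src_alpha, 1 - dst_alpha), applied to the RGB channels only;
 * the factor for the alpha channel is defined as 1. */
static void
apply_src_alpha_saturate(float src_term[4],
                         const float src[4], const float dst[4])
{
   float limit = 1.0f - dst[3];
   float f = src[3] < limit ? src[3] : limit;

   src_term[0] = src[0] * f;   /* R */
   src_term[1] = src[1] * f;   /* G */
   src_term[2] = src[2] * f;   /* B */
   src_term[3] = src[3];       /* A: factor 1, matching the second hunk */
}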
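The sp_setup.c hunks above replace the hard-coded 0.5 pixel-center offset with setup->pixel_offset, chosen in setup_sort_vertices() from the rasterizer's gl_rasterization_rules flag, and fold it into the a0 interpolation coefficient. A standalone sketch of that coefficient, with an illustrative helper name rather than the driver's own functions:

/* a0 is the attribute value that interpolation would produce at window
 * position (0, 0), given the value at the top vertex and the screen-space
 * gradients; pixel_offset is 0.5 under GL rasterization rules (sampling at
 * pixel centers) and 0.0 otherwise. */
static float
interp_a0(float value_at_vmin, float dadx, float dady,
          float vmin_x, float vmin_y, float pixel_offset)
{
   return value_at_vmin - (dadx * (vmin_x - pixel_offset) +
                           dady * (vmin_y - pixel_offset));
}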
void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void +softpipe_bind_vertex_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void softpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -150,6 +154,11 @@ void softpipe_set_sampler_textures( struct pipe_context *, unsigned num, struct pipe_texture ** ); +void +softpipe_set_vertex_sampler_textures(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); + void softpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 1faeca1c2a..c24a737d07 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -66,8 +66,6 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; - const enum interp_mode colorInterp - = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; const uint num = draw_num_vs_outputs(softpipe->draw); uint i; @@ -108,33 +106,21 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = draw_find_vs_output(softpipe->draw, - TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + interp = INTERP_POS; break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, - spfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + if (softpipe->rasterizer->flatshade) { + interp = INTERP_CONSTANT; + } break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FACE: - /* this includes texcoords and varying vars */ - src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, - spfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - default: - assert(0); } + + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(softpipe->draw, + spfs->info.input_semantic_name[i], + spfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } softpipe->psize_slot = draw_find_vs_output(softpipe->draw, @@ -227,6 +213,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } } } + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct softpipe_tex_tile_cache *tc = softpipe->vertex_tex_cache[i]; + + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + + if (spt->timestamp != tc->timestamp) { + sp_tex_tile_cache_validate_texture(tc); + tc->timestamp = spt->timestamp; + } + } + } } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 256faa94b8..b41f7e8ab7 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -143,6 +143,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs; draw_delete_vertex_shader(softpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } diff --git 
a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index db0b8ab76b..ceb4e338f1 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -94,6 +94,34 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, void +softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == softpipe->num_vertex_samplers && + !memcmp(softpipe->vertex_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < num_samplers; ++i) + softpipe->vertex_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + softpipe->vertex_samplers[i] = NULL; + + softpipe->num_vertex_samplers = num_samplers; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + + +void softpipe_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { @@ -122,6 +150,37 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, } +void +softpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == softpipe->num_vertex_textures && + !memcmp(softpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + + draw_flush(softpipe->draw); + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + + pipe_texture_reference(&softpipe->vertex_textures[i], tex); + sp_tex_tile_cache_set_texture(softpipe->vertex_tex_cache[i], tex); + } + + softpipe->num_vertex_textures = num_textures; + + softpipe->dirty |= SP_NEW_TEXTURE; +} + + /** * Find/create an sp_sampler_varient object for sampling the given texture, * sampler and tex unit. @@ -185,16 +244,16 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) * fragment programs. 
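softpipe_bind_vertex_sampler_states() and softpipe_set_vertex_sampler_textures() above follow the same pattern as their fragment-side counterparts: return early when the incoming set matches what is already bound, flush the draw module before touching state, NULL out the unused tail of the array, and raise the corresponding dirty bit. A condensed sketch of the early-out test, with generic names rather than the softpipe fields:

#include <stdbool.h>
#include <string.h>

/* Early-out test used by the bind/set entry points: re-binding exactly the
 * same objects should not force a draw flush or a dirty flag. */
static bool
binding_unchanged(void * const *bound, unsigned num_bound,
                  void * const *incoming, unsigned num_incoming)
{
   return num_incoming == num_bound &&
          memcmp(bound, incoming, num_incoming * sizeof(void *)) == 0;
}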
*/ for (i = 0; i <= softpipe->vs->max_sampler; i++) { - if (softpipe->sampler[i]) { + if (softpipe->vertex_samplers[i]) { softpipe->tgsi.vert_samplers_list[i] = get_sampler_varient( i, - sp_sampler(softpipe->sampler[i]), - softpipe->texture[i], + sp_sampler(softpipe->vertex_samplers[i]), + softpipe->vertex_textures[i], TGSI_PROCESSOR_VERTEX ); sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], - softpipe->tex_cache[i], - softpipe->texture[i] ); + softpipe->vertex_tex_cache[i], + softpipe->vertex_textures[i] ); } } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index c22ee86b66..e26153b1d9 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -521,7 +521,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp, const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); - float rho = MAX2(dsdx, dsdy) * texture->width[0]; + float rho = MAX2(dsdx, dsdy) * texture->width0; float lambda; lambda = util_fast_log2(rho); @@ -545,8 +545,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width[0]; - float maxy = MAX2(dtdx, dtdy) * texture->height[0]; + float maxx = MAX2(dsdx, dsdy) * texture->width0; + float maxy = MAX2(dtdx, dtdy) * texture->height0; float rho = MAX2(maxx, maxy); float lambda; @@ -573,9 +573,9 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]); float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width[0]; - float maxy = MAX2(dtdx, dtdy) * texture->height[0]; - float maxz = MAX2(dpdx, dpdy) * texture->depth[0]; + float maxx = MAX2(dsdx, dsdy) * texture->width0; + float maxy = MAX2(dtdx, dtdy) * texture->height0; + float maxz = MAX2(dpdx, dpdy) * texture->depth0; float rho, lambda; rho = MAX2(maxx, maxy); @@ -644,8 +644,8 @@ get_texel_2d(const struct sp_sampler_varient *samp, const struct pipe_texture *texture = samp->texture; unsigned level = addr.bits.level; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level)) { return samp->sampler->border_color; } else { @@ -737,9 +737,9 @@ get_texel_3d(const struct sp_sampler_varient *samp, const struct pipe_texture *texture = samp->texture; unsigned level = addr.bits.level; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level) || + z < 0 || z >= (int) u_minify(texture->depth0, level)) { return samp->sampler->border_color; } else { @@ -925,7 +925,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; + width = u_minify(texture->width0, level0); assert(width > 0); @@ -961,8 +961,8 @@ img_filter_2d_nearest(struct 
tgsi_sampler *tgsi_sampler, level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1008,8 +1008,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1046,9 +1046,9 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); assert(width > 0); assert(height > 0); @@ -1088,7 +1088,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; + width = u_minify(texture->width0, level0); assert(width > 0); @@ -1127,8 +1127,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1174,8 +1174,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1221,9 +1221,9 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); addr.value = 0; addr.bits.level = level0; @@ -1778,8 +1778,8 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, samp->texture = texture; samp->cache = tex_cache; - samp->xpot = util_unsigned_logbase2( texture->width[0] ); - samp->ypot = util_unsigned_logbase2( texture->height[0] ); + samp->xpot = util_unsigned_logbase2( texture->width0 ); + samp->ypot = util_unsigned_logbase2( texture->height0 ); samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index 407a22a9f4..e50a76a73b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -35,6 +35,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_tile.h" +#include "util/u_math.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_texture.h" @@ -246,9 +247,9 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, addr.bits.level, addr.bits.z, PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); - + u_minify(tc->texture->width0, addr.bits.level), + u_minify(tc->texture->height0, addr.bits.level)); + tc->tex_trans_map = 
screen->transfer_map(screen, tc->tex_trans); tc->tex_face = addr.bits.face; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 7caf2928b4..ac5f61e46f 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -52,16 +52,17 @@ softpipe_texture_layout(struct pipe_screen *screen, { struct pipe_texture *pt = &spt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; + pt->width0 = width; + pt->height0 = height; + pt->depth0 = depth; + for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); spt->stride[level] = pt->nblocksx[level]*pt->block.size; @@ -72,9 +73,9 @@ softpipe_texture_layout(struct pipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * spt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } spt->buffer = screen->buffer_create(screen, 32, @@ -96,12 +97,12 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, PIPE_BUFFER_USAGE_GPU_READ_WRITE); unsigned tex_usage = spt->base.tex_usage; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0); spt->buffer = screen->surface_buffer_create( screen, - spt->base.width[0], - spt->base.height[0], + spt->base.width0, + spt->base.height0, spt->base.format, usage, tex_usage, @@ -126,9 +127,9 @@ softpipe_texture_create(struct pipe_screen *screen, pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - spt->pot = (util_is_power_of_two(template->width[0]) && - util_is_power_of_two(template->height[0]) && - util_is_power_of_two(template->depth[0])); + spt->pot = (util_is_power_of_two(template->width0) && + util_is_power_of_two(template->height0) && + util_is_power_of_two(template->depth0)); if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY)) { @@ -163,7 +164,7 @@ softpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -174,8 +175,8 @@ softpipe_texture_blanket(struct pipe_screen * screen, spt->base = *base; pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0); spt->stride[0] = stride[0]; pipe_buffer_reference(&spt->buffer, buffer); @@ -213,8 +214,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; 
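A recurring change across sp_tex_sample.c, sp_tex_tile_cache.c and sp_texture.c above is dropping the per-level width[]/height[]/depth[] arrays in favour of the base sizes width0/height0/depth0 plus u_minify() from util/u_math.h. The sketch below spells out the per-level size computation that u_minify is expected to perform; it is written out here for illustration and is not part of the patch:

/* Size of mipmap level 'level', derived from the base (level 0) size:
 * halve once per level, never going below one texel. */
static unsigned
minify_level(unsigned base_size, unsigned level)
{
   unsigned size = base_size >> level;
   return size ? size : 1;
}

/* e.g. a 16x8 base: level 0 -> 16x8, level 1 -> 8x4, level 4 -> 1x1 */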
- ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = spt->level_offset[level]; ps->usage = usage; @@ -434,9 +435,9 @@ softpipe_video_surface_create(struct pipe_screen *screen, template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */ - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); - template.depth[0] = 1; + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); + template.depth0 = 1; pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile new file mode 100644 index 0000000000..f361908187 --- /dev/null +++ b/src/gallium/drivers/svga/Makefile @@ -0,0 +1,60 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = svga + +C_SOURCES = \ + svgadump/svga_shader_dump.c \ + svgadump/svga_shader_op.c \ + svgadump/svga_dump.c \ + svga_cmd.c \ + svga_context.c \ + svga_draw.c \ + svga_draw_arrays.c \ + svga_draw_elements.c \ + svga_pipe_blend.c \ + svga_pipe_blit.c \ + svga_pipe_clear.c \ + svga_pipe_constants.c \ + svga_pipe_depthstencil.c \ + svga_pipe_draw.c \ + svga_pipe_flush.c \ + svga_pipe_fs.c \ + svga_pipe_misc.c \ + svga_pipe_query.c \ + svga_pipe_rasterizer.c \ + svga_pipe_sampler.c \ + svga_pipe_vertex.c \ + svga_pipe_vs.c \ + svga_screen.c \ + svga_screen_buffer.c \ + svga_screen_texture.c \ + svga_screen_cache.c \ + svga_state.c \ + svga_state_need_swtnl.c \ + svga_state_constants.c \ + svga_state_framebuffer.c \ + svga_state_rss.c \ + svga_state_tss.c \ + svga_state_vdecl.c \ + svga_state_fs.c \ + svga_state_vs.c \ + svga_swtnl_backend.c \ + svga_swtnl_draw.c \ + svga_swtnl_state.c \ + svga_tgsi.c \ + svga_tgsi_decl_sm20.c \ + svga_tgsi_decl_sm30.c \ + svga_tgsi_insn.c + +LIBRARY_INCLUDES = \ + -I$(TOP)/src/gallium/drivers/svga/include + +# With linux-debug we get a lots of warnings, filter out the bad flags. 
+CFLAGS := $(filter-out -pedantic, $(filter-out -ansi, $(CFLAGS))) + +LIBRARY_DEFINES = \ + -std=gnu99 -fvisibility=hidden \ + -DHAVE_STDINT_H -DHAVE_SYS_TYPES_H + +include ../../Makefile.template diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript new file mode 100644 index 0000000000..737b791ceb --- /dev/null +++ b/src/gallium/drivers/svga/SConscript @@ -0,0 +1,72 @@ +Import('*') + +env = env.Clone() + +if env['platform'] in ['linux']: + env.Append(CCFLAGS = ['-fvisibility=hidden']) + +if env['gcc']: + env.Append(CPPDEFINES = [ + 'HAVE_STDINT_H', + 'HAVE_SYS_TYPES_H', + ]) + +env.Prepend(CPPPATH = [ + 'include', +]) + +env.Append(CPPDEFINES = [ +]) + +sources = [ + 'svga_cmd.c', + 'svga_context.c', + 'svga_draw.c', + 'svga_draw_arrays.c', + 'svga_draw_elements.c', + 'svga_pipe_blend.c', + 'svga_pipe_blit.c', + 'svga_pipe_clear.c', + 'svga_pipe_constants.c', + 'svga_pipe_depthstencil.c', + 'svga_pipe_draw.c', + 'svga_pipe_flush.c', + 'svga_pipe_fs.c', + 'svga_pipe_misc.c', + 'svga_pipe_query.c', + 'svga_pipe_rasterizer.c', + 'svga_pipe_sampler.c', + 'svga_pipe_vertex.c', + 'svga_pipe_vs.c', + 'svga_screen.c', + 'svga_screen_buffer.c', + 'svga_screen_cache.c', + 'svga_screen_texture.c', + 'svga_state.c', + 'svga_state_constants.c', + 'svga_state_framebuffer.c', + 'svga_state_need_swtnl.c', + 'svga_state_rss.c', + 'svga_state_tss.c', + 'svga_state_vdecl.c', + 'svga_state_fs.c', + 'svga_state_vs.c', + 'svga_swtnl_backend.c', + 'svga_swtnl_draw.c', + 'svga_swtnl_state.c', + 'svga_tgsi.c', + 'svga_tgsi_decl_sm20.c', + 'svga_tgsi_decl_sm30.c', + 'svga_tgsi_insn.c', + + 'svgadump/svga_dump.c', + 'svgadump/svga_shader_dump.c', + 'svgadump/svga_shader_op.c', +] + +svga = env.ConvenienceLibrary( + target = 'svga', + source = sources, +) + +Export('svga') diff --git a/src/gallium/drivers/svga/include/README b/src/gallium/drivers/svga/include/README new file mode 100644 index 0000000000..a0b8916104 --- /dev/null +++ b/src/gallium/drivers/svga/include/README @@ -0,0 +1,3 @@ +This directory contains the headers from the VMware SVGA Device Developer Kit: + + https://vmware-svga.svn.sourceforge.net/svnroot/vmware-svga/trunk/lib/vmware/ diff --git a/src/gallium/drivers/svga/include/svga3d_caps.h b/src/gallium/drivers/svga/include/svga3d_caps.h new file mode 100644 index 0000000000..714ce9f45f --- /dev/null +++ b/src/gallium/drivers/svga/include/svga3d_caps.h @@ -0,0 +1,139 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_caps.h -- + * + * Definitions for SVGA3D hardware capabilities. Capabilities + * are used to query for optional rendering features during + * driver initialization. The capability data is stored as very + * basic key/value dictionary within the "FIFO register" memory + * area at the beginning of BAR2. + * + * Note that these definitions are only for 3D capabilities. + * The SVGA device also has "device capabilities" and "FIFO + * capabilities", which are non-3D-specific and are stored as + * bitfields rather than key/value pairs. + */ + +#ifndef _SVGA3D_CAPS_H_ +#define _SVGA3D_CAPS_H_ + +#define SVGA_FIFO_3D_CAPS_SIZE (SVGA_FIFO_3D_CAPS_LAST - \ + SVGA_FIFO_3D_CAPS + 1) + + +/* + * SVGA3dCapsRecordType + * + * Record types that can be found in the caps block. + * Related record types are grouped together numerically so that + * SVGA3dCaps_FindRecord() can be applied on a range of record + * types. + */ + +typedef enum { + SVGA3DCAPS_RECORD_UNKNOWN = 0, + SVGA3DCAPS_RECORD_DEVCAPS_MIN = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS_MAX = 0x1ff, +} SVGA3dCapsRecordType; + + +/* + * SVGA3dCapsRecordHeader + * + * Header field leading each caps block record. Contains the offset (in + * register words, NOT bytes) to the next caps block record (or the end + * of caps block records which will be a zero word) and the record type + * as defined above. + */ + +typedef +struct SVGA3dCapsRecordHeader { + uint32 length; + SVGA3dCapsRecordType type; +} +SVGA3dCapsRecordHeader; + + +/* + * SVGA3dCapsRecord + * + * Caps block record; "data" is a placeholder for the actual data structure + * contained within the record; for example a record containing a FOOBAR + * structure would be of size "sizeof(SVGA3dCapsRecordHeader) + + * sizeof(FOOBAR)". + */ + +typedef +struct SVGA3dCapsRecord { + SVGA3dCapsRecordHeader header; + uint32 data[1]; +} +SVGA3dCapsRecord; + + +typedef uint32 SVGA3dCapPair[2]; + + +/* + *---------------------------------------------------------------------- + * + * SVGA3dCaps_FindRecord + * + * Finds the record with the highest-valued type within the given range + * in the caps block. + * + * Result: pointer to found record, or NULL if not found. + * + *---------------------------------------------------------------------- + */ + +static INLINE SVGA3dCapsRecord * +SVGA3dCaps_FindRecord(const uint32 *capsBlock, + SVGA3dCapsRecordType recordTypeMin, + SVGA3dCapsRecordType recordTypeMax) +{ + SVGA3dCapsRecord *record, *found = NULL; + uint32 offset; + + /* + * Search linearly through the caps block records for the specified type. 
+ */ + for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) { + record = (SVGA3dCapsRecord *) (capsBlock + offset); + if ((record->header.type >= recordTypeMin) && + (record->header.type <= recordTypeMax) && + (!found || (record->header.type > found->header.type))) { + found = record; + } + } + + return found; +} + + +#endif // _SVGA3D_CAPS_H_ diff --git a/src/gallium/drivers/svga/include/svga3d_reg.h b/src/gallium/drivers/svga/include/svga3d_reg.h new file mode 100644 index 0000000000..77cb453310 --- /dev/null +++ b/src/gallium/drivers/svga/include/svga3d_reg.h @@ -0,0 +1,1793 @@ +/********************************************************** + * Copyright 1998-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_reg.h -- + * + * SVGA 3D hardware definitions + */ + +#ifndef _SVGA3D_REG_H_ +#define _SVGA3D_REG_H_ + +#include "svga_reg.h" + + +/* + * 3D Hardware Version + * + * The hardware version is stored in the SVGA_FIFO_3D_HWVERSION fifo + * register. Is set by the host and read by the guest. This lets + * us make new guest drivers which are backwards-compatible with old + * SVGA hardware revisions. It does not let us support old guest + * drivers. Good enough for now. + * + */ + +#define SVGA3D_MAKE_HWVERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) +#define SVGA3D_MAJOR_HWVERSION(version) ((version) >> 16) +#define SVGA3D_MINOR_HWVERSION(version) ((version) & 0xFF) + +typedef enum { + SVGA3D_HWVERSION_WS5_RC1 = SVGA3D_MAKE_HWVERSION(0, 1), + SVGA3D_HWVERSION_WS5_RC2 = SVGA3D_MAKE_HWVERSION(0, 2), + SVGA3D_HWVERSION_WS51_RC1 = SVGA3D_MAKE_HWVERSION(0, 3), + SVGA3D_HWVERSION_WS6_B1 = SVGA3D_MAKE_HWVERSION(1, 1), + SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4), + SVGA3D_HWVERSION_WS65_B1 = SVGA3D_MAKE_HWVERSION(2, 0), + SVGA3D_HWVERSION_CURRENT = SVGA3D_HWVERSION_WS65_B1, +} SVGA3dHardwareVersion; + +/* + * Generic Types + */ + +typedef uint32 SVGA3dBool; /* 32-bit Bool definition */ +#define SVGA3D_NUM_CLIPPLANES 6 +#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS 8 + + +/* + * Surface formats. + * + * If you modify this list, be sure to keep GLUtil.c in sync. It + * includes the internal format definition of each surface in + * GLUtil_ConvertSurfaceFormat, and it contains a table of + * human-readable names in GLUtil_GetFormatName. 
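SVGA3dCaps_FindRecord() above walks the length-prefixed records of the FIFO caps block and returns the best match within a record-type range. A hedged usage sketch follows; it assumes the DEVCAPS record body is an array of SVGA3dCapPair {index, value} entries and that header.length counts 32-bit words including the two header words, neither of which is spelled out in the header beyond the "key/value dictionary" description:

/* Illustrative lookup of a single devcap from a caps block.
 * Returns 1 and stores the value on success, 0 otherwise. */
static int
find_devcap(const uint32 *capsBlock, uint32 cap_index, uint32 *out_value)
{
   SVGA3dCapsRecord *record;
   const SVGA3dCapPair *pairs;
   uint32 num_pairs, i;

   record = SVGA3dCaps_FindRecord(capsBlock,
                                  SVGA3DCAPS_RECORD_DEVCAPS_MIN,
                                  SVGA3DCAPS_RECORD_DEVCAPS_MAX);
   if (!record)
      return 0;

   pairs = (const SVGA3dCapPair *) record->data;
   num_pairs = (record->header.length - 2) / 2;   /* assumes 2 header words */

   for (i = 0; i < num_pairs; i++) {
      if (pairs[i][0] == cap_index) {
         *out_value = pairs[i][1];
         return 1;
      }
   }
   return 0;
}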
+ */ + +typedef enum SVGA3dSurfaceFormat { + SVGA3D_FORMAT_INVALID = 0, + + SVGA3D_X8R8G8B8 = 1, + SVGA3D_A8R8G8B8 = 2, + + SVGA3D_R5G6B5 = 3, + SVGA3D_X1R5G5B5 = 4, + SVGA3D_A1R5G5B5 = 5, + SVGA3D_A4R4G4B4 = 6, + + SVGA3D_Z_D32 = 7, + SVGA3D_Z_D16 = 8, + SVGA3D_Z_D24S8 = 9, + SVGA3D_Z_D15S1 = 10, + + SVGA3D_LUMINANCE8 = 11, + SVGA3D_LUMINANCE4_ALPHA4 = 12, + SVGA3D_LUMINANCE16 = 13, + SVGA3D_LUMINANCE8_ALPHA8 = 14, + + SVGA3D_DXT1 = 15, + SVGA3D_DXT2 = 16, + SVGA3D_DXT3 = 17, + SVGA3D_DXT4 = 18, + SVGA3D_DXT5 = 19, + + SVGA3D_BUMPU8V8 = 20, + SVGA3D_BUMPL6V5U5 = 21, + SVGA3D_BUMPX8L8V8U8 = 22, + SVGA3D_BUMPL8V8U8 = 23, + + SVGA3D_ARGB_S10E5 = 24, /* 16-bit floating-point ARGB */ + SVGA3D_ARGB_S23E8 = 25, /* 32-bit floating-point ARGB */ + + SVGA3D_A2R10G10B10 = 26, + + /* signed formats */ + SVGA3D_V8U8 = 27, + SVGA3D_Q8W8V8U8 = 28, + SVGA3D_CxV8U8 = 29, + + /* mixed formats */ + SVGA3D_X8L8V8U8 = 30, + SVGA3D_A2W10V10U10 = 31, + + SVGA3D_ALPHA8 = 32, + + /* Single- and dual-component floating point formats */ + SVGA3D_R_S10E5 = 33, + SVGA3D_R_S23E8 = 34, + SVGA3D_RG_S10E5 = 35, + SVGA3D_RG_S23E8 = 36, + + /* + * Any surface can be used as a buffer object, but SVGA3D_BUFFER is + * the most efficient format to use when creating new surfaces + * expressly for index or vertex data. + */ + SVGA3D_BUFFER = 37, + + SVGA3D_Z_D24X8 = 38, + + SVGA3D_V16U16 = 39, + + SVGA3D_G16R16 = 40, + SVGA3D_A16B16G16R16 = 41, + + /* Packed Video formats */ + SVGA3D_UYVY = 42, + SVGA3D_YUY2 = 43, + + SVGA3D_FORMAT_MAX +} SVGA3dSurfaceFormat; + +typedef uint32 SVGA3dColor; /* a, r, g, b */ + +/* + * These match the D3DFORMAT_OP definitions used by Direct3D. We need + * them so that we can query the host for what the supported surface + * operations are (when we're using the D3D backend, in particular), + * and so we can send those operations to the guest. + */ +typedef enum { + SVGA3DFORMAT_OP_TEXTURE = 0x00000001, + SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002, + SVGA3DFORMAT_OP_CUBETEXTURE = 0x00000004, + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET = 0x00000008, + SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET = 0x00000010, + SVGA3DFORMAT_OP_ZSTENCIL = 0x00000040, + SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH = 0x00000080, + +/* + * This format can be used as a render target if the current display mode + * is the same depth if the alpha channel is ignored. e.g. if the device + * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the + * format op list entry for A8R8G8B8 should have this cap. + */ + SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET = 0x00000100, + +/* + * This format contains DirectDraw support (including Flip). This flag + * should not to be set on alpha formats. + */ + SVGA3DFORMAT_OP_DISPLAYMODE = 0x00000400, + +/* + * The rasterizer can support some level of Direct3D support in this format + * and implies that the driver can create a Context in this mode (for some + * render target format). When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE + * flag must also be set. + */ + SVGA3DFORMAT_OP_3DACCELERATION = 0x00000800, + +/* + * This is set for a private format when the driver has put the bpp in + * the structure. + */ + SVGA3DFORMAT_OP_PIXELSIZE = 0x00001000, + +/* + * Indicates that this format can be converted to any RGB format for which + * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified + */ + SVGA3DFORMAT_OP_CONVERT_TO_ARGB = 0x00002000, + +/* + * Indicates that this format can be used to create offscreen plain surfaces. 
+ */ + SVGA3DFORMAT_OP_OFFSCREENPLAIN = 0x00004000, + +/* + * Indicated that this format can be read as an SRGB texture (meaning that the + * sampler will linearize the looked up data) + */ + SVGA3DFORMAT_OP_SRGBREAD = 0x00008000, + +/* + * Indicates that this format can be used in the bumpmap instructions + */ + SVGA3DFORMAT_OP_BUMPMAP = 0x00010000, + +/* + * Indicates that this format can be sampled by the displacement map sampler + */ + SVGA3DFORMAT_OP_DMAP = 0x00020000, + +/* + * Indicates that this format cannot be used with texture filtering + */ + SVGA3DFORMAT_OP_NOFILTER = 0x00040000, + +/* + * Indicates that format conversions are supported to this RGB format if + * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format. + */ + SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB = 0x00080000, + +/* + * Indicated that this format can be written as an SRGB target (meaning that the + * pixel pipe will DE-linearize data on output to format) + */ + SVGA3DFORMAT_OP_SRGBWRITE = 0x00100000, + +/* + * Indicates that this format cannot be used with alpha blending + */ + SVGA3DFORMAT_OP_NOALPHABLEND = 0x00200000, + +/* + * Indicates that the device can auto-generated sublevels for resources + * of this format + */ + SVGA3DFORMAT_OP_AUTOGENMIPMAP = 0x00400000, + +/* + * Indicates that this format can be used by vertex texture sampler + */ + SVGA3DFORMAT_OP_VERTEXTEXTURE = 0x00800000, + +/* + * Indicates that this format supports neither texture coordinate wrap + * modes, nor mipmapping + */ + SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP = 0x01000000 +} SVGA3dFormatOp; + +/* + * This structure is a conversion of SVGA3DFORMAT_OP_*. + * Entries must be located at the same position. + */ +typedef union { + uint32 value; + struct { + uint32 texture : 1; + uint32 volumeTexture : 1; + uint32 cubeTexture : 1; + uint32 offscreenRenderTarget : 1; + uint32 sameFormatRenderTarget : 1; + uint32 unknown1 : 1; + uint32 zStencil : 1; + uint32 zStencilArbitraryDepth : 1; + uint32 sameFormatUpToAlpha : 1; + uint32 unknown2 : 1; + uint32 displayMode : 1; + uint32 acceleration3d : 1; + uint32 pixelSize : 1; + uint32 convertToARGB : 1; + uint32 offscreenPlain : 1; + uint32 sRGBRead : 1; + uint32 bumpMap : 1; + uint32 dmap : 1; + uint32 noFilter : 1; + uint32 memberOfGroupARGB : 1; + uint32 sRGBWrite : 1; + uint32 noAlphaBlend : 1; + uint32 autoGenMipMap : 1; + uint32 vertexTexture : 1; + uint32 noTexCoordWrapNorMip : 1; + }; +} SVGA3dSurfaceFormatCaps; + +/* + * SVGA_3D_CMD_SETRENDERSTATE Types. All value types + * must fit in a uint32. 
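The SVGA3dSurfaceFormatCaps union above is documented as a bit-for-bit mirror of the SVGA3DFORMAT_OP_* flags, so a capability can be tested either through the raw mask or through the named bitfield. A small sketch of the two equivalent views (the caps word itself would come from the host; here it is just a parameter):

/* The two equivalent views of a format-ops word described above: the raw
 * SVGA3DFORMAT_OP_* mask and the named bitfield must agree bit for bit. */
static int
format_is_texturable(uint32 format_ops)
{
   SVGA3dSurfaceFormatCaps caps;

   caps.value = format_ops;
   return (format_ops & SVGA3DFORMAT_OP_TEXTURE) != 0 && caps.texture;
}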
+ */ + +typedef enum { + SVGA3D_RS_INVALID = 0, + SVGA3D_RS_ZENABLE = 1, /* SVGA3dBool */ + SVGA3D_RS_ZWRITEENABLE = 2, /* SVGA3dBool */ + SVGA3D_RS_ALPHATESTENABLE = 3, /* SVGA3dBool */ + SVGA3D_RS_DITHERENABLE = 4, /* SVGA3dBool */ + SVGA3D_RS_BLENDENABLE = 5, /* SVGA3dBool */ + SVGA3D_RS_FOGENABLE = 6, /* SVGA3dBool */ + SVGA3D_RS_SPECULARENABLE = 7, /* SVGA3dBool */ + SVGA3D_RS_STENCILENABLE = 8, /* SVGA3dBool */ + SVGA3D_RS_LIGHTINGENABLE = 9, /* SVGA3dBool */ + SVGA3D_RS_NORMALIZENORMALS = 10, /* SVGA3dBool */ + SVGA3D_RS_POINTSPRITEENABLE = 11, /* SVGA3dBool */ + SVGA3D_RS_POINTSCALEENABLE = 12, /* SVGA3dBool */ + SVGA3D_RS_STENCILREF = 13, /* uint32 */ + SVGA3D_RS_STENCILMASK = 14, /* uint32 */ + SVGA3D_RS_STENCILWRITEMASK = 15, /* uint32 */ + SVGA3D_RS_FOGSTART = 16, /* float */ + SVGA3D_RS_FOGEND = 17, /* float */ + SVGA3D_RS_FOGDENSITY = 18, /* float */ + SVGA3D_RS_POINTSIZE = 19, /* float */ + SVGA3D_RS_POINTSIZEMIN = 20, /* float */ + SVGA3D_RS_POINTSIZEMAX = 21, /* float */ + SVGA3D_RS_POINTSCALE_A = 22, /* float */ + SVGA3D_RS_POINTSCALE_B = 23, /* float */ + SVGA3D_RS_POINTSCALE_C = 24, /* float */ + SVGA3D_RS_FOGCOLOR = 25, /* SVGA3dColor */ + SVGA3D_RS_AMBIENT = 26, /* SVGA3dColor */ + SVGA3D_RS_CLIPPLANEENABLE = 27, /* SVGA3dClipPlanes */ + SVGA3D_RS_FOGMODE = 28, /* SVGA3dFogMode */ + SVGA3D_RS_FILLMODE = 29, /* SVGA3dFillMode */ + SVGA3D_RS_SHADEMODE = 30, /* SVGA3dShadeMode */ + SVGA3D_RS_LINEPATTERN = 31, /* SVGA3dLinePattern */ + SVGA3D_RS_SRCBLEND = 32, /* SVGA3dBlendOp */ + SVGA3D_RS_DSTBLEND = 33, /* SVGA3dBlendOp */ + SVGA3D_RS_BLENDEQUATION = 34, /* SVGA3dBlendEquation */ + SVGA3D_RS_CULLMODE = 35, /* SVGA3dFace */ + SVGA3D_RS_ZFUNC = 36, /* SVGA3dCmpFunc */ + SVGA3D_RS_ALPHAFUNC = 37, /* SVGA3dCmpFunc */ + SVGA3D_RS_STENCILFUNC = 38, /* SVGA3dCmpFunc */ + SVGA3D_RS_STENCILFAIL = 39, /* SVGA3dStencilOp */ + SVGA3D_RS_STENCILZFAIL = 40, /* SVGA3dStencilOp */ + SVGA3D_RS_STENCILPASS = 41, /* SVGA3dStencilOp */ + SVGA3D_RS_ALPHAREF = 42, /* float (0.0 .. 1.0) */ + SVGA3D_RS_FRONTWINDING = 43, /* SVGA3dFrontWinding */ + SVGA3D_RS_COORDINATETYPE = 44, /* SVGA3dCoordinateType */ + SVGA3D_RS_ZBIAS = 45, /* float */ + SVGA3D_RS_RANGEFOGENABLE = 46, /* SVGA3dBool */ + SVGA3D_RS_COLORWRITEENABLE = 47, /* SVGA3dColorMask */ + SVGA3D_RS_VERTEXMATERIALENABLE = 48, /* SVGA3dBool */ + SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, /* SVGA3dVertexMaterial */ + SVGA3D_RS_SPECULARMATERIALSOURCE = 50, /* SVGA3dVertexMaterial */ + SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, /* SVGA3dVertexMaterial */ + SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, /* SVGA3dVertexMaterial */ + SVGA3D_RS_TEXTUREFACTOR = 53, /* SVGA3dColor */ + SVGA3D_RS_LOCALVIEWER = 54, /* SVGA3dBool */ + SVGA3D_RS_SCISSORTESTENABLE = 55, /* SVGA3dBool */ + SVGA3D_RS_BLENDCOLOR = 56, /* SVGA3dColor */ + SVGA3D_RS_STENCILENABLE2SIDED = 57, /* SVGA3dBool */ + SVGA3D_RS_CCWSTENCILFUNC = 58, /* SVGA3dCmpFunc */ + SVGA3D_RS_CCWSTENCILFAIL = 59, /* SVGA3dStencilOp */ + SVGA3D_RS_CCWSTENCILZFAIL = 60, /* SVGA3dStencilOp */ + SVGA3D_RS_CCWSTENCILPASS = 61, /* SVGA3dStencilOp */ + SVGA3D_RS_VERTEXBLEND = 62, /* SVGA3dVertexBlendFlags */ + SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, /* float */ + SVGA3D_RS_DEPTHBIAS = 64, /* float */ + + + /* + * Output Gamma Level + * + * Output gamma effects the gamma curve of colors that are output from the + * rendering pipeline. A value of 1.0 specifies a linear color space. If the + * value is <= 0.0, gamma correction is ignored and linear color space is + * used. 
+ */ + + SVGA3D_RS_OUTPUTGAMMA = 65, /* float */ + SVGA3D_RS_ZVISIBLE = 66, /* SVGA3dBool */ + SVGA3D_RS_LASTPIXEL = 67, /* SVGA3dBool */ + SVGA3D_RS_CLIPPING = 68, /* SVGA3dBool */ + SVGA3D_RS_WRAP0 = 69, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP1 = 70, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP2 = 71, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP3 = 72, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP4 = 73, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP5 = 74, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP6 = 75, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP7 = 76, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP8 = 77, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP9 = 78, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP10 = 79, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP11 = 80, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP12 = 81, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP13 = 82, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP14 = 83, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP15 = 84, /* SVGA3dWrapFlags */ + SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, /* SVGA3dBool */ + SVGA3D_RS_MULTISAMPLEMASK = 86, /* uint32 */ + SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, /* SVGA3dBool */ + SVGA3D_RS_TWEENFACTOR = 88, /* float */ + SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, /* SVGA3dBool */ + SVGA3D_RS_COLORWRITEENABLE1 = 90, /* SVGA3dColorMask */ + SVGA3D_RS_COLORWRITEENABLE2 = 91, /* SVGA3dColorMask */ + SVGA3D_RS_COLORWRITEENABLE3 = 92, /* SVGA3dColorMask */ + SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, /* SVGA3dBool */ + SVGA3D_RS_SRCBLENDALPHA = 94, /* SVGA3dBlendOp */ + SVGA3D_RS_DSTBLENDALPHA = 95, /* SVGA3dBlendOp */ + SVGA3D_RS_BLENDEQUATIONALPHA = 96, /* SVGA3dBlendEquation */ + SVGA3D_RS_MAX +} SVGA3dRenderStateName; + +typedef enum { + SVGA3D_VERTEXMATERIAL_NONE = 0, /* Use the value in the current material */ + SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, /* Use the value in the diffuse component */ + SVGA3D_VERTEXMATERIAL_SPECULAR = 2, /* Use the value in the specular component */ +} SVGA3dVertexMaterial; + +typedef enum { + SVGA3D_FILLMODE_INVALID = 0, + SVGA3D_FILLMODE_POINT = 1, + SVGA3D_FILLMODE_LINE = 2, + SVGA3D_FILLMODE_FILL = 3, + SVGA3D_FILLMODE_MAX +} SVGA3dFillModeType; + + +typedef +union { + struct { + uint16 mode; /* SVGA3dFillModeType */ + uint16 face; /* SVGA3dFace */ + }; + uint32 uintValue; +} SVGA3dFillMode; + +typedef enum { + SVGA3D_SHADEMODE_INVALID = 0, + SVGA3D_SHADEMODE_FLAT = 1, + SVGA3D_SHADEMODE_SMOOTH = 2, + SVGA3D_SHADEMODE_PHONG = 3, /* Not supported */ + SVGA3D_SHADEMODE_MAX +} SVGA3dShadeMode; + +typedef +union { + struct { + uint16 repeat; + uint16 pattern; + }; + uint32 uintValue; +} SVGA3dLinePattern; + +typedef enum { + SVGA3D_BLENDOP_INVALID = 0, + SVGA3D_BLENDOP_ZERO = 1, + SVGA3D_BLENDOP_ONE = 2, + SVGA3D_BLENDOP_SRCCOLOR = 3, + SVGA3D_BLENDOP_INVSRCCOLOR = 4, + SVGA3D_BLENDOP_SRCALPHA = 5, + SVGA3D_BLENDOP_INVSRCALPHA = 6, + SVGA3D_BLENDOP_DESTALPHA = 7, + SVGA3D_BLENDOP_INVDESTALPHA = 8, + SVGA3D_BLENDOP_DESTCOLOR = 9, + SVGA3D_BLENDOP_INVDESTCOLOR = 10, + SVGA3D_BLENDOP_SRCALPHASAT = 11, + SVGA3D_BLENDOP_BLENDFACTOR = 12, + SVGA3D_BLENDOP_INVBLENDFACTOR = 13, + SVGA3D_BLENDOP_MAX +} SVGA3dBlendOp; + +typedef enum { + SVGA3D_BLENDEQ_INVALID = 0, + SVGA3D_BLENDEQ_ADD = 1, + SVGA3D_BLENDEQ_SUBTRACT = 2, + SVGA3D_BLENDEQ_REVSUBTRACT = 3, + SVGA3D_BLENDEQ_MINIMUM = 4, + SVGA3D_BLENDEQ_MAXIMUM = 5, + SVGA3D_BLENDEQ_MAX +} SVGA3dBlendEquation; + +typedef enum { + SVGA3D_FRONTWINDING_INVALID = 0, + SVGA3D_FRONTWINDING_CW = 1, + SVGA3D_FRONTWINDING_CCW = 2, + SVGA3D_FRONTWINDING_MAX +} SVGA3dFrontWinding; + +typedef enum { + SVGA3D_FACE_INVALID = 0, + SVGA3D_FACE_NONE = 1, + 
SVGA3D_FACE_FRONT = 2, + SVGA3D_FACE_BACK = 3, + SVGA3D_FACE_FRONT_BACK = 4, + SVGA3D_FACE_MAX +} SVGA3dFace; + +/* + * The order and the values should not be changed + */ + +typedef enum { + SVGA3D_CMP_INVALID = 0, + SVGA3D_CMP_NEVER = 1, + SVGA3D_CMP_LESS = 2, + SVGA3D_CMP_EQUAL = 3, + SVGA3D_CMP_LESSEQUAL = 4, + SVGA3D_CMP_GREATER = 5, + SVGA3D_CMP_NOTEQUAL = 6, + SVGA3D_CMP_GREATEREQUAL = 7, + SVGA3D_CMP_ALWAYS = 8, + SVGA3D_CMP_MAX +} SVGA3dCmpFunc; + +/* + * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows + * the fog factor to be specified in the alpha component of the specular + * (a.k.a. secondary) vertex color. + */ +typedef enum { + SVGA3D_FOGFUNC_INVALID = 0, + SVGA3D_FOGFUNC_EXP = 1, + SVGA3D_FOGFUNC_EXP2 = 2, + SVGA3D_FOGFUNC_LINEAR = 3, + SVGA3D_FOGFUNC_PER_VERTEX = 4 +} SVGA3dFogFunction; + +/* + * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex + * or per-pixel basis. + */ +typedef enum { + SVGA3D_FOGTYPE_INVALID = 0, + SVGA3D_FOGTYPE_VERTEX = 1, + SVGA3D_FOGTYPE_PIXEL = 2, + SVGA3D_FOGTYPE_MAX = 3 +} SVGA3dFogType; + +/* + * SVGA3D_FOGBASE_* selects depth or range-based fog. Depth-based fog is + * computed using the eye Z value of each pixel (or vertex), whereas range- + * based fog is computed using the actual distance (range) to the eye. + */ +typedef enum { + SVGA3D_FOGBASE_INVALID = 0, + SVGA3D_FOGBASE_DEPTHBASED = 1, + SVGA3D_FOGBASE_RANGEBASED = 2, + SVGA3D_FOGBASE_MAX = 3 +} SVGA3dFogBase; + +typedef enum { + SVGA3D_STENCILOP_INVALID = 0, + SVGA3D_STENCILOP_KEEP = 1, + SVGA3D_STENCILOP_ZERO = 2, + SVGA3D_STENCILOP_REPLACE = 3, + SVGA3D_STENCILOP_INCRSAT = 4, + SVGA3D_STENCILOP_DECRSAT = 5, + SVGA3D_STENCILOP_INVERT = 6, + SVGA3D_STENCILOP_INCR = 7, + SVGA3D_STENCILOP_DECR = 8, + SVGA3D_STENCILOP_MAX +} SVGA3dStencilOp; + +typedef enum { + SVGA3D_CLIPPLANE_0 = (1 << 0), + SVGA3D_CLIPPLANE_1 = (1 << 1), + SVGA3D_CLIPPLANE_2 = (1 << 2), + SVGA3D_CLIPPLANE_3 = (1 << 3), + SVGA3D_CLIPPLANE_4 = (1 << 4), + SVGA3D_CLIPPLANE_5 = (1 << 5), +} SVGA3dClipPlanes; + +typedef enum { + SVGA3D_CLEAR_COLOR = 0x1, + SVGA3D_CLEAR_DEPTH = 0x2, + SVGA3D_CLEAR_STENCIL = 0x4 +} SVGA3dClearFlag; + +typedef enum { + SVGA3D_RT_DEPTH = 0, + SVGA3D_RT_STENCIL = 1, + SVGA3D_RT_COLOR0 = 2, + SVGA3D_RT_COLOR1 = 3, + SVGA3D_RT_COLOR2 = 4, + SVGA3D_RT_COLOR3 = 5, + SVGA3D_RT_COLOR4 = 6, + SVGA3D_RT_COLOR5 = 7, + SVGA3D_RT_COLOR6 = 8, + SVGA3D_RT_COLOR7 = 9, + SVGA3D_RT_MAX, + SVGA3D_RT_INVALID = ((uint32)-1), +} SVGA3dRenderTargetType; + +#define SVGA3D_MAX_RT_COLOR (SVGA3D_RT_COLOR7 - SVGA3D_RT_COLOR0 + 1) + +typedef +union { + struct { + uint32 red : 1; + uint32 green : 1; + uint32 blue : 1; + uint32 alpha : 1; + }; + uint32 uintValue; +} SVGA3dColorMask; + +typedef enum { + SVGA3D_VBLEND_DISABLE = 0, + SVGA3D_VBLEND_1WEIGHT = 1, + SVGA3D_VBLEND_2WEIGHT = 2, + SVGA3D_VBLEND_3WEIGHT = 3, +} SVGA3dVertexBlendFlags; + +typedef enum { + SVGA3D_WRAPCOORD_0 = 1 << 0, + SVGA3D_WRAPCOORD_1 = 1 << 1, + SVGA3D_WRAPCOORD_2 = 1 << 2, + SVGA3D_WRAPCOORD_3 = 1 << 3, + SVGA3D_WRAPCOORD_ALL = 0xF, +} SVGA3dWrapFlags; + +/* + * SVGA_3D_CMD_TEXTURESTATE Types. All value types + * must fit in a uint32. 
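Several render-state values above (SVGA3dFillMode, SVGA3dLinePattern, SVGA3dColorMask) are small packed unions whose uintValue member is the single uint32 that SVGA_3D_CMD_SETRENDERSTATE carries. A sketch of building one such payload, with a hypothetical helper name:

/* Build the uint32 payload for SVGA3D_RS_COLORWRITEENABLE: set the named
 * bits through the struct view, read the packed word back via uintValue. */
static uint32
pack_colorwrite_mask(int red, int green, int blue, int alpha)
{
   SVGA3dColorMask mask;

   mask.uintValue = 0;
   mask.red   = red   ? 1 : 0;
   mask.green = green ? 1 : 0;
   mask.blue  = blue  ? 1 : 0;
   mask.alpha = alpha ? 1 : 0;
   return mask.uintValue;
}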
+ */ + +typedef enum { + SVGA3D_TS_INVALID = 0, + SVGA3D_TS_BIND_TEXTURE = 1, /* SVGA3dSurfaceId */ + SVGA3D_TS_COLOROP = 2, /* SVGA3dTextureCombiner */ + SVGA3D_TS_COLORARG1 = 3, /* SVGA3dTextureArgData */ + SVGA3D_TS_COLORARG2 = 4, /* SVGA3dTextureArgData */ + SVGA3D_TS_ALPHAOP = 5, /* SVGA3dTextureCombiner */ + SVGA3D_TS_ALPHAARG1 = 6, /* SVGA3dTextureArgData */ + SVGA3D_TS_ALPHAARG2 = 7, /* SVGA3dTextureArgData */ + SVGA3D_TS_ADDRESSU = 8, /* SVGA3dTextureAddress */ + SVGA3D_TS_ADDRESSV = 9, /* SVGA3dTextureAddress */ + SVGA3D_TS_MIPFILTER = 10, /* SVGA3dTextureFilter */ + SVGA3D_TS_MAGFILTER = 11, /* SVGA3dTextureFilter */ + SVGA3D_TS_MINFILTER = 12, /* SVGA3dTextureFilter */ + SVGA3D_TS_BORDERCOLOR = 13, /* SVGA3dColor */ + SVGA3D_TS_TEXCOORDINDEX = 14, /* uint32 */ + SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, /* SVGA3dTexTransformFlags */ + SVGA3D_TS_TEXCOORDGEN = 16, /* SVGA3dTextureCoordGen */ + SVGA3D_TS_BUMPENVMAT00 = 17, /* float */ + SVGA3D_TS_BUMPENVMAT01 = 18, /* float */ + SVGA3D_TS_BUMPENVMAT10 = 19, /* float */ + SVGA3D_TS_BUMPENVMAT11 = 20, /* float */ + SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, /* uint32 */ + SVGA3D_TS_TEXTURE_LOD_BIAS = 22, /* float */ + SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, /* uint32 */ + SVGA3D_TS_ADDRESSW = 24, /* SVGA3dTextureAddress */ + + + /* + * Sampler Gamma Level + * + * Sampler gamma effects the color of samples taken from the sampler. A + * value of 1.0 will produce linear samples. If the value is <= 0.0 the + * gamma value is ignored and a linear space is used. + */ + + SVGA3D_TS_GAMMA = 25, /* float */ + SVGA3D_TS_BUMPENVLSCALE = 26, /* float */ + SVGA3D_TS_BUMPENVLOFFSET = 27, /* float */ + SVGA3D_TS_COLORARG0 = 28, /* SVGA3dTextureArgData */ + SVGA3D_TS_ALPHAARG0 = 29, /* SVGA3dTextureArgData */ + SVGA3D_TS_MAX +} SVGA3dTextureStateName; + +typedef enum { + SVGA3D_TC_INVALID = 0, + SVGA3D_TC_DISABLE = 1, + SVGA3D_TC_SELECTARG1 = 2, + SVGA3D_TC_SELECTARG2 = 3, + SVGA3D_TC_MODULATE = 4, + SVGA3D_TC_ADD = 5, + SVGA3D_TC_ADDSIGNED = 6, + SVGA3D_TC_SUBTRACT = 7, + SVGA3D_TC_BLENDTEXTUREALPHA = 8, + SVGA3D_TC_BLENDDIFFUSEALPHA = 9, + SVGA3D_TC_BLENDCURRENTALPHA = 10, + SVGA3D_TC_BLENDFACTORALPHA = 11, + SVGA3D_TC_MODULATE2X = 12, + SVGA3D_TC_MODULATE4X = 13, + SVGA3D_TC_DSDT = 14, + SVGA3D_TC_DOTPRODUCT3 = 15, + SVGA3D_TC_BLENDTEXTUREALPHAPM = 16, + SVGA3D_TC_ADDSIGNED2X = 17, + SVGA3D_TC_ADDSMOOTH = 18, + SVGA3D_TC_PREMODULATE = 19, + SVGA3D_TC_MODULATEALPHA_ADDCOLOR = 20, + SVGA3D_TC_MODULATECOLOR_ADDALPHA = 21, + SVGA3D_TC_MODULATEINVALPHA_ADDCOLOR = 22, + SVGA3D_TC_MODULATEINVCOLOR_ADDALPHA = 23, + SVGA3D_TC_BUMPENVMAPLUMINANCE = 24, + SVGA3D_TC_MULTIPLYADD = 25, + SVGA3D_TC_LERP = 26, + SVGA3D_TC_MAX +} SVGA3dTextureCombiner; + +#define SVGA3D_TC_CAP_BIT(svga3d_tc_op) (svga3d_tc_op ? (1 << (svga3d_tc_op - 1)) : 0) + +typedef enum { + SVGA3D_TEX_ADDRESS_INVALID = 0, + SVGA3D_TEX_ADDRESS_WRAP = 1, + SVGA3D_TEX_ADDRESS_MIRROR = 2, + SVGA3D_TEX_ADDRESS_CLAMP = 3, + SVGA3D_TEX_ADDRESS_BORDER = 4, + SVGA3D_TEX_ADDRESS_MIRRORONCE = 5, + SVGA3D_TEX_ADDRESS_EDGE = 6, + SVGA3D_TEX_ADDRESS_MAX +} SVGA3dTextureAddress; + +/* + * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is + * disabled, and the rasterizer should use the magnification filter instead. 
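SVGA3D_TC_CAP_BIT above maps a texture-combiner op to a capability bit (op N to bit N-1, with SVGA3D_TC_INVALID mapping to no bit), which suggests the host advertises combiner support as a bitmask. A short sketch of the resulting query, assuming the caps word is obtained elsewhere, for instance via a devcap lookup like the one sketched earlier:

/* Query a combiner-support bitmask laid out as SVGA3D_TC_CAP_BIT(op) bits. */
static int
combiner_supported(uint32 combiner_caps, SVGA3dTextureCombiner op)
{
   return (combiner_caps & SVGA3D_TC_CAP_BIT(op)) != 0;
}

/* e.g. combiner_supported(caps, SVGA3D_TC_MODULATE) */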
+ */ +typedef enum { + SVGA3D_TEX_FILTER_NONE = 0, + SVGA3D_TEX_FILTER_NEAREST = 1, + SVGA3D_TEX_FILTER_LINEAR = 2, + SVGA3D_TEX_FILTER_ANISOTROPIC = 3, + SVGA3D_TEX_FILTER_FLATCUBIC = 4, // Deprecated, not implemented + SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, // Deprecated, not implemented + SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, // Not currently implemented + SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, // Not currently implemented + SVGA3D_TEX_FILTER_MAX +} SVGA3dTextureFilter; + +typedef enum { + SVGA3D_TEX_TRANSFORM_OFF = 0, + SVGA3D_TEX_TRANSFORM_S = (1 << 0), + SVGA3D_TEX_TRANSFORM_T = (1 << 1), + SVGA3D_TEX_TRANSFORM_R = (1 << 2), + SVGA3D_TEX_TRANSFORM_Q = (1 << 3), + SVGA3D_TEX_PROJECTED = (1 << 15), +} SVGA3dTexTransformFlags; + +typedef enum { + SVGA3D_TEXCOORD_GEN_OFF = 0, + SVGA3D_TEXCOORD_GEN_EYE_POSITION = 1, + SVGA3D_TEXCOORD_GEN_EYE_NORMAL = 2, + SVGA3D_TEXCOORD_GEN_REFLECTIONVECTOR = 3, + SVGA3D_TEXCOORD_GEN_SPHERE = 4, + SVGA3D_TEXCOORD_GEN_MAX +} SVGA3dTextureCoordGen; + +/* + * Texture argument constants for texture combiner + */ +typedef enum { + SVGA3D_TA_INVALID = 0, + SVGA3D_TA_CONSTANT = 1, + SVGA3D_TA_PREVIOUS = 2, + SVGA3D_TA_DIFFUSE = 3, + SVGA3D_TA_TEXTURE = 4, + SVGA3D_TA_SPECULAR = 5, + SVGA3D_TA_MAX +} SVGA3dTextureArgData; + +#define SVGA3D_TM_MASK_LEN 4 + +/* Modifiers for texture argument constants defined above. */ +typedef enum { + SVGA3D_TM_NONE = 0, + SVGA3D_TM_ALPHA = (1 << SVGA3D_TM_MASK_LEN), + SVGA3D_TM_ONE_MINUS = (2 << SVGA3D_TM_MASK_LEN), +} SVGA3dTextureArgModifier; + +#define SVGA3D_INVALID_ID ((uint32)-1) +#define SVGA3D_MAX_CLIP_PLANES 6 + +/* + * This is the limit to the number of fixed-function texture + * transforms and texture coordinates we can support. It does *not* + * correspond to the number of texture image units (samplers) we + * support! + */ +#define SVGA3D_MAX_TEXTURE_COORDS 8 + +/* + * Vertex declarations + * + * Notes: + * + * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you + * draw with any POSITIONT vertex arrays, the programmable vertex + * pipeline will be implicitly disabled. Drawing will take place as if + * no vertex shader was bound. 
+ */ + +typedef enum { + SVGA3D_DECLUSAGE_POSITION = 0, + SVGA3D_DECLUSAGE_BLENDWEIGHT, // 1 + SVGA3D_DECLUSAGE_BLENDINDICES, // 2 + SVGA3D_DECLUSAGE_NORMAL, // 3 + SVGA3D_DECLUSAGE_PSIZE, // 4 + SVGA3D_DECLUSAGE_TEXCOORD, // 5 + SVGA3D_DECLUSAGE_TANGENT, // 6 + SVGA3D_DECLUSAGE_BINORMAL, // 7 + SVGA3D_DECLUSAGE_TESSFACTOR, // 8 + SVGA3D_DECLUSAGE_POSITIONT, // 9 + SVGA3D_DECLUSAGE_COLOR, // 10 + SVGA3D_DECLUSAGE_FOG, // 11 + SVGA3D_DECLUSAGE_DEPTH, // 12 + SVGA3D_DECLUSAGE_SAMPLE, // 13 + SVGA3D_DECLUSAGE_MAX +} SVGA3dDeclUsage; + +typedef enum { + SVGA3D_DECLMETHOD_DEFAULT = 0, + SVGA3D_DECLMETHOD_PARTIALU, + SVGA3D_DECLMETHOD_PARTIALV, + SVGA3D_DECLMETHOD_CROSSUV, // Normal + SVGA3D_DECLMETHOD_UV, + SVGA3D_DECLMETHOD_LOOKUP, // Lookup a displacement map + SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map +} SVGA3dDeclMethod; + +typedef enum { + SVGA3D_DECLTYPE_FLOAT1 = 0, + SVGA3D_DECLTYPE_FLOAT2 = 1, + SVGA3D_DECLTYPE_FLOAT3 = 2, + SVGA3D_DECLTYPE_FLOAT4 = 3, + SVGA3D_DECLTYPE_D3DCOLOR = 4, + SVGA3D_DECLTYPE_UBYTE4 = 5, + SVGA3D_DECLTYPE_SHORT2 = 6, + SVGA3D_DECLTYPE_SHORT4 = 7, + SVGA3D_DECLTYPE_UBYTE4N = 8, + SVGA3D_DECLTYPE_SHORT2N = 9, + SVGA3D_DECLTYPE_SHORT4N = 10, + SVGA3D_DECLTYPE_USHORT2N = 11, + SVGA3D_DECLTYPE_USHORT4N = 12, + SVGA3D_DECLTYPE_UDEC3 = 13, + SVGA3D_DECLTYPE_DEC3N = 14, + SVGA3D_DECLTYPE_FLOAT16_2 = 15, + SVGA3D_DECLTYPE_FLOAT16_4 = 16, + SVGA3D_DECLTYPE_MAX, +} SVGA3dDeclType; + +/* + * This structure is used for the divisor for geometry instancing; + * it's a direct translation of the Direct3D equivalent. + */ +typedef union { + struct { + /* + * For index data, this number represents the number of instances to draw. + * For instance data, this number represents the number of + * instances/vertex in this stream + */ + uint32 count : 30; + + /* + * This is 1 if this is supposed to be the data that is repeated for + * every instance. + */ + uint32 indexedData : 1; + + /* + * This is 1 if this is supposed to be the per-instance data. 
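+    *
+    * (As a sketch: a stream holding per-instance data would typically set
+    * instanceData = 1 with count = 1, while the indexed geometry stream
+    * sets indexedData = 1 with count equal to the number of instances
+    * to draw.)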
+ */ + uint32 instanceData : 1; + }; + + uint32 value; +} SVGA3dVertexDivisor; + +typedef enum { + SVGA3D_PRIMITIVE_INVALID = 0, + SVGA3D_PRIMITIVE_TRIANGLELIST = 1, + SVGA3D_PRIMITIVE_POINTLIST = 2, + SVGA3D_PRIMITIVE_LINELIST = 3, + SVGA3D_PRIMITIVE_LINESTRIP = 4, + SVGA3D_PRIMITIVE_TRIANGLESTRIP = 5, + SVGA3D_PRIMITIVE_TRIANGLEFAN = 6, + SVGA3D_PRIMITIVE_MAX +} SVGA3dPrimitiveType; + +typedef enum { + SVGA3D_COORDINATE_INVALID = 0, + SVGA3D_COORDINATE_LEFTHANDED = 1, + SVGA3D_COORDINATE_RIGHTHANDED = 2, + SVGA3D_COORDINATE_MAX +} SVGA3dCoordinateType; + +typedef enum { + SVGA3D_TRANSFORM_INVALID = 0, + SVGA3D_TRANSFORM_WORLD = 1, + SVGA3D_TRANSFORM_VIEW = 2, + SVGA3D_TRANSFORM_PROJECTION = 3, + SVGA3D_TRANSFORM_TEXTURE0 = 4, + SVGA3D_TRANSFORM_TEXTURE1 = 5, + SVGA3D_TRANSFORM_TEXTURE2 = 6, + SVGA3D_TRANSFORM_TEXTURE3 = 7, + SVGA3D_TRANSFORM_TEXTURE4 = 8, + SVGA3D_TRANSFORM_TEXTURE5 = 9, + SVGA3D_TRANSFORM_TEXTURE6 = 10, + SVGA3D_TRANSFORM_TEXTURE7 = 11, + SVGA3D_TRANSFORM_WORLD1 = 12, + SVGA3D_TRANSFORM_WORLD2 = 13, + SVGA3D_TRANSFORM_WORLD3 = 14, + SVGA3D_TRANSFORM_MAX +} SVGA3dTransformType; + +typedef enum { + SVGA3D_LIGHTTYPE_INVALID = 0, + SVGA3D_LIGHTTYPE_POINT = 1, + SVGA3D_LIGHTTYPE_SPOT1 = 2, /* 1-cone, in degrees */ + SVGA3D_LIGHTTYPE_SPOT2 = 3, /* 2-cone, in radians */ + SVGA3D_LIGHTTYPE_DIRECTIONAL = 4, + SVGA3D_LIGHTTYPE_MAX +} SVGA3dLightType; + +typedef enum { + SVGA3D_CUBEFACE_POSX = 0, + SVGA3D_CUBEFACE_NEGX = 1, + SVGA3D_CUBEFACE_POSY = 2, + SVGA3D_CUBEFACE_NEGY = 3, + SVGA3D_CUBEFACE_POSZ = 4, + SVGA3D_CUBEFACE_NEGZ = 5, +} SVGA3dCubeFace; + +typedef enum { + SVGA3D_SHADERTYPE_COMPILED_DX8 = 0, + SVGA3D_SHADERTYPE_VS = 1, + SVGA3D_SHADERTYPE_PS = 2, + SVGA3D_SHADERTYPE_MAX +} SVGA3dShaderType; + +typedef enum { + SVGA3D_CONST_TYPE_FLOAT = 0, + SVGA3D_CONST_TYPE_INT = 1, + SVGA3D_CONST_TYPE_BOOL = 2, +} SVGA3dShaderConstType; + +#define SVGA3D_MAX_SURFACE_FACES 6 + +typedef enum { + SVGA3D_STRETCH_BLT_POINT = 0, + SVGA3D_STRETCH_BLT_LINEAR = 1, + SVGA3D_STRETCH_BLT_MAX +} SVGA3dStretchBltMode; + +typedef enum { + SVGA3D_QUERYTYPE_OCCLUSION = 0, + SVGA3D_QUERYTYPE_MAX +} SVGA3dQueryType; + +typedef enum { + SVGA3D_QUERYSTATE_PENDING = 0, /* Waiting on the host (set by guest) */ + SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully (set by host) */ + SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully (set by host) */ + SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (For guest use only) */ +} SVGA3dQueryState; + +typedef enum { + SVGA3D_WRITE_HOST_VRAM = 1, + SVGA3D_READ_HOST_VRAM = 2, +} SVGA3dTransferType; + +/* + * The maximum number vertex arrays we're guaranteed to support in + * SVGA_3D_CMD_DRAWPRIMITIVES. + */ +#define SVGA3D_MAX_VERTEX_ARRAYS 32 + +/* + * Identifiers for commands in the command FIFO. + * + * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of + * the SVGA3D protocol and remain reserved; they should not be used in the + * future. + * + * IDs between 1040 and 1999 (inclusive) are available for use by the + * current SVGA3D protocol. + * + * FIFO clients other than SVGA3D should stay below 1000, or at 2000 + * and up. 
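+ *
+ * For example, with SVGA_3D_CMD_BASE defined below as 1040,
+ * SVGA_3D_CMD_SETRENDERSTATE (SVGA_3D_CMD_BASE + 9) is command ID 1049,
+ * and every currently defined command falls below SVGA_3D_CMD_MAX (1070),
+ * which in turn is well below SVGA_3D_CMD_FUTURE_MAX (2000).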
+ */ + +#define SVGA_3D_CMD_LEGACY_BASE 1000 +#define SVGA_3D_CMD_BASE 1040 + +#define SVGA_3D_CMD_SURFACE_DEFINE SVGA_3D_CMD_BASE + 0 +#define SVGA_3D_CMD_SURFACE_DESTROY SVGA_3D_CMD_BASE + 1 +#define SVGA_3D_CMD_SURFACE_COPY SVGA_3D_CMD_BASE + 2 +#define SVGA_3D_CMD_SURFACE_STRETCHBLT SVGA_3D_CMD_BASE + 3 +#define SVGA_3D_CMD_SURFACE_DMA SVGA_3D_CMD_BASE + 4 +#define SVGA_3D_CMD_CONTEXT_DEFINE SVGA_3D_CMD_BASE + 5 +#define SVGA_3D_CMD_CONTEXT_DESTROY SVGA_3D_CMD_BASE + 6 +#define SVGA_3D_CMD_SETTRANSFORM SVGA_3D_CMD_BASE + 7 +#define SVGA_3D_CMD_SETZRANGE SVGA_3D_CMD_BASE + 8 +#define SVGA_3D_CMD_SETRENDERSTATE SVGA_3D_CMD_BASE + 9 +#define SVGA_3D_CMD_SETRENDERTARGET SVGA_3D_CMD_BASE + 10 +#define SVGA_3D_CMD_SETTEXTURESTATE SVGA_3D_CMD_BASE + 11 +#define SVGA_3D_CMD_SETMATERIAL SVGA_3D_CMD_BASE + 12 +#define SVGA_3D_CMD_SETLIGHTDATA SVGA_3D_CMD_BASE + 13 +#define SVGA_3D_CMD_SETLIGHTENABLED SVGA_3D_CMD_BASE + 14 +#define SVGA_3D_CMD_SETVIEWPORT SVGA_3D_CMD_BASE + 15 +#define SVGA_3D_CMD_SETCLIPPLANE SVGA_3D_CMD_BASE + 16 +#define SVGA_3D_CMD_CLEAR SVGA_3D_CMD_BASE + 17 +#define SVGA_3D_CMD_PRESENT SVGA_3D_CMD_BASE + 18 // Deprecated +#define SVGA_3D_CMD_SHADER_DEFINE SVGA_3D_CMD_BASE + 19 +#define SVGA_3D_CMD_SHADER_DESTROY SVGA_3D_CMD_BASE + 20 +#define SVGA_3D_CMD_SET_SHADER SVGA_3D_CMD_BASE + 21 +#define SVGA_3D_CMD_SET_SHADER_CONST SVGA_3D_CMD_BASE + 22 +#define SVGA_3D_CMD_DRAW_PRIMITIVES SVGA_3D_CMD_BASE + 23 +#define SVGA_3D_CMD_SETSCISSORRECT SVGA_3D_CMD_BASE + 24 +#define SVGA_3D_CMD_BEGIN_QUERY SVGA_3D_CMD_BASE + 25 +#define SVGA_3D_CMD_END_QUERY SVGA_3D_CMD_BASE + 26 +#define SVGA_3D_CMD_WAIT_FOR_QUERY SVGA_3D_CMD_BASE + 27 +#define SVGA_3D_CMD_PRESENT_READBACK SVGA_3D_CMD_BASE + 28 // Deprecated +#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN SVGA_3D_CMD_BASE + 29 +#define SVGA_3D_CMD_MAX SVGA_3D_CMD_BASE + 30 + +#define SVGA_3D_CMD_FUTURE_MAX 2000 + +/* + * Common substructures used in multiple FIFO commands: + */ + +typedef struct { + union { + struct { + uint16 function; // SVGA3dFogFunction + uint8 type; // SVGA3dFogType + uint8 base; // SVGA3dFogBase + }; + uint32 uintValue; + }; +} SVGA3dFogMode; + +/* + * Uniquely identify one image (a 1D/2D/3D array) from a surface. This + * is a surface ID as well as face/mipmap indices. + */ + +typedef +struct SVGA3dSurfaceImageId { + uint32 sid; + uint32 face; + uint32 mipmap; +} SVGA3dSurfaceImageId; + +typedef +struct SVGA3dGuestImage { + SVGAGuestPtr ptr; + + /* + * A note on interpretation of pitch: This value of pitch is the + * number of bytes between vertically adjacent image + * blocks. Normally this is the number of bytes between the first + * pixel of two adjacent scanlines. With compressed textures, + * however, this may represent the number of bytes between + * compression blocks rather than between rows of pixels. + * + * XXX: Compressed textures currently must be tightly packed in guest memory. + * + * If the image is 1-dimensional, pitch is ignored. + * + * If 'pitch' is zero, the SVGA3D device calculates a pitch value + * assuming each row of blocks is tightly packed. + */ + uint32 pitch; +} SVGA3dGuestImage; + + +/* + * FIFO command format definitions: + */ + +/* + * The data size header following cmdNum for every 3d command + */ +typedef +struct { + uint32 id; + uint32 size; +} SVGA3dCmdHeader; + +/* + * A surface is a hierarchy of host VRAM surfaces: 1D, 2D, or 3D, with + * optional mipmaps and cube faces. 
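+ *
+ * For example, a cube map created with SVGA3D_SURFACE_CUBEMAP and three
+ * mip levels per face (face[0..5].numMipLevels = 3) is followed in the
+ * SVGA3dCmdDefineSurface command below by 6 * 3 = 18 SVGA3dSize entries,
+ * one per mip level of each face.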
+ */ + +typedef +struct { + uint32 width; + uint32 height; + uint32 depth; +} SVGA3dSize; + +typedef enum { + SVGA3D_SURFACE_CUBEMAP = (1 << 0), + SVGA3D_SURFACE_HINT_STATIC = (1 << 1), + SVGA3D_SURFACE_HINT_DYNAMIC = (1 << 2), + SVGA3D_SURFACE_HINT_INDEXBUFFER = (1 << 3), + SVGA3D_SURFACE_HINT_VERTEXBUFFER = (1 << 4), + SVGA3D_SURFACE_HINT_TEXTURE = (1 << 5), + SVGA3D_SURFACE_HINT_RENDERTARGET = (1 << 6), + SVGA3D_SURFACE_HINT_DEPTHSTENCIL = (1 << 7), + SVGA3D_SURFACE_HINT_WRITEONLY = (1 << 8), +} SVGA3dSurfaceFlags; + +typedef +struct { + uint32 numMipLevels; +} SVGA3dSurfaceFace; + +typedef +struct { + uint32 sid; + SVGA3dSurfaceFlags surfaceFlags; + SVGA3dSurfaceFormat format; + SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES]; + /* + * Followed by an SVGA3dSize structure for each mip level in each face. + * + * A note on surface sizes: Sizes are always specified in pixels, + * even if the true surface size is not a multiple of the minimum + * block size of the surface's format. For example, a 3x3x1 DXT1 + * compressed texture would actually be stored as a 4x4x1 image in + * memory. + */ +} SVGA3dCmdDefineSurface; /* SVGA_3D_CMD_SURFACE_DEFINE */ + +typedef +struct { + uint32 sid; +} SVGA3dCmdDestroySurface; /* SVGA_3D_CMD_SURFACE_DESTROY */ + +typedef +struct { + uint32 cid; +} SVGA3dCmdDefineContext; /* SVGA_3D_CMD_CONTEXT_DEFINE */ + +typedef +struct { + uint32 cid; +} SVGA3dCmdDestroyContext; /* SVGA_3D_CMD_CONTEXT_DESTROY */ + +typedef +struct { + uint32 cid; + SVGA3dClearFlag clearFlag; + uint32 color; + float depth; + uint32 stencil; + /* Followed by variable number of SVGA3dRect structures */ +} SVGA3dCmdClear; /* SVGA_3D_CMD_CLEAR */ + +typedef +struct SVGA3dCopyRect { + uint32 x; + uint32 y; + uint32 w; + uint32 h; + uint32 srcx; + uint32 srcy; +} SVGA3dCopyRect; + +typedef +struct SVGA3dCopyBox { + uint32 x; + uint32 y; + uint32 z; + uint32 w; + uint32 h; + uint32 d; + uint32 srcx; + uint32 srcy; + uint32 srcz; +} SVGA3dCopyBox; + +typedef +struct { + uint32 x; + uint32 y; + uint32 w; + uint32 h; +} SVGA3dRect; + +typedef +struct { + uint32 x; + uint32 y; + uint32 z; + uint32 w; + uint32 h; + uint32 d; +} SVGA3dBox; + +typedef +struct { + uint32 x; + uint32 y; + uint32 z; +} SVGA3dPoint; + +typedef +struct { + SVGA3dLightType type; + SVGA3dBool inWorldSpace; + float diffuse[4]; + float specular[4]; + float ambient[4]; + float position[4]; + float direction[4]; + float range; + float falloff; + float attenuation0; + float attenuation1; + float attenuation2; + float theta; + float phi; +} SVGA3dLightData; + +typedef +struct { + uint32 sid; + /* Followed by variable number of SVGA3dCopyRect structures */ +} SVGA3dCmdPresent; /* SVGA_3D_CMD_PRESENT */ + +typedef +struct { + SVGA3dRenderStateName state; + union { + uint32 uintValue; + float floatValue; + }; +} SVGA3dRenderState; + +typedef +struct { + uint32 cid; + /* Followed by variable number of SVGA3dRenderState structures */ +} SVGA3dCmdSetRenderState; /* SVGA_3D_CMD_SETRENDERSTATE */ + +typedef +struct { + uint32 cid; + SVGA3dRenderTargetType type; + SVGA3dSurfaceImageId target; +} SVGA3dCmdSetRenderTarget; /* SVGA_3D_CMD_SETRENDERTARGET */ + +typedef +struct { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dest; + /* Followed by variable number of SVGA3dCopyBox structures */ +} SVGA3dCmdSurfaceCopy; /* SVGA_3D_CMD_SURFACE_COPY */ + +typedef +struct { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dest; + SVGA3dBox boxSrc; + SVGA3dBox boxDest; + SVGA3dStretchBltMode mode; +} SVGA3dCmdSurfaceStretchBlt; /* 
SVGA_3D_CMD_SURFACE_STRETCHBLT */ + +typedef +struct { + /* + * If the discard flag is present in a surface DMA operation, the host may + * discard the contents of the current mipmap level and face of the target + * surface before applying the surface DMA contents. + */ + uint32 discard : 1; + + /* + * If the unsynchronized flag is present, the host may perform this upload + * without syncing to pending reads on this surface. + */ + uint32 unsynchronized : 1; + + /* + * Guests *MUST* set the reserved bits to 0 before submitting the command + * suffix as future flags may occupy these bits. + */ + uint32 reserved : 30; +} SVGA3dSurfaceDMAFlags; + +typedef +struct { + SVGA3dGuestImage guest; + SVGA3dSurfaceImageId host; + SVGA3dTransferType transfer; + /* + * Followed by variable number of SVGA3dCopyBox structures. For consistency + * in all clipping logic and coordinate translation, we define the + * "source" in each copyBox as the guest image and the + * "destination" as the host image, regardless of transfer + * direction. + * + * For efficiency, the SVGA3D device is free to copy more data than + * specified. For example, it may round copy boxes outwards such + * that they lie on particular alignment boundaries. + */ +} SVGA3dCmdSurfaceDMA; /* SVGA_3D_CMD_SURFACE_DMA */ + +/* + * SVGA3dCmdSurfaceDMASuffix -- + * + * This is a command suffix that will appear after a SurfaceDMA command in + * the FIFO. It contains some extra information that hosts may use to + * optimize performance or protect the guest. This suffix exists to preserve + * backwards compatibility while also allowing for new functionality to be + * implemented. + */ + +typedef +struct { + uint32 suffixSize; + + /* + * The maximum offset is used to determine the maximum offset from the + * guestPtr base address that will be accessed or written to during this + * surfaceDMA. If the suffix is supported, the host will respect this + * boundary while performing surface DMAs. + * + * Defaults to MAX_UINT32 + */ + uint32 maximumOffset; + + /* + * A set of flags that describes optimizations that the host may perform + * while performing this surface DMA operation. The guest should never rely + * on behaviour that is different when these flags are set for correctness. + * + * Defaults to 0 + */ + SVGA3dSurfaceDMAFlags flags; +} SVGA3dCmdSurfaceDMASuffix; + +/* + * SVGA_3D_CMD_DRAW_PRIMITIVES -- + * + * This command is the SVGA3D device's generic drawing entry point. + * It can draw multiple ranges of primitives, optionally using an + * index buffer, using an arbitrary collection of vertex buffers. + * + * Each SVGA3dVertexDecl defines a distinct vertex array to bind + * during this draw call. The declarations specify which surface + * the vertex data lives in, what that vertex data is used for, + * and how to interpret it. + * + * Each SVGA3dPrimitiveRange defines a collection of primitives + * to render using the same vertex arrays. An index buffer is + * optional. + */ + +typedef +struct { + /* + * A range hint is an optional specification for the range of indices + * in an SVGA3dArray that will be used. If 'last' is zero, it is assumed + * that the entire array will be used. + * + * These are only hints. The SVGA3D device may use them for + * performance optimization if possible, but it's also allowed to + * ignore these values. + */ + uint32 first; + uint32 last; +} SVGA3dArrayRangeHint; + +typedef +struct { + /* + * Define the origin and shape of a vertex or index array. Both + * 'offset' and 'stride' are in bytes. 
The provided surface will be + * reinterpreted as a flat array of bytes in the same format used + * by surface DMA operations. To avoid unnecessary conversions, the + * surface should be created with the SVGA3D_BUFFER format. + * + * Index 0 in the array starts 'offset' bytes into the surface. + * Index 1 begins at byte 'offset + stride', etc. Array indices may + * not be negative. + */ + uint32 surfaceId; + uint32 offset; + uint32 stride; +} SVGA3dArray; + +typedef +struct { + /* + * Describe a vertex array's data type, and define how it is to be + * used by the fixed function pipeline or the vertex shader. It + * isn't useful to have two VertexDecls with the same + * VertexArrayIdentity in one draw call. + */ + SVGA3dDeclType type; + SVGA3dDeclMethod method; + SVGA3dDeclUsage usage; + uint32 usageIndex; +} SVGA3dVertexArrayIdentity; + +typedef +struct { + SVGA3dVertexArrayIdentity identity; + SVGA3dArray array; + SVGA3dArrayRangeHint rangeHint; +} SVGA3dVertexDecl; + +typedef +struct { + /* + * Define a group of primitives to render, from sequential indices. + * + * The value of 'primitiveType' and 'primitiveCount' imply the + * total number of vertices that will be rendered. + */ + SVGA3dPrimitiveType primType; + uint32 primitiveCount; + + /* + * Optional index buffer. If indexArray.surfaceId is + * SVGA3D_INVALID_ID, we render without an index buffer. Rendering + * without an index buffer is identical to rendering with an index + * buffer containing the sequence [0, 1, 2, 3, ...]. + * + * If an index buffer is in use, indexWidth specifies the width in + * bytes of each index value. It must be less than or equal to + * indexArray.stride. + * + * (Currently, the SVGA3D device requires index buffers to be tightly + * packed. In other words, indexWidth == indexArray.stride) + */ + SVGA3dArray indexArray; + uint32 indexWidth; + + /* + * Optional index bias. This number is added to all indices from + * indexArray before they are used as vertex array indices. This + * can be used in multiple ways: + * + * - When not using an indexArray, this bias can be used to + * specify where in the vertex arrays to begin rendering. + * + * - A positive number here is equivalent to increasing the + * offset in each vertex array. + * + * - A negative number can be used to render using a small + * vertex array and an index buffer that contains large + * values. This may be used by some applications that + * crop a vertex buffer without modifying their index + * buffer. + * + * Note that rendering with a negative bias value may be slower and + * use more memory than rendering with a positive or zero bias. + */ + int32 indexBias; +} SVGA3dPrimitiveRange; + +typedef +struct { + uint32 cid; + uint32 numVertexDecls; + uint32 numRanges; + + /* + * There are two variable size arrays after the + * SVGA3dCmdDrawPrimitives structure. In order, + * they are: + * + * 1. SVGA3dVertexDecl, quantity 'numVertexDecls' + * 2. SVGA3dPrimitiveRange, quantity 'numRanges' + * 3. 
Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains + * the frequency divisor for this the corresponding vertex decl) + */ +} SVGA3dCmdDrawPrimitives; /* SVGA_3D_CMD_DRAWPRIMITIVES */ + +typedef +struct { + uint32 stage; + SVGA3dTextureStateName name; + union { + uint32 value; + float floatValue; + }; +} SVGA3dTextureState; + +typedef +struct { + uint32 cid; + /* Followed by variable number of SVGA3dTextureState structures */ +} SVGA3dCmdSetTextureState; /* SVGA_3D_CMD_SETTEXTURESTATE */ + +typedef +struct { + uint32 cid; + SVGA3dTransformType type; + float matrix[16]; +} SVGA3dCmdSetTransform; /* SVGA_3D_CMD_SETTRANSFORM */ + +typedef +struct { + float min; + float max; +} SVGA3dZRange; + +typedef +struct { + uint32 cid; + SVGA3dZRange zRange; +} SVGA3dCmdSetZRange; /* SVGA_3D_CMD_SETZRANGE */ + +typedef +struct { + float diffuse[4]; + float ambient[4]; + float specular[4]; + float emissive[4]; + float shininess; +} SVGA3dMaterial; + +typedef +struct { + uint32 cid; + SVGA3dFace face; + SVGA3dMaterial material; +} SVGA3dCmdSetMaterial; /* SVGA_3D_CMD_SETMATERIAL */ + +typedef +struct { + uint32 cid; + uint32 index; + SVGA3dLightData data; +} SVGA3dCmdSetLightData; /* SVGA_3D_CMD_SETLIGHTDATA */ + +typedef +struct { + uint32 cid; + uint32 index; + uint32 enabled; +} SVGA3dCmdSetLightEnabled; /* SVGA_3D_CMD_SETLIGHTENABLED */ + +typedef +struct { + uint32 cid; + SVGA3dRect rect; +} SVGA3dCmdSetViewport; /* SVGA_3D_CMD_SETVIEWPORT */ + +typedef +struct { + uint32 cid; + SVGA3dRect rect; +} SVGA3dCmdSetScissorRect; /* SVGA_3D_CMD_SETSCISSORRECT */ + +typedef +struct { + uint32 cid; + uint32 index; + float plane[4]; +} SVGA3dCmdSetClipPlane; /* SVGA_3D_CMD_SETCLIPPLANE */ + +typedef +struct { + uint32 cid; + uint32 shid; + SVGA3dShaderType type; + /* Followed by variable number of DWORDs for shader bycode */ +} SVGA3dCmdDefineShader; /* SVGA_3D_CMD_SHADER_DEFINE */ + +typedef +struct { + uint32 cid; + uint32 shid; + SVGA3dShaderType type; +} SVGA3dCmdDestroyShader; /* SVGA_3D_CMD_SHADER_DESTROY */ + +typedef +struct { + uint32 cid; + uint32 reg; /* register number */ + SVGA3dShaderType type; + SVGA3dShaderConstType ctype; + uint32 values[4]; +} SVGA3dCmdSetShaderConst; /* SVGA_3D_CMD_SET_SHADER_CONST */ + +typedef +struct { + uint32 cid; + SVGA3dShaderType type; + uint32 shid; +} SVGA3dCmdSetShader; /* SVGA_3D_CMD_SET_SHADER */ + +typedef +struct { + uint32 cid; + SVGA3dQueryType type; +} SVGA3dCmdBeginQuery; /* SVGA_3D_CMD_BEGIN_QUERY */ + +typedef +struct { + uint32 cid; + SVGA3dQueryType type; + SVGAGuestPtr guestResult; /* Points to an SVGA3dQueryResult structure */ +} SVGA3dCmdEndQuery; /* SVGA_3D_CMD_END_QUERY */ + +typedef +struct { + uint32 cid; /* Same parameters passed to END_QUERY */ + SVGA3dQueryType type; + SVGAGuestPtr guestResult; +} SVGA3dCmdWaitForQuery; /* SVGA_3D_CMD_WAIT_FOR_QUERY */ + +typedef +struct { + uint32 totalSize; /* Set by guest before query is ended. */ + SVGA3dQueryState state; /* Set by host or guest. See SVGA3dQueryState. */ + union { /* Set by host on exit from PENDING state */ + uint32 result32; + }; +} SVGA3dQueryResult; + +/* + * SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN -- + * + * This is a blit from an SVGA3D surface to a Screen Object. Just + * like GMR-to-screen blits, this blit may be directed at a + * specific screen or to the virtual coordinate space. + * + * The blit copies from a rectangular region of an SVGA3D surface + * image to a rectangular region of a screen or screens. 
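+ *
+ * As a rough example, a simple present-style blit of mip level 0 of a
+ * surface to the whole of screen 0 would set srcImage = { sid, 0, 0 },
+ * make srcRect and destRect the same size, set destScreenId = 0, and
+ * append no clip rectangles to the command.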
+ *
+ * This command takes an optional variable-length list of clipping
+ * rectangles after the body of the command. If no rectangles are
+ * specified, there is no clipping region. The entire destRect is
+ * drawn to. If one or more rectangles are included, they describe
+ * a clipping region. The clip rectangle coordinates are measured
+ * relative to the top-left corner of destRect.
+ *
+ * This clipping region serves multiple purposes:
+ *
+ *  - It can be used to perform an irregularly shaped blit more
+ *    efficiently than by issuing many separate blit commands.
+ *
+ *  - It is equivalent to allowing blits with non-integer
+ *    source coordinates. You could blit just one half-pixel
+ *    of a source, for example, by specifying a larger
+ *    destination rectangle than you need, then removing
+ *    part of it using a clip rectangle.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *
+ * Limitations:
+ *
+ *  - Currently, no backend supports blits from a mipmap or face
+ *    other than the first one.
+ */
+
+typedef
+struct {
+   SVGA3dSurfaceImageId srcImage;
+   SVGASignedRect       srcRect;
+   uint32               destScreenId;  /* Screen ID or SVGA_ID_INVALID for virt. coords */
+   SVGASignedRect       destRect;      /* Supports scaling if src/dest are different sizes */
+   /* Clipping: zero or more SVGASignedRects follow */
+} SVGA3dCmdBlitSurfaceToScreen;         /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */
+
+
+/*
+ * Capability query index.
+ *
+ * Notes:
+ *
+ *   1. SVGA3D_DEVCAP_MAX_TEXTURES reflects the maximum number of
+ *      fixed-function texture units available. Each of these units
+ *      works in both FFP and Shader modes and supports texture
+ *      transforms and texture coordinates. The host may have additional
+ *      texture image units that are only usable with shaders.
+ *
+ *   2. The BUFFER_FORMAT capabilities are deprecated, and they always
+ *      return TRUE. Even on physical hardware that does not support
+ *      these formats natively, the SVGA3D device will provide an emulation
+ *      which should be invisible to the guest OS.
+ *
+ *      In general, the SVGA3D device should support any operation on
+ *      any surface format; it just may perform some of these
+ *      operations in software depending on the capabilities of the
+ *      available physical hardware.
+ *
+ *      XXX: In the future, we will add capabilities that describe in
+ *      detail what formats are supported in hardware for what kinds
+ *      of operations.
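+ *
+ *   As a sketch of how these are consumed: a guest typically reads the
+ *   capability block the host publishes through SVGA_FIFO_3D_CAPS
+ *   (defined in svga_reg.h later in this series) and interprets each
+ *   value with the SVGA3dDevCapResult union that follows this enum,
+ *   e.g. treating SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH as a uint32 and
+ *   SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING as a boolean.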
+ */ + +typedef enum { + SVGA3D_DEVCAP_3D = 0, + SVGA3D_DEVCAP_MAX_LIGHTS = 1, + SVGA3D_DEVCAP_MAX_TEXTURES = 2, /* See note (1) */ + SVGA3D_DEVCAP_MAX_CLIP_PLANES = 3, + SVGA3D_DEVCAP_VERTEX_SHADER_VERSION = 4, + SVGA3D_DEVCAP_VERTEX_SHADER = 5, + SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION = 6, + SVGA3D_DEVCAP_FRAGMENT_SHADER = 7, + SVGA3D_DEVCAP_MAX_RENDER_TARGETS = 8, + SVGA3D_DEVCAP_S23E8_TEXTURES = 9, + SVGA3D_DEVCAP_S10E5_TEXTURES = 10, + SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND = 11, + SVGA3D_DEVCAP_D16_BUFFER_FORMAT = 12, /* See note (2) */ + SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT = 13, /* See note (2) */ + SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT = 14, /* See note (2) */ + SVGA3D_DEVCAP_QUERY_TYPES = 15, + SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING = 16, + SVGA3D_DEVCAP_MAX_POINT_SIZE = 17, + SVGA3D_DEVCAP_MAX_SHADER_TEXTURES = 18, + SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH = 19, + SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT = 20, + SVGA3D_DEVCAP_MAX_VOLUME_EXTENT = 21, + SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT = 22, + SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO = 23, + SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY = 24, + SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT = 25, + SVGA3D_DEVCAP_MAX_VERTEX_INDEX = 26, + SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS = 27, + SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS = 28, + SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS = 29, + SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS = 30, + SVGA3D_DEVCAP_TEXTURE_OPS = 31, + SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8 = 32, + SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8 = 33, + SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10 = 34, + SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5 = 35, + SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5 = 36, + SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4 = 37, + SVGA3D_DEVCAP_SURFACEFMT_R5G6B5 = 38, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16 = 39, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8 = 40, + SVGA3D_DEVCAP_SURFACEFMT_ALPHA8 = 41, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8 = 42, + SVGA3D_DEVCAP_SURFACEFMT_Z_D16 = 43, + SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8 = 44, + SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8 = 45, + SVGA3D_DEVCAP_SURFACEFMT_DXT1 = 46, + SVGA3D_DEVCAP_SURFACEFMT_DXT2 = 47, + SVGA3D_DEVCAP_SURFACEFMT_DXT3 = 48, + SVGA3D_DEVCAP_SURFACEFMT_DXT4 = 49, + SVGA3D_DEVCAP_SURFACEFMT_DXT5 = 50, + SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8 = 51, + SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10 = 52, + SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8 = 53, + SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8 = 54, + SVGA3D_DEVCAP_SURFACEFMT_CxV8U8 = 55, + SVGA3D_DEVCAP_SURFACEFMT_R_S10E5 = 56, + SVGA3D_DEVCAP_SURFACEFMT_R_S23E8 = 57, + SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5 = 58, + SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8 = 59, + SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5 = 60, + SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8 = 61, + SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES = 63, + + /* + * Note that MAX_SIMULTANEOUS_RENDER_TARGETS is a maximum count of color + * render targets. This does no include the depth or stencil targets. + */ + SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS = 64, + + SVGA3D_DEVCAP_SURFACEFMT_V16U16 = 65, + SVGA3D_DEVCAP_SURFACEFMT_G16R16 = 66, + SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16 = 67, + SVGA3D_DEVCAP_SURFACEFMT_UYVY = 68, + SVGA3D_DEVCAP_SURFACEFMT_YUY2 = 69, + + /* + * Don't add new caps into the previous section; the values in this + * enumeration must not change. You can put new values right before + * SVGA3D_DEVCAP_MAX. + */ + SVGA3D_DEVCAP_MAX /* This must be the last index. 
*/ +} SVGA3dDevCapIndex; + +typedef union { + Bool b; + uint32 u; + int32 i; + float f; +} SVGA3dDevCapResult; + +#endif /* _SVGA3D_REG_H_ */ diff --git a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h new file mode 100644 index 0000000000..2078c4a8a4 --- /dev/null +++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h @@ -0,0 +1,519 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_shaderdefs.h -- + * + * SVGA3D byte code format and limit definitions. + * + * The format of the byte code directly corresponds to that defined + * by Microsoft DirectX SDK 9.0c (file d3d9types.h). The format can + * also be extended so that different shader formats can be supported + * for example GLSL, ARB vp/fp, NV/ATI shader formats, etc. + * + */ + +#ifndef __SVGA3D_SHADER_DEFS__ +#define __SVGA3D_SHADER_DEFS__ + +/* SVGA3D shader hardware limits. */ + +#define SVGA3D_INPUTREG_MAX 16 +#define SVGA3D_OUTPUTREG_MAX 12 +#define SVGA3D_VERTEX_SAMPLERREG_MAX 4 +#define SVGA3D_PIXEL_SAMPLERREG_MAX 16 +#define SVGA3D_SAMPLERREG_MAX (SVGA3D_PIXEL_SAMPLERREG_MAX+\ + SVGA3D_VERTEX_SAMPLERREG_MAX) +#define SVGA3D_TEMPREG_MAX 32 +#define SVGA3D_CONSTREG_MAX 256 +#define SVGA3D_CONSTINTREG_MAX 16 +#define SVGA3D_CONSTBOOLREG_MAX 16 +#define SVGA3D_ADDRREG_MAX 1 +#define SVGA3D_PREDREG_MAX 1 + +/* SVGA3D byte code specific limits */ + +#define SVGA3D_MAX_SRC_REGS 4 +#define SVGA3D_MAX_NESTING_LEVEL 32 + +/* SVGA3D version information. 
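+ *
+ * For example, SVGA3D_VS_20 below expands to
+ * (0xFFFE << 16) | (2 << 8) = 0xFFFE0200, which the SVGA3dShaderVersion
+ * bit-fields decode as type 0xFFFE (vertex shader), major 2, minor 0.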
*/ + +#define SVGA3D_VS_TYPE 0xFFFE +#define SVGA3D_PS_TYPE 0xFFFF + +typedef struct { + union { + struct { + uint32 minor : 8; + uint32 major : 8; + uint32 type : 16; + }; + + uint32 value; + }; +} SVGA3dShaderVersion; + +#define SVGA3D_VS_10 ((SVGA3D_VS_TYPE << 16) | 1 << 8) +#define SVGA3D_VS_11 (SVGA3D_VS_10 | 1) +#define SVGA3D_VS_20 ((SVGA3D_VS_TYPE << 16) | 2 << 8) +#define SVGA3D_VS_30 ((SVGA3D_VS_TYPE << 16) | 3 << 8) + +#define SVGA3D_PS_10 ((SVGA3D_PS_TYPE << 16) | 1 << 8) +#define SVGA3D_PS_11 (SVGA3D_PS_10 | 1) +#define SVGA3D_PS_12 (SVGA3D_PS_10 | 2) +#define SVGA3D_PS_13 (SVGA3D_PS_10 | 3) +#define SVGA3D_PS_14 (SVGA3D_PS_10 | 4) +#define SVGA3D_PS_20 ((SVGA3D_PS_TYPE << 16) | 2 << 8) +#define SVGA3D_PS_30 ((SVGA3D_PS_TYPE << 16) | 3 << 8) + +/* The *_ENABLED are for backwards compatibility with old drivers */ +typedef enum { + SVGA3DPSVERSION_NONE = 0, + SVGA3DPSVERSION_ENABLED = 1, + SVGA3DPSVERSION_11 = 3, + SVGA3DPSVERSION_12 = 5, + SVGA3DPSVERSION_13 = 7, + SVGA3DPSVERSION_14 = 9, + SVGA3DPSVERSION_20 = 11, + SVGA3DPSVERSION_30 = 13, + SVGA3DPSVERSION_40 = 15, + SVGA3DPSVERSION_MAX +} SVGA3dPixelShaderVersion; + +typedef enum { + SVGA3DVSVERSION_NONE = 0, + SVGA3DVSVERSION_ENABLED = 1, + SVGA3DVSVERSION_11 = 3, + SVGA3DVSVERSION_20 = 5, + SVGA3DVSVERSION_30 = 7, + SVGA3DVSVERSION_40 = 9, + SVGA3DVSVERSION_MAX +} SVGA3dVertexShaderVersion; + +/* SVGA3D instruction op codes. */ + +typedef enum { + SVGA3DOP_NOP = 0, + SVGA3DOP_MOV, + SVGA3DOP_ADD, + SVGA3DOP_SUB, + SVGA3DOP_MAD, + SVGA3DOP_MUL, + SVGA3DOP_RCP, + SVGA3DOP_RSQ, + SVGA3DOP_DP3, + SVGA3DOP_DP4, + SVGA3DOP_MIN, + SVGA3DOP_MAX, + SVGA3DOP_SLT, + SVGA3DOP_SGE, + SVGA3DOP_EXP, + SVGA3DOP_LOG, + SVGA3DOP_LIT, + SVGA3DOP_DST, + SVGA3DOP_LRP, + SVGA3DOP_FRC, + SVGA3DOP_M4x4, + SVGA3DOP_M4x3, + SVGA3DOP_M3x4, + SVGA3DOP_M3x3, + SVGA3DOP_M3x2, + SVGA3DOP_CALL, + SVGA3DOP_CALLNZ, + SVGA3DOP_LOOP, + SVGA3DOP_RET, + SVGA3DOP_ENDLOOP, + SVGA3DOP_LABEL, + SVGA3DOP_DCL, + SVGA3DOP_POW, + SVGA3DOP_CRS, + SVGA3DOP_SGN, + SVGA3DOP_ABS, + SVGA3DOP_NRM, + SVGA3DOP_SINCOS, + SVGA3DOP_REP, + SVGA3DOP_ENDREP, + SVGA3DOP_IF, + SVGA3DOP_IFC, + SVGA3DOP_ELSE, + SVGA3DOP_ENDIF, + SVGA3DOP_BREAK, + SVGA3DOP_BREAKC, + SVGA3DOP_MOVA, + SVGA3DOP_DEFB, + SVGA3DOP_DEFI, + SVGA3DOP_TEXCOORD = 64, + SVGA3DOP_TEXKILL, + SVGA3DOP_TEX, + SVGA3DOP_TEXBEM, + SVGA3DOP_TEXBEML, + SVGA3DOP_TEXREG2AR, + SVGA3DOP_TEXREG2GB = 70, + SVGA3DOP_TEXM3x2PAD, + SVGA3DOP_TEXM3x2TEX, + SVGA3DOP_TEXM3x3PAD, + SVGA3DOP_TEXM3x3TEX, + SVGA3DOP_RESERVED0, + SVGA3DOP_TEXM3x3SPEC, + SVGA3DOP_TEXM3x3VSPEC, + SVGA3DOP_EXPP, + SVGA3DOP_LOGP, + SVGA3DOP_CND = 80, + SVGA3DOP_DEF, + SVGA3DOP_TEXREG2RGB, + SVGA3DOP_TEXDP3TEX, + SVGA3DOP_TEXM3x2DEPTH, + SVGA3DOP_TEXDP3, + SVGA3DOP_TEXM3x3, + SVGA3DOP_TEXDEPTH, + SVGA3DOP_CMP, + SVGA3DOP_BEM, + SVGA3DOP_DP2ADD = 90, + SVGA3DOP_DSX, + SVGA3DOP_DSY, + SVGA3DOP_TEXLDD, + SVGA3DOP_SETP, + SVGA3DOP_TEXLDL, + SVGA3DOP_BREAKP = 96, + SVGA3DOP_LAST_INST, + SVGA3DOP_PHASE = 0xFFFD, + SVGA3DOP_COMMENT = 0xFFFE, + SVGA3DOP_END = 0xFFFF, +} SVGA3dShaderOpCodeType; + +/* SVGA3D operation control/comparison function types */ + +typedef enum { + SVGA3DOPCONT_NONE, + SVGA3DOPCONT_PROJECT, /* Projective texturing */ + SVGA3DOPCONT_BIAS, /* Texturing with a LOD bias */ +} SVGA3dShaderOpCodeControlFnType; + +typedef enum { + SVGA3DOPCOMP_RESERVED0 = 0, + SVGA3DOPCOMP_GT, + SVGA3DOPCOMP_EQ, + SVGA3DOPCOMP_GE, + SVGA3DOPCOMP_LT, + SVGA3DOPCOMPC_NE, + SVGA3DOPCOMP_LE, + SVGA3DOPCOMP_RESERVED1 +} SVGA3dShaderOpCodeCompFnType; + +/* SVGA3D register 
types */ + +typedef enum { + SVGA3DREG_TEMP = 0, /* Temporary register file */ + SVGA3DREG_INPUT, /* Input register file */ + SVGA3DREG_CONST, /* Constant register file */ + SVGA3DREG_ADDR, /* Address register for VS */ + SVGA3DREG_TEXTURE = 3, /* Texture register file for PS */ + SVGA3DREG_RASTOUT, /* Rasterizer register file */ + SVGA3DREG_ATTROUT, /* Attribute output register file */ + SVGA3DREG_TEXCRDOUT, /* Texture coordinate output register file */ + SVGA3DREG_OUTPUT = 6, /* Output register file for VS 3.0+ */ + SVGA3DREG_CONSTINT, /* Constant integer vector register file */ + SVGA3DREG_COLOROUT, /* Color output register file */ + SVGA3DREG_DEPTHOUT, /* Depth output register file */ + SVGA3DREG_SAMPLER, /* Sampler state register file */ + SVGA3DREG_CONST2, /* Constant register file 2048 - 4095 */ + SVGA3DREG_CONST3, /* Constant register file 4096 - 6143 */ + SVGA3DREG_CONST4, /* Constant register file 6144 - 8191 */ + SVGA3DREG_CONSTBOOL, /* Constant boolean register file */ + SVGA3DREG_LOOP, /* Loop counter register file */ + SVGA3DREG_TEMPFLOAT16, /* 16-bit float temp register file */ + SVGA3DREG_MISCTYPE, /* Miscellaneous (single) registers */ + SVGA3DREG_LABEL, /* Label */ + SVGA3DREG_PREDICATE, /* Predicate register */ +} SVGA3dShaderRegType; + +/* SVGA3D rasterizer output register types */ + +typedef enum { + SVGA3DRASTOUT_POSITION = 0, + SVGA3DRASTOUT_FOG, + SVGA3DRASTOUT_PSIZE +} SVGA3dShaderRastOutRegType; + +/* SVGA3D miscellaneous register types */ + +typedef enum { + SVGA3DMISCREG_POSITION = 0, /* Input position x,y,z,rhw (PS) */ + SVGA3DMISCREG_FACE /* Floating point primitive area (PS) */ +} SVGA3DShaderMiscRegType; + +/* SVGA3D sampler types */ + +typedef enum { + SVGA3DSAMP_UNKNOWN = 0, /* Uninitialized value */ + SVGA3DSAMP_2D = 2, /* dcl_2d s# (for declaring a 2-D texture) */ + SVGA3DSAMP_CUBE, /* dcl_cube s# (for declaring a cube texture) */ + SVGA3DSAMP_VOLUME, /* dcl_volume s# (for declaring a volume texture) */ +} SVGA3dShaderSamplerType; + +/* SVGA3D sampler format classes */ + +typedef enum { + SVGA3DSAMPFORMAT_ARGB, /* ARGB formats */ + SVGA3DSAMPFORMAT_V8U8, /* Sign and normalize (SNORM) V & U */ + SVGA3DSAMPFORMAT_Q8W8V8U8, /* SNORM all */ + SVGA3DSAMPFORMAT_CxV8U8, /* SNORM V & U, C=SQRT(1-U^2-V^2) */ + SVGA3DSAMPFORMAT_X8L8V8U8, /* SNORM V & U */ + SVGA3DSAMPFORMAT_A2W10V10U10, /* SNORM W, V & U */ + SVGA3DSAMPFORMAT_DXT_PMA, /* DXT pre-multiplied alpha */ + SVGA3DSAMPFORMAT_YUV, /* YUV video format */ + SVGA3DSAMPFORMAT_UYVY, /* UYVY video format */ + SVGA3DSAMPFORMAT_Rx, /* R16F/32F */ + SVGA3DSAMPFORMAT_RxGx, /* R16FG16F, R32FG32F */ + SVGA3DSAMPFORMAT_V16U16, /* SNORM all */ +} SVGA3DShaderSamplerFormatClass; + +/* SVGA3D write mask */ + +#define SVGA3DWRITEMASK_0 1 /* Component 0 (X;Red) */ +#define SVGA3DWRITEMASK_1 2 /* Component 1 (Y;Green) */ +#define SVGA3DWRITEMASK_2 4 /* Component 2 (Z;Blue) */ +#define SVGA3DWRITEMASK_3 8 /* Component 3 (W;Alpha) */ +#define SVGA3DWRITEMASK_ALL 15 /* All components */ + +/* SVGA3D destination modifiers */ + +#define SVGA3DDSTMOD_NONE 0 /* nop */ +#define SVGA3DDSTMOD_SATURATE 1 /* clamp to [0, 1] */ +#define SVGA3DDSTMOD_PARTIALPRECISION 2 /* Partial precision hint */ + +/* + * Relevant to multisampling only: + * When the pixel center is not covered, sample + * attribute or compute gradients/LOD + * using multisample "centroid" location. + * "Centroid" is some location within the covered + * region of the pixel. 
+ */ + +#define SVGA3DDSTMOD_MSAMPCENTROID 4 + +/* SVGA3D source swizzle */ + +#define SVGA3DSWIZZLE_REPLICATEX 0x00 +#define SVGA3DSWIZZLE_REPLICATEY 0x55 +#define SVGA3DSWIZZLE_REPLICATEZ 0xAA +#define SVGA3DSWIZZLE_REPLICATEW 0xFF +#define SVGA3DSWIZZLE_NONE 0xE4 +#define SVGA3DSWIZZLE_YZXW 0xC9 +#define SVGA3DSWIZZLE_ZXYW 0xD2 +#define SVGA3DSWIZZLE_WXYZ 0x1B + +/* SVGA3D source modifiers */ + +typedef enum { + SVGA3DSRCMOD_NONE = 0, /* nop */ + SVGA3DSRCMOD_NEG, /* negate */ + SVGA3DSRCMOD_BIAS, /* bias */ + SVGA3DSRCMOD_BIASNEG, /* bias and negate */ + SVGA3DSRCMOD_SIGN, /* sign */ + SVGA3DSRCMOD_SIGNNEG, /* sign and negate */ + SVGA3DSRCMOD_COMP, /* complement */ + SVGA3DSRCMOD_X2, /* x2 */ + SVGA3DSRCMOD_X2NEG, /* x2 and negate */ + SVGA3DSRCMOD_DZ, /* divide through by z component */ + SVGA3DSRCMOD_DW, /* divide through by w component */ + SVGA3DSRCMOD_ABS, /* abs() */ + SVGA3DSRCMOD_ABSNEG, /* -abs() */ + SVGA3DSRCMOD_NOT, /* ! (for predicate register) */ +} SVGA3dShaderSrcModType; + +/* SVGA3D instruction token */ + +typedef struct { + union { + struct { + uint32 comment_op : 16; + uint32 comment_size : 16; + }; + + struct { + uint32 op : 16; + uint32 control : 3; + uint32 reserved2 : 5; + uint32 size : 4; + uint32 predicated : 1; + uint32 reserved1 : 1; + uint32 coissue : 1; + uint32 reserved0 : 1; + }; + + uint32 value; + }; +} SVGA3dShaderInstToken; + +/* SVGA3D destination parameter token */ + +typedef struct { + union { + struct { + uint32 num : 11; + uint32 type_upper : 2; + uint32 relAddr : 1; + uint32 reserved1 : 2; + uint32 mask : 4; + uint32 dstMod : 4; + uint32 shfScale : 4; + uint32 type_lower : 3; + uint32 reserved0 : 1; + }; + + uint32 value; + }; +} SVGA3dShaderDestToken; + +/* SVGA3D source parameter token */ + +typedef struct { + union { + struct { + uint32 num : 11; + uint32 type_upper : 2; + uint32 relAddr : 1; + uint32 reserved1 : 2; + uint32 swizzle : 8; + uint32 srcMod : 4; + uint32 type_lower : 3; + uint32 reserved0 : 1; + }; + + uint32 value; + }; +} SVGA3dShaderSrcToken; + +/* SVGA3DOP_DCL parameter tokens */ + +typedef struct { + union { + struct { + union { + struct { + uint32 usage : 5; + uint32 reserved1 : 11; + uint32 index : 4; + uint32 reserved0 : 12; + }; /* input / output declaration */ + + struct { + uint32 reserved3 : 27; + uint32 type : 4; + uint32 reserved2 : 1; + }; /* sampler declaration */ + }; + + SVGA3dShaderDestToken dst; + }; + + uint32 values[2]; + }; +} SVGA3DOpDclArgs; + +/* SVGA3DOP_DEF parameter tokens */ + +typedef struct { + union { + struct { + SVGA3dShaderDestToken dst; + + union { + float constValues[4]; + int constIValues[4]; + Bool constBValue; + }; + }; + + uint32 values[5]; + }; +} SVGA3DOpDefArgs; + +/* SVGA3D shader token */ + +typedef union { + uint32 value; + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dest; + SVGA3dShaderSrcToken src; +} SVGA3dShaderToken; + +/* SVGA3D shader program */ + +typedef struct { + SVGA3dShaderVersion version; + /* SVGA3dShaderToken stream */ +} SVGA3dShaderProgram; + +/* SVGA3D version specific register assignments */ + +static const uint32 SVGA3D_INPUT_REG_POSITION_VS11 = 0; +static const uint32 SVGA3D_INPUT_REG_PSIZE_VS11 = 1; +static const uint32 SVGA3D_INPUT_REG_FOG_VS11 = 3; +static const uint32 SVGA3D_INPUT_REG_FOG_MASK_VS11 = SVGA3DWRITEMASK_3; +static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_VS11 = 2; +static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_VS11 = 4; + +static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS11 = 0; +static const uint32 
SVGA3D_INPUT_REG_TEXCOORD_BASE_PS11 = 2; +static const uint32 SVGA3D_OUTPUT_REG_DEPTH_PS11 = 0; +static const uint32 SVGA3D_OUTPUT_REG_COLOR_PS11 = 1; + +static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS20 = 0; +static const uint32 SVGA3D_INPUT_REG_COLOR_NUM_PS20 = 2; +static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS20 = 2; +static const uint32 SVGA3D_INPUT_REG_TEXCOORD_NUM_PS20 = 8; +static const uint32 SVGA3D_OUTPUT_REG_COLOR_BASE_PS20 = 1; +static const uint32 SVGA3D_OUTPUT_REG_COLOR_NUM_PS20 = 4; +static const uint32 SVGA3D_OUTPUT_REG_DEPTH_BASE_PS20 = 0; +static const uint32 SVGA3D_OUTPUT_REG_DEPTH_NUM_PS20 = 1; + +/* + *---------------------------------------------------------------------- + * + * SVGA3dShaderGetRegType -- + * + * As the register type is split into two non sequential fields, + * this function provides an useful way of accessing the actual + * register type without having to manually concatenate the + * type_upper and type_lower fields. + * + * Results: + * Returns the register type. + * + *---------------------------------------------------------------------- + */ + +static INLINE SVGA3dShaderRegType +SVGA3dShaderGetRegType(uint32 token) +{ + SVGA3dShaderSrcToken src; + src.value = token; + return (SVGA3dShaderRegType)(src.type_upper << 3 | src.type_lower); +} + +#endif /* __SVGA3D_SHADER_DEFS__ */ diff --git a/src/gallium/drivers/svga/include/svga_reg.h b/src/gallium/drivers/svga/include/svga_reg.h new file mode 100644 index 0000000000..1b96c2ec07 --- /dev/null +++ b/src/gallium/drivers/svga/include/svga_reg.h @@ -0,0 +1,1346 @@ +/********************************************************** + * Copyright 1998-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga_reg.h -- + * + * Virtual hardware definitions for the VMware SVGA II device. + */ + +#ifndef _SVGA_REG_H_ +#define _SVGA_REG_H_ + +/* + * PCI device IDs. + */ +#define PCI_VENDOR_ID_VMWARE 0x15AD +#define PCI_DEVICE_ID_VMWARE_SVGA2 0x0405 + +/* + * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned + * cursor bypass mode. This is still supported, but no new guest + * drivers should use it. 
+ */
+#define SVGA_CURSOR_ON_HIDE            0x0   /* Must be 0 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_SHOW            0x1   /* Must be 1 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_REMOVE_FROM_FB  0x2   /* Remove the cursor from the framebuffer because we need to see what's under it */
+#define SVGA_CURSOR_ON_RESTORE_TO_FB   0x3   /* Put the cursor back in the framebuffer so the user can see it */
+
+/*
+ * The maximum framebuffer size that can be traced for, e.g., guests in VESA mode.
+ * The changeMap in the monitor is proportional to this number. Therefore, we'd
+ * like to keep it as small as possible to reduce monitor overhead (using
+ * SVGA_VRAM_MAX_SIZE for this increases the size of the shared area by over
+ * 4k!).
+ *
+ * NB: For compatibility reasons, this value must be greater than 0xff0000.
+ *     See bug 335072.
+ */
+#define SVGA_FB_MAX_TRACEABLE_SIZE      0x1000000
+
+#define SVGA_MAX_PSEUDOCOLOR_DEPTH      8
+#define SVGA_MAX_PSEUDOCOLORS           (1 << SVGA_MAX_PSEUDOCOLOR_DEPTH)
+#define SVGA_NUM_PALETTE_REGS           (3 * SVGA_MAX_PSEUDOCOLORS)
+
+#define SVGA_MAGIC         0x900000UL
+#define SVGA_MAKE_ID(ver)  (SVGA_MAGIC << 8 | (ver))
+
+/* Version 2 let the address of the frame buffer be unsigned on Win32 */
+#define SVGA_VERSION_2     2
+#define SVGA_ID_2          SVGA_MAKE_ID(SVGA_VERSION_2)
+
+/* Version 1 has new registers starting with SVGA_REG_CAPABILITIES so
+   PALETTE_BASE has moved */
+#define SVGA_VERSION_1     1
+#define SVGA_ID_1          SVGA_MAKE_ID(SVGA_VERSION_1)
+
+/* Version 0 is the initial version */
+#define SVGA_VERSION_0     0
+#define SVGA_ID_0          SVGA_MAKE_ID(SVGA_VERSION_0)
+
+/* "Invalid" value for all SVGA IDs. (Version ID, screen object ID, surface ID...) */
+#define SVGA_ID_INVALID    0xFFFFFFFF
+
+/* Port offsets, relative to BAR0 */
+#define SVGA_INDEX_PORT         0x0
+#define SVGA_VALUE_PORT         0x1
+#define SVGA_BIOS_PORT          0x2
+#define SVGA_IRQSTATUS_PORT     0x8
+
+/*
+ * Interrupt source flags for IRQSTATUS_PORT and IRQMASK.
+ *
+ * Interrupts are only supported when the
+ * SVGA_CAP_IRQMASK capability is present.
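+ *
+ * As a rough usage sketch: a guest first checks for SVGA_CAP_IRQMASK in
+ * SVGA_REG_CAPABILITIES, programs the interrupt sources it cares about
+ * into SVGA_REG_IRQMASK (e.g. SVGA_IRQFLAG_ANY_FENCE), and when an
+ * interrupt arrives reads the pending flags from SVGA_IRQSTATUS_PORT,
+ * acknowledging them by writing the same flags back to that port.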
+ */ +#define SVGA_IRQFLAG_ANY_FENCE 0x1 /* Any fence was passed */ +#define SVGA_IRQFLAG_FIFO_PROGRESS 0x2 /* Made forward progress in the FIFO */ +#define SVGA_IRQFLAG_FENCE_GOAL 0x4 /* SVGA_FIFO_FENCE_GOAL reached */ + +/* + * Registers + */ + +enum { + SVGA_REG_ID = 0, + SVGA_REG_ENABLE = 1, + SVGA_REG_WIDTH = 2, + SVGA_REG_HEIGHT = 3, + SVGA_REG_MAX_WIDTH = 4, + SVGA_REG_MAX_HEIGHT = 5, + SVGA_REG_DEPTH = 6, + SVGA_REG_BITS_PER_PIXEL = 7, /* Current bpp in the guest */ + SVGA_REG_PSEUDOCOLOR = 8, + SVGA_REG_RED_MASK = 9, + SVGA_REG_GREEN_MASK = 10, + SVGA_REG_BLUE_MASK = 11, + SVGA_REG_BYTES_PER_LINE = 12, + SVGA_REG_FB_START = 13, /* (Deprecated) */ + SVGA_REG_FB_OFFSET = 14, + SVGA_REG_VRAM_SIZE = 15, + SVGA_REG_FB_SIZE = 16, + + /* ID 0 implementation only had the above registers, then the palette */ + + SVGA_REG_CAPABILITIES = 17, + SVGA_REG_MEM_START = 18, /* (Deprecated) */ + SVGA_REG_MEM_SIZE = 19, + SVGA_REG_CONFIG_DONE = 20, /* Set when memory area configured */ + SVGA_REG_SYNC = 21, /* See "FIFO Synchronization Registers" */ + SVGA_REG_BUSY = 22, /* See "FIFO Synchronization Registers" */ + SVGA_REG_GUEST_ID = 23, /* Set guest OS identifier */ + SVGA_REG_CURSOR_ID = 24, /* (Deprecated) */ + SVGA_REG_CURSOR_X = 25, /* (Deprecated) */ + SVGA_REG_CURSOR_Y = 26, /* (Deprecated) */ + SVGA_REG_CURSOR_ON = 27, /* (Deprecated) */ + SVGA_REG_HOST_BITS_PER_PIXEL = 28, /* (Deprecated) */ + SVGA_REG_SCRATCH_SIZE = 29, /* Number of scratch registers */ + SVGA_REG_MEM_REGS = 30, /* Number of FIFO registers */ + SVGA_REG_NUM_DISPLAYS = 31, /* (Deprecated) */ + SVGA_REG_PITCHLOCK = 32, /* Fixed pitch for all modes */ + SVGA_REG_IRQMASK = 33, /* Interrupt mask */ + + /* Legacy multi-monitor support */ + SVGA_REG_NUM_GUEST_DISPLAYS = 34,/* Number of guest displays in X/Y direction */ + SVGA_REG_DISPLAY_ID = 35, /* Display ID for the following display attributes */ + SVGA_REG_DISPLAY_IS_PRIMARY = 36,/* Whether this is a primary display */ + SVGA_REG_DISPLAY_POSITION_X = 37,/* The display position x */ + SVGA_REG_DISPLAY_POSITION_Y = 38,/* The display position y */ + SVGA_REG_DISPLAY_WIDTH = 39, /* The display's width */ + SVGA_REG_DISPLAY_HEIGHT = 40, /* The display's height */ + + /* See "Guest memory regions" below. */ + SVGA_REG_GMR_ID = 41, + SVGA_REG_GMR_DESCRIPTOR = 42, + SVGA_REG_GMR_MAX_IDS = 43, + SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44, + + SVGA_REG_TRACES = 45, /* Enable trace-based updates even when FIFO is on */ + SVGA_REG_TOP = 46, /* Must be 1 more than the last register */ + + SVGA_PALETTE_BASE = 1024, /* Base of SVGA color map */ + /* Next 768 (== 256*3) registers exist for colormap */ + + SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS + /* Base of scratch registers */ + /* Next reg[SVGA_REG_SCRATCH_SIZE] registers exist for scratch usage: + First 4 are reserved for VESA BIOS Extension; any remaining are for + the use of the current SVGA driver. */ +}; + + +/* + * Guest memory regions (GMRs): + * + * This is a new memory mapping feature available in SVGA devices + * which have the SVGA_CAP_GMR bit set. Previously, there were two + * fixed memory regions available with which to share data between the + * device and the driver: the FIFO ('MEM') and the framebuffer. GMRs + * are our name for an extensible way of providing arbitrary DMA + * buffers for use between the driver and the SVGA device. They are a + * new alternative to framebuffer memory, usable for both 2D and 3D + * graphics operations. 
+ * + * Since GMR mapping must be done synchronously with guest CPU + * execution, we use a new pair of SVGA registers: + * + * SVGA_REG_GMR_ID -- + * + * Read/write. + * This register holds the 32-bit ID (a small positive integer) + * of a GMR to create, delete, or redefine. Writing this register + * has no side-effects. + * + * SVGA_REG_GMR_DESCRIPTOR -- + * + * Write-only. + * Writing this register will create, delete, or redefine the GMR + * specified by the above ID register. If this register is zero, + * the GMR is deleted. Any pointers into this GMR (including those + * currently being processed by FIFO commands) will be + * synchronously invalidated. + * + * If this register is nonzero, it must be the physical page + * number (PPN) of a data structure which describes the physical + * layout of the memory region this GMR should describe. The + * descriptor structure will be read synchronously by the SVGA + * device when this register is written. The descriptor need not + * remain allocated for the lifetime of the GMR. + * + * The guest driver should write SVGA_REG_GMR_ID first, then + * SVGA_REG_GMR_DESCRIPTOR. + * + * SVGA_REG_GMR_MAX_IDS -- + * + * Read-only. + * The SVGA device may choose to support a maximum number of + * user-defined GMR IDs. This register holds the number of supported + * IDs. (The maximum supported ID plus 1) + * + * SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH -- + * + * Read-only. + * The SVGA device may choose to put a limit on the total number + * of SVGAGuestMemDescriptor structures it will read when defining + * a single GMR. + * + * The descriptor structure is an array of SVGAGuestMemDescriptor + * structures. Each structure may do one of three things: + * + * - Terminate the GMR descriptor list. + * (ppn==0, numPages==0) + * + * - Add a PPN or range of PPNs to the GMR's virtual address space. + * (ppn != 0, numPages != 0) + * + * - Provide the PPN of the next SVGAGuestMemDescriptor, in order to + * support multi-page GMR descriptor tables without forcing the + * driver to allocate physically contiguous memory. + * (ppn != 0, numPages == 0) + * + * Note that each physical page of SVGAGuestMemDescriptor structures + * can describe at least 2MB of guest memory. If the driver needs to + * use more than one page of descriptor structures, it must use one of + * its SVGAGuestMemDescriptors to point to an additional page. The + * device will never automatically cross a page boundary. + * + * Once the driver has described a GMR, it is immediately available + * for use via any FIFO command that uses an SVGAGuestPtr structure. + * These pointers include a GMR identifier plus an offset into that + * GMR. + * + * The driver must check the SVGA_CAP_GMR bit before using the GMR + * registers. + */ + +/* + * Special GMR IDs, allowing SVGAGuestPtrs to point to framebuffer + * memory as well. In the future, these IDs could even be used to + * allow legacy memory regions to be redefined by the guest as GMRs. + * + * Using the guest framebuffer (GFB) at BAR1 for general purpose DMA + * is being phased out. Please try to use user-defined GMRs whenever + * possible. 
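+ *
+ * As a minimal sketch of defining a GMR (write_reg(), first_ppn, num_pages,
+ * gmr_id and descriptor_page_ppn are hypothetical names used only for
+ * illustration; write_reg() stands in for a helper that programs
+ * SVGA_INDEX_PORT/SVGA_VALUE_PORT):
+ *
+ *    SVGAGuestMemDescriptor desc[2];
+ *    desc[0].ppn      = first_ppn;    // pages backing the region
+ *    desc[0].numPages = num_pages;
+ *    desc[1].ppn      = 0;            // {0, 0} terminates the list
+ *    desc[1].numPages = 0;
+ *    write_reg(SVGA_REG_GMR_ID, gmr_id);
+ *    write_reg(SVGA_REG_GMR_DESCRIPTOR, descriptor_page_ppn);
+ *
+ * where 'descriptor_page_ppn' is the physical page number of the page
+ * holding 'desc'.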
+ */ +#define SVGA_GMR_NULL ((uint32) -1) +#define SVGA_GMR_FRAMEBUFFER ((uint32) -2) // Guest Framebuffer (GFB) + +typedef +struct SVGAGuestMemDescriptor { + uint32 ppn; + uint32 numPages; +} SVGAGuestMemDescriptor; + +typedef +struct SVGAGuestPtr { + uint32 gmrId; + uint32 offset; +} SVGAGuestPtr; + + +/* + * SVGAGMRImageFormat -- + * + * This is a packed representation of the source 2D image format + * for a GMR-to-screen blit. Currently it is defined as an encoding + * of the screen's color depth and bits-per-pixel, however, 16 bits + * are reserved for future use to identify other encodings (such as + * RGBA or higher-precision images). + * + * Currently supported formats: + * + * bpp depth Format Name + * --- ----- ----------- + * 32 24 32-bit BGRX + * 24 24 24-bit BGR + * 16 16 RGB 5-6-5 + * 16 15 RGB 5-5-5 + * + */ + +typedef +struct SVGAGMRImageFormat { + union { + struct { + uint32 bitsPerPixel : 8; + uint32 colorDepth : 8; + uint32 reserved : 16; // Must be zero + }; + + uint32 value; + }; +} SVGAGMRImageFormat; + +/* + * SVGAColorBGRX -- + * + * A 24-bit color format (BGRX), which does not depend on the + * format of the legacy guest framebuffer (GFB) or the current + * GMRFB state. + */ + +typedef +struct SVGAColorBGRX { + union { + struct { + uint32 b : 8; + uint32 g : 8; + uint32 r : 8; + uint32 x : 8; // Unused + }; + + uint32 value; + }; +} SVGAColorBGRX; + + +/* + * SVGASignedRect -- + * SVGASignedPoint -- + * + * Signed rectangle and point primitives. These are used by the new + * 2D primitives for drawing to Screen Objects, which can occupy a + * signed virtual coordinate space. + * + * SVGASignedRect specifies a half-open interval: the (left, top) + * pixel is part of the rectangle, but the (right, bottom) pixel is + * not. + */ + +typedef +struct SVGASignedRect { + int32 left; + int32 top; + int32 right; + int32 bottom; +} SVGASignedRect; + +typedef +struct SVGASignedPoint { + int32 x; + int32 y; +} SVGASignedPoint; + + +/* + * Capabilities + * + * Note the holes in the bitfield. Missing bits have been deprecated, + * and must not be reused. Those capabilities will never be reported + * by new versions of the SVGA device. + */ + +#define SVGA_CAP_NONE 0x00000000 +#define SVGA_CAP_RECT_COPY 0x00000002 +#define SVGA_CAP_CURSOR 0x00000020 +#define SVGA_CAP_CURSOR_BYPASS 0x00000040 // Legacy (Use Cursor Bypass 3 instead) +#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 // Legacy (Use Cursor Bypass 3 instead) +#define SVGA_CAP_8BIT_EMULATION 0x00000100 +#define SVGA_CAP_ALPHA_CURSOR 0x00000200 +#define SVGA_CAP_3D 0x00004000 +#define SVGA_CAP_EXTENDED_FIFO 0x00008000 +#define SVGA_CAP_MULTIMON 0x00010000 // Legacy multi-monitor support +#define SVGA_CAP_PITCHLOCK 0x00020000 +#define SVGA_CAP_IRQMASK 0x00040000 +#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 // Legacy multi-monitor support +#define SVGA_CAP_GMR 0x00100000 +#define SVGA_CAP_TRACES 0x00200000 + + +/* + * FIFO register indices. + * + * The FIFO is a chunk of device memory mapped into guest physmem. It + * is always treated as 32-bit words. + * + * The guest driver gets to decide how to partition it between + * - FIFO registers (there are always at least 4, specifying where the + * following data area is and how much data it contains; there may be + * more registers following these, depending on the FIFO protocol + * version in use) + * - FIFO data, written by the guest and slurped out by the VMX. + * These indices are 32-bit word offsets into the FIFO. 
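+ *
+ * As a sketch of initialization: the guest maps the FIFO memory region,
+ * writes SVGA_FIFO_MIN/MAX to carve out the register area and the data
+ * area, points SVGA_FIFO_NEXT_CMD and SVGA_FIFO_STOP at the start of the
+ * data area, and only then sets SVGA_REG_CONFIG_DONE to hand the FIFO to
+ * the device.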
+ */ + +enum { + /* + * Block 1 (basic registers): The originally defined FIFO registers. + * These exist and are valid for all versions of the FIFO protocol. + */ + + SVGA_FIFO_MIN = 0, + SVGA_FIFO_MAX, /* The distance from MIN to MAX must be at least 10K */ + SVGA_FIFO_NEXT_CMD, + SVGA_FIFO_STOP, + + /* + * Block 2 (extended registers): Mandatory registers for the extended + * FIFO. These exist if the SVGA caps register includes + * SVGA_CAP_EXTENDED_FIFO; some of them are valid only if their + * associated capability bit is enabled. + * + * Note that when originally defined, SVGA_CAP_EXTENDED_FIFO implied + * support only for (FIFO registers) CAPABILITIES, FLAGS, and FENCE. + * This means that the guest has to test individually (in most cases + * using FIFO caps) for the presence of registers after this; the VMX + * can define "extended FIFO" to mean whatever it wants, and currently + * won't enable it unless there's room for that set and much more. + */ + + SVGA_FIFO_CAPABILITIES = 4, + SVGA_FIFO_FLAGS, + // Valid with SVGA_FIFO_CAP_FENCE: + SVGA_FIFO_FENCE, + + /* + * Block 3a (optional extended registers): Additional registers for the + * extended FIFO, whose presence isn't actually implied by + * SVGA_CAP_EXTENDED_FIFO; these exist if SVGA_FIFO_MIN is high enough to + * leave room for them. + * + * These in block 3a, the VMX currently considers mandatory for the + * extended FIFO. + */ + + // Valid if exists (i.e. if extended FIFO enabled): + SVGA_FIFO_3D_HWVERSION, /* See SVGA3dHardwareVersion in svga3d_reg.h */ + // Valid with SVGA_FIFO_CAP_PITCHLOCK: + SVGA_FIFO_PITCHLOCK, + + // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: + SVGA_FIFO_CURSOR_ON, /* Cursor bypass 3 show/hide register */ + SVGA_FIFO_CURSOR_X, /* Cursor bypass 3 x register */ + SVGA_FIFO_CURSOR_Y, /* Cursor bypass 3 y register */ + SVGA_FIFO_CURSOR_COUNT, /* Incremented when any of the other 3 change */ + SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */ + + // Valid with SVGA_FIFO_CAP_RESERVE: + SVGA_FIFO_RESERVED, /* Bytes past NEXT_CMD with real contents */ + + /* + * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT: + * + * By default this is SVGA_ID_INVALID, to indicate that the cursor + * coordinates are specified relative to the virtual root. If this + * is set to a specific screen ID, cursor position is reinterpreted + * as a signed offset relative to that screen's origin. This is the + * only way to place the cursor on a non-rooted screen. + */ + SVGA_FIFO_CURSOR_SCREEN_ID, + + /* + * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new + * registers, but this must be done carefully and with judicious use of + * capability bits, since comparisons based on SVGA_FIFO_MIN aren't + * enough to tell you whether the register exists: we've shipped drivers + * and products that used SVGA_FIFO_3D_CAPS but didn't know about some of + * the earlier ones. The actual order of introduction was: + * - PITCHLOCK + * - 3D_CAPS + * - CURSOR_* (cursor bypass 3) + * - RESERVED + * So, code that wants to know whether it can use any of the + * aforementioned registers, or anything else added after PITCHLOCK and + * before 3D_CAPS, needs to reason about something other than + * SVGA_FIFO_MIN. + */ + + /* + * 3D caps block space; valid with 3D hardware version >= + * SVGA3D_HWVERSION_WS6_B1. + */ + SVGA_FIFO_3D_CAPS = 32, + SVGA_FIFO_3D_CAPS_LAST = 32 + 255, + + /* + * End of VMX's current definition of "extended-FIFO registers". 
+ * Registers before here are always enabled/disabled as a block; either + * the extended FIFO is enabled and includes all preceding registers, or + * it's disabled entirely. + * + * Block 3b (truly optional extended registers): Additional registers for + * the extended FIFO, which the VMX already knows how to enable and + * disable with correct granularity. + * + * Registers after here exist if and only if the guest SVGA driver + * sets SVGA_FIFO_MIN high enough to leave room for them. + */ + + // Valid if register exists: + SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */ + SVGA_FIFO_FENCE_GOAL, /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */ + SVGA_FIFO_BUSY, /* See "FIFO Synchronization Registers" */ + + /* + * Always keep this last. This defines the maximum number of + * registers we know about. At power-on, this value is placed in + * the SVGA_REG_MEM_REGS register, and we expect the guest driver + * to allocate this much space in FIFO memory for registers. + */ + SVGA_FIFO_NUM_REGS +}; + + +/* + * Definition of registers included in extended FIFO support. + * + * The guest SVGA driver gets to allocate the FIFO between registers + * and data. It must always allocate at least 4 registers, but old + * drivers stopped there. + * + * The VMX will enable extended FIFO support if and only if the guest + * left enough room for all registers defined as part of the mandatory + * set for the extended FIFO. + * + * Note that the guest drivers typically allocate the FIFO only at + * initialization time, not at mode switches, so it's likely that the + * number of FIFO registers won't change without a reboot. + * + * All registers less than this value are guaranteed to be present if + * svgaUser->fifo.extended is set. Any later registers must be tested + * individually for compatibility at each use (in the VMX). + * + * This value is used only by the VMX, so it can change without + * affecting driver compatibility; keep it that way? + */ +#define SVGA_FIFO_EXTENDED_MANDATORY_REGS (SVGA_FIFO_3D_CAPS_LAST + 1) + + +/* + * FIFO Synchronization Registers + * + * This explains the relationship between the various FIFO + * sync-related registers in IOSpace and in FIFO space. + * + * SVGA_REG_SYNC -- + * + * The SYNC register can be used in two different ways by the guest: + * + * 1. If the guest wishes to fully sync (drain) the FIFO, + * it will write once to SYNC then poll on the BUSY + * register. The FIFO is sync'ed once BUSY is zero. + * + * 2. If the guest wants to asynchronously wake up the host, + * it will write once to SYNC without polling on BUSY. + * Ideally it will do this after some new commands have + * been placed in the FIFO, and after reading a zero + * from SVGA_FIFO_BUSY. + * + * (1) is the original behaviour that SYNC was designed to + * support. Originally, a write to SYNC would implicitly + * trigger a read from BUSY. This causes us to synchronously + * process the FIFO. + * + * This behaviour has since been changed so that writing SYNC + * will *not* implicitly cause a read from BUSY. Instead, it + * makes a channel call which asynchronously wakes up the MKS + * thread. + * + * New guests can use this new behaviour to implement (2) + * efficiently. This lets guests get the host's attention + * without waiting for the MKS to poll, which gives us much + * better CPU utilization on SMP hosts and on UP hosts while + * we're blocked on the host GPU. + * + * Old guests shouldn't notice the behaviour change. 
SYNC was + * never guaranteed to process the entire FIFO, since it was + * bounded to a particular number of CPU cycles. Old guests will + * still loop on the BUSY register until the FIFO is empty. + * + * Writing to SYNC currently has the following side-effects: + * + * - Sets SVGA_REG_BUSY to TRUE (in the monitor) + * - Asynchronously wakes up the MKS thread for FIFO processing + * - The value written to SYNC is recorded as a "reason", for + * stats purposes. + * + * If SVGA_FIFO_BUSY is available, drivers are advised to only + * write to SYNC if SVGA_FIFO_BUSY is FALSE. Drivers should set + * SVGA_FIFO_BUSY to TRUE after writing to SYNC. The MKS will + * eventually set SVGA_FIFO_BUSY on its own, but this approach + * lets the driver avoid sending multiple asynchronous wakeup + * messages to the MKS thread. + * + * SVGA_REG_BUSY -- + * + * This register is set to TRUE when SVGA_REG_SYNC is written, + * and it reads as FALSE when the FIFO has been completely + * drained. + * + * Every read from this register causes us to synchronously + * process FIFO commands. There is no guarantee as to how many + * commands each read will process. + * + * CPU time spent processing FIFO commands will be billed to + * the guest. + * + * New drivers should avoid using this register unless they + * need to guarantee that the FIFO is completely drained. It + * is overkill for performing a sync-to-fence. Older drivers + * will use this register for any type of synchronization. + * + * SVGA_FIFO_BUSY -- + * + * This register is a fast way for the guest driver to check + * whether the FIFO is already being processed. It reads and + * writes at normal RAM speeds, with no monitor intervention. + * + * If this register reads as TRUE, the host is guaranteeing that + * any new commands written into the FIFO will be noticed before + * the MKS goes back to sleep. + * + * If this register reads as FALSE, no such guarantee can be + * made. + * + * The guest should use this register to quickly determine + * whether or not it needs to wake up the host. If the guest + * just wrote a command or group of commands that it would like + * the host to begin processing, it should: + * + * 1. Read SVGA_FIFO_BUSY. If it reads as TRUE, no further + * action is necessary. + * + * 2. Write TRUE to SVGA_FIFO_BUSY. This informs future guest + * code that we've already sent a SYNC to the host and we + * don't need to send a duplicate. + * + * 3. Write a reason to SVGA_REG_SYNC. This will send an + * asynchronous wakeup to the MKS thread. + */ + + +/* + * FIFO Capabilities + * + * Fence -- Fence register and command are supported + * Accel Front -- Front buffer only commands are supported + * Pitch Lock -- Pitch lock register is supported + * Video -- SVGA Video overlay units are supported + * Escape -- Escape command is supported + * + * XXX: Add longer descriptions for each capability, including a list + * of the new features that each capability provides. + * + * SVGA_FIFO_CAP_SCREEN_OBJECT -- + * + * Provides dynamic multi-screen rendering, for improved Unity and + * multi-monitor modes. With Screen Object, the guest can + * dynamically create and destroy 'screens', which can represent + * Unity windows or virtual monitors. Screen Object also provides + * strong guarantees that DMA operations happen only when + * guest-initiated. Screen Object deprecates the BAR1 guest + * framebuffer (GFB) and all commands that work only with the GFB. 
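To make the SVGA_REG_SYNC / SVGA_FIFO_BUSY interaction described above concrete, here is a minimal "wake the host" sketch. It assumes that 'fifo' is the mapped FIFO memory viewed as 32-bit words, that SVGA_FIFO_MIN holds the byte offset where command data begins (so a register exists only if it fits below that offset), and that svga_write_reg() is a platform-specific register-write helper; none of these helpers are defined by this header.

static void
svga_ring_doorbell(volatile uint32 *fifo, uint32 reason)
{
   /* SVGA_FIFO_BUSY is optional: it exists only if the register area
    * below SVGA_FIFO_MIN is large enough to contain it.
    */
   if (fifo[SVGA_FIFO_MIN] > SVGA_FIFO_BUSY * sizeof(uint32)) {
      if (fifo[SVGA_FIFO_BUSY])
         return;                  /* Host is already awake; nothing to do. */
      fifo[SVGA_FIFO_BUSY] = 1;   /* Tell later callers a wakeup was sent. */
   }

   /* Asynchronously wake the host; 'reason' is recorded only for stats. */
   svga_write_reg(SVGA_REG_SYNC, reason);
}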
+ * + * New registers: + * FIFO_CURSOR_SCREEN_ID, VIDEO_DATA_GMRID, VIDEO_DST_SCREEN_ID + * + * New 2D commands: + * DEFINE_SCREEN, DESTROY_SCREEN, DEFINE_GMRFB, BLIT_GMRFB_TO_SCREEN, + * BLIT_SCREEN_TO_GMRFB, ANNOTATION_FILL, ANNOTATION_COPY + * + * New 3D commands: + * BLIT_SURFACE_TO_SCREEN + * + * New guarantees: + * + * - The host will not read or write guest memory, including the GFB, + * except when explicitly initiated by a DMA command. + * + * - All DMA, including legacy DMA like UPDATE and PRESENT_READBACK, + * is guaranteed to complete before any subsequent FENCEs. + * + * - All legacy commands which affect a Screen (UPDATE, PRESENT, + * PRESENT_READBACK) as well as new Screen blit commands will + * all behave consistently as blits, and memory will be read + * or written in FIFO order. + * + * For example, if you PRESENT from one SVGA3D surface to multiple + * places on the screen, the data copied will always be from the + * SVGA3D surface at the time the PRESENT was issued in the FIFO. + * This was not necessarily true on devices without Screen Object. + * + * This means that on devices that support Screen Object, the + * PRESENT_READBACK command should not be necessary unless you + * actually want to read back the results of 3D rendering into + * system memory. (And for that, the BLIT_SCREEN_TO_GMRFB + * command provides a strict superset of functionality.) + * + * - When a screen is resized, either using Screen Object commands or + * legacy multimon registers, its contents are preserved. + */ + +#define SVGA_FIFO_CAP_NONE 0 +#define SVGA_FIFO_CAP_FENCE (1<<0) +#define SVGA_FIFO_CAP_ACCELFRONT (1<<1) +#define SVGA_FIFO_CAP_PITCHLOCK (1<<2) +#define SVGA_FIFO_CAP_VIDEO (1<<3) +#define SVGA_FIFO_CAP_CURSOR_BYPASS_3 (1<<4) +#define SVGA_FIFO_CAP_ESCAPE (1<<5) +#define SVGA_FIFO_CAP_RESERVE (1<<6) +#define SVGA_FIFO_CAP_SCREEN_OBJECT (1<<7) + + +/* + * FIFO Flags + * + * Accel Front -- Driver should use front buffer only commands + */ + +#define SVGA_FIFO_FLAG_NONE 0 +#define SVGA_FIFO_FLAG_ACCELFRONT (1<<0) +#define SVGA_FIFO_FLAG_RESERVED (1<<31) // Internal use only + +/* + * FIFO reservation sentinel value + */ + +#define SVGA_FIFO_RESERVED_UNKNOWN 0xffffffff + + +/* + * Video overlay support + */ + +#define SVGA_NUM_OVERLAY_UNITS 32 + + +/* + * Video capabilities that the guest is currently using + */ + +#define SVGA_VIDEO_FLAG_COLORKEY 0x0001 + + +/* + * Offsets for the video overlay registers + */ + +enum { + SVGA_VIDEO_ENABLED = 0, + SVGA_VIDEO_FLAGS, + SVGA_VIDEO_DATA_OFFSET, + SVGA_VIDEO_FORMAT, + SVGA_VIDEO_COLORKEY, + SVGA_VIDEO_SIZE, // Deprecated + SVGA_VIDEO_WIDTH, + SVGA_VIDEO_HEIGHT, + SVGA_VIDEO_SRC_X, + SVGA_VIDEO_SRC_Y, + SVGA_VIDEO_SRC_WIDTH, + SVGA_VIDEO_SRC_HEIGHT, + SVGA_VIDEO_DST_X, // Signed int32 + SVGA_VIDEO_DST_Y, // Signed int32 + SVGA_VIDEO_DST_WIDTH, + SVGA_VIDEO_DST_HEIGHT, + SVGA_VIDEO_PITCH_1, + SVGA_VIDEO_PITCH_2, + SVGA_VIDEO_PITCH_3, + SVGA_VIDEO_DATA_GMRID, // Optional, defaults to SVGA_GMR_FRAMEBUFFER + SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID) + SVGA_VIDEO_NUM_REGS +}; + + +/* + * SVGA Overlay Units + * + * width and height relate to the entire source video frame. + * srcX, srcY, srcWidth and srcHeight represent subset of the source + * video frame to be displayed. 
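The SVGA_VIDEO_* indices above line up one-for-one with the 32-bit fields of SVGAOverlayUnit (defined just below), which is why drivers can treat an overlay unit as a simple array of register values. A small sketch of that correspondence, assuming the one-to-one layout holds:

static uint32
svga_overlay_reg_value(const SVGAOverlayUnit *unit, uint32 regIndex)
{
   /* regIndex is one of the SVGA_VIDEO_* indices above. */
   const uint32 *words = (const uint32 *) unit;
   return regIndex < SVGA_VIDEO_NUM_REGS ? words[regIndex] : 0;
}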
+ */ + +typedef struct SVGAOverlayUnit { + uint32 enabled; + uint32 flags; + uint32 dataOffset; + uint32 format; + uint32 colorKey; + uint32 size; + uint32 width; + uint32 height; + uint32 srcX; + uint32 srcY; + uint32 srcWidth; + uint32 srcHeight; + int32 dstX; + int32 dstY; + uint32 dstWidth; + uint32 dstHeight; + uint32 pitches[3]; + uint32 dataGMRId; + uint32 dstScreenId; +} SVGAOverlayUnit; + + +/* + * SVGAScreenObject -- + * + * This is a new way to represent a guest's multi-monitor screen or + * Unity window. Screen objects are only supported if the + * SVGA_FIFO_CAP_SCREEN_OBJECT capability bit is set. + * + * If Screen Objects are supported, they can be used to fully + * replace the functionality provided by the framebuffer registers + * (SVGA_REG_WIDTH, HEIGHT, etc.) and by SVGA_CAP_DISPLAY_TOPOLOGY. + * + * The screen object is a struct with guaranteed binary + * compatibility. New flags can be added, and the struct may grow, + * but existing fields must retain their meaning. + * + */ + +#define SVGA_SCREEN_HAS_ROOT (1 << 0) // Screen is present in the virtual coord space +#define SVGA_SCREEN_IS_PRIMARY (1 << 1) // Guest considers this screen to be 'primary' +#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) // Guest is running a fullscreen app here + +typedef +struct SVGAScreenObject { + uint32 structSize; // sizeof(SVGAScreenObject) + uint32 id; + uint32 flags; + struct { + uint32 width; + uint32 height; + } size; + struct { + int32 x; + int32 y; + } root; // Only used if SVGA_SCREEN_HAS_ROOT is set. +} SVGAScreenObject; + + +/* + * Commands in the command FIFO: + * + * Command IDs defined below are used for the traditional 2D FIFO + * communication (not all commands are available for all versions of the + * SVGA FIFO protocol). + * + * Note the holes in the command ID numbers: These commands have been + * deprecated, and the old IDs must not be reused. + * + * Command IDs from 1000 to 1999 are reserved for use by the SVGA3D + * protocol. + * + * Each command's parameters are described by the comments and + * structs below. + */ + +typedef enum { + SVGA_CMD_INVALID_CMD = 0, + SVGA_CMD_UPDATE = 1, + SVGA_CMD_RECT_COPY = 3, + SVGA_CMD_DEFINE_CURSOR = 19, + SVGA_CMD_DEFINE_ALPHA_CURSOR = 22, + SVGA_CMD_UPDATE_VERBOSE = 25, + SVGA_CMD_FRONT_ROP_FILL = 29, + SVGA_CMD_FENCE = 30, + SVGA_CMD_ESCAPE = 33, + SVGA_CMD_DEFINE_SCREEN = 34, + SVGA_CMD_DESTROY_SCREEN = 35, + SVGA_CMD_DEFINE_GMRFB = 36, + SVGA_CMD_BLIT_GMRFB_TO_SCREEN = 37, + SVGA_CMD_BLIT_SCREEN_TO_GMRFB = 38, + SVGA_CMD_ANNOTATION_FILL = 39, + SVGA_CMD_ANNOTATION_COPY = 40, + SVGA_CMD_MAX +} SVGAFifoCmdId; + +#define SVGA_CMD_MAX_ARGS 64 + + +/* + * SVGA_CMD_UPDATE -- + * + * This is a DMA transfer which copies from the Guest Framebuffer + * (GFB) at BAR1 + SVGA_REG_FB_OFFSET to any screens which + * intersect with the provided virtual rectangle. + * + * This command does not support using arbitrary guest memory as a + * data source- it only works with the pre-defined GFB memory. + * This command also does not support signed virtual coordinates. + * If you have defined screens (using SVGA_CMD_DEFINE_SCREEN) with + * negative root x/y coordinates, the negative portion of those + * screens will not be reachable by this command. + * + * This command is not necessary when using framebuffer + * traces. Traces are automatically enabled if the SVGA FIFO is + * disabled, and you may explicitly enable/disable traces using + * SVGA_REG_TRACES. 
With traces enabled, any write to the GFB will + * automatically act as if a subsequent SVGA_CMD_UPDATE was issued. + * + * Traces and SVGA_CMD_UPDATE are the only supported ways to render + * pseudocolor screen updates. The newer Screen Object commands + * only support true color formats. + * + * Availability: + * Always available. + */ + +typedef +struct { + uint32 x; + uint32 y; + uint32 width; + uint32 height; +} SVGAFifoCmdUpdate; + + +/* + * SVGA_CMD_RECT_COPY -- + * + * Perform a rectangular DMA transfer from one area of the GFB to + * another, and copy the result to any screens which intersect it. + * + * Availability: + * SVGA_CAP_RECT_COPY + */ + +typedef +struct { + uint32 srcX; + uint32 srcY; + uint32 destX; + uint32 destY; + uint32 width; + uint32 height; +} SVGAFifoCmdRectCopy; + + +/* + * SVGA_CMD_DEFINE_CURSOR -- + * + * Provide a new cursor image, as an AND/XOR mask. + * + * The recommended way to position the cursor overlay is by using + * the SVGA_FIFO_CURSOR_* registers, supported by the + * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability. + * + * Availability: + * SVGA_CAP_CURSOR + */ + +typedef +struct { + uint32 id; // Reserved, must be zero. + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + uint32 andMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL + uint32 xorMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL + /* + * Followed by scanline data for AND mask, then XOR mask. + * Each scanline is padded to a 32-bit boundary. + */ +} SVGAFifoCmdDefineCursor; + + +/* + * SVGA_CMD_DEFINE_ALPHA_CURSOR -- + * + * Provide a new cursor image, in 32-bit BGRA format. + * + * The recommended way to position the cursor overlay is by using + * the SVGA_FIFO_CURSOR_* registers, supported by the + * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability. + * + * Availability: + * SVGA_CAP_ALPHA_CURSOR + */ + +typedef +struct { + uint32 id; // Reserved, must be zero. + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + /* Followed by scanline data */ +} SVGAFifoCmdDefineAlphaCursor; + + +/* + * SVGA_CMD_UPDATE_VERBOSE -- + * + * Just like SVGA_CMD_UPDATE, but also provide a per-rectangle + * 'reason' value, an opaque cookie which is used by internal + * debugging tools. Third party drivers should not use this + * command. + * + * Availability: + * SVGA_CAP_EXTENDED_FIFO + */ + +typedef +struct { + uint32 x; + uint32 y; + uint32 width; + uint32 height; + uint32 reason; +} SVGAFifoCmdUpdateVerbose; + + +/* + * SVGA_CMD_FRONT_ROP_FILL -- + * + * This is a hint which tells the SVGA device that the driver has + * just filled a rectangular region of the GFB with a solid + * color. Instead of reading these pixels from the GFB, the device + * can assume that they all equal 'color'. This is primarily used + * for remote desktop protocols. + * + * Availability: + * SVGA_FIFO_CAP_ACCELFRONT + */ + +#define SVGA_ROP_COPY 0x03 + +typedef +struct { + uint32 color; // In the same format as the GFB + uint32 x; + uint32 y; + uint32 width; + uint32 height; + uint32 rop; // Must be SVGA_ROP_COPY +} SVGAFifoCmdFrontRopFill; + + +/* + * SVGA_CMD_FENCE -- + * + * Insert a synchronization fence. When the SVGA device reaches + * this command, it will copy the 'fence' value into the + * SVGA_FIFO_FENCE register. It will also compare the fence against + * SVGA_FIFO_FENCE_GOAL. If the fence matches the goal and the + * SVGA_IRQFLAG_FENCE_GOAL interrupt is enabled, the device will + * raise this interrupt. 
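A sketch of how a driver could use the FENCE command together with the SVGA_FIFO_FENCE register described above to implement a sync-to-fence. svga_fifo_reserve_cmd() / svga_fifo_commit_cmd() are hypothetical helpers that emit the 32-bit command id followed by its body, the signed wrap-around comparison is a common driver convention rather than something this header specifies, and SVGA_FIFO_CAP_FENCE is assumed to have been checked.

static uint32
svga_insert_fence(uint32 *fenceCounter)
{
   SVGAFifoCmdFence *cmd;   /* struct defined just below */
   uint32 fence = ++(*fenceCounter);

   cmd = svga_fifo_reserve_cmd(SVGA_CMD_FENCE, sizeof *cmd);
   cmd->fence = fence;
   svga_fifo_commit_cmd();
   return fence;
}

static void
svga_sync_to_fence(volatile uint32 *fifo, uint32 fence)
{
   /* The device copies every fence it reaches into SVGA_FIFO_FENCE, so the
    * target has passed once that register catches up with it.
    */
   while ((int32)(fifo[SVGA_FIFO_FENCE] - fence) < 0) {
      /* Optionally write SVGA_REG_SYNC here to keep the host processing. */
   }
}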
+ * + * Availability: + * SVGA_FIFO_FENCE for this command, + * SVGA_CAP_IRQMASK for SVGA_FIFO_FENCE_GOAL. + */ + +typedef +struct { + uint32 fence; +} SVGAFifoCmdFence; + + +/* + * SVGA_CMD_ESCAPE -- + * + * Send an extended or vendor-specific variable length command. + * This is used for video overlay, third party plugins, and + * internal debugging tools. See svga_escape.h + * + * Availability: + * SVGA_FIFO_CAP_ESCAPE + */ + +typedef +struct { + uint32 nsid; + uint32 size; + /* followed by 'size' bytes of data */ +} SVGAFifoCmdEscape; + + +/* + * SVGA_CMD_DEFINE_SCREEN -- + * + * Define or redefine an SVGAScreenObject. See the description of + * SVGAScreenObject above. The video driver is responsible for + * generating new screen IDs. They should be small positive + * integers. The virtual device will have an implementation + * specific upper limit on the number of screen IDs + * supported. Drivers are responsible for recycling IDs. The first + * valid ID is zero. + * + * - Interaction with other registers: + * + * For backwards compatibility, when the GFB mode registers (WIDTH, + * HEIGHT, PITCHLOCK, BITS_PER_PIXEL) are modified, the SVGA device + * deletes all screens other than screen #0, and redefines screen + * #0 according to the specified mode. Drivers that use + * SVGA_CMD_DEFINE_SCREEN should destroy or redefine screen #0. + * + * If you use screen objects, do not use the legacy multi-mon + * registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*). + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGAScreenObject screen; // Variable-length according to version +} SVGAFifoCmdDefineScreen; + + +/* + * SVGA_CMD_DESTROY_SCREEN -- + * + * Destroy an SVGAScreenObject. Its ID is immediately available for + * re-use. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + uint32 screenId; +} SVGAFifoCmdDestroyScreen; + + +/* + * SVGA_CMD_DEFINE_GMRFB -- + * + * This command sets a piece of SVGA device state called the + * Guest Memory Region Framebuffer, or GMRFB. The GMRFB is a + * piece of light-weight state which identifies the location and + * format of an image in guest memory or in BAR1. The GMRFB has + * an arbitrary size, and it doesn't need to match the geometry + * of the GFB or any screen object. + * + * The GMRFB can be redefined as often as you like. You could + * always use the same GMRFB, you could redefine it before + * rendering from a different guest screen, or you could even + * redefine it before every blit. + * + * There are multiple ways to use this command. The simplest way is + * to use it to move the framebuffer either to elsewhere in the GFB + * (BAR1) memory region, or to a user-defined GMR. This lets a + * driver use a framebuffer allocated entirely out of normal system + * memory, which we encourage. + * + * Another way to use this command is to set up a ring buffer of + * updates in GFB memory. If a driver wants to ensure that no + * frames are skipped by the SVGA device, it is important that the + * driver not modify the source data for a blit until the device is + * done processing the command. One efficient way to accomplish + * this is to use a ring of small DMA buffers. Each buffer is used + * for one blit, then we move on to the next buffer in the + * ring. The FENCE mechanism is used to protect each buffer from + * re-use until the device is finished with that buffer's + * corresponding blit. + * + * This command does not affect the meaning of SVGA_CMD_UPDATE. 
+ * UPDATEs always occur from the legacy GFB memory area. This + * command has no support for pseudocolor GMRFBs. Currently only + * true-color 15, 16, and 24-bit depths are supported. Future + * devices may expose capabilities for additional framebuffer + * formats. + * + * The default GMRFB value is undefined. Drivers must always send + * this command at least once before performing any blit from the + * GMRFB. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGAGuestPtr ptr; + uint32 bytesPerLine; + SVGAGMRImageFormat format; +} SVGAFifoCmdDefineGMRFB; + + +/* + * SVGA_CMD_BLIT_GMRFB_TO_SCREEN -- + * + * This is a guest-to-host blit. It performs a DMA operation to + * copy a rectangular region of pixels from the current GMRFB to + * one or more Screen Objects. + * + * The destination coordinate may be specified relative to a + * screen's origin (if a screen ID is specified) or relative to the + * virtual coordinate system's origin (if the screen ID is + * SVGA_ID_INVALID). The actual destination may span zero or more + * screens, in the case of a virtual destination rect or a rect + * which extends off the edge of the specified screen. + * + * This command writes to the screen's "base layer": the underlying + * framebuffer which exists below any cursor or video overlays. No + * action is necessary to explicitly hide or update any overlays + * which exist on top of the updated region. + * + * The SVGA device is guaranteed to finish reading from the GMRFB + * by the time any subsequent FENCE commands are reached. + * + * This command consumes an annotation. See the + * SVGA_CMD_ANNOTATION_* commands for details. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGASignedPoint srcOrigin; + SVGASignedRect destRect; + uint32 destScreenId; +} SVGAFifoCmdBlitGMRFBToScreen; + + +/* + * SVGA_CMD_BLIT_SCREEN_TO_GMRFB -- + * + * This is a host-to-guest blit. It performs a DMA operation to + * copy a rectangular region of pixels from a single Screen Object + * back to the current GMRFB. + * + * Usage note: This command should be used rarely. It will + * typically be inefficient, but it is necessary for some types of + * synchronization between 3D (GPU) and 2D (CPU) rendering into + * overlapping areas of a screen. + * + * The source coordinate is specified relative to a screen's + * origin. The provided screen ID must be valid. If any parameters + * are invalid, the resulting pixel values are undefined. + * + * This command reads the screen's "base layer". Overlays like + * video and cursor are not included, but any data which was sent + * using a blit-to-screen primitive will be available, no matter + * whether the data's original source was the GMRFB or the 3D + * acceleration hardware. + * + * Note that our guest-to-host blits and host-to-guest blits aren't + * symmetric in their current implementation. While the parameters + * are identical, host-to-guest blits are a lot less featureful. + * They do not support clipping: If the source parameters don't + * fully fit within a screen, the blit fails. They must originate + * from exactly one screen. Virtual coordinates are not directly + * supported. + * + * Host-to-guest blits do support the same set of GMRFB formats + * offered by guest-to-host blits. + * + * The SVGA device is guaranteed to finish writing to the GMRFB by + * the time any subsequent FENCE commands are reached. 
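Putting SVGA_CMD_DEFINE_GMRFB and SVGA_CMD_BLIT_GMRFB_TO_SCREEN together, a driver typically presents a dirty rectangle roughly as sketched below. svga_fifo_reserve_cmd() / svga_fifo_commit_cmd() are the same hypothetical FIFO helpers as in the fence sketch above, and the 32 bpp / 24-bit depth format values are only an example.

static void
svga_present_rect_from_gmr(uint32 gmrId, uint32 offset, uint32 bytesPerLine,
                           int32 x, int32 y, uint32 w, uint32 h)
{
   SVGAFifoCmdDefineGMRFB *gmrfb;
   SVGAFifoCmdBlitGMRFBToScreen *blit;

   /* Point the GMRFB at the image in guest memory. */
   gmrfb = svga_fifo_reserve_cmd(SVGA_CMD_DEFINE_GMRFB, sizeof *gmrfb);
   gmrfb->ptr.gmrId = gmrId;
   gmrfb->ptr.offset = offset;
   gmrfb->bytesPerLine = bytesPerLine;
   gmrfb->format.bitsPerPixel = 32;
   gmrfb->format.colorDepth = 24;
   gmrfb->format.reserved = 0;
   svga_fifo_commit_cmd();

   /* Blit the same rectangle to the virtual coordinate space. */
   blit = svga_fifo_reserve_cmd(SVGA_CMD_BLIT_GMRFB_TO_SCREEN, sizeof *blit);
   blit->srcOrigin.x = x;
   blit->srcOrigin.y = y;
   blit->destRect.left = x;
   blit->destRect.top = y;
   blit->destRect.right = x + (int32) w;
   blit->destRect.bottom = y + (int32) h;
   blit->destScreenId = SVGA_ID_INVALID;   /* relative to the virtual root */
   svga_fifo_commit_cmd();
}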
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint destOrigin;
+   SVGASignedRect srcRect;
+   uint32 srcScreenId;
+} SVGAFifoCmdBlitScreenToGMRFB;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_FILL --
+ *
+ *    This is a blit annotation. This command stores a small piece of
+ *    device state which is consumed by the next blit-to-screen
+ *    command. The state is only cleared by commands which are
+ *    specifically documented as consuming an annotation. Other
+ *    commands (such as ESCAPEs for debugging) may intervene between
+ *    the annotation and its associated blit.
+ *
+ *    This annotation is a promise about the contents of the next
+ *    blit: The video driver is guaranteeing that all pixels in that
+ *    blit will have the same value, specified here as a color in
+ *    SVGAColorBGRX format.
+ *
+ *    The SVGA device can still render the blit correctly even if it
+ *    ignores this annotation, but the annotation may allow it to
+ *    perform the blit more efficiently, for example by ignoring the
+ *    source data and performing a fill in hardware.
+ *
+ *    This annotation is most important for performance when the
+ *    user's display is being remoted over a network connection.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGAColorBGRX color;
+} SVGAFifoCmdAnnotationFill;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_COPY --
+ *
+ *    This is a blit annotation. See SVGA_CMD_ANNOTATION_FILL for more
+ *    information about annotations.
+ *
+ *    This annotation is a promise about the contents of the next
+ *    blit: The video driver is guaranteeing that all pixels in that
+ *    blit will have the same value as those which already exist at an
+ *    identically-sized region on the same or a different screen.
+ *
+ *    Note that the source pixels for the COPY in this annotation are
+ *    sampled before applying the annotation's associated blit. They
+ *    are allowed to overlap with the blit's destination pixels.
+ *
+ *    The copy source rectangle is specified the same way as the blit
+ *    destination: it can be a rectangle which spans zero or more
+ *    screens, specified relative to either a screen or to the virtual
+ *    coordinate system's origin. If the source rectangle includes
+ *    pixels which are not from exactly one screen, the results are
+ *    undefined.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint srcOrigin;
+   uint32 srcScreenId;
+} SVGAFifoCmdAnnotationCopy;
+
+#endif
diff --git a/src/gallium/drivers/svga/include/svga_types.h b/src/gallium/drivers/svga/include/svga_types.h
new file mode 100644
index 0000000000..7fd9bab03a
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_types.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef _SVGA_TYPES_H_
+#define _SVGA_TYPES_H_
+
+#include "pipe/p_compiler.h"
+
+typedef int64_t int64;
+typedef uint64_t uint64;
+
+typedef int32_t int32;
+typedef uint32_t uint32;
+
+typedef int16_t int16;
+typedef uint16_t uint16;
+
+typedef int8_t int8;
+typedef uint8_t uint8;
+
+typedef uint8_t Bool;
+
+#endif /* _SVGA_TYPES_H_ */
+
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
new file mode 100644
index 0000000000..a0da7d7e5d
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -0,0 +1,1427 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * svga_cmd.c --
+ *
+ *      Command construction utility for the SVGA3D protocol used by
+ *      the VMware SVGA device, based on the svgautil library.
+ */
+
+#include "svga_winsys.h"
+#include "svga_screen_buffer.h"
+#include "svga_screen_texture.h"
+#include "svga_cmd.h"
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * surface_to_surfaceid --
+ *
+ *      Utility function for surface ids.
+ *      Can handle a null surface. Performs a surface relocation, so the
+ *      FIFO space must already have been reserved before converting.
+ *
+ * Results:
+ *      id is filled out.
+ *
+ * Side effects:
+ *      One surface relocation is performed for the texture handle.
+ * + *---------------------------------------------------------------------- + */ + +static INLINE +void surface_to_surfaceid(struct svga_winsys_context *swc, // IN + struct pipe_surface *surface, // IN + SVGA3dSurfaceImageId *id, // OUT + unsigned flags) // IN +{ + if(surface) { + struct svga_surface *s = svga_surface(surface); + swc->surface_relocation(swc, &id->sid, s->handle, flags); + id->face = s->real_face; /* faces have the same order */ + id->mipmap = s->real_level; + } + else { + id->sid = SVGA3D_INVALID_ID; + id->face = 0; + id->mipmap = 0; + } +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_FIFOReserve -- + * + * Reserve space for an SVGA3D FIFO command. + * + * The 2D SVGA commands have been around for a while, so they + * have a rather asymmetric structure. The SVGA3D protocol is + * more uniform: each command begins with a header containing the + * command number and the full size. + * + * This is a convenience wrapper around SVGA_FIFOReserve. We + * reserve space for the whole command, and write the header. + * + * This function must be paired with SVGA_FIFOCommitAll(). + * + * Results: + * Returns a pointer to the space reserved for command-specific + * data. It must be 'cmdSize' bytes long. + * + * Side effects: + * Begins a FIFO reservation. + * + *---------------------------------------------------------------------- + */ + +void * +SVGA3D_FIFOReserve(struct svga_winsys_context *swc, + uint32 cmd, // IN + uint32 cmdSize, // IN + uint32 nr_relocs) // IN +{ + SVGA3dCmdHeader *header; + + header = swc->reserve(swc, sizeof *header + cmdSize, nr_relocs); + if(!header) + return NULL; + + header->id = cmd; + header->size = cmdSize; + + return &header[1]; +} + + +void +SVGA_FIFOCommitAll(struct svga_winsys_context *swc) +{ + swc->commit(swc); +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DefineContext -- + * + * Create a new context, to be referred to with the provided ID. + * + * Context objects encapsulate all render state, and shader + * objects are per-context. + * + * Surfaces are not per-context. The same surface can be shared + * between multiple contexts, and surface operations can occur + * without a context. + * + * If the provided context ID already existed, it is redefined. + * + * Context IDs are arbitrary small non-negative integers, + * global to the entire SVGA device. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DefineContext(struct svga_winsys_context *swc) // IN +{ + SVGA3dCmdDefineContext *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_CONTEXT_DEFINE, sizeof *cmd, 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DestroyContext -- + * + * Delete a context created with SVGA3D_DefineContext. + * + * Results: + * None. + * + * Side effects: + * None. 
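The reserve/commit pattern documented for SVGA3D_FIFOReserve() looks like this from a caller's point of view; the command body and id here are purely illustrative, and real commands go through the typed wrappers in the rest of this file.

typedef struct {
   uint32 cid;
   uint32 value;
} ExampleCmdBody;   /* hypothetical two-word command body */

static enum pipe_error
emit_example_cmd(struct svga_winsys_context *swc, uint32 cmdId, uint32 value)
{
   ExampleCmdBody *body;

   /* Reserve header plus body; this command needs no relocations. */
   body = SVGA3D_FIFOReserve(swc, cmdId, sizeof *body, 0);
   if (!body)
      return PIPE_ERROR_OUT_OF_MEMORY;

   body->cid = swc->cid;
   body->value = value;

   /* Every successful reserve is paired with exactly one commit. */
   SVGA_FIFOCommitAll(swc);
   return PIPE_OK;
}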
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroyContext(struct svga_winsys_context *swc)  // IN
+{
+   SVGA3dCmdDestroyContext *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_CONTEXT_DESTROY, sizeof *cmd, 0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginDefineSurface --
+ *
+ *      Begin a SURFACE_DEFINE command. This reserves space for it in
+ *      the FIFO, and returns pointers to the command's faces and
+ *      mipSizes arrays.
+ *
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *      The faces and mipSizes arrays are initialized to zero.
+ *
+ *      This creates a "surface" object in the SVGA3D device,
+ *      with the provided surface ID (sid). Surfaces are generic
+ *      containers for host VRAM objects like textures, vertex
+ *      buffers, and depth/stencil buffers.
+ *
+ *      Surfaces are hierarchical:
+ *
+ *        - Surface may have multiple faces (for cube maps)
+ *
+ *        - Each face has a list of mipmap levels
+ *
+ *        - Each mipmap image may have multiple volume
+ *          slices, if the image is three dimensional.
+ *
+ *        - Each slice is a 2D array of 'blocks'
+ *
+ *        - Each block may be one or more pixels.
+ *          (Usually 1, more for DXT or YUV formats.)
+ *
+ *      Surfaces are generic host VRAM objects. The SVGA3D device
+ *      may optimize surfaces according to the format they were
+ *      created with, but this format does not limit the ways in
+ *      which the surface may be used. For example, a depth surface
+ *      can be used as a texture, or a floating point image may
+ *      be used as a vertex buffer. Some surface usages may be
+ *      lower performance, due to software emulation, but any
+ *      usage should work with any surface.
+ *
+ *      If 'sid' is already defined, the old surface is deleted
+ *      and this new surface replaces it.
+ *
+ *      Surface IDs are arbitrary small non-negative integers,
+ *      global to the entire SVGA device.
+ *
+ * Results:
+ *      Returns pointers to arrays allocated in the FIFO for 'faces'
+ *      and 'mipSizes'.
+ *
+ * Side effects:
+ *      Begins a FIFO reservation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
+                          struct svga_winsys_surface *sid,  // IN
+                          SVGA3dSurfaceFlags flags,         // IN
+                          SVGA3dSurfaceFormat format,       // IN
+                          SVGA3dSurfaceFace **faces,        // OUT
+                          SVGA3dSize **mipSizes,            // OUT
+                          uint32 numMipSizes)               // IN
+{
+   SVGA3dCmdDefineSurface *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DEFINE, sizeof *cmd +
+                            sizeof **mipSizes * numMipSizes, 1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_WRITE);
+   cmd->surfaceFlags = flags;
+   cmd->format = format;
+
+   *faces = &cmd->face[0];
+   *mipSizes = (SVGA3dSize*) &cmd[1];
+
+   memset(*faces, 0, sizeof **faces * SVGA3D_MAX_SURFACE_FACES);
+   memset(*mipSizes, 0, sizeof **mipSizes * numMipSizes);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineSurface2D --
+ *
+ *      This is a simplified version of SVGA3D_BeginDefineSurface(),
+ *      which does not support cube maps, mipmaps, or volume textures.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
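For the general cases that SVGA3D_DefineSurface2D() below does not cover, a caller fills in the faces and mipSizes arrays itself. A sketch for a single-face 2D texture with a full mip chain (the level-halving rule is the usual convention, not something this function enforces):

static enum pipe_error
define_mipmapped_2d_surface(struct svga_winsys_context *swc,
                            struct svga_winsys_surface *sid,
                            SVGA3dSurfaceFormat format,
                            uint32 width, uint32 height, uint32 numLevels)
{
   SVGA3dSurfaceFace *faces;
   SVGA3dSize *mipSizes;
   enum pipe_error ret;
   uint32 i;

   ret = SVGA3D_BeginDefineSurface(swc, sid, 0, format,
                                   &faces, &mipSizes, numLevels);
   if (ret != PIPE_OK)
      return ret;

   faces[0].numMipLevels = numLevels;

   for (i = 0; i < numLevels; i++) {
      /* Each level is half the previous size, clamped to 1. */
      mipSizes[i].width = (width >> i) ? (width >> i) : 1;
      mipSizes[i].height = (height >> i) ? (height >> i) : 1;
      mipSizes[i].depth = 1;
   }

   SVGA_FIFOCommitAll(swc);
   return PIPE_OK;
}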
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineSurface2D(struct svga_winsys_context *swc,    // IN
+                       struct svga_winsys_surface *sid,    // IN
+                       uint32 width,                       // IN
+                       uint32 height,                      // IN
+                       SVGA3dSurfaceFormat format)         // IN
+{
+   SVGA3dSize *mipSizes;
+   SVGA3dSurfaceFace *faces;
+   enum pipe_error ret;
+
+   ret = SVGA3D_BeginDefineSurface(swc,
+                                   sid, 0, format, &faces, &mipSizes, 1);
+   if(ret != PIPE_OK)
+      return ret;
+
+   faces[0].numMipLevels = 1;
+
+   mipSizes[0].width = width;
+   mipSizes[0].height = height;
+   mipSizes[0].depth = 1;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroySurface --
+ *
+ *      Release the host VRAM encapsulated by a particular surface ID.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroySurface(struct svga_winsys_context *swc,
+                      struct svga_winsys_surface *sid)  // IN
+{
+   SVGA3dCmdDestroySurface *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DESTROY, sizeof *cmd, 1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_READ);
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSurfaceDMA --
+ *
+ *      Begin a SURFACE_DMA command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's box array.
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      When the SVGA3D device asynchronously processes this FIFO
+ *      command, a DMA operation is performed between host VRAM and
+ *      a generic SVGAGuestPtr. The guest pointer may refer to guest
+ *      VRAM (provided by the SVGA PCI device) or to guest system
+ *      memory that has been set up as a Guest Memory Region (GMR)
+ *      by the SVGA device.
+ *
+ *      The guest's DMA buffer must remain valid (not freed, paged out,
+ *      or overwritten) until the host has finished processing this
+ *      command. The guest can determine that the host has finished
+ *      by using the SVGA device's FIFO Fence mechanism.
+ *
+ *      The guest's image buffer can be an arbitrary size and shape.
+ *      Guest image data is interpreted according to the SVGA3D surface
+ *      format specified when the surface was defined.
+ *
+ *      The caller may optionally define the guest image's pitch.
+ *      guestImage->pitch can either be zero (assume image is tightly
+ *      packed) or it must be the number of bytes between vertically
+ *      adjacent image blocks.
+ *
+ *      The provided copybox list specifies which regions of the source
+ *      image are to be copied, and where they appear on the destination.
+ *
+ *      NOTE: srcx/srcy are always on the guest image and x/y are
+ *      always on the host image, regardless of the actual transfer
+ *      direction!
+ *
+ *      For efficiency, the SVGA3D device is free to copy more data
+ *      than specified. For example, it may round copy boxes outwards
+ *      such that they lie on particular alignment boundaries.
+ * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, + struct svga_transfer *st, // IN + SVGA3dTransferType transfer, // IN + const SVGA3dCopyBox *boxes, // IN + uint32 numBoxes) // IN +{ + struct svga_texture *texture = svga_texture(st->base.texture); + SVGA3dCmdSurfaceDMA *cmd; + SVGA3dCmdSurfaceDMASuffix *pSuffix; + uint32 boxesSize = sizeof *boxes * numBoxes; + unsigned region_flags; + unsigned surface_flags; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_READ; + surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + } + else if(transfer == SVGA3D_READ_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + surface_flags = PIPE_BUFFER_USAGE_GPU_READ; + } + else { + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DMA, + sizeof *cmd + boxesSize + sizeof *pSuffix, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->region_relocation(swc, &cmd->guest.ptr, st->hwbuf, 0, region_flags); + cmd->guest.pitch = st->base.stride; + + swc->surface_relocation(swc, &cmd->host.sid, texture->handle, surface_flags); + cmd->host.face = st->base.face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ + cmd->host.mipmap = st->base.level; + + cmd->transfer = transfer; + + memcpy(&cmd[1], boxes, boxesSize); + + pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + boxesSize); + pSuffix->suffixSize = sizeof *pSuffix; + pSuffix->maximumOffset = st->hw_nblocksy*st->base.stride; + memset(&pSuffix->flags, 0, sizeof pSuffix->flags); + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_BufferDMA(struct svga_winsys_context *swc, + struct svga_winsys_buffer *guest, + struct svga_winsys_surface *host, + SVGA3dTransferType transfer, // IN + uint32 size, // IN + uint32 offset, // IN + SVGA3dSurfaceDMAFlags flags) // IN +{ + SVGA3dCmdSurfaceDMA *cmd; + SVGA3dCopyBox *box; + SVGA3dCmdSurfaceDMASuffix *pSuffix; + unsigned region_flags; + unsigned surface_flags; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_READ; + surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + } + else if(transfer == SVGA3D_READ_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + surface_flags = PIPE_BUFFER_USAGE_GPU_READ; + } + else { + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DMA, + sizeof *cmd + sizeof *box + sizeof *pSuffix, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags); + cmd->guest.pitch = 0; + + swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags); + cmd->host.face = 0; + cmd->host.mipmap = 0; + + cmd->transfer = transfer; + + box = (SVGA3dCopyBox *)&cmd[1]; + box->x = offset; + box->y = 0; + box->z = 0; + box->w = size; + box->h = 1; + box->d = 1; + box->srcx = offset; + box->srcy = 0; + box->srcz = 0; + + pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + sizeof *box); + pSuffix->suffixSize = sizeof *pSuffix; + pSuffix->maximumOffset = offset + size; + pSuffix->flags = flags; + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetRenderTarget -- + * + * Bind a surface object to a particular render target attachment + * point on the current context. 
Render target attachment points + * exist for color buffers, a depth buffer, and a stencil buffer. + * + * The SVGA3D device is quite lenient about the types of surfaces + * that may be used as render targets. The color buffers must + * all be the same size, but the depth and stencil buffers do not + * have to be the same size as the color buffer. All attachments + * are optional. + * + * Some combinations of render target formats may require software + * emulation, depending on the capabilities of the host graphics + * API and graphics hardware. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetRenderTarget(struct svga_winsys_context *swc, + SVGA3dRenderTargetType type, // IN + struct pipe_surface *surface) // IN +{ + SVGA3dCmdSetRenderTarget *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETRENDERTARGET, sizeof *cmd, 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + + cmd->cid = swc->cid; + + cmd->type = type; + + surface_to_surfaceid(swc, surface, &cmd->target, PIPE_BUFFER_USAGE_GPU_WRITE); + + swc->commit(swc); + + return PIPE_OK; +} + + + + + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DefineShader -- + * + * Upload the bytecode for a new shader. The bytecode is "SVGA3D + * format", which is theoretically a binary-compatible superset + * of Microsoft's DirectX shader bytecode. In practice, the + * SVGA3D bytecode doesn't yet have any extensions to DirectX's + * bytecode format. + * + * The SVGA3D device supports shader models 1.1 through 2.0. + * + * The caller chooses a shader ID (small positive integer) by + * which this shader will be identified in future commands. This + * ID is in a namespace which is per-context and per-shader-type. + * + * 'bytecodeLen' is specified in bytes. It must be a multiple of 4. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DefineShader(struct svga_winsys_context *swc, + uint32 shid, // IN + SVGA3dShaderType type, // IN + const uint32 *bytecode, // IN + uint32 bytecodeLen) // IN +{ + SVGA3dCmdDefineShader *cmd; + + assert(bytecodeLen % 4 == 0); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SHADER_DEFINE, sizeof *cmd + bytecodeLen, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->shid = shid; + cmd->type = type; + memcpy(&cmd[1], bytecode, bytecodeLen); + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DestroyShader -- + * + * Delete a shader that was created by SVGA3D_DefineShader. If + * the shader was the current vertex or pixel shader for its + * context, rendering results are undefined until a new shader is + * bound. + * + * Results: + * None. + * + * Side effects: + * None. 
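A sketch of how a caller pairs SVGA3D_DefineShader() with SVGA3D_SetShader() (defined later in this file) to upload and bind a pixel shader. SVGA3D_SHADERTYPE_PS is assumed to be the pixel-shader enumerant from svga3d_reg.h, and the token array would come from the driver's shader translator.

static enum pipe_error
upload_and_bind_ps(struct svga_winsys_context *swc, uint32 shid,
                   const uint32 *tokens, uint32 nrTokens)
{
   enum pipe_error ret;

   /* bytecodeLen is in bytes and must be a multiple of 4. */
   ret = SVGA3D_DefineShader(swc, shid, SVGA3D_SHADERTYPE_PS,
                             tokens, nrTokens * sizeof(uint32));
   if (ret != PIPE_OK)
      return ret;

   return SVGA3D_SetShader(swc, SVGA3D_SHADERTYPE_PS, shid);
}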
+ * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DestroyShader(struct svga_winsys_context *swc, + uint32 shid, // IN + SVGA3dShaderType type) // IN +{ + SVGA3dCmdDestroyShader *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SHADER_DESTROY, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->shid = shid; + cmd->type = type; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetShaderConst -- + * + * Set the value of a shader constant. + * + * Shader constants are analogous to uniform variables in GLSL, + * except that they belong to the render context rather than to + * an individual shader. + * + * Constants may have one of three types: A 4-vector of floats, + * a 4-vector of integers, or a single boolean flag. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetShaderConst(struct svga_winsys_context *swc, + uint32 reg, // IN + SVGA3dShaderType type, // IN + SVGA3dShaderConstType ctype, // IN + const void *value) // IN +{ + SVGA3dCmdSetShaderConst *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SET_SHADER_CONST, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->reg = reg; + cmd->type = type; + cmd->ctype = ctype; + + switch (ctype) { + + case SVGA3D_CONST_TYPE_FLOAT: + case SVGA3D_CONST_TYPE_INT: + memcpy(&cmd->values, value, sizeof cmd->values); + break; + + case SVGA3D_CONST_TYPE_BOOL: + memset(&cmd->values, 0, sizeof cmd->values); + cmd->values[0] = *(uint32*)value; + break; + + default: + assert(0); + break; + + } + swc->commit(swc); + + return PIPE_OK; +} + + + + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetShader -- + * + * Switch active shaders. This binds a new vertex or pixel shader + * to the specified context. + * + * A shader ID of SVGA3D_INVALID_ID unbinds any shader, switching + * back to the fixed function vertex or pixel pipeline. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, // IN + uint32 shid) // IN +{ + SVGA3dCmdSetShader *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SET_SHADER, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + cmd->shid = shid; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginClear -- + * + * Begin a CLEAR command. This reserves space for it in the FIFO, + * and returns a pointer to the command's rectangle array. This + * function must be paired with SVGA_FIFOCommitAll(). + * + * Clear is a rendering operation which fills a list of + * rectangles with constant values on all render target types + * indicated by 'flags'. + * + * Clear is not affected by clipping, depth test, or other + * render state which affects the fragment pipeline. + * + * Results: + * None. + * + * Side effects: + * May write to attached render target surfaces. 
+ * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginClear(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, // IN + uint32 color, // IN + float depth, // IN + uint32 stencil, // IN + SVGA3dRect **rects, // OUT + uint32 numRects) // IN +{ + SVGA3dCmdClear *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_CLEAR, + sizeof *cmd + sizeof **rects * numRects, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->clearFlag = flags; + cmd->color = color; + cmd->depth = depth; + cmd->stencil = stencil; + *rects = (SVGA3dRect*) &cmd[1]; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_ClearRect -- + * + * This is a simplified version of SVGA3D_BeginClear(). + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_ClearRect(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, // IN + uint32 color, // IN + float depth, // IN + uint32 stencil, // IN + uint32 x, // IN + uint32 y, // IN + uint32 w, // IN + uint32 h) // IN +{ + SVGA3dRect *rect; + enum pipe_error ret; + + ret = SVGA3D_BeginClear(swc, flags, color, depth, stencil, &rect, 1); + if(ret != PIPE_OK) + return PIPE_ERROR_OUT_OF_MEMORY; + + memset(rect, 0, sizeof *rect); + rect->x = x; + rect->y = y; + rect->w = w; + rect->h = h; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginDrawPrimitives -- + * + * Begin a DRAW_PRIMITIVES command. This reserves space for it in + * the FIFO, and returns a pointer to the command's arrays. + * This function must be paired with SVGA_FIFOCommitAll(). + * + * Drawing commands consist of two variable-length arrays: + * SVGA3dVertexDecl elements declare a set of vertex buffers to + * use while rendering, and SVGA3dPrimitiveRange elements specify + * groups of primitives each with an optional index buffer. + * + * The decls and ranges arrays are initialized to zero. + * + * Results: + * None. + * + * Side effects: + * May write to attached render target surfaces. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, + SVGA3dVertexDecl **decls, // OUT + uint32 numVertexDecls, // IN + SVGA3dPrimitiveRange **ranges, // OUT + uint32 numRanges) // IN +{ + SVGA3dCmdDrawPrimitives *cmd; + SVGA3dVertexDecl *declArray; + SVGA3dPrimitiveRange *rangeArray; + uint32 declSize = sizeof **decls * numVertexDecls; + uint32 rangeSize = sizeof **ranges * numRanges; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DRAW_PRIMITIVES, + sizeof *cmd + declSize + rangeSize, + numVertexDecls + numRanges); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->numVertexDecls = numVertexDecls; + cmd->numRanges = numRanges; + + declArray = (SVGA3dVertexDecl*) &cmd[1]; + rangeArray = (SVGA3dPrimitiveRange*) &declArray[numVertexDecls]; + + memset(declArray, 0, declSize); + memset(rangeArray, 0, rangeSize); + + *decls = declArray; + *ranges = rangeArray; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginSurfaceCopy -- + * + * Begin a SURFACE_COPY command. This reserves space for it in + * the FIFO, and returns a pointer to the command's arrays. 
This + * function must be paired with SVGA_FIFOCommitAll(). + * + * The box array is initialized with zeroes. + * + * Results: + * None. + * + * Side effects: + * Asynchronously copies a list of boxes from surface to surface. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc, + struct pipe_surface *src, // IN + struct pipe_surface *dest, // IN + SVGA3dCopyBox **boxes, // OUT + uint32 numBoxes) // IN +{ + SVGA3dCmdSurfaceCopy *cmd; + uint32 boxesSize = sizeof **boxes * numBoxes; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_COPY, sizeof *cmd + boxesSize, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ); + surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE); + *boxes = (SVGA3dCopyBox*) &cmd[1]; + + memset(*boxes, 0, boxesSize); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SurfaceStretchBlt -- + * + * Issue a SURFACE_STRETCHBLT command: an asynchronous + * surface-to-surface blit, with scaling. + * + * Results: + * None. + * + * Side effects: + * Asynchronously copies one box from surface to surface. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc, + struct pipe_surface *src, // IN + struct pipe_surface *dest, // IN + SVGA3dBox *boxSrc, // IN + SVGA3dBox *boxDest, // IN + SVGA3dStretchBltMode mode) // IN +{ + SVGA3dCmdSurfaceStretchBlt *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_STRETCHBLT, sizeof *cmd, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ); + surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE); + cmd->boxSrc = *boxSrc; + cmd->boxDest = *boxDest; + cmd->mode = mode; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetViewport -- + * + * Set the current context's viewport rectangle. The viewport + * is clipped to the dimensions of the current render target, + * then all rendering is clipped to the viewport. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetViewport(struct svga_winsys_context *swc, + SVGA3dRect *rect) // IN +{ + SVGA3dCmdSetViewport *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETVIEWPORT, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->rect = *rect; + swc->commit(swc); + + return PIPE_OK; +} + + + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetScissorRect -- + * + * Set the current context's scissor rectangle. If scissor + * is enabled then all rendering is clipped to the scissor. + * + * Results: + * None. + * + * Side effects: + * None. 
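+ *
+ * Usage sketch (a minimal illustration; 'fb_width' and 'fb_height'
+ * are placeholders for the current framebuffer size):
+ *
+ *    SVGA3dRect r;
+ *    r.x = 0;
+ *    r.y = 0;
+ *    r.w = fb_width;
+ *    r.h = fb_height;
+ *    SVGA3D_SetScissorRect(swc, &r);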
+ * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetScissorRect(struct svga_winsys_context *swc, + SVGA3dRect *rect) // IN +{ + SVGA3dCmdSetScissorRect *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETSCISSORRECT, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->rect = *rect; + swc->commit(swc); + + return PIPE_OK; +} + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetClipPlane -- + * + * Set one of the current context's clip planes. If the clip + * plane is enabled then all 3d rendering is clipped to against + * the plane. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error SVGA3D_SetClipPlane(struct svga_winsys_context *swc, + uint32 index, const float *plane) +{ + SVGA3dCmdSetClipPlane *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETCLIPPLANE, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->index = index; + cmd->plane[0] = plane[0]; + cmd->plane[1] = plane[1]; + cmd->plane[2] = plane[2]; + cmd->plane[3] = plane[3]; + swc->commit(swc); + + return PIPE_OK; +} + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetZRange -- + * + * Set the range of the depth buffer to use. 'min' and 'max' + * are values between 0.0 and 1.0. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetZRange(struct svga_winsys_context *swc, + float zMin, // IN + float zMax) // IN +{ + SVGA3dCmdSetZRange *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETZRANGE, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->zRange.min = zMin; + cmd->zRange.max = zMax; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginSetTextureState -- + * + * Begin a SETTEXTURESTATE command. This reserves space for it in + * the FIFO, and returns a pointer to the command's texture state + * array. This function must be paired with SVGA_FIFOCommitAll(). + * + * This command sets rendering state which is per-texture-unit. + * + * XXX: Individual texture states need documentation. However, + * they are very similar to the texture states defined by + * Direct3D. The D3D documentation is a good starting point + * for understanding SVGA3D texture states. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc, + SVGA3dTextureState **states, // OUT + uint32 numStates) // IN +{ + SVGA3dCmdSetTextureState *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETTEXTURESTATE, + sizeof *cmd + sizeof **states * numStates, + numStates); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + *states = (SVGA3dTextureState*) &cmd[1]; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginSetRenderState -- + * + * Begin a SETRENDERSTATE command. This reserves space for it in + * the FIFO, and returns a pointer to the command's texture state + * array. 
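+ * Each entry in the returned array is an SVGA3dRenderState, i.e. a
+ * render state token together with its value.  A minimal sketch (the
+ * particular token and value chosen here are assumptions):
+ *
+ *    SVGA3dRenderState *rs;
+ *    if (SVGA3D_BeginSetRenderState(swc, &rs, 1) == PIPE_OK) {
+ *       rs[0].state     = SVGA3D_RS_BLENDENABLE;
+ *       rs[0].uintValue = TRUE;
+ *       SVGA_FIFOCommitAll(swc);
+ *    }
+ *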
This function must be paired with SVGA_FIFOCommitAll(). + * + * This command sets rendering state which is global to the context. + * + * XXX: Individual render states need documentation. However, + * they are very similar to the render states defined by + * Direct3D. The D3D documentation is a good starting point + * for understanding SVGA3D render states. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc, + SVGA3dRenderState **states, // OUT + uint32 numStates) // IN +{ + SVGA3dCmdSetRenderState *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETRENDERSTATE, + sizeof *cmd + sizeof **states * numStates, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + *states = (SVGA3dRenderState*) &cmd[1]; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginQuery-- + * + * Issues a SVGA_3D_CMD_BEGIN_QUERY command. + * + * Results: + * None. + * + * Side effects: + * Commits space in the FIFO memory. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type) // IN +{ + SVGA3dCmdBeginQuery *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_BEGIN_QUERY, + sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_EndQuery-- + * + * Issues a SVGA_3D_CMD_END_QUERY command. + * + * Results: + * None. + * + * Side effects: + * Commits space in the FIFO memory. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, // IN + struct svga_winsys_buffer *buffer) // IN/OUT +{ + SVGA3dCmdEndQuery *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_END_QUERY, + sizeof *cmd, + 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + + swc->region_relocation(swc, &cmd->guestResult, buffer, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_WaitForQuery-- + * + * Issues a SVGA_3D_CMD_WAIT_FOR_QUERY command. This reserves space + * for it in the FIFO. This doesn't actually wait for the query to + * finish but instead tells the host to start a wait at the driver + * level. The caller can wait on the status variable in the + * guestPtr memory or send an insert fence instruction after this + * command and wait on the fence. + * + * Results: + * None. + * + * Side effects: + * Commits space in the FIFO memory. 
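+ *
+ * Typical flow for an occlusion query (a sketch only; the query type
+ * and 'buf' are assumptions, and error handling is omitted):
+ *
+ *    SVGA3D_BeginQuery(swc, SVGA3D_QUERYTYPE_OCCLUSION);
+ *       ... issue draw commands ...
+ *    SVGA3D_EndQuery(swc, SVGA3D_QUERYTYPE_OCCLUSION, buf);
+ *    SVGA3D_WaitForQuery(swc, SVGA3D_QUERYTYPE_OCCLUSION, buf);
+ *    ... then poll the query result written into 'buf', or insert a
+ *        fence after this command and wait on the fence ...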
+ * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_WaitForQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, // IN + struct svga_winsys_buffer *buffer) // IN/OUT +{ + SVGA3dCmdWaitForQuery *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_WAIT_FOR_QUERY, + sizeof *cmd, + 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + + swc->region_relocation(swc, &cmd->guestResult, buffer, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + swc->commit(swc); + + return PIPE_OK; +} diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h new file mode 100644 index 0000000000..8041054769 --- /dev/null +++ b/src/gallium/drivers/svga/svga_cmd.h @@ -0,0 +1,235 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga_cmd.h -- + * + * Command construction utility for the SVGA3D protocol used by + * the VMware SVGA device, based on the svgautil library. 
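+ *
+ * Most commands here reserve FIFO space, fill in a fixed-size command
+ * structure and commit it immediately.  The SVGA3D_Begin*() variants
+ * instead return a pointer to a variable-length array which the
+ * caller fills in before calling SVGA_FIFOCommitAll(), e.g. (a
+ * minimal sketch; the element counts and fill-in steps are
+ * placeholders):
+ *
+ *    SVGA3dVertexDecl *decls;
+ *    SVGA3dPrimitiveRange *ranges;
+ *    if (SVGA3D_BeginDrawPrimitives(swc, &decls, 1, &ranges, 1) == PIPE_OK) {
+ *       ... fill in decls[0] and ranges[0] ...
+ *       SVGA_FIFOCommitAll(swc);
+ *    }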
+ */ + +#ifndef __SVGA3D_H__ +#define __SVGA3D_H__ + + +#include "svga_types.h" +#include "svga_reg.h" +#include "svga3d_reg.h" + +#include "pipe/p_defines.h" + + +struct pipe_buffer; +struct pipe_surface; +struct svga_transfer; +struct svga_winsys_context; +struct svga_winsys_buffer; +struct svga_winsys_surface; + + +/* + * SVGA Device Interoperability + */ + +void * +SVGA3D_FIFOReserve(struct svga_winsys_context *swc, uint32 cmd, uint32 cmdSize, uint32 nr_relocs); + +void +SVGA_FIFOCommitAll(struct svga_winsys_context *swc); + + +/* + * Context Management + */ + +enum pipe_error +SVGA3D_DefineContext(struct svga_winsys_context *swc); + +enum pipe_error +SVGA3D_DestroyContext(struct svga_winsys_context *swc); + + +/* + * Surface Management + */ + +enum pipe_error +SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSurfaceFace **faces, + SVGA3dSize **mipSizes, + uint32 numMipSizes); +enum pipe_error +SVGA3D_DefineSurface2D(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid, + uint32 width, + uint32 height, + SVGA3dSurfaceFormat format); +enum pipe_error +SVGA3D_DestroySurface(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid); + + +/* + * Surface Operations + */ + +enum pipe_error +SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, + struct svga_transfer *st, + SVGA3dTransferType transfer, + const SVGA3dCopyBox *boxes, + uint32 numBoxes); + +enum pipe_error +SVGA3D_BufferDMA(struct svga_winsys_context *swc, + struct svga_winsys_buffer *guest, + struct svga_winsys_surface *host, + SVGA3dTransferType transfer, + uint32 size, + uint32 offset, + SVGA3dSurfaceDMAFlags flags); + +/* + * Drawing Operations + */ + + +enum pipe_error +SVGA3D_BeginClear(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, + uint32 color, float depth, uint32 stencil, + SVGA3dRect **rects, uint32 numRects); + +enum pipe_error +SVGA3D_ClearRect(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, uint32 color, float depth, + uint32 stencil, uint32 x, uint32 y, uint32 w, uint32 h); + +enum pipe_error +SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, + SVGA3dVertexDecl **decls, + uint32 numVertexDecls, + SVGA3dPrimitiveRange **ranges, + uint32 numRanges); + +/* + * Blits + */ + +enum pipe_error +SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc, + struct pipe_surface *src, + struct pipe_surface *dest, + SVGA3dCopyBox **boxes, uint32 numBoxes); + + +enum pipe_error +SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc, + struct pipe_surface *src, + struct pipe_surface *dest, + SVGA3dBox *boxSrc, SVGA3dBox *boxDest, + SVGA3dStretchBltMode mode); + +/* + * Shared FFP/Shader Render State + */ + +enum pipe_error +SVGA3D_SetRenderTarget(struct svga_winsys_context *swc, + SVGA3dRenderTargetType type, + struct pipe_surface *surface); + +enum pipe_error +SVGA3D_SetZRange(struct svga_winsys_context *swc, + float zMin, float zMax); + +enum pipe_error +SVGA3D_SetViewport(struct svga_winsys_context *swc, + SVGA3dRect *rect); + +enum pipe_error +SVGA3D_SetScissorRect(struct svga_winsys_context *swc, + SVGA3dRect *rect); + +enum pipe_error +SVGA3D_SetClipPlane(struct svga_winsys_context *swc, + uint32 index, const float *plane); + +enum pipe_error +SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc, + SVGA3dTextureState **states, + uint32 numStates); + +enum pipe_error +SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc, + SVGA3dRenderState 
**states, + uint32 numStates); + + +/* + * Shaders + */ + +enum pipe_error +SVGA3D_DefineShader(struct svga_winsys_context *swc, + uint32 shid, SVGA3dShaderType type, + const uint32 *bytecode, uint32 bytecodeLen); + +enum pipe_error +SVGA3D_DestroyShader(struct svga_winsys_context *swc, + uint32 shid, SVGA3dShaderType type); + +enum pipe_error +SVGA3D_SetShaderConst(struct svga_winsys_context *swc, + uint32 reg, SVGA3dShaderType type, + SVGA3dShaderConstType ctype, const void *value); + +enum pipe_error +SVGA3D_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, uint32 shid); + + +/* + * Queries + */ + +enum pipe_error +SVGA3D_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type); + +enum pipe_error +SVGA3D_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, + struct svga_winsys_buffer *buffer); + +enum pipe_error +SVGA3D_WaitForQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, + struct svga_winsys_buffer *buffer); + +#endif /* __SVGA3D_H__ */ diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c new file mode 100644 index 0000000000..c3de12b4a3 --- /dev/null +++ b/src/gallium/drivers/svga/svga_context.c @@ -0,0 +1,271 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" + +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_texture.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_swtnl.h" +#include "svga_draw.h" +#include "svga_debug.h" +#include "svga_state.h" + + +static void svga_destroy( struct pipe_context *pipe ) +{ + struct svga_context *svga = svga_context( pipe ); + unsigned shader; + + svga_cleanup_framebuffer( svga ); + svga_cleanup_tss_binding( svga ); + + svga_hwtnl_destroy( svga->hwtnl ); + + svga_cleanup_vertex_state(svga); + + svga->swc->destroy(svga->swc); + + svga_destroy_swtnl( svga ); + + u_upload_destroy( svga->upload_vb ); + u_upload_destroy( svga->upload_ib ); + + for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader) + pipe_buffer_reference( &svga->curr.cb[shader], NULL ); + + FREE( svga ); +} + +static unsigned int +svga_is_texture_referenced( struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level) +{ + struct svga_texture *tex = svga_texture(texture); + struct svga_screen *ss = svga_screen(pipe->screen); + + /** + * The screen does not cache texture writes. + */ + + if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle)) + return PIPE_UNREFERENCED; + + /** + * sws->surface_is_flushed() does not distinguish between read references + * and write references. So assume a reference is both. + */ + + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + +static unsigned int +svga_is_buffer_referenced( struct pipe_context *pipe, + struct pipe_buffer *buf) + +{ + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_buffer *sbuf = svga_buffer(buf); + + /** + * XXX: Check this. + * The screen may cache buffer writes, but when we map, we map out + * of those cached writes, so we don't need to set a + * PIPE_REFERENCED_FOR_WRITE flag for cached buffers. + */ + + if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle)) + return PIPE_UNREFERENCED; + + /** + * sws->surface_is_flushed() does not distinguish between read references + * and write references. So assume a reference is both, + * however, we make an exception for index- and vertex buffers, to avoid + * a flush in st_bufferobj_get_subdata, during display list replay. 
+ */ + + if (sbuf->base.usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX)) + return PIPE_REFERENCED_FOR_READ; + + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + + +struct pipe_context *svga_context_create( struct pipe_screen *screen ) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_context *svga = NULL; + enum pipe_error ret; + + svga = CALLOC_STRUCT(svga_context); + if (svga == NULL) + goto error1; + + svga->pipe.winsys = screen->winsys; + svga->pipe.screen = screen; + svga->pipe.destroy = svga_destroy; + svga->pipe.clear = svga_clear; + + svga->pipe.is_texture_referenced = svga_is_texture_referenced; + svga->pipe.is_buffer_referenced = svga_is_buffer_referenced; + + svga->swc = svgascreen->sws->context_create(svgascreen->sws); + if(!svga->swc) + goto error2; + + svga_init_blend_functions(svga); + svga_init_blit_functions(svga); + svga_init_depth_stencil_functions(svga); + svga_init_draw_functions(svga); + svga_init_flush_functions(svga); + svga_init_misc_functions(svga); + svga_init_rasterizer_functions(svga); + svga_init_sampler_functions(svga); + svga_init_fs_functions(svga); + svga_init_vs_functions(svga); + svga_init_vertex_functions(svga); + svga_init_constbuffer_functions(svga); + svga_init_query_functions(svga); + + /* debug */ + svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE); + svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE); + svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE); + svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); + + if (!svga_init_swtnl(svga)) + goto error3; + + svga->upload_ib = u_upload_create( svga->pipe.screen, + 32 * 1024, + 16, + PIPE_BUFFER_USAGE_INDEX ); + if (svga->upload_ib == NULL) + goto error4; + + svga->upload_vb = u_upload_create( svga->pipe.screen, + 128 * 1024, + 16, + PIPE_BUFFER_USAGE_VERTEX ); + if (svga->upload_vb == NULL) + goto error5; + + svga->hwtnl = svga_hwtnl_create( svga, + svga->upload_ib, + svga->swc ); + if (svga->hwtnl == NULL) + goto error6; + + + ret = svga_emit_initial_state( svga ); + if (ret) + goto error7; + + /* Avoid shortcircuiting state with initial value of zero. + */ + memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear)); + memset(&svga->state.hw_clear.framebuffer, 0x0, + sizeof(svga->state.hw_clear.framebuffer)); + + memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw)); + memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views)); + svga->state.hw_draw.num_views = 0; + + svga->dirty = ~0; + svga->state.white_fs_id = SVGA3D_INVALID_ID; + + LIST_INITHEAD(&svga->dirty_buffers); + + return &svga->pipe; + +error7: + svga_hwtnl_destroy( svga->hwtnl ); +error6: + u_upload_destroy( svga->upload_vb ); +error5: + u_upload_destroy( svga->upload_ib ); +error4: + svga_destroy_swtnl(svga); +error3: + svga->swc->destroy(svga->swc); +error2: + FREE(svga); +error1: + return NULL; +} + + +void svga_context_flush( struct svga_context *svga, + struct pipe_fence_handle **pfence ) +{ + struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); + + svga->curr.nr_fbs = 0; + + /* Unmap upload manager buffers: + */ + u_upload_flush(svga->upload_vb); + u_upload_flush(svga->upload_ib); + + /* Flush screen, to ensure that texture dma uploads are processed + * before submitting commands. 
+ */ + svga_screen_flush(svgascreen, NULL); + + svga_context_flush_buffers(svga); + + /* Flush pending commands to hardware: + */ + svga->swc->flush(svga->swc, pfence); + + if (SVGA_DEBUG & DEBUG_SYNC) { + if (pfence && *pfence) + svga->pipe.screen->fence_finish( svga->pipe.screen, *pfence, 0); + } +} + + +void svga_hwtnl_flush_retry( struct svga_context *svga ) +{ + enum pipe_error ret = PIPE_OK; + + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + svga_context_flush( svga, NULL ); + ret = svga_hwtnl_flush( svga->hwtnl ); + } + + assert(ret == 0); +} + diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h new file mode 100644 index 0000000000..e650a251d1 --- /dev/null +++ b/src/gallium/drivers/svga/svga_context.h @@ -0,0 +1,448 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#ifndef SVGA_CONTEXT_H +#define SVGA_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_double_list.h" + +#include "tgsi/tgsi_scan.h" + + +#define SVGA_TEX_UNITS 8 + +struct draw_vertex_shader; +struct svga_shader_result; +struct SVGACmdMemory; +struct u_upload_mgr; + + +struct svga_shader +{ + const struct tgsi_token *tokens; + + struct tgsi_shader_info info; + + struct svga_shader_result *results; + + unsigned id; + + boolean use_sm30; +}; + +struct svga_fragment_shader +{ + struct svga_shader base; +}; + +struct svga_vertex_shader +{ + struct svga_shader base; + + struct draw_vertex_shader *draw_shader; +}; + + +struct svga_cache_context; +struct svga_tracked_state; + +struct svga_blend_state { + + boolean need_white_fragments; + + /* Should be per-render-target: + */ + struct { + uint8_t writemask; + + boolean blend_enable; + uint8_t srcblend; + uint8_t dstblend; + uint8_t blendeq; + + boolean separate_alpha_blend_enable; + uint8_t srcblend_alpha; + uint8_t dstblend_alpha; + uint8_t blendeq_alpha; + + } rt[1]; +}; + +struct svga_depth_stencil_state { + unsigned zfunc:8; + unsigned zenable:1; + unsigned zwriteenable:1; + + unsigned alphatestenable:1; + unsigned alphafunc:8; + + struct { + unsigned enabled:1; + unsigned func:8; + unsigned fail:8; + unsigned zfail:8; + unsigned pass:8; + } stencil[2]; + + /* SVGA3D has one ref/mask/writemask triple shared between front & + * back face stencil. We really need two: + */ + unsigned stencil_ref:8; + unsigned stencil_mask:8; + unsigned stencil_writemask:8; + + float alpharef; +}; + +#define SVGA_UNFILLED_DISABLE 0 +#define SVGA_UNFILLED_LINE 1 +#define SVGA_UNFILLED_POINT 2 + +#define SVGA_PIPELINE_FLAG_POINTS (1<<PIPE_PRIM_POINTS) +#define SVGA_PIPELINE_FLAG_LINES (1<<PIPE_PRIM_LINES) +#define SVGA_PIPELINE_FLAG_TRIS (1<<PIPE_PRIM_TRIANGLES) + +struct svga_rasterizer_state { + struct pipe_rasterizer_state templ; /* needed for draw module */ + + unsigned shademode:8; + unsigned cullmode:8; + unsigned scissortestenable:1; + unsigned multisampleantialias:1; + unsigned antialiasedlineenable:1; + unsigned lastpixel:1; + + unsigned linepattern; + + float slopescaledepthbias; + float depthbias; + float pointsize; + float pointsize_min; + float pointsize_max; + + unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */ + unsigned need_pipeline:16; /* which prims do we need help for? */ +}; + +struct svga_sampler_state { + unsigned mipfilter; + unsigned magfilter; + unsigned minfilter; + unsigned aniso_level; + float lod_bias; + unsigned addressu; + unsigned addressv; + unsigned addressw; + unsigned bordercolor; + unsigned normalized_coords:1; + unsigned compare_mode:1; + unsigned compare_func:3; + + unsigned min_lod; + unsigned view_min_lod; + unsigned view_max_lod; +}; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct svga_state +{ + const struct svga_blend_state *blend; + const struct svga_depth_stencil_state *depth; + const struct svga_rasterizer_state *rast; + const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? 
*/ + struct svga_fragment_shader *fs; + struct svga_vertex_shader *vs; + + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; + struct pipe_buffer *cb[PIPE_SHADER_TYPES]; + + struct pipe_framebuffer_state framebuffer; + float depthscale; + + /* Hack to limit the number of different render targets between + * flushes. Helps avoid blowing out our surface cache in EXA. + */ + int nr_fbs; + + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_viewport_state viewport; + + const unsigned *edgeflags; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + unsigned reduced_prim; + + struct { + unsigned flag_1d; + unsigned flag_srgb; + } tex_flags; + + boolean any_user_vertex_buffers; + + unsigned zero_stride_vertex_elements; + unsigned num_zero_stride_vertex_elements; + /* ### maybe dynamically allocate this */ + float zero_stride_constants[PIPE_MAX_ATTRIBS*4]; +}; + +#define RS_MAX 97 +#define TS_MAX 30 +#define CB_MAX 256 + +struct svga_prescale { + float translate[4]; + float scale[4]; + boolean enabled; +}; + + +/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT ) + */ +struct svga_hw_clear_state +{ + struct { + unsigned x,y,w,h; + } viewport; + + struct { + float zmin, zmax; + } depthrange; + + struct pipe_framebuffer_state framebuffer; + struct svga_prescale prescale; +}; + +struct svga_hw_view_state +{ + struct pipe_texture *texture; + struct svga_sampler_view *v; + unsigned min_lod; + unsigned max_lod; + int dirty; +}; + +/* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW ) + */ +struct svga_hw_draw_state +{ + unsigned rs[RS_MAX]; + unsigned ts[16][TS_MAX]; + float cb[PIPE_SHADER_TYPES][CB_MAX][4]; + + unsigned shader_id[PIPE_SHADER_TYPES]; + + struct svga_shader_result *fs; + struct svga_shader_result *vs; + struct svga_hw_view_state views[PIPE_MAX_SAMPLERS]; + + unsigned num_views; +}; + + +/* Updated by calling svga_update_state( SVGA_STATE_NEED_SWTNL ) + */ +struct svga_sw_state +{ + unsigned ve_format[PIPE_MAX_ATTRIBS]; /* NEW_VELEMENT */ + + /* which parts we need */ + boolean need_swvfetch; + boolean need_pipeline; + boolean need_swtnl; +}; + + +/* Queue some state updates (like rss) and submit them to hardware in + * a single packet. 
+ */ +struct svga_hw_queue; + +struct svga_query; + +struct svga_context +{ + struct pipe_context pipe; + struct svga_winsys_context *swc; + + struct { + boolean no_swtnl; + boolean force_swtnl; + boolean use_min_mipmap; + + /* incremented for each shader */ + unsigned shader_id; + + unsigned disable_shader; + } debug; + + struct { + struct draw_context *draw; + struct vbuf_render *backend; + unsigned hw_prim; + boolean new_vbuf; + boolean new_vdecl; + } swtnl; + + struct { + unsigned dirty[4]; + + unsigned texture_timestamp; + unsigned next_fs_id; + unsigned next_vs_id; + + /* Internally generated shaders: + */ + unsigned white_fs_id; + + /* + */ + struct svga_sw_state sw; + struct svga_hw_draw_state hw_draw; + struct svga_hw_clear_state hw_clear; + } state; + + struct svga_state curr; /* state from the state tracker */ + unsigned dirty; /* statechanges since last update_state() */ + + struct u_upload_mgr *upload_ib; + struct u_upload_mgr *upload_vb; + struct svga_hwtnl *hwtnl; + + /** The occlusion query currently in progress */ + struct svga_query *sq; + + /** List of buffers with queued transfers */ + struct list_head dirty_buffers; +}; + +/* A flag for each state_tracker state object: + */ +#define SVGA_NEW_BLEND 0x1 +#define SVGA_NEW_DEPTH_STENCIL 0x2 +#define SVGA_NEW_RAST 0x4 +#define SVGA_NEW_SAMPLER 0x8 +#define SVGA_NEW_TEXTURE 0x10 +#define SVGA_NEW_VBUFFER 0x20 +#define SVGA_NEW_VELEMENT 0x40 +#define SVGA_NEW_FS 0x80 +#define SVGA_NEW_VS 0x100 +#define SVGA_NEW_FS_CONST_BUFFER 0x200 +#define SVGA_NEW_VS_CONST_BUFFER 0x400 +#define SVGA_NEW_FRAME_BUFFER 0x800 +#define SVGA_NEW_STIPPLE 0x1000 +#define SVGA_NEW_SCISSOR 0x2000 +#define SVGA_NEW_BLEND_COLOR 0x5000 +#define SVGA_NEW_CLIP 0x8000 +#define SVGA_NEW_VIEWPORT 0x10000 +#define SVGA_NEW_PRESCALE 0x20000 +#define SVGA_NEW_REDUCED_PRIMITIVE 0x40000 +#define SVGA_NEW_TEXTURE_BINDING 0x80000 +#define SVGA_NEW_NEED_PIPELINE 0x100000 +#define SVGA_NEW_NEED_SWVFETCH 0x200000 +#define SVGA_NEW_NEED_SWTNL 0x400000 +#define SVGA_NEW_FS_RESULT 0x800000 +#define SVGA_NEW_VS_RESULT 0x1000000 +#define SVGA_NEW_EDGEFLAGS 0x2000000 +#define SVGA_NEW_ZERO_STRIDE 0x4000000 +#define SVGA_NEW_TEXTURE_FLAGS 0x8000000 + + + + + +/*********************************************************************** + * svga_clear.c: + */ +void svga_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil); + + +/*********************************************************************** + * svga_screen_texture.c: + */ +void svga_mark_surfaces_dirty(struct svga_context *svga); + + + + +void svga_init_state_functions( struct svga_context *svga ); +void svga_init_flush_functions( struct svga_context *svga ); +void svga_init_string_functions( struct svga_context *svga ); +void svga_init_blit_functions(struct svga_context *svga); + +void svga_init_blend_functions( struct svga_context *svga ); +void svga_init_depth_stencil_functions( struct svga_context *svga ); +void svga_init_misc_functions( struct svga_context *svga ); +void svga_init_rasterizer_functions( struct svga_context *svga ); +void svga_init_sampler_functions( struct svga_context *svga ); +void svga_init_fs_functions( struct svga_context *svga ); +void svga_init_vs_functions( struct svga_context *svga ); +void svga_init_vertex_functions( struct svga_context *svga ); +void svga_init_constbuffer_functions( struct svga_context *svga ); +void svga_init_draw_functions( struct svga_context *svga ); +void svga_init_query_functions( struct svga_context *svga ); + 
+void svga_cleanup_vertex_state( struct svga_context *svga ); +void svga_cleanup_tss_binding( struct svga_context *svga ); +void svga_cleanup_framebuffer( struct svga_context *svga ); + +void svga_context_flush( struct svga_context *svga, + struct pipe_fence_handle **pfence ); + +void svga_hwtnl_flush_retry( struct svga_context *svga ); + + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct svga_context * +svga_context( struct pipe_context *pipe ) +{ + return (struct svga_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h new file mode 100644 index 0000000000..3a3fcd8fae --- /dev/null +++ b/src/gallium/drivers/svga/svga_debug.h @@ -0,0 +1,75 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_DEBUG_H +#define SVGA_DEBUG_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + +#define DEBUG_DMA 0x1 +#define DEBUG_TGSI 0x4 +#define DEBUG_PIPE 0x8 +#define DEBUG_STATE 0x10 +#define DEBUG_SCREEN 0x20 +#define DEBUG_TEX 0x40 +#define DEBUG_SWTNL 0x80 +#define DEBUG_CONSTS 0x100 +#define DEBUG_VIEWPORT 0x200 +#define DEBUG_VIEWS 0x400 +#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */ +#define DEBUG_FLUSH 0x1000 /* flush after every draw */ +#define DEBUG_SYNC 0x2000 /* sync after every flush */ +#define DEBUG_QUERY 0x4000 +#define DEBUG_CACHE 0x8000 + +#ifdef DEBUG +extern int SVGA_DEBUG; +#define DBSTR(x) x +#else +#define SVGA_DEBUG 0 +#define DBSTR(x) "" +#endif + +static INLINE void +SVGA_DBG( unsigned flag, const char *fmt, ... ) +{ +#ifdef DEBUG + if (SVGA_DEBUG & flag) + { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +#else + (void)flag; + (void)fmt; +#endif +} + + +#endif diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c new file mode 100644 index 0000000000..8db40d0fd5 --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw.c @@ -0,0 +1,377 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_compiler.h" +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "svga_context.h" +#include "svga_draw.h" +#include "svga_draw_private.h" +#include "svga_debug.h" +#include "svga_screen.h" +#include "svga_screen_buffer.h" +#include "svga_screen_texture.h" +#include "svga_winsys.h" +#include "svga_cmd.h" + + +struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga, + struct u_upload_mgr *upload_ib, + struct svga_winsys_context *swc ) +{ + struct svga_hwtnl *hwtnl = CALLOC_STRUCT(svga_hwtnl); + if (hwtnl == NULL) + goto fail; + + hwtnl->svga = svga; + hwtnl->upload_ib = upload_ib; + + hwtnl->cmd.swc = swc; + + return hwtnl; + +fail: + return NULL; +} + +void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl ) +{ + int i, j; + + for (i = 0; i < PIPE_PRIM_MAX; i++) { + for (j = 0; j < IDX_CACHE_MAX; j++) { + pipe_buffer_reference( &hwtnl->index_cache[i][j].buffer, + NULL ); + } + } + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) + pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], NULL); + + for (i = 0; i < hwtnl->cmd.prim_count; i++) + pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL); + + + FREE(hwtnl); +} + + +void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl, + boolean flatshade, + boolean flatshade_first ) +{ + hwtnl->hw_pv = PV_FIRST; + hwtnl->api_pv = (flatshade && !flatshade_first) ? 
PV_LAST : PV_FIRST; +} + +void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl, + unsigned mode ) +{ + hwtnl->api_fillmode = mode; +} + +void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl, + unsigned count ) +{ + unsigned i; + + assert(hwtnl->cmd.prim_count == 0); + + for (i = count; i < hwtnl->cmd.vdecl_count; i++) { + pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], + NULL); + } + + hwtnl->cmd.vdecl_count = count; +} + + +void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl, + unsigned i, + const SVGA3dVertexDecl *decl, + struct pipe_buffer *vb) +{ + assert(hwtnl->cmd.prim_count == 0); + + assert( i < hwtnl->cmd.vdecl_count ); + + hwtnl->cmd.vdecl[i] = *decl; + + pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], + vb); +} + + + +enum pipe_error +svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) +{ + struct svga_winsys_context *swc = hwtnl->cmd.swc; + struct svga_context *svga = hwtnl->svga; + enum pipe_error ret; + + if (hwtnl->cmd.prim_count) { + struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; + struct svga_winsys_surface *ib_handle[QSZ]; + struct svga_winsys_surface *handle; + SVGA3dVertexDecl *vdecl; + SVGA3dPrimitiveRange *prim; + unsigned i; + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); + if (handle == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; + + vb_handle[i] = handle; + } + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + if (hwtnl->cmd.prim_ib[i]) { + handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); + if (handle == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; + } + else + handle = NULL; + + ib_handle[i] = handle; + } + + SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", + svga_surface(svga->curr.framebuffer.cbufs[0])->handle, + hwtnl->cmd.prim_count); + + ret = SVGA3D_BeginDrawPrimitives(swc, + &vdecl, + hwtnl->cmd.vdecl_count, + &prim, + hwtnl->cmd.prim_count); + if (ret != PIPE_OK) + return ret; + + + memcpy( vdecl, + hwtnl->cmd.vdecl, + hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]); + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + /* Given rangeHint is considered to be relative to indexBias, and + * indexBias varies per primitive, we cannot accurately supply an + * rangeHint when emitting more than one primitive per draw command. + */ + if (hwtnl->cmd.prim_count == 1) { + vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0]; + vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1; + } + else { + vdecl[i].rangeHint.first = 0; + vdecl[i].rangeHint.last = 0; + } + + swc->surface_relocation(swc, + &vdecl[i].array.surfaceId, + vb_handle[i], + PIPE_BUFFER_USAGE_GPU_READ); + } + + memcpy( prim, + hwtnl->cmd.prim, + hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]); + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + swc->surface_relocation(swc, + &prim[i].indexArray.surfaceId, + ib_handle[i], + PIPE_BUFFER_USAGE_GPU_READ); + pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL); + } + + SVGA_FIFOCommitAll( swc ); + hwtnl->cmd.prim_count = 0; + } + + return PIPE_OK; +} + + + + + +/*********************************************************************** + * Internal functions: + */ + +enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, + const SVGA3dPrimitiveRange *range, + unsigned min_index, + unsigned max_index, + struct pipe_buffer *ib ) +{ + int ret = PIPE_OK; + +#ifdef DEBUG + { + unsigned i; + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + struct pipe_buffer *vb = hwtnl->cmd.vdecl_vb[i]; + unsigned size = vb ? 
vb->size : 0; + unsigned offset = hwtnl->cmd.vdecl[i].array.offset; + unsigned stride = hwtnl->cmd.vdecl[i].array.stride; + unsigned index_bias = range->indexBias; + unsigned width; + + assert(vb); + assert(size); + assert(offset < size); + assert(index_bias >= 0); + assert(min_index <= max_index); + assert(offset + index_bias*stride < size); + assert(offset + (index_bias + min_index)*stride < size); + + switch (hwtnl->cmd.vdecl[i].identity.type) { + case SVGA3D_DECLTYPE_FLOAT1: + width = 4; + break; + case SVGA3D_DECLTYPE_FLOAT2: + width = 4*2; + break; + case SVGA3D_DECLTYPE_FLOAT3: + width = 4*3; + break; + case SVGA3D_DECLTYPE_FLOAT4: + width = 4*4; + break; + case SVGA3D_DECLTYPE_D3DCOLOR: + width = 4; + break; + case SVGA3D_DECLTYPE_UBYTE4: + width = 1*4; + break; + case SVGA3D_DECLTYPE_SHORT2: + width = 2*2; + break; + case SVGA3D_DECLTYPE_SHORT4: + width = 2*4; + break; + case SVGA3D_DECLTYPE_UBYTE4N: + width = 1*4; + break; + case SVGA3D_DECLTYPE_SHORT2N: + width = 2*2; + break; + case SVGA3D_DECLTYPE_SHORT4N: + width = 2*4; + break; + case SVGA3D_DECLTYPE_USHORT2N: + width = 2*2; + break; + case SVGA3D_DECLTYPE_USHORT4N: + width = 2*4; + break; + case SVGA3D_DECLTYPE_UDEC3: + width = 4; + break; + case SVGA3D_DECLTYPE_DEC3N: + width = 4; + break; + case SVGA3D_DECLTYPE_FLOAT16_2: + width = 2*2; + break; + case SVGA3D_DECLTYPE_FLOAT16_4: + width = 2*4; + break; + default: + assert(0); + width = 0; + break; + } + + assert(!stride || width <= stride); + assert(offset + (index_bias + max_index)*stride + width <= size); + } + + assert(range->indexWidth == range->indexArray.stride); + + if(ib) { + unsigned size = ib->size; + unsigned offset = range->indexArray.offset; + unsigned stride = range->indexArray.stride; + unsigned count; + + assert(size); + assert(offset < size); + assert(stride); + + switch (range->primType) { + case SVGA3D_PRIMITIVE_POINTLIST: + count = range->primitiveCount; + break; + case SVGA3D_PRIMITIVE_LINELIST: + count = range->primitiveCount * 2; + break; + case SVGA3D_PRIMITIVE_LINESTRIP: + count = range->primitiveCount + 1; + break; + case SVGA3D_PRIMITIVE_TRIANGLELIST: + count = range->primitiveCount * 3; + break; + case SVGA3D_PRIMITIVE_TRIANGLESTRIP: + count = range->primitiveCount + 2; + break; + case SVGA3D_PRIMITIVE_TRIANGLEFAN: + count = range->primitiveCount + 2; + break; + default: + assert(0); + count = 0; + break; + } + + assert(offset + count*stride <= size); + } + } +#endif + + if (hwtnl->cmd.prim_count+1 >= QSZ) { + ret = svga_hwtnl_flush( hwtnl ); + if (ret != PIPE_OK) + return ret; + } + + /* min/max indices are relative to bias */ + hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index; + hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; + + hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + + pipe_buffer_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); + hwtnl->cmd.prim_count++; + + return ret; +} diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h new file mode 100644 index 0000000000..14553b17b5 --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw.h @@ -0,0 +1,83 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_DRAW_H +#define SVGA_DRAW_H + +#include "pipe/p_compiler.h" + +#include "svga_hw_reg.h" + +struct svga_hwtnl; +struct svga_winsys_context; +struct svga_screen; +struct svga_context; +struct pipe_buffer; +struct u_upload_mgr; + +struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga, + struct u_upload_mgr *upload_ib, + struct svga_winsys_context *swc ); + +void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl ); + +void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl, + boolean flatshade, + boolean flatshade_first ); + +void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl, + unsigned mode ); + +void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl, + unsigned i, + const SVGA3dVertexDecl *decl, + struct pipe_buffer *vb); + +void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl, + unsigned count ); + + +enum pipe_error +svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, + unsigned prim, + unsigned start, + unsigned count); + +enum pipe_error +svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *indexBuffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + unsigned bias ); + +enum pipe_error +svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); + + +#endif /* SVGA_DRAW_H_ */ diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c new file mode 100644 index 0000000000..75492dffca --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -0,0 +1,297 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_inlines.h" +#include "util/u_prim.h" +#include "indices/u_indices.h" + +#include "svga_hw_reg.h" +#include "svga_draw.h" +#include "svga_draw_private.h" +#include "svga_context.h" + + +#define DBG 0 + + + + +static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl, + unsigned nr, + unsigned index_size, + u_generate_func generate, + struct pipe_buffer **out_buf ) +{ + struct pipe_screen *screen = hwtnl->svga->pipe.screen; + unsigned size = index_size * nr; + struct pipe_buffer *dst = NULL; + void *dst_map = NULL; + + dst = screen->buffer_create( screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + size ); + if (dst == NULL) + goto fail; + + dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); + if (dst_map == NULL) + goto fail; + + generate( nr, + dst_map ); + + pipe_buffer_unmap( screen, dst ); + + *out_buf = dst; + return PIPE_OK; + +fail: + if (dst_map) + screen->buffer_unmap( screen, dst ); + + if (dst) + screen->buffer_destroy( dst ); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + +static boolean compare( unsigned cached_nr, + unsigned nr, + unsigned type ) +{ + if (type == U_GENERATE_REUSABLE) + return cached_nr >= nr; + else + return cached_nr == nr; +} + +static enum pipe_error retrieve_or_generate_indices( struct svga_hwtnl *hwtnl, + unsigned prim, + unsigned gen_type, + unsigned gen_nr, + unsigned gen_size, + u_generate_func generate, + struct pipe_buffer **out_buf ) +{ + enum pipe_error ret = PIPE_OK; + int i; + + for (i = 0; i < IDX_CACHE_MAX; i++) { + if (hwtnl->index_cache[prim][i].buffer != NULL && + hwtnl->index_cache[prim][i].generate == generate) + { + if (compare(hwtnl->index_cache[prim][i].gen_nr, gen_nr, gen_type)) + { + pipe_buffer_reference( out_buf, + hwtnl->index_cache[prim][i].buffer ); + + if (DBG) + debug_printf("%s retrieve %d/%d\n", __FUNCTION__, i, gen_nr); + + return PIPE_OK; + } + else if (gen_type == U_GENERATE_REUSABLE) + { + pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer, + NULL ); + + if (DBG) + debug_printf("%s discard %d/%d\n", __FUNCTION__, + i, hwtnl->index_cache[prim][i].gen_nr); + + break; + } + } + } + + if (i == IDX_CACHE_MAX) + { + unsigned smallest = 0; + unsigned smallest_size = ~0; + + for (i = 0; i < IDX_CACHE_MAX && smallest_size; i++) { + if (hwtnl->index_cache[prim][i].buffer == NULL) + { + smallest = i; + smallest_size = 0; + } + else if (hwtnl->index_cache[prim][i].gen_nr < smallest) + { + smallest = i; + smallest_size = hwtnl->index_cache[prim][i].gen_nr; + } + } + + assert (smallest != IDX_CACHE_MAX); + + pipe_buffer_reference( &hwtnl->index_cache[prim][smallest].buffer, + NULL ); + + if (DBG) + debug_printf("%s discard smallest %d/%d\n", __FUNCTION__, + smallest, smallest_size); + + i = smallest; + } + + + ret = generate_indices( hwtnl, + gen_nr, + gen_size, + generate, + out_buf ); + if (ret != PIPE_OK) + return ret; + + + 
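+   /* Cache the freshly generated index buffer in the slot chosen
+    * above, so that later draws of the same primitive type can reuse
+    * it instead of regenerating the indices.
+    */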
hwtnl->index_cache[prim][i].generate = generate; + hwtnl->index_cache[prim][i].gen_nr = gen_nr; + pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer, + *out_buf ); + + if (DBG) + debug_printf("%s cache %d/%d\n", __FUNCTION__, + i, hwtnl->index_cache[prim][i].gen_nr); + + return PIPE_OK; +} + + + +static enum pipe_error +simple_draw_arrays( struct svga_hwtnl *hwtnl, + unsigned prim, unsigned start, unsigned count ) +{ + SVGA3dPrimitiveRange range; + unsigned hw_prim; + unsigned hw_count; + + hw_prim = svga_translate_prim(prim, count, &hw_count); + if (hw_count == 0) + return PIPE_ERROR_BAD_INPUT; + + range.primType = hw_prim; + range.primitiveCount = hw_count; + range.indexArray.surfaceId = SVGA3D_INVALID_ID; + range.indexArray.offset = 0; + range.indexArray.stride = 0; + range.indexWidth = 0; + range.indexBias = start; + + /* Min/max index should be calculated prior to applying bias, so we + * end up with min_index = 0, max_index = count - 1 and everybody + * looking at those numbers knows to adjust them by + * range.indexBias. + */ + return svga_hwtnl_prim( hwtnl, &range, 0, count - 1, NULL ); +} + + + + + + + + + + +enum pipe_error +svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, + unsigned prim, + unsigned start, + unsigned count) +{ + unsigned gen_prim, gen_size, gen_nr, gen_type; + u_generate_func gen_func; + enum pipe_error ret = PIPE_OK; + + if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && + prim >= PIPE_PRIM_TRIANGLES) + { + gen_type = u_unfilled_generator( prim, + start, + count, + hwtnl->api_fillmode, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + else { + gen_type = u_index_generator( svga_hw_prims, + prim, + start, + count, + hwtnl->api_pv, + hwtnl->hw_pv, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + + if (gen_type == U_GENERATE_LINEAR) { + return simple_draw_arrays( hwtnl, gen_prim, start, count ); + } + else { + struct pipe_buffer *gen_buf = NULL; + + /* Need to draw as indexed primitive. + * Potentially need to run the gen func to build an index buffer. + */ + ret = retrieve_or_generate_indices( hwtnl, + prim, + gen_type, + gen_nr, + gen_size, + gen_func, + &gen_buf ); + if (ret) + goto done; + + ret = svga_hwtnl_simple_draw_range_elements( hwtnl, + gen_buf, + gen_size, + 0, + count - 1, + gen_prim, + 0, + gen_nr, + start ); + if (ret) + goto done; + + done: + if (gen_buf) + pipe_buffer_reference( &gen_buf, NULL ); + + return ret; + } +} + diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c new file mode 100644 index 0000000000..167d817831 --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -0,0 +1,255 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_prim.h" +#include "util/u_upload_mgr.h" +#include "indices/u_indices.h" + +#include "svga_cmd.h" +#include "svga_draw.h" +#include "svga_draw_private.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_context.h" + +#include "svga_hw_reg.h" + + +static enum pipe_error +translate_indices( struct svga_hwtnl *hwtnl, + struct pipe_buffer *src, + unsigned offset, + unsigned nr, + unsigned index_size, + u_translate_func translate, + struct pipe_buffer **out_buf ) +{ + struct pipe_screen *screen = hwtnl->svga->pipe.screen; + unsigned size = index_size * nr; + const void *src_map = NULL; + struct pipe_buffer *dst = NULL; + void *dst_map = NULL; + + dst = screen->buffer_create( screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + size ); + if (dst == NULL) + goto fail; + + src_map = pipe_buffer_map( screen, src, PIPE_BUFFER_USAGE_CPU_READ ); + if (src_map == NULL) + goto fail; + + dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); + if (dst_map == NULL) + goto fail; + + translate( (const char *)src_map + offset, + nr, + dst_map ); + + pipe_buffer_unmap( screen, src ); + pipe_buffer_unmap( screen, dst ); + + *out_buf = dst; + return PIPE_OK; + +fail: + if (src_map) + screen->buffer_unmap( screen, src ); + + if (dst_map) + screen->buffer_unmap( screen, dst ); + + if (dst) + screen->buffer_destroy( dst ); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + + + + +enum pipe_error +svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + unsigned bias ) +{ + struct pipe_buffer *upload_buffer = NULL; + SVGA3dPrimitiveRange range; + unsigned hw_prim; + unsigned hw_count; + unsigned index_offset = start * index_size; + int ret = PIPE_OK; + + hw_prim = svga_translate_prim(prim, count, &hw_count); + if (hw_count == 0) + goto done; + + if (index_buffer && + svga_buffer_is_user_buffer(index_buffer)) + { + assert( index_buffer->size >= index_offset + count * index_size ); + + ret = u_upload_buffer( hwtnl->upload_ib, + index_offset, + count * index_size, + index_buffer, + &index_offset, + &upload_buffer ); + if (ret) + goto done; + + /* Don't need to worry about refcounting index_buffer as this is + * just a stack variable without a counted reference of its own. + * The caller holds the reference. 
+ */ + index_buffer = upload_buffer; + } + + range.primType = hw_prim; + range.primitiveCount = hw_count; + range.indexArray.offset = index_offset; + range.indexArray.stride = index_size; + range.indexWidth = index_size; + range.indexBias = bias; + + ret = svga_hwtnl_prim( hwtnl, &range, min_index, max_index, index_buffer ); + if (ret) + goto done; + +done: + if (upload_buffer) + pipe_buffer_reference( &upload_buffer, NULL ); + + return ret; +} + + + + +enum pipe_error +svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count, + unsigned bias) +{ + unsigned gen_prim, gen_size, gen_nr, gen_type; + u_translate_func gen_func; + enum pipe_error ret = PIPE_OK; + + if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && + prim >= PIPE_PRIM_TRIANGLES) + { + gen_type = u_unfilled_translator( prim, + index_size, + count, + hwtnl->api_fillmode, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + else + { + gen_type = u_index_translator( svga_hw_prims, + prim, + index_size, + count, + hwtnl->api_pv, + hwtnl->hw_pv, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + + + if (gen_type == U_TRANSLATE_MEMCPY) { + /* No need for translation, just pass through to hardware: + */ + return svga_hwtnl_simple_draw_range_elements( hwtnl, index_buffer, + index_size, + min_index, + max_index, + gen_prim, start, count, bias ); + } + else { + struct pipe_buffer *gen_buf = NULL; + + /* Need to allocate a new index buffer and run the translate + * func to populate it. Could potentially cache this translated + * index buffer with the original to avoid future + * re-translations. Not much point if we're just accelerating + * GL though, as index buffers are typically used only once + * there. + */ + ret = translate_indices( hwtnl, + index_buffer, + start * index_size, + gen_nr, + gen_size, + gen_func, + &gen_buf ); + if (ret) + goto done; + + ret = svga_hwtnl_simple_draw_range_elements( hwtnl, + gen_buf, + gen_size, + min_index, + max_index, + gen_prim, + 0, + gen_nr, + bias ); + if (ret) + goto done; + + done: + if (gen_buf) + pipe_buffer_reference( &gen_buf, NULL ); + + return ret; + } +} + + + + + diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h new file mode 100644 index 0000000000..9aa40e1664 --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -0,0 +1,158 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
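
translate_indices() above maps the source and destination buffers and hands them to translate( src + offset, nr, dst ). As a sketch of that convention (a hypothetical translator; the real ones are picked by u_index_translator / u_unfilled_translator below), an 8-bit to 16-bit widening pass might look like:

/* Hypothetical u_translate_func: reads 'nr' source indices starting
 * at 'in' and writes the converted indices to 'out'.
 */
static void
example_translate_ubyte_to_ushort( const void *in, unsigned nr, void *out )
{
   const unsigned char *src = (const unsigned char *)in;
   unsigned short *dst = (unsigned short *)out;
   unsigned i;

   for (i = 0; i < nr; i++)
      dst[i] = src[i];
}
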
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_DRAW_H_ +#define SVGA_DRAW_H_ + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "indices/u_indices.h" +#include "svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +struct svga_context; +struct u_upload_mgr; + +/* Should include polygon? + */ +static const unsigned svga_hw_prims = + ((1 << PIPE_PRIM_POINTS) | + (1 << PIPE_PRIM_LINES) | + (1 << PIPE_PRIM_LINE_STRIP) | + (1 << PIPE_PRIM_TRIANGLES) | + (1 << PIPE_PRIM_TRIANGLE_STRIP) | + (1 << PIPE_PRIM_TRIANGLE_FAN)); + + +static INLINE unsigned svga_translate_prim(unsigned mode, + unsigned count, + unsigned *out_count) +{ + switch (mode) { + case PIPE_PRIM_POINTS: + *out_count = count; + return SVGA3D_PRIMITIVE_POINTLIST; + + case PIPE_PRIM_LINES: + *out_count = count / 2; + return SVGA3D_PRIMITIVE_LINELIST; + + case PIPE_PRIM_LINE_STRIP: + *out_count = count - 1; + return SVGA3D_PRIMITIVE_LINESTRIP; + + case PIPE_PRIM_TRIANGLES: + *out_count = count / 3; + return SVGA3D_PRIMITIVE_TRIANGLELIST; + + case PIPE_PRIM_TRIANGLE_STRIP: + *out_count = count - 2; + return SVGA3D_PRIMITIVE_TRIANGLESTRIP; + + case PIPE_PRIM_TRIANGLE_FAN: + *out_count = count - 2; + return SVGA3D_PRIMITIVE_TRIANGLEFAN; + + default: + assert(0); + *out_count = 0; + return 0; + } +} + + +struct index_cache { + u_generate_func generate; + unsigned gen_nr; + + /* If non-null, this buffer is filled by calling + * generate(nr, map(buffer)) + */ + struct pipe_buffer *buffer; +}; + +#define QSZ 32 + +struct draw_cmd { + struct svga_winsys_context *swc; + + SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX]; + struct pipe_buffer *vdecl_vb[SVGA3D_INPUTREG_MAX]; + unsigned vdecl_count; + + SVGA3dPrimitiveRange prim[QSZ]; + struct pipe_buffer *prim_ib[QSZ]; + unsigned prim_count; + unsigned min_index[QSZ]; + unsigned max_index[QSZ]; +}; + +#define IDX_CACHE_MAX 8 + +struct svga_hwtnl { + struct svga_context *svga; + struct u_upload_mgr *upload_ib; + + /* Flatshade information: + */ + unsigned api_pv; + unsigned hw_pv; + unsigned api_fillmode; + + /* Cache the results of running a particular generate func on each + * primitive type. + */ + struct index_cache index_cache[PIPE_PRIM_MAX][IDX_CACHE_MAX]; + + /* Try to build the maximal draw command packet before emitting: + */ + struct draw_cmd cmd; +}; + + + +/*********************************************************************** + * Internal functions + */ +enum pipe_error +svga_hwtnl_prim( struct svga_hwtnl *hwtnl, + const SVGA3dPrimitiveRange *range, + unsigned min_index, + unsigned max_index, + struct pipe_buffer *ib ); + +enum pipe_error +svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *indexBuffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + unsigned bias ); + + +#endif diff --git a/src/gallium/drivers/svga/svga_hw_reg.h b/src/gallium/drivers/svga/svga_hw_reg.h new file mode 100644 index 0000000000..183f4b918e --- /dev/null +++ b/src/gallium/drivers/svga/svga_hw_reg.h @@ -0,0 +1,42 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
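
svga_translate_prim() above turns a gallium vertex count into the primitive count the SVGA3D device expects. A few worked cases, reading the switch directly:

/* Worked examples of the count mapping in svga_translate_prim():
 *
 *   PIPE_PRIM_TRIANGLES,      count 6  ->  2 primitives (6 / 3)
 *   PIPE_PRIM_TRIANGLE_STRIP, count 6  ->  4 primitives (6 - 2)
 *   PIPE_PRIM_LINE_STRIP,     count 5  ->  4 primitives (5 - 1)
 *   PIPE_PRIM_LINES,          count 1  ->  0 primitives
 */
unsigned hw_count;
unsigned hw_prim = svga_translate_prim( PIPE_PRIM_TRIANGLE_STRIP, 6, &hw_count );
assert(hw_prim == SVGA3D_PRIMITIVE_TRIANGLESTRIP && hw_count == 4);

A zero primitive count is how degenerate draws are rejected: simple_draw_arrays() returns PIPE_ERROR_BAD_INPUT and svga_hwtnl_simple_draw_range_elements() silently skips the draw.
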
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_HW_REG_H +#define SVGA_HW_REG_H + +#include "pipe/p_compiler.h" + +#if defined(PIPE_CC_GCC) +#ifndef HAVE_STDINT_H +#define HAVE_STDINT_H +#endif +#endif + +#include "svga_types.h" + +#include "svga3d_reg.h" + + +#endif diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c new file mode 100644 index 0000000000..855d228755 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -0,0 +1,246 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_state.h" + +#include "svga_hw_reg.h" + + +static INLINE unsigned +svga_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: return SVGA3D_BLENDOP_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: return SVGA3D_BLENDOP_SRCALPHA; + case PIPE_BLENDFACTOR_ONE: return SVGA3D_BLENDOP_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: return SVGA3D_BLENDOP_SRCCOLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return SVGA3D_BLENDOP_INVSRCCOLOR; + case PIPE_BLENDFACTOR_DST_COLOR: return SVGA3D_BLENDOP_DESTCOLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return SVGA3D_BLENDOP_INVDESTCOLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return SVGA3D_BLENDOP_INVSRCALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: return SVGA3D_BLENDOP_DESTALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return SVGA3D_BLENDOP_INVDESTALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT; + case PIPE_BLENDFACTOR_CONST_COLOR: return SVGA3D_BLENDOP_BLENDFACTOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */ + default: + assert(0); + return SVGA3D_BLENDOP_ZERO; + } +} + +static INLINE unsigned +svga_translate_blend_func(unsigned mode) +{ + switch (mode) { + case PIPE_BLEND_ADD: return SVGA3D_BLENDEQ_ADD; + case PIPE_BLEND_SUBTRACT: return SVGA3D_BLENDEQ_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: return SVGA3D_BLENDEQ_REVSUBTRACT; + case PIPE_BLEND_MIN: return SVGA3D_BLENDEQ_MINIMUM; + case PIPE_BLEND_MAX: return SVGA3D_BLENDEQ_MAXIMUM; + default: + assert(0); + return SVGA3D_BLENDEQ_ADD; + } +} + + +static void * +svga_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *templ) +{ + struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state ); + unsigned i; + + + /* Fill in the per-rendertarget blend state. We currently only + * have one rendertarget. + */ + for (i = 0; i < 1; i++) { + /* No way to set this in SVGA3D, and no way to correctly implement it on + * top of D3D9 API. Instead we try to simulate with various blend modes. 
+ */ + if (templ->logicop_enable) { + switch (templ->logicop_func) { + case PIPE_LOGICOP_XOR: + blend->need_white_fragments = TRUE; + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_SUBTRACT; + break; + case PIPE_LOGICOP_CLEAR: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_COPY: + blend->rt[i].blend_enable = FALSE; + break; + case PIPE_LOGICOP_COPY_INVERTED: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_NOOP: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_SET: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_INVERT: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_AND: + /* Approximate with minimum - works for the 0 & anything case: */ + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_AND_REVERSE: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_AND_INVERTED: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_OR: + /* Approximate with maximum - works for the 1 | anything case: */ + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_OR_REVERSE: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_OR_INVERTED: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_EQUIV: + /* Fill these in with plausible values */ + blend->rt[i].blend_enable = FALSE; + break; + default: + assert(0); + break; + } + } + else { + blend->rt[i].blend_enable = templ->blend_enable; + + if (templ->blend_enable) { + blend->rt[i].srcblend = svga_translate_blend_factor(templ->rgb_src_factor); + blend->rt[i].dstblend = svga_translate_blend_factor(templ->rgb_dst_factor); + blend->rt[i].blendeq = svga_translate_blend_func(templ->rgb_func); + blend->rt[i].srcblend_alpha = 
svga_translate_blend_factor(templ->alpha_src_factor); + blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->alpha_dst_factor); + blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->alpha_func); + + if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend || + blend->rt[i].dstblend_alpha != blend->rt[i].dstblend || + blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) + { + blend->rt[i].separate_alpha_blend_enable = TRUE; + } + } + } + + blend->rt[i].writemask = templ->colormask; + } + + return blend; +} + +static void svga_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.blend = (struct svga_blend_state*)blend; + svga->dirty |= SVGA_NEW_BLEND; +} + + +static void svga_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void svga_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.blend_color = *blend_color; + + svga->dirty |= SVGA_NEW_BLEND; +} + + +void svga_init_blend_functions( struct svga_context *svga ) +{ + svga->pipe.create_blend_state = svga_create_blend_state; + svga->pipe.bind_blend_state = svga_bind_blend_state; + svga->pipe.delete_blend_state = svga_delete_blend_state; + + svga->pipe.set_blend_color = svga_set_blend_color; +} + + + diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c new file mode 100644 index 0000000000..4f575b06e6 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -0,0 +1,92 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
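
Since SVGA3D exposes no logic ops, svga_create_blend_state() above approximates them with blend state. The XOR case is the most involved: it forces the fragment colour to white and then subtracts the destination, which per channel is just an inversion. A short reading of the arithmetic (an interpretation of the code above, assuming the usual D3D-style subtract equation where both blend factors apply):

/* PIPE_LOGICOP_XOR approximation, normalized colours in [0,1]:
 *
 *   need_white_fragments        ->  src = 1.0
 *   srcblend = dstblend = ONE
 *   blendeq  = SUBTRACT         ->  result = src*1 - dst*1 = 1.0 - dst
 *
 * XOR against an all-ones source is a bitwise NOT of the destination,
 * and 1.0 - dst is exactly that inversion in normalized terms.
 */

The remaining cases are looser fits, as the "Approximate with minimum/maximum" comments admit; NAND, NOR and EQUIV simply fall back to blending disabled.
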
+ * + **********************************************************/ + +#include "svga_screen_texture.h" +#include "svga_context.h" +#include "svga_debug.h" +#include "svga_cmd.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + + +static void svga_surface_copy(struct pipe_context *pipe, + struct pipe_surface *dest, + unsigned destx, unsigned desty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct svga_context *svga = svga_context(pipe); + SVGA3dCopyBox *box; + enum pipe_error ret; + + svga_hwtnl_flush_retry( svga ); + + SVGA_DBG(DEBUG_DMA, "blit to sid %p (%d,%d), from sid %p (%d,%d) sz %dx%d\n", + svga_surface(dest)->handle, + destx, desty, + svga_surface(src)->handle, + srcx, srcy, + width, height); + + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + src, + dest, + &box, + 1); + if(ret != PIPE_OK) { + + svga_context_flush(svga, NULL); + + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + src, + dest, + &box, + 1); + assert(ret == PIPE_OK); + } + + box->x = destx; + box->y = desty; + box->z = 0; + box->w = width; + box->h = height; + box->d = 1; + box->srcx = srcx; + box->srcy = srcy; + box->srcz = 0; + + SVGA_FIFOCommitAll(svga->swc); + + svga_surface(dest)->dirty = TRUE; + svga_propagate_surface(pipe, dest); +} + + +void +svga_init_blit_functions(struct svga_context *svga) +{ + svga->pipe.surface_copy = svga_surface_copy; +} diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c new file mode 100644 index 0000000000..6195c3897e --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -0,0 +1,125 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
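
svga_surface_copy() above shows an idiom that recurs throughout the driver: try to begin a command, and if the command buffer is full, flush once and retry, asserting that the retry succeeds. Condensed into a helper for illustration (emit_fn stands in for any SVGA3D_* emitter; this helper is not part of the driver):

/* Illustrative reserve/flush/retry helper. */
static enum pipe_error
emit_with_retry( struct svga_context *svga,
                 enum pipe_error (*emit_fn)( struct svga_context *svga ) )
{
   enum pipe_error ret = emit_fn( svga );

   if (ret != PIPE_OK) {
      /* Command buffer was full: flush queued commands, then the
       * same command is expected to fit in the empty buffer.
       */
      svga_context_flush( svga, NULL );
      ret = emit_fn( svga );
      assert(ret == PIPE_OK);
   }
   return ret;
}
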
+ * + **********************************************************/ + +#include "svga_cmd.h" +#include "svga_debug.h" + +#include "pipe/p_defines.h" +#include "util/u_pack_color.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_screen_texture.h" + + +static enum pipe_error +try_clear(struct svga_context *svga, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + int ret = PIPE_OK; + SVGA3dRect rect = { 0, 0, 0, 0 }; + boolean restore_viewport = FALSE; + SVGA3dClearFlag flags = 0; + struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + unsigned color = 0; + + ret = svga_update_state(svga, SVGA_STATE_HW_CLEAR); + if (ret) + return ret; + + if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) { + flags |= SVGA3D_CLEAR_COLOR; + util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &color); + + rect.w = fb->cbufs[0]->width; + rect.h = fb->cbufs[0]->height; + } + + if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && fb->zsbuf) { + flags |= SVGA3D_CLEAR_DEPTH; + + if (svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) + flags |= SVGA3D_CLEAR_STENCIL; + + rect.w = MAX2(rect.w, fb->zsbuf->width); + rect.h = MAX2(rect.h, fb->zsbuf->height); + } + + if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { + restore_viewport = TRUE; + ret = SVGA3D_SetViewport(svga->swc, &rect); + if (ret) + return ret; + } + + ret = SVGA3D_ClearRect(svga->swc, flags, color, depth, stencil, + rect.x, rect.y, rect.w, rect.h); + if (ret != PIPE_OK) + return ret; + + if (restore_viewport) { + memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect); + ret = SVGA3D_SetViewport(svga->swc, &rect); + } + + return ret; +} + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +svga_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) +{ + struct svga_context *svga = svga_context( pipe ); + int ret; + + if (buffers & PIPE_CLEAR_COLOR) + SVGA_DBG(DEBUG_DMA, "clear sid %p\n", + svga_surface(svga->curr.framebuffer.cbufs[0])->handle); + + ret = try_clear( svga, buffers, rgba, depth, stencil ); + + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + /* Flush command buffer and retry: + */ + svga_context_flush( svga, NULL ); + + ret = try_clear( svga, buffers, rgba, depth, stencil ); + } + + /* + * Mark target surfaces as dirty + * TODO Mark only cleared surfaces. + */ + svga_mark_surfaces_dirty(svga); + + assert (ret == PIPE_OK); +} diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c new file mode 100644 index 0000000000..10e7a12189 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -0,0 +1,74 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_hw_reg.h" +#include "svga_cmd.h" + +/*********************************************************************** + * Constant buffers + */ + +struct svga_constbuf +{ + unsigned type; + float (*data)[4]; + unsigned count; +}; + + + +static void svga_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct svga_context *svga = svga_context(pipe); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + pipe_buffer_reference( &svga->curr.cb[shader], + buf->buffer ); + + if (shader == PIPE_SHADER_FRAGMENT) + svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; + else + svga->dirty |= SVGA_NEW_VS_CONST_BUFFER; +} + + + +void svga_init_constbuffer_functions( struct svga_context *svga ) +{ + svga->pipe.set_constant_buffer = svga_set_constant_buffer; +} + diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c new file mode 100644 index 0000000000..df636c08a0 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -0,0 +1,153 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
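
svga_set_constant_buffer() above only takes a reference on the buffer and raises the matching dirty flag; emission is deferred to whatever consumes SVGA_NEW_FS_CONST_BUFFER / SVGA_NEW_VS_CONST_BUFFER. A minimal caller-side sketch against the signature shown (constbuf is assumed to be a pipe_buffer already filled with the shader constants):

/* Illustrative caller; 'constbuf' is a pre-filled pipe_buffer. */
struct pipe_constant_buffer cb;

cb.buffer = constbuf;
pipe->set_constant_buffer( pipe, PIPE_SHADER_FRAGMENT, 0, &cb );
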
+ * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_hw_reg.h" + + +static INLINE unsigned +svga_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: return SVGA3D_CMP_NEVER; + case PIPE_FUNC_LESS: return SVGA3D_CMP_LESS; + case PIPE_FUNC_LEQUAL: return SVGA3D_CMP_LESSEQUAL; + case PIPE_FUNC_GREATER: return SVGA3D_CMP_GREATER; + case PIPE_FUNC_GEQUAL: return SVGA3D_CMP_GREATEREQUAL; + case PIPE_FUNC_NOTEQUAL: return SVGA3D_CMP_NOTEQUAL; + case PIPE_FUNC_EQUAL: return SVGA3D_CMP_EQUAL; + case PIPE_FUNC_ALWAYS: return SVGA3D_CMP_ALWAYS; + default: + assert(0); + return SVGA3D_CMP_ALWAYS; + } +} + +static INLINE unsigned +svga_translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: return SVGA3D_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: return SVGA3D_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return SVGA3D_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: return SVGA3D_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR: return SVGA3D_STENCILOP_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: return SVGA3D_STENCILOP_INCRSAT; /* incorrect? */ + case PIPE_STENCIL_OP_DECR_WRAP: return SVGA3D_STENCILOP_DECRSAT; /* incorrect? */ + case PIPE_STENCIL_OP_INVERT: return SVGA3D_STENCILOP_INVERT; + default: + assert(0); + return SVGA3D_STENCILOP_KEEP; + } +} + + +static void * +svga_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ) +{ + struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state ); + + /* Don't try to figure out CW/CCW correspondence with + * stencil[0]/[1] at this point. Presumably this can change as + * back/front face are modified. + */ + ds->stencil[0].enabled = templ->stencil[0].enabled; + if (ds->stencil[0].enabled) { + ds->stencil[0].func = svga_translate_compare_func(templ->stencil[0].func); + ds->stencil[0].fail = svga_translate_stencil_op(templ->stencil[0].fail_op); + ds->stencil[0].zfail = svga_translate_stencil_op(templ->stencil[0].zfail_op); + ds->stencil[0].pass = svga_translate_stencil_op(templ->stencil[0].zpass_op); + + /* SVGA3D has one ref/mask/writemask triple shared between front & + * back face stencil. 
We really need two: + */ + ds->stencil_ref = templ->stencil[0].ref_value & 0xff; + ds->stencil_mask = templ->stencil[0].valuemask & 0xff; + ds->stencil_writemask = templ->stencil[0].writemask & 0xff; + } + + + ds->stencil[1].enabled = templ->stencil[1].enabled; + if (templ->stencil[1].enabled) { + ds->stencil[1].func = svga_translate_compare_func(templ->stencil[1].func); + ds->stencil[1].fail = svga_translate_stencil_op(templ->stencil[1].fail_op); + ds->stencil[1].zfail = svga_translate_stencil_op(templ->stencil[1].zfail_op); + ds->stencil[1].pass = svga_translate_stencil_op(templ->stencil[1].zpass_op); + + ds->stencil_ref = templ->stencil[1].ref_value & 0xff; + ds->stencil_mask = templ->stencil[1].valuemask & 0xff; + ds->stencil_writemask = templ->stencil[1].writemask & 0xff; + } + + + ds->zenable = templ->depth.enabled; + if (ds->zenable) { + ds->zfunc = svga_translate_compare_func(templ->depth.func); + ds->zwriteenable = templ->depth.writemask; + } + + ds->alphatestenable = templ->alpha.enabled; + if (ds->alphatestenable) { + ds->alphafunc = svga_translate_compare_func(templ->alpha.func); + ds->alpharef = templ->alpha.ref_value; + } + + return ds; +} + +static void svga_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil; + svga->dirty |= SVGA_NEW_DEPTH_STENCIL; +} + +static void svga_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + FREE(depth_stencil); +} + + + +void svga_init_depth_stencil_functions( struct svga_context *svga ) +{ + svga->pipe.create_depth_stencil_alpha_state = svga_create_depth_stencil_state; + svga->pipe.bind_depth_stencil_alpha_state = svga_bind_depth_stencil_state; + svga->pipe.delete_depth_stencil_alpha_state = svga_delete_depth_stencil_state; +} + + + + diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c new file mode 100644 index 0000000000..71a552862e --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -0,0 +1,261 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
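
As the comment in svga_create_depth_stencil_state() notes, SVGA3D exposes only one ref/mask/writemask triple for both stencil faces, so with two-sided stencil enabled the back-face values overwrite the front-face ones. A small worked example of that consequence, following the assignments above:

/* Both faces enabled with different reference values:
 *
 *   templ->stencil[0].ref_value = 0x01;   front
 *   templ->stencil[1].ref_value = 0x80;   back
 *
 * After svga_create_depth_stencil_state():
 *
 *   ds->stencil_ref == 0x80   -- the front-face value is lost
 */
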
+ * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_inlines.h" +#include "util/u_prim.h" +#include "util/u_time.h" +#include "indices/u_indices.h" + +#include "svga_hw_reg.h" +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_winsys.h" +#include "svga_draw.h" +#include "svga_state.h" +#include "svga_swtnl.h" +#include "svga_debug.h" + + + +static enum pipe_error +retry_draw_range_elements( struct svga_context *svga, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + boolean do_retry ) +{ + enum pipe_error ret = 0; + + svga_hwtnl_set_unfilled( svga->hwtnl, + svga->curr.rast->hw_unfilled ); + + svga_hwtnl_set_flatshade( svga->hwtnl, + svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade_first ); + + + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); + if (ret) + goto retry; + + ret = svga_hwtnl_draw_range_elements( svga->hwtnl, + index_buffer, index_size, + min_index, max_index, + prim, start, count, 0 ); + if (ret) + goto retry; + + if (svga->curr.any_user_vertex_buffers) { + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret) + goto retry; + } + + return PIPE_OK; + +retry: + svga_context_flush( svga, NULL ); + + if (do_retry) + { + return retry_draw_range_elements( svga, + index_buffer, index_size, + min_index, max_index, + prim, start, count, + FALSE ); + } + + return ret; +} + + +static enum pipe_error +retry_draw_arrays( struct svga_context *svga, + unsigned prim, + unsigned start, + unsigned count, + boolean do_retry ) +{ + enum pipe_error ret; + + svga_hwtnl_set_unfilled( svga->hwtnl, + svga->curr.rast->hw_unfilled ); + + svga_hwtnl_set_flatshade( svga->hwtnl, + svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade_first ); + + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); + if (ret) + goto retry; + + ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim, + start, count ); + if (ret) + goto retry; + + if (svga->curr.any_user_vertex_buffers) { + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret) + goto retry; + } + + return 0; + +retry: + if (ret == PIPE_ERROR_OUT_OF_MEMORY && do_retry) + { + svga_context_flush( svga, NULL ); + + return retry_draw_arrays( svga, + prim, + start, + count, + FALSE ); + } + + return ret; +} + + + + + +static boolean +svga_draw_range_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct svga_context *svga = svga_context( pipe ); + unsigned reduced_prim = u_reduced_prim(prim); + enum pipe_error ret = 0; + + if (!u_trim_pipe_prim( prim, &count )) + return TRUE; + + /* + * Mark currently bound target surfaces as dirty + * doesn't really matter if it is done before drawing. + * + * TODO If we ever normaly return something other then + * true we should not mark it as dirty then. 
+ */ + svga_mark_surfaces_dirty(svga_context(pipe)); + + if (svga->curr.reduced_prim != reduced_prim) { + svga->curr.reduced_prim = reduced_prim; + svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE; + } + + svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL ); + +#ifdef DEBUG + if (svga->curr.vs->base.id == svga->debug.disable_shader || + svga->curr.fs->base.id == svga->debug.disable_shader) + return 0; +#endif + + if (svga->state.sw.need_swtnl) + { + ret = svga_swtnl_draw_range_elements( svga, + index_buffer, + index_size, + min_index, max_index, + prim, + start, count ); + } + else { + if (index_buffer) { + ret = retry_draw_range_elements( svga, + index_buffer, + index_size, + min_index, + max_index, + prim, + start, + count, + TRUE ); + } + else { + ret = retry_draw_arrays( svga, + prim, + start, + count, + TRUE ); + } + } + + if (SVGA_DEBUG & DEBUG_FLUSH) { + static unsigned id; + debug_printf("%s %d\n", __FUNCTION__, id++); + if (id > 1300) + util_time_sleep( 2000 ); + + svga_hwtnl_flush_retry( svga ); + svga_context_flush(svga, NULL); + } + + return ret == PIPE_OK; +} + + +static boolean +svga_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned prim, unsigned start, unsigned count) +{ + return svga_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + prim, start, count ); +} + +static boolean +svga_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return svga_draw_range_elements(pipe, NULL, 0, + start, start + count - 1, + prim, + start, count); +} + + +void svga_init_draw_functions( struct svga_context *svga ) +{ + svga->pipe.draw_arrays = svga_draw_arrays; + svga->pipe.draw_elements = svga_draw_elements; + svga->pipe.draw_range_elements = svga_draw_range_elements; +} diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c new file mode 100644 index 0000000000..0becb0765a --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -0,0 +1,71 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
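
retry_draw_range_elements() and retry_draw_arrays() above apply the flush-and-retry idiom at the draw level, using the do_retry argument as a recursion guard: after a flush they call themselves exactly once more with FALSE, so a second failure is returned rather than retried forever. The control flow, condensed (try_emit_draw is a hypothetical stand-in for the state update plus svga_hwtnl_* calls; not part of the driver):

/* Illustrative shape of the draw-level retry. */
static enum pipe_error
retry_draw_example( struct svga_context *svga, boolean do_retry )
{
   enum pipe_error ret = try_emit_draw( svga );    /* hypothetical */

   if (ret != PIPE_OK && do_retry) {
      /* Out of space: flush, then try exactly once more. */
      svga_context_flush( svga, NULL );
      return retry_draw_example( svga, FALSE );
   }
   return ret;
}
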
+ * + **********************************************************/ + +#include "pipe/p_defines.h" +#include "svga_screen.h" +#include "svga_screen_texture.h" +#include "svga_context.h" +#include "svga_winsys.h" +#include "svga_draw.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" + + + + +static void svga_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct svga_context *svga = svga_context(pipe); + int i; + + /* Emit buffered drawing commands. + */ + svga_hwtnl_flush_retry( svga ); + + /* Emit back-copy from render target view to texture. + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (svga->curr.framebuffer.cbufs[i]) + svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]); + } + if (svga->curr.framebuffer.zsbuf) + svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf); + + /* Flush command queue. + */ + svga_context_flush(svga, fence); + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n", + __FUNCTION__, flags, fence ? *fence : 0x0); +} + + +void svga_init_flush_functions( struct svga_context *svga ) +{ + svga->pipe.flush = svga_flush; +} diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c new file mode 100644 index 0000000000..e3be840d92 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -0,0 +1,124 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_text.h" + +#include "svga_screen.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_tgsi.h" +#include "svga_hw_reg.h" +#include "svga_cmd.h" +#include "svga_draw.h" +#include "svga_debug.h" + + +/*********************************************************************** + * Fragment shaders + */ + +static void * +svga_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_fragment_shader *fs; + + fs = CALLOC_STRUCT(svga_fragment_shader); + if (!fs) + return NULL; + + fs->base.tokens = tgsi_dup_tokens(templ->tokens); + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(fs->base.tokens, &fs->base.info); + + fs->base.id = svga->debug.shader_id++; + fs->base.use_sm30 = svgascreen->use_ps30; + + if (SVGA_DEBUG & DEBUG_TGSI || 0) { + debug_printf("%s id: %u, inputs: %u, outputs: %u\n", + __FUNCTION__, fs->base.id, + fs->base.info.num_inputs, fs->base.info.num_outputs); + } + + return fs; +} + +static void +svga_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader; + struct svga_context *svga = svga_context(pipe); + + svga->curr.fs = fs; + svga->dirty |= SVGA_NEW_FS; +} + +static +void svga_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader; + struct svga_shader_result *result, *tmp; + enum pipe_error ret; + + svga_hwtnl_flush_retry( svga ); + + for (result = fs->base.results; result; result = tmp ) { + tmp = result->next; + + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_PS ); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_PS ); + assert(ret == PIPE_OK); + } + + svga_destroy_shader_result( result ); + } + + FREE((void *)fs->base.tokens); + FREE(fs); +} + + +void svga_init_fs_functions( struct svga_context *svga ) +{ + svga->pipe.create_fs_state = svga_create_fs_state; + svga->pipe.bind_fs_state = svga_bind_fs_state; + svga->pipe.delete_fs_state = svga_delete_fs_state; +} + diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c new file mode 100644 index 0000000000..58cb1e6e23 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -0,0 +1,187 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
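
svga_create_fs_state() above duplicates the TGSI tokens and runs tgsi_scan_shader() once up front; compiled variants (the fs->base.results list) are produced elsewhere and torn down in svga_delete_fs_state() with the usual flush-and-retry around SVGA3D_DestroyShader(). A minimal caller-side sketch of the lifecycle through the hooks installed above, assuming 'tokens' is a valid TGSI program owned by the caller:

/* Illustrative shader-object lifecycle; create duplicates the tokens
 * internally, so the caller keeps ownership of 'tokens'.
 */
struct pipe_shader_state templ;
void *fs;

templ.tokens = tokens;
fs = pipe->create_fs_state( pipe, &templ );
pipe->bind_fs_state( pipe, fs );
/* ... draw ... */
pipe->bind_fs_state( pipe, NULL );
pipe->delete_fs_state( pipe, fs );
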
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "svga_context.h" +#include "svga_screen_texture.h" +#include "svga_state.h" +#include "svga_winsys.h" + +#include "svga_hw_reg.h" + + + + +static void svga_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct svga_context *svga = svga_context(pipe); + + memcpy( &svga->curr.scissor, scissor, sizeof(*scissor) ); + svga->dirty |= SVGA_NEW_SCISSOR; +} + + +static void svga_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + /* overridden by the draw module */ +} + + +void svga_cleanup_framebuffer(struct svga_context *svga) +{ + struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + int i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&curr->cbufs[i], NULL); + pipe_surface_reference(&hw->cbufs[i], NULL); + } + + pipe_surface_reference(&curr->zsbuf, NULL); + pipe_surface_reference(&hw->zsbuf, NULL); +} + + +#define DEPTH_BIAS_SCALE_FACTOR_D16 ((float)(1<<15)) +#define DEPTH_BIAS_SCALE_FACTOR_D24S8 ((float)(1<<23)) +#define DEPTH_BIAS_SCALE_FACTOR_D32 ((float)(1<<31)) + + +static void svga_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct svga_context *svga = svga_context(pipe); + struct pipe_framebuffer_state *dst = &svga->curr.framebuffer; + boolean propagate = FALSE; + int i; + + dst->width = fb->width; + dst->height = fb->height; + dst->nr_cbufs = fb->nr_cbufs; + + /* check if we need to propaget any of the target surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i]) + if (svga_surface_needs_propagation(dst->cbufs[i])) + propagate = TRUE; + } + + if (propagate) { + /* make sure that drawing calls comes before propagation calls */ + svga_hwtnl_flush_retry( svga ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i]) + svga_propagate_surface(pipe, dst->cbufs[i]); + } + + /* XXX: Actually the virtual hardware may support rendertargets with + * different size, depending on the host API and driver, but since we cannot + * know that make no such assumption here. 
*/ + for(i = 0; i < fb->nr_cbufs; ++i) { + if (fb->zsbuf && fb->cbufs[i]) { + assert(fb->zsbuf->width == fb->cbufs[i]->width); + assert(fb->zsbuf->height == fb->cbufs[i]->height); + } + } + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + pipe_surface_reference(&dst->cbufs[i], fb->cbufs[i]); + pipe_surface_reference(&dst->zsbuf, fb->zsbuf); + + + if (svga->curr.framebuffer.zsbuf) + { + switch (svga->curr.framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D16; + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D24S8; + break; + case PIPE_FORMAT_Z32_UNORM: + svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D32; + break; + case PIPE_FORMAT_Z32_FLOAT: + svga->curr.depthscale = 1.0f / ((float)(1<<23)); + break; + default: + svga->curr.depthscale = 0.0f; + break; + } + } + else { + svga->curr.depthscale = 0.0f; + } + + svga->dirty |= SVGA_NEW_FRAME_BUFFER; +} + + + +static void svga_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.clip = *clip; /* struct copy */ + + svga->dirty |= SVGA_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void svga_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.viewport = *viewport; /* struct copy */ + + svga->dirty |= SVGA_NEW_VIEWPORT; +} + + + +void svga_init_misc_functions( struct svga_context *svga ) +{ + svga->pipe.set_scissor_state = svga_set_scissor_state; + svga->pipe.set_polygon_stipple = svga_set_polygon_stipple; + svga->pipe.set_framebuffer_state = svga_set_framebuffer_state; + svga->pipe.set_clip_state = svga_set_clip_state; + svga->pipe.set_viewport_state = svga_set_viewport_state; +} + + diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c new file mode 100644 index 0000000000..01336b0a2c --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -0,0 +1,267 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
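
svga_set_framebuffer_state() above picks a per-format depth-bias scale from the macros defined earlier (1 / 2^15 for Z16, 1 / 2^23 for the 24-bit formats, and so on). One small caveat with those macros as written: (1<<31) shifts into the sign bit of a signed int, which is undefined behaviour in C. The same value can be spelled without that, as in this sketch (an illustrative alternative, not part of the diff):

/* Equivalent D32 scale factor without the signed overflow. */
#define DEPTH_BIAS_SCALE_FACTOR_D32_ALT ((float)(1u << 31))   /* 2147483648.0f */
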
+ * + **********************************************************/ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" + +#include "svga_cmd.h" +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_draw.h" +#include "svga_debug.h" + + +/* Fixme: want a public base class for all pipe structs, even if there + * isn't much in them. + */ +struct pipe_query { + int dummy; +}; + +struct svga_query { + struct pipe_query base; + SVGA3dQueryType type; + struct svga_winsys_buffer *hwbuf; + volatile SVGA3dQueryResult *queryResult; + struct pipe_fence_handle *fence; +}; + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct svga_query * +svga_query( struct pipe_query *q ) +{ + return (struct svga_query *)q; +} + +static boolean svga_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result); + +static struct pipe_query *svga_create_query( struct pipe_context *pipe, + unsigned query_type ) +{ + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_query *sq; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + sq = CALLOC_STRUCT(svga_query); + if (!sq) + goto no_sq; + + sq->type = SVGA3D_QUERYTYPE_OCCLUSION; + + sq->hwbuf = svga_winsys_buffer_create(svgascreen, + 1, + SVGA_BUFFER_USAGE_PINNED, + sizeof *sq->queryResult); + if(!sq->hwbuf) + goto no_hwbuf; + + sq->queryResult = (SVGA3dQueryResult *)sws->buffer_map(sws, + sq->hwbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!sq->queryResult) + goto no_query_result; + + sq->queryResult->totalSize = sizeof *sq->queryResult; + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + + /* + * We request the buffer to be pinned and assume it is always mapped. + * + * The reason is that we don't want to wait for fences when checking the + * query status. + */ + sws->buffer_unmap(sws, sq->hwbuf); + + return &sq->base; + +no_query_result: + sws->buffer_destroy(sws, sq->hwbuf); +no_hwbuf: + FREE(sq); +no_sq: + return NULL; +} + +static void svga_destroy_query(struct pipe_context *pipe, + struct pipe_query *q) +{ + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_query *sq = svga_query( q ); + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + sws->buffer_destroy(sws, sq->hwbuf); + sws->fence_reference(sws, &sq->fence, NULL); + FREE(sq); +} + +static void svga_begin_query(struct pipe_context *pipe, + struct pipe_query *q) +{ + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_context *svga = svga_context( pipe ); + struct svga_query *sq = svga_query( q ); + enum pipe_error ret; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + assert(!svga->sq); + + /* Need to flush out buffered drawing commands so that they don't + * get counted in the query results. + */ + svga_hwtnl_flush_retry(svga); + + if(sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { + /* The application doesn't care for the pending query result. We cannot + * let go the existing buffer and just get a new one because its storage + * may be reused for other purposes and clobbered by the host when it + * determines the query result. 
So the only option here is to wait for + * the existing query's result -- not a big deal, given that no sane + * application would do this. + */ + uint64_t result; + + svga_get_query_result(pipe, q, TRUE, &result); + + assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); + } + + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + sws->fence_reference(sws, &sq->fence, NULL); + + ret = SVGA3D_BeginQuery(svga->swc, sq->type); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BeginQuery(svga->swc, sq->type); + assert(ret == PIPE_OK); + } + + svga->sq = sq; +} + +static void svga_end_query(struct pipe_context *pipe, + struct pipe_query *q) +{ + struct svga_context *svga = svga_context( pipe ); + struct svga_query *sq = svga_query( q ); + enum pipe_error ret; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(svga->sq == sq); + + svga_hwtnl_flush_retry(svga); + + /* Set to PENDING before sending EndQuery. */ + sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; + + ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf); + assert(ret == PIPE_OK); + } + + /* TODO: Delay flushing. We don't really need to flush here, just ensure + * that there is one flush before svga_get_query_result attempts to get the + * result */ + svga_context_flush(svga, NULL); + + svga->sq = NULL; +} + +static boolean svga_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result) +{ + struct svga_context *svga = svga_context( pipe ); + struct svga_screen *svgascreen = svga_screen( pipe->screen ); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_query *sq = svga_query( q ); + SVGA3dQueryState state; + + SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__); + + /* The query status won't be updated by the host unless + * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause a + * synchronous wait on the host */ + if(!sq->fence) { + enum pipe_error ret; + + ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf); + assert(ret == PIPE_OK); + } + + svga_context_flush(svga, &sq->fence); + + assert(sq->fence); + } + + state = sq->queryResult->state; + if(state == SVGA3D_QUERYSTATE_PENDING) { + if(!wait) + return FALSE; + + sws->fence_finish(sws, sq->fence, 0); + + state = sq->queryResult->state; + } + + assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || + state == SVGA3D_QUERYSTATE_FAILED); + + *result = (uint64_t)sq->queryResult->result32; + + SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result); + + return TRUE; +} + + + +void svga_init_query_functions( struct svga_context *svga ) +{ + svga->pipe.create_query = svga_create_query; + svga->pipe.destroy_query = svga_destroy_query; + svga->pipe.begin_query = svga_begin_query; + svga->pipe.end_query = svga_end_query; + svga->pipe.get_query_result = svga_get_query_result; +} diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c new file mode 100644 index 0000000000..b03f8eb9cf --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -0,0 +1,250 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
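The query implementation above is consumed through the generic pipe_context query hooks. A minimal usage sketch, with example_count_samples and draw_scene as hypothetical names and error handling omitted:

#include "pipe/p_context.h"
#include "pipe/p_defines.h"

/* Hypothetical helper, purely illustrative. */
static uint64_t
example_count_samples(struct pipe_context *pipe,
                      void (*draw_scene)(struct pipe_context *))
{
   struct pipe_query *q = pipe->create_query(pipe, PIPE_QUERY_OCCLUSION_COUNTER);
   uint64_t samples = 0;

   pipe->begin_query(pipe, q);
   draw_scene(pipe);
   pipe->end_query(pipe, q);

   /* wait = TRUE blocks until the host has written the result back. */
   pipe->get_query_result(pipe, q, TRUE, &samples);
   pipe->destroy_query(pipe, q);
   return samples;
}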
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_state.h" + +#include "svga_hw_reg.h" + +/* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW. + */ +static SVGA3dFace svga_translate_cullmode( unsigned mode, + unsigned front_winding ) +{ + switch (mode) { + case PIPE_WINDING_NONE: + return SVGA3D_FACE_NONE; + case PIPE_WINDING_CCW: + return SVGA3D_FACE_BACK; + case PIPE_WINDING_CW: + return SVGA3D_FACE_FRONT; + case PIPE_WINDING_BOTH: + return SVGA3D_FACE_FRONT_BACK; + default: + assert(0); + return SVGA3D_FACE_NONE; + } +} + +static SVGA3dShadeMode svga_translate_flatshade( unsigned mode ) +{ + return mode ? SVGA3D_SHADEMODE_FLAT : SVGA3D_SHADEMODE_SMOOTH; +} + + +static void * +svga_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ) +{ + struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); + /* need this for draw module. */ + rast->templ = *templ; + + /* light_twoside - XXX: need fragment shader varient */ + /* poly_smooth - XXX: no fallback available */ + /* poly_stipple_enable - draw module */ + /* point_sprite - ? */ + /* point_size_per_vertex - ? */ + /* sprite_coord_mode - ??? 
*/ + /* bypass_vs_viewport_and_clip - handled by viewport setup */ + /* flatshade_first - handled by index translation */ + /* gl_rasterization_rules - XXX - viewport code */ + /* line_width - draw module */ + /* fill_cw, fill_ccw - draw module or index translation */ + + rast->shademode = svga_translate_flatshade( templ->flatshade ); + rast->cullmode = svga_translate_cullmode( templ->cull_mode, + templ->front_winding ); + rast->scissortestenable = templ->scissor; + rast->multisampleantialias = templ->multisample; + rast->antialiasedlineenable = templ->line_smooth; + rast->lastpixel = templ->line_last_pixel; + rast->pointsize = templ->point_size; + rast->pointsize_min = templ->point_size_min; + rast->pointsize_max = templ->point_size_max; + rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + + /* Use swtnl + decomposition implement these: + */ + if (templ->poly_stipple_enable) + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + + if (templ->line_width != 1.0 && + templ->line_width != 0.0) + rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + + if (templ->line_stipple_enable) { + /* LinePattern not implemented on all backends. + */ + if (0) { + SVGA3dLinePattern lp; + lp.repeat = templ->line_stipple_factor + 1; + lp.pattern = templ->line_stipple_pattern; + rast->linepattern = lp.uintValue; + } + else { + rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + } + } + + if (templ->point_smooth) + rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; + + { + boolean offset_cw = templ->offset_cw; + boolean offset_ccw = templ->offset_ccw; + boolean offset = 0; + int fill_cw = templ->fill_cw; + int fill_ccw = templ->fill_ccw; + int fill = PIPE_POLYGON_MODE_FILL; + + switch (templ->cull_mode) { + case PIPE_WINDING_BOTH: + offset = 0; + fill = PIPE_POLYGON_MODE_FILL; + break; + + case PIPE_WINDING_CW: + offset = offset_ccw; + fill = fill_ccw; + break; + + case PIPE_WINDING_CCW: + offset = offset_cw; + fill = fill_cw; + break; + + case PIPE_WINDING_NONE: + if (fill_cw != fill_ccw || offset_cw != offset_ccw) + { + /* Always need the draw module to work out different + * front/back fill modes: + */ + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + else { + offset = offset_ccw; + fill = fill_ccw; + } + break; + + default: + assert(0); + break; + } + + /* Unfilled primitive modes aren't implemented on all virtual + * hardware. We can do some unfilled processing with index + * translation, but otherwise need the draw module: + */ + if (fill != PIPE_POLYGON_MODE_FILL && + (templ->flatshade || + templ->light_twoside || + offset || + templ->cull_mode != PIPE_WINDING_NONE)) + { + fill = PIPE_POLYGON_MODE_FILL; + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + + /* If we are decomposing to lines, and lines need the pipeline, + * then we also need the pipeline for tris. 
+ */ + if (fill == PIPE_POLYGON_MODE_LINE && + (rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)) + { + fill = PIPE_POLYGON_MODE_FILL; + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + + /* Similarly for points: + */ + if (fill == PIPE_POLYGON_MODE_POINT && + (rast->need_pipeline & SVGA_PIPELINE_FLAG_POINTS)) + { + fill = PIPE_POLYGON_MODE_FILL; + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + + if (offset) { + rast->slopescaledepthbias = templ->offset_scale; + rast->depthbias = templ->offset_units; + } + + rast->hw_unfilled = fill; + } + + + + + if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { + /* Turn off stuff which will get done in the draw module: + */ + rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + rast->slopescaledepthbias = 0; + rast->depthbias = 0; + } + + return rast; +} + +static void svga_bind_rasterizer_state( struct pipe_context *pipe, + void *state ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state; + + svga->curr.rast = raster; + + draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL); + + svga->dirty |= SVGA_NEW_RAST; +} + +static void svga_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + FREE(raster); +} + + +void svga_init_rasterizer_functions( struct svga_context *svga ) +{ + svga->pipe.create_rasterizer_state = svga_create_rasterizer_state; + svga->pipe.bind_rasterizer_state = svga_bind_rasterizer_state; + svga->pipe.delete_rasterizer_state = svga_delete_rasterizer_state; +} + + +/*********************************************************************** + * Hardware state update + */ + diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c new file mode 100644 index 0000000000..b4e57c5d15 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -0,0 +1,243 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
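The rasterizer state above follows the usual Gallium constant-state-object lifetime: translate once at create time, bind cheaply per draw, free on delete. A small sketch of the calling side, using only template fields referenced in this file; example_make_cw_cull_state and pipe are hypothetical:

#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"

/* Hypothetical helper, purely illustrative. */
static void *
example_make_cw_cull_state(struct pipe_context *pipe)
{
   struct pipe_rasterizer_state templ;

   memset(&templ, 0, sizeof templ);
   templ.front_winding = PIPE_WINDING_CW;
   templ.cull_mode = PIPE_WINDING_CCW;   /* cull CCW-wound triangles */
   templ.point_size = 1.0f;
   templ.line_width = 1.0f;

   return pipe->create_rasterizer_state(pipe, &templ);
}

/* Per draw:  pipe->bind_rasterizer_state(pipe, cso);
 * Teardown:  pipe->delete_rasterizer_state(pipe, cso);
 */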
+ * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "tgsi/tgsi_parse.h" + +#include "svga_context.h" +#include "svga_screen_texture.h" +#include "svga_state.h" + +#include "svga_hw_reg.h" + +#include "svga_debug.h" + +static INLINE unsigned +translate_wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return SVGA3D_TEX_ADDRESS_WRAP; + + case PIPE_TEX_WRAP_CLAMP: + return SVGA3D_TEX_ADDRESS_CLAMP; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* Unfortunately SVGA3D_TEX_ADDRESS_EDGE not respected by + * hardware. + */ + return SVGA3D_TEX_ADDRESS_CLAMP; + + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return SVGA3D_TEX_ADDRESS_BORDER; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return SVGA3D_TEX_ADDRESS_MIRROR; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return SVGA3D_TEX_ADDRESS_MIRRORONCE; + + default: + assert(0); + return SVGA3D_TEX_ADDRESS_WRAP; + } +} + +static INLINE unsigned translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC; + default: + assert(0); + return SVGA3D_TEX_FILTER_NEAREST; + } +} + +static INLINE unsigned translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: return SVGA3D_TEX_FILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; + default: + assert(0); + return SVGA3D_TEX_FILTER_NONE; + } +} + +static void * +svga_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state ); + + cso->mipfilter = translate_mip_filter(sampler->min_mip_filter); + cso->magfilter = translate_img_filter( sampler->mag_img_filter ); + cso->minfilter = translate_img_filter( sampler->min_img_filter ); + cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 ); + cso->lod_bias = sampler->lod_bias; + cso->addressu = translate_wrap_mode(sampler->wrap_s); + cso->addressv = translate_wrap_mode(sampler->wrap_t); + cso->addressw = translate_wrap_mode(sampler->wrap_r); + cso->normalized_coords = sampler->normalized_coords; + cso->compare_mode = sampler->compare_mode; + cso->compare_func = sampler->compare_func; + + { + ubyte r = float_to_ubyte(sampler->border_color[0]); + ubyte g = float_to_ubyte(sampler->border_color[1]); + ubyte b = float_to_ubyte(sampler->border_color[2]); + ubyte a = float_to_ubyte(sampler->border_color[3]); + + util_pack_color_ub( r, g, b, a, + PIPE_FORMAT_B8G8R8A8_UNORM, + &cso->bordercolor ); + } + + /* No SVGA3D support for: + * - min/max LOD clamping + */ + cso->min_lod = 0; + cso->view_min_lod = MAX2(sampler->min_lod, 0); + cso->view_max_lod = MAX2(sampler->max_lod, 0); + + /* Use min_mipmap */ + if (svga->debug.use_min_mipmap) { + if (cso->view_min_lod == cso->view_max_lod) { + cso->min_lod = cso->view_min_lod; + cso->view_min_lod = 0; + cso->view_max_lod = 1000; /* Just a high number */ + cso->mipfilter = SVGA3D_TEX_FILTER_NONE; + } + } + + SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n", + 
cso->min_lod, cso->view_min_lod, cso->view_max_lod, + cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING"); + + return cso; +} + +static void svga_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct svga_context *svga = svga_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == svga->curr.num_samplers && + !memcmp(svga->curr.sampler, sampler, num * sizeof(void *))) { + debug_printf("sampler noop\n"); + return; + } + + for (i = 0; i < num; i++) + svga->curr.sampler[i] = sampler[i]; + + for (i = num; i < svga->curr.num_samplers; i++) + svga->curr.sampler[i] = NULL; + + svga->curr.num_samplers = num; + svga->dirty |= SVGA_NEW_SAMPLER; +} + +static void svga_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE(sampler); +} + + +static void svga_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct svga_context *svga = svga_context(pipe); + unsigned flag_1d = 0; + unsigned flag_srgb = 0; + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == svga->curr.num_textures && + !memcmp(svga->curr.texture, texture, num * sizeof(struct pipe_texture *))) { + if (0) debug_printf("texture noop\n"); + return; + } + + for (i = 0; i < num; i++) { + pipe_texture_reference(&svga->curr.texture[i], + texture[i]); + + if (!texture[i]) + continue; + + if (texture[i]->format == PIPE_FORMAT_A8R8G8B8_SRGB) + flag_srgb |= 1 << i; + + if (texture[i]->target == PIPE_TEXTURE_1D) + flag_1d |= 1 << i; + } + + for (i = num; i < svga->curr.num_textures; i++) + pipe_texture_reference(&svga->curr.texture[i], + NULL); + + svga->curr.num_textures = num; + svga->dirty |= SVGA_NEW_TEXTURE_BINDING; + + if (flag_srgb != svga->curr.tex_flags.flag_srgb || + flag_1d != svga->curr.tex_flags.flag_1d) + { + svga->dirty |= SVGA_NEW_TEXTURE_FLAGS; + svga->curr.tex_flags.flag_1d = flag_1d; + svga->curr.tex_flags.flag_srgb = flag_srgb; + } +} + + + +void svga_init_sampler_functions( struct svga_context *svga ) +{ + svga->pipe.create_sampler_state = svga_create_sampler_state; + svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states; + svga->pipe.delete_sampler_state = svga_delete_sampler_state; + svga->pipe.set_fragment_sampler_textures = svga_set_sampler_textures; +} + + + diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c new file mode 100644 index 0000000000..28e2787e0d --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -0,0 +1,115 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "svga_screen.h" +#include "svga_screen_buffer.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_winsys.h" + +#include "svga_hw_reg.h" + + +static void svga_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct svga_context *svga = svga_context(pipe); + unsigned i; + boolean any_user_buffer = FALSE; + + /* Check for no change */ + if (count == svga->curr.num_vertex_buffers && + memcmp(svga->curr.vb, buffers, count * sizeof buffers[0]) == 0) + return; + + /* Adjust refcounts */ + for (i = 0; i < count; i++) { + pipe_buffer_reference(&svga->curr.vb[i].buffer, buffers[i].buffer); + if (svga_buffer(buffers[i].buffer)->user) + any_user_buffer = TRUE; + } + + for ( ; i < svga->curr.num_vertex_buffers; i++) + pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL); + + /* Copy remaining data */ + memcpy(svga->curr.vb, buffers, count * sizeof buffers[0]); + svga->curr.num_vertex_buffers = count; + svga->curr.any_user_vertex_buffers = any_user_buffer; + + svga->dirty |= SVGA_NEW_VBUFFER; +} + +static void svga_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct svga_context *svga = svga_context(pipe); + unsigned i; + + for (i = 0; i < count; i++) + svga->curr.ve[i] = elements[i]; + + svga->curr.num_vertex_elements = count; + svga->dirty |= SVGA_NEW_VELEMENT; +} + + +static void svga_set_edgeflags(struct pipe_context *pipe, + const unsigned *bitfield) +{ + struct svga_context *svga = svga_context(pipe); + + if (bitfield != NULL || svga->curr.edgeflags != NULL) { + svga->curr.edgeflags = bitfield; + svga->dirty |= SVGA_NEW_EDGEFLAGS; + } +} + + +void svga_cleanup_vertex_state( struct svga_context *svga ) +{ + unsigned i; + + for (i = 0 ; i < svga->curr.num_vertex_buffers; i++) + pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL); +} + + +void svga_init_vertex_functions( struct svga_context *svga ) +{ + svga->pipe.set_vertex_buffers = svga_set_vertex_buffers; + svga->pipe.set_vertex_elements = svga_set_vertex_elements; + svga->pipe.set_edgeflags = svga_set_edgeflags; +} + + diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c new file mode 100644 index 0000000000..c104c41f5f --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -0,0 +1,189 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_text.h" + +#include "svga_screen.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_tgsi.h" +#include "svga_hw_reg.h" +#include "svga_cmd.h" +#include "svga_debug.h" + + +static const struct tgsi_token *substitute_vs( + unsigned shader_id, + const struct tgsi_token *old_tokens ) +{ +#if 0 + if (shader_id == 12) { + static struct tgsi_token tokens[300]; + + const char *text = + "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL IN[2]\n" + "DCL OUT[0], POSITION\n" + "DCL TEMP[0..4]\n" + "IMM FLT32 { 1.0000, 1.0000, 1.0000, 1.0000 }\n" + "IMM FLT32 { 0.45, 1.0000, 1.0000, 1.0000 }\n" + "IMM FLT32 { 1.297863, 0.039245, 0.035993, 0.035976}\n" + "IMM FLT32 { -0.019398, 1.696131, -0.202151, -0.202050 }\n" + "IMM FLT32 { 0.051711, -0.348713, -0.979204, -0.978714 }\n" + "IMM FLT32 { 0.000000, 0.000003, 139.491577, 141.421356 }\n" + "DCL CONST[0..7]\n" + "DCL CONST[9..16]\n" + " MOV TEMP[2], IMM[0]\n" + + " MOV TEMP[2].xyz, IN[2]\n" + " MOV TEMP[2].xyz, IN[0]\n" + " MOV TEMP[2].xyz, IN[1]\n" + + " MUL TEMP[1], IMM[3], TEMP[2].yyyy\n" + " MAD TEMP[3], IMM[2], TEMP[2].xxxx, TEMP[1]\n" + " MAD TEMP[1], IMM[4], TEMP[2].zzzz, TEMP[3]\n" + " MAD TEMP[4], IMM[5], TEMP[2].wwww, TEMP[1]\n" + + " MOV OUT[0], TEMP[4]\n" + " END\n"; + + if (!tgsi_text_translate( text, + tokens, + Elements(tokens) )) + { + assert(0); + return NULL; + } + + return tokens; + } +#endif + + return old_tokens; +} + + +/*********************************************************************** + * Vertex shaders + */ + +static void * +svga_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader); + if (!vs) + return NULL; + + /* substitute a debug shader? + */ + vs->base.tokens = tgsi_dup_tokens(substitute_vs(svga->debug.shader_id, + templ->tokens)); + + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(vs->base.tokens, &vs->base.info); + + { + /* Need to do construct a new template in case we substitued a + * debug shader. 
+ */ + struct pipe_shader_state tmp2 = *templ; + tmp2.tokens = vs->base.tokens; + vs->draw_shader = draw_create_vertex_shader(svga->swtnl.draw, &tmp2); + } + + vs->base.id = svga->debug.shader_id++; + vs->base.use_sm30 = svgascreen->use_vs30; + + if (SVGA_DEBUG & DEBUG_TGSI || 0) { + debug_printf("%s id: %u, inputs: %u, outputs: %u\n", + __FUNCTION__, vs->base.id, + vs->base.info.num_inputs, vs->base.info.num_outputs); + } + + return vs; +} + +static void svga_bind_vs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; + struct svga_context *svga = svga_context(pipe); + + svga->curr.vs = vs; + svga->dirty |= SVGA_NEW_VS; +} + + +static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; + struct svga_shader_result *result, *tmp; + enum pipe_error ret; + + svga_hwtnl_flush_retry( svga ); + + draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader); + + for (result = vs->base.results; result; result = tmp ) { + tmp = result->next; + + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_VS ); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_VS ); + assert(ret == PIPE_OK); + } + + svga_destroy_shader_result( result ); + } + + FREE((void *)vs->base.tokens); + FREE(vs); +} + + +void svga_init_vs_functions( struct svga_context *svga ) +{ + svga->pipe.create_vs_state = svga_create_vs_state; + svga->pipe.bind_vs_state = svga_bind_vs_state; + svga->pipe.delete_vs_state = svga_delete_vs_state; +} + diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c new file mode 100644 index 0000000000..fc1b3c980e --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen.c @@ -0,0 +1,440 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
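The delete path above repeats the command-buffer idiom already used for BeginQuery and EndQuery: issue the SVGA3D command, and if it fails (typically because the command buffer is full), flush and retry once. A hedged sketch of how that recurring idiom could be factored, using only calls that appear in this patch; example_destroy_vs_retry is hypothetical:

#include "svga_cmd.h"
#include "svga_context.h"

/* Illustrative only; the same pattern is open-coded above. */
static enum pipe_error
example_destroy_vs_retry(struct svga_context *svga, unsigned id)
{
   enum pipe_error ret = SVGA3D_DestroyShader(svga->swc, id, SVGA3D_SHADERTYPE_VS);
   if (ret != PIPE_OK) {
      /* Most likely the command buffer was full: submit it and retry once. */
      svga_context_flush(svga, NULL);
      ret = SVGA3D_DestroyShader(svga->swc, id, SVGA3D_SHADERTYPE_VS);
   }
   return ret;
}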
+ * + **********************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "util/u_string.h" +#include "util/u_math.h" + +#include "svga_winsys.h" +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_texture.h" +#include "svga_screen_buffer.h" +#include "svga_cmd.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + + +#ifdef DEBUG +int SVGA_DEBUG = 0; + +static const struct debug_named_value svga_debug_flags[] = { + { "dma", DEBUG_DMA }, + { "tgsi", DEBUG_TGSI }, + { "pipe", DEBUG_PIPE }, + { "state", DEBUG_STATE }, + { "screen", DEBUG_SCREEN }, + { "tex", DEBUG_TEX }, + { "swtnl", DEBUG_SWTNL }, + { "const", DEBUG_CONSTS }, + { "viewport", DEBUG_VIEWPORT }, + { "views", DEBUG_VIEWS }, + { "perf", DEBUG_PERF }, + { "flush", DEBUG_FLUSH }, + { "sync", DEBUG_SYNC }, + { "cache", DEBUG_CACHE }, + {NULL, 0} +}; +#endif + +static const char * +svga_get_vendor( struct pipe_screen *pscreen ) +{ + return "VMware, Inc."; +} + + +static const char * +svga_get_name( struct pipe_screen *pscreen ) +{ +#ifdef DEBUG + /* Only return internal details in the DEBUG version: + */ + return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC; +#else + return "SVGA3D; build: RELEASE; "; +#endif +} + + + + +static float +svga_get_paramf(struct pipe_screen *screen, int param) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_winsys_screen *sws = svgascreen->sws; + SVGA3dDevCapResult result; + + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.0; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + /* Keep this to a reasonable size to avoid failures in + * conform/pntaa.c: + */ + return 80.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return svgascreen->use_ps30 && svgascreen->use_vs30; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_RENDER_TARGETS, &result)) + return 1; + if(!result.u) + return 1; + return MIN2(result.u, PIPE_MAX_COLOR_BUFS); + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return SVGA_MAX_TEXTURE_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return SVGA_MAX_TEXTURE_LEVELS; + + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: /* req. for GL 1.4 */ + return 1; + + case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. 
for GL 1.5 */ + return 1; + + default: + return 0; + } +} + + +/* This is a fairly pointless interface + */ +static int +svga_get_param(struct pipe_screen *screen, int param) +{ + return (int) svga_get_paramf( screen, param ); +} + + +static INLINE SVGA3dDevCapIndex +svga_translate_format_cap(enum pipe_format format) +{ + switch(format) { + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8; + + case PIPE_FORMAT_R5G6B5_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_R5G6B5; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4; + + case PIPE_FORMAT_Z16_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_Z_D16; + case PIPE_FORMAT_Z24S8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8; + case PIPE_FORMAT_Z24X8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8; + + case PIPE_FORMAT_A8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_ALPHA8; + case PIPE_FORMAT_L8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8; + + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + return SVGA3D_DEVCAP_SURFACEFMT_DXT1; + case PIPE_FORMAT_DXT3_RGBA: + return SVGA3D_DEVCAP_SURFACEFMT_DXT3; + case PIPE_FORMAT_DXT5_RGBA: + return SVGA3D_DEVCAP_SURFACEFMT_DXT5; + + default: + return SVGA3D_DEVCAP_MAX; + } +} + + +static boolean +svga_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + SVGA3dDevCapIndex index; + SVGA3dDevCapResult result; + + assert(tex_usage); + + /* Override host capabilities */ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch(format) { + + /* Often unsupported/problematic. This means we end up with the same + * visuals for all virtual hardware implementations. 
+ */ + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + return FALSE; + + /* Simulate ability to render into compressed textures */ + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + + default: + break; + } + } + + /* Try to query the host */ + index = svga_translate_format_cap(format); + if( index < SVGA3D_DEVCAP_MAX && + sws->get_cap(sws, index, &result) ) + { + SVGA3dSurfaceFormatCaps mask; + + mask.value = 0; + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + mask.offscreenRenderTarget = 1; + if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) + mask.zStencil = 1; + if (tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) + mask.texture = 1; + + if ((result.u & mask.value) == mask.value) + return TRUE; + else + return FALSE; + } + + /* Use our translate functions directly rather than relying on a + * duplicated list of supported formats which is prone to getting + * out of sync: + */ + if(tex_usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) + return svga_translate_format_render(format) != SVGA3D_FORMAT_INVALID; + else + return svga_translate_format(format) != SVGA3D_FORMAT_INVALID; +} + + +static void +svga_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + sws->fence_reference(sws, ptr, fence); +} + + +static int +svga_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + return sws->fence_signalled(sws, fence, flag); +} + + +static int +svga_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n", + __FUNCTION__, fence); + + return sws->fence_finish(sws, fence, flag); +} + + +static void +svga_destroy_screen( struct pipe_screen *screen ) +{ + struct svga_screen *svgascreen = svga_screen(screen); + + svga_screen_cache_cleanup(svgascreen); + + pipe_mutex_destroy(svgascreen->swc_mutex); + pipe_mutex_destroy(svgascreen->tex_mutex); + + svgascreen->swc->destroy(svgascreen->swc); + + svgascreen->sws->destroy(svgascreen->sws); + + FREE(svgascreen); +} + + +/** + * Create a new svga_screen object + */ +struct pipe_screen * +svga_screen_create(struct svga_winsys_screen *sws) +{ + struct svga_screen *svgascreen; + struct pipe_screen *screen; + SVGA3dDevCapResult result; + +#ifdef DEBUG + SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 ); +#endif + + svgascreen = CALLOC_STRUCT(svga_screen); + if (!svgascreen) + goto error1; + + svgascreen->debug.force_level_surface_view = + debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE); + svgascreen->debug.force_surface_view = + debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE); + svgascreen->debug.force_sampler_view = + debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE); + svgascreen->debug.no_surface_view = + debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE); + svgascreen->debug.no_sampler_view = + debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE); + + screen = &svgascreen->screen; + + screen->destroy = svga_destroy_screen; + screen->get_name = svga_get_name; + screen->get_vendor = svga_get_vendor; + screen->get_param = svga_get_param; + screen->get_paramf = svga_get_paramf; + 
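Everything reported by svga_get_paramf and svga_is_format_supported above is reached through the pipe_screen vtable being filled in here. A minimal sketch of a caller-side capability check, with screen and example_can_render_to as hypothetical names:

#include "pipe/p_defines.h"
#include "pipe/p_format.h"
#include "pipe/p_screen.h"

/* Hypothetical helper, purely illustrative. */
static boolean
example_can_render_to(struct pipe_screen *screen, enum pipe_format format)
{
   return screen->is_format_supported(screen, format,
                                      PIPE_TEXTURE_2D,
                                      PIPE_TEXTURE_USAGE_RENDER_TARGET,
                                      0 /* geom_flags */);
}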
screen->is_format_supported = svga_is_format_supported; + screen->fence_reference = svga_fence_reference; + screen->fence_signalled = svga_fence_signalled; + screen->fence_finish = svga_fence_finish; + svgascreen->sws = sws; + + svga_screen_init_texture_functions(screen); + svga_screen_init_buffer_functions(screen); + + svgascreen->use_ps30 = + sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && + result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE; + + svgascreen->use_vs30 = + sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) && + result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE; + +#if 1 + /* Shader model 2.0 is unsupported at the moment. */ + if(!svgascreen->use_ps30 || !svgascreen->use_vs30) + goto error2; +#else + if(debug_get_bool_option("SVGA_NO_SM30", FALSE)) + svgascreen->use_vs30 = svgascreen->use_ps30 = FALSE; +#endif + + svgascreen->swc = sws->context_create(sws); + if(!svgascreen->swc) + goto error2; + + pipe_mutex_init(svgascreen->tex_mutex); + pipe_mutex_init(svgascreen->swc_mutex); + + LIST_INITHEAD(&svgascreen->cached_buffers); + + svga_screen_cache_init(svgascreen); + + return screen; +error2: + FREE(svgascreen); +error1: + return NULL; +} + +void svga_screen_flush( struct svga_screen *svgascreen, + struct pipe_fence_handle **pfence ) +{ + struct pipe_fence_handle *fence = NULL; + + SVGA_DBG(DEBUG_PERF, "%s\n", __FUNCTION__); + + pipe_mutex_lock(svgascreen->swc_mutex); + svgascreen->swc->flush(svgascreen->swc, &fence); + pipe_mutex_unlock(svgascreen->swc_mutex); + + svga_screen_cache_flush(svgascreen, fence); + + if(pfence) + *pfence = fence; + else + svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL); +} + +struct svga_winsys_screen * +svga_winsys_screen(struct pipe_screen *screen) +{ + return svga_screen(screen)->sws; +} + +#ifdef DEBUG +struct svga_screen * +svga_screen(struct pipe_screen *screen) +{ + assert(screen); + assert(screen->destroy == svga_destroy_screen); + return (struct svga_screen *)screen; +} +#endif diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h new file mode 100644 index 0000000000..b94ca7fc1c --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen.h @@ -0,0 +1,95 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#ifndef SVGA_SCREEN_H +#define SVGA_SCREEN_H + + +#include "pipe/p_screen.h" +#include "pipe/p_thread.h" + +#include "util/u_double_list.h" + +#include "svga_screen_cache.h" + + +struct svga_winsys_screen; +struct svga_winsys_context; +struct SVGACmdMemory; + +#define SVGA_COMBINE_USERBUFFERS 1 + +/** + * Subclass of pipe_screen + */ +struct svga_screen +{ + struct pipe_screen screen; + struct svga_winsys_screen *sws; + + unsigned use_ps30; + unsigned use_vs30; + + struct { + boolean force_level_surface_view; + boolean force_surface_view; + boolean no_surface_view; + boolean force_sampler_view; + boolean no_sampler_view; + } debug; + + /* The screen needs its own context */ + struct svga_winsys_context *swc; + struct SVGACmdMemory *fifo; + + unsigned texture_timestamp; + pipe_mutex tex_mutex; + pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */ + + /** + * List of buffers with cached GMR. Ordered from the most recently used to + * the least recently used + */ + struct list_head cached_buffers; + + struct svga_host_surface_cache cache; +}; + +#ifndef DEBUG +/** cast wrapper */ +static INLINE struct svga_screen * +svga_screen(struct pipe_screen *pscreen) +{ + return (struct svga_screen *) pscreen; +} +#else +struct svga_screen * +svga_screen(struct pipe_screen *screen); +#endif + +void svga_screen_flush( struct svga_screen *svga_screen, + struct pipe_fence_handle **pfence ); + +#endif /* SVGA_SCREEN_H */ diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c new file mode 100644 index 0000000000..1f8a889672 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -0,0 +1,824 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_debug.h" + + +/** + * Vertex and index buffers have to be treated slightly differently from + * regular guest memory regions because the SVGA device sees them as + * surfaces, and the state tracker can create/destroy without the pipe + * driver, therefore we must do the uploads from the vws. + */ +static INLINE boolean +svga_buffer_needs_hw_storage(unsigned usage) +{ + return usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX); +} + + +static INLINE enum pipe_error +svga_buffer_create_host_surface(struct svga_screen *ss, + struct svga_buffer *sbuf) +{ + if(!sbuf->handle) { + sbuf->key.flags = 0; + + sbuf->key.format = SVGA3D_BUFFER; + if(sbuf->base.usage & PIPE_BUFFER_USAGE_VERTEX) + sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER; + if(sbuf->base.usage & PIPE_BUFFER_USAGE_INDEX) + sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER; + + sbuf->key.size.width = sbuf->base.size; + sbuf->key.size.height = 1; + sbuf->key.size.depth = 1; + + sbuf->key.numFaces = 1; + sbuf->key.numMipLevels = 1; + sbuf->key.cachable = 1; + + SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->base.size); + + sbuf->handle = svga_screen_surface_create(ss, &sbuf->key); + if(!sbuf->handle) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* Always set the discard flag on the first time the buffer is written + * as svga_screen_surface_create might have passed a recycled host + * buffer. 
+ */ + sbuf->hw.flags.discard = TRUE; + + SVGA_DBG(DEBUG_DMA, " --> got sid %p sz %d (buffer)\n", sbuf->handle, sbuf->base.size); + } + + return PIPE_OK; +} + + +static INLINE void +svga_buffer_destroy_host_surface(struct svga_screen *ss, + struct svga_buffer *sbuf) +{ + if(sbuf->handle) { + SVGA_DBG(DEBUG_DMA, " ungrab sid %p sz %d\n", sbuf->handle, sbuf->base.size); + svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle); + } +} + + +static INLINE void +svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf) +{ + struct svga_winsys_screen *sws = ss->sws; + + assert(!sbuf->map.count); + assert(sbuf->hw.buf); + if(sbuf->hw.buf) { + sws->buffer_destroy(sws, sbuf->hw.buf); + sbuf->hw.buf = NULL; + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); +#ifdef DEBUG + sbuf->head.next = sbuf->head.prev = NULL; +#endif + } +} + +static INLINE enum pipe_error +svga_buffer_backup(struct svga_screen *ss, struct svga_buffer *sbuf) +{ + if (sbuf->hw.buf && sbuf->hw.num_ranges) { + void *src; + + if (!sbuf->swbuf) + sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment); + if (!sbuf->swbuf) + return PIPE_ERROR_OUT_OF_MEMORY; + + src = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, + PIPE_BUFFER_USAGE_CPU_READ); + if (!src) + return PIPE_ERROR; + + memcpy(sbuf->swbuf, src, sbuf->base.size); + ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf); + } + + return PIPE_OK; +} + +/** + * Try to make GMR space available by freeing the hardware storage of + * unmapped + */ +boolean +svga_buffer_free_cached_hw_storage(struct svga_screen *ss) +{ + struct list_head *curr; + struct svga_buffer *sbuf; + enum pipe_error ret = PIPE_OK; + + curr = ss->cached_buffers.prev; + + /* free the least recently used buffer's hw storage which is not mapped */ + do { + if(curr == &ss->cached_buffers) + return FALSE; + + sbuf = LIST_ENTRY(struct svga_buffer, curr, head); + + curr = curr->prev; + if (sbuf->map.count == 0) + ret = svga_buffer_backup(ss, sbuf); + + } while(sbuf->map.count != 0 || ret != PIPE_OK); + + svga_buffer_destroy_hw_storage(ss, sbuf); + + return TRUE; +} + +struct svga_winsys_buffer * +svga_winsys_buffer_create( struct svga_screen *ss, + unsigned alignment, + unsigned usage, + unsigned size ) +{ + struct svga_winsys_screen *sws = ss->sws; + struct svga_winsys_buffer *buf; + + /* Just try */ + buf = sws->buffer_create(sws, alignment, usage, size); + if(!buf) { + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing screen to find %d bytes GMR\n", + size); + + /* Try flushing all pending DMAs */ + svga_screen_flush(ss, NULL); + buf = sws->buffer_create(sws, alignment, usage, size); + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "evicting buffers to find %d bytes GMR\n", + size); + + /* Try evicing all buffer storage */ + while(!buf && svga_buffer_free_cached_hw_storage(ss)) + buf = sws->buffer_create(sws, alignment, usage, size); + } + + return buf; +} + + +/** + * Allocate DMA'ble storage for the buffer. + * + * Called before mapping a buffer. 
+ */ +static INLINE enum pipe_error +svga_buffer_create_hw_storage(struct svga_screen *ss, + struct svga_buffer *sbuf) +{ + if(!sbuf->hw.buf) { + unsigned alignment = sbuf->base.alignment; + unsigned usage = 0; + unsigned size = sbuf->base.size; + + sbuf->hw.buf = svga_winsys_buffer_create(ss, alignment, usage, size); + if(!sbuf->hw.buf) + return PIPE_ERROR_OUT_OF_MEMORY; + + assert(!sbuf->needs_flush); + assert(!sbuf->head.prev && !sbuf->head.next); + LIST_ADD(&sbuf->head, &ss->cached_buffers); + } + + return PIPE_OK; +} + + +/** + * Variant of SVGA3D_BufferDMA which leaves the copy box temporarily in blank. + */ +static enum pipe_error +svga_buffer_upload_command(struct svga_context *svga, + struct svga_buffer *sbuf) +{ + struct svga_winsys_context *swc = svga->swc; + struct svga_winsys_buffer *guest = sbuf->hw.buf; + struct svga_winsys_surface *host = sbuf->handle; + SVGA3dTransferType transfer = SVGA3D_WRITE_HOST_VRAM; + SVGA3dSurfaceDMAFlags flags = sbuf->hw.flags; + SVGA3dCmdSurfaceDMA *cmd; + uint32 numBoxes = sbuf->hw.num_ranges; + SVGA3dCopyBox *boxes; + SVGA3dCmdSurfaceDMASuffix *pSuffix; + unsigned region_flags; + unsigned surface_flags; + struct pipe_buffer *dummy; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_READ; + surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + } + else if(transfer == SVGA3D_READ_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + surface_flags = PIPE_BUFFER_USAGE_GPU_READ; + } + else { + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + assert(numBoxes); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DMA, + sizeof *cmd + numBoxes * sizeof *boxes + sizeof *pSuffix, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags); + cmd->guest.pitch = 0; + + swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags); + cmd->host.face = 0; + cmd->host.mipmap = 0; + + cmd->transfer = transfer; + + sbuf->hw.boxes = (SVGA3dCopyBox *)&cmd[1]; + sbuf->hw.svga = svga; + + /* Increment reference count */ + dummy = NULL; + pipe_buffer_reference(&dummy, &sbuf->base); + + pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + numBoxes * sizeof *boxes); + pSuffix->suffixSize = sizeof *pSuffix; + pSuffix->maximumOffset = sbuf->base.size; + pSuffix->flags = flags; + + swc->commit(swc); + + return PIPE_OK; +} + + +/** + * Patch up the upload DMA command reserved by svga_buffer_upload_command + * with the final ranges. + */ +static void +svga_buffer_upload_flush(struct svga_context *svga, + struct svga_buffer *sbuf) +{ + struct svga_screen *ss = svga_screen(svga->pipe.screen); + SVGA3dCopyBox *boxes; + unsigned i; + + assert(sbuf->handle); + assert(sbuf->hw.buf); + assert(sbuf->hw.num_ranges); + assert(sbuf->hw.svga == svga); + assert(sbuf->hw.boxes); + + /* + * Patch the DMA command with the final copy box. 
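For reference, the command reserved by svga_buffer_upload_command above is a single FIFO packet whose copy boxes are deliberately left blank until this flush step fills them in. A small sketch of the reservation arithmetic, assuming the SVGA3D command structs declared in the headers this file already includes; example_dma_reservation_size is hypothetical:

/* Layout of the reserved packet:
 *
 *   [ SVGA3dCmdSurfaceDMA ][ numBoxes x SVGA3dCopyBox ][ SVGA3dCmdSurfaceDMASuffix ]
 */
static uint32
example_dma_reservation_size(uint32 numBoxes)
{
   return sizeof(SVGA3dCmdSurfaceDMA)
        + numBoxes * sizeof(SVGA3dCopyBox)
        + sizeof(SVGA3dCmdSurfaceDMASuffix);
}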
+ */ + + SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle); + + boxes = sbuf->hw.boxes; + for(i = 0; i < sbuf->hw.num_ranges; ++i) { + SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n", + sbuf->hw.ranges[i].start, sbuf->hw.ranges[i].end); + + boxes[i].x = sbuf->hw.ranges[i].start; + boxes[i].y = 0; + boxes[i].z = 0; + boxes[i].w = sbuf->hw.ranges[i].end - sbuf->hw.ranges[i].start; + boxes[i].h = 1; + boxes[i].d = 1; + boxes[i].srcx = sbuf->hw.ranges[i].start; + boxes[i].srcy = 0; + boxes[i].srcz = 0; + } + + sbuf->hw.num_ranges = 0; + memset(&sbuf->hw.flags, 0, sizeof sbuf->hw.flags); + + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); + sbuf->needs_flush = FALSE; + /* XXX: do we care about cached_buffers any more ?*/ + LIST_ADD(&sbuf->head, &ss->cached_buffers); + + sbuf->hw.svga = NULL; + sbuf->hw.boxes = NULL; + + /* Decrement reference count */ + pipe_buffer_reference((struct pipe_buffer **)&sbuf, NULL); +} + + +/** + * Queue a DMA upload of a range of this buffer to the host. + * + * This function only notes the range down. It doesn't actually emit a DMA + * upload command. That only happens when a context tries to refer to this + * buffer, and the DMA upload command is added to that context's command buffer. + * + * We try to lump as many contiguous DMA transfers together as possible. + */ +static void +svga_buffer_upload_queue(struct svga_buffer *sbuf, + unsigned start, + unsigned end) +{ + unsigned i; + + assert(sbuf->hw.buf); + assert(end > start); + + /* + * Try to grow one of the ranges. + * + * Note that it is not this function task to care about overlapping ranges, + * as the GMR was already given so it is too late to do anything. Situations + * where overlapping ranges may pose a problem should be detected via + * pipe_context::is_buffer_referenced and the context that refers to the + * buffer should be flushed. + */ + + for(i = 0; i < sbuf->hw.num_ranges; ++i) { + if(start <= sbuf->hw.ranges[i].end && sbuf->hw.ranges[i].start <= end) { + sbuf->hw.ranges[i].start = MIN2(sbuf->hw.ranges[i].start, start); + sbuf->hw.ranges[i].end = MAX2(sbuf->hw.ranges[i].end, end); + return; + } + } + + /* + * We cannot add a new range to an existing DMA command, so patch-up the + * pending DMA upload and start clean. + */ + + if(sbuf->needs_flush) + svga_buffer_upload_flush(sbuf->hw.svga, sbuf); + + assert(!sbuf->needs_flush); + assert(!sbuf->hw.svga); + assert(!sbuf->hw.boxes); + + /* + * Add a new range. 
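The overlap test above (start <= range.end && range.start <= end) is the standard closed-interval intersection check, so touching as well as overlapping byte ranges get folded into a single DMA box. A standalone, hypothetical illustration of the merge rule:

struct byte_range {
   unsigned start;
   unsigned end;
};

/* Hypothetical helper, purely illustrative. */
static int
example_try_merge(struct byte_range *r, unsigned start, unsigned end)
{
   if (start <= r->end && r->start <= end) {
      /* Touching or overlapping: grow the existing range. */
      if (start < r->start)
         r->start = start;
      if (end > r->end)
         r->end = end;
      return 1;
   }
   return 0;  /* Disjoint: the caller appends a new range instead. */
}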
+ */ + + sbuf->hw.ranges[sbuf->hw.num_ranges].start = start; + sbuf->hw.ranges[sbuf->hw.num_ranges].end = end; + ++sbuf->hw.num_ranges; +} + + +static void * +svga_buffer_map_range( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned length, + unsigned usage ) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_buffer *sbuf = svga_buffer( buf ); + void *map; + + if(sbuf->swbuf) { + /* User/malloc buffer */ + map = sbuf->swbuf; + } + else { + if(!sbuf->hw.buf) { + struct svga_winsys_surface *handle = sbuf->handle; + + if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) + return NULL; + + /* Populate the hardware storage if the host surface pre-existed */ + if((usage & PIPE_BUFFER_USAGE_CPU_READ) && handle) { + SVGA3dSurfaceDMAFlags flags; + enum pipe_error ret; + struct pipe_fence_handle *fence = NULL; + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p (buffer), bytes %u - %u\n", + sbuf->handle, 0, sbuf->base.size); + + memset(&flags, 0, sizeof flags); + + ret = SVGA3D_BufferDMA(ss->swc, + sbuf->hw.buf, + sbuf->handle, + SVGA3D_READ_HOST_VRAM, + sbuf->base.size, + 0, + flags); + if(ret != PIPE_OK) { + ss->swc->flush(ss->swc, NULL); + + ret = SVGA3D_BufferDMA(ss->swc, + sbuf->hw.buf, + sbuf->handle, + SVGA3D_READ_HOST_VRAM, + sbuf->base.size, + 0, + flags); + assert(ret == PIPE_OK); + } + + ss->swc->flush(ss->swc, &fence); + sws->fence_finish(sws, fence, 0); + sws->fence_reference(sws, &fence, NULL); + } + } + else { + if((usage & PIPE_BUFFER_USAGE_CPU_READ) && !sbuf->needs_flush) { + /* We already had the hardware storage but we would have to issue + * a download if we hadn't, so move the buffer to the begginning + * of the LRU list. + */ + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); + LIST_ADD(&sbuf->head, &ss->cached_buffers); + } + } + + map = sws->buffer_map(sws, sbuf->hw.buf, usage); + } + + if(map) { + pipe_mutex_lock(ss->swc_mutex); + + ++sbuf->map.count; + + if (usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(sbuf->map.count <= 1); + sbuf->map.writing = TRUE; + if (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) + sbuf->map.flush_explicit = TRUE; + } + + pipe_mutex_unlock(ss->swc_mutex); + } + + return map; +} + +static void +svga_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned length) +{ + struct svga_buffer *sbuf = svga_buffer( buf ); + struct svga_screen *ss = svga_screen(screen); + + pipe_mutex_lock(ss->swc_mutex); + assert(sbuf->map.writing); + if(sbuf->map.writing) { + assert(sbuf->map.flush_explicit); + if(sbuf->hw.buf) + svga_buffer_upload_queue(sbuf, offset, offset + length); + } + pipe_mutex_unlock(ss->swc_mutex); +} + +static void +svga_buffer_unmap( struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_buffer *sbuf = svga_buffer( buf ); + + pipe_mutex_lock(ss->swc_mutex); + + assert(sbuf->map.count); + if(sbuf->map.count) + --sbuf->map.count; + + if(sbuf->hw.buf) + sws->buffer_unmap(sws, sbuf->hw.buf); + + if(sbuf->map.writing) { + if(!sbuf->map.flush_explicit) { + /* No mapped range was flushed -- flush the whole buffer */ + SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); + + if(sbuf->hw.buf) + svga_buffer_upload_queue(sbuf, 0, sbuf->base.size); + } + + sbuf->map.writing = FALSE; + sbuf->map.flush_explicit = FALSE; + } + + pipe_mutex_unlock(ss->swc_mutex); +} + +static void 
+svga_buffer_destroy( struct pipe_buffer *buf ) +{ + struct svga_screen *ss = svga_screen(buf->screen); + struct svga_buffer *sbuf = svga_buffer( buf ); + + assert(!p_atomic_read(&buf->reference.count)); + + assert(!sbuf->needs_flush); + + if(sbuf->handle) { + SVGA_DBG(DEBUG_DMA, "release sid %p sz %d\n", sbuf->handle, sbuf->base.size); + svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle); + } + + if(sbuf->hw.buf) + svga_buffer_destroy_hw_storage(ss, sbuf); + + if(sbuf->swbuf && !sbuf->user) + align_free(sbuf->swbuf); + + FREE(sbuf); +} + +static struct pipe_buffer * +svga_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_buffer *sbuf; + + sbuf = CALLOC_STRUCT(svga_buffer); + if(!sbuf) + goto error1; + + sbuf->magic = SVGA_BUFFER_MAGIC; + + pipe_reference_init(&sbuf->base.reference, 1); + sbuf->base.screen = screen; + sbuf->base.alignment = alignment; + sbuf->base.usage = usage; + sbuf->base.size = size; + + if(svga_buffer_needs_hw_storage(usage)) { + if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) + goto error2; + } + else { + if(alignment < sizeof(void*)) + alignment = sizeof(void*); + + usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + sbuf->swbuf = align_malloc(size, alignment); + if(!sbuf->swbuf) + goto error2; + } + + return &sbuf->base; + +error2: + FREE(sbuf); +error1: + return NULL; +} + +static struct pipe_buffer * +svga_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct svga_buffer *sbuf; + + sbuf = CALLOC_STRUCT(svga_buffer); + if(!sbuf) + goto no_sbuf; + + sbuf->magic = SVGA_BUFFER_MAGIC; + + sbuf->swbuf = ptr; + sbuf->user = TRUE; + + pipe_reference_init(&sbuf->base.reference, 1); + sbuf->base.screen = screen; + sbuf->base.alignment = 1; + sbuf->base.usage = 0; + sbuf->base.size = bytes; + + return &sbuf->base; + +no_sbuf: + return NULL; +} + + +void +svga_screen_init_buffer_functions(struct pipe_screen *screen) +{ + screen->buffer_create = svga_buffer_create; + screen->user_buffer_create = svga_user_buffer_create; + screen->buffer_map_range = svga_buffer_map_range; + screen->buffer_flush_mapped_range = svga_buffer_flush_mapped_range; + screen->buffer_unmap = svga_buffer_unmap; + screen->buffer_destroy = svga_buffer_destroy; +} + + +/** + * Copy the contents of the user buffer / malloc buffer to a hardware buffer. 
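+ *
+ * After the copy the malloc backing is released (or, for user buffers,
+ * simply forgotten) and the whole buffer is queued for DMA upload, so from
+ * then on it behaves like any other hardware buffer.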
+ */ +static INLINE enum pipe_error +svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf) +{ + if(!sbuf->hw.buf) { + enum pipe_error ret; + void *map; + + assert(sbuf->swbuf); + if(!sbuf->swbuf) + return PIPE_ERROR; + + ret = svga_buffer_create_hw_storage(ss, sbuf); + assert(ret == PIPE_OK); + if(ret != PIPE_OK) + return ret; + + pipe_mutex_lock(ss->swc_mutex); + map = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(map); + if(!map) { + pipe_mutex_unlock(ss->swc_mutex); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + memcpy(map, sbuf->swbuf, sbuf->base.size); + ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf); + + /* This user/malloc buffer is now indistinguishable from a gpu buffer */ + assert(!sbuf->map.count); + if(!sbuf->map.count) { + if(sbuf->user) + sbuf->user = FALSE; + else + align_free(sbuf->swbuf); + sbuf->swbuf = NULL; + } + + svga_buffer_upload_queue(sbuf, 0, sbuf->base.size); + } + + pipe_mutex_unlock(ss->swc_mutex); + return PIPE_OK; +} + + +struct svga_winsys_surface * +svga_buffer_handle(struct svga_context *svga, + struct pipe_buffer *buf) +{ + struct pipe_screen *screen = svga->pipe.screen; + struct svga_screen *ss = svga_screen(screen); + struct svga_buffer *sbuf; + enum pipe_error ret; + + if(!buf) + return NULL; + + sbuf = svga_buffer(buf); + + assert(!sbuf->map.count); + + if(!sbuf->handle) { + ret = svga_buffer_create_host_surface(ss, sbuf); + if(ret != PIPE_OK) + return NULL; + + ret = svga_buffer_update_hw(ss, sbuf); + if(ret != PIPE_OK) + return NULL; + } + + if(!sbuf->needs_flush && sbuf->hw.num_ranges) { + /* Queue the buffer for flushing */ + ret = svga_buffer_upload_command(svga, sbuf); + if(ret != PIPE_OK) + /* XXX: Should probably have a richer return value */ + return NULL; + + assert(sbuf->hw.svga == svga); + + sbuf->needs_flush = TRUE; + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); + LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers); + } + + return sbuf->handle; +} + +struct pipe_buffer * +svga_screen_buffer_wrap_surface(struct pipe_screen *screen, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf) +{ + struct pipe_buffer *buf; + struct svga_buffer *sbuf; + struct svga_winsys_screen *sws = svga_winsys_screen(screen); + + buf = svga_buffer_create(screen, 0, SVGA_BUFFER_USAGE_WRAPPED, 0); + if (!buf) + return NULL; + + sbuf = svga_buffer(buf); + + /* + * We are not the creator of this surface and therefore we must not + * cache it for reuse. Set the cacheable flag to zero in the key to + * prevent this. 
+ */ + sbuf->key.format = format; + sbuf->key.cachable = 0; + sws->surface_reference(sws, &sbuf->handle, srf); + + return buf; +} + + +struct svga_winsys_surface * +svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer) +{ + struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen); + struct svga_winsys_surface *vsurf = NULL; + + assert(svga_buffer(buffer)->key.cachable == 0); + svga_buffer(buffer)->key.cachable = 0; + sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle); + return vsurf; +} + +void +svga_context_flush_buffers(struct svga_context *svga) +{ + struct list_head *curr, *next; + struct svga_buffer *sbuf; + + curr = svga->dirty_buffers.next; + next = curr->next; + while(curr != &svga->dirty_buffers) { + sbuf = LIST_ENTRY(struct svga_buffer, curr, head); + + assert(p_atomic_read(&sbuf->base.reference.count) != 0); + assert(sbuf->needs_flush); + + svga_buffer_upload_flush(svga, sbuf); + + curr = next; + next = curr->next; + } +} diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h new file mode 100644 index 0000000000..5d7af5a7c5 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_buffer.h @@ -0,0 +1,190 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_BUFFER_H +#define SVGA_BUFFER_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "util/u_double_list.h" + +#include "svga_screen_cache.h" + + +#define SVGA_BUFFER_MAGIC 0x344f9005 + +/** + * Maximum number of discontiguous ranges + */ +#define SVGA_BUFFER_MAX_RANGES 32 + + +struct svga_screen; +struct svga_context; +struct svga_winsys_buffer; +struct svga_winsys_surface; + + +struct svga_buffer_range +{ + unsigned start; + unsigned end; +}; + + +/** + * Describe a + * + * This holds the information to emit a SVGA3dCmdSurfaceDMA. + */ +struct svga_buffer_upload +{ + /** + * Guest memory region. + */ + struct svga_winsys_buffer *buf; + + struct svga_buffer_range ranges[SVGA_BUFFER_MAX_RANGES]; + unsigned num_ranges; + + SVGA3dSurfaceDMAFlags flags; + + /** + * Pointer to the DMA copy box *inside* the command buffer. + */ + SVGA3dCopyBox *boxes; + + /** + * Context that has the pending DMA to this buffer. + */ + struct svga_context *svga; +}; + + +/** + * SVGA pipe buffer. 
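+ *
+ * A buffer may be backed by plain malloc memory (swbuf), by a DMA-able GMR
+ * buffer (hw.buf) and/or by a host surface (handle); the buffer code above
+ * migrates the contents between these backings as needed.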
+ */
+struct svga_buffer
+{
+ struct pipe_buffer base;
+
+ /**
+ * Marker to detect bad casts at runtime.
+ */
+ uint32_t magic;
+
+ /**
+ * Regular (non DMA'able) memory.
+ *
+ * Used for user buffers or for buffers which we know beforehand can
+ * never be used by the virtual hardware directly, such as constant buffers.
+ */
+ void *swbuf;
+
+ /**
+ * Whether swbuf was created by the user or not.
+ */
+ boolean user;
+
+ /**
+ * DMA'ble memory.
+ *
+ * A piece of GMR memory. It is created when mapping the buffer, and will be
+ * used to upload/download vertex data from the host.
+ */
+ struct svga_buffer_upload hw;
+
+ /**
+ * Creation key for the host surface handle.
+ *
+ * This structure describes all the host surface characteristics so that it
+ * can be looked up in cache, since creating a host surface is often a slow
+ * operation.
+ */
+ struct svga_host_surface_cache_key key;
+
+ /**
+ * Host surface handle.
+ *
+ * This is a platform independent abstraction for the host SID. We create it
+ * when first trying to bind the buffer.
+ */
+ struct svga_winsys_surface *handle;
+
+ struct {
+ unsigned count;
+ boolean writing;
+ boolean flush_explicit;
+ } map;
+
+ boolean needs_flush;
+ struct list_head head;
+};
+
+
+static INLINE struct svga_buffer *
+svga_buffer(struct pipe_buffer *buffer)
+{
+ if (buffer) {
+ assert(((struct svga_buffer *)buffer)->magic == SVGA_BUFFER_MAGIC);
+ return (struct svga_buffer *)buffer;
+ }
+ return NULL;
+}
+
+
+/**
+ * Returns TRUE for user buffers. We may
+ * decide to use an alternate upload path for these buffers.
+ */
+static INLINE boolean
+svga_buffer_is_user_buffer( struct pipe_buffer *buffer )
+{
+ return svga_buffer(buffer)->user;
+}
+
+
+void
+svga_screen_init_buffer_functions(struct pipe_screen *screen);
+
+struct svga_winsys_surface *
+svga_buffer_handle(struct svga_context *svga,
+ struct pipe_buffer *buf);
+
+void
+svga_context_flush_buffers(struct svga_context *svga);
+
+boolean
+svga_buffer_free_cached_hw_storage(struct svga_screen *ss);
+
+struct svga_winsys_buffer *
+svga_winsys_buffer_create(struct svga_screen *ss,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size);
+
+#endif /* SVGA_BUFFER_H */
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
new file mode 100644
index 0000000000..8a06383f61
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -0,0 +1,338 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_memory.h" +#include "util/u_hash.h" + +#include "svga_debug.h" +#include "svga_winsys.h" +#include "svga_screen.h" +#include "svga_screen_cache.h" + + +#define SVGA_SURFACE_CACHE_ENABLED 1 + + +/** + * Compute the bucket for this key. + */ +static INLINE unsigned +svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key) +{ + return util_hash_crc32( key, sizeof *key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS; +} + + +static INLINE struct svga_winsys_surface * +svga_screen_cache_lookup(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_host_surface_cache_entry *entry; + struct svga_winsys_surface *handle = NULL; + struct list_head *curr, *next; + unsigned bucket; + unsigned tries = 0; + + assert(key->cachable); + + bucket = svga_screen_cache_bucket(key); + + pipe_mutex_lock(cache->mutex); + + curr = cache->bucket[bucket].next; + next = curr->next; + while(curr != &cache->bucket[bucket]) { + ++tries; + + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, bucket_head); + + assert(entry->handle); + + if(memcmp(&entry->key, key, sizeof *key) == 0 && + sws->fence_signalled( sws, entry->fence, 0 ) == 0) { + assert(sws->surface_is_flushed(sws, entry->handle)); + + handle = entry->handle; // Reference is transfered here. + entry->handle = NULL; + + LIST_DEL(&entry->bucket_head); + + LIST_DEL(&entry->head); + + LIST_ADD(&entry->head, &cache->empty); + + break; + } + + curr = next; + next = curr->next; + } + + pipe_mutex_unlock(cache->mutex); + + if (SVGA_DEBUG & DEBUG_DMA) + debug_printf("%s: cache %s after %u tries (bucket %d)\n", __FUNCTION__, + handle ? "hit" : "miss", tries, bucket); + + return handle; +} + + +/* + * Transfers a handle reference. 
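+ *
+ * On return *p_handle is always cleared: the caller's reference is either
+ * stored in a recycled cache entry or, if no entry can be made available,
+ * released outright.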
+ */ + +static INLINE void +svga_screen_cache_add(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key, + struct svga_winsys_surface **p_handle) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_host_surface_cache_entry *entry = NULL; + struct svga_winsys_surface *handle = *p_handle; + + assert(key->cachable); + + assert(handle); + if(!handle) + return; + + *p_handle = NULL; + pipe_mutex_lock(cache->mutex); + + if(!LIST_IS_EMPTY(&cache->empty)) { + /* use the first empty entry */ + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head); + + LIST_DEL(&entry->head); + } + else if(!LIST_IS_EMPTY(&cache->unused)) { + /* free the last used buffer and reuse its entry */ + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head); + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "unref sid %p (make space)\n", entry->handle); + sws->surface_reference(sws, &entry->handle, NULL); + + LIST_DEL(&entry->bucket_head); + + LIST_DEL(&entry->head); + } + + if(entry) { + entry->handle = handle; + memcpy(&entry->key, key, sizeof entry->key); + + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "cache sid %p\n", entry->handle); + LIST_ADD(&entry->head, &cache->validated); + } + else { + /* Couldn't cache the buffer -- this really shouldn't happen */ + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "unref sid %p (couldn't find space)\n", handle); + sws->surface_reference(sws, &handle, NULL); + } + + pipe_mutex_unlock(cache->mutex); +} + + +/** + * Called during the screen flush to move all buffers not in a validate list + * into the unused list. + */ +void +svga_screen_cache_flush(struct svga_screen *svgascreen, + struct pipe_fence_handle *fence) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_host_surface_cache_entry *entry; + struct list_head *curr, *next; + unsigned bucket; + + pipe_mutex_lock(cache->mutex); + + curr = cache->validated.next; + next = curr->next; + while(curr != &cache->validated) { + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, head); + + assert(entry->handle); + + if(sws->surface_is_flushed(sws, entry->handle)) { + LIST_DEL(&entry->head); + + svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence); + + LIST_ADD(&entry->head, &cache->unused); + + bucket = svga_screen_cache_bucket(&entry->key); + LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]); + } + + curr = next; + next = curr->next; + } + + pipe_mutex_unlock(cache->mutex); +} + + +void +svga_screen_cache_cleanup(struct svga_screen *svgascreen) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + unsigned i; + + for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) { + if(cache->entries[i].handle) { + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "unref sid %p (shutdown)\n", cache->entries[i].handle); + sws->surface_reference(sws, &cache->entries[i].handle, NULL); + } + + if(cache->entries[i].fence) + svgascreen->sws->fence_reference(svgascreen->sws, &cache->entries[i].fence, NULL); + } + + pipe_mutex_destroy(cache->mutex); +} + + +enum pipe_error +svga_screen_cache_init(struct svga_screen *svgascreen) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + unsigned i; + + pipe_mutex_init(cache->mutex); + + for(i = 0; i < SVGA_HOST_SURFACE_CACHE_BUCKETS; ++i) + LIST_INITHEAD(&cache->bucket[i]); + + LIST_INITHEAD(&cache->unused); + + 
LIST_INITHEAD(&cache->validated); + + LIST_INITHEAD(&cache->empty); + for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) + LIST_ADDTAIL(&cache->entries[i].head, &cache->empty); + + return PIPE_OK; +} + + +struct svga_winsys_surface * +svga_screen_surface_create(struct svga_screen *svgascreen, + struct svga_host_surface_cache_key *key) +{ + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_winsys_surface *handle = NULL; + boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable; + + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "%s sz %dx%dx%d mips %d faces %d cachable %d\n", + __FUNCTION__, + key->size.width, + key->size.height, + key->size.depth, + key->numMipLevels, + key->numFaces, + key->cachable); + + if (cachable) { + if (key->format == SVGA3D_BUFFER) { + /* For buffers, round the buffer size up to the nearest power + * of two to increase the probability of cache hits. Keep + * texture surface dimensions unchanged. + */ + uint32_t size = 1; + while(size < key->size.width) + size <<= 1; + key->size.width = size; + } + + handle = svga_screen_cache_lookup(svgascreen, key); + if (handle) { + if (key->format == SVGA3D_BUFFER) + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "reuse sid %p sz %d (buffer)\n", handle, + key->size.width); + else + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, + key->size.width, + key->size.height, + key->size.depth, + key->numMipLevels, + key->numFaces); + } + } + + if (!handle) { + handle = sws->surface_create(sws, + key->flags, + key->format, + key->size, + key->numFaces, + key->numMipLevels); + if (handle) + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + " CREATE sid %p sz %dx%dx%d\n", + handle, + key->size.width, + key->size.height, + key->size.depth); + } + + return handle; +} + + +void +svga_screen_surface_destroy(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key, + struct svga_winsys_surface **p_handle) +{ + struct svga_winsys_screen *sws = svgascreen->sws; + + /* We only set the cachable flag for surfaces of which we are the + * exclusive owner. So just hold onto our existing reference in + * that case. + */ + if(SVGA_SURFACE_CACHE_ENABLED && key->cachable) { + svga_screen_cache_add(svgascreen, key, p_handle); + } + else { + SVGA_DBG(DEBUG_DMA, + "unref sid %p (uncachable)\n", *p_handle); + sws->surface_reference(sws, p_handle, NULL); + } +} diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h new file mode 100644 index 0000000000..f5aa740d40 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_cache.h @@ -0,0 +1,144 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SCREEN_CACHE_H_
+#define SVGA_SCREEN_CACHE_H_
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_thread.h"
+
+#include "util/u_double_list.h"
+
+
+/* Guess the storage size of cached surfaces and try to keep it under
+ * this amount:
+ */
+#define SVGA_HOST_SURFACE_CACHE_BYTES 16*1024*1024
+
+/* Maximum number of discrete surfaces in the cache:
+ */
+#define SVGA_HOST_SURFACE_CACHE_SIZE 1024
+
+/* Number of hash buckets:
+ */
+#define SVGA_HOST_SURFACE_CACHE_BUCKETS 256
+
+
+struct svga_winsys_surface;
+struct svga_screen;
+
+/**
+ * Same as svga_winsys_screen::surface_create.
+ */
+struct svga_host_surface_cache_key
+{
+ SVGA3dSurfaceFlags flags;
+ SVGA3dSurfaceFormat format;
+ SVGA3dSize size;
+ uint32_t numFaces:24;
+ uint32_t numMipLevels:7;
+ uint32_t cachable:1; /* False if this is a shared surface */
+};
+
+
+struct svga_host_surface_cache_entry
+{
+ /**
+ * Head for the LRU list, svga_host_surface_cache::unused, and
+ * svga_host_surface_cache::empty
+ */
+ struct list_head head;
+
+ /** Head for the bucket lists. */
+ struct list_head bucket_head;
+
+ struct svga_host_surface_cache_key key;
+ struct svga_winsys_surface *handle;
+
+ struct pipe_fence_handle *fence;
+};
+
+
+/**
+ * Cache of the host surfaces.
+ *
+ * A cache entry can be in the following stages:
+ * 1. empty
+ * 2. holding a buffer in a validate list
+ * 3. holding a flushed buffer (not in any validate list) with an active fence
+ * 4. holding a flushed buffer with an expired fence
+ *
+ * An entry progresses from 1 -> 2 -> 3 -> 4. When we need an entry to put a
+ * buffer into, we preferentially take from 1, or else recycle the least
+ * recently used entry from 3/4.
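+ *
+ * For example, a surface handed back through svga_screen_surface_destroy()
+ * first sits on the validated list (2); once the winsys reports it flushed,
+ * svga_screen_cache_flush() moves it onto its hash bucket and the unused
+ * list (3); and svga_screen_cache_lookup() only hands it out for reuse once
+ * its fence has signalled (4).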
+ */ +struct svga_host_surface_cache +{ + pipe_mutex mutex; + + /* Unused buffers are put in buckets to speed up lookups */ + struct list_head bucket[SVGA_HOST_SURFACE_CACHE_BUCKETS]; + + /* Entries with unused buffers, ordered from most to least recently used + * (3 and 4) */ + struct list_head unused; + + /* Entries with buffers still in validate lists (2) */ + struct list_head validated; + + /** Empty entries (1) */ + struct list_head empty; + + /** The actual storage for the entries */ + struct svga_host_surface_cache_entry entries[SVGA_HOST_SURFACE_CACHE_SIZE]; +}; + + +void +svga_screen_cache_cleanup(struct svga_screen *svgascreen); + +void +svga_screen_cache_flush(struct svga_screen *svgascreen, + struct pipe_fence_handle *fence); + +enum pipe_error +svga_screen_cache_init(struct svga_screen *svgascreen); + + +struct svga_winsys_surface * +svga_screen_surface_create(struct svga_screen *svgascreen, + struct svga_host_surface_cache_key *key); + +void +svga_screen_surface_destroy(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key, + struct svga_winsys_surface **handle); + + +#endif /* SVGA_SCREEN_CACHE_H_ */ diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c new file mode 100644 index 0000000000..6e10d65a20 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -0,0 +1,1089 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_screen.h" +#include "svga_context.h" +#include "svga_screen_texture.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_debug.h" +#include "svga_screen_buffer.h" + +#include <util/u_string.h> + + +/* XXX: This isn't a real hardware flag, but just a hack for kernel to + * know about primary surfaces. Find a better way to accomplish this. 
+ */ +#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) + + +/* + * Helper function and arrays + */ + +SVGA3dSurfaceFormat +svga_translate_format(enum pipe_format format) +{ + switch(format) { + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return SVGA3D_A8R8G8B8; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return SVGA3D_X8R8G8B8; + + /* Required for GL2.1: + */ + case PIPE_FORMAT_A8R8G8B8_SRGB: + return SVGA3D_A8R8G8B8; + + case PIPE_FORMAT_R5G6B5_UNORM: + return SVGA3D_R5G6B5; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return SVGA3D_A1R5G5B5; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return SVGA3D_A4R4G4B4; + + + /* XXX: Doesn't seem to work properly. + case PIPE_FORMAT_Z32_UNORM: + return SVGA3D_Z_D32; + */ + case PIPE_FORMAT_Z16_UNORM: + return SVGA3D_Z_D16; + case PIPE_FORMAT_Z24S8_UNORM: + return SVGA3D_Z_D24S8; + case PIPE_FORMAT_Z24X8_UNORM: + return SVGA3D_Z_D24X8; + + case PIPE_FORMAT_A8_UNORM: + return SVGA3D_ALPHA8; + case PIPE_FORMAT_L8_UNORM: + return SVGA3D_LUMINANCE8; + + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + return SVGA3D_DXT1; + case PIPE_FORMAT_DXT3_RGBA: + return SVGA3D_DXT3; + case PIPE_FORMAT_DXT5_RGBA: + return SVGA3D_DXT5; + + default: + return SVGA3D_FORMAT_INVALID; + } +} + + +SVGA3dSurfaceFormat +svga_translate_format_render(enum pipe_format format) +{ + switch(format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_L8_UNORM: + return svga_translate_format(format); + +#if 1 + /* For on host conversion */ + case PIPE_FORMAT_DXT1_RGB: + return SVGA3D_X8R8G8B8; + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return SVGA3D_A8R8G8B8; +#endif + + default: + return SVGA3D_FORMAT_INVALID; + } +} + + +static INLINE void +svga_transfer_dma_band(struct svga_transfer *st, + SVGA3dTransferType transfer, + unsigned y, unsigned h, unsigned srcy) +{ + struct svga_texture *texture = svga_texture(st->base.texture); + struct svga_screen *screen = svga_screen(texture->base.screen); + SVGA3dCopyBox box; + enum pipe_error ret; + + SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n", + transfer == SVGA3D_WRITE_HOST_VRAM ? 
"to" : "from", + texture->handle, + st->base.face, + st->base.x, + y, + st->base.zslice, + st->base.x + st->base.width, + y + h, + st->base.zslice + 1, + texture->base.block.size*8/(texture->base.block.width*texture->base.block.height)); + + box.x = st->base.x; + box.y = y; + box.z = st->base.zslice; + box.w = st->base.width; + box.h = h; + box.d = 1; + box.srcx = 0; + box.srcy = srcy; + box.srcz = 0; + + pipe_mutex_lock(screen->swc_mutex); + ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1); + if(ret != PIPE_OK) { + screen->swc->flush(screen->swc, NULL); + ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1); + assert(ret == PIPE_OK); + } + pipe_mutex_unlock(screen->swc_mutex); +} + + +static INLINE void +svga_transfer_dma(struct svga_transfer *st, + SVGA3dTransferType transfer) +{ + struct svga_texture *texture = svga_texture(st->base.texture); + struct svga_screen *screen = svga_screen(texture->base.screen); + struct svga_winsys_screen *sws = screen->sws; + struct pipe_fence_handle *fence = NULL; + + if (transfer == SVGA3D_READ_HOST_VRAM) { + SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__); + } + + + if(!st->swbuf) { + /* Do the DMA transfer in a single go */ + + svga_transfer_dma_band(st, transfer, st->base.y, st->base.height, 0); + + if(transfer == SVGA3D_READ_HOST_VRAM) { + svga_screen_flush(screen, &fence); + sws->fence_finish(sws, fence, 0); + //sws->fence_reference(sws, &fence, NULL); + } + } + else { + unsigned y, h, srcy; + h = st->hw_nblocksy * st->base.block.height; + srcy = 0; + for(y = 0; y < st->base.height; y += h) { + unsigned offset, length; + void *hw, *sw; + + if (y + h > st->base.height) + h = st->base.height - y; + + /* Transfer band must be aligned to pixel block boundaries */ + assert(y % st->base.block.height == 0); + assert(h % st->base.block.height == 0); + + offset = y * st->base.stride / st->base.block.height; + length = h * st->base.stride / st->base.block.height; + + sw = (uint8_t *)st->swbuf + offset; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + /* Wait for the previous DMAs to complete */ + /* TODO: keep one DMA (at half the size) in the background */ + if(y) { + svga_screen_flush(screen, &fence); + sws->fence_finish(sws, fence, 0); + //sws->fence_reference(sws, &fence, NULL); + } + + hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(hw); + if(hw) { + memcpy(hw, sw, length); + sws->buffer_unmap(sws, st->hwbuf); + } + } + + svga_transfer_dma_band(st, transfer, y, h, srcy); + + if(transfer == SVGA3D_READ_HOST_VRAM) { + svga_screen_flush(screen, &fence); + sws->fence_finish(sws, fence, 0); + + hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_READ); + assert(hw); + if(hw) { + memcpy(sw, hw, length); + sws->buffer_unmap(sws, st->hwbuf); + } + } + } + } +} + + +static struct pipe_texture * +svga_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_texture *tex = CALLOC_STRUCT(svga_texture); + unsigned width, height, depth; + unsigned level; + + if (!tex) + goto error1; + + tex->base = *templat; + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + assert(templat->last_level < SVGA_MAX_TEXTURE_LEVELS); + if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS) + goto error2; + + width = templat->width0; + height = templat->height0; + depth = templat->depth0; + for(level = 0; level <= templat->last_level; ++level) { + tex->base.nblocksx[level] = 
pf_get_nblocksx(&tex->base.block, width); + tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + tex->key.flags = 0; + tex->key.size.width = templat->width0; + tex->key.size.height = templat->height0; + tex->key.size.depth = templat->depth0; + + if(templat->target == PIPE_TEXTURE_CUBE) { + tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; + tex->key.numFaces = 6; + } + else { + tex->key.numFaces = 1; + } + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) + tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT; + + /* + * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot + * know beforehand whether a texture will be used as a rendertarget or not + * and it always requests PIPE_TEXTURE_USAGE_RENDER_TARGET, therefore + * passing the SVGA3D_SURFACE_HINT_RENDERTARGET here defeats its purpose. + */ +#if 0 + if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) && + !pf_is_compressed(templat->format)) + tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; +#endif + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) + tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + + tex->key.numMipLevels = templat->last_level + 1; + + tex->key.format = svga_translate_format(templat->format); + if(tex->key.format == SVGA3D_FORMAT_INVALID) + goto error2; + + tex->key.cachable = 1; + + SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle); + tex->handle = svga_screen_surface_create(svgascreen, &tex->key); + if (tex->handle) + SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); + + return &tex->base; + +error2: + FREE(tex); +error1: + return NULL; +} + + +static struct pipe_texture * +svga_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + struct svga_texture *tex; + struct svga_buffer *sbuf = svga_buffer(buffer); + struct svga_winsys_screen *sws = svga_winsys_screen(screen); + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth0 != 1) { + return NULL; + } + + /** + * We currently can't do texture blanket on + * SVGA3D_BUFFER. Need to blit to a temporary surface? + */ + + assert(sbuf->handle); + if (!sbuf->handle) + return NULL; + + if (svga_translate_format(base->format) != sbuf->key.format) { + unsigned f1 = svga_translate_format(base->format); + unsigned f2 = sbuf->key.format; + + /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */ + if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) || + (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) { + debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2); + return NULL; + } + } + + tex = CALLOC_STRUCT(svga_texture); + if (!tex) + return NULL; + + tex->base = *base; + + + if (sbuf->key.format == 1) + tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; + else if (sbuf->key.format == 2) + tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM; + + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + SVGA_DBG(DEBUG_DMA, "blanket sid %p\n", sbuf->handle); + + /* We don't own this storage, so don't try to cache it. 
+ */ + assert(sbuf->key.cachable == 0); + tex->key.cachable = 0; + sws->surface_reference(sws, &tex->handle, sbuf->handle); + + return &tex->base; +} + + +static void +svga_texture_destroy(struct pipe_texture *pt) +{ + struct svga_screen *ss = svga_screen(pt->screen); + struct svga_texture *tex = (struct svga_texture *)pt; + + ss->texture_timestamp++; + + svga_sampler_view_reference(&tex->cached_view, NULL); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle); + svga_screen_surface_destroy(ss, &tex->key, &tex->handle); + + FREE(tex); +} + + +static void +svga_texture_copy_handle(struct svga_context *svga, + struct svga_screen *ss, + struct svga_winsys_surface *src_handle, + unsigned src_x, unsigned src_y, unsigned src_z, + unsigned src_level, unsigned src_face, + struct svga_winsys_surface *dst_handle, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + unsigned dst_level, unsigned dst_face, + unsigned width, unsigned height, unsigned depth) +{ + struct svga_surface dst, src; + enum pipe_error ret; + SVGA3dCopyBox box, *boxes; + + assert(svga || ss); + + src.handle = src_handle; + src.real_level = src_level; + src.real_face = src_face; + src.real_zslice = 0; + + dst.handle = dst_handle; + dst.real_level = dst_level; + dst.real_face = dst_face; + dst.real_zslice = 0; + + box.x = dst_x; + box.y = dst_y; + box.z = dst_z; + box.w = width; + box.h = height; + box.d = depth; + box.srcx = src_x; + box.srcy = src_y; + box.srcz = src_z; + +/* + SVGA_DBG(DEBUG_VIEWS, "mipcopy src: %p %u (%ux%ux%u), dst: %p %u (%ux%ux%u)\n", + src_handle, src_level, src_x, src_y, src_z, + dst_handle, dst_level, dst_x, dst_y, dst_z); +*/ + + if (svga) { + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + &src.base, + &dst.base, + &boxes, 1); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + &src.base, + &dst.base, + &boxes, 1); + assert(ret == PIPE_OK); + } + *boxes = box; + SVGA_FIFOCommitAll(svga->swc); + } else { + pipe_mutex_lock(ss->swc_mutex); + ret = SVGA3D_BeginSurfaceCopy(ss->swc, + &src.base, + &dst.base, + &boxes, 1); + if(ret != PIPE_OK) { + ss->swc->flush(ss->swc, NULL); + ret = SVGA3D_BeginSurfaceCopy(ss->swc, + &src.base, + &dst.base, + &boxes, 1); + assert(ret == PIPE_OK); + } + *boxes = box; + SVGA_FIFOCommitAll(ss->swc); + pipe_mutex_unlock(ss->swc_mutex); + } +} + +static struct svga_winsys_surface * +svga_texture_view_surface(struct pipe_context *pipe, + struct svga_texture *tex, + SVGA3dSurfaceFormat format, + unsigned start_mip, + unsigned num_mip, + int face_pick, + int zslice_pick, + struct svga_host_surface_cache_key *key) /* OUT */ +{ + struct svga_screen *ss = svga_screen(tex->base.screen); + struct svga_winsys_surface *handle; + int i, j; + unsigned z_offset = 0; + + SVGA_DBG(DEBUG_PERF, + "svga: Create surface view: face %d zslice %d mips %d..%d\n", + face_pick, zslice_pick, start_mip, start_mip+num_mip-1); + + key->flags = 0; + key->format = format; + key->numMipLevels = num_mip; + key->size.width = u_minify(tex->base.width0, start_mip); + key->size.height = u_minify(tex->base.height0, start_mip); + key->size.depth = zslice_pick < 0 ? 
u_minify(tex->base.depth0, start_mip) : 1; + key->cachable = 1; + assert(key->size.depth == 1); + + if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) { + key->flags |= SVGA3D_SURFACE_CUBEMAP; + key->numFaces = 6; + } else { + key->numFaces = 1; + } + + if(key->format == SVGA3D_FORMAT_INVALID) { + key->cachable = 0; + return NULL; + } + + SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n"); + handle = svga_screen_surface_create(ss, key); + if (!handle) { + key->cachable = 0; + return NULL; + } + + SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle); + + if (face_pick < 0) + face_pick = 0; + + if (zslice_pick >= 0) + z_offset = zslice_pick; + + for (i = 0; i < key->numMipLevels; i++) { + for (j = 0; j < key->numFaces; j++) { + if(tex->defined[j + face_pick][i + start_mip]) { + unsigned depth = (zslice_pick < 0 ? + u_minify(tex->base.depth0, i + start_mip) : + 1); + + svga_texture_copy_handle(svga_context(pipe), + ss, + tex->handle, + 0, 0, z_offset, + i + start_mip, + j + face_pick, + handle, 0, 0, 0, i, j, + u_minify(tex->base.width0, i + start_mip), + u_minify(tex->base.height0, i + start_mip), + depth); + } + } + } + + return handle; +} + + +static struct pipe_surface * +svga_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct svga_texture *tex = svga_texture(pt); + struct svga_surface *s; + boolean render = flags & PIPE_BUFFER_USAGE_GPU_WRITE ? TRUE : FALSE; + boolean view = FALSE; + SVGA3dSurfaceFormat format; + + s = CALLOC_STRUCT(svga_surface); + if (!s) + return NULL; + + pipe_reference_init(&s->base.reference, 1); + pipe_texture_reference(&s->base.texture, pt); + s->base.format = pt->format; + s->base.width = u_minify(pt->width0, level); + s->base.height = u_minify(pt->height0, level); + s->base.usage = flags; + s->base.level = level; + s->base.face = face; + s->base.zslice = zslice; + + if (!render) + format = svga_translate_format(pt->format); + else + format = svga_translate_format_render(pt->format); + + assert(format != SVGA3D_FORMAT_INVALID); + assert(!(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + + + if (svga_screen(screen)->debug.force_surface_view) + view = TRUE; + + /* Currently only used for compressed textures */ + if (render && + format != svga_translate_format(pt->format)) { + view = TRUE; + } + + if (level != 0 && + svga_screen(screen)->debug.force_level_surface_view) + view = TRUE; + + if (pt->target == PIPE_TEXTURE_3D) + view = TRUE; + + if (svga_screen(screen)->debug.no_surface_view) + view = FALSE; + + if (view) { + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", + pt, level, face, zslice, s); + + s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice, + &s->key); + s->real_face = 0; + s->real_level = 0; + s->real_zslice = 0; + } else { + struct svga_winsys_screen *sws = svga_winsys_screen(screen); + + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", + pt, level, face, zslice, s); + + memset(&s->key, 0, sizeof s->key); + sws->surface_reference(sws, &s->handle, tex->handle); + s->real_face = face; + s->real_level = level; + s->real_zslice = zslice; + } + + return &s->base; +} + + +static void +svga_tex_surface_destroy(struct pipe_surface *surf) +{ + struct svga_surface *s = svga_surface(surf); + struct svga_screen *ss = svga_screen(surf->texture->screen); + + SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); + assert(s->key.cachable == 
0);
+ svga_screen_surface_destroy(ss, &s->key, &s->handle);
+ pipe_texture_reference(&surf->texture, NULL);
+ FREE(surf);
+}
+
+
+static INLINE void
+svga_mark_surface_dirty(struct pipe_surface *surf)
+{
+ struct svga_surface *s = svga_surface(surf);
+
+ if(!s->dirty) {
+ struct svga_texture *tex = svga_texture(surf->texture);
+
+ s->dirty = TRUE;
+
+ if (s->handle == tex->handle)
+ tex->defined[surf->face][surf->level] = TRUE;
+ else {
+ /* this will happen later in svga_propagate_surface */
+ }
+ }
+}
+
+
+void svga_mark_surfaces_dirty(struct svga_context *svga)
+{
+ unsigned i;
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (svga->curr.framebuffer.cbufs[i])
+ svga_mark_surface_dirty(svga->curr.framebuffer.cbufs[i]);
+ }
+ if (svga->curr.framebuffer.zsbuf)
+ svga_mark_surface_dirty(svga->curr.framebuffer.zsbuf);
+}
+
+/**
+ * Propagate any changes from surfaces to their texture.
+ * pipe is optional context to inline the blit command in.
+ */
+void
+svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
+{
+ struct svga_surface *s = svga_surface(surf);
+ struct svga_texture *tex = svga_texture(surf->texture);
+ struct svga_screen *ss = svga_screen(surf->texture->screen);
+
+ if (!s->dirty)
+ return;
+
+ s->dirty = FALSE;
+ ss->texture_timestamp++;
+ tex->view_age[surf->level] = ++(tex->age);
+
+ if (s->handle != tex->handle) {
+ SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf);
+ svga_texture_copy_handle(svga_context(pipe), ss,
+ s->handle, 0, 0, 0, s->real_level, s->real_face,
+ tex->handle, 0, 0, surf->zslice, surf->level, surf->face,
+ u_minify(tex->base.width0, surf->level),
+ u_minify(tex->base.height0, surf->level), 1);
+ tex->defined[surf->face][surf->level] = TRUE;
+ }
+}
+
+/**
+ * Check if we should call svga_propagate_surface on the surface.
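+ *
+ * That is the case when the surface was rendered to through a separate
+ * view surface which has not yet been copied back into the texture it
+ * belongs to.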
+ */ +extern boolean +svga_surface_needs_propagation(struct pipe_surface *surf) +{ + struct svga_surface *s = svga_surface(surf); + struct svga_texture *tex = svga_texture(surf->texture); + + return s->dirty && s->handle != tex->handle; +} + + +static struct pipe_transfer * +svga_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st; + + /* We can't map texture storage directly */ + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + st = CALLOC_STRUCT(svga_transfer); + if (!st) + return NULL; + + st->base.format = texture->format; + st->base.block = texture->block; + st->base.x = x; + st->base.y = y; + st->base.width = w; + st->base.height = h; + st->base.nblocksx = pf_get_nblocksx(&texture->block, w); + st->base.nblocksy = pf_get_nblocksy(&texture->block, h); + st->base.stride = st->base.nblocksx*st->base.block.size; + st->base.usage = usage; + st->base.face = face; + st->base.level = level; + st->base.zslice = zslice; + + st->hw_nblocksy = st->base.nblocksy; + + st->hwbuf = svga_winsys_buffer_create(ss, + 1, + 0, + st->hw_nblocksy*st->base.stride); + while(!st->hwbuf && (st->hw_nblocksy /= 2)) { + st->hwbuf = svga_winsys_buffer_create(ss, + 1, + 0, + st->hw_nblocksy*st->base.stride); + } + + if(!st->hwbuf) + goto no_hwbuf; + + if(st->hw_nblocksy < st->base.nblocksy) { + /* We couldn't allocate a hardware buffer big enough for the transfer, + * so allocate regular malloc memory instead */ + debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n", + __FUNCTION__, + (st->base.nblocksy*st->base.stride + 1023)/1024, + (st->base.nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, + (st->hw_nblocksy*st->base.stride + 1023)/1024); + st->swbuf = MALLOC(st->base.nblocksy*st->base.stride); + if(!st->swbuf) + goto no_swbuf; + } + + pipe_texture_reference(&st->base.texture, texture); + + if (usage & PIPE_TRANSFER_READ) + svga_transfer_dma(st, SVGA3D_READ_HOST_VRAM); + + return &st->base; + +no_swbuf: + sws->buffer_destroy(sws, st->hwbuf); +no_hwbuf: + FREE(st); + return NULL; +} + + +static void * +svga_transfer_map( struct pipe_screen *screen, + struct pipe_transfer *transfer ) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st = svga_transfer(transfer); + + if(st->swbuf) + return st->swbuf; + else + /* The wait for read transfers already happened when svga_transfer_dma + * was called. 
*/ + return sws->buffer_map(sws, st->hwbuf, + pipe_transfer_buffer_flags(transfer)); +} + + +static void +svga_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st = svga_transfer(transfer); + + if(!st->swbuf) + sws->buffer_unmap(sws, st->hwbuf); +} + + +static void +svga_tex_transfer_destroy(struct pipe_transfer *transfer) +{ + struct svga_texture *tex = svga_texture(transfer->texture); + struct svga_screen *ss = svga_screen(transfer->texture->screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st = svga_transfer(transfer); + + if (st->base.usage & PIPE_TRANSFER_WRITE) { + svga_transfer_dma(st, SVGA3D_WRITE_HOST_VRAM); + ss->texture_timestamp++; + tex->view_age[transfer->level] = ++(tex->age); + tex->defined[transfer->face][transfer->level] = TRUE; + } + + pipe_texture_reference(&st->base.texture, NULL); + FREE(st->swbuf); + sws->buffer_destroy(sws, st->hwbuf); + FREE(st); +} + +void +svga_screen_init_texture_functions(struct pipe_screen *screen) +{ + screen->texture_create = svga_texture_create; + screen->texture_destroy = svga_texture_destroy; + screen->get_tex_surface = svga_get_tex_surface; + screen->tex_surface_destroy = svga_tex_surface_destroy; + screen->texture_blanket = svga_texture_blanket; + screen->get_tex_transfer = svga_get_tex_transfer; + screen->transfer_map = svga_transfer_map; + screen->transfer_unmap = svga_transfer_unmap; + screen->tex_transfer_destroy = svga_tex_transfer_destroy; +} + +/*********************************************************************** + */ + +struct svga_sampler_view * +svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, + unsigned min_lod, unsigned max_lod) +{ + struct svga_screen *ss = svga_screen(pt->screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_texture *tex = svga_texture(pt); + struct svga_sampler_view *sv = NULL; + SVGA3dSurfaceFormat format = svga_translate_format(pt->format); + boolean view = TRUE; + + assert(pt); + assert(min_lod >= 0); + assert(min_lod <= max_lod); + assert(max_lod <= pt->last_level); + + + /* Is a view needed */ + { + /* + * Can't control max lod. For first level views and when we only + * look at one level we disable mip filtering to achive the same + * results as a view. 
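+ *
+ * In particular, a view covering the full mipmap range (min_lod 0 up to
+ * the texture's last level) can use the texture's own surface handle
+ * directly, so no separate view surface is created for it.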
+ */ + if (min_lod == 0 && max_lod >= pt->last_level) + view = FALSE; + + if (pf_is_compressed(pt->format) && view) { + format = svga_translate_format_render(pt->format); + } + + if (ss->debug.no_sampler_view) + view = FALSE; + + if (ss->debug.force_sampler_view) + view = TRUE; + } + + /* First try the cache */ + if (view) { + pipe_mutex_lock(ss->tex_mutex); + if (tex->cached_view && + tex->cached_view->min_lod == min_lod && + tex->cached_view->max_lod == max_lod) { + svga_sampler_view_reference(&sv, tex->cached_view); + pipe_mutex_unlock(ss->tex_mutex); + SVGA_DBG(DEBUG_VIEWS, "svga: Sampler view: reuse %p, %u %u, last %u\n", + pt, min_lod, max_lod, pt->last_level); + svga_validate_sampler_view(svga_context(pipe), sv); + return sv; + } + pipe_mutex_unlock(ss->tex_mutex); + } + + sv = CALLOC_STRUCT(svga_sampler_view); + pipe_reference_init(&sv->reference, 1); + sv->texture = tex; + sv->min_lod = min_lod; + sv->max_lod = max_lod; + + /* No view needed just use the whole texture */ + if (!view) { + SVGA_DBG(DEBUG_VIEWS, + "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", + pt, min_lod, max_lod, + max_lod - min_lod + 1, + pt->width0, + pt->height0, + pt->depth0, + pt->last_level); + sv->key.cachable = 0; + sws->surface_reference(sws, &sv->handle, tex->handle); + return sv; + } + + SVGA_DBG(DEBUG_VIEWS, + "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", + pt, min_lod, max_lod, + max_lod - min_lod + 1, + pt->width0, + pt->height0, + pt->depth0, + pt->last_level); + + sv->age = tex->age; + sv->handle = svga_texture_view_surface(pipe, tex, format, + min_lod, + max_lod - min_lod + 1, + -1, -1, + &sv->key); + + if (!sv->handle) { + assert(0); + sv->key.cachable = 0; + sws->surface_reference(sws, &sv->handle, tex->handle); + return sv; + } + + pipe_mutex_lock(ss->tex_mutex); + svga_sampler_view_reference(&tex->cached_view, sv); + pipe_mutex_unlock(ss->tex_mutex); + + return sv; +} + +void +svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v) +{ + struct svga_texture *tex = v->texture; + unsigned numFaces; + unsigned age = 0; + int i, k; + + assert(svga); + + if (v->handle == v->texture->handle) + return; + + age = tex->age; + + if(tex->base.target == PIPE_TEXTURE_CUBE) + numFaces = 6; + else + numFaces = 1; + + for (i = v->min_lod; i <= v->max_lod; i++) { + for (k = 0; k < numFaces; k++) { + if (v->age < tex->view_age[i]) + svga_texture_copy_handle(svga, NULL, + tex->handle, 0, 0, 0, i, k, + v->handle, 0, 0, 0, i - v->min_lod, k, + u_minify(tex->base.width0, i), + u_minify(tex->base.height0, i), + u_minify(tex->base.depth0, i)); + } + } + + v->age = age; +} + +void +svga_destroy_sampler_view_priv(struct svga_sampler_view *v) +{ + struct svga_screen *ss = svga_screen(v->texture->base.screen); + + SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); + svga_screen_surface_destroy(ss, &v->key, &v->handle); + + FREE(v); +} + +boolean +svga_screen_buffer_from_texture(struct pipe_texture *texture, + struct pipe_buffer **buffer, + unsigned *stride) +{ + struct svga_texture *stex = svga_texture(texture); + + *buffer = svga_screen_buffer_wrap_surface + (texture->screen, + svga_translate_format(texture->format), + stex->handle); + + *stride = pf_get_nblocksx(&texture->block, texture->width0) * + texture->block.size; + + return *buffer != NULL; +} + + +struct svga_winsys_surface * +svga_screen_texture_get_winsys_surface(struct pipe_texture *texture) +{ + struct svga_winsys_screen *sws = 
svga_winsys_screen(texture->screen);
+ struct svga_winsys_surface *vsurf = NULL;
+
+ assert(svga_texture(texture)->key.cachable == 0);
+ svga_texture(texture)->key.cachable = 0;
+ sws->surface_reference(sws, &vsurf, svga_texture(texture)->handle);
+ return vsurf;
+}
diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
new file mode 100644
index 0000000000..1cc4063e65
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_texture.h
@@ -0,0 +1,187 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TEXTURE_H
+#define SVGA_TEXTURE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "svga_screen_cache.h"
+
+struct pipe_context;
+struct pipe_screen;
+struct svga_context;
+struct svga_winsys_surface;
+enum SVGA3dSurfaceFormat;
+
+
+#define SVGA_MAX_TEXTURE_LEVELS 12 /* 2048x2048 */
+
+
+/**
+ * A sampler's view into a texture.
+ *
+ * We currently cache a single sampler view on the texture, by holding a
+ * reference from the texture to the sampler view.
+ *
+ * Because of this we cannot hold a reference to the texture from the
+ * sampler view. So the user of the sampler views must make sure that the
+ * texture has a reference taken for as long as the sampler view is
+ * referenced.
+ *
+ * Just unreferencing the sampler_view before the texture is enough.
+ */
+struct svga_sampler_view
+{
+ struct pipe_reference reference;
+
+ struct svga_texture *texture;
+
+ int min_lod;
+ int max_lod;
+
+ unsigned age;
+
+ struct svga_host_surface_cache_key key;
+ struct svga_winsys_surface *handle;
+};
+
+
+struct svga_texture
+{
+ struct pipe_texture base;
+
+ boolean defined[6][PIPE_MAX_TEXTURE_LEVELS];
+
+ struct svga_sampler_view *cached_view;
+
+ unsigned view_age[SVGA_MAX_TEXTURE_LEVELS];
+ unsigned age;
+
+ boolean views_modified;
+
+ /**
+ * Creation key for the host surface handle.
+ *
+ * This structure describes all the host surface characteristics so that it
+ * can be looked up in cache, since creating a host surface is often a slow
+ * operation.
+ */ + struct svga_host_surface_cache_key key; + struct svga_winsys_surface *handle; +}; + + +struct svga_surface +{ + struct pipe_surface base; + + struct svga_host_surface_cache_key key; + struct svga_winsys_surface *handle; + + unsigned real_face; + unsigned real_level; + unsigned real_zslice; + + boolean dirty; +}; + + +struct svga_transfer +{ + struct pipe_transfer base; + + struct svga_winsys_buffer *hwbuf; + + /* Height of the hardware buffer in pixel blocks */ + unsigned hw_nblocksy; + + /* Temporary malloc buffer when we can't allocate a hardware buffer + * big enough */ + void *swbuf; +}; + + +static INLINE struct svga_texture * +svga_texture(struct pipe_texture *texture) +{ + return (struct svga_texture *)texture; +} + +static INLINE struct svga_surface * +svga_surface(struct pipe_surface *surface) +{ + assert(surface); + return (struct svga_surface *)surface; +} + +static INLINE struct svga_transfer * +svga_transfer(struct pipe_transfer *transfer) +{ + assert(transfer); + return (struct svga_transfer *)transfer; +} + +extern struct svga_sampler_view * +svga_get_tex_sampler_view(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned min_lod, unsigned max_lod); + +void +svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v); + +void +svga_destroy_sampler_view_priv(struct svga_sampler_view *v); + +static INLINE void +svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v) +{ + struct svga_sampler_view *old = *ptr; + + if (pipe_reference((struct pipe_reference **)ptr, &v->reference)) + svga_destroy_sampler_view_priv(old); +} + +extern void +svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf); + +extern boolean +svga_surface_needs_propagation(struct pipe_surface *surf); + +extern void +svga_screen_init_texture_functions(struct pipe_screen *screen); + +enum SVGA3dSurfaceFormat +svga_translate_format(enum pipe_format format); + +enum SVGA3dSurfaceFormat +svga_translate_format_render(enum pipe_format format); + + +#endif /* SVGA_TEXTURE_H */ diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c new file mode 100644 index 0000000000..1c21d3acfe --- /dev/null +++ b/src/gallium/drivers/svga/svga_state.c @@ -0,0 +1,278 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "util/u_debug.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" + +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_state.h" +#include "svga_draw.h" +#include "svga_cmd.h" +#include "svga_hw_reg.h" + +/* This is just enough to decide whether we need to use the draw + * module (swtnl) or not. + */ +static const struct svga_tracked_state *need_swtnl_state[] = +{ + &svga_update_need_swvfetch, + &svga_update_need_pipeline, + &svga_update_need_swtnl, + NULL +}; + + +/* Atoms to update hardware state prior to emitting a clear or draw + * packet. + */ +static const struct svga_tracked_state *hw_clear_state[] = +{ + &svga_hw_scissor, + &svga_hw_viewport, + &svga_hw_framebuffer, + NULL +}; + + +/* Atoms to update hardware state prior to emitting a draw packet. + */ +static const struct svga_tracked_state *hw_draw_state[] = +{ + &svga_hw_update_zero_stride, + &svga_hw_fs, + &svga_hw_vs, + &svga_hw_rss, + &svga_hw_tss, + &svga_hw_tss_binding, + &svga_hw_clip_planes, + &svga_hw_vdecl, + &svga_hw_fs_parameters, + &svga_hw_vs_parameters, + NULL +}; + + +static const struct svga_tracked_state *swtnl_draw_state[] = +{ + &svga_update_swtnl_draw, + &svga_update_swtnl_vdecl, + NULL +}; + +/* Flattens the graph of state dependencies. Could swap the positions + * of hw_clear_state and need_swtnl_state without breaking anything. + */ +static const struct svga_tracked_state **state_levels[] = +{ + need_swtnl_state, + hw_clear_state, + hw_draw_state, + swtnl_draw_state +}; + + + +static unsigned check_state( unsigned a, + unsigned b ) +{ + return (a & b); +} + +static void accumulate_state( unsigned *a, + unsigned b ) +{ + *a |= b; +} + + +static void xor_states( unsigned *result, + unsigned a, + unsigned b ) +{ + *result = a ^ b; +} + + + +static int update_state( struct svga_context *svga, + const struct svga_tracked_state *atoms[], + unsigned *state ) +{ + boolean debug = TRUE; + enum pipe_error ret = 0; + unsigned i; + + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret != 0) + return ret; + + if (debug) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. + */ + unsigned examined, prev; + + examined = 0; + prev = *state; + + for (i = 0; atoms[i] != NULL; i++) { + unsigned generated; + + assert(atoms[i]->dirty); + assert(atoms[i]->update); + + if (check_state(*state, atoms[i]->dirty)) { + if (0) + debug_printf("update: %s\n", atoms[i]->name); + ret = atoms[i]->update( svga, *state ); + if (ret != 0) + return ret; + } + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, prev, *state); + if (check_state(examined, generated)) { + debug_printf("state atom %s generated state already examined\n", + atoms[i]->name); + assert(0); + } + + prev = *state; + accumulate_state(&examined, atoms[i]->dirty); + } + } + else { + for (i = 0; atoms[i] != NULL; i++) { + if (check_state(*state, atoms[i]->dirty)) { + ret = atoms[i]->update( svga, *state ); + if (ret != 0) + return ret; + } + } + } + + return 0; +} + + + +int svga_update_state( struct svga_context *svga, + unsigned max_level ) +{ + struct svga_screen *screen = svga_screen(svga->pipe.screen); + int ret = 0; + int i; + + /* Check for updates to bound textures. 
This can't be done in an + * atom as there is no flag which could provoke this test, and we + * cannot create one. + */ + if (svga->state.texture_timestamp != screen->texture_timestamp) { + svga->state.texture_timestamp = screen->texture_timestamp; + svga->dirty |= SVGA_NEW_TEXTURE; + } + + for (i = 0; i <= max_level; i++) { + svga->dirty |= svga->state.dirty[i]; + + if (svga->dirty) { + ret = update_state( svga, + state_levels[i], + &svga->dirty ); + if (ret != 0) + return ret; + + svga->state.dirty[i] = 0; + } + } + + for (; i < SVGA_STATE_MAX; i++) + svga->state.dirty[i] |= svga->dirty; + + svga->dirty = 0; + return 0; +} + + + + +void svga_update_state_retry( struct svga_context *svga, + unsigned max_level ) +{ + int ret; + + ret = svga_update_state( svga, max_level ); + + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + svga_context_flush(svga, NULL); + ret = svga_update_state( svga, max_level ); + } + + assert( ret == 0 ); +} + + + +#define EMIT_RS(_rs, _count, _name, _value) \ +do { \ + _rs[_count].state = _name; \ + _rs[_count].uintValue = _value; \ + _count++; \ +} while (0) + + +/* Setup any hardware state which will be constant through the life of + * a context. + */ +enum pipe_error svga_emit_initial_state( struct svga_context *svga ) +{ + SVGA3dRenderState *rs; + unsigned count = 0; + const unsigned COUNT = 2; + enum pipe_error ret; + + ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT ); + if (ret) + return ret; + + /* Always use D3D style coordinate space as this is the only one + * which is implemented on all backends. + */ + EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED ); + EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW ); + + assert( COUNT == count ); + SVGA_FIFOCommitAll( svga->swc ); + + return 0; + +} diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h new file mode 100644 index 0000000000..22d5a6d552 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state.h @@ -0,0 +1,95 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#ifndef SVGA_STATE_H +#define SVGA_STATE_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" + +struct svga_context; + + +void svga_init_state( struct svga_context *svga ); +void svga_destroy_state( struct svga_context *svga ); + + +struct svga_tracked_state { + const char *name; + unsigned dirty; + int (*update)( struct svga_context *svga, unsigned dirty ); +}; + +/* NEED_SWTNL + */ +extern struct svga_tracked_state svga_update_need_swvfetch; +extern struct svga_tracked_state svga_update_need_pipeline; +extern struct svga_tracked_state svga_update_need_swtnl; + +/* HW_CLEAR + */ +extern struct svga_tracked_state svga_hw_viewport; +extern struct svga_tracked_state svga_hw_scissor; +extern struct svga_tracked_state svga_hw_framebuffer; + +/* HW_DRAW + */ +extern struct svga_tracked_state svga_hw_vs; +extern struct svga_tracked_state svga_hw_fs; +extern struct svga_tracked_state svga_hw_rss; +extern struct svga_tracked_state svga_hw_tss; +extern struct svga_tracked_state svga_hw_tss_binding; +extern struct svga_tracked_state svga_hw_clip_planes; +extern struct svga_tracked_state svga_hw_vdecl; +extern struct svga_tracked_state svga_hw_fs_parameters; +extern struct svga_tracked_state svga_hw_vs_parameters; +extern struct svga_tracked_state svga_hw_update_zero_stride; + +/* SWTNL_DRAW + */ +extern struct svga_tracked_state svga_update_swtnl_draw; +extern struct svga_tracked_state svga_update_swtnl_vdecl; + +/* Bring the hardware fully up-to-date so that we can emit draw + * commands. + */ +#define SVGA_STATE_NEED_SWTNL 0 +#define SVGA_STATE_HW_CLEAR 1 +#define SVGA_STATE_HW_DRAW 2 +#define SVGA_STATE_SWTNL_DRAW 3 +#define SVGA_STATE_MAX 4 + + +enum pipe_error svga_update_state( struct svga_context *svga, + unsigned level ); + +void svga_update_state_retry( struct svga_context *svga, + unsigned level ); + + +enum pipe_error svga_emit_initial_state( struct svga_context *svga ); + +#endif diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c new file mode 100644 index 0000000000..209ed28245 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -0,0 +1,239 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_tgsi.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" + +/*********************************************************************** + * Hardware update + */ + +/* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* + */ +static int svga_shader_type( int unit ) +{ + return unit + 1; +} + + +static int emit_const( struct svga_context *svga, + int unit, + int i, + const float *value ) +{ + int ret = PIPE_OK; + + if (memcmp(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)) != 0) { + if (SVGA_DEBUG & DEBUG_CONSTS) + debug_printf("%s %s %d: %f %f %f %f\n", + __FUNCTION__, + unit == PIPE_SHADER_VERTEX ? "VERT" : "FRAG", + i, + value[0], + value[1], + value[2], + value[3]); + + ret = SVGA3D_SetShaderConst( svga->swc, + i, + svga_shader_type(unit), + SVGA3D_CONST_TYPE_FLOAT, + value ); + if (ret) + return ret; + + memcpy(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)); + } + + return ret; +} + +static int emit_consts( struct svga_context *svga, + int offset, + int unit ) +{ + struct pipe_screen *screen = svga->pipe.screen; + unsigned count; + const float (*data)[4] = NULL; + unsigned i; + int ret = PIPE_OK; + + if (svga->curr.cb[unit] == NULL) + goto done; + + count = svga->curr.cb[unit]->size / (4 * sizeof(float)); + + data = (const float (*)[4])pipe_buffer_map(screen, + svga->curr.cb[unit], + PIPE_BUFFER_USAGE_CPU_READ); + if (data == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto done; + } + + for (i = 0; i < count; i++) { + ret = emit_const( svga, unit, offset + i, data[i] ); + if (ret) + goto done; + } + +done: + if (data) + pipe_buffer_unmap(screen, svga->curr.cb[unit]); + + return ret; +} + +static int emit_fs_consts( struct svga_context *svga, + unsigned dirty ) +{ + const struct svga_shader_result *result = svga->state.hw_draw.fs; + const struct svga_fs_compile_key *key = &result->key.fkey; + int ret = 0; + + ret = emit_consts( svga, 0, PIPE_SHADER_FRAGMENT ); + if (ret) + return ret; + + /* The internally generated fragment shader for xor blending + * doesn't have a 'result' struct. 
It should be fixed to avoid + * this special case, but work around it with a NULL check: + */ + if (result != NULL && + key->num_unnormalized_coords) + { + unsigned offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + int i; + + for (i = 0; i < key->num_textures; i++) { + if (key->tex[i].unnormalized) { + struct pipe_texture *tex = svga->curr.texture[i]; + float data[4]; + + data[0] = 1.0 / (float)tex->width0; + data[1] = 1.0 / (float)tex->height0; + data[2] = 1.0; + data[3] = 1.0; + + ret = emit_const( svga, + PIPE_SHADER_FRAGMENT, + key->tex[i].width_height_idx + offset, + data ); + if (ret) + return ret; + } + } + + offset += key->num_unnormalized_coords; + } + + return 0; +} + + +struct svga_tracked_state svga_hw_fs_parameters = +{ + "hw fs params", + (SVGA_NEW_FS_CONST_BUFFER | + SVGA_NEW_FS_RESULT | + SVGA_NEW_TEXTURE_BINDING), + emit_fs_consts +}; + +/*********************************************************************** + */ + +static int emit_vs_consts( struct svga_context *svga, + unsigned dirty ) +{ + const struct svga_shader_result *result = svga->state.hw_draw.vs; + const struct svga_vs_compile_key *key = &result->key.vkey; + int ret = 0; + unsigned offset; + + /* SVGA_NEW_VS_RESULT + */ + if (result == NULL) + return 0; + + /* SVGA_NEW_VS_CONST_BUFFER + */ + ret = emit_consts( svga, 0, PIPE_SHADER_VERTEX ); + if (ret) + return ret; + + offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + + /* SVGA_NEW_VS_RESULT + */ + if (key->need_prescale) { + ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++, + svga->state.hw_clear.prescale.scale ); + if (ret) + return ret; + + ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++, + svga->state.hw_clear.prescale.translate ); + if (ret) + return ret; + } + + /* SVGA_NEW_ZERO_STRIDE + */ + if (key->zero_stride_vertex_elements) { + unsigned i, curr_zero_stride = 0; + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) { + if (key->zero_stride_vertex_elements & (1 << i)) { + ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++, + svga->curr.zero_stride_constants + + 4 * curr_zero_stride ); + if (ret) + return ret; + ++curr_zero_stride; + } + } + } + + return 0; +} + + +struct svga_tracked_state svga_hw_vs_parameters = +{ + "hw vs params", + (SVGA_NEW_VS_CONST_BUFFER | + SVGA_NEW_ZERO_STRIDE | + SVGA_NEW_VS_RESULT), + emit_vs_consts +}; + diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c new file mode 100644 index 0000000000..cfdcae4ee4 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -0,0 +1,458 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" + + +/*********************************************************************** + * Hardware state update + */ + + +static int emit_framebuffer( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + unsigned i; + enum pipe_error ret; + + /* XXX: Need shadow state in svga->hw to eliminate redundant + * uploads, especially of NULL buffers. + */ + + for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + if (curr->cbufs[i] != hw->cbufs[i]) { + if (svga->curr.nr_fbs++ > 8) + return PIPE_ERROR_OUT_OF_MEMORY; + + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, curr->cbufs[i]); + if (ret != PIPE_OK) + return ret; + + pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]); + } + } + + + if (curr->zsbuf != hw->zsbuf) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf); + if (ret != PIPE_OK) + return ret; + + if (curr->zsbuf && + curr->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf); + if (ret != PIPE_OK) + return ret; + } + else { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL); + if (ret != PIPE_OK) + return ret; + } + + pipe_surface_reference(&hw->zsbuf, curr->zsbuf); + } + + + return 0; +} + + +struct svga_tracked_state svga_hw_framebuffer = +{ + "hw framebuffer state", + SVGA_NEW_FRAME_BUFFER, + emit_framebuffer +}; + + + + +/*********************************************************************** + */ + +static int emit_viewport( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_viewport_state *viewport = &svga->curr.viewport; + struct svga_prescale prescale; + SVGA3dRect rect; + /* Not sure if this state is relevant with POSITIONT. Probably + * not, but setting to 0,1 avoids some state pingponging. + */ + float range_min = 0.0; + float range_max = 1.0; + float flip = -1.0; + boolean degenerate = FALSE; + enum pipe_error ret; + + float fb_width = svga->curr.framebuffer.width; + float fb_height = svga->curr.framebuffer.height; + + memset( &prescale, 0, sizeof(prescale) ); + + if (svga->curr.rast->templ.bypass_vs_clip_and_viewport) { + + /* Avoid POSITIONT as it has a non trivial implementation outside the D3D + * API. Always generate a vertex shader. + */ + rect.x = 0; + rect.y = 0; + rect.w = svga->curr.framebuffer.width; + rect.h = svga->curr.framebuffer.height; + + prescale.scale[0] = 2.0 / (float)rect.w; + prescale.scale[1] = - 2.0 / (float)rect.h; + prescale.scale[2] = 1.0; + prescale.scale[3] = 1.0; + prescale.translate[0] = -1.0f; + prescale.translate[1] = 1.0f; + prescale.translate[2] = 0; + prescale.translate[3] = 0; + prescale.enabled = TRUE; + } else { + + /* Examine gallium viewport transformation and produce a screen + * rectangle and possibly vertex shader pre-transformation to + * get the same results. 
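+ *
+ * In the unflipped, unclipped case the gallium transform
+ *    x_win = scale[0] * x_ndc + translate[0]
+ * maps to a D3D viewport rect with
+ *    x = translate[0] - scale[0],  w = 2 * scale[0]
+ * (y additionally folds in the window-space flip) and prescale
+ * stays the identity; prescale is only adjusted below when the
+ * rect is flipped or exceeds the framebuffer, or to account for
+ * GL vs D3D rasterization and depth conventions.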
+ */ + float fx = viewport->scale[0] * -1.0 + viewport->translate[0]; + float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1]; + float fw = viewport->scale[0] * 2; + float fh = flip * viewport->scale[1] * 2; + + SVGA_DBG(DEBUG_VIEWPORT, + "\ninitial %f,%f %fx%f\n", + fx, + fy, + fw, + fh); + + prescale.scale[0] = 1.0; + prescale.scale[1] = 1.0; + prescale.scale[2] = 1.0; + prescale.scale[3] = 1.0; + prescale.translate[0] = 0; + prescale.translate[1] = 0; + prescale.translate[2] = 0; + prescale.translate[3] = 0; + prescale.enabled = TRUE; + + + + if (fw < 0) { + prescale.scale[0] *= -1.0; + prescale.translate[0] += -fw; + fw = -fw; + fx = viewport->scale[0] * 1.0 + viewport->translate[0]; + } + + if (fh < 0) { + prescale.scale[1] *= -1.0; + prescale.translate[1] += -fh; + fh = -fh; + fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1]; + } + + if (fx < 0) { + prescale.translate[0] += fx; + prescale.scale[0] *= fw / (fw + fx); + fw += fx; + fx = 0; + } + + if (fy < 0) { + prescale.translate[1] += fy; + prescale.scale[1] *= fh / (fh + fy); + fh += fy; + fy = 0; + } + + if (fx + fw > fb_width) { + prescale.scale[0] *= fw / (fb_width - fx); + prescale.translate[0] -= fx * (fw / (fb_width - fx)); + prescale.translate[0] += fx; + fw = fb_width - fx; + + } + + if (fy + fh > fb_height) { + prescale.scale[1] *= fh / (fb_height - fy); + prescale.translate[1] -= fy * (fh / (fb_height - fy)); + prescale.translate[1] += fy; + fh = fb_height - fy; + } + + if (fw < 0 || fh < 0) { + fw = fh = fx = fy = 0; + degenerate = TRUE; + goto out; + } + + + /* D3D viewport is integer space. Convert fx,fy,etc. to + * integers. + * + * TODO: adjust pretranslate correct for any subpixel error + * introduced converting to integers. + */ + rect.x = fx; + rect.y = fy; + rect.w = fw; + rect.h = fh; + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport error %f,%f %fx%f\n", + fabs((float)rect.x - fx), + fabs((float)rect.y - fy), + fabs((float)rect.w - fw), + fabs((float)rect.h - fh)); + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport %d,%d %dx%d\n", + rect.x, + rect.y, + rect.w, + rect.h); + + + /* Finally, to get GL rasterization rules, need to tweak the + * screen-space coordinates slightly relative to D3D which is + * what hardware implements natively. + */ + if (svga->curr.rast->templ.gl_rasterization_rules) { + float adjust_x = 0.0; + float adjust_y = 0.0; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_LINES: + adjust_x = -0.5; + adjust_y = 0; + break; + case PIPE_PRIM_POINTS: + case PIPE_PRIM_TRIANGLES: + adjust_x = -0.375; + adjust_y = -0.5; + break; + } + + prescale.translate[0] += adjust_x; + prescale.translate[1] += adjust_y; + prescale.translate[2] = 0.5; /* D3D clip space */ + prescale.scale[2] = 0.5; /* D3D clip space */ + } + + + range_min = viewport->scale[2] * -1.0 + viewport->translate[2]; + range_max = viewport->scale[2] * 1.0 + viewport->translate[2]; + + /* D3D (and by implication SVGA) doesn't like dealing with zmax + * less than zmin. Detect that case, flip the depth range and + * invert our z-scale factor to achieve the same effect. 
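+ *
+ * For example scale[2] = -0.25, translate[2] = 0.75 gives
+ * range_min = 1.0, range_max = 0.5; after the swap we program
+ * zmin = 0.5, zmax = 1.0 and negate prescale.scale[2].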
+ */ + if (range_min > range_max) { + float range_tmp; + range_tmp = range_min; + range_min = range_max; + range_max = range_tmp; + prescale.scale[2] = -prescale.scale[2]; + } + } + + if (prescale.enabled) { + float H[2]; + float J[2]; + int i; + + SVGA_DBG(DEBUG_VIEWPORT, + "prescale %f,%f %fx%f\n", + prescale.translate[0], + prescale.translate[1], + prescale.scale[0], + prescale.scale[1]); + + H[0] = (float)rect.w / 2.0; + H[1] = -(float)rect.h / 2.0; + J[0] = (float)rect.x + (float)rect.w / 2.0; + J[1] = (float)rect.y + (float)rect.h / 2.0; + + SVGA_DBG(DEBUG_VIEWPORT, + "H %f,%f\n" + "J %fx%f\n", + H[0], + H[1], + J[0], + J[1]); + + /* Adjust prescale to take into account the fact that it is + * going to be applied prior to the perspective divide and + * viewport transformation. + * + * Vwin = H(Vc/Vc.w) + J + * + * We want to tweak Vwin with scale and translation from above, + * as in: + * + * Vwin' = S Vwin + T + * + * But we can only modify the values at Vc. Plugging all the + * above together, and rearranging, eventually we get: + * + * Vwin' = H(Vc'/Vc'.w) + J + * where: + * Vc' = SVc + KVc.w + * K = (T + (S-1)J) / H + * + * Overwrite prescale.translate with values for K: + */ + for (i = 0; i < 2; i++) { + prescale.translate[i] = ((prescale.translate[i] + + (prescale.scale[i] - 1.0) * J[i]) / H[i]); + } + + SVGA_DBG(DEBUG_VIEWPORT, + "clipspace %f,%f %fx%f\n", + prescale.translate[0], + prescale.translate[1], + prescale.scale[0], + prescale.scale[1]); + } + +out: + if (degenerate) { + rect.x = 0; + rect.y = 0; + rect.w = 1; + rect.h = 1; + prescale.enabled = FALSE; + } + + if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { + ret = SVGA3D_SetViewport(svga->swc, &rect); + if(ret != PIPE_OK) + return ret; + + memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect)); + assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport)); + } + + if (svga->state.hw_clear.depthrange.zmin != range_min || + svga->state.hw_clear.depthrange.zmax != range_max) + { + ret = SVGA3D_SetZRange(svga->swc, range_min, range_max ); + if(ret != PIPE_OK) + return ret; + + svga->state.hw_clear.depthrange.zmin = range_min; + svga->state.hw_clear.depthrange.zmax = range_max; + } + + if (memcmp(&prescale, &svga->state.hw_clear.prescale, sizeof prescale) != 0) { + svga->dirty |= SVGA_NEW_PRESCALE; + svga->state.hw_clear.prescale = prescale; + } + + return 0; +} + + +struct svga_tracked_state svga_hw_viewport = +{ + "hw viewport state", + ( SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_VIEWPORT | + SVGA_NEW_RAST | + SVGA_NEW_REDUCED_PRIMITIVE ), + emit_viewport +}; + + +/*********************************************************************** + * Scissor state + */ +static int emit_scissor_rect( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_scissor_state *scissor = &svga->curr.scissor; + SVGA3dRect rect; + + rect.x = scissor->minx; + rect.y = scissor->miny; + rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */ + rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */ + + return SVGA3D_SetScissorRect(svga->swc, &rect); +} + + +struct svga_tracked_state svga_hw_scissor = +{ + "hw scissor state", + SVGA_NEW_SCISSOR, + emit_scissor_rect +}; + + +/*********************************************************************** + * Userclip state + */ + +static int emit_clip_planes( struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + enum pipe_error ret; + + /* TODO: just emit directly from svga_set_clip_state()? 
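+ *
+ * For now every enabled plane is re-sent whenever SVGA_NEW_CLIP
+ * is flagged, even if its coefficients haven't changed.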
+ */ + for (i = 0; i < svga->curr.clip.nr; i++) { + ret = SVGA3D_SetClipPlane( svga->swc, + i, + svga->curr.clip.ucp[i] ); + if(ret != PIPE_OK) + return ret; + } + + return 0; +} + + +struct svga_tracked_state svga_hw_clip_planes = +{ + "hw viewport state", + SVGA_NEW_CLIP, + emit_clip_planes +}; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c new file mode 100644 index 0000000000..6ec38ed3e4 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -0,0 +1,282 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_tgsi.h" + +#include "svga_hw_reg.h" + + + +static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a, + const struct svga_fs_compile_key *b ) +{ + unsigned keysize = svga_fs_key_size( a ); + return memcmp( a, b, keysize ); +} + + +static struct svga_shader_result *search_fs_key( struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *key ) +{ + struct svga_shader_result *result = fs->base.results; + + assert(key); + + for ( ; result; result = result->next) { + if (compare_fs_keys( key, &result->key.fkey ) == 0) + return result; + } + + return NULL; +} + + +static enum pipe_error compile_fs( struct svga_context *svga, + struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *key, + struct svga_shader_result **out_result ) +{ + struct svga_shader_result *result; + enum pipe_error ret; + + result = svga_translate_fragment_program( fs, key ); + if (result == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + + + ret = SVGA3D_DefineShader(svga->swc, + svga->state.next_fs_id, + SVGA3D_SHADERTYPE_PS, + result->tokens, + result->nr_tokens * sizeof result->tokens[0]); + if (ret) + goto fail; + + *out_result = result; + result->id = svga->state.next_fs_id++; + result->next = fs->base.results; + fs->base.results = result; + return PIPE_OK; + +fail: + if (result) + svga_destroy_shader_result( result ); + return ret; +} + +/* The blend workaround for simulating logicop xor behaviour requires + * that the incoming fragment color be white. 
This change achieves + * that by hooking up a hard-wired fragment shader that just emits + * color 1,1,1,1 + * + * This is a slightly incomplete solution as it assumes that the + * actual bound shader has no other effects beyond generating a + * fragment color. In particular shaders containing TEXKIL and/or + * depth-write will not have the correct behaviour, nor will those + * expecting to use alphatest. + * + * These are avoidable issues, but they are not much worse than the + * unavoidable ones associated with this technique, so it's not clear + * how much effort should be expended trying to resolve them - the + * ultimate result will still not be correct in most cases. + * + * Shader below was generated with: + * SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt + */ +static int emit_white_fs( struct svga_context *svga ) +{ + int ret; + + /* ps_3_0 + * def c0, 1.000000, 0.000000, 0.000000, 1.000000 + * mov oC0, c0.x + * end + */ + static const unsigned white_tokens[] = { + 0xffff0300, + 0x05000051, + 0xa00f0000, + 0x3f800000, + 0x00000000, + 0x00000000, + 0x3f800000, + 0x02000001, + 0x800f0800, + 0xa0000000, + 0x0000ffff, + }; + + ret = SVGA3D_DefineShader(svga->swc, + svga->state.next_fs_id, + SVGA3D_SHADERTYPE_PS, + white_tokens, + sizeof(white_tokens)); + if (ret) + return ret; + + svga->state.white_fs_id = svga->state.next_fs_id++; + return 0; +} + + +/* SVGA_NEW_TEXTURE_BINDING + * SVGA_NEW_RAST + * SVGA_NEW_NEED_SWTNL + * SVGA_NEW_SAMPLER + */ +static int make_fs_key( const struct svga_context *svga, + struct svga_fs_compile_key *key ) +{ + int i; + int idx = 0; + + memset(key, 0, sizeof *key); + + /* Only need fragment shader fixup for twoside lighting if doing + * hwtnl. Otherwise the draw module does the whole job for us. + * + * SVGA_NEW_SWTNL + */ + if (!svga->state.sw.need_swtnl) { + /* SVGA_NEW_RAST + */ + key->light_twoside = svga->curr.rast->templ.light_twoside; + key->front_cw = (svga->curr.rast->templ.front_winding == + PIPE_WINDING_CW); + } + + + /* XXX: want to limit this to the textures that the shader actually + * refers to. 
+ * + * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER + */ + for (i = 0; i < svga->curr.num_textures; i++) { + if (svga->curr.texture[i]) { + assert(svga->curr.sampler[i]); + key->tex[i].texture_target = svga->curr.texture[i]->target; + if (!svga->curr.sampler[i]->normalized_coords) { + key->tex[i].width_height_idx = idx++; + key->tex[i].unnormalized = TRUE; + ++key->num_unnormalized_coords; + } + } + } + key->num_textures = svga->curr.num_textures; + + idx = 0; + for (i = 0; i < svga->curr.num_samplers; ++i) { + if (svga->curr.sampler[i]) { + key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode; + key->tex[i].compare_func = svga->curr.sampler[i]->compare_func; + } + } + + return 0; +} + + + +static int emit_hw_fs( struct svga_context *svga, + unsigned dirty ) +{ + struct svga_shader_result *result = NULL; + unsigned id = SVGA3D_INVALID_ID; + int ret = 0; + + /* SVGA_NEW_BLEND + */ + if (svga->curr.blend->need_white_fragments) { + if (svga->state.white_fs_id == SVGA3D_INVALID_ID) { + ret = emit_white_fs( svga ); + if (ret) + return ret; + } + id = svga->state.white_fs_id; + } + else { + struct svga_fragment_shader *fs = svga->curr.fs; + struct svga_fs_compile_key key; + + /* SVGA_NEW_TEXTURE_BINDING + * SVGA_NEW_RAST + * SVGA_NEW_NEED_SWTNL + * SVGA_NEW_SAMPLER + */ + ret = make_fs_key( svga, &key ); + if (ret) + return ret; + + result = search_fs_key( fs, &key ); + if (!result) { + ret = compile_fs( svga, fs, &key, &result ); + if (ret) + return ret; + } + + assert (result); + id = result->id; + } + + assert(id != SVGA3D_INVALID_ID); + + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, + id ); + if (ret) + return ret; + + svga->dirty |= SVGA_NEW_FS_RESULT; + svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; + svga->state.hw_draw.fs = result; + } + + return 0; +} + +struct svga_tracked_state svga_hw_fs = +{ + "fragment shader (hwtnl)", + (SVGA_NEW_FS | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_NEED_SWTNL | + SVGA_NEW_RAST | + SVGA_NEW_SAMPLER | + SVGA_NEW_BLEND), + emit_hw_fs +}; + + + diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c new file mode 100644 index 0000000000..00201b8091 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -0,0 +1,200 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_state.h" + + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_debug.h" +#include "svga_hw_reg.h" + +/*********************************************************************** + */ + +static INLINE SVGA3dDeclType +svga_translate_vertex_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R32_FLOAT: return SVGA3D_DECLTYPE_FLOAT1; + case PIPE_FORMAT_R32G32_FLOAT: return SVGA3D_DECLTYPE_FLOAT2; + case PIPE_FORMAT_R32G32B32_FLOAT: return SVGA3D_DECLTYPE_FLOAT3; + case PIPE_FORMAT_R32G32B32A32_FLOAT: return SVGA3D_DECLTYPE_FLOAT4; + case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; + case PIPE_FORMAT_R8G8B8A8_USCALED: return SVGA3D_DECLTYPE_UBYTE4; + case PIPE_FORMAT_R16G16_SSCALED: return SVGA3D_DECLTYPE_SHORT2; + case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4; + case PIPE_FORMAT_R8G8B8A8_UNORM: return SVGA3D_DECLTYPE_UBYTE4N; + case PIPE_FORMAT_R16G16_SNORM: return SVGA3D_DECLTYPE_SHORT2N; + case PIPE_FORMAT_R16G16B16A16_SNORM: return SVGA3D_DECLTYPE_SHORT4N; + case PIPE_FORMAT_R16G16_UNORM: return SVGA3D_DECLTYPE_USHORT2N; + case PIPE_FORMAT_R16G16B16A16_UNORM: return SVGA3D_DECLTYPE_USHORT4N; + + /* These formats don't exist yet: + * + case PIPE_FORMAT_R10G10B10_USCALED: return SVGA3D_DECLTYPE_UDEC3; + case PIPE_FORMAT_R10G10B10_SNORM: return SVGA3D_DECLTYPE_DEC3N; + case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; + case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; + */ + + default: + /* There are many formats without hardware support. This case + * will be hit regularly, meaning we'll need swvfetch. 
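+ *
+ * Returning SVGA3D_DECLTYPE_MAX is what update_need_swvfetch()
+ * below keys on to route the draw through the swtnl path.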
+ */ + return SVGA3D_DECLTYPE_MAX; + } +} + + +static int update_need_swvfetch( struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + boolean need_swvfetch = FALSE; + + for (i = 0; i < svga->curr.num_vertex_elements; i++) { + svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format); + if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) { + need_swvfetch = TRUE; + break; + } + } + + if (need_swvfetch != svga->state.sw.need_swvfetch) { + svga->state.sw.need_swvfetch = need_swvfetch; + svga->dirty |= SVGA_NEW_NEED_SWVFETCH; + } + + return 0; +} + +struct svga_tracked_state svga_update_need_swvfetch = +{ + "update need_swvfetch", + ( SVGA_NEW_VELEMENT ), + update_need_swvfetch +}; + + +/*********************************************************************** + */ + +static int update_need_pipeline( struct svga_context *svga, + unsigned dirty ) +{ + + boolean need_pipeline = FALSE; + + /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE + */ + if (svga->curr.rast->need_pipeline & (1 << svga->curr.reduced_prim)) { + SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (%d) & prim (%x)\n", + __FUNCTION__, + svga->curr.rast->need_pipeline, + (1 << svga->curr.reduced_prim) ); + need_pipeline = TRUE; + } + + /* SVGA_NEW_EDGEFLAGS + */ + if (svga->curr.rast->hw_unfilled != PIPE_POLYGON_MODE_FILL && + svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES && + svga->curr.edgeflags != NULL) { + SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__); + need_pipeline = TRUE; + } + + /* SVGA_NEW_CLIP + */ + if (!svga->curr.rast->templ.bypass_vs_clip_and_viewport && + svga->curr.clip.nr) { + SVGA_DBG(DEBUG_SWTNL, "%s: userclip\n", __FUNCTION__); + need_pipeline = TRUE; + } + + if (need_pipeline != svga->state.sw.need_pipeline) { + svga->state.sw.need_pipeline = need_pipeline; + svga->dirty |= SVGA_NEW_NEED_PIPELINE; + } + + return 0; +} + + +struct svga_tracked_state svga_update_need_pipeline = +{ + "need pipeline", + (SVGA_NEW_RAST | + SVGA_NEW_CLIP | + SVGA_NEW_REDUCED_PRIMITIVE), + update_need_pipeline +}; + + +/*********************************************************************** + */ + +static int update_need_swtnl( struct svga_context *svga, + unsigned dirty ) +{ + boolean need_swtnl; + + if (svga->debug.no_swtnl) { + svga->state.sw.need_swvfetch = 0; + svga->state.sw.need_pipeline = 0; + } + + need_swtnl = (svga->state.sw.need_swvfetch || + svga->state.sw.need_pipeline); + + if (svga->debug.force_swtnl) { + need_swtnl = 1; + } + + if (need_swtnl != svga->state.sw.need_swtnl) { + SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF, + "%s need_swvfetch: %s, need_pipeline %s\n", + __FUNCTION__, + svga->state.sw.need_swvfetch ? "true" : "false", + svga->state.sw.need_pipeline ? "true" : "false"); + + svga->state.sw.need_swtnl = need_swtnl; + svga->dirty |= SVGA_NEW_NEED_SWTNL; + svga->swtnl.new_vdecl = TRUE; + } + + return 0; +} + + +struct svga_tracked_state svga_update_need_swtnl = +{ + "need swtnl", + (SVGA_NEW_NEED_PIPELINE | + SVGA_NEW_NEED_SWVFETCH), + update_need_swtnl +}; diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c new file mode 100644 index 0000000000..8b6803a285 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_rss.c @@ -0,0 +1,268 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" + +#include "svga_hw_reg.h" + + + +struct rs_queue { + unsigned rs_count; + SVGA3dRenderState rs[SVGA3D_RS_MAX]; +}; + + +#define EMIT_RS(svga, value, token, fail) \ +do { \ + if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \ + svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \ + svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \ + } \ +} while (0) + +#define EMIT_RS_FLOAT(svga, fvalue, token, fail) \ +do { \ + unsigned value = fui(fvalue); \ + if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \ + svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \ + svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \ + } \ +} while (0) + + +static INLINE void +svga_queue_rs( struct rs_queue *q, + unsigned rss, + unsigned value ) +{ + q->rs[q->rs_count].state = rss; + q->rs[q->rs_count].uintValue = value; + q->rs_count++; +} + + +/* Compare old and new render states and emit differences between them + * to hardware. Simplest implementation would be to emit the whole of + * the "to" state. 
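+ *
+ * Instead, each EMIT_RS() compares the new value against the copy
+ * cached in svga->state.hw_draw.rs[] and only queues states that
+ * actually changed.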
+ */ +static int emit_rss( struct svga_context *svga, + unsigned dirty ) +{ + struct rs_queue queue; + + queue.rs_count = 0; + + if (dirty & SVGA_NEW_BLEND) { + const struct svga_blend_state *curr = svga->curr.blend; + + EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail ); + EMIT_RS( svga, curr->rt[0].blend_enable, BLENDENABLE, fail ); + + if (curr->rt[0].blend_enable) { + EMIT_RS( svga, curr->rt[0].srcblend, SRCBLEND, fail ); + EMIT_RS( svga, curr->rt[0].dstblend, DSTBLEND, fail ); + EMIT_RS( svga, curr->rt[0].blendeq, BLENDEQUATION, fail ); + + EMIT_RS( svga, curr->rt[0].separate_alpha_blend_enable, + SEPARATEALPHABLENDENABLE, fail ); + + if (curr->rt[0].separate_alpha_blend_enable) { + EMIT_RS( svga, curr->rt[0].srcblend_alpha, SRCBLENDALPHA, fail ); + EMIT_RS( svga, curr->rt[0].dstblend_alpha, DSTBLENDALPHA, fail ); + EMIT_RS( svga, curr->rt[0].blendeq_alpha, BLENDEQUATIONALPHA, fail ); + } + } + } + + + if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) { + const struct svga_depth_stencil_state *curr = svga->curr.depth; + const struct svga_rasterizer_state *rast = svga->curr.rast; + + if (!curr->stencil[0].enabled) + { + /* Stencil disabled + */ + EMIT_RS( svga, FALSE, STENCILENABLE, fail ); + EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail ); + } + else if (curr->stencil[0].enabled && !curr->stencil[1].enabled) + { + /* Regular stencil + */ + EMIT_RS( svga, TRUE, STENCILENABLE, fail ); + EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail ); + + EMIT_RS( svga, curr->stencil[0].func, STENCILFUNC, fail ); + EMIT_RS( svga, curr->stencil[0].fail, STENCILFAIL, fail ); + EMIT_RS( svga, curr->stencil[0].zfail, STENCILZFAIL, fail ); + EMIT_RS( svga, curr->stencil[0].pass, STENCILPASS, fail ); + + EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail ); + EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail ); + EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail ); + } + else + { + int cw, ccw; + + /* Hardware frontwinding is always CW, so if ours is also CW, + * then our definition of front face agrees with hardware. + * Otherwise need to flip. 
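+ *
+ * E.g. with front_winding == PIPE_WINDING_CCW the hardware's CW
+ * faces are our back faces, so stencil[1] feeds the STENCIL*
+ * states and stencil[0] the CCWSTENCIL* states.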
+ */ + if (rast->templ.front_winding == PIPE_WINDING_CW) { + cw = 0; + ccw = 1; + } + else { + cw = 1; + ccw = 0; + } + + /* Twoside stencil + */ + EMIT_RS( svga, TRUE, STENCILENABLE, fail ); + EMIT_RS( svga, TRUE, STENCILENABLE2SIDED, fail ); + + EMIT_RS( svga, curr->stencil[cw].func, STENCILFUNC, fail ); + EMIT_RS( svga, curr->stencil[cw].fail, STENCILFAIL, fail ); + EMIT_RS( svga, curr->stencil[cw].zfail, STENCILZFAIL, fail ); + EMIT_RS( svga, curr->stencil[cw].pass, STENCILPASS, fail ); + + EMIT_RS( svga, curr->stencil[ccw].func, CCWSTENCILFUNC, fail ); + EMIT_RS( svga, curr->stencil[ccw].fail, CCWSTENCILFAIL, fail ); + EMIT_RS( svga, curr->stencil[ccw].zfail, CCWSTENCILZFAIL, fail ); + EMIT_RS( svga, curr->stencil[ccw].pass, CCWSTENCILPASS, fail ); + + EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail ); + EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail ); + EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail ); + } + + EMIT_RS( svga, curr->zenable, ZENABLE, fail ); + if (curr->zenable) { + EMIT_RS( svga, curr->zfunc, ZFUNC, fail ); + EMIT_RS( svga, curr->zwriteenable, ZWRITEENABLE, fail ); + } + + EMIT_RS( svga, curr->alphatestenable, ALPHATESTENABLE, fail ); + if (curr->alphatestenable) { + EMIT_RS( svga, curr->alphafunc, ALPHAFUNC, fail ); + EMIT_RS_FLOAT( svga, curr->alpharef, ALPHAREF, fail ); + } + } + + + if (dirty & SVGA_NEW_RAST) + { + const struct svga_rasterizer_state *curr = svga->curr.rast; + + /* Shademode: still need to rearrange index list to move + * flat-shading PV first vertex. + */ + EMIT_RS( svga, curr->shademode, SHADEMODE, fail ); + EMIT_RS( svga, curr->cullmode, CULLMODE, fail ); + EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail ); + EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail ); + EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail ); + EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail ); + EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail ); + EMIT_RS_FLOAT( svga, curr->pointsize_min, POINTSIZEMIN, fail ); + EMIT_RS_FLOAT( svga, curr->pointsize_max, POINTSIZEMAX, fail ); + } + + if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE)) + { + const struct svga_rasterizer_state *curr = svga->curr.rast; + float slope = 0.0; + float bias = 0.0; + + /* Need to modify depth bias according to bound depthbuffer + * format. Don't do hardware depthbias while the software + * pipeline is active. + */ + if (!svga->state.sw.need_pipeline && + svga->curr.framebuffer.zsbuf) + { + slope = curr->slopescaledepthbias; + bias = svga->curr.depthscale * curr->depthbias; + } + + EMIT_RS_FLOAT( svga, slope, SLOPESCALEDEPTHBIAS, fail ); + EMIT_RS_FLOAT( svga, bias, DEPTHBIAS, fail ); + } + + + if (queue.rs_count) { + SVGA3dRenderState *rs; + + if (SVGA3D_BeginSetRenderState( svga->swc, + &rs, + queue.rs_count ) != PIPE_OK) + goto fail; + + memcpy( rs, + queue.rs, + queue.rs_count * sizeof queue.rs[0]); + + SVGA_FIFOCommitAll( svga->swc ); + } + + /* Also blend color: + */ + + return 0; + +fail: + /* XXX: need to poison cached hardware state on failure to ensure + * dirty state gets re-emitted. Fix this by re-instating partial + * FIFOCommit command and only updating cached hw state once the + * initial allocation has succeeded. 
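+ *
+ * Filling the cache with 0xcd guarantees every later EMIT_RS()
+ * comparison misses, so the full render state is re-sent on the
+ * next successful emit_rss().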
+ */ + memset(svga->state.hw_draw.rs, 0xcd, sizeof(svga->state.hw_draw.rs)); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +struct svga_tracked_state svga_hw_rss = +{ + "hw rss state", + + (SVGA_NEW_BLEND | + SVGA_NEW_DEPTH_STENCIL | + SVGA_NEW_RAST | + SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_NEED_PIPELINE), + + emit_rss +}; diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c new file mode 100644 index 0000000000..b313794520 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -0,0 +1,279 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_screen_texture.h" +#include "svga_winsys.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" + +#include "svga_hw_reg.h" + + +void svga_cleanup_tss_binding(struct svga_context *svga) +{ + int i; + unsigned count = MAX2( svga->curr.num_textures, + svga->state.hw_draw.num_views ); + + for (i = 0; i < count; i++) { + struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; + + svga_sampler_view_reference(&view->v, NULL); + pipe_texture_reference( &svga->curr.texture[i], NULL ); + pipe_texture_reference( &view->texture, NULL ); + + view->dirty = 1; + } +} + + +static int +update_tss_binding(struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + unsigned count = MAX2( svga->curr.num_textures, + svga->state.hw_draw.num_views ); + unsigned min_lod; + unsigned max_lod; + + + struct { + struct { + unsigned unit; + struct svga_hw_view_state *view; + } bind[PIPE_MAX_SAMPLERS]; + + unsigned bind_count; + } queue; + + queue.bind_count = 0; + + for (i = 0; i < count; i++) { + const struct svga_sampler_state *s = svga->curr.sampler[i]; + struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; + + /* get min max lod */ + if (svga->curr.texture[i]) { + min_lod = MAX2(s->view_min_lod, 0); + max_lod = MIN2(s->view_max_lod, svga->curr.texture[i]->last_level); + } else { + min_lod = 0; + max_lod = 0; + } + + if (view->texture != svga->curr.texture[i] || + view->min_lod != min_lod || + view->max_lod != max_lod) { + + svga_sampler_view_reference(&view->v, NULL); + pipe_texture_reference( &view->texture, svga->curr.texture[i] ); + + view->dirty = TRUE; + 
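+
+         /* The old view was released above; record the new LOD clamp
+          * and (re)acquire a matching sampler view below.
+          */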
view->min_lod = min_lod; + view->max_lod = max_lod; + + if (svga->curr.texture[i]) + view->v = svga_get_tex_sampler_view(&svga->pipe, + svga->curr.texture[i], + min_lod, + max_lod); + } + + if (view->dirty) { + queue.bind[queue.bind_count].unit = i; + queue.bind[queue.bind_count].view = view; + queue.bind_count++; + } + else if (view->v) { + svga_validate_sampler_view(svga, view->v); + } + } + + svga->state.hw_draw.num_views = svga->curr.num_textures; + + if (queue.bind_count) { + SVGA3dTextureState *ts; + + if (SVGA3D_BeginSetTextureState( svga->swc, + &ts, + queue.bind_count ) != PIPE_OK) + goto fail; + + for (i = 0; i < queue.bind_count; i++) { + ts[i].stage = queue.bind[i].unit; + ts[i].name = SVGA3D_TS_BIND_TEXTURE; + + if (queue.bind[i].view->v) { + svga->swc->surface_relocation(svga->swc, + &ts[i].value, + queue.bind[i].view->v->handle, + PIPE_BUFFER_USAGE_GPU_READ); + } + else { + ts[i].value = SVGA3D_INVALID_ID; + } + + queue.bind[i].view->dirty = FALSE; + } + + SVGA_FIFOCommitAll( svga->swc ); + } + + return 0; + +fail: + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +struct svga_tracked_state svga_hw_tss_binding = { + "texture binding emit", + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER, + update_tss_binding +}; + + +/*********************************************************************** + */ + +struct ts_queue { + unsigned ts_count; + SVGA3dTextureState ts[PIPE_MAX_SAMPLERS*SVGA3D_TS_MAX]; +}; + + +#define EMIT_TS(svga, unit, val, token, fail) \ +do { \ + if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ + svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ + } \ +} while (0) + +#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail) \ +do { \ + unsigned val = fui(fvalue); \ + if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ + svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ + } \ +} while (0) + + +static INLINE void +svga_queue_tss( struct ts_queue *q, + unsigned unit, + unsigned tss, + unsigned value ) +{ + assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0])); + q->ts[q->ts_count].stage = unit; + q->ts[q->ts_count].name = tss; + q->ts[q->ts_count].value = value; + q->ts_count++; +} + + +static int +update_tss(struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + struct ts_queue queue; + + queue.ts_count = 0; + for (i = 0; i < svga->curr.num_samplers; i++) { + if (svga->curr.sampler[i]) { + const struct svga_sampler_state *curr = svga->curr.sampler[i]; + + EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail); + EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail); + EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail); + EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail); + EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail); + EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail); + EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail); + EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail); + EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail); + // TEXCOORDINDEX -- hopefully not needed + + if (svga->curr.tex_flags.flag_1d & (1 << i)) { + debug_printf("wrap 1d tex %d\n", i); + EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail); + } + else + EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail); + + if (svga->curr.tex_flags.flag_srgb & (1 << i)) + EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail); + else + EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail); + + } + } + + if (queue.ts_count) { + 
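+
+      /* Flush all queued texture stage changes in a single
+       * SetTextureState command.
+       */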
SVGA3dTextureState *ts; + + if (SVGA3D_BeginSetTextureState( svga->swc, + &ts, + queue.ts_count ) != PIPE_OK) + goto fail; + + memcpy( ts, + queue.ts, + queue.ts_count * sizeof queue.ts[0]); + + SVGA_FIFOCommitAll( svga->swc ); + } + + return 0; + +fail: + /* XXX: need to poison cached hardware state on failure to ensure + * dirty state gets re-emitted. Fix this by re-instating partial + * FIFOCommit command and only updating cached hw state once the + * initial allocation has succeeded. + */ + memset(svga->state.hw_draw.ts, 0xcd, sizeof(svga->state.hw_draw.ts)); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +struct svga_tracked_state svga_hw_tss = { + "texture state emit", + (SVGA_NEW_SAMPLER | + SVGA_NEW_TEXTURE_FLAGS), + update_tss +}; + diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c new file mode 100644 index 0000000000..c534308f50 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -0,0 +1,182 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_upload_mgr.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_draw.h" +#include "svga_tgsi.h" +#include "svga_screen.h" +#include "svga_screen_buffer.h" + +#include "svga_hw_reg.h" + + +static int +upload_user_buffers( struct svga_context *svga ) +{ + enum pipe_error ret = PIPE_OK; + int i; + int nr; + + if (0) + debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers); + + nr = svga->curr.num_vertex_buffers; + + for (i = 0; i < nr; i++) + { + if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) + { + struct pipe_buffer *upload_buffer = NULL; + unsigned offset = /*svga->curr.vb[i].buffer_offset*/ 0; + unsigned size = svga->curr.vb[i].buffer->size /*- offset*/; + unsigned upload_offset; + + ret = u_upload_buffer( svga->upload_vb, + offset, + size, + svga->curr.vb[i].buffer, + &upload_offset, + &upload_buffer ); + if (ret) + return ret; + + if (0) + debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", + __FUNCTION__, + i, + svga->curr.vb[i].buffer, + upload_buffer, upload_offset, size); + + /* Make sure we release the old buffer and end up with the + * correct refcount on the uploaded buffer. 
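+ * (u_upload_buffer() hands back upload_buffer with its own
+ * reference held, which is why the plain assignment below is
+ * sufficient.)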
+ */ + pipe_buffer_reference( &svga->curr.vb[i].buffer, NULL ); + svga->curr.vb[i].buffer = upload_buffer; + svga->curr.vb[i].buffer_offset = upload_offset; + } + } + + if (0) + debug_printf("%s: DONE\n", __FUNCTION__); + + return ret; +} + + +/*********************************************************************** + */ + + +static int emit_hw_vs_vdecl( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_vertex_element *ve = svga->curr.ve; + SVGA3dVertexDecl decl; + unsigned i; + + assert(svga->curr.num_vertex_elements >= + svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); + + svga_hwtnl_reset_vdecl( svga->hwtnl, + svga->curr.num_vertex_elements ); + + for (i = 0; i < svga->curr.num_vertex_elements; i++) { + const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; + unsigned usage, index; + + + svga_generate_vdecl_semantics( i, &usage, &index ); + + /* SVGA_NEW_VELEMENT + */ + decl.identity.type = svga->state.sw.ve_format[i]; + decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT; + decl.identity.usage = usage; + decl.identity.usageIndex = index; + decl.array.stride = vb->stride; + decl.array.offset = (vb->buffer_offset + + ve[i].src_offset); + + svga_hwtnl_vdecl( svga->hwtnl, + i, + &decl, + vb->buffer ); + } + + return 0; +} + + +static int emit_hw_vdecl( struct svga_context *svga, + unsigned dirty ) +{ + int ret = 0; + + /* SVGA_NEW_NEED_SWTNL + */ + if (svga->state.sw.need_swtnl) + return 0; /* Do not emit during swtnl */ + + /* If we get to here, we know that we're going to draw. Upload + * userbuffers now and try to combine multiple userbuffers from + * multiple draw calls into a single host buffer for performance. + */ + if (svga->curr.any_user_vertex_buffers && + SVGA_COMBINE_USERBUFFERS) + { + ret = upload_user_buffers( svga ); + if (ret) + return ret; + + svga->curr.any_user_vertex_buffers = FALSE; + } + + return emit_hw_vs_vdecl( svga, dirty ); +} + + +struct svga_tracked_state svga_hw_vdecl = +{ + "hw vertex decl state (hwtnl version)", + ( SVGA_NEW_NEED_SWTNL | + SVGA_NEW_VELEMENT | + SVGA_NEW_VBUFFER | + SVGA_NEW_RAST | + SVGA_NEW_FS | + SVGA_NEW_VS ), + emit_hw_vdecl +}; + + + + + + diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c new file mode 100644 index 0000000000..a947745732 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -0,0 +1,239 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "translate/translate.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_tgsi.h" + +#include "svga_hw_reg.h" + +/*********************************************************************** + */ + + +static INLINE int compare_vs_keys( const struct svga_vs_compile_key *a, + const struct svga_vs_compile_key *b ) +{ + unsigned keysize = svga_vs_key_size( a ); + return memcmp( a, b, keysize ); +} + + +static struct svga_shader_result *search_vs_key( struct svga_vertex_shader *vs, + const struct svga_vs_compile_key *key ) +{ + struct svga_shader_result *result = vs->base.results; + + assert(key); + + for ( ; result; result = result->next) { + if (compare_vs_keys( key, &result->key.vkey ) == 0) + return result; + } + + return NULL; +} + + +static enum pipe_error compile_vs( struct svga_context *svga, + struct svga_vertex_shader *vs, + const struct svga_vs_compile_key *key, + struct svga_shader_result **out_result ) +{ + struct svga_shader_result *result; + enum pipe_error ret = PIPE_OK; + + result = svga_translate_vertex_program( vs, key ); + if (result == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + + ret = SVGA3D_DefineShader(svga->swc, + svga->state.next_vs_id, + SVGA3D_SHADERTYPE_VS, + result->tokens, + result->nr_tokens * sizeof result->tokens[0]); + if (ret) + goto fail; + + *out_result = result; + result->id = svga->state.next_vs_id++; + result->next = vs->base.results; + vs->base.results = result; + return PIPE_OK; + +fail: + if (result) + svga_destroy_shader_result( result ); + return ret; +} + +/* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_ZERO_STRIDE + */ +static int make_vs_key( struct svga_context *svga, + struct svga_vs_compile_key *key ) +{ + memset(key, 0, sizeof *key); + key->need_prescale = svga->state.hw_clear.prescale.enabled; + key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex; + key->zero_stride_vertex_elements = + svga->curr.zero_stride_vertex_elements; + key->num_zero_stride_vertex_elements = + svga->curr.num_zero_stride_vertex_elements; + return 0; +} + + + +static int emit_hw_vs( struct svga_context *svga, + unsigned dirty ) +{ + struct svga_shader_result *result = NULL; + unsigned id = SVGA3D_INVALID_ID; + int ret = 0; + + /* SVGA_NEW_NEED_SWTNL */ + if (!svga->state.sw.need_swtnl) { + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_vs_compile_key key; + + ret = make_vs_key( svga, &key ); + if (ret) + return ret; + + result = search_vs_key( vs, &key ); + if (!result) { + ret = compile_vs( svga, vs, &key, &result ); + if (ret) + return ret; + } + + assert (result); + id = result->id; + } + + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, + id ); + if (ret) + return ret; + + svga->dirty |= SVGA_NEW_VS_RESULT; + svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; + svga->state.hw_draw.vs = result; + } + + return 0; +} + +struct svga_tracked_state svga_hw_vs = +{ + "vertex shader (hwtnl)", + (SVGA_NEW_VS | + SVGA_NEW_PRESCALE | + SVGA_NEW_NEED_SWTNL | + SVGA_NEW_ZERO_STRIDE), + 
emit_hw_vs +}; + + +/*********************************************************************** + */ +static int update_zero_stride( struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + + svga->curr.zero_stride_vertex_elements = 0; + svga->curr.num_zero_stride_vertex_elements = 0; + + for (i = 0; i < svga->curr.num_vertex_elements; i++) { + const struct pipe_vertex_element *vel = &svga->curr.ve[i]; + const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[ + vel->vertex_buffer_index]; + if (vbuffer->stride == 0) { + unsigned const_idx = + svga->curr.num_zero_stride_vertex_elements; + struct translate *translate; + struct translate_key key; + void *mapped_buffer; + + svga->curr.zero_stride_vertex_elements |= (1 << i); + ++svga->curr.num_zero_stride_vertex_elements; + + key.output_stride = 4 * sizeof(float); + key.nr_elements = 1; + key.element[0].input_format = vel->src_format; + key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[0].input_buffer = vel->vertex_buffer_index; + key.element[0].input_offset = vel->src_offset; + key.element[0].output_offset = const_idx * 4 * sizeof(float); + + translate_key_sanitize(&key); + /* translate_generic_create is technically private but + * we don't want to code-generate, just want generic + * translation */ + translate = translate_generic_create(&key); + + assert(vel->src_offset == 0); + + mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, + vbuffer->buffer, + vel->src_offset, + pf_get_size(vel->src_format), + PIPE_BUFFER_USAGE_CPU_READ); + translate->set_buffer(translate, vel->vertex_buffer_index, + mapped_buffer, + vbuffer->stride); + translate->run(translate, 0, 1, + svga->curr.zero_stride_constants); + + pipe_buffer_unmap(svga->pipe.screen, + vbuffer->buffer); + translate->release(translate); + } + } + + if (svga->curr.num_zero_stride_vertex_elements) + svga->dirty |= SVGA_NEW_ZERO_STRIDE; + + return 0; +} + +struct svga_tracked_state svga_hw_update_zero_stride = +{ + "update zero_stride", + ( SVGA_NEW_VELEMENT | + SVGA_NEW_VBUFFER ), + update_zero_stride +}; diff --git a/src/gallium/drivers/svga/svga_swtnl.h b/src/gallium/drivers/svga/svga_swtnl.h new file mode 100644 index 0000000000..4882f26b17 --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl.h @@ -0,0 +1,52 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
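
update_zero_stride above takes any vertex element whose buffer has stride zero out of the per-vertex fetch path: the single value is expanded to four floats through the translate module, stored in svga->curr.zero_stride_constants, and a bit is recorded so the shader compile key can remap that input to a constant register. The sketch below shows just the bookkeeping side of that idea with invented stand-in types; it does not use the real translate or gallium interfaces.

#include <stdio.h>
#include <string.h>

#define MAX_ELEMENTS 8

/* Stand-in for a pipe_vertex_element plus the data its buffer holds. */
struct element { int buffer_stride; float value[4]; };

int main(void)
{
   struct element ve[2] = {
      { 16, { 0, 0, 0, 0 } },   /* normal per-vertex attribute */
      {  0, { 1, 0, 0, 1 } },   /* stride 0: one value shared by every vertex */
   };
   float constants[MAX_ELEMENTS * 4];
   unsigned zero_stride_mask = 0, num_zero_stride = 0;
   unsigned i;

   for (i = 0; i < 2; i++) {
      if (ve[i].buffer_stride == 0) {
         unsigned const_idx = num_zero_stride++;
         zero_stride_mask |= 1u << i;
         /* Store the single value as a vec4 constant instead of a vertex stream. */
         memcpy(&constants[const_idx * 4], ve[i].value, 4 * sizeof(float));
      }
   }

   printf("mask 0x%x, %u zero-stride element(s), const[0].x = %.1f\n",
          zero_stride_mask, num_zero_stride, constants[0]);
   return 0;
}
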
+ * + **********************************************************/ + +#ifndef SVGA_SWTNL_H +#define SVGA_SWTNL_H + +#include "pipe/p_compiler.h" + +struct svga_context; +struct pipe_context; +struct pipe_buffer; +struct vbuf_render; + + +boolean svga_init_swtnl( struct svga_context *svga ); +void svga_destroy_swtnl( struct svga_context *svga ); + + +enum pipe_error +svga_swtnl_draw_range_elements(struct svga_context *svga, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count); + + +#endif diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c new file mode 100644 index 0000000000..b4f757a47a --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -0,0 +1,349 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +#include "draw/draw_vbuf.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" + +#include "util/u_debug.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_shaders.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_swtnl.h" + +#include "svga_types.h" +#include "svga_reg.h" +#include "svga3d_reg.h" +#include "svga_draw.h" +#include "svga_swtnl_private.h" + + +static const struct vertex_info * +svga_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + + svga_swtnl_update_vdecl(svga); + + return &svga_render->vertex_info; +} + + +static boolean +svga_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + size_t size = (size_t)nr_vertices * (size_t)vertex_size; + boolean new_vbuf = FALSE; + boolean new_ibuf = FALSE; + + if (svga_render->vertex_size != vertex_size) + svga->swtnl.new_vdecl = TRUE; + svga_render->vertex_size = (size_t)vertex_size; + + if (svga->swtnl.new_vbuf) + new_ibuf = new_vbuf = TRUE; + svga->swtnl.new_vbuf = FALSE; + + if (svga_render->vbuf_size < svga_render->vbuf_offset + svga_render->vbuf_used + size) + new_vbuf = TRUE; + + if (new_vbuf) + pipe_buffer_reference(&svga_render->vbuf, NULL); + if (new_ibuf) + pipe_buffer_reference(&svga_render->ibuf, NULL); + + if (!svga_render->vbuf) { + svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); + svga_render->vbuf = pipe_buffer_create(screen, + 0, + PIPE_BUFFER_USAGE_VERTEX, + svga_render->vbuf_size); + if(!svga_render->vbuf) { + svga_context_flush(svga, NULL); + svga_render->vbuf = pipe_buffer_create(screen, + 0, + PIPE_BUFFER_USAGE_VERTEX, + svga_render->vbuf_size); + assert(svga_render->vbuf); + } + + svga->swtnl.new_vdecl = TRUE; + svga_render->vbuf_offset = 0; + } else { + svga_render->vbuf_offset += svga_render->vbuf_used; + } + + svga_render->vbuf_used = 0; + + if (svga->swtnl.new_vdecl) + svga_render->vdecl_offset = svga_render->vbuf_offset; + + return TRUE; +} + +static void * +svga_vbuf_render_map_vertices( struct vbuf_render *render ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + + char *ptr = (char*)pipe_buffer_map(screen, + svga_render->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + return ptr + svga_render->vbuf_offset; +} + +static void +svga_vbuf_render_unmap_vertices( struct vbuf_render *render, + ushort min_index, + ushort max_index ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + unsigned offset, length; + size_t used = svga_render->vertex_size * ((size_t)max_index + 1); + + offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index; + length = svga_render->vertex_size * (max_index + 1 - min_index); + pipe_buffer_flush_mapped_range(screen, svga_render->vbuf, offset, length); + pipe_buffer_unmap(screen, svga_render->vbuf); + svga_render->min_index = min_index; + svga_render->max_index = max_index; + 
svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used); +} + +static boolean +svga_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + svga_render->prim = prim; + + return TRUE; +} + +static void +svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render ) +{ + struct svga_context *svga = svga_render->svga; + SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; + enum pipe_error ret; + int i; + + /* if the vdecl or vbuf hasn't changed do nothing */ + if (!svga->swtnl.new_vdecl) + return; + + memcpy(vdecl, svga_render->vdecl, sizeof(vdecl)); + + /* flush the hw state */ + ret = svga_hwtnl_flush(svga->hwtnl); + if (ret) { + svga_context_flush(svga, NULL); + ret = svga_hwtnl_flush(svga->hwtnl); + /* if we hit this path we might become synced with hw */ + svga->swtnl.new_vbuf = TRUE; + assert(ret == 0); + } + + svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count); + + for (i = 0; i < svga_render->vdecl_count; i++) { + vdecl[i].array.offset += svga_render->vdecl_offset; + + svga_hwtnl_vdecl( svga->hwtnl, + i, + &vdecl[i], + svga_render->vbuf ); + } + + /* We have already taken care of flatshading, so let the hwtnl + * module use whatever is most convenient: + */ + if (svga->state.sw.need_pipeline) { + svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE); + svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL); + } + else { + svga_hwtnl_set_flatshade( svga->hwtnl, + svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade_first ); + + svga_hwtnl_set_unfilled( svga->hwtnl, + svga->curr.rast->hw_unfilled ); + } + + svga->swtnl.new_vdecl = FALSE; +} + +static void +svga_vbuf_render_draw_arrays( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; + enum pipe_error ret = 0; + + svga_vbuf_sumbit_state(svga_render); + + /* Need to call update_state() again as the draw module may have + * altered some of our state behind our backs. 
Testcase: + * redbook/polys.c + */ + svga_update_state_retry( svga, SVGA_STATE_HW_DRAW ); + + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + svga->swtnl.new_vbuf = TRUE; + assert(ret == PIPE_OK); + } +} + + +static void +svga_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; + boolean ret; + size_t size = 2 * nr_indices; + + assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0); + + if (svga_render->ibuf_size < svga_render->ibuf_offset + size) + pipe_buffer_reference(&svga_render->ibuf, NULL); + + if (!svga_render->ibuf) { + svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size); + svga_render->ibuf = pipe_buffer_create(screen, + 0, + PIPE_BUFFER_USAGE_VERTEX, + svga_render->ibuf_size); + svga_render->ibuf_offset = 0; + } + + pipe_buffer_write(screen, svga_render->ibuf, + svga_render->ibuf_offset, 2 * nr_indices, indices); + + + /* off to hardware */ + svga_vbuf_sumbit_state(svga_render); + + /* Need to call update_state() again as the draw module may have + * altered some of our state behind our backs. Testcase: + * redbook/polys.c + */ + svga_update_state_retry( svga, SVGA_STATE_HW_DRAW ); + + ret = svga_hwtnl_draw_range_elements(svga->hwtnl, + svga_render->ibuf, + 2, + svga_render->min_index, + svga_render->max_index, + svga_render->prim, + svga_render->ibuf_offset / 2, nr_indices, bias); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_hwtnl_draw_range_elements(svga->hwtnl, + svga_render->ibuf, + 2, + svga_render->min_index, + svga_render->max_index, + svga_render->prim, + svga_render->ibuf_offset / 2, nr_indices, bias); + svga->swtnl.new_vbuf = TRUE; + assert(ret == PIPE_OK); + } + + svga_render->ibuf_offset += size; +} + + +static void +svga_vbuf_render_release_vertices( struct vbuf_render *render ) +{ + +} + + +static void +svga_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + + pipe_buffer_reference(&svga_render->vbuf, NULL); + pipe_buffer_reference(&svga_render->ibuf, NULL); + FREE(svga_render); +} + + +/** + * Create a new primitive render. 
+ */ +struct vbuf_render * +svga_vbuf_render_create( struct svga_context *svga ) +{ + struct svga_vbuf_render *svga_render = CALLOC_STRUCT(svga_vbuf_render); + + svga_render->svga = svga; + svga_render->ibuf_size = 0; + svga_render->vbuf_size = 0; + svga_render->ibuf_alloc_size = 4*1024; + svga_render->vbuf_alloc_size = 64*1024; + svga_render->base.max_vertex_buffer_bytes = 64*1024/10; + svga_render->base.max_indices = 65536; + svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info; + svga_render->base.allocate_vertices = svga_vbuf_render_allocate_vertices; + svga_render->base.map_vertices = svga_vbuf_render_map_vertices; + svga_render->base.unmap_vertices = svga_vbuf_render_unmap_vertices; + svga_render->base.set_primitive = svga_vbuf_render_set_primitive; + svga_render->base.draw = svga_vbuf_render_draw; + svga_render->base.draw_arrays = svga_vbuf_render_draw_arrays; + svga_render->base.release_vertices = svga_vbuf_render_release_vertices; + svga_render->base.destroy = svga_vbuf_render_destroy; + + return &svga_render->base; +} diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c new file mode 100644 index 0000000000..8b14c913f7 --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -0,0 +1,170 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
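
One idiom worth calling out in this backend is flush-and-retry: pipe_buffer_create, svga_hwtnl_flush, svga_hwtnl_draw_arrays and svga_hwtnl_draw_range_elements are each attempted once, and on failure the context is flushed to free command-buffer space before a single retry that is then asserted to succeed. A small self-contained sketch of that idiom follows; try_command and flush are hypothetical stand-ins, not SVGA entry points.

#include <assert.h>
#include <stdio.h>

enum error { OK = 0, ERR_OUT_OF_SPACE = -1 };

static int space = 0;                  /* pretend command-buffer space */

static enum error try_command(void)
{
   if (space < 1)
      return ERR_OUT_OF_SPACE;         /* command buffer full */
   space -= 1;
   return OK;
}

static void flush(void)
{
   space = 16;                         /* submitting the buffer frees space */
}

int main(void)
{
   enum error ret = try_command();
   if (ret != OK) {
      flush();                         /* make room, then retry exactly once */
      ret = try_command();
      assert(ret == OK);               /* a retry right after a flush must succeed */
   }
   printf("command submitted\n");
   return 0;
}
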
+ * + **********************************************************/ + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "pipe/p_inlines.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_swtnl.h" +#include "svga_state.h" +#include "svga_swtnl_private.h" + + + +enum pipe_error +svga_swtnl_draw_range_elements(struct svga_context *svga, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct draw_context *draw = svga->swtnl.draw; + unsigned i; + const void *map; + enum pipe_error ret; + + assert(!svga->dirty); + assert(svga->state.sw.need_swtnl); + assert(draw); + + ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); + if (ret) { + svga_context_flush(svga, NULL); + ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); + svga->swtnl.new_vbuf = TRUE; + assert(ret == PIPE_OK); + } + + /* + * Map vertex buffers + */ + for (i = 0; i < svga->curr.num_vertex_buffers; i++) { + map = pipe_buffer_map(svga->pipe.screen, + svga->curr.vb[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + + draw_set_mapped_vertex_buffer(draw, i, map); + } + + /* Map index buffer, if present */ + if (indexBuffer) { + map = pipe_buffer_map(svga->pipe.screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + + draw_set_mapped_element_buffer_range(draw, + indexSize, + min_index, + max_index, + map); + } + + if (svga->curr.cb[PIPE_SHADER_VERTEX]) { + map = pipe_buffer_map(svga->pipe.screen, + svga->curr.cb[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + assert(map); + draw_set_mapped_constant_buffer( + draw, + map, + svga->curr.cb[PIPE_SHADER_VERTEX]->size); + } + + draw_arrays(svga->swtnl.draw, prim, start, count); + + draw_flush(svga->swtnl.draw); + + /* Ensure the draw module didn't touch this */ + assert(i == svga->curr.num_vertex_buffers); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < svga->curr.num_vertex_buffers; i++) { + pipe_buffer_unmap(svga->pipe.screen, svga->curr.vb[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + + if (indexBuffer) { + pipe_buffer_unmap(svga->pipe.screen, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + if (svga->curr.cb[PIPE_SHADER_VERTEX]) { + pipe_buffer_unmap(svga->pipe.screen, + svga->curr.cb[PIPE_SHADER_VERTEX]); + } + + return ret; +} + + + + +boolean svga_init_swtnl( struct svga_context *svga ) +{ + svga->swtnl.backend = svga_vbuf_render_create(svga); + if(!svga->swtnl.backend) + goto fail; + + /* + * Create drawing context and plug our rendering stage into it. 
+ */ + svga->swtnl.draw = draw_create(); + if (svga->swtnl.draw == NULL) + goto fail; + + + draw_set_rasterize_stage(svga->swtnl.draw, + draw_vbuf_stage( svga->swtnl.draw, svga->swtnl.backend )); + + draw_set_render(svga->swtnl.draw, svga->swtnl.backend); + + draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe); + draw_install_aapoint_stage(svga->swtnl.draw, &svga->pipe); + draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe); + + draw_set_driver_clipping(svga->swtnl.draw, debug_get_bool_option("SVGA_SWTNL_FSE", FALSE)); + + return TRUE; + +fail: + if (svga->swtnl.backend) + svga->swtnl.backend->destroy( svga->swtnl.backend ); + + if (svga->swtnl.draw) + draw_destroy( svga->swtnl.draw ); + + return FALSE; +} + + +void svga_destroy_swtnl( struct svga_context *svga ) +{ + draw_destroy( svga->swtnl.draw ); +} diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h new file mode 100644 index 0000000000..9bbb42910f --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_private.h @@ -0,0 +1,93 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_SWTNL_PRIVATE_H +#define SVGA_SWTNL_PRIVATE_H + +#include "svga_swtnl.h" +#include "draw/draw_vertex.h" + +#include "svga_types.h" +#include "svga3d_reg.h" + +/** + * Primitive renderer for svga. + */ +struct svga_vbuf_render { + struct vbuf_render base; + + struct svga_context *svga; + struct vertex_info vertex_info; + + unsigned vertex_size; + + unsigned prim; + + struct pipe_buffer *vbuf; + struct pipe_buffer *ibuf; + + /* current size of buffer */ + size_t vbuf_size; + size_t ibuf_size; + + /* size of that the buffer should be */ + size_t vbuf_alloc_size; + size_t ibuf_alloc_size; + + /* current write place */ + size_t vbuf_offset; + size_t ibuf_offset; + + /* currently used */ + size_t vbuf_used; + + SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; + unsigned vdecl_offset; + unsigned vdecl_count; + + ushort min_index; + ushort max_index; +}; + +/** + * Basically a cast wrapper. 
+ */ +static INLINE struct svga_vbuf_render * +svga_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct svga_vbuf_render *)render; +} + + +struct vbuf_render * +svga_vbuf_render_create( struct svga_context *svga ); + + +int +svga_swtnl_update_vdecl( struct svga_context *svga ); + + +#endif diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c new file mode 100644 index 0000000000..1616312113 --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -0,0 +1,242 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "pipe/p_inlines.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_swtnl.h" +#include "svga_state.h" + +#include "svga_swtnl_private.h" + + +#define SVGA_POINT_ADJ_X -0.375 +#define SVGA_POINT_ADJ_Y -0.5 + +#define SVGA_LINE_ADJ_X -0.5 +#define SVGA_LINE_ADJ_Y -0.5 + +#define SVGA_TRIANGLE_ADJ_X -0.375 +#define SVGA_TRIANGLE_ADJ_Y -0.5 + + +static void set_draw_viewport( struct svga_context *svga ) +{ + struct pipe_viewport_state vp = svga->curr.viewport; + float adjx = 0; + float adjy = 0; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + adjx = SVGA_POINT_ADJ_X; + adjy = SVGA_POINT_ADJ_Y; + break; + case PIPE_PRIM_LINES: + /* XXX: This is to compensate for the fact that wide lines are + * going to be drawn with triangles, but we're not catching all + * cases where that will happen. 
+ */ + if (svga->curr.rast->templ.line_width > 1.0) + { + adjx = SVGA_LINE_ADJ_X + 0.175; + adjy = SVGA_LINE_ADJ_Y - 0.175; + } + else { + adjx = SVGA_LINE_ADJ_X; + adjy = SVGA_LINE_ADJ_Y; + } + break; + case PIPE_PRIM_TRIANGLES: + adjx += SVGA_TRIANGLE_ADJ_X; + adjy += SVGA_TRIANGLE_ADJ_Y; + break; + } + + vp.translate[0] += adjx; + vp.translate[1] += adjy; + + draw_set_viewport_state(svga->swtnl.draw, &vp); +} + +static int update_swtnl_draw( struct svga_context *svga, + unsigned dirty ) +{ + draw_flush( svga->swtnl.draw ); + + if (dirty & SVGA_NEW_VS) + draw_bind_vertex_shader(svga->swtnl.draw, + svga->curr.vs->draw_shader); + + if (dirty & SVGA_NEW_VBUFFER) + draw_set_vertex_buffers(svga->swtnl.draw, + svga->curr.num_vertex_buffers, + svga->curr.vb); + + if (dirty & SVGA_NEW_VELEMENT) + draw_set_vertex_elements(svga->swtnl.draw, + svga->curr.num_vertex_elements, + svga->curr.ve ); + + if (dirty & SVGA_NEW_CLIP) + draw_set_clip_state(svga->swtnl.draw, + &svga->curr.clip); + + if (dirty & (SVGA_NEW_VIEWPORT | + SVGA_NEW_REDUCED_PRIMITIVE | + SVGA_NEW_RAST)) + set_draw_viewport( svga ); + + if (dirty & SVGA_NEW_RAST) + draw_set_rasterizer_state(svga->swtnl.draw, + &svga->curr.rast->templ); + + if (dirty & SVGA_NEW_FRAME_BUFFER) + draw_set_mrd(svga->swtnl.draw, + svga->curr.depthscale); + + if (dirty & SVGA_NEW_EDGEFLAGS) + draw_set_edgeflags( svga->swtnl.draw, + svga->curr.edgeflags ); + + return 0; +} + + +struct svga_tracked_state svga_update_swtnl_draw = +{ + "update draw module state", + (SVGA_NEW_VS | + SVGA_NEW_VBUFFER | + SVGA_NEW_VELEMENT | + SVGA_NEW_CLIP | + SVGA_NEW_VIEWPORT | + SVGA_NEW_RAST | + SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_REDUCED_PRIMITIVE | + SVGA_NEW_EDGEFLAGS), + update_swtnl_draw +}; + + +int svga_swtnl_update_vdecl( struct svga_context *svga ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(svga->swtnl.backend); + struct draw_context *draw = svga->swtnl.draw; + struct vertex_info *vinfo = &svga_render->vertex_info; + SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; + const enum interp_mode colorInterp = + svga->curr.rast->templ.flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; + const struct svga_fragment_shader *fs = svga->curr.fs; + int offset = 0; + int nr_decls = 0; + int src, i; + + memset(vinfo, 0, sizeof(*vinfo)); + memset(vdecl, 0, sizeof(vdecl)); + + /* always add position */ + src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); + vinfo->attrib[0].emit = EMIT_4F; + vdecl[0].array.offset = offset; + vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4; + vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT; + vdecl[0].identity.usageIndex = 0; + offset += 16; + nr_decls++; + + for (i = 0; i < fs->base.info.num_inputs; i++) { + unsigned name = fs->base.info.input_semantic_name[i]; + unsigned index = fs->base.info.input_semantic_index[i]; + src = draw_find_vs_output(draw, name, index); + vdecl[nr_decls].array.offset = offset; + vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i]; + + switch (name) { + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR; + vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; + offset += 16; + nr_decls++; + break; + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; + vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; + vdecl[nr_decls].identity.usageIndex += 1; + offset += 16; + nr_decls++; + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; + vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1; + assert(vdecl[nr_decls].identity.usageIndex == 0); + offset += 4; + nr_decls++; + break; + case TGSI_SEMANTIC_POSITION: + /* generated internally, not a vertex shader output */ + break; + default: + assert(0); + } + } + + draw_compute_vertex_size(vinfo); + + svga_render->vdecl_count = nr_decls; + for (i = 0; i < svga_render->vdecl_count; i++) + vdecl[i].array.stride = offset; + + if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0) + return 0; + + memcpy(svga_render->vdecl, vdecl, sizeof(vdecl)); + svga->swtnl.new_vdecl = TRUE; + + return 0; +} + + +static int update_swtnl_vdecl( struct svga_context *svga, + unsigned dirty ) +{ + return svga_swtnl_update_vdecl( svga ); +} + + +struct svga_tracked_state svga_update_swtnl_vdecl = +{ + "update draw module vdecl", + (SVGA_NEW_VS | + SVGA_NEW_FS), + update_swtnl_vdecl +}; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c new file mode 100644 index 0000000000..b8ef137c01 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -0,0 +1,280 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
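
svga_swtnl_update_vdecl above lays out the post-transform vertex as a position attribute followed by one FLOAT4 (or FLOAT1 for fog) slot per fragment shader input, accumulating byte offsets as it goes, writing the final total back as the stride of every declaration, and raising swtnl.new_vdecl only when a memcmp shows the layout actually changed. The reduced sketch below walks through that layout step with plain structs rather than the SVGA3dVertexDecl type.

#include <stdio.h>
#include <string.h>

struct decl { unsigned offset, size, stride; };

int main(void)
{
   struct decl prev[4], cur[4];
   unsigned sizes[3] = { 16, 16, 4 };   /* position, one FLOAT4 input, one FLOAT1 fog input */
   unsigned offset = 0, i;

   memset(prev, 0, sizeof prev);        /* previously emitted layout (empty here) */
   memset(cur, 0, sizeof cur);

   for (i = 0; i < 3; i++) {            /* accumulate offsets attribute by attribute */
      cur[i].offset = offset;
      cur[i].size = sizes[i];
      offset += sizes[i];
   }
   for (i = 0; i < 3; i++)              /* every decl shares the total vertex size as stride */
      cur[i].stride = offset;

   if (memcmp(prev, cur, sizeof cur) != 0)
      printf("vertex layout changed, stride = %u bytes\n", offset);
   return 0;
}
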
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_memory.h" + +#include "svgadump/svga_shader_dump.h" + +#include "svga_context.h" +#include "svga_tgsi.h" +#include "svga_tgsi_emit.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + + +/* Sinkhole used only in error conditions. + */ +static char err_buf[128]; + +#if 0 +static void svga_destroy_shader_emitter( struct svga_shader_emitter *emit ) +{ + if (emit->buf != err_buf) + FREE(emit->buf); +} +#endif + + +static boolean svga_shader_expand( struct svga_shader_emitter *emit ) +{ + char *new_buf; + unsigned newsize = emit->size * 2; + + if(emit->buf != err_buf) + new_buf = REALLOC(emit->buf, emit->size, newsize); + else + new_buf = NULL; + + if (new_buf == NULL) { + emit->ptr = err_buf; + emit->buf = err_buf; + emit->size = sizeof(err_buf); + return FALSE; + } + + emit->size = newsize; + emit->ptr = new_buf + (emit->ptr - emit->buf); + emit->buf = new_buf; + return TRUE; +} + +static INLINE boolean reserve( struct svga_shader_emitter *emit, + unsigned nr_dwords ) +{ + if (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size) { + if (!svga_shader_expand( emit )) + return FALSE; + } + + return TRUE; +} + +boolean svga_shader_emit_dword( struct svga_shader_emitter *emit, + unsigned dword ) +{ + if (!reserve(emit, 1)) + return FALSE; + + *(unsigned *)emit->ptr = dword; + emit->ptr += sizeof dword; + return TRUE; +} + +boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit, + const unsigned *dwords, + unsigned nr ) +{ + if (!reserve(emit, nr)) + return FALSE; + + memcpy( emit->ptr, dwords, nr * sizeof *dwords ); + emit->ptr += nr * sizeof *dwords; + return TRUE; +} + +boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit, + unsigned opcode ) +{ + SVGA3dShaderInstToken *here; + + if (!reserve(emit, 1)) + return FALSE; + + here = (SVGA3dShaderInstToken *)emit->ptr; + here->value = opcode; + + if (emit->insn_offset) { + SVGA3dShaderInstToken *prev = (SVGA3dShaderInstToken *)(emit->buf + + emit->insn_offset); + prev->size = (here - prev) - 1; + } + + emit->insn_offset = emit->ptr - emit->buf; + emit->ptr += sizeof(unsigned); + return TRUE; +} + +#define SVGA3D_PS_2X (SVGA3D_PS_20 | 1) +#define SVGA3D_VS_2X (SVGA3D_VS_20 | 1) + +static boolean svga_shader_emit_header( struct svga_shader_emitter *emit ) +{ + SVGA3dShaderVersion header; + + memset( &header, 0, sizeof header ); + + switch (emit->unit) { + case PIPE_SHADER_FRAGMENT: + header.value = emit->use_sm30 ? SVGA3D_PS_30 : SVGA3D_PS_2X; + break; + case PIPE_SHADER_VERTEX: + header.value = emit->use_sm30 ? SVGA3D_VS_30 : SVGA3D_VS_2X; + break; + } + + return svga_shader_emit_dword( emit, header.value ); +} + + + + + +/* Parse TGSI shader and translate to SVGA/DX9 serialized + * representation. 
+ * + * In this function SVGA shader is emitted to an in-memory buffer that + * can be dynamically grown. Once we've finished and know how large + * it is, it will be copied to a hardware buffer for upload. + */ +static struct svga_shader_result * +svga_tgsi_translate( const struct svga_shader *shader, + union svga_compile_key key, + unsigned unit ) +{ + struct svga_shader_result *result = NULL; + struct svga_shader_emitter emit; + int ret = 0; + + memset(&emit, 0, sizeof(emit)); + + emit.use_sm30 = shader->use_sm30; + emit.size = 1024; + emit.buf = MALLOC(emit.size); + if (emit.buf == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + + emit.ptr = emit.buf; + emit.unit = unit; + emit.key = key; + + tgsi_scan_shader( shader->tokens, &emit.info); + + emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1; + + if (unit == PIPE_SHADER_FRAGMENT) + emit.imm_start += key.fkey.num_unnormalized_coords; + + if (unit == PIPE_SHADER_VERTEX) { + emit.imm_start += key.vkey.need_prescale ? 2 : 0; + emit.imm_start += key.vkey.num_zero_stride_vertex_elements; + } + + emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); + + emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1; + emit.in_main_func = TRUE; + + if (!svga_shader_emit_header( &emit )) + goto fail; + + if (!svga_shader_emit_instructions( &emit, shader->tokens )) + goto fail; + + result = CALLOC_STRUCT(svga_shader_result); + if (result == NULL) + goto fail; + + result->shader = shader; + result->tokens = (const unsigned *)emit.buf; + result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); + memcpy(&result->key, &key, sizeof key); + + if (SVGA_DEBUG & DEBUG_TGSI) + { + debug_printf( "#####################################\n" ); + debug_printf( "Shader %u below\n", shader->id ); + tgsi_dump( shader->tokens, 0 ); + if (SVGA_DEBUG & DEBUG_TGSI) { + debug_printf( "Shader %u compiled below\n", shader->id ); + svga_shader_dump( result->tokens, + result->nr_tokens , + FALSE ); + } + debug_printf( "#####################################\n" ); + } + + return result; + +fail: + FREE(result); + FREE(emit.buf); + return NULL; +} + + + + +struct svga_shader_result * +svga_translate_fragment_program( const struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *fkey ) +{ + union svga_compile_key key; + memcpy(&key.fkey, fkey, sizeof *fkey); + + return svga_tgsi_translate( &fs->base, + key, + PIPE_SHADER_FRAGMENT ); +} + +struct svga_shader_result * +svga_translate_vertex_program( const struct svga_vertex_shader *vs, + const struct svga_vs_compile_key *vkey ) +{ + union svga_compile_key key; + memcpy(&key.vkey, vkey, sizeof *vkey); + + return svga_tgsi_translate( &vs->base, + key, + PIPE_SHADER_VERTEX ); +} + + +void svga_destroy_shader_result( struct svga_shader_result *result ) +{ + FREE((unsigned *)result->tokens); + FREE(result); +} + diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h new file mode 100644 index 0000000000..896c90a89a --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -0,0 +1,139 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
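
The emitter in svga_tgsi.c appends the translated shader to a heap buffer that reserve() grows whenever the next write would overflow, with a static sinkhole buffer as a fallback so emission stays safe after an allocation failure. Here is a compact stand-alone version of that growable dword buffer; it keeps the doubling behaviour but leaves out the sinkhole, and the names are generic rather than the driver's.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct emitter {
   char *buf, *ptr;     /* start of the buffer and current write position */
   unsigned size;       /* allocated size in bytes */
};

static int reserve(struct emitter *e, unsigned nr_dwords)
{
   while ((unsigned)(e->ptr - e->buf) + nr_dwords * sizeof(unsigned) > e->size) {
      unsigned used = e->ptr - e->buf;
      char *new_buf = realloc(e->buf, e->size * 2);   /* double on overflow */
      if (!new_buf)
         return 0;
      e->buf = new_buf;
      e->ptr = new_buf + used;
      e->size *= 2;
   }
   return 1;
}

static int emit_dword(struct emitter *e, unsigned dword)
{
   if (!reserve(e, 1))
      return 0;
   memcpy(e->ptr, &dword, sizeof dword);
   e->ptr += sizeof dword;
   return 1;
}

int main(void)
{
   struct emitter e;
   unsigned i;

   e.size = 8;                          /* deliberately tiny to force growth */
   e.buf = e.ptr = malloc(e.size);
   if (!e.buf)
      return 1;

   for (i = 0; i < 100; i++)
      if (!emit_dword(&e, i))
         return 1;

   printf("%u dwords emitted, %u bytes allocated\n",
          (unsigned)((e.ptr - e.buf) / sizeof(unsigned)), e.size);
   free(e.buf);
   return 0;
}
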
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_TGSI_H +#define SVGA_TGSI_H + +#include "pipe/p_state.h" + +#include "svga_hw_reg.h" + +struct svga_fragment_shader; +struct svga_vertex_shader; +struct svga_shader; +struct tgsi_shader_info; +struct tgsi_token; + + +struct svga_vs_compile_key +{ + ubyte need_prescale:1; + ubyte allow_psiz:1; + unsigned zero_stride_vertex_elements; + ubyte num_zero_stride_vertex_elements:6; +}; + +struct svga_fs_compile_key +{ + boolean light_twoside:1; + boolean front_cw:1; + ubyte num_textures; + ubyte num_unnormalized_coords; + struct { + ubyte compare_mode : 1; + ubyte compare_func : 3; + ubyte unnormalized : 1; + + ubyte width_height_idx : 7; + + ubyte texture_target; + } tex[PIPE_MAX_SAMPLERS]; +}; + +union svga_compile_key { + struct svga_vs_compile_key vkey; + struct svga_fs_compile_key fkey; +}; + +struct svga_shader_result +{ + const struct svga_shader *shader; + + /* Parameters used to generate this compilation result: + */ + union svga_compile_key key; + + /* Compiled shader tokens: + */ + const unsigned *tokens; + unsigned nr_tokens; + + /* SVGA Shader ID: + */ + unsigned id; + + /* Next compilation result: + */ + struct svga_shader_result *next; +}; + + +/* TGSI doesn't provide use with VS input semantics (they're actually + * pretty meaningless), so we just generate some plausible ones here. + * This is called both from within the TGSI translator and when + * building vdecls to ensure they match up. + * + * The real use of this information is matching vertex elements to + * fragment shader inputs in the case where vertex shader is disabled. 
+ */ +static INLINE void svga_generate_vdecl_semantics( unsigned idx, + unsigned *usage, + unsigned *usage_index ) +{ + if (idx == 0) { + *usage = SVGA3D_DECLUSAGE_POSITION; + *usage_index = 0; + } + else { + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + *usage_index = idx - 1; + } +} + + + +static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) +{ + return sizeof *key; +} + +static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) +{ + return (const char *)&key->tex[key->num_textures].texture_target - + (const char *)key; +} + +struct svga_shader_result * +svga_translate_fragment_program( const struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *fkey ); + +struct svga_shader_result * +svga_translate_vertex_program( const struct svga_vertex_shader *fs, + const struct svga_vs_compile_key *vkey ); + + +void svga_destroy_shader_result( struct svga_shader_result *result ); + +#endif diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c new file mode 100644 index 0000000000..23b3ace7f3 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c @@ -0,0 +1,280 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
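
A compiled result in this scheme carries the compile key that produced it and is chained onto the shader, so the state emit code can find an existing variant with a memcmp over svga_vs_key_size or svga_fs_key_size bytes and only translate on a miss. The sketch below reduces that keyed variant cache to a few lines; the key and result types are hypothetical, and the integer id stands in for the compiled shader.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct key { int flag_a, flag_b; };

struct result {
   struct key key;             /* the parameters this variant was compiled for */
   int id;                     /* stands in for the compiled shader */
   struct result *next;
};

static struct result *cache;   /* head of the per-shader result list */
static int next_id;

static struct result *search(const struct key *key)
{
   struct result *r;
   for (r = cache; r; r = r->next)
      if (memcmp(&r->key, key, sizeof *key) == 0)
         return r;
   return NULL;
}

static struct result *get_variant(const struct key *key)
{
   struct result *r = search(key);
   if (!r) {                   /* cache miss: "compile" and remember the result */
      r = calloc(1, sizeof *r);
      r->key = *key;
      r->id = next_id++;
      r->next = cache;
      cache = r;
   }
   return r;
}

int main(void)
{
   struct key a = { 1, 0 }, b = { 0, 1 };
   printf("a -> %d\n", get_variant(&a)->id);   /* compiled */
   printf("b -> %d\n", get_variant(&b)->id);   /* compiled */
   printf("a -> %d\n", get_variant(&a)->id);   /* cache hit, same id as before */
   return 0;
}
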
+ * + **********************************************************/ + + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_memory.h" + +#include "svga_tgsi_emit.h" +#include "svga_context.h" + + + + +static boolean ps20_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + struct src_register reg; + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + switch (semantic.Name) { + case TGSI_SEMANTIC_POSITION: + /* Special case: + */ + reg = src_register( SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_POSITION ); + break; + case TGSI_SEMANTIC_COLOR: + reg = src_register( SVGA3DREG_INPUT, + semantic.Index ); + break; + case TGSI_SEMANTIC_FOG: + assert(semantic.Index == 0); + reg = src_register( SVGA3DREG_TEXTURE, 0 ); + break; + case TGSI_SEMANTIC_GENERIC: + reg = src_register( SVGA3DREG_TEXTURE, + semantic.Index + 1 ); + break; + default: + assert(0); + return TRUE; + } + + emit->input_map[idx] = reg; + + dcl.dst = dst( reg ); + + dcl.usage = 0; + dcl.index = 0; + + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +static boolean ps20_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3dShaderDestToken reg; + + switch (semantic.Name) { + case TGSI_SEMANTIC_COLOR: + if (semantic.Index < PIPE_MAX_COLOR_BUFS) { + unsigned cbuf = semantic.Index; + + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_col[cbuf] = emit->output_map[idx]; + emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + } + else { + assert(0); + reg = dst_register( SVGA3DREG_COLOROUT, 0 ); + } + break; + case TGSI_SEMANTIC_POSITION: + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, + semantic.Index ); + break; + default: + assert(0); + reg = dst_register( SVGA3DREG_COLOROUT, 0 ); + break; + } + + return TRUE; +} + + +static boolean vs20_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx ); + dcl.dst = dst_register( SVGA3DREG_INPUT, idx ); + + assert(dcl.dst.reserved0); + + /* Mesa doesn't provide use with VS input semantics (they're + * actually pretty meaningless), so we just generate some plausible + * ones here. This has to match what we declare in the vdecl code + * in svga_pipe_vertex.c. 
+ */ + if (idx == 0) { + dcl.usage = SVGA3D_DECLUSAGE_POSITION; + dcl.index = 0; + } + else { + dcl.usage = SVGA3D_DECLUSAGE_TEXCOORD; + dcl.index = idx - 1; + } + + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +static boolean vs20_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + /* Don't emit dcl instruction for vs20 inputs + */ + + /* Just build the register map table: + */ + switch (semantic.Name) { + case TGSI_SEMANTIC_POSITION: + assert(semantic.Index == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dst_register( SVGA3DREG_RASTOUT, + SVGA3DRASTOUT_POSITION); + break; + case TGSI_SEMANTIC_PSIZE: + assert(semantic.Index == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_psiz = emit->output_map[idx]; + emit->true_psiz = dst_register( SVGA3DREG_RASTOUT, + SVGA3DRASTOUT_PSIZE ); + break; + case TGSI_SEMANTIC_FOG: + assert(semantic.Index == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 ); + break; + case TGSI_SEMANTIC_COLOR: + /* oD0 */ + emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT, + semantic.Index ); + break; + case TGSI_SEMANTIC_GENERIC: + emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, + semantic.Index + 1 ); + break; + default: + assert(0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, 0 ); + return FALSE; + } + + return TRUE; +} + +static boolean ps20_sampler( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx ); + dcl.type = svga_tgsi_sampler_type( emit, idx ); + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ) +{ + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; + unsigned semantic = 0; + unsigned semantic_idx = 0; + unsigned idx; + + if (decl->Declaration.Semantic) { + semantic = decl->Semantic.Name; + semantic_idx = decl->Semantic.Index; + } + + for( idx = first; idx <= last; idx++ ) { + boolean ok; + + switch (decl->Declaration.File) { + case TGSI_FILE_SAMPLER: + assert (emit->unit == PIPE_SHADER_FRAGMENT); + ok = ps20_sampler( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_INPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs20_input( emit, decl->Semantic, idx ); + else + ok = ps20_input( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_OUTPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs20_output( emit, decl->Semantic, idx ); + else + ok = ps20_output( emit, decl->Semantic, idx ); + break; + + default: + /* don't need to declare other vars */ + ok = TRUE; + } + + if (!ok) + return FALSE; + } + + return TRUE; +} + + + diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c new file mode 100644 index 0000000000..d1c7336dec --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -0,0 +1,385 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_memory.h" + +#include "svga_tgsi_emit.h" +#include "svga_context.h" + +static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semantic, + unsigned *usage, + unsigned *idx ) +{ + switch (semantic.Name) { + case TGSI_SEMANTIC_POSITION: + *idx = semantic.Index; + *usage = SVGA3D_DECLUSAGE_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + + *idx = semantic.Index; + *usage = SVGA3D_DECLUSAGE_COLOR; + break; + case TGSI_SEMANTIC_BCOLOR: + *idx = semantic.Index + 2; /* sharing with COLOR */ + *usage = SVGA3D_DECLUSAGE_COLOR; + break; + case TGSI_SEMANTIC_FOG: + *idx = 0; + assert(semantic.Index == 0); + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + break; + case TGSI_SEMANTIC_PSIZE: + *idx = semantic.Index; + *usage = SVGA3D_DECLUSAGE_PSIZE; + break; + case TGSI_SEMANTIC_GENERIC: + *idx = semantic.Index + 1; /* texcoord[0] is reserved for fog */ + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + break; + case TGSI_SEMANTIC_NORMAL: + *idx = semantic.Index; + *usage = SVGA3D_DECLUSAGE_NORMAL; + break; + default: + assert(0); + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + *idx = 0; + return FALSE; + } + + return TRUE; +} + + +static boolean emit_decl( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken reg, + unsigned usage, + unsigned index ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + dcl.dst = reg; + dcl.usage = usage; + dcl.index = index; + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + +static boolean emit_vface_decl( struct svga_shader_emitter *emit ) +{ + if (!emit->emitted_vface) { + SVGA3dShaderDestToken reg = + dst_register( SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_FACE ); + + if (!emit_decl( emit, reg, 0, 0 )) + return FALSE; + + emit->emitted_vface = TRUE; + } + return TRUE; +} + +static boolean ps30_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + unsigned usage, index; + SVGA3dShaderDestToken reg; + + if (semantic.Name == TGSI_SEMANTIC_POSITION) { + emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_POSITION ); + + emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE( 
TGSI_SWIZZLE_X, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Y ); + + reg = writemask( dst(emit->input_map[idx]), + TGSI_WRITEMASK_XY ); + + return emit_decl( emit, reg, 0, 0 ); + } + else if (emit->key.fkey.light_twoside && + (semantic.Name == TGSI_SEMANTIC_COLOR)) { + + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + emit->internal_color_idx[emit->internal_color_count] = idx; + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count ); + emit->ps30_input_count++; + emit->internal_color_count++; + + reg = dst( emit->input_map[idx] ); + + if (!emit_decl( emit, reg, usage, index )) + return FALSE; + + semantic.Name = TGSI_SEMANTIC_BCOLOR; + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + reg = dst_register( SVGA3DREG_INPUT, emit->ps30_input_count++ ); + + if (!emit_decl( emit, reg, usage, index )) + return FALSE; + + if (!emit_vface_decl( emit )) + return FALSE; + + return TRUE; + } + else if (semantic.Name == TGSI_SEMANTIC_FACE) { + if (!emit_vface_decl( emit )) + return FALSE; + emit->emit_frontface = TRUE; + emit->internal_frontface_idx = idx; + return TRUE; + } + else { + + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count++ ); + reg = dst( emit->input_map[idx] ); + + return emit_decl( emit, reg, usage, index ); + } + +} + + +/* PS output registers are the same as 2.0 + */ +static boolean ps30_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3dShaderDestToken reg; + + switch (semantic.Name) { + case TGSI_SEMANTIC_COLOR: + emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + break; + case TGSI_SEMANTIC_POSITION: + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, + semantic.Index ); + break; + default: + assert(0); + reg = dst_register( SVGA3DREG_COLOROUT, 0 ); + break; + } + + return TRUE; +} + + +/* We still make up the input semantics the same as in 2.0 + */ +static boolean vs30_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + unsigned usage, index; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + if (emit->key.vkey.zero_stride_vertex_elements & (1 << idx)) { + unsigned i; + unsigned offset = 0; + unsigned start_idx = emit->info.file_max[TGSI_FILE_CONSTANT] + 1; + /* adjust for prescale constants */ + start_idx += emit->key.vkey.need_prescale ? 2 : 0; + /* compute the offset from the start of zero stride constants */ + for (i = 0; i < PIPE_MAX_ATTRIBS && i < idx; ++i) { + if (emit->key.vkey.zero_stride_vertex_elements & (1<<i)) + ++offset; + } + emit->input_map[idx] = src_register( SVGA3DREG_CONST, + start_idx + offset ); + } else { + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx ); + dcl.dst = dst_register( SVGA3DREG_INPUT, idx ); + + assert(dcl.dst.reserved0); + + svga_generate_vdecl_semantics( idx, &usage, &index ); + + dcl.usage = usage; + dcl.index = index; + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); + } + return TRUE; +} + +/* VS3.0 outputs have proper declarations and semantic info for + * matching against PS inputs. 
+ */ +static boolean vs30_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + unsigned usage, index; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + dcl.dst = dst_register( SVGA3DREG_OUTPUT, idx ); + dcl.usage = usage; + dcl.index = index; + dcl.values[0] |= 1<<31; + + if (semantic.Name == TGSI_SEMANTIC_POSITION) { + assert(idx == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dcl.dst; + } + else if (semantic.Name == TGSI_SEMANTIC_PSIZE) { + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_psiz = emit->output_map[idx]; + + /* This has the effect of not declaring psiz (below) and not + * emitting the final MOV to true_psiz in the postamble. + */ + if (!emit->key.vkey.allow_psiz) + return TRUE; + + emit->true_psiz = dcl.dst; + } + else { + emit->output_map[idx] = dcl.dst; + } + + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + +static boolean ps30_sampler( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx ); + dcl.type = svga_tgsi_sampler_type( emit, idx ); + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ) +{ + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; + unsigned semantic = 0; + unsigned semantic_idx = 0; + unsigned idx; + + if (decl->Declaration.Semantic) { + semantic = decl->Semantic.Name; + semantic_idx = decl->Semantic.Index; + } + + for( idx = first; idx <= last; idx++ ) { + boolean ok; + + switch (decl->Declaration.File) { + case TGSI_FILE_SAMPLER: + assert (emit->unit == PIPE_SHADER_FRAGMENT); + ok = ps30_sampler( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_INPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs30_input( emit, decl->Semantic, idx ); + else + ok = ps30_input( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_OUTPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs30_output( emit, decl->Semantic, idx ); + else + ok = ps30_output( emit, decl->Semantic, idx ); + break; + + default: + /* don't need to declare other vars */ + ok = TRUE; + } + + if (!ok) + return FALSE; + } + + return TRUE; +} + + + diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h new file mode 100644 index 0000000000..2557824293 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -0,0 +1,345 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_TGSI_EMIT_H +#define SVGA_TGSI_EMIT_H + +#include "tgsi/tgsi_scan.h" +#include "svga_hw_reg.h" +#include "svga_tgsi.h" +#include "svga3d_shaderdefs.h" + +struct src_register +{ + SVGA3dShaderSrcToken base; + SVGA3dShaderSrcToken indirect; +}; + + +struct svga_arl_consts { + int number; + int idx; + int swizzle; + int arl_num; +}; + +/* Internal functions: + */ + +struct svga_shader_emitter +{ + boolean use_sm30; + + unsigned size; + char *buf; + char *ptr; + + union svga_compile_key key; + struct tgsi_shader_info info; + int unit; + + int imm_start; + + int nr_hw_const; + int nr_hw_temp; + + int insn_offset; + + int internal_temp_count; + int internal_imm_count; + + int internal_color_idx[2]; /* diffuse, specular */ + int internal_color_count; + + boolean emitted_vface; + boolean emit_frontface; + int internal_frontface_idx; + + int ps30_input_count; + + boolean in_main_func; + + boolean created_zero_immediate; + int zero_immediate_idx; + + boolean created_loop_const; + int loop_const_idx; + + boolean created_sincos_consts; + int sincos_consts_idx; + + unsigned label[32]; + unsigned nr_labels; + + struct src_register input_map[PIPE_MAX_ATTRIBS]; + SVGA3dShaderDestToken output_map[PIPE_MAX_ATTRIBS]; + + struct src_register imm_0055; + SVGA3dShaderDestToken temp_pos; + SVGA3dShaderDestToken true_pos; + + SVGA3dShaderDestToken temp_col[PIPE_MAX_COLOR_BUFS]; + SVGA3dShaderDestToken true_col[PIPE_MAX_COLOR_BUFS]; + + SVGA3dShaderDestToken temp_psiz; + SVGA3dShaderDestToken true_psiz; + + struct svga_arl_consts arl_consts[12]; + int num_arl_consts; + int current_arl; +}; + + +boolean svga_shader_emit_dword( struct svga_shader_emitter *emit, + unsigned dword ); + +boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit, + const unsigned *dwords, + unsigned nr ); + +boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit, + unsigned opcode ); + +boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, + const struct tgsi_token *tokens ); + +boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ); + +boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ); + + +static INLINE boolean emit_dst( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dest ) +{ + assert(dest.reserved0); + 
return svga_shader_emit_dword( emit, dest.value ); +} + +static INLINE boolean emit_src( struct svga_shader_emitter *emit, + const struct src_register src ) +{ + if (src.base.relAddr) { + assert(src.base.reserved0); + assert(src.indirect.reserved0); + return (svga_shader_emit_dword( emit, src.base.value ) && + svga_shader_emit_dword( emit, src.indirect.value )); + } + else { + assert(src.base.reserved0); + return svga_shader_emit_dword( emit, src.base.value ); + } +} + + +static INLINE boolean emit_instruction( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken opcode ) +{ + return svga_shader_emit_opcode( emit, opcode.value ); +} + + +static INLINE boolean emit_op1( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0 ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 )); +} + +static INLINE boolean emit_op2( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1 ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 ) && + emit_src( emit, src1 )); +} + +static INLINE boolean emit_op3( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2 ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 ) && + emit_src( emit, src1 ) && + emit_src( emit, src2 )); +} + + +#define TRANSLATE_SWIZZLE(x,y,z,w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6)) +#define SWIZZLE_XYZW \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W) +#define SWIZZLE_XXXX \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X) +#define SWIZZLE_YYYY \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y) +#define SWIZZLE_ZZZZ \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z) +#define SWIZZLE_WWWW \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W) + + + +static INLINE SVGA3dShaderInstToken +inst_token( unsigned opcode ) +{ + SVGA3dShaderInstToken inst; + + inst.value = 0; + inst.op = opcode; + + return inst; +} + +static INLINE SVGA3dShaderDestToken +dst_register( unsigned file, + int number ) +{ + SVGA3dShaderDestToken dest; + + dest.value = 0; + dest.num = number; + dest.type_upper = file >> 3; + dest.relAddr = 0; + dest.reserved1 = 0; + dest.mask = 0xf; + dest.dstMod = 0; + dest.shfScale = 0; + dest.type_lower = file & 0x7; + dest.reserved0 = 1; /* is_reg */ + + return dest; +} + +static INLINE SVGA3dShaderDestToken +writemask( SVGA3dShaderDestToken dest, + unsigned mask ) +{ + dest.mask &= mask; + return dest; +} + + +static INLINE SVGA3dShaderSrcToken +src_token( unsigned file, int number ) +{ + SVGA3dShaderSrcToken src; + + src.value = 0; + src.num = number; + src.type_upper = file >> 3; + src.relAddr = 0; + src.reserved1 = 0; + src.swizzle = SWIZZLE_XYZW; + src.srcMod = 0; + src.type_lower = file & 0x7; + src.reserved0 = 1; /* is_reg */ + + return src; +} + + +static INLINE struct src_register +absolute( struct src_register src ) +{ + src.base.srcMod = SVGA3DSRCMOD_ABS; + + return src; +} + + +static INLINE struct src_register +negate( struct src_register src ) +{ + switch (src.base.srcMod) { + case SVGA3DSRCMOD_ABS: + src.base.srcMod = 
SVGA3DSRCMOD_ABSNEG; + break; + case SVGA3DSRCMOD_ABSNEG: + src.base.srcMod = SVGA3DSRCMOD_ABS; + break; + case SVGA3DSRCMOD_NEG: + src.base.srcMod = SVGA3DSRCMOD_NONE; + break; + case SVGA3DSRCMOD_NONE: + src.base.srcMod = SVGA3DSRCMOD_NEG; + break; + } + return src; +} + + +static INLINE struct src_register +src_register( unsigned file, int number ) +{ + struct src_register src; + + src.base = src_token( file, number ); + src.indirect.value = 0; + + return src; +} + +static INLINE SVGA3dShaderDestToken dst( struct src_register src ) +{ + return dst_register( SVGA3dShaderGetRegType( src.base.value ), + src.base.num ); +} + +static INLINE struct src_register src( SVGA3dShaderDestToken dst ) +{ + return src_register( SVGA3dShaderGetRegType( dst.value ), + dst.num ); +} + +static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit, + int idx ) +{ + switch (emit->key.fkey.tex[idx].texture_target) { + case PIPE_TEXTURE_1D: + return SVGA3DSAMP_2D; + case PIPE_TEXTURE_2D: + return SVGA3DSAMP_2D; + case PIPE_TEXTURE_3D: + return SVGA3DSAMP_VOLUME; + case PIPE_TEXTURE_CUBE: + return SVGA3DSAMP_CUBE; + } + + return SVGA3DSAMP_UNKNOWN; +} + +#endif diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c new file mode 100644 index 0000000000..1670da8bfa --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -0,0 +1,2716 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_memory.h" + +#include "svga_tgsi_emit.h" +#include "svga_context.h" + + +static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); +static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); + + + + +static unsigned +translate_opcode( + uint opcode ) +{ + switch (opcode) { + case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; + case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; + case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC; + case TGSI_OPCODE_DDX: return SVGA3DOP_DSX; + case TGSI_OPCODE_DDY: return SVGA3DOP_DSY; + case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; + case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; + case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; + case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP; + case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; + case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP; + case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; + case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; + case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; + case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; + case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; + case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; + case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; + case TGSI_OPCODE_SSG: return SVGA3DOP_SGN; + default: + debug_printf("Unkown opcode %u\n", opcode); + assert( 0 ); + return SVGA3DOP_LAST_INST; + } +} + + +static unsigned translate_file( unsigned file ) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; + case TGSI_FILE_INPUT: return SVGA3DREG_INPUT; + case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */ + case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST; + case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST; + case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER; + case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR; + default: + assert( 0 ); + return SVGA3DREG_TEMP; + } +} + + + + + + +static SVGA3dShaderDestToken +translate_dst_register( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + unsigned idx ) +{ + const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; + SVGA3dShaderDestToken dest; + + switch (reg->Register.File) { + case TGSI_FILE_OUTPUT: + /* Output registers encode semantic information in their name. 
+ * Need to lookup a table built at decl time: + */ + dest = emit->output_map[reg->Register.Index]; + break; + + default: + dest = dst_register( translate_file( reg->Register.File ), + reg->Register.Index ); + break; + } + + dest.mask = reg->Register.WriteMask; + + if (insn->Instruction.Saturate) + dest.dstMod = SVGA3DDSTMOD_SATURATE; + + return dest; +} + + +static struct src_register +swizzle( struct src_register src, + int x, + int y, + int z, + int w ) +{ + x = (src.base.swizzle >> (x * 2)) & 0x3; + y = (src.base.swizzle >> (y * 2)) & 0x3; + z = (src.base.swizzle >> (z * 2)) & 0x3; + w = (src.base.swizzle >> (w * 2)) & 0x3; + + src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w); + + return src; +} + +static struct src_register +scalar( struct src_register src, + int comp ) +{ + return swizzle( src, comp, comp, comp, comp ); +} + +static INLINE boolean +svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) +{ + int i; + + for (i = 0; i < emit->num_arl_consts; ++i) { + if (emit->arl_consts[i].arl_num == emit->current_arl) + return TRUE; + } + return FALSE; +} + +static INLINE int +svga_arl_adjustment( const struct svga_shader_emitter *emit ) +{ + int i; + + for (i = 0; i < emit->num_arl_consts; ++i) { + if (emit->arl_consts[i].arl_num == emit->current_arl) + return emit->arl_consts[i].number; + } + return 0; +} + +static struct src_register +translate_src_register( const struct svga_shader_emitter *emit, + const struct tgsi_full_src_register *reg ) +{ + struct src_register src; + + switch (reg->Register.File) { + case TGSI_FILE_INPUT: + /* Input registers are referred to by their semantic name rather + * than by index. Use the mapping build up from the decls: + */ + src = emit->input_map[reg->Register.Index]; + break; + + case TGSI_FILE_IMMEDIATE: + /* Immediates are appended after TGSI constants in the D3D + * constant buffer. + */ + src = src_register( translate_file( reg->Register.File ), + reg->Register.Index + + emit->imm_start ); + break; + + default: + src = src_register( translate_file( reg->Register.File ), + reg->Register.Index ); + + break; + } + + /* Indirect addressing (for coninstant buffer lookups only) + */ + if (reg->Register.Indirect) + { + /* we shift the offset towards the minimum */ + if (svga_arl_needs_adjustment( emit )) { + src.base.num -= svga_arl_adjustment( emit ); + } + src.base.relAddr = 1; + + /* Not really sure what should go in the second token: + */ + src.indirect = src_token( SVGA3DREG_ADDR, + reg->Indirect.Index ); + + src.indirect.swizzle = SWIZZLE_XXXX; + } + + src = swizzle( src, + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW ); + + /* src.mod isn't a bitfield, unfortunately: + * See tgsi_util_get_full_src_register_sign_mode for implementation details. + */ + if (reg->Register.Absolute) { + if (reg->Register.Negate) + src.base.srcMod = SVGA3DSRCMOD_ABSNEG; + else + src.base.srcMod = SVGA3DSRCMOD_ABS; + } + else { + if (reg->Register.Negate) + src.base.srcMod = SVGA3DSRCMOD_NEG; + else + src.base.srcMod = SVGA3DSRCMOD_NONE; + } + + return src; +} + + +/* + * Get a temporary register, return -1 if none available + */ +static INLINE SVGA3dShaderDestToken +get_temp( struct svga_shader_emitter *emit ) +{ + int i = emit->nr_hw_temp + emit->internal_temp_count++; + + return dst_register( SVGA3DREG_TEMP, i ); +} + +/* Release a single temp. Currently only effective if it was the last + * allocated temp, otherwise release will be delayed until the next + * call to reset_temp_regs(). 
+ */ +static INLINE void +release_temp( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken temp ) +{ + if (temp.num == emit->internal_temp_count - 1) + emit->internal_temp_count--; +} + +static void reset_temp_regs( struct svga_shader_emitter *emit ) +{ + emit->internal_temp_count = 0; +} + + +static boolean submit_op0( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest )); +} + +static boolean submit_op1( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0 ) +{ + return emit_op1( emit, inst, dest, src0 ); +} + + +/* SVGA shaders may not refer to >1 constant register in a single + * instruction. This function checks for that usage and inserts a + * move to temporary if detected. + * + * The same applies to input registers -- at most a single input + * register may be read by any instruction. + */ +static boolean submit_op2( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1 ) +{ + SVGA3dShaderDestToken temp; + SVGA3dShaderRegType type0, type1; + boolean need_temp = FALSE; + + temp.value = 0; + type0 = SVGA3dShaderGetRegType( src0.base.value ); + type1 = SVGA3dShaderGetRegType( src1.base.value ); + + if (type0 == SVGA3DREG_CONST && + type1 == SVGA3DREG_CONST && + src0.base.num != src1.base.num) + need_temp = TRUE; + + if (type0 == SVGA3DREG_INPUT && + type1 == SVGA3DREG_INPUT && + src0.base.num != src1.base.num) + need_temp = TRUE; + + if (need_temp) + { + temp = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 )) + return FALSE; + + src0 = src( temp ); + } + + if (!emit_op2( emit, inst, dest, src0, src1 )) + return FALSE; + + if (need_temp) + release_temp( emit, temp ); + + return TRUE; +} + + +/* SVGA shaders may not refer to >1 constant register in a single + * instruction. This function checks for that usage and inserts a + * move to temporary if detected. 
+ */ +static boolean submit_op3( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2 ) +{ + SVGA3dShaderDestToken temp0; + SVGA3dShaderDestToken temp1; + boolean need_temp0 = FALSE; + boolean need_temp1 = FALSE; + SVGA3dShaderRegType type0, type1, type2; + + temp0.value = 0; + temp1.value = 0; + type0 = SVGA3dShaderGetRegType( src0.base.value ); + type1 = SVGA3dShaderGetRegType( src1.base.value ); + type2 = SVGA3dShaderGetRegType( src2.base.value ); + + if (inst.op != SVGA3DOP_SINCOS) { + if (type0 == SVGA3DREG_CONST && + ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) || + (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type1 == SVGA3DREG_CONST && + (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num)) + need_temp1 = TRUE; + } + + if (type0 == SVGA3DREG_INPUT && + ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) || + (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type1 == SVGA3DREG_INPUT && + (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) + need_temp1 = TRUE; + + if (need_temp0) + { + temp0 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + return FALSE; + + src0 = src( temp0 ); + } + + if (need_temp1) + { + temp1 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 )) + return FALSE; + + src1 = src( temp1 ); + } + + if (!emit_op3( emit, inst, dest, src0, src1, src2 )) + return FALSE; + + if (need_temp1) + release_temp( emit, temp1 ); + if (need_temp0) + release_temp( emit, temp0 ); + return TRUE; +} + + +static boolean emit_def_const( struct svga_shader_emitter *emit, + SVGA3dShaderConstType type, + unsigned idx, + float a, + float b, + float c, + float d ) +{ + SVGA3DOpDefArgs def; + SVGA3dShaderInstToken opcode; + + switch (type) { + case SVGA3D_CONST_TYPE_FLOAT: + opcode = inst_token( SVGA3DOP_DEF ); + def.dst = dst_register( SVGA3DREG_CONST, idx ); + def.constValues[0] = a; + def.constValues[1] = b; + def.constValues[2] = c; + def.constValues[3] = d; + break; + case SVGA3D_CONST_TYPE_INT: + opcode = inst_token( SVGA3DOP_DEFI ); + def.dst = dst_register( SVGA3DREG_CONSTINT, idx ); + def.constIValues[0] = (int)a; + def.constIValues[1] = (int)b; + def.constIValues[2] = (int)c; + def.constIValues[3] = (int)d; + break; + default: + assert(0); + break; + } + + if (!emit_instruction(emit, opcode) || + !svga_shader_emit_dwords( emit, def.values, Elements(def.values))) + return FALSE; + + return TRUE; +} + +static INLINE boolean +create_zero_immediate( struct svga_shader_emitter *emit ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, 0, 0, 0, 1 )) + return FALSE; + + emit->zero_immediate_idx = idx; + emit->created_zero_immediate = TRUE; + + return TRUE; +} + +static INLINE boolean +create_loop_const( struct svga_shader_emitter *emit ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, + 255, /* iteration count */ + 0, /* initial value */ + 1, /* step size */ + 0 /* not used, must be 0 */)) + return FALSE; + + emit->loop_const_idx = idx; + emit->created_loop_const = TRUE; + + return TRUE; +} + +static INLINE boolean +create_sincos_consts( struct svga_shader_emitter *emit ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, 
SVGA3D_CONST_TYPE_FLOAT, idx, + -1.5500992e-006f, + -2.1701389e-005f, + 0.0026041667f, + 0.00026041668f )) + return FALSE; + + emit->sincos_consts_idx = idx; + idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, + -0.020833334f, + -0.12500000f, + 1.0f, + 0.50000000f )) + return FALSE; + + emit->created_sincos_consts = TRUE; + + return TRUE; +} + +static INLINE boolean +create_arl_consts( struct svga_shader_emitter *emit ) +{ + int i; + + for (i = 0; i < emit->num_arl_consts; i += 4) { + int j; + unsigned idx = emit->nr_hw_const++; + float vals[4]; + for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { + vals[j] = emit->arl_consts[i + j].number; + emit->arl_consts[i + j].idx = idx; + switch (j) { + case 0: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X; + break; + case 1: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y; + break; + case 2: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z; + break; + case 3: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W; + break; + } + } + while (j < 4) + vals[j++] = 0; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, + vals[0], vals[1], + vals[2], vals[3])) + return FALSE; + } + + return TRUE; +} + +static INLINE struct src_register +get_vface( struct svga_shader_emitter *emit ) +{ + assert(emit->emitted_vface); + return src_register(SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_FACE); +} + +/* returns {0, 0, 0, 1} immediate */ +static INLINE struct src_register +get_zero_immediate( struct svga_shader_emitter *emit ) +{ + assert(emit->created_zero_immediate); + assert(emit->zero_immediate_idx >= 0); + return src_register( SVGA3DREG_CONST, + emit->zero_immediate_idx ); +} + +/* returns the loop const */ +static INLINE struct src_register +get_loop_const( struct svga_shader_emitter *emit ) +{ + assert(emit->created_loop_const); + assert(emit->loop_const_idx >= 0); + return src_register( SVGA3DREG_CONSTINT, + emit->loop_const_idx ); +} + +/* returns a sincos const */ +static INLINE struct src_register +get_sincos_const( struct svga_shader_emitter *emit, + unsigned index ) +{ + assert(emit->created_sincos_consts); + assert(emit->sincos_consts_idx >= 0); + assert(index == 0 || index == 1); + return src_register( SVGA3DREG_CONST, + emit->sincos_consts_idx + index ); +} + +static INLINE struct src_register +get_fake_arl_const( struct svga_shader_emitter *emit ) +{ + struct src_register reg; + int idx = 0, swizzle = 0, i; + + for (i = 0; i < emit->num_arl_consts; ++ i) { + if (emit->arl_consts[i].arl_num == emit->current_arl) { + idx = emit->arl_consts[i].idx; + swizzle = emit->arl_consts[i].swizzle; + } + } + + reg = src_register( SVGA3DREG_CONST, idx ); + return scalar(reg, swizzle); +} + +static INLINE struct src_register +get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) +{ + int idx; + struct src_register reg; + + /* the width/height indexes start right after constants */ + idx = emit->key.fkey.tex[sampler_num].width_height_idx + + emit->info.file_max[TGSI_FILE_CONSTANT] + 1; + + reg = src_register( SVGA3DREG_CONST, idx ); + return reg; +} + +static boolean emit_fake_arl(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + struct src_register src1 = get_fake_arl_const( emit ); + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp = get_temp( emit ); + + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) + return 
FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ), + src1)) + return FALSE; + + /* replicate the original swizzle */ + src1 = src(tmp); + src1.base.swizzle = src0.base.swizzle; + + return submit_op1( emit, inst_token( SVGA3DOP_MOVA ), + dst, src1 ); +} + +static boolean emit_if(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct src_register src = translate_src_register( + emit, &insn->Src[0] ); + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); + + if_token.control = SVGA3DOPCOMPC_NE; + zero = scalar(zero, TGSI_SWIZZLE_X); + + return (emit_instruction( emit, if_token ) && + emit_src( emit, src ) && + emit_src( emit, zero ) ); +} + +static boolean emit_endif(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + return (emit_instruction( emit, + inst_token( SVGA3DOP_ENDIF ))); +} + +static boolean emit_else(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + return (emit_instruction( emit, + inst_token( SVGA3DOP_ELSE ))); +} + +/* Translate the following TGSI FLR instruction. + * FLR DST, SRC + * To the following SVGA3D instruction sequence. + * FRC TMP, SRC + * SUB DST, SRC, TMP + */ +static boolean emit_floor(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* FRC TMP, SRC */ + if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 )) + return FALSE; + + /* SUB DST, SRC, TMP */ + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0, + negate( src( temp ) ) )) + return FALSE; + + return TRUE; +} + + +/* Translate the following TGSI CMP instruction. + * CMP DST, SRC0, SRC1, SRC2 + * To the following SVGA3D instruction sequence. + * CMP DST, SRC0, SRC2, SRC1 + */ +static boolean emit_cmp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + const struct src_register src2 = translate_src_register( + emit, &insn->Src[2] ); + + /* CMP DST, SRC0, SRC2, SRC1 */ + return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1); +} + + + +/* Translate the following TGSI DIV instruction. + * DIV DST.xy, SRC0, SRC1 + * To the following SVGA3D instruction sequence. + * RCP TMP.x, SRC1.xxxx + * RCP TMP.y, SRC1.yyyy + * MUL DST.xy, SRC0, TMP + */ +static boolean emit_div(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + int i; + + /* For each enabled element, perform a RCP instruction. Note that + * RCP is scalar in SVGA3D: + */ + for (i = 0; i < 4; i++) { + unsigned channel = 1 << i; + if (dst.mask & channel) { + /* RCP TMP.?, SRC1.???? 
*/ + if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), + writemask(temp, channel), + scalar(src1, i) )) + return FALSE; + } + } + + /* Then multiply them out with a single mul: + * + * MUL DST, SRC0, TMP + */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0, + src( temp ) )) + return FALSE; + + return TRUE; +} + +/* Translate the following TGSI DP2 instruction. + * DP2 DST, SRC1, SRC2 + * To the following SVGA3D instruction sequence. + * MUL TMP, SRC1, SRC2 + * ADD DST, TMP.xxxx, TMP.yyyy + */ +static boolean emit_dp2(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + struct src_register temp_src0, temp_src1; + + /* MUL TMP, SRC1, SRC2 */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 )) + return FALSE; + + temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X); + temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y); + + /* ADD DST, TMP.xxxx, TMP.yyyy */ + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, + temp_src0, temp_src1 )) + return FALSE; + + return TRUE; +} + + +/* Translate the following TGSI DPH instruction. + * DPH DST, SRC1, SRC2 + * To the following SVGA3D instruction sequence. + * DP3 TMP, SRC1, SRC2 + * ADD DST, TMP, SRC2.wwww + */ +static boolean emit_dph(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* DP3 TMP, SRC1, SRC2 */ + if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 )) + return FALSE; + + src1 = scalar(src1, TGSI_SWIZZLE_W); + + /* ADD DST, TMP, SRC2.wwww */ + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, + src( temp ), src1 )) + return FALSE; + + return TRUE; +} + +/* Translate the following TGSI DST instruction. + * NRM DST, SRC + * To the following SVGA3D instruction sequence. 
+ * DP3 TMP, SRC, SRC + * RSQ TMP, TMP + * MUL DST, SRC, TMP + */ +static boolean emit_nrm(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* DP3 TMP, SRC, SRC */ + if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 )) + return FALSE; + + /* RSQ TMP, TMP */ + if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp ))) + return FALSE; + + /* MUL DST, SRC, TMP */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, + src0, src( temp ))) + return FALSE; + + return TRUE; + +} + +static boolean do_emit_sincos(struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register src0) +{ + src0 = scalar(src0, TGSI_SWIZZLE_X); + + if (emit->use_sm30) { + return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ), + dst, src0 ); + } else { + struct src_register const1 = get_sincos_const( emit, 0 ); + struct src_register const2 = get_sincos_const( emit, 1 ); + + return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ), + dst, src0, const1, const2 ); + } +} + +static boolean emit_sincos(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* SCS TMP SRC */ + if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 )) + return FALSE; + + /* MOV DST TMP */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) )) + return FALSE; + + return TRUE; +} + +/* + * SCS TMP SRC + * MOV DST TMP.yyyy + */ +static boolean emit_sin(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* SCS TMP SRC */ + if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0)) + return FALSE; + + src0 = scalar(src( temp ), TGSI_SWIZZLE_Y); + + /* MOV DST TMP.yyyy */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) + return FALSE; + + return TRUE; +} + +/* + * SCS TMP SRC + * MOV DST TMP.xxxx + */ +static boolean emit_cos(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* SCS TMP SRC */ + if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 )) + return FALSE; + + src0 = scalar(src( temp ), TGSI_SWIZZLE_X); + + /* MOV DST TMP.xxxx */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) + return FALSE; + + return TRUE; +} + + +/* + * ADD DST SRC0, negate(SRC0) + */ +static boolean emit_sub(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + + src1 = negate(src1); + + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, + src0, src1 )) + 
return FALSE; + + return TRUE; +} + + +static boolean emit_kil(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + const struct tgsi_full_src_register *reg = &insn->Src[0]; + struct src_register src0; + + inst = inst_token( SVGA3DOP_TEXKILL ); + src0 = translate_src_register( emit, reg ); + + if (reg->Register.Absolute || + reg->Register.Negate || + reg->Register.Indirect || + reg->Register.SwizzleX != 0 || + reg->Register.SwizzleY != 1 || + reg->Register.SwizzleZ != 2 || + reg->Register.File != TGSI_FILE_TEMPORARY) + { + SVGA3dShaderDestToken temp = get_temp( emit ); + + submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ); + src0 = src( temp ); + } + + return submit_op0( emit, inst, dst(src0) ); +} + + +/* mesa state tracker always emits kilp as an unconditional + * kil */ +static boolean emit_kilp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken temp; + struct src_register one = get_zero_immediate( emit ); + + inst = inst_token( SVGA3DOP_TEXKILL ); + one = scalar( one, TGSI_SWIZZLE_W ); + + /* texkill doesn't allow negation on the operand so lets move + * negation of {1} to a temp register */ + temp = get_temp( emit ); + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, + negate( one ) )) + return FALSE; + + return submit_op0( emit, inst, temp ); +} + +/* Implement conditionals by initializing destination reg to 'fail', + * then set predicate reg with UFOP_SETP, then move 'pass' to dest + * based on predicate reg. + * + * SETP src0, cmp, src1 -- do this first to avoid aliasing problems. + * MOV dst, fail + * MOV dst, pass, p0 + */ +static boolean +emit_conditional(struct svga_shader_emitter *emit, + unsigned compare_func, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1, + struct src_register pass, + struct src_register fail) +{ + SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); + SVGA3dShaderInstToken setp_token, mov_token; + setp_token = inst_token( SVGA3DOP_SETP ); + + switch (compare_func) { + case PIPE_FUNC_NEVER: + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, fail ); + break; + case PIPE_FUNC_LESS: + setp_token.control = SVGA3DOPCOMP_LT; + break; + case PIPE_FUNC_EQUAL: + setp_token.control = SVGA3DOPCOMP_EQ; + break; + case PIPE_FUNC_LEQUAL: + setp_token.control = SVGA3DOPCOMP_LE; + break; + case PIPE_FUNC_GREATER: + setp_token.control = SVGA3DOPCOMP_GT; + break; + case PIPE_FUNC_NOTEQUAL: + setp_token.control = SVGA3DOPCOMPC_NE; + break; + case PIPE_FUNC_GEQUAL: + setp_token.control = SVGA3DOPCOMP_GE; + break; + case PIPE_FUNC_ALWAYS: + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, pass ); + break; + } + + /* SETP src0, COMPOP, src1 */ + if (!submit_op2( emit, setp_token, pred_reg, + src0, src1 )) + return FALSE; + + mov_token = inst_token( SVGA3DOP_MOV ); + + /* MOV dst, fail */ + if (!submit_op1( emit, mov_token, dst, + fail )) + return FALSE; + + /* MOV dst, pass (predicated) + * + * Note that the predicate reg (and possible modifiers) is passed + * as the first source argument. 
+ */ + mov_token.predicated = 1; + if (!submit_op2( emit, mov_token, dst, + src( pred_reg ), pass )) + return FALSE; + + return TRUE; +} + + +static boolean +emit_select(struct svga_shader_emitter *emit, + unsigned compare_func, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1 ) +{ + /* There are some SVGA instructions which implement some selects + * directly, but they are only available in the vertex shader. + */ + if (emit->unit == PIPE_SHADER_VERTEX) { + switch (compare_func) { + case PIPE_FUNC_GEQUAL: + return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 ); + case PIPE_FUNC_LEQUAL: + return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 ); + case PIPE_FUNC_GREATER: + return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 ); + case PIPE_FUNC_LESS: + return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 ); + default: + break; + } + } + + + /* Otherwise, need to use the setp approach: + */ + { + struct src_register one, zero; + /* zero immediate is 0,0,0,1 */ + zero = get_zero_immediate( emit ); + one = scalar( zero, TGSI_SWIZZLE_W ); + zero = scalar( zero, TGSI_SWIZZLE_X ); + + return emit_conditional( + emit, + compare_func, + dst, + src0, + src1, + one, zero); + } +} + + +static boolean emit_select_op(struct svga_shader_emitter *emit, + unsigned compare, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + + return emit_select( emit, compare, dst, src0, src1 ); +} + + +/* Translate texture instructions to SVGA3D representation. + */ +static boolean emit_tex2(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + SVGA3dShaderDestToken dst ) +{ + SVGA3dShaderInstToken inst; + struct src_register src0; + struct src_register src1; + + inst.value = 0; + inst.op = SVGA3DOP_TEX; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXP: + inst.control = SVGA3DOPCONT_PROJECT; + break; + case TGSI_OPCODE_TXB: + inst.control = SVGA3DOPCONT_BIAS; + break; + default: + assert(0); + return FALSE; + } + + src0 = translate_src_register( emit, &insn->Src[0] ); + src1 = translate_src_register( emit, &insn->Src[1] ); + + if (emit->key.fkey.tex[src1.base.num].unnormalized) { + struct src_register wh = get_tex_dimensions( emit, src1.base.num ); + SVGA3dShaderDestToken tmp = get_temp( emit ); + + /* MUL tmp, SRC0, WH */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + tmp, src0, wh )) + return FALSE; + src0 = src( tmp ); + } + + return submit_op2( emit, inst, dst, src0, src1 ); +} + + + + +/* Translate texture instructions to SVGA3D representation. 
+ */ +static boolean emit_tex3(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + SVGA3dShaderDestToken dst ) +{ + SVGA3dShaderInstToken inst; + struct src_register src0; + struct src_register src1; + struct src_register src2; + + inst.value = 0; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_TXD: + inst.op = SVGA3DOP_TEXLDD; + break; + case TGSI_OPCODE_TXL: + inst.op = SVGA3DOP_TEXLDL; + break; + } + + src0 = translate_src_register( emit, &insn->Src[0] ); + src1 = translate_src_register( emit, &insn->Src[1] ); + src2 = translate_src_register( emit, &insn->Src[2] ); + + return submit_op3( emit, inst, dst, src0, src1, src2 ); +} + + +static boolean emit_tex(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = + translate_dst_register( emit, insn, 0 ); + struct src_register src0 = + translate_src_register( emit, &insn->Src[0] ); + struct src_register src1 = + translate_src_register( emit, &insn->Src[1] ); + + SVGA3dShaderDestToken tex_result; + + /* check for shadow samplers */ + boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode == + PIPE_TEX_COMPARE_R_TO_TEXTURE); + + + /* If doing compare processing, need to put this value into a + * temporary so it can be used as a source later on. + */ + if (compare || + (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) { + tex_result = get_temp( emit ); + } + else { + tex_result = dst; + } + + switch(insn->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: + if (!emit_tex2( emit, insn, tex_result )) + return FALSE; + break; + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXD: + if (!emit_tex3( emit, insn, tex_result )) + return FALSE; + break; + default: + assert(0); + } + + + if (compare) { + SVGA3dShaderDestToken src0_zdivw = get_temp( emit ); + struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); + struct src_register one = + scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W ); + + /* Divide texcoord R by Q */ + if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), + src0_zdivw, + scalar(src0, TGSI_SWIZZLE_W) )) + return FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + src0_zdivw, + scalar(src0, TGSI_SWIZZLE_Z), + src(src0_zdivw) )) + return FALSE; + + if (!emit_select( + emit, + emit->key.fkey.tex[src1.base.num].compare_func, + dst, + src(src0_zdivw), + tex_src_x)) + return FALSE; + + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask( dst, TGSI_WRITEMASK_W), + one ); + } + else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) + { + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) )) + return FALSE; + } + + return TRUE; +} + +static boolean emit_bgnloop2( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP ); + struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); + struct src_register const_int = get_loop_const( emit ); + + return (emit_instruction( emit, inst ) && + emit_src( emit, loop_reg ) && + emit_src( emit, const_int ) ); +} + +static boolean emit_endloop2( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); + return emit_instruction( emit, inst ); +} + +static boolean emit_brk( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK ); + 
return emit_instruction( emit, inst ); +} + +static boolean emit_scalar_op1( struct svga_shader_emitter *emit, + unsigned opcode, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + struct src_register src; + + inst = inst_token( opcode ); + dst = translate_dst_register( emit, insn, 0 ); + src = translate_src_register( emit, &insn->Src[0] ); + src = scalar( src, TGSI_SWIZZLE_X ); + + return submit_op1( emit, inst, dst, src ); +} + + +static boolean emit_simple_instruction(struct svga_shader_emitter *emit, + unsigned opcode, + const struct tgsi_full_instruction *insn ) +{ + const struct tgsi_full_src_register *src = insn->Src; + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + + inst = inst_token( opcode ); + dst = translate_dst_register( emit, insn, 0 ); + + switch (insn->Instruction.NumSrcRegs) { + case 0: + return submit_op0( emit, inst, dst ); + case 1: + return submit_op1( emit, inst, dst, + translate_src_register( emit, &src[0] )); + case 2: + return submit_op2( emit, inst, dst, + translate_src_register( emit, &src[0] ), + translate_src_register( emit, &src[1] ) ); + case 3: + return submit_op3( emit, inst, dst, + translate_src_register( emit, &src[0] ), + translate_src_register( emit, &src[1] ), + translate_src_register( emit, &src[2] ) ); + default: + assert(0); + return FALSE; + } +} + +static boolean emit_arl(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + ++emit->current_arl; + if (svga_arl_needs_adjustment( emit )) { + return emit_fake_arl( emit, insn ); + } else { + /* no need to adjust, just emit straight arl */ + return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn); + } +} + +static boolean alias_src_dst( struct src_register src, + SVGA3dShaderDestToken dst ) +{ + if (src.base.num != dst.num) + return FALSE; + + if (SVGA3dShaderGetRegType(dst.value) != + SVGA3dShaderGetRegType(src.base.value)) + return FALSE; + + return TRUE; +} + +static boolean emit_pow(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + boolean need_tmp = FALSE; + + /* POW can only output to a temporary */ + if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY) + need_tmp = TRUE; + + /* POW src1 must not be the same register as dst */ + if (alias_src_dst( src1, dst )) + need_tmp = TRUE; + + /* it's a scalar op */ + src0 = scalar( src0, TGSI_SWIZZLE_X ); + src1 = scalar( src1, TGSI_SWIZZLE_X ); + + if (need_tmp) { + SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X ); + + if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1)) + return FALSE; + + return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) ); + } + else { + return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1); + } +} + +static boolean emit_xpd(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + boolean need_dst_tmp = FALSE; + + /* XPD can only output to a temporary */ + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP) + need_dst_tmp = TRUE; + 
+ /* The dst reg must not be the same as src0 or src1*/ + if (alias_src_dst(src0, dst) || + alias_src_dst(src1, dst)) + need_dst_tmp = TRUE; + + if (need_dst_tmp) { + SVGA3dShaderDestToken tmp = get_temp( emit ); + + /* Obey DX9 restrictions on mask: + */ + tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ; + + if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1)) + return FALSE; + + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) + return FALSE; + } + else { + if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1)) + return FALSE; + } + + /* Need to emit 1.0 to dst.w? + */ + if (dst.mask & TGSI_WRITEMASK_W) { + struct src_register zero = get_zero_immediate( emit ); + + if (!submit_op1(emit, + inst_token( SVGA3DOP_MOV ), + writemask(dst, TGSI_WRITEMASK_W), + zero)) + return FALSE; + } + + return TRUE; +} + + +static boolean emit_lrp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp; + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + const struct src_register src2 = translate_src_register( + emit, &insn->Src[2] ); + boolean need_dst_tmp = FALSE; + + /* The dst reg must not be the same as src0 or src2 */ + if (alias_src_dst(src0, dst) || + alias_src_dst(src2, dst)) + need_dst_tmp = TRUE; + + if (need_dst_tmp) { + tmp = get_temp( emit ); + tmp.mask = dst.mask; + } + else { + tmp = dst; + } + + if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) + return FALSE; + + if (need_dst_tmp) { + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) + return FALSE; + } + + return TRUE; +} + + +static boolean emit_dst_insn(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + /* SVGA/DX9 has a DST instruction, but only for vertex shaders: + */ + return emit_simple_instruction(emit, SVGA3DOP_DST, insn); + } + else { + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp; + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->Src[1] ); + struct src_register zero = get_zero_immediate( emit ); + boolean need_tmp = FALSE; + + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || + alias_src_dst(src0, dst) || + alias_src_dst(src1, dst)) + need_tmp = TRUE; + + if (need_tmp) { + tmp = get_temp( emit ); + } + else { + tmp = dst; + } + + /* tmp.xw = 1.0 + */ + if (tmp.mask & TGSI_WRITEMASK_XW) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(tmp, TGSI_WRITEMASK_XW ), + scalar( zero, 3 ))) + return FALSE; + } + + /* tmp.yz = src0 + */ + if (tmp.mask & TGSI_WRITEMASK_YZ) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(tmp, TGSI_WRITEMASK_YZ ), + src0)) + return FALSE; + } + + /* tmp.yw = tmp * src1 + */ + if (tmp.mask & TGSI_WRITEMASK_YW) { + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + writemask(tmp, TGSI_WRITEMASK_YW ), + src(tmp), + src1)) + return FALSE; + } + + /* dst = tmp + */ + if (need_tmp) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, + src(tmp))) + return FALSE; + } + } + + return TRUE; +} + + 
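For readability: the fragment-shader path of emit_dst_insn() above emulates the D3D DST opcode, which is only available to vertex shaders, by building the result vector {1, a.y*b.y, a.z, b.w} with MOV/MUL under partial writemasks. A minimal standalone C sketch of that per-component arithmetic follows; it is illustrative only, the helper name is invented, and it is not part of this commit — it simply restates the "result[0] = 1 * 1; ..." comment in the function above.

static void
dst_semantics(const float a[4], const float b[4], float result[4])
{
   /* D3D/ARB DST: distance-vector helper used for attenuation terms */
   result[0] = 1.0f;         /* 1   * 1   */
   result[1] = a[1] * b[1];  /* a.y * b.y */
   result[2] = a[2];         /* a.z * 1   */
   result[3] = b[3];         /* 1   * b.w */
}
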
+static boolean emit_exp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = + translate_src_register( emit, &insn->Src[0] ); + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderDestToken fraction; + + if (dst.mask & TGSI_WRITEMASK_Y) + fraction = dst; + else if (dst.mask & TGSI_WRITEMASK_X) + fraction = get_temp( emit ); + + /* If y is being written, fill it with src0 - floor(src0). + */ + if (dst.mask & TGSI_WRITEMASK_XY) { + if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), + writemask( fraction, TGSI_WRITEMASK_Y ), + src0 )) + return FALSE; + } + + /* If x is being written, fill it with 2 ^ floor(src0). + */ + if (dst.mask & TGSI_WRITEMASK_X) { + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), + writemask( dst, dst.mask & TGSI_WRITEMASK_X ), + src0, + scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) ) + return FALSE; + + if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), + writemask( dst, dst.mask & TGSI_WRITEMASK_X ), + scalar( src( dst ), TGSI_SWIZZLE_X ) ) ) + return FALSE; + + if (!(dst.mask & TGSI_WRITEMASK_Y)) + release_temp( emit, fraction ); + } + + /* If z is being written, fill it with 2 ^ src0 (partial precision). + */ + if (dst.mask & TGSI_WRITEMASK_Z) { + if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ), + writemask( dst, dst.mask & TGSI_WRITEMASK_Z ), + src0 ) ) + return FALSE; + } + + /* If w is being written, fill it with one. + */ + if (dst.mask & TGSI_WRITEMASK_W) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(dst, TGSI_WRITEMASK_W), + scalar( zero, TGSI_SWIZZLE_W ) )) + return FALSE; + } + + return TRUE; +} + +static boolean emit_lit(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + /* SVGA/DX9 has a LIT instruction, but only for vertex shaders: + */ + return emit_simple_instruction(emit, SVGA3DOP_LIT, insn); + } + else { + + /* D3D vs. GL semantics can be fairly easily accomodated by + * variations on this sequence. + * + * GL: + * tmp.y = src.x + * tmp.z = pow(src.y,src.w) + * p0 = src0.xxxx > 0 + * result = zero.wxxw + * (p0) result.yz = tmp + * + * D3D: + * tmp.y = src.x + * tmp.z = pow(src.y,src.w) + * p0 = src0.xxyy > 0 + * result = zero.wxxw + * (p0) result.yz = tmp + * + * Will implement the GL version for now. 
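+       *
+       * For reference, TGSI/ARB LIT computes roughly:
+       *
+       *   result.x = 1
+       *   result.y = max(src.x, 0)
+       *   result.z = (src.x > 0) ? pow(max(src.y, 0), src.w) : 0
+       *   result.w = 1
+       *
+       * which is what the predicated sequence below produces ({0}.wxxw
+       * supplies the 1/0/0/1 defaults; the pow clamping is not done
+       * explicitly here).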
+ */ + + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp = get_temp( emit ); + const struct src_register src0 = translate_src_register( + emit, &insn->Src[0] ); + struct src_register zero = get_zero_immediate( emit ); + + /* tmp = pow(src.y, src.w) + */ + if (dst.mask & TGSI_WRITEMASK_Z) { + if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), + tmp, + scalar(src0, 1), + scalar(src0, 3))) + return FALSE; + } + + /* tmp.y = src.x + */ + if (dst.mask & TGSI_WRITEMASK_Y) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(tmp, TGSI_WRITEMASK_Y ), + scalar(src0, 0))) + return FALSE; + } + + /* Can't quite do this with emit conditional due to the extra + * writemask on the predicated mov: + */ + { + SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); + SVGA3dShaderInstToken setp_token, mov_token; + struct src_register predsrc; + + setp_token = inst_token( SVGA3DOP_SETP ); + mov_token = inst_token( SVGA3DOP_MOV ); + + setp_token.control = SVGA3DOPCOMP_GT; + + /* D3D vs GL semantics: + */ + if (0) + predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */ + else + predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */ + + /* SETP src0.xxyy, GT, {0}.x */ + if (!submit_op2( emit, setp_token, pred_reg, + predsrc, + swizzle(zero, 0, 0, 0, 0) )) + return FALSE; + + /* MOV dst, fail */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, + swizzle(zero, 3, 0, 0, 3 ))) + return FALSE; + + /* MOV dst.yz, tmp (predicated) + * + * Note that the predicate reg (and possible modifiers) is passed + * as the first source argument. + */ + if (dst.mask & TGSI_WRITEMASK_YZ) { + mov_token.predicated = 1; + if (!submit_op2( emit, mov_token, + writemask(dst, TGSI_WRITEMASK_YZ), + src( pred_reg ), src( tmp ) )) + return FALSE; + } + } + } + + return TRUE; +} + + + + +static boolean emit_ex2( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + struct src_register src0; + + inst = inst_token( SVGA3DOP_EXP ); + dst = translate_dst_register( emit, insn, 0 ); + src0 = translate_src_register( emit, &insn->Src[0] ); + src0 = scalar( src0, TGSI_SWIZZLE_X ); + + if (dst.mask != TGSI_WRITEMASK_XYZW) { + SVGA3dShaderDestToken tmp = get_temp( emit ); + + if (!submit_op1( emit, inst, tmp, src0 )) + return FALSE; + + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, + scalar( src( tmp ), TGSI_SWIZZLE_X ) ); + } + + return submit_op1( emit, inst, dst, src0 ); +} + + +static boolean emit_log(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = + translate_src_register( emit, &insn->Src[0] ); + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderDestToken abs_tmp; + struct src_register abs_src0; + SVGA3dShaderDestToken log2_abs; + + if (dst.mask & TGSI_WRITEMASK_Z) + log2_abs = dst; + else if (dst.mask & TGSI_WRITEMASK_XY) + log2_abs = get_temp( emit ); + + /* If z is being written, fill it with log2( abs( src0 ) ). 
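+    *
+    * Overall this open-codes the TGSI LOG opcode:
+    *
+    *   result.x = floor(log2(|src.x|))
+    *   result.y = |src.x| / 2^floor(log2(|src.x|))
+    *   result.z = log2(|src.x|)
+    *   result.w = 1
+    *
+    * The log2 value is computed whenever any of x/y/z is written,
+    * since x and y are derived from it.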
+ */ + if (dst.mask & TGSI_WRITEMASK_XYZ) { + if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS) + abs_src0 = src0; + else { + abs_tmp = get_temp( emit ); + + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + abs_tmp, + src0 ) ) + return FALSE; + + abs_src0 = src( abs_tmp ); + } + + abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) ); + + if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ), + writemask( log2_abs, TGSI_WRITEMASK_Z ), + abs_src0 ) ) + return FALSE; + } + + if (dst.mask & TGSI_WRITEMASK_XY) { + SVGA3dShaderDestToken floor_log2; + + if (dst.mask & TGSI_WRITEMASK_X) + floor_log2 = dst; + else + floor_log2 = get_temp( emit ); + + /* If x is being written, fill it with floor( log2( abs( src0 ) ) ). + */ + if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), + writemask( floor_log2, TGSI_WRITEMASK_X ), + scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) ) + return FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), + writemask( floor_log2, TGSI_WRITEMASK_X ), + scalar( src( log2_abs ), TGSI_SWIZZLE_Z ), + negate( src( floor_log2 ) ) ) ) + return FALSE; + + /* If y is being written, fill it with + * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ). + */ + if (dst.mask & TGSI_WRITEMASK_Y) { + if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), + writemask( dst, TGSI_WRITEMASK_Y ), + negate( scalar( src( floor_log2 ), + TGSI_SWIZZLE_X ) ) ) ) + return FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + writemask( dst, TGSI_WRITEMASK_Y ), + src( dst ), + abs_src0 ) ) + return FALSE; + } + + if (!(dst.mask & TGSI_WRITEMASK_X)) + release_temp( emit, floor_log2 ); + + if (!(dst.mask & TGSI_WRITEMASK_Z)) + release_temp( emit, log2_abs ); + } + + if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod && + src0.base.srcMod != SVGA3DSRCMOD_ABS) + release_temp( emit, abs_tmp ); + + /* If w is being written, fill it with one. + */ + if (dst.mask & TGSI_WRITEMASK_W) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(dst, TGSI_WRITEMASK_W), + scalar( zero, TGSI_SWIZZLE_W ) )) + return FALSE; + } + + return TRUE; +} + + +static boolean emit_bgnsub( struct svga_shader_emitter *emit, + unsigned position, + const struct tgsi_full_instruction *insn ) +{ + unsigned i; + + /* Note that we've finished the main function and are now emitting + * subroutines. This affects how we terminate the generated + * shader. 
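+    *
+    * The RET emitted below terminates whatever was being emitted before
+    * (the main function or a previous subroutine), and the LABEL that
+    * follows marks the entry point that emit_call() will CALL into.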
+ */ + emit->in_main_func = FALSE; + + for (i = 0; i < emit->nr_labels; i++) { + if (emit->label[i] == position) { + return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) && + emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) && + emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); + } + } + + assert(0); + return TRUE; +} + +static boolean emit_call( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + unsigned position = insn->Label.Label; + unsigned i; + + for (i = 0; i < emit->nr_labels; i++) { + if (emit->label[i] == position) + break; + } + + if (emit->nr_labels == Elements(emit->label)) + return FALSE; + + if (i == emit->nr_labels) { + emit->label[i] = position; + emit->nr_labels++; + } + + return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) && + emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); +} + + +static boolean emit_end( struct svga_shader_emitter *emit ) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + return emit_vs_postamble( emit ); + } + else { + return emit_ps_postamble( emit ); + } +} + + + +static boolean svga_emit_instruction( struct svga_shader_emitter *emit, + unsigned position, + const struct tgsi_full_instruction *insn ) +{ + switch (insn->Instruction.Opcode) { + + case TGSI_OPCODE_ARL: + return emit_arl( emit, insn ); + + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXD: + return emit_tex( emit, insn ); + + case TGSI_OPCODE_BGNSUB: + return emit_bgnsub( emit, position, insn ); + + case TGSI_OPCODE_ENDSUB: + return TRUE; + + case TGSI_OPCODE_CAL: + return emit_call( emit, insn ); + + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */ + return emit_floor( emit, insn ); + + case TGSI_OPCODE_CMP: + return emit_cmp( emit, insn ); + + case TGSI_OPCODE_DIV: + return emit_div( emit, insn ); + + case TGSI_OPCODE_DP2: + return emit_dp2( emit, insn ); + + case TGSI_OPCODE_DPH: + return emit_dph( emit, insn ); + + case TGSI_OPCODE_NRM: + return emit_nrm( emit, insn ); + + case TGSI_OPCODE_COS: + return emit_cos( emit, insn ); + + case TGSI_OPCODE_SIN: + return emit_sin( emit, insn ); + + case TGSI_OPCODE_SCS: + return emit_sincos( emit, insn ); + + case TGSI_OPCODE_END: + /* TGSI always finishes the main func with an END */ + return emit_end( emit ); + + case TGSI_OPCODE_KIL: + return emit_kil( emit, insn ); + + /* Selection opcodes. The underlying language is fairly + * non-orthogonal about these. 
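+    *
+    * All six set-on-compare opcodes below (SEQ/SNE/SGT/SGE/SLT/SLE) are
+    * funnelled through emit_select_op() with the matching PIPE_FUNC_*
+    * comparison.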
+ */ + case TGSI_OPCODE_SEQ: + return emit_select_op( emit, PIPE_FUNC_EQUAL, insn ); + + case TGSI_OPCODE_SNE: + return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn ); + + case TGSI_OPCODE_SGT: + return emit_select_op( emit, PIPE_FUNC_GREATER, insn ); + + case TGSI_OPCODE_SGE: + return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn ); + + case TGSI_OPCODE_SLT: + return emit_select_op( emit, PIPE_FUNC_LESS, insn ); + + case TGSI_OPCODE_SLE: + return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); + + case TGSI_OPCODE_SUB: + return emit_sub( emit, insn ); + + case TGSI_OPCODE_POW: + return emit_pow( emit, insn ); + + case TGSI_OPCODE_EX2: + return emit_ex2( emit, insn ); + + case TGSI_OPCODE_EXP: + return emit_exp( emit, insn ); + + case TGSI_OPCODE_LOG: + return emit_log( emit, insn ); + + case TGSI_OPCODE_LG2: + return emit_scalar_op1( emit, SVGA3DOP_LOG, insn ); + + case TGSI_OPCODE_RSQ: + return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn ); + + case TGSI_OPCODE_RCP: + return emit_scalar_op1( emit, SVGA3DOP_RCP, insn ); + + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_RET: + /* This is a noop -- we tell mesa that we can't support RET + * within a function (early return), so this will always be + * followed by an ENDSUB. + */ + return TRUE; + + /* These aren't actually used by any of the frontends we care + * about: + */ + case TGSI_OPCODE_CLAMP: + case TGSI_OPCODE_ROUND: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_SHR: + case TGSI_OPCODE_XOR: + return FALSE; + + case TGSI_OPCODE_IF: + return emit_if( emit, insn ); + case TGSI_OPCODE_ELSE: + return emit_else( emit, insn ); + case TGSI_OPCODE_ENDIF: + return emit_endif( emit, insn ); + + case TGSI_OPCODE_BGNLOOP: + return emit_bgnloop2( emit, insn ); + case TGSI_OPCODE_ENDLOOP: + return emit_endloop2( emit, insn ); + case TGSI_OPCODE_BRK: + return emit_brk( emit, insn ); + + case TGSI_OPCODE_XPD: + return emit_xpd( emit, insn ); + + case TGSI_OPCODE_KILP: + return emit_kilp( emit, insn ); + + case TGSI_OPCODE_DST: + return emit_dst_insn( emit, insn ); + + case TGSI_OPCODE_LIT: + return emit_lit( emit, insn ); + + case TGSI_OPCODE_LRP: + return emit_lrp( emit, insn ); + + default: { + unsigned opcode = translate_opcode(insn->Instruction.Opcode); + + if (opcode == SVGA3DOP_LAST_INST) + return FALSE; + + if (!emit_simple_instruction( emit, opcode, insn )) + return FALSE; + } + } + + return TRUE; +} + + +static boolean svga_emit_immediate( struct svga_shader_emitter *emit, + struct tgsi_full_immediate *imm) +{ + static const float id[4] = {0,0,0,1}; + float value[4]; + unsigned i; + + assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5); + for (i = 0; i < imm->Immediate.NrTokens - 1; i++) + value[i] = imm->u[i].Float; + + for ( ; i < 4; i++ ) + value[i] = id[i]; + + return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + emit->imm_start + emit->internal_imm_count++, + value[0], value[1], value[2], value[3]); +} + +static boolean make_immediate( struct svga_shader_emitter *emit, + float a, + float b, + float c, + float d, + struct src_register *out ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, a, b, c, d )) + return FALSE; + + *out = src_register( SVGA3DREG_CONST, idx ); + + return TRUE; +} + +static boolean emit_vs_preamble( struct svga_shader_emitter *emit ) +{ + if (!emit->key.vkey.need_prescale) { + if (!make_immediate( emit, 0, 0, .5, .5, + &emit->imm_0055)) + return FALSE; + } 
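+   /* imm_0055 = {0, 0, 0.5, 0.5} is later used by emit_vs_postamble()
+    * to remap GL clip-space Z from [-1,1] to D3D's [0,1] with a single
+    * DP4: z' = 0.5*z + 0.5*w.
+    */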
+ + return TRUE; +} + +static boolean emit_ps_preamble( struct svga_shader_emitter *emit ) +{ + unsigned i; + + /* For SM20, need to initialize the temporaries we're using to hold + * color outputs to some value. Shaders which don't set all of + * these values are likely to be rejected by the DX9 runtime. + */ + if (!emit->use_sm30) { + struct src_register zero = get_zero_immediate( emit ); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->temp_col[i], + zero )) + return FALSE; + } + } + } + + return TRUE; +} + +static boolean emit_ps_postamble( struct svga_shader_emitter *emit ) +{ + unsigned i; + + /* PS oDepth is incredibly fragile and it's very hard to catch the + * types of usage that break it during shader emit. Easier just to + * redirect the main program to a temporary and then only touch + * oDepth with a hand-crafted MOV below. + */ + if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_pos, + scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) )) + return FALSE; + } + + /* Similarly for SM20 color outputs... Luckily SM30 isn't so + * fragile. + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + src(emit->temp_col[i]) )) + return FALSE; + } + } + + return TRUE; +} + +static boolean emit_vs_postamble( struct svga_shader_emitter *emit ) +{ + /* PSIZ output is incredibly fragile and it's very hard to catch + * the types of usage that break it during shader emit. Easier + * just to redirect the main program to a temporary and then only + * touch PSIZ with a hand-crafted MOV below. + */ + if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_psiz, + scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) )) + return FALSE; + } + + /* Need to perform various manipulations on vertex position to cope + * with the different GL and D3D clip spaces. 
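+    *
+    * Two paths below: with prescale, the position is scaled and
+    * translated by two constants placed just past the user constants
+    * (MUL then MAD, relying on prescale.trans.w == 0); otherwise only
+    * Z is remapped from [-1,1] to [0,1] via DP4 with {0,0,.5,.5}.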
+ */ + if (emit->key.vkey.need_prescale) { + SVGA3dShaderDestToken temp_pos = emit->temp_pos; + SVGA3dShaderDestToken pos = emit->true_pos; + unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1; + struct src_register prescale_scale = src_register( SVGA3DREG_CONST, + offset + 0 ); + struct src_register prescale_trans = src_register( SVGA3DREG_CONST, + offset + 1 ); + + /* MUL temp_pos.xyz, temp_pos, prescale.scale + * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos + * --> Note that prescale.trans.w == 0 + */ + if (!submit_op2( emit, + inst_token(SVGA3DOP_MUL), + writemask(temp_pos, TGSI_WRITEMASK_XYZ), + src(temp_pos), + prescale_scale )) + return FALSE; + + if (!submit_op3( emit, + inst_token(SVGA3DOP_MAD), + pos, + swizzle(src(temp_pos), 3, 3, 3, 3), + prescale_trans, + src(temp_pos))) + return FALSE; + } + else { + SVGA3dShaderDestToken temp_pos = emit->temp_pos; + SVGA3dShaderDestToken pos = emit->true_pos; + struct src_register imm_0055 = emit->imm_0055; + + /* Adjust GL clipping coordinate space to hardware (D3D-style): + * + * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos + * MOV result.position, temp_pos + */ + if (!submit_op2( emit, + inst_token(SVGA3DOP_DP4), + writemask(temp_pos, TGSI_WRITEMASK_Z), + imm_0055, + src(temp_pos) )) + return FALSE; + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + pos, + src(temp_pos) )) + return FALSE; + } + + return TRUE; +} + +/* + 0: IF VFACE :4 + 1: COLOR = FrontColor; + 2: ELSE + 3: COLOR = BackColor; + 4: ENDIF + */ +static boolean emit_light_twoside( struct svga_shader_emitter *emit ) +{ + struct src_register vface, zero; + struct src_register front[2]; + struct src_register back[2]; + SVGA3dShaderDestToken color[2]; + int count = emit->internal_color_count; + int i; + SVGA3dShaderInstToken if_token; + + if (count == 0) + return TRUE; + + vface = get_vface( emit ); + zero = get_zero_immediate( emit ); + + /* Can't use get_temp() to allocate the color reg as such + * temporaries will be reclaimed after each instruction by the call + * to reset_temp_regs(). 
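+    *
+    * Instead a hardware temp is allocated permanently (nr_hw_temp++)
+    * for each color, and input_map[] is redirected to it so that the
+    * rest of the shader reads the selected front/back color from there.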
+ */ + for (i = 0; i < count; i++) { + color[i] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + + front[i] = emit->input_map[emit->internal_color_idx[i]]; + + /* Back is always the next input: + */ + back[i] = front[i]; + back[i].base.num = front[i].base.num + 1; + + /* Reassign the input_map to the actual front-face color: + */ + emit->input_map[emit->internal_color_idx[i]] = src(color[i]); + } + + if_token = inst_token( SVGA3DOP_IFC ); + + if (emit->key.fkey.front_cw) + if_token.control = SVGA3DOPCOMP_GT; + else + if_token.control = SVGA3DOPCOMP_LT; + + zero = scalar(zero, TGSI_SWIZZLE_X); + + if (!(emit_instruction( emit, if_token ) && + emit_src( emit, vface ) && + emit_src( emit, zero ) )) + return FALSE; + + for (i = 0; i < count; i++) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] )) + return FALSE; + } + + if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE)))) + return FALSE; + + for (i = 0; i < count; i++) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] )) + return FALSE; + } + + if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) )) + return FALSE; + + return TRUE; +} + +/* + 0: SETP_GT TEMP, VFACE, 0 + where TEMP is a fake frontface register + */ +static boolean emit_frontface( struct svga_shader_emitter *emit ) +{ + struct src_register vface, zero; + SVGA3dShaderDestToken temp; + struct src_register pass, fail; + + vface = get_vface( emit ); + zero = get_zero_immediate( emit ); + + /* Can't use get_temp() to allocate the fake frontface reg as such + * temporaries will be reclaimed after each instruction by the call + * to reset_temp_regs(). + */ + temp = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + + if (emit->key.fkey.front_cw) { + pass = scalar( zero, TGSI_SWIZZLE_W ); + fail = scalar( zero, TGSI_SWIZZLE_X ); + } else { + pass = scalar( zero, TGSI_SWIZZLE_X ); + fail = scalar( zero, TGSI_SWIZZLE_W ); + } + + if (!emit_conditional(emit, PIPE_FUNC_GREATER, + temp, vface, scalar( zero, TGSI_SWIZZLE_X ), + pass, fail)) + return FALSE; + + /* Reassign the input_map to the actual front-face color: + */ + emit->input_map[emit->internal_frontface_idx] = src(temp); + + return TRUE; +} + +static INLINE boolean +needs_to_create_zero( struct svga_shader_emitter *emit ) +{ + int i; + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (!emit->use_sm30) + return TRUE; + + if (emit->key.fkey.light_twoside) + return TRUE; + + if (emit->emit_frontface) + return TRUE; + + if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) + return TRUE; + } + + if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1) + return TRUE; + + for (i = 0; i < emit->key.fkey.num_textures; i++) { + if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + return TRUE; + } + + return FALSE; +} + +static INLINE boolean +needs_to_create_loop_const( struct svga_shader_emitter *emit ) +{ + return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1); +} + +static INLINE boolean 
+needs_to_create_sincos_consts( struct svga_shader_emitter *emit ) +{ + return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1); +} + +static INLINE boolean +needs_to_create_arl_consts( struct svga_shader_emitter *emit ) +{ + return (emit->num_arl_consts > 0); +} + +static INLINE boolean +pre_parse_add_indirect( struct svga_shader_emitter *emit, + int num, int current_arl) +{ + int i; + assert(num < 0); + + for (i = 0; i < emit->num_arl_consts; ++i) { + if (emit->arl_consts[i].arl_num == current_arl) + break; + } + /* new entry */ + if (emit->num_arl_consts == i) { + ++emit->num_arl_consts; + } + emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ? + num : + emit->arl_consts[i].number; + emit->arl_consts[i].arl_num = current_arl; + return TRUE; +} + +static boolean +pre_parse_instruction( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + int current_arl) +{ + if (insn->Src[0].Register.Indirect && + insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[0]; + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); + } + } + + if (insn->Src[1].Register.Indirect && + insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[1]; + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); + } + } + + if (insn->Src[2].Register.Indirect && + insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[2]; + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); + } + } + + return TRUE; +} + +static boolean +pre_parse_tokens( struct svga_shader_emitter *emit, + const struct tgsi_token *tokens ) +{ + struct tgsi_parse_context parse; + int current_arl = 0; + + tgsi_parse_init( &parse, tokens ); + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_DECLARATION: + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (parse.FullToken.FullInstruction.Instruction.Opcode == + TGSI_OPCODE_ARL) { + ++current_arl; + } + if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction, + current_arl )) + return FALSE; + break; + default: + break; + } + + } + return TRUE; +} + +static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit ) + +{ + if (needs_to_create_zero( emit )) { + create_zero_immediate( emit ); + } + if (needs_to_create_loop_const( emit )) { + create_loop_const( emit ); + } + if (needs_to_create_sincos_consts( emit )) { + create_sincos_consts( emit ); + } + if (needs_to_create_arl_consts( emit )) { + create_arl_consts( emit ); + } + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (!emit_ps_preamble( emit )) + return FALSE; + + if (emit->key.fkey.light_twoside) { + if (!emit_light_twoside( emit )) + return FALSE; + } + if (emit->emit_frontface) { + if (!emit_frontface( emit )) + return FALSE; + } + } + + return TRUE; +} + +boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, + const struct tgsi_token *tokens ) +{ + struct tgsi_parse_context parse; + boolean ret = TRUE; + boolean helpers_emitted = FALSE; + unsigned line_nr = 0; + + tgsi_parse_init( &parse, tokens ); + emit->internal_imm_count = 0; + + if (emit->unit 
== PIPE_SHADER_VERTEX) { + ret = emit_vs_preamble( emit ); + if (!ret) + goto done; + } + + pre_parse_tokens(emit, tokens); + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate ); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (emit->use_sm30) + ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration ); + else + ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration ); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!helpers_emitted) { + if (!svga_shader_emit_helpers( emit )) + goto done; + helpers_emitted = TRUE; + } + ret = svga_emit_instruction( emit, + line_nr++, + &parse.FullToken.FullInstruction ); + if (!ret) + goto done; + break; + default: + break; + } + + reset_temp_regs( emit ); + } + + /* Need to terminate the current subroutine. Note that the + * hardware doesn't tolerate shaders without sub-routines + * terminating with RET+END. + */ + if (!emit->in_main_func) { + ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) ); + if (!ret) + goto done; + } + + /* Need to terminate the whole shader: + */ + ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); + if (!ret) + goto done; + +done: + assert(ret); + tgsi_parse_free( &parse ); + return ret; +} + diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h new file mode 100644 index 0000000000..59f299c185 --- /dev/null +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -0,0 +1,299 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * VMware SVGA specific winsys interface. + * + * @author Jose Fonseca <jfonseca@vmware.com> + * + * Documentation taken from the VMware SVGA DDK. 
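+ *
+ * The interface consists of two vtables implemented by the winsys
+ * layer for the svga pipe driver: svga_winsys_screen (device-wide
+ * surface, buffer and fence management) and svga_winsys_context
+ * (per-context command buffer reservation, relocations and flush).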
+ */ + +#ifndef SVGA_WINSYS_H_ +#define SVGA_WINSYS_H_ + + +#include "svga_types.h" +#include "svga_reg.h" +#include "svga3d_reg.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" + + +struct svga_winsys_screen; +struct svga_winsys_buffer; +struct pipe_screen; +struct pipe_context; +struct pipe_fence_handle; +struct pipe_texture; +struct svga_region; + + +#define SVGA_BUFFER_USAGE_PINNED (PIPE_BUFFER_USAGE_CUSTOM << 0) +#define SVGA_BUFFER_USAGE_WRAPPED (PIPE_BUFFER_USAGE_CUSTOM << 1) + + +/** Opaque surface handle */ +struct svga_winsys_surface; + +/** Opaque buffer handle */ +struct svga_winsys_handle; + + +/** + * SVGA per-context winsys interface. + */ +struct svga_winsys_context +{ + void + (*destroy)(struct svga_winsys_context *swc); + + void * + (*reserve)(struct svga_winsys_context *swc, + uint32_t nr_bytes, uint32_t nr_relocs ); + + /** + * Emit a relocation for a host surface. + * + * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE + * + * NOTE: Order of this call does matter. It should be the same order + * as relocations appear in the command buffer. + */ + void + (*surface_relocation)(struct svga_winsys_context *swc, + uint32 *sid, + struct svga_winsys_surface *surface, + unsigned flags); + + /** + * Emit a relocation for a guest memory region. + * + * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE + * + * NOTE: Order of this call does matter. It should be the same order + * as relocations appear in the command buffer. + */ + void + (*region_relocation)(struct svga_winsys_context *swc, + struct SVGAGuestPtr *ptr, + struct svga_winsys_buffer *buffer, + uint32 offset, + unsigned flags); + + void + (*commit)(struct svga_winsys_context *swc); + + enum pipe_error + (*flush)(struct svga_winsys_context *swc, + struct pipe_fence_handle **pfence); + + /** + * Context ID used to fill in the commands + * + * Context IDs are arbitrary small non-negative integers, + * global to the entire SVGA device. + */ + uint32 cid; +}; + + +/** + * SVGA per-screen winsys interface. + */ +struct svga_winsys_screen +{ + void + (*destroy)(struct svga_winsys_screen *sws); + + boolean + (*get_cap)(struct svga_winsys_screen *sws, + SVGA3dDevCapIndex index, + SVGA3dDevCapResult *result); + + /** + * Create a new context. + * + * Context objects encapsulate all render state, and shader + * objects are per-context. + * + * Surfaces are not per-context. The same surface can be shared + * between multiple contexts, and surface operations can occur + * without a context. + */ + struct svga_winsys_context * + (*context_create)(struct svga_winsys_screen *sws); + + + /** + * This creates a "surface" object in the SVGA3D device, + * and returns the surface ID (sid). Surfaces are generic + * containers for host VRAM objects like textures, vertex + * buffers, and depth/stencil buffers. + * + * Surfaces are hierarchial: + * + * - Surface may have multiple faces (for cube maps) + * + * - Each face has a list of mipmap levels + * + * - Each mipmap image may have multiple volume + * slices, if the image is three dimensional. + * + * - Each slice is a 2D array of 'blocks' + * + * - Each block may be one or more pixels. + * (Usually 1, more for DXT or YUV formats.) + * + * Surfaces are generic host VRAM objects. The SVGA3D device + * may optimize surfaces according to the format they were + * created with, but this format does not limit the ways in + * which the surface may be used. For example, a depth surface + * can be used as a texture, or a floating point image may + * be used as a vertex buffer. 
Some surface usages may be + * lower performance, due to software emulation, but any + * usage should work with any surface. + */ + struct svga_winsys_surface * + (*surface_create)(struct svga_winsys_screen *sws, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSize size, + uint32 numFaces, + uint32 numMipLevels); + + /** + * Whether this surface is sitting in a validate list + */ + boolean + (*surface_is_flushed)(struct svga_winsys_screen *sws, + struct svga_winsys_surface *surface); + + /** + * Reference a SVGA3D surface object. This allows sharing of a + * surface between different objects. + */ + void + (*surface_reference)(struct svga_winsys_screen *sws, + struct svga_winsys_surface **pdst, + struct svga_winsys_surface *src); + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + * + * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This + * usage argument is only an optimization hint, not a guarantee, therefore + * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. + */ + struct svga_winsys_buffer * + (*buffer_create)( struct svga_winsys_screen *sws, + unsigned alignment, + unsigned usage, + unsigned size ); + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is a bitmask of: + * - PIPE_BUFFER_USAGE_CPU_READ/WRITE + * - PIPE_BUFFER_USAGE_DONTBLOCK + * - PIPE_BUFFER_USAGE_UNSYNCHRONIZED + */ + void * + (*buffer_map)( struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf, + unsigned usage ); + + void + (*buffer_unmap)( struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf ); + + void + (*buffer_destroy)( struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf ); + + + /** + * Reference a fence object. + */ + void + (*fence_reference)( struct svga_winsys_screen *sws, + struct pipe_fence_handle **pdst, + struct pipe_fence_handle *src ); + + /** + * Checks whether the fence has been signalled. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct svga_winsys_screen *sws, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. 
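+    *
+    * A minimal usage sketch (variable names are illustrative only):
+    *
+    *    struct pipe_fence_handle *fence = NULL;
+    *    swc->flush(swc, &fence);
+    *    if (fence) {
+    *       sws->fence_finish(sws, fence, 0);
+    *       sws->fence_reference(sws, &fence, NULL);
+    *    }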
+ */ + int (*fence_finish)( struct svga_winsys_screen *sws, + struct pipe_fence_handle *fence, + unsigned flag ); + +}; + + +struct pipe_context * +svga_context_create(struct pipe_screen *screen); + +struct pipe_screen * +svga_screen_create(struct svga_winsys_screen *sws); + +struct svga_winsys_screen * +svga_winsys_screen(struct pipe_screen *screen); + +struct pipe_buffer * +svga_screen_buffer_wrap_surface(struct pipe_screen *screen, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf); + +struct svga_winsys_surface * +svga_screen_texture_get_winsys_surface(struct pipe_texture *texture); +struct svga_winsys_surface * +svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer); + +boolean +svga_screen_buffer_from_texture(struct pipe_texture *texture, + struct pipe_buffer **buffer, + unsigned *stride); + +#endif /* SVGA_WINSYS_H_ */ diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c new file mode 100644 index 0000000000..910afa2528 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -0,0 +1,1736 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * Dump SVGA commands. + * + * Generated automatically from svga3d_reg.h by svga_dump.py. 
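+ *
+ * Each dump_* helper below prints a single SVGA3D command or
+ * sub-structure field by field via debug_printf(), decoding enum
+ * values to their symbolic names where known.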
+ */ + +#include "svga_types.h" +#include "svga_shader_dump.h" +#include "svga3d_reg.h" + +#include "util/u_debug.h" +#include "svga_dump.h" + +static void +dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd) +{ + switch((*cmd).identity.type) { + case SVGA3D_DECLTYPE_FLOAT1: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n"); + break; + case SVGA3D_DECLTYPE_FLOAT2: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n"); + break; + case SVGA3D_DECLTYPE_FLOAT3: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n"); + break; + case SVGA3D_DECLTYPE_FLOAT4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n"); + break; + case SVGA3D_DECLTYPE_D3DCOLOR: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n"); + break; + case SVGA3D_DECLTYPE_UBYTE4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n"); + break; + case SVGA3D_DECLTYPE_SHORT2: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n"); + break; + case SVGA3D_DECLTYPE_SHORT4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n"); + break; + case SVGA3D_DECLTYPE_UBYTE4N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n"); + break; + case SVGA3D_DECLTYPE_SHORT2N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n"); + break; + case SVGA3D_DECLTYPE_SHORT4N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n"); + break; + case SVGA3D_DECLTYPE_USHORT2N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n"); + break; + case SVGA3D_DECLTYPE_USHORT4N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n"); + break; + case SVGA3D_DECLTYPE_UDEC3: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n"); + break; + case SVGA3D_DECLTYPE_DEC3N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n"); + break; + case SVGA3D_DECLTYPE_FLOAT16_2: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n"); + break; + case SVGA3D_DECLTYPE_FLOAT16_4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n"); + break; + case SVGA3D_DECLTYPE_MAX: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type); + break; + } + switch((*cmd).identity.method) { + case SVGA3D_DECLMETHOD_DEFAULT: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n"); + break; + case SVGA3D_DECLMETHOD_PARTIALU: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n"); + break; + case SVGA3D_DECLMETHOD_PARTIALV: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n"); + break; + case SVGA3D_DECLMETHOD_CROSSUV: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n"); + break; + case SVGA3D_DECLMETHOD_UV: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n"); + break; + case SVGA3D_DECLMETHOD_LOOKUP: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n"); + break; + case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n"); + break; + default: + debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method); + break; + } + switch((*cmd).identity.usage) { + case SVGA3D_DECLUSAGE_POSITION: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n"); + break; + case SVGA3D_DECLUSAGE_BLENDWEIGHT: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n"); + break; + case SVGA3D_DECLUSAGE_BLENDINDICES: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n"); + 
break; + case SVGA3D_DECLUSAGE_NORMAL: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n"); + break; + case SVGA3D_DECLUSAGE_PSIZE: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n"); + break; + case SVGA3D_DECLUSAGE_TEXCOORD: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n"); + break; + case SVGA3D_DECLUSAGE_TANGENT: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n"); + break; + case SVGA3D_DECLUSAGE_BINORMAL: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n"); + break; + case SVGA3D_DECLUSAGE_TESSFACTOR: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n"); + break; + case SVGA3D_DECLUSAGE_POSITIONT: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n"); + break; + case SVGA3D_DECLUSAGE_COLOR: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n"); + break; + case SVGA3D_DECLUSAGE_FOG: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n"); + break; + case SVGA3D_DECLUSAGE_DEPTH: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n"); + break; + case SVGA3D_DECLUSAGE_SAMPLE: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n"); + break; + case SVGA3D_DECLUSAGE_MAX: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n"); + break; + default: + debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage); + break; + } + debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex); + debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId); + debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset); + debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride); + debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first); + debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last); +} + +static void +dump_SVGA3dTextureState(const SVGA3dTextureState *cmd) +{ + debug_printf("\t\t.stage = %u\n", (*cmd).stage); + switch((*cmd).name) { + case SVGA3D_TS_INVALID: + debug_printf("\t\t.name = SVGA3D_TS_INVALID\n"); + break; + case SVGA3D_TS_BIND_TEXTURE: + debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n"); + break; + case SVGA3D_TS_COLOROP: + debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n"); + break; + case SVGA3D_TS_COLORARG1: + debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n"); + break; + case SVGA3D_TS_COLORARG2: + debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n"); + break; + case SVGA3D_TS_ALPHAOP: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n"); + break; + case SVGA3D_TS_ALPHAARG1: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n"); + break; + case SVGA3D_TS_ALPHAARG2: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n"); + break; + case SVGA3D_TS_ADDRESSU: + debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n"); + break; + case SVGA3D_TS_ADDRESSV: + debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n"); + break; + case SVGA3D_TS_MIPFILTER: + debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n"); + break; + case SVGA3D_TS_MAGFILTER: + debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n"); + break; + case SVGA3D_TS_MINFILTER: + debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n"); + break; + case SVGA3D_TS_BORDERCOLOR: + debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n"); + break; + case SVGA3D_TS_TEXCOORDINDEX: + debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n"); + break; + case SVGA3D_TS_TEXTURETRANSFORMFLAGS: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n"); + break; + case SVGA3D_TS_TEXCOORDGEN: + debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n"); + break; + case 
SVGA3D_TS_BUMPENVMAT00: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n"); + break; + case SVGA3D_TS_BUMPENVMAT01: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n"); + break; + case SVGA3D_TS_BUMPENVMAT10: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n"); + break; + case SVGA3D_TS_BUMPENVMAT11: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n"); + break; + case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n"); + break; + case SVGA3D_TS_TEXTURE_LOD_BIAS: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n"); + break; + case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n"); + break; + case SVGA3D_TS_ADDRESSW: + debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n"); + break; + case SVGA3D_TS_GAMMA: + debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n"); + break; + case SVGA3D_TS_BUMPENVLSCALE: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n"); + break; + case SVGA3D_TS_BUMPENVLOFFSET: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n"); + break; + case SVGA3D_TS_COLORARG0: + debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n"); + break; + case SVGA3D_TS_ALPHAARG0: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n"); + break; + case SVGA3D_TS_MAX: + debug_printf("\t\t.name = SVGA3D_TS_MAX\n"); + break; + default: + debug_printf("\t\t.name = %i\n", (*cmd).name); + break; + } + debug_printf("\t\t.value = %u\n", (*cmd).value); + debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); +} + +static void +dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd) +{ + debug_printf("\t\t.x = %u\n", (*cmd).x); + debug_printf("\t\t.y = %u\n", (*cmd).y); + debug_printf("\t\t.z = %u\n", (*cmd).z); + debug_printf("\t\t.w = %u\n", (*cmd).w); + debug_printf("\t\t.h = %u\n", (*cmd).h); + debug_printf("\t\t.d = %u\n", (*cmd).d); + debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); + debug_printf("\t\t.srcz = %u\n", (*cmd).srcz); +} + +static void +dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.index = %u\n", (*cmd).index); + debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]); + debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]); + debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]); + debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]); +} + +static void +dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_QUERYTYPE_OCCLUSION: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + break; + case SVGA3D_QUERYTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); +} + +static void +dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_RT_DEPTH: + debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n"); + break; + case SVGA3D_RT_STENCIL: + debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n"); + break; + default: + debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0); + break; + } + debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid); + debug_printf("\t\t.target.face = %u\n", (*cmd).target.face); + 
debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap); +} + +static void +dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd) +{ + debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); +} + +static void +dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).face) { + case SVGA3D_FACE_INVALID: + debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n"); + break; + case SVGA3D_FACE_NONE: + debug_printf("\t\t.face = SVGA3D_FACE_NONE\n"); + break; + case SVGA3D_FACE_FRONT: + debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n"); + break; + case SVGA3D_FACE_BACK: + debug_printf("\t\t.face = SVGA3D_FACE_BACK\n"); + break; + case SVGA3D_FACE_FRONT_BACK: + debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n"); + break; + case SVGA3D_FACE_MAX: + debug_printf("\t\t.face = SVGA3D_FACE_MAX\n"); + break; + default: + debug_printf("\t\t.face = %i\n", (*cmd).face); + break; + } + debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]); + debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]); + debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]); + debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]); + debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]); + debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]); + debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]); + debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]); + debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]); + debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]); + debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]); + debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]); + debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]); + debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]); + debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]); + debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]); + debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess); +} + +static void +dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.index = %u\n", (*cmd).index); + switch((*cmd).data.type) { + case SVGA3D_LIGHTTYPE_INVALID: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n"); + break; + case SVGA3D_LIGHTTYPE_POINT: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n"); + break; + case SVGA3D_LIGHTTYPE_SPOT1: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n"); + break; + case SVGA3D_LIGHTTYPE_SPOT2: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n"); + break; + case SVGA3D_LIGHTTYPE_DIRECTIONAL: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n"); + break; + case SVGA3D_LIGHTTYPE_MAX: + debug_printf("\t\t.data.type = 
SVGA3D_LIGHTTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.data.type = %i\n", (*cmd).data.type); + break; + } + debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace); + debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]); + debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]); + debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]); + debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]); + debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]); + debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]); + debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]); + debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]); + debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]); + debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]); + debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]); + debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]); + debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]); + debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]); + debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]); + debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]); + debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]); + debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]); + debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]); + debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]); + debug_printf("\t\t.data.range = %f\n", (*cmd).data.range); + debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff); + debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0); + debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1); + debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2); + debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta); + debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi); +} + +static void +dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); +} + +static void +dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); +} + +static void +dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd) +{ + debug_printf("\t\t.x = %u\n", (*cmd).x); + debug_printf("\t\t.y = %u\n", (*cmd).y); + debug_printf("\t\t.w = %u\n", (*cmd).w); + debug_printf("\t\t.h = %u\n", (*cmd).h); + debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); +} + +static void +dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = 
SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.shid = %u\n", (*cmd).shid); +} + +static void +dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_QUERYTYPE_OCCLUSION: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + break; + case SVGA3D_QUERYTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); +} + +static void +dump_SVGA3dSize(const SVGA3dSize *cmd) +{ + debug_printf("\t\t.width = %u\n", (*cmd).width); + debug_printf("\t\t.height = %u\n", (*cmd).height); + debug_printf("\t\t.depth = %u\n", (*cmd).depth); +} + +static void +dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd) +{ + debug_printf("\t\t.sid = %u\n", (*cmd).sid); +} + +static void +dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dRect(const SVGA3dRect *cmd) +{ + debug_printf("\t\t.x = %u\n", (*cmd).x); + debug_printf("\t\t.y = %u\n", (*cmd).y); + debug_printf("\t\t.w = %u\n", (*cmd).w); + debug_printf("\t\t.h = %u\n", (*cmd).h); +} + +static void +dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_QUERYTYPE_OCCLUSION: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + break; + case SVGA3D_QUERYTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } +} + +static void +dump_SVGA3dRenderState(const SVGA3dRenderState *cmd) +{ + switch((*cmd).state) { + case SVGA3D_RS_INVALID: + debug_printf("\t\t.state = SVGA3D_RS_INVALID\n"); + break; + case SVGA3D_RS_ZENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n"); + break; + case SVGA3D_RS_ZWRITEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n"); + break; + case SVGA3D_RS_ALPHATESTENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n"); + break; + case SVGA3D_RS_DITHERENABLE: + debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n"); + break; + case SVGA3D_RS_BLENDENABLE: + debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n"); + break; + case SVGA3D_RS_FOGENABLE: + debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n"); + break; + case SVGA3D_RS_SPECULARENABLE: + debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n"); + break; + case SVGA3D_RS_STENCILENABLE: + debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n"); + break; + case SVGA3D_RS_LIGHTINGENABLE: + debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n"); + break; + case SVGA3D_RS_NORMALIZENORMALS: + debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n"); + break; + case SVGA3D_RS_POINTSPRITEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n"); + break; + case SVGA3D_RS_POINTSCALEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n"); + break; + case SVGA3D_RS_STENCILREF: + debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n"); + break; + case SVGA3D_RS_STENCILMASK: + debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n"); + break; + case SVGA3D_RS_STENCILWRITEMASK: + 
debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n"); + break; + case SVGA3D_RS_FOGSTART: + debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n"); + break; + case SVGA3D_RS_FOGEND: + debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n"); + break; + case SVGA3D_RS_FOGDENSITY: + debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n"); + break; + case SVGA3D_RS_POINTSIZE: + debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n"); + break; + case SVGA3D_RS_POINTSIZEMIN: + debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n"); + break; + case SVGA3D_RS_POINTSIZEMAX: + debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n"); + break; + case SVGA3D_RS_POINTSCALE_A: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n"); + break; + case SVGA3D_RS_POINTSCALE_B: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n"); + break; + case SVGA3D_RS_POINTSCALE_C: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n"); + break; + case SVGA3D_RS_FOGCOLOR: + debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n"); + break; + case SVGA3D_RS_AMBIENT: + debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n"); + break; + case SVGA3D_RS_CLIPPLANEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n"); + break; + case SVGA3D_RS_FOGMODE: + debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n"); + break; + case SVGA3D_RS_FILLMODE: + debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n"); + break; + case SVGA3D_RS_SHADEMODE: + debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n"); + break; + case SVGA3D_RS_LINEPATTERN: + debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n"); + break; + case SVGA3D_RS_SRCBLEND: + debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n"); + break; + case SVGA3D_RS_DSTBLEND: + debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n"); + break; + case SVGA3D_RS_BLENDEQUATION: + debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n"); + break; + case SVGA3D_RS_CULLMODE: + debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n"); + break; + case SVGA3D_RS_ZFUNC: + debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n"); + break; + case SVGA3D_RS_ALPHAFUNC: + debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n"); + break; + case SVGA3D_RS_STENCILFUNC: + debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n"); + break; + case SVGA3D_RS_STENCILFAIL: + debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n"); + break; + case SVGA3D_RS_STENCILZFAIL: + debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n"); + break; + case SVGA3D_RS_STENCILPASS: + debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n"); + break; + case SVGA3D_RS_ALPHAREF: + debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n"); + break; + case SVGA3D_RS_FRONTWINDING: + debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n"); + break; + case SVGA3D_RS_COORDINATETYPE: + debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n"); + break; + case SVGA3D_RS_ZBIAS: + debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n"); + break; + case SVGA3D_RS_RANGEFOGENABLE: + debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n"); + break; + case SVGA3D_RS_COLORWRITEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n"); + break; + case SVGA3D_RS_VERTEXMATERIALENABLE: + debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n"); + break; + case SVGA3D_RS_DIFFUSEMATERIALSOURCE: + debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n"); + break; + case SVGA3D_RS_SPECULARMATERIALSOURCE: + debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n"); + break; + case SVGA3D_RS_AMBIENTMATERIALSOURCE: + debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n"); + break; + case SVGA3D_RS_EMISSIVEMATERIALSOURCE: + 
debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n"); + break; + case SVGA3D_RS_TEXTUREFACTOR: + debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n"); + break; + case SVGA3D_RS_LOCALVIEWER: + debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n"); + break; + case SVGA3D_RS_SCISSORTESTENABLE: + debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n"); + break; + case SVGA3D_RS_BLENDCOLOR: + debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n"); + break; + case SVGA3D_RS_STENCILENABLE2SIDED: + debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n"); + break; + case SVGA3D_RS_CCWSTENCILFUNC: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n"); + break; + case SVGA3D_RS_CCWSTENCILFAIL: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n"); + break; + case SVGA3D_RS_CCWSTENCILZFAIL: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n"); + break; + case SVGA3D_RS_CCWSTENCILPASS: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n"); + break; + case SVGA3D_RS_VERTEXBLEND: + debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n"); + break; + case SVGA3D_RS_SLOPESCALEDEPTHBIAS: + debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n"); + break; + case SVGA3D_RS_DEPTHBIAS: + debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n"); + break; + case SVGA3D_RS_OUTPUTGAMMA: + debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n"); + break; + case SVGA3D_RS_ZVISIBLE: + debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n"); + break; + case SVGA3D_RS_LASTPIXEL: + debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n"); + break; + case SVGA3D_RS_CLIPPING: + debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n"); + break; + case SVGA3D_RS_WRAP0: + debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n"); + break; + case SVGA3D_RS_WRAP1: + debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n"); + break; + case SVGA3D_RS_WRAP2: + debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n"); + break; + case SVGA3D_RS_WRAP3: + debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n"); + break; + case SVGA3D_RS_WRAP4: + debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n"); + break; + case SVGA3D_RS_WRAP5: + debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n"); + break; + case SVGA3D_RS_WRAP6: + debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n"); + break; + case SVGA3D_RS_WRAP7: + debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n"); + break; + case SVGA3D_RS_WRAP8: + debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n"); + break; + case SVGA3D_RS_WRAP9: + debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n"); + break; + case SVGA3D_RS_WRAP10: + debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n"); + break; + case SVGA3D_RS_WRAP11: + debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n"); + break; + case SVGA3D_RS_WRAP12: + debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n"); + break; + case SVGA3D_RS_WRAP13: + debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n"); + break; + case SVGA3D_RS_WRAP14: + debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n"); + break; + case SVGA3D_RS_WRAP15: + debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n"); + break; + case SVGA3D_RS_MULTISAMPLEANTIALIAS: + debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n"); + break; + case SVGA3D_RS_MULTISAMPLEMASK: + debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n"); + break; + case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE: + debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n"); + break; + case SVGA3D_RS_TWEENFACTOR: + debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n"); + break; + case SVGA3D_RS_ANTIALIASEDLINEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n"); + break; + case 
SVGA3D_RS_COLORWRITEENABLE1: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n"); + break; + case SVGA3D_RS_COLORWRITEENABLE2: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n"); + break; + case SVGA3D_RS_COLORWRITEENABLE3: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n"); + break; + case SVGA3D_RS_SEPARATEALPHABLENDENABLE: + debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n"); + break; + case SVGA3D_RS_SRCBLENDALPHA: + debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n"); + break; + case SVGA3D_RS_DSTBLENDALPHA: + debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n"); + break; + case SVGA3D_RS_BLENDEQUATIONALPHA: + debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n"); + break; + case SVGA3D_RS_MAX: + debug_printf("\t\t.state = SVGA3D_RS_MAX\n"); + break; + default: + debug_printf("\t\t.state = %i\n", (*cmd).state); + break; + } + debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue); + debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); +} + +static void +dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd) +{ + debug_printf("\t\t.value = %u\n", (*cmd).value); + debug_printf("\t\t.count = %u\n", (*cmd).count); + debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData); + debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData); +} + +static void +dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.shid = %u\n", (*cmd).shid); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } +} + +static void +dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.reg = %u\n", (*cmd).reg); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + switch((*cmd).ctype) { + case SVGA3D_CONST_TYPE_FLOAT: + debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n"); + debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]); + debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]); + debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]); + debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]); + break; + case SVGA3D_CONST_TYPE_INT: + debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n"); + debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + break; + case SVGA3D_CONST_TYPE_BOOL: + debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n"); + debug_printf("\t\t.values[0] = %u\n", 
(*cmd).values[0]); + debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + break; + default: + debug_printf("\t\t.ctype = %i\n", (*cmd).ctype); + debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + break; + } +} + +static void +dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min); + debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max); +} + +static void +dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls); + debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges); +} + +static void +dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.index = %u\n", (*cmd).index); + debug_printf("\t\t.enabled = %u\n", (*cmd).enabled); +} + +static void +dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd) +{ + switch((*cmd).primType) { + case SVGA3D_PRIMITIVE_INVALID: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n"); + break; + case SVGA3D_PRIMITIVE_TRIANGLELIST: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n"); + break; + case SVGA3D_PRIMITIVE_POINTLIST: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n"); + break; + case SVGA3D_PRIMITIVE_LINELIST: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n"); + break; + case SVGA3D_PRIMITIVE_LINESTRIP: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n"); + break; + case SVGA3D_PRIMITIVE_TRIANGLESTRIP: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n"); + break; + case SVGA3D_PRIMITIVE_TRIANGLEFAN: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n"); + break; + case SVGA3D_PRIMITIVE_MAX: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n"); + break; + default: + debug_printf("\t\t.primType = %i\n", (*cmd).primType); + break; + } + debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount); + debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId); + debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset); + debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride); + debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth); + debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias); +} + +static void +dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd) +{ + debug_printf("\t\t.sid = %u\n", (*cmd).sid); +} + +static void +dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd) +{ + debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); + debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x); + debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y); + 
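/*
 * Aside: the SVGA3dCmdSetShaderConst dump above re-reads the four uint32
 * value slots according to the declared constant type -- float constants are
 * type-punned through a pointer cast, while INT and BOOL constants print as
 * plain unsigned values.  A stand-alone sketch of that per-type formatting
 * (the helper name and the use of a union rather than a cast are choices of
 * this sketch, not of the patch):
 */
static void
print_shader_const_value(unsigned ctype, uint32_t value)
{
   union { uint32_t u; float f; } bits;

   bits.u = value;
   if (ctype == SVGA3D_CONST_TYPE_FLOAT)
      debug_printf("%f", bits.f);   /* view the bit pattern as a float */
   else
      debug_printf("%u", bits.u);   /* SVGA3D_CONST_TYPE_INT / _BOOL / other */
}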
debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z); + debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w); + debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h); + debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d); + debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x); + debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y); + debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z); + debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w); + debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h); + debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d); + switch((*cmd).mode) { + case SVGA3D_STRETCH_BLT_POINT: + debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n"); + break; + case SVGA3D_STRETCH_BLT_LINEAR: + debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n"); + break; + case SVGA3D_STRETCH_BLT_MAX: + debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n"); + break; + default: + debug_printf("\t\t.mode = %i\n", (*cmd).mode); + break; + } +} + +static void +dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd) +{ + debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId); + debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset); + debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch); + debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid); + debug_printf("\t\t.host.face = %u\n", (*cmd).host.face); + debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap); + switch((*cmd).transfer) { + case SVGA3D_WRITE_HOST_VRAM: + debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n"); + break; + case SVGA3D_READ_HOST_VRAM: + debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n"); + break; + default: + debug_printf("\t\t.transfer = %i\n", (*cmd).transfer); + break; + } +} + +static void +dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd) +{ + debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize); + debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset); + debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard); + debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized); +} + +static void +dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_TRANSFORM_INVALID: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n"); + break; + case SVGA3D_TRANSFORM_WORLD: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n"); + break; + case SVGA3D_TRANSFORM_VIEW: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n"); + break; + case SVGA3D_TRANSFORM_PROJECTION: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE0: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE1: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE2: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE3: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE4: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE5: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE6: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE7: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n"); + break; + case SVGA3D_TRANSFORM_WORLD1: + debug_printf("\t\t.type = 
SVGA3D_TRANSFORM_WORLD1\n"); + break; + case SVGA3D_TRANSFORM_WORLD2: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n"); + break; + case SVGA3D_TRANSFORM_WORLD3: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n"); + break; + case SVGA3D_TRANSFORM_MAX: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]); + debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]); + debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]); + debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]); + debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]); + debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]); + debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]); + debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]); + debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]); + debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]); + debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]); + debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]); + debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]); + debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]); + debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]); + debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]); +} + +static void +dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.shid = %u\n", (*cmd).shid); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } +} + +static void +dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).clearFlag) { + case SVGA3D_CLEAR_COLOR: + debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n"); + break; + case SVGA3D_CLEAR_DEPTH: + debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n"); + break; + case SVGA3D_CLEAR_STENCIL: + debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n"); + break; + default: + debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag); + break; + } + debug_printf("\t\t.color = %u\n", (*cmd).color); + debug_printf("\t\t.depth = %f\n", (*cmd).depth); + debug_printf("\t\t.stencil = %u\n", (*cmd).stencil); +} + +static void +dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd) +{ + debug_printf("\t\t.sid = %u\n", (*cmd).sid); + switch((*cmd).surfaceFlags) { + case SVGA3D_SURFACE_CUBEMAP: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n"); + break; + case SVGA3D_SURFACE_HINT_STATIC: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n"); + break; + case SVGA3D_SURFACE_HINT_DYNAMIC: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n"); + break; + case SVGA3D_SURFACE_HINT_INDEXBUFFER: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n"); + break; + case SVGA3D_SURFACE_HINT_VERTEXBUFFER: + debug_printf("\t\t.surfaceFlags = 
SVGA3D_SURFACE_HINT_VERTEXBUFFER\n"); + break; + default: + debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags); + break; + } + switch((*cmd).format) { + case SVGA3D_FORMAT_INVALID: + debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n"); + break; + case SVGA3D_X8R8G8B8: + debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n"); + break; + case SVGA3D_A8R8G8B8: + debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n"); + break; + case SVGA3D_R5G6B5: + debug_printf("\t\t.format = SVGA3D_R5G6B5\n"); + break; + case SVGA3D_X1R5G5B5: + debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n"); + break; + case SVGA3D_A1R5G5B5: + debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n"); + break; + case SVGA3D_A4R4G4B4: + debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n"); + break; + case SVGA3D_Z_D32: + debug_printf("\t\t.format = SVGA3D_Z_D32\n"); + break; + case SVGA3D_Z_D16: + debug_printf("\t\t.format = SVGA3D_Z_D16\n"); + break; + case SVGA3D_Z_D24S8: + debug_printf("\t\t.format = SVGA3D_Z_D24S8\n"); + break; + case SVGA3D_Z_D15S1: + debug_printf("\t\t.format = SVGA3D_Z_D15S1\n"); + break; + case SVGA3D_LUMINANCE8: + debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n"); + break; + case SVGA3D_LUMINANCE4_ALPHA4: + debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n"); + break; + case SVGA3D_LUMINANCE16: + debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n"); + break; + case SVGA3D_LUMINANCE8_ALPHA8: + debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n"); + break; + case SVGA3D_DXT1: + debug_printf("\t\t.format = SVGA3D_DXT1\n"); + break; + case SVGA3D_DXT2: + debug_printf("\t\t.format = SVGA3D_DXT2\n"); + break; + case SVGA3D_DXT3: + debug_printf("\t\t.format = SVGA3D_DXT3\n"); + break; + case SVGA3D_DXT4: + debug_printf("\t\t.format = SVGA3D_DXT4\n"); + break; + case SVGA3D_DXT5: + debug_printf("\t\t.format = SVGA3D_DXT5\n"); + break; + case SVGA3D_BUMPU8V8: + debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n"); + break; + case SVGA3D_BUMPL6V5U5: + debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); + break; + case SVGA3D_BUMPX8L8V8U8: + debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); + break; + case SVGA3D_BUMPL8V8U8: + debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); + break; + case SVGA3D_ARGB_S10E5: + debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); + break; + case SVGA3D_ARGB_S23E8: + debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n"); + break; + case SVGA3D_A2R10G10B10: + debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n"); + break; + case SVGA3D_V8U8: + debug_printf("\t\t.format = SVGA3D_V8U8\n"); + break; + case SVGA3D_Q8W8V8U8: + debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n"); + break; + case SVGA3D_CxV8U8: + debug_printf("\t\t.format = SVGA3D_CxV8U8\n"); + break; + case SVGA3D_X8L8V8U8: + debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n"); + break; + case SVGA3D_A2W10V10U10: + debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n"); + break; + case SVGA3D_ALPHA8: + debug_printf("\t\t.format = SVGA3D_ALPHA8\n"); + break; + case SVGA3D_R_S10E5: + debug_printf("\t\t.format = SVGA3D_R_S10E5\n"); + break; + case SVGA3D_R_S23E8: + debug_printf("\t\t.format = SVGA3D_R_S23E8\n"); + break; + case SVGA3D_RG_S10E5: + debug_printf("\t\t.format = SVGA3D_RG_S10E5\n"); + break; + case SVGA3D_RG_S23E8: + debug_printf("\t\t.format = SVGA3D_RG_S23E8\n"); + break; + case SVGA3D_BUFFER: + debug_printf("\t\t.format = SVGA3D_BUFFER\n"); + break; + case SVGA3D_Z_D24X8: + debug_printf("\t\t.format = SVGA3D_Z_D24X8\n"); + break; + case SVGA3D_FORMAT_MAX: + debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n"); + break; + default: + 
debug_printf("\t\t.format = %i\n", (*cmd).format); + break; + } + debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels); + debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels); + debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels); + debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels); + debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels); + debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels); +} + + +void +svga_dump_commands(const void *commands, uint32_t size) +{ + const uint8_t *next = commands; + const uint8_t *last = next + size; + + assert(size % sizeof(uint32_t) == 0); + + while(next < last) { + const uint32_t cmd_id = *(const uint32_t *)next; + + if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) { + const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; + const uint8_t *body = (const uint8_t *)&header[1]; + + next = (const uint8_t *)body + header->size; + if(next > last) + break; + + switch(cmd_id) { + case SVGA_3D_CMD_SURFACE_DEFINE: + debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + { + const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; + dump_SVGA3dCmdDefineSurface(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dSize) <= next) { + dump_SVGA3dSize((const SVGA3dSize *)body); + body += sizeof(SVGA3dSize); + } + } + break; + case SVGA_3D_CMD_SURFACE_DESTROY: + debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + { + const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; + dump_SVGA3dCmdDestroySurface(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_COPY: + debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + { + const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; + dump_SVGA3dCmdSurfaceCopy(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + } + break; + case SVGA_3D_CMD_SURFACE_STRETCHBLT: + debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + { + const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; + dump_SVGA3dCmdSurfaceStretchBlt(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_DMA: + debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + { + const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; + dump_SVGA3dCmdSurfaceDMA(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { + dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); + body += sizeof(SVGA3dCmdSurfaceDMASuffix); + } + } + break; + case SVGA_3D_CMD_CONTEXT_DEFINE: + debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + { + const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; + dump_SVGA3dCmdDefineContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CONTEXT_DESTROY: + debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + { + const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; + dump_SVGA3dCmdDestroyContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTRANSFORM: + debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + { + const SVGA3dCmdSetTransform *cmd = (const 
SVGA3dCmdSetTransform *)body; + dump_SVGA3dCmdSetTransform(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETZRANGE: + debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + { + const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; + dump_SVGA3dCmdSetZRange(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETRENDERSTATE: + debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + { + const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; + dump_SVGA3dCmdSetRenderState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRenderState) <= next) { + dump_SVGA3dRenderState((const SVGA3dRenderState *)body); + body += sizeof(SVGA3dRenderState); + } + } + break; + case SVGA_3D_CMD_SETRENDERTARGET: + debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + { + const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; + dump_SVGA3dCmdSetRenderTarget(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTEXTURESTATE: + debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + { + const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; + dump_SVGA3dCmdSetTextureState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dTextureState) <= next) { + dump_SVGA3dTextureState((const SVGA3dTextureState *)body); + body += sizeof(SVGA3dTextureState); + } + } + break; + case SVGA_3D_CMD_SETMATERIAL: + debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + { + const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; + dump_SVGA3dCmdSetMaterial(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTDATA: + debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + { + const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; + dump_SVGA3dCmdSetLightData(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTENABLED: + debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + { + const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; + dump_SVGA3dCmdSetLightEnabled(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETVIEWPORT: + debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + { + const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; + dump_SVGA3dCmdSetViewport(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETCLIPPLANE: + debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + { + const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; + dump_SVGA3dCmdSetClipPlane(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CLEAR: + debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + { + const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; + dump_SVGA3dCmdClear(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRect) <= next) { + dump_SVGA3dRect((const SVGA3dRect *)body); + body += sizeof(SVGA3dRect); + } + } + break; + case SVGA_3D_CMD_PRESENT: + debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + { + const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; + dump_SVGA3dCmdPresent(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyRect) <= next) { + dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); + body += sizeof(SVGA3dCopyRect); + } + } + break; + case SVGA_3D_CMD_SHADER_DEFINE: + debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + { + const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; + dump_SVGA3dCmdDefineShader(cmd); + body = (const uint8_t *)&cmd[1]; + 
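/*
 * Aside: a minimal sketch of the stream framing that svga_dump_commands()
 * relies on.  Every command id in [SVGA_3D_CMD_BASE, SVGA_3D_CMD_MAX) starts
 * with an SVGA3dCmdHeader -- assumed here, as the surrounding code implies,
 * to be an id dword followed by a size dword giving the body length in
 * bytes -- and the walk advances past the header plus header->size.  The
 * helper name is this sketch's own; it illustrates the framing only and is
 * not part of the patch.
 */
static unsigned
count_svga3d_commands(const void *commands, uint32_t size)
{
   const uint8_t *next = commands;
   const uint8_t *last = next + size;
   unsigned count = 0;

   while (next + sizeof(uint32_t) <= last) {
      const uint32_t cmd_id = *(const uint32_t *)next;

      if (SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) {
         const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
         /* body starts right after the header; size counts body bytes */
         next = (const uint8_t *)&header[1] + header->size;
         count++;
      }
      else if (cmd_id == SVGA_CMD_FENCE) {
         next += 2 * sizeof(uint32_t);   /* fence value follows the command dword */
      }
      else {
         next += sizeof(uint32_t);       /* unknown token: skip one dword */
      }
   }
   return count;
}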
svga_shader_dump((const uint32_t *)body, + (unsigned)(next - body)/sizeof(uint32_t), + FALSE ); + body = next; + } + break; + case SVGA_3D_CMD_SHADER_DESTROY: + debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + { + const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; + dump_SVGA3dCmdDestroyShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER: + debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + { + const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; + dump_SVGA3dCmdSetShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER_CONST: + debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + { + const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; + dump_SVGA3dCmdSetShaderConst(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_DRAW_PRIMITIVES: + debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + { + const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; + unsigned i, j; + dump_SVGA3dCmdDrawPrimitives(cmd); + body = (const uint8_t *)&cmd[1]; + for(i = 0; i < cmd->numVertexDecls; ++i) { + dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); + body += sizeof(SVGA3dVertexDecl); + } + for(j = 0; j < cmd->numRanges; ++j) { + dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); + body += sizeof(SVGA3dPrimitiveRange); + } + while(body + sizeof(SVGA3dVertexDivisor) <= next) { + dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); + body += sizeof(SVGA3dVertexDivisor); + } + } + break; + case SVGA_3D_CMD_SETSCISSORRECT: + debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + { + const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; + dump_SVGA3dCmdSetScissorRect(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BEGIN_QUERY: + debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + { + const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; + dump_SVGA3dCmdBeginQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_END_QUERY: + debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + { + const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; + dump_SVGA3dCmdEndQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_WAIT_FOR_QUERY: + debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + { + const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; + dump_SVGA3dCmdWaitForQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + default: + debug_printf("\t0x%08x\n", cmd_id); + break; + } + + while(body + sizeof(uint32_t) <= next) { + debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + debug_printf("\t\t0x%02x\n", *body++); + } + else if(cmd_id == SVGA_CMD_FENCE) { + debug_printf("\tSVGA_CMD_FENCE\n"); + debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + next += 2*sizeof(uint32_t); + } + else { + debug_printf("\t0x%08x\n", cmd_id); + next += sizeof(uint32_t); + } + } +} + diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h new file mode 100644 index 0000000000..69a8702087 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_dump.h @@ -0,0 +1,34 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_DUMP_H_ +#define SVGA_DUMP_H_ + +#include "pipe/p_compiler.h" + +void +svga_dump_commands(const void *commands, uint32_t size); + +#endif /* SVGA_DUMP_H_ */ diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py new file mode 100755 index 0000000000..288e753296 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python +''' +Generates dumper for the SVGA 3D command stream using pygccxml. + +Jose Fonseca <jfonseca@vmware.com> +''' + +copyright = ''' +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + ''' + +import os +import sys + +from pygccxml import parser +from pygccxml import declarations + +from pygccxml.declarations import algorithm +from pygccxml.declarations import decl_visitor +from pygccxml.declarations import type_traits +from pygccxml.declarations import type_visitor + + +enums = True + + +class decl_dumper_t(decl_visitor.decl_visitor_t): + + def __init__(self, instance = '', decl = None): + decl_visitor.decl_visitor_t.__init__(self) + self._instance = instance + self.decl = decl + + def clone(self): + return decl_dumper_t(self._instance, self.decl) + + def visit_class(self): + class_ = self.decl + assert self.decl.class_type in ('struct', 'union') + + for variable in class_.variables(): + if variable.name != '': + #print 'variable = %r' % variable.name + dump_type(self._instance + '.' + variable.name, variable.type) + + def visit_enumeration(self): + if enums: + print ' switch(%s) {' % ("(*cmd)" + self._instance,) + for name, value in self.decl.values: + print ' case %s:' % (name,) + print ' debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name) + print ' break;' + print ' default:' + print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + print ' break;' + print ' }' + else: + print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + + +def dump_decl(instance, decl): + dumper = decl_dumper_t(instance, decl) + algorithm.apply_visitor(dumper, decl) + + +class type_dumper_t(type_visitor.type_visitor_t): + + def __init__(self, instance, type_): + type_visitor.type_visitor_t.__init__(self) + self.instance = instance + self.type = type_ + + def clone(self): + return type_dumper_t(self.instance, self.type) + + def visit_char(self): + self.print_instance('%i') + + def visit_unsigned_char(self): + self.print_instance('%u') + + def visit_signed_char(self): + self.print_instance('%i') + + def visit_wchar(self): + self.print_instance('%i') + + def visit_short_int(self): + self.print_instance('%i') + + def visit_short_unsigned_int(self): + self.print_instance('%u') + + def visit_bool(self): + self.print_instance('%i') + + def visit_int(self): + self.print_instance('%i') + + def visit_unsigned_int(self): + self.print_instance('%u') + + def visit_long_int(self): + self.print_instance('%li') + + def visit_long_unsigned_int(self): + self.print_instance('%lu') + + def visit_long_long_int(self): + self.print_instance('%lli') + + def visit_long_long_unsigned_int(self): + self.print_instance('%llu') + + def visit_float(self): + self.print_instance('%f') + + def visit_double(self): + self.print_instance('%f') + + def visit_array(self): + for i in range(type_traits.array_size(self.type)): + dump_type(self.instance + '[%i]' % i, type_traits.base_type(self.type)) + + def visit_pointer(self): + self.print_instance('%p') + + def visit_declarated(self): + #print 'decl = %r' % self.type.decl_string + decl = type_traits.remove_declarated(self.type) + dump_decl(self.instance, decl) + + def print_instance(self, format): + print ' debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance) + + +def dump_type(instance, type_): + type_ = type_traits.remove_alias(type_) + visitor = type_dumper_t(instance, type_) + algorithm.apply_visitor(visitor, type_) + + +def dump_struct(decls, class_): + print 'static void' + print 'dump_%s(const %s *cmd)' % (class_.name, class_.name) + print '{' + dump_decl('', class_) + print '}' + print 
'' + + +cmds = [ + ('SVGA_3D_CMD_SURFACE_DEFINE', 'SVGA3dCmdDefineSurface', (), 'SVGA3dSize'), + ('SVGA_3D_CMD_SURFACE_DESTROY', 'SVGA3dCmdDestroySurface', (), None), + ('SVGA_3D_CMD_SURFACE_COPY', 'SVGA3dCmdSurfaceCopy', (), 'SVGA3dCopyBox'), + ('SVGA_3D_CMD_SURFACE_STRETCHBLT', 'SVGA3dCmdSurfaceStretchBlt', (), None), + ('SVGA_3D_CMD_SURFACE_DMA', 'SVGA3dCmdSurfaceDMA', (), 'SVGA3dCopyBox'), + ('SVGA_3D_CMD_CONTEXT_DEFINE', 'SVGA3dCmdDefineContext', (), None), + ('SVGA_3D_CMD_CONTEXT_DESTROY', 'SVGA3dCmdDestroyContext', (), None), + ('SVGA_3D_CMD_SETTRANSFORM', 'SVGA3dCmdSetTransform', (), None), + ('SVGA_3D_CMD_SETZRANGE', 'SVGA3dCmdSetZRange', (), None), + ('SVGA_3D_CMD_SETRENDERSTATE', 'SVGA3dCmdSetRenderState', (), 'SVGA3dRenderState'), + ('SVGA_3D_CMD_SETRENDERTARGET', 'SVGA3dCmdSetRenderTarget', (), None), + ('SVGA_3D_CMD_SETTEXTURESTATE', 'SVGA3dCmdSetTextureState', (), 'SVGA3dTextureState'), + ('SVGA_3D_CMD_SETMATERIAL', 'SVGA3dCmdSetMaterial', (), None), + ('SVGA_3D_CMD_SETLIGHTDATA', 'SVGA3dCmdSetLightData', (), None), + ('SVGA_3D_CMD_SETLIGHTENABLED', 'SVGA3dCmdSetLightEnabled', (), None), + ('SVGA_3D_CMD_SETVIEWPORT', 'SVGA3dCmdSetViewport', (), None), + ('SVGA_3D_CMD_SETCLIPPLANE', 'SVGA3dCmdSetClipPlane', (), None), + ('SVGA_3D_CMD_CLEAR', 'SVGA3dCmdClear', (), 'SVGA3dRect'), + ('SVGA_3D_CMD_PRESENT', 'SVGA3dCmdPresent', (), 'SVGA3dCopyRect'), + ('SVGA_3D_CMD_SHADER_DEFINE', 'SVGA3dCmdDefineShader', (), None), + ('SVGA_3D_CMD_SHADER_DESTROY', 'SVGA3dCmdDestroyShader', (), None), + ('SVGA_3D_CMD_SET_SHADER', 'SVGA3dCmdSetShader', (), None), + ('SVGA_3D_CMD_SET_SHADER_CONST', 'SVGA3dCmdSetShaderConst', (), None), + ('SVGA_3D_CMD_DRAW_PRIMITIVES', 'SVGA3dCmdDrawPrimitives', (('SVGA3dVertexDecl', 'numVertexDecls'), ('SVGA3dPrimitiveRange', 'numRanges')), 'SVGA3dVertexDivisor'), + ('SVGA_3D_CMD_SETSCISSORRECT', 'SVGA3dCmdSetScissorRect', (), None), + ('SVGA_3D_CMD_BEGIN_QUERY', 'SVGA3dCmdBeginQuery', (), None), + ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None), + ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None), + #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None), +] + +def dump_cmds(): + print r''' +void +svga_dump_commands(const void *commands, uint32_t size) +{ + const uint8_t *next = commands; + const uint8_t *last = next + size; + + assert(size % sizeof(uint32_t) == 0); + + while(next < last) { + const uint32_t cmd_id = *(const uint32_t *)next; + + if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) { + const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; + const uint8_t *body = (const uint8_t *)&header[1]; + + next = (const uint8_t *)body + header->size; + if(next > last) + break; +''' + + print ' switch(cmd_id) {' + indexes = 'ijklmn' + for id, header, body, footer in cmds: + print ' case %s:' % id + print ' debug_printf("\\t%s\\n");' % id + print ' {' + print ' const %s *cmd = (const %s *)body;' % (header, header) + if len(body): + print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' + print ' dump_%s(cmd);' % header + print ' body = (const uint8_t *)&cmd[1];' + for i in range(len(body)): + struct, count = body[i] + idx = indexes[i] + print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) + print ' dump_%s((const %s *)body);' % (struct, struct) + print ' body += sizeof(%s);' % struct + print ' }' + if footer is not None: + print ' while(body + sizeof(%s) <= next) {' % footer + print ' dump_%s((const %s *)body);' % (footer, footer) + print ' body += sizeof(%s);' % footer + print ' }' + if id == 
'SVGA_3D_CMD_SHADER_DEFINE': + print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));' + print ' body = next;' + print ' }' + print ' break;' + print ' default:' + print ' debug_printf("\\t0x%08x\\n", cmd_id);' + print ' break;' + print ' }' + + print r''' + while(body + sizeof(uint32_t) <= next) { + debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + debug_printf("\t\t0x%02x\n", *body++); + } + else if(cmd_id == SVGA_CMD_FENCE) { + debug_printf("\tSVGA_CMD_FENCE\n"); + debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + next += 2*sizeof(uint32_t); + } + else { + debug_printf("\t0x%08x\n", cmd_id); + next += sizeof(uint32_t); + } + } +} +''' + +def main(): + print copyright.strip() + print + print '/**' + print ' * @file' + print ' * Dump SVGA commands.' + print ' *' + print ' * Generated automatically from svga3d_reg.h by svga_dump.py.' + print ' */' + print + print '#include "svga_types.h"' + print '#include "svga_shader_dump.h"' + print '#include "svga3d_reg.h"' + print + print '#include "pipe/p_debug.h"' + print '#include "svga_dump.h"' + print + + config = parser.config_t( + include_paths = ['include'], + compiler = 'gcc', + ) + + headers = [ + 'include/svga_types.h', + 'include/svga3d_reg.h', + ] + + decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE) + global_ns = declarations.get_global_namespace(decls) + + names = set() + for id, header, body, footer in cmds: + names.add(header) + for struct, count in body: + names.add(struct) + if footer is not None: + names.add(footer) + + for class_ in global_ns.classes(lambda decl: decl.name in names): + dump_struct(decls, class_) + + dump_cmds() + + +if __name__ == '__main__': + main() diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h new file mode 100644 index 0000000000..9217af2dd9 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader.h @@ -0,0 +1,220 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Definitions + * + * @author Michal Krol <michal@vmware.com> + */ + +#ifndef ST_SHADER_SVGA_H +#define ST_SHADER_SVGA_H + +#include "pipe/p_compiler.h" + +struct sh_op +{ + unsigned opcode:16; + unsigned control:8; + unsigned length:4; + unsigned predicated:1; + unsigned unused:1; + unsigned coissue:1; + unsigned is_reg:1; +}; + +struct sh_reg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:14; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_reg_type( struct sh_reg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_cdata +{ + float xyzw[4]; +}; + +struct sh_def +{ + struct sh_op op; + struct sh_reg reg; + struct sh_cdata cdata; +}; + +struct sh_defb +{ + struct sh_op op; + struct sh_reg reg; + uint data; +}; + +struct sh_idata +{ + int xyzw[4]; +}; + +struct sh_defi +{ + struct sh_op op; + struct sh_reg reg; + struct sh_idata idata; +}; + +#define PS_TEXTURETYPE_UNKNOWN SVGA3DSAMP_UNKNOWN +#define PS_TEXTURETYPE_2D SVGA3DSAMP_2D +#define PS_TEXTURETYPE_CUBE SVGA3DSAMP_CUBE +#define PS_TEXTURETYPE_VOLUME SVGA3DSAMP_VOLUME + +struct ps_sampleinfo +{ + unsigned unused:27; + unsigned texture_type:4; + unsigned is_reg:1; +}; + +struct vs_semantic +{ + unsigned usage:5; + unsigned unused1:11; + unsigned usage_index:4; + unsigned unused2:12; +}; + +struct sh_dstreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned write_mask:4; + unsigned modifier:4; + unsigned shift_scale:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_dstreg_type( struct sh_dstreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dcl +{ + struct sh_op op; + union { + struct { + struct ps_sampleinfo sampleinfo; + } ps; + struct { + struct vs_semantic semantic; + } vs; + } u; + struct sh_dstreg reg; +}; + + +struct sh_srcreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned swizzle_x:2; + unsigned swizzle_y:2; + unsigned swizzle_z:2; + unsigned swizzle_w:2; + unsigned modifier:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_srcreg_type( struct sh_srcreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dstop +{ + struct sh_op op; + struct sh_dstreg dst; +}; + +struct sh_srcop +{ + struct sh_op op; + struct sh_srcreg src; +}; + +struct sh_src2op +{ + struct sh_op op; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_unaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src; +}; + +struct sh_binaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_trinaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; + struct sh_srcreg src2; +}; + +struct sh_comment +{ + unsigned opcode:16; + unsigned size:16; +}; + +#endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c new file mode 100644 index 0000000000..b0e7fdf378 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -0,0 +1,654 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol <michal@vmware.com> + */ + +#include "svga_shader.h" +#include "svga_shader_dump.h" +#include "svga_shader_op.h" +#include "util/u_debug.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +struct dump_info +{ + SVGA3dShaderVersion version; + boolean is_ps; +}; + +static void dump_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.predicated == 0 ); + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case 0: + break; + case SVGA3DOPCONT_PROJECT: + debug_printf( "p" ); + break; + case SVGA3DOPCONT_BIAS: + debug_printf( "b" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_comp_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case SVGA3DOPCOMP_RESERVED0: + break; + case SVGA3DOPCOMP_GT: + debug_printf("_gt"); + break; + case SVGA3DOPCOMP_EQ: + debug_printf("_eq"); + break; + case SVGA3DOPCOMP_GE: + debug_printf("_ge"); + break; + case SVGA3DOPCOMP_LT: + debug_printf("_lt"); + break; + case SVGA3DOPCOMPC_NE: + debug_printf("_ne"); + break; + case SVGA3DOPCOMP_LE: + debug_printf("_le"); + break; + case SVGA3DOPCOMP_RESERVED1: + default: + assert( 0 ); + } +} + + +static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); + assert( reg.is_reg == 1 ); + + switch (sh_reg_type( reg )) { + case SVGA3DREG_TEMP: + debug_printf( "r%u", reg.number ); + break; + + case SVGA3DREG_INPUT: + debug_printf( "v%u", reg.number ); + break; + + case SVGA3DREG_CONST: + if (reg.relative) { + if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) + debug_printf( "c[aL+%u]", reg.number ); + else + debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); + } + else + debug_printf( "c%u", reg.number ); + break; + + case SVGA3DREG_ADDR: /* VS */ + /* SVGA3DREG_TEXTURE */ /* PS */ + if (di->is_ps) + debug_printf( "t%u", reg.number ); + else + debug_printf( "a%u", reg.number ); + break; + + case SVGA3DREG_RASTOUT: + switch (reg.number) { + case 0 /*POSITION*/: + debug_printf( "oPos" ); + break; + case 1 /*FOG*/: + debug_printf( "oFog" ); + break; + 
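/*
 * Aside: dump_reg() below dispatches on sh_reg_type(), which reassembles the
 * 5-bit register type that struct sh_reg (svga_shader.h above) splits into
 * type_lo (3 bits) and type_hi (2 bits).  Taking the declared field widths at
 * face value and assuming the usual LSB-first bit-field packing this code
 * already relies on, type_lo sits in bits 28..30 of the token dword and
 * type_hi in bits 11..12.  The same recombination done directly on a raw
 * dword, purely as an illustration (helper name is this sketch's own):
 */
static unsigned
raw_token_reg_type(unsigned token)
{
   unsigned type_lo = (token >> 28) & 0x7;   /* bits 28..30 */
   unsigned type_hi = (token >> 11) & 0x3;   /* bits 11..12 */

   return type_lo | (type_hi << 3);          /* same as sh_reg_type() */
}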
case 2 /*POINT_SIZE*/: + debug_printf( "oPts" ); + break; + default: + assert( 0 ); + debug_printf( "???" ); + } + break; + + case SVGA3DREG_ATTROUT: + assert( reg.number < 2 ); + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_TEXCRDOUT: + /* SVGA3DREG_OUTPUT */ + debug_printf( "oT%u", reg.number ); + break; + + case SVGA3DREG_COLOROUT: + debug_printf( "oC%u", reg.number ); + break; + + case SVGA3DREG_DEPTHOUT: + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_SAMPLER: + debug_printf( "s%u", reg.number ); + break; + + case SVGA3DREG_CONSTBOOL: + assert( !reg.relative ); + debug_printf( "b%u", reg.number ); + break; + + case SVGA3DREG_CONSTINT: + assert( !reg.relative ); + debug_printf( "i%u", reg.number ); + break; + + case SVGA3DREG_LOOP: + assert( reg.number == 0 ); + debug_printf( "aL" ); + break; + + case SVGA3DREG_MISCTYPE: + switch (reg.number) { + case SVGA3DMISCREG_POSITION: + debug_printf( "vPos" ); + break; + case SVGA3DMISCREG_FACE: + debug_printf( "vFace" ); + break; + default: + assert(0); + break; + } + break; + + case SVGA3DREG_LABEL: + debug_printf( "l%u", reg.number ); + break; + + case SVGA3DREG_PREDICATE: + debug_printf( "p%u", reg.number ); + break; + + + default: + assert( 0 ); + debug_printf( "???" ); + } +} + +static void dump_cdata( struct sh_cdata cdata ) +{ + debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); +} + +static void dump_idata( struct sh_idata idata ) +{ + debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); +} + +static void dump_bdata( boolean bdata ) +{ + debug_printf( bdata ? "TRUE" : "FALSE" ); +} + +static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) +{ + switch (sampleinfo.texture_type) { + case SVGA3DSAMP_2D: + debug_printf( "_2d" ); + break; + case SVGA3DSAMP_CUBE: + debug_printf( "_cube" ); + break; + case SVGA3DSAMP_VOLUME: + debug_printf( "_volume" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_usageinfo( struct vs_semantic semantic ) +{ + switch (semantic.usage) { + case SVGA3D_DECLUSAGE_POSITION: + debug_printf("_position" ); + break; + case SVGA3D_DECLUSAGE_BLENDWEIGHT: + debug_printf("_blendweight" ); + break; + case SVGA3D_DECLUSAGE_BLENDINDICES: + debug_printf("_blendindices" ); + break; + case SVGA3D_DECLUSAGE_NORMAL: + debug_printf("_normal" ); + break; + case SVGA3D_DECLUSAGE_PSIZE: + debug_printf("_psize" ); + break; + case SVGA3D_DECLUSAGE_TEXCOORD: + debug_printf("_texcoord"); + break; + case SVGA3D_DECLUSAGE_TANGENT: + debug_printf("_tangent" ); + break; + case SVGA3D_DECLUSAGE_BINORMAL: + debug_printf("_binormal" ); + break; + case SVGA3D_DECLUSAGE_TESSFACTOR: + debug_printf("_tessfactor" ); + break; + case SVGA3D_DECLUSAGE_POSITIONT: + debug_printf("_positiont" ); + break; + case SVGA3D_DECLUSAGE_COLOR: + debug_printf("_color" ); + break; + case SVGA3D_DECLUSAGE_FOG: + debug_printf("_fog" ); + break; + case SVGA3D_DECLUSAGE_DEPTH: + debug_printf("_depth" ); + break; + case SVGA3D_DECLUSAGE_SAMPLE: + debug_printf("_sample"); + break; + default: + assert( 0 ); + return; + } + + if (semantic.usage_index != 0) { + debug_printf("%d", semantic.usage_index ); + } +} + +static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_dstreg dstreg; + } u; + + assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); + + if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) + debug_printf( 
"_sat" ); + if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) + debug_printf( "_pp" ); + switch (dstreg.shift_scale) { + case 0: + break; + case 1: + debug_printf( "_x2" ); + break; + case 2: + debug_printf( "_x4" ); + break; + case 3: + debug_printf( "_x8" ); + break; + case 13: + debug_printf( "_d8" ); + break; + case 14: + debug_printf( "_d4" ); + break; + case 15: + debug_printf( "_d2" ); + break; + default: + assert( 0 ); + } + debug_printf( " " ); + + u.dstreg = dstreg; + dump_reg( u.reg, NULL, di ); + if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { + debug_printf( "." ); + if (dstreg.write_mask & SVGA3DWRITEMASK_0) + debug_printf( "x" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_1) + debug_printf( "y" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_2) + debug_printf( "z" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_3) + debug_printf( "w" ); + } +} + +static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_srcreg srcreg; + } u; + + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_BIASNEG: + case SVGA3DSRCMOD_SIGNNEG: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "-" ); + break; + case SVGA3DSRCMOD_ABS: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "-|" ); + break; + case SVGA3DSRCMOD_COMP: + debug_printf( "1-" ); + break; + case SVGA3DSRCMOD_NOT: + debug_printf( "!" ); + } + + u.srcreg = srcreg; + dump_reg( u.reg, indreg, di ); + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NONE: + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_COMP: + case SVGA3DSRCMOD_NOT: + break; + case SVGA3DSRCMOD_ABS: + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_BIAS: + case SVGA3DSRCMOD_BIASNEG: + debug_printf( "_bias" ); + break; + case SVGA3DSRCMOD_SIGN: + case SVGA3DSRCMOD_SIGNNEG: + debug_printf( "_bx2" ); + break; + case SVGA3DSRCMOD_X2: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "_x2" ); + break; + case SVGA3DSRCMOD_DZ: + debug_printf( "_dz" ); + break; + case SVGA3DSRCMOD_DW: + debug_printf( "_dw" ); + break; + default: + assert( 0 ); + } + if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { + debug_printf( "." ); + if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + } + else { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); + } + } +} + +void +svga_shader_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ) +{ + const unsigned *start = assem; + boolean finished = FALSE; + struct dump_info di; + unsigned i; + + if (do_binary) { + for (i = 0; i < dwords; i++) + debug_printf(" 0x%08x,\n", assem[i]); + + debug_printf("\n\n"); + } + + di.version.value = *assem++; + di.is_ps = (di.version.type == SVGA3D_PS_TYPE); + + debug_printf( + "%s_%u_%u\n", + di.is_ps ? "ps" : "vs", + di.version.major, + di.version.minor ); + + while (!finished) { + struct sh_op op = *(struct sh_op *) assem; + + if (assem - start >= dwords) { + debug_printf("... 
ran off end of buffer\n"); + assert(0); + return; + } + + switch (op.opcode) { + case SVGA3DOP_DCL: + { + struct sh_dcl dcl = *(struct sh_dcl *) assem; + + debug_printf( "dcl" ); + if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) + dump_sampleinfo( dcl.u.ps.sampleinfo ); + else if (di.is_ps) { + if (di.version.major == 3 && + sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE) + dump_usageinfo( dcl.u.vs.semantic ); + } + else + dump_usageinfo( dcl.u.vs.semantic ); + dump_dstreg( dcl.reg, &di ); + debug_printf( "\n" ); + assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFB: + { + struct sh_defb defb = *(struct sh_defb *) assem; + + debug_printf( "defb " ); + dump_reg( defb.reg, NULL, &di ); + debug_printf( ", " ); + dump_bdata( defb.data ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defb ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFI: + { + struct sh_defi defi = *(struct sh_defi *) assem; + + debug_printf( "defi " ); + dump_reg( defi.reg, NULL, &di ); + debug_printf( ", " ); + dump_idata( defi.idata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defi ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_TEXCOORD: + assert( di.is_ps ); + dump_op( op, "texcoord" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_TEX: + assert( di.is_ps ); + if (0) { + dump_op( op, "tex" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + } + else { + struct sh_binaryop binaryop = *(struct sh_binaryop *) assem; + + dump_op( op, "texld" ); + dump_dstreg( binaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src0, NULL, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src1, NULL, &di ); + assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_DEF: + { + struct sh_def def = *(struct sh_def *) assem; + + debug_printf( "def " ); + dump_reg( def.reg, NULL, &di ); + debug_printf( ", " ); + dump_cdata( def.cdata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_def ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_PHASE: + debug_printf( "phase\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_COMMENT: + { + struct sh_comment comment = *(struct sh_comment *)assem; + + /* Ignore comment contents. 
*/ + assem += sizeof(struct sh_comment) / sizeof(unsigned) + comment.size; + } + break; + + case SVGA3DOP_RET: + debug_printf( "ret\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_END: + debug_printf( "end\n" ); + finished = TRUE; + break; + + default: + { + const struct sh_opcode_info *info = svga_opcode_info( op.opcode ); + uint i; + uint num_src = info->num_src + op.predicated; + boolean not_first_arg = FALSE; + + assert( info->num_dst <= 1 ); + + if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3) + num_src += 2; + + dump_comp_op( op, info->mnemonic ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + + if (info->num_dst > 0) { + struct sh_dstreg dstreg = *(struct sh_dstreg *) assem; + + dump_dstreg( dstreg, &di ); + assem += sizeof( struct sh_dstreg ) / sizeof( unsigned ); + not_first_arg = TRUE; + } + + for (i = 0; i < num_src; i++) { + struct sh_srcreg srcreg; + struct sh_srcreg indreg; + + srcreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + if (srcreg.relative && !di.is_ps && di.version.major >= 2) { + indreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + } + + if (not_first_arg) + debug_printf( ", " ); + else + debug_printf( " " ); + dump_srcreg( srcreg, &indreg, &di ); + not_first_arg = TRUE; + } + + debug_printf( "\n" ); + } + } + } +} diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h new file mode 100644 index 0000000000..a2657acb2f --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h @@ -0,0 +1,42 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol <michal@vmware.com> + */ + +#ifndef SVGA_SHADER_DUMP_H +#define SVGA_SHADER_DUMP_H + +void +svga_shader_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ); + +#endif /* SVGA_SHADER_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c new file mode 100644 index 0000000000..8343bfdaab --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -0,0 +1,168 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
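As a rough usage sketch (not part of the patch): the svga_shader_dump() entry point declared in svga_shader_dump.h above takes a raw SVGA3D token stream whose first dword is the version token. A hypothetical debug wrapper in the svga driver might call it as below; the wrapper name, the include path via the svgadump/ subdirectory, and the assumption that the tokens come from the driver's shader translation path are illustrative only.

/* Illustrative only -- wrapper name and include path are assumptions,
 * not part of this patch.
 */
#include "svgadump/svga_shader_dump.h"

static void
debug_dump_svga_tokens( const unsigned *tokens, unsigned nr_dwords )
{
   /* A non-zero do_binary argument also prints the raw dwords
    * before the disassembly listing.
    */
   svga_shader_dump( tokens, nr_dwords, 1 );
}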
+ * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol <michal@vmware.com> + */ + +#include "util/u_debug.h" +#include "svga_shader_op.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +#define SVGA3DOP_INVALID SVGA3DOP_END +#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST + +static struct sh_opcode_info opcode_info[] = +{ + { "nop", 0, 0, SVGA3DOP_NOP }, + { "mov", 1, 1, SVGA3DOP_MOV, }, + { "add", 1, 2, SVGA3DOP_ADD, }, + { "sub", 1, 2, SVGA3DOP_SUB, }, + { "mad", 1, 3, SVGA3DOP_MAD, }, + { "mul", 1, 2, SVGA3DOP_MUL, }, + { "rcp", 1, 1, SVGA3DOP_RCP, }, + { "rsq", 1, 1, SVGA3DOP_RSQ, }, + { "dp3", 1, 2, SVGA3DOP_DP3, }, + { "dp4", 1, 2, SVGA3DOP_DP4, }, + { "min", 1, 2, SVGA3DOP_MIN, }, + { "max", 1, 2, SVGA3DOP_MAX, }, + { "slt", 1, 2, SVGA3DOP_SLT, }, + { "sge", 1, 2, SVGA3DOP_SGE, }, + { "exp", 1, 1, SVGA3DOP_EXP, }, + { "log", 1, 1, SVGA3DOP_LOG, }, + { "lit", 1, 1, SVGA3DOP_LIT, }, + { "dst", 1, 2, SVGA3DOP_DST, }, + { "lrp", 1, 3, SVGA3DOP_LRP, }, + { "frc", 1, 1, SVGA3DOP_FRC, }, + { "m4x4", 1, 2, SVGA3DOP_M4x4, }, + { "m4x3", 1, 2, SVGA3DOP_M4x3, }, + { "m3x4", 1, 2, SVGA3DOP_M3x4, }, + { "m3x3", 1, 2, SVGA3DOP_M3x3, }, + { "m3x2", 1, 2, SVGA3DOP_M3x2, }, + { "call", 0, 1, SVGA3DOP_CALL, }, + { "callnz", 0, 2, SVGA3DOP_CALLNZ, }, + { "loop", 0, 2, SVGA3DOP_LOOP, }, + { "ret", 0, 0, SVGA3DOP_RET, }, + { "endloop", 0, 0, SVGA3DOP_ENDLOOP, }, + { "label", 0, 1, SVGA3DOP_LABEL, }, + { "dcl", 0, 0, SVGA3DOP_DCL, }, + { "pow", 1, 2, SVGA3DOP_POW, }, + { "crs", 1, 2, SVGA3DOP_CRS, }, + { "sgn", 1, 3, SVGA3DOP_SGN, }, + { "abs", 1, 1, SVGA3DOP_ABS, }, + { "nrm", 1, 1, SVGA3DOP_NRM, }, /* 3-componenet normalization */ + { "sincos", 1, 1, SVGA3DOP_SINCOS, }, + { "rep", 0, 1, SVGA3DOP_REP, }, + { "endrep", 0, 0, SVGA3DOP_ENDREP, }, + { "if", 0, 1, SVGA3DOP_IF, }, + { "ifc", 0, 2, SVGA3DOP_IFC, }, + { "else", 0, 0, SVGA3DOP_ELSE, }, + { "endif", 0, 0, SVGA3DOP_ENDIF, }, + { "break", 0, 0, SVGA3DOP_BREAK, }, + { "breakc", 0, 0, SVGA3DOP_BREAKC, }, + { "mova", 1, 1, SVGA3DOP_MOVA, }, + { "defb", 0, 0, SVGA3DOP_DEFB, }, + { "defi", 0, 0, SVGA3DOP_DEFI, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "texcoord", 0, 0, SVGA3DOP_TEXCOORD, }, + { "texkill", 1, 0, SVGA3DOP_TEXKILL, }, + { "tex", 0, 0, SVGA3DOP_TEX, }, + { "texbem", 1, 1, SVGA3DOP_TEXBEM, }, + { "texbeml", 1, 1, SVGA3DOP_TEXBEML, }, + { "texreg2ar", 1, 1, SVGA3DOP_TEXREG2AR, }, + { "texreg2gb", 1, 1, SVGA3DOP_TEXREG2GB, }, + { "texm3x2pad", 1, 1, SVGA3DOP_TEXM3x2PAD, }, + { "texm3x2tex", 1, 1, SVGA3DOP_TEXM3x2TEX, }, + { "texm3x3pad", 1, 1, SVGA3DOP_TEXM3x3PAD, }, + { "texm3x3tex", 1, 1, SVGA3DOP_TEXM3x3TEX, }, + { "reserved0", 0, 0, SVGA3DOP_RESERVED0, }, + { "texm3x3spec", 1, 2, SVGA3DOP_TEXM3x3SPEC, }, + { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,}, + { "expp", 1, 1, SVGA3DOP_EXPP, }, + { "logp", 1, 1, SVGA3DOP_LOGP, }, + { "cnd", 1, 3, SVGA3DOP_CND, }, + { "def", 0, 0, SVGA3DOP_DEF, }, + { "texreg2rgb", 1, 
1, SVGA3DOP_TEXREG2RGB, }, + { "texdp3tex", 1, 1, SVGA3DOP_TEXDP3TEX, }, + { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,}, + { "texdp3", 1, 1, SVGA3DOP_TEXDP3, }, + { "texm3x3", 1, 1, SVGA3DOP_TEXM3x3, }, + { "texdepth", 1, 0, SVGA3DOP_TEXDEPTH, }, + { "cmp", 1, 3, SVGA3DOP_CMP, }, + { "bem", 1, 2, SVGA3DOP_BEM, }, + { "dp2add", 1, 3, SVGA3DOP_DP2ADD, }, + { "dsx", 1, 1, SVGA3DOP_INVALID, }, + { "dsy", 1, 1, SVGA3DOP_INVALID, }, + { "texldd", 1, 1, SVGA3DOP_INVALID, }, + { "setp", 1, 2, SVGA3DOP_SETP, }, + { "texldl", 1, 1, SVGA3DOP_INVALID, }, + { "breakp", 1, 1, SVGA3DOP_INVALID, }, +}; + +const struct sh_opcode_info *svga_opcode_info( uint op ) +{ + struct sh_opcode_info *info; + + if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { + /* The opcode is either PHASE, COMMENT, END or out of range. + */ + assert( 0 ); + return NULL; + } + + info = &opcode_info[op]; + + if (info->svga_opcode == SVGA3DOP_INVALID) { + /* No valid information. Please provide number of dst/src registers. + */ + assert( 0 ); + return NULL; + } + + /* Sanity check. + */ + assert( op == info->svga_opcode ); + + return info; +} diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h new file mode 100644 index 0000000000..e558de02c5 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h @@ -0,0 +1,46 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
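As a rough sketch of how svga_opcode_info() pairs with the operand layout used by the default case of svga_shader_dump() above (one op token, then num_dst destination tokens, then num_src source tokens, with one extra implicit source for a predicated op and one extra index-register token for a relative-addressed VS 2.0+ source): the helper below is illustrative only, not part of this patch, and deliberately ignores the special-cased DCL, DEF/DEFB/DEFI and comment tokens as well as the pre-SM3 sincos extra operands.

/* Illustrative sketch only -- steps over one "generic" instruction the
 * same way svga_shader_dump() does in its default case.
 */
#include "svga_shader.h"
#include "svga_shader_op.h"

static const unsigned *
skip_generic_inst( const unsigned *assem, boolean is_ps, unsigned major )
{
   struct sh_op op = *(struct sh_op *) assem;
   const struct sh_opcode_info *info = svga_opcode_info( op.opcode );
   unsigned num_src;
   unsigned i;

   if (!info)
      return NULL;

   /* A predicated op reads one additional source token. */
   num_src = info->num_src + op.predicated;

   /* Op token. */
   assem += sizeof( struct sh_op ) / sizeof( unsigned );

   /* Destination token(s). */
   assem += info->num_dst * (sizeof( struct sh_dstreg ) / sizeof( unsigned ));

   /* Source tokens, plus the index register used by relative
    * addressing in VS 2.0 and later.
    */
   for (i = 0; i < num_src; i++) {
      struct sh_srcreg srcreg = *(struct sh_srcreg *) assem;

      assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
      if (srcreg.relative && !is_ps && major >= 2)
         assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
   }

   return assem;
}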
+ * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol <michal@vmware.com> + */ + +#ifndef SVGA_SHADER_OP_H +#define SVGA_SHADER_OP_H + +struct sh_opcode_info +{ + const char *mnemonic; + unsigned num_dst:8; + unsigned num_src:8; + unsigned svga_opcode:16; +}; + +const struct sh_opcode_info *svga_opcode_info( unsigned op ); + +#endif /* SVGA_SHADER_OP_H */ diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index bf470b46ae..2f0f063d2d 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -107,7 +107,7 @@ trace_context_set_edgeflags(struct pipe_context *_pipe, /* FIXME: we don't know how big this array is */ trace_dump_arg(ptr, bitfield); - pipe->set_edgeflags(pipe, bitfield);; + pipe->set_edgeflags(pipe, bitfield); trace_dump_call_end(); } @@ -143,10 +143,16 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) for (k = 0; k < tr_ctx->curr.nr_cbufs; k++) if (tr_ctx->draw_rule.surf == tr_ctx->curr.cbufs[k]) block = TRUE; - if (tr_ctx->draw_rule.tex) + if (tr_ctx->draw_rule.tex) { for (k = 0; k < tr_ctx->curr.num_texs; k++) if (tr_ctx->draw_rule.tex == tr_ctx->curr.tex[k]) block = TRUE; + for (k = 0; k < tr_ctx->curr.num_vert_texs; k++) { + if (tr_ctx->draw_rule.tex == tr_ctx->curr.vert_tex[k]) { + block = TRUE; + } + } + } if (block) tr_ctx->draw_blocked |= (flag | 4); @@ -192,7 +198,7 @@ trace_context_draw_arrays(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_arrays(pipe, mode, start, count);; + result = pipe->draw_arrays(pipe, mode, start, count); trace_dump_ret(bool, result); @@ -232,7 +238,7 @@ trace_context_draw_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; + result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); trace_dump_ret(bool, result); @@ -306,7 +312,7 @@ trace_context_create_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(uint, query_type); - result = pipe->create_query(pipe, query_type);; + result = pipe->create_query(pipe, query_type); trace_dump_ret(ptr, result); @@ -328,7 +334,7 @@ trace_context_destroy_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); - pipe->destroy_query(pipe, query);; + pipe->destroy_query(pipe, query); trace_dump_call_end(); } @@ -346,7 +352,7 @@ trace_context_begin_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); - pipe->begin_query(pipe, query);; + pipe->begin_query(pipe, query); trace_dump_call_end(); } @@ -385,7 +391,7 @@ trace_context_get_query_result(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); - _result = pipe->get_query_result(pipe, query, wait, presult);; + _result = pipe->get_query_result(pipe, query, wait, presult); result = *presult; trace_dump_arg(uint, result); @@ -410,7 +416,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(blend_state, state); - result = pipe->create_blend_state(pipe, state);; + result = pipe->create_blend_state(pipe, state); trace_dump_ret(ptr, result); @@ -432,7 +438,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_blend_state(pipe, state);; + 
pipe->bind_blend_state(pipe, state); trace_dump_call_end(); } @@ -450,7 +456,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_blend_state(pipe, state);; + pipe->delete_blend_state(pipe, state); trace_dump_call_end(); } @@ -469,7 +475,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(sampler_state, state); - result = pipe->create_sampler_state(pipe, state);; + result = pipe->create_sampler_state(pipe, state); trace_dump_ret(ptr, result); @@ -480,19 +486,40 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, static INLINE void -trace_context_bind_sampler_states(struct pipe_context *_pipe, - unsigned num_states, void **states) +trace_context_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_states, + void **states) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_fragment_sampler_states"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_states); + trace_dump_arg_array(ptr, states, num_states); + + pipe->bind_fragment_sampler_states(pipe, num_states, states); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_states, + void **states) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin("pipe_context", "bind_sampler_states"); + trace_dump_call_begin("pipe_context", "bind_vertex_sampler_states"); trace_dump_arg(ptr, pipe); trace_dump_arg(uint, num_states); trace_dump_arg_array(ptr, states, num_states); - pipe->bind_sampler_states(pipe, num_states, states);; + pipe->bind_vertex_sampler_states(pipe, num_states, states); trace_dump_call_end(); } @@ -510,7 +537,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_sampler_state(pipe, state);; + pipe->delete_sampler_state(pipe, state); trace_dump_call_end(); } @@ -529,7 +556,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(rasterizer_state, state); - result = pipe->create_rasterizer_state(pipe, state);; + result = pipe->create_rasterizer_state(pipe, state); trace_dump_ret(ptr, result); @@ -551,7 +578,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_rasterizer_state(pipe, state);; + pipe->bind_rasterizer_state(pipe, state); trace_dump_call_end(); } @@ -569,7 +596,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_rasterizer_state(pipe, state);; + pipe->delete_rasterizer_state(pipe, state); trace_dump_call_end(); } @@ -585,7 +612,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state"); - result = pipe->create_depth_stencil_alpha_state(pipe, state);; + result = pipe->create_depth_stencil_alpha_state(pipe, state); trace_dump_arg(ptr, pipe); trace_dump_arg(depth_stencil_alpha_state, state); @@ -610,7 +637,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_depth_stencil_alpha_state(pipe, state);; + 
pipe->bind_depth_stencil_alpha_state(pipe, state); trace_dump_call_end(); } @@ -628,7 +655,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_depth_stencil_alpha_state(pipe, state);; + pipe->delete_depth_stencil_alpha_state(pipe, state); trace_dump_call_end(); } @@ -647,7 +674,7 @@ trace_context_create_fs_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(shader_state, state); - result = pipe->create_fs_state(pipe, state);; + result = pipe->create_fs_state(pipe, state); trace_dump_ret(ptr, result); @@ -750,7 +777,7 @@ trace_context_bind_vs_state(struct pipe_context *_pipe, if (tr_shdr && tr_shdr->replaced) state = tr_shdr->replaced; - pipe->bind_vs_state(pipe, state);; + pipe->bind_vs_state(pipe, state); trace_dump_call_end(); } @@ -770,7 +797,7 @@ trace_context_delete_vs_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_vs_state(pipe, state);; + pipe->delete_vs_state(pipe, state); trace_dump_call_end(); @@ -790,7 +817,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(blend_color, state); - pipe->set_blend_color(pipe, state);; + pipe->set_blend_color(pipe, state); trace_dump_call_end(); } @@ -808,7 +835,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(clip_state, state); - pipe->set_clip_state(pipe, state);; + pipe->set_clip_state(pipe, state); trace_dump_call_end(); } @@ -880,7 +907,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(framebuffer_state, state); - pipe->set_framebuffer_state(pipe, state);; + pipe->set_framebuffer_state(pipe, state); trace_dump_call_end(); } @@ -898,7 +925,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(poly_stipple, state); - pipe->set_polygon_stipple(pipe, state);; + pipe->set_polygon_stipple(pipe, state); trace_dump_call_end(); } @@ -916,7 +943,7 @@ trace_context_set_scissor_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(scissor_state, state); - pipe->set_scissor_state(pipe, state);; + pipe->set_scissor_state(pipe, state); trace_dump_call_end(); } @@ -934,16 +961,16 @@ trace_context_set_viewport_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(viewport_state, state); - pipe->set_viewport_state(pipe, state);; + pipe->set_viewport_state(pipe, state); trace_dump_call_end(); } static INLINE void -trace_context_set_sampler_textures(struct pipe_context *_pipe, - unsigned num_textures, - struct pipe_texture **textures) +trace_context_set_fragment_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_texture *tr_tex; @@ -959,13 +986,44 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe, } textures = unwrapped_textures; - trace_dump_call_begin("pipe_context", "set_sampler_textures"); + trace_dump_call_begin("pipe_context", "set_fragment_sampler_textures"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_textures); + trace_dump_arg_array(ptr, textures, num_textures); + + pipe->set_fragment_sampler_textures(pipe, num_textures, textures); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_vertex_sampler_textures(struct pipe_context *_pipe, + unsigned 
num_textures, + struct pipe_texture **textures) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_texture *tr_tex; + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned i; + + tr_ctx->curr.num_vert_texs = num_textures; + for(i = 0; i < num_textures; ++i) { + tr_tex = trace_texture(textures[i]); + tr_ctx->curr.vert_tex[i] = tr_tex; + unwrapped_textures[i] = tr_tex ? tr_tex->texture : NULL; + } + textures = unwrapped_textures; + + trace_dump_call_begin("pipe_context", "set_vertex_sampler_textures"); trace_dump_arg(ptr, pipe); trace_dump_arg(uint, num_textures); trace_dump_arg_array(ptr, textures, num_textures); - pipe->set_sampler_textures(pipe, num_textures, textures);; + pipe->set_vertex_sampler_textures(pipe, num_textures, textures); trace_dump_call_end(); } @@ -1024,7 +1082,7 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe, trace_dump_struct_array(vertex_element, elements, num_elements); trace_dump_arg_end(); - pipe->set_vertex_elements(pipe, num_elements, elements);; + pipe->set_vertex_elements(pipe, num_elements, elements); trace_dump_call_end(); } @@ -1085,7 +1143,7 @@ trace_context_surface_fill(struct pipe_context *_pipe, trace_dump_arg(uint, width); trace_dump_arg(uint, height); - pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);; + pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value); trace_dump_call_end(); } @@ -1128,7 +1186,7 @@ trace_context_flush(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(uint, flags); - pipe->flush(pipe, flags, fence);; + pipe->flush(pipe, flags, fence); if(fence) trace_dump_ret(ptr, *fence); @@ -1253,7 +1311,8 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; - tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states; + tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_fragment_sampler_states; + tr_ctx->base.bind_vertex_sampler_states = trace_context_bind_vertex_sampler_states; tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; @@ -1274,7 +1333,8 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; - tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; + tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures; + tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; if (pipe->surface_copy) diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 6febe4b411..852b480765 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -54,6 +54,9 @@ struct trace_context struct trace_texture *tex[PIPE_MAX_SAMPLERS]; unsigned num_texs; + struct trace_texture *vert_tex[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned 
num_vert_texs; + unsigned nr_cbufs; struct trace_texture *cbufs[PIPE_MAX_COLOR_BUFS]; struct trace_texture *zsbuf; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index bcf6751af4..6d58209294 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -83,15 +83,15 @@ void trace_dump_template(const struct pipe_texture *templat) trace_dump_member(format, templat, format); trace_dump_member_begin("width"); - trace_dump_array(uint, templat->width, 1); + trace_dump_uint(templat->width0); trace_dump_member_end(); trace_dump_member_begin("height"); - trace_dump_array(uint, templat->height, 1); + trace_dump_uint(templat->height0); trace_dump_member_end(); trace_dump_member_begin("depth"); - trace_dump_array(uint, templat->depth, 1); + trace_dump_uint(templat->depth0); trace_dump_member_end(); trace_dump_member_begin("block"); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index 81e0a6f3b0..b59458c0e3 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -200,9 +200,9 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, t = tr_tex->texture; rbug_send_texture_info_reply(tr_rbug->con, serial, t->target, t->format, - t->width, t->last_level + 1, - t->height, t->last_level + 1, - t->depth, t->last_level + 1, + &t->width0, 1, + &t->height0, 1, + &t->depth0, 1, t->block.width, t->block.height, t->block.size, t->last_level, t->nr_samples, |