diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-09-15 18:34:48 +0200 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-09-16 13:18:42 +0200 |
commit | 15bd7157d2dd779cb7608b3644e7b41d17f8e5e2 (patch) | |
tree | 403b3c1276d5ea222d30a588760e283299396789 | |
parent | dd26e4273b71046de37d3e2d7b90fe86dcc85d10 (diff) |
tgsi: clarify the semantics of DFRACEXP
The status quo is quite the mess:
1. tgsi_exec will do a per-channel computation, and store the dst[0]
result (significand) correctly for each channel. The dst[1] result
(exponent) will be written to the first bit set in the writemask.
So per-component calculation only works partially.
2. r600 will only do a single computation. It will replicate the
exponent but not the significand.
3. The docs pretend that there's per-component calculation, but they even
get dst[0] and dst[1] confused.
4. Luckily, st_glsl_to_tgsi only ever emits single-component instructions,
and kind-of assumes that everything is replicated, generating this for
the dvec4 case:
DFRACEXP TEMP[0].xy, TEMP[1].x, CONST[0][0].xyxy
DFRACEXP TEMP[0].zw, TEMP[1].y, CONST[0][0].zwzw
DFRACEXP TEMP[2].xy, TEMP[1].z, CONST[0][1].xyxy
DFRACEXP TEMP[2].zw, TEMP[1].w, CONST[0][1].zwzw
Settle on the simplest behavior, which is single-component calculation
with replication, document it, and adjust tgsi_exec and r600.
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 2 | ||||
-rw-r--r-- | src/gallium/docs/source/tgsi.rst | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 14 |
4 files changed, 20 insertions, 22 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1264df0c62..2a47f5dfae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3688,17 +3688,15 @@ exec_dfracexp(struct tgsi_exec_machine *mach, union tgsi_double_channel dst; union tgsi_exec_channel dst_exp; - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - micro_dfracexp(&dst, &dst_exp, &src); + fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); + micro_dfracexp(&dst, &dst_exp, &src); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); - store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); - } - if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) { - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); - micro_dfracexp(&dst, &dst_exp, &src); + if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); - store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT); + for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[1].Register.WriteMask & (1 << chan)) + store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h b/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h index a4a9771175..3f39afe219 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h +++ b/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h @@ -212,7 +212,7 @@ OPCODE(1, 1, COMP, DSQRT) OPCODE(1, 3, COMP, DMAD) OPCODE(1, 1, COMP, DFRAC) OPCODE(1, 2, COMP, 
DLDEXP) -OPCODE(2, 1, COMP, DFRACEXP) +OPCODE(2, 1, REPL, DFRACEXP) OPCODE(1, 1, COMP, D2I) OPCODE(1, 1, COMP, I2D) OPCODE(1, 1, COMP, D2U) diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 8633c929b9..fd78c40ba3 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1838,17 +1838,15 @@ two-component vectors with doubled precision in each component. Like the ``frexp()`` routine in many math libraries, this opcode stores the exponent of its source to ``dst0``, and the significand to ``dst1``, such that -:math:`dst1 \times 2^{dst0} = src` . +:math:`dst1 \times 2^{dst0} = src` . The results are replicated across +channels. .. math:: - dst0.xy = exp(src.xy) + dst0.xy = dst.zw = frac(src.xy) - dst1.xy = frac(src.xy) + dst1 = frac(src.xy) - dst0.zw = exp(src.zw) - - dst1.zw = frac(src.zw) .. opcode:: DLDEXP - Multiply Number by Integral Power of 2 diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c9c922fc02..188fbc9d47 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4045,7 +4045,6 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx) struct r600_bytecode_alu alu; unsigned write_mask = inst->Dst[0].Register.WriteMask; int i, j, r; - int firsti = write_mask == 0xc ? 2 : 0; for (i = 0; i <= 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -4066,15 +4065,18 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx) return r; } - /* MOV first two channels to writemask dst0 */ - for (i = 0; i <= 1; i++) { + /* Replicate significand result across channels. 
*/ + for (i = 0; i <= 3; i++) { + if (!(write_mask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; - alu.src[0].chan = i + 2; + alu.src[0].chan = (i & 1) + 2; alu.src[0].sel = ctx->temp_reg; - tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = 1; alu.last = 1; r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) |