diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2018-03-23 11:20:24 +0100 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2018-04-20 09:23:52 +0200 |
commit | 625dcbbc45665459737c9d028f268fd6782472f3 (patch) | |
tree | 2350e1d94b1f6ab348d755867ec7b5c910b42305 /src/amd | |
parent | f931583828f0ca9a3b135da0f2cda6a36ebbc877 (diff) |
amd/common: pass address components individually to ac_build_image_intrinsic
This is in preparation for the new image intrinsics.
Acked-by: Marek Olšák <marek.olsak@amd.com>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 101 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 14 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 365 |
3 files changed, 216 insertions, 264 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 77b0798943..22aac7cbdb 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -37,6 +37,7 @@ #include "util/bitscan.h" #include "util/macros.h" #include "util/u_atomic.h" +#include "util/u_math.h" #include "sid.h" #include "shader_enums.h" @@ -1445,14 +1446,61 @@ void ac_build_export_null(struct ac_llvm_context *ctx) ac_build_export(ctx, &args); } +static unsigned ac_num_coords(enum ac_image_dim dim) +{ + switch (dim) { + case ac_image_1d: + return 1; + case ac_image_2d: + case ac_image_1darray: + return 2; + case ac_image_3d: + case ac_image_cube: + case ac_image_2darray: + case ac_image_2dmsaa: + return 3; + case ac_image_2darraymsaa: + return 4; + default: + unreachable("ac_num_coords: bad dim"); + } +} + +static unsigned ac_num_derivs(enum ac_image_dim dim) +{ + switch (dim) { + case ac_image_1d: + case ac_image_1darray: + return 2; + case ac_image_2d: + case ac_image_2darray: + case ac_image_cube: + return 4; + case ac_image_3d: + return 6; + case ac_image_2dmsaa: + case ac_image_2darraymsaa: + default: + unreachable("derivatives not supported"); + } +} + LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) { - LLVMValueRef args[11]; - unsigned num_args = 0; + LLVMValueRef args[16]; const char *name = NULL; char intr_name[128], type[64]; + assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || + !a->level_zero); + assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip) || + a->lod); + assert((a->bias ? 1 : 0) + + (a->lod ? 1 : 0) + + (a->level_zero ? 1 : 0) + + (a->derivs[0] ? 1 : 0) <= 1); + bool sample = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || a->opcode == ac_image_get_lod; @@ -1463,10 +1511,38 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, if (a->opcode == ac_image_get_lod) da = false; + unsigned num_coords = + a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0; + LLVMValueRef addr; + unsigned num_addr = 0; + + if (a->offset) + args[num_addr++] = ac_to_integer(ctx, a->offset); + if (a->bias) + args[num_addr++] = ac_to_integer(ctx, a->bias); + if (a->compare) + args[num_addr++] = ac_to_integer(ctx, a->compare); + if (a->derivs[0]) { + unsigned num_derivs = ac_num_derivs(a->dim); + for (unsigned i = 0; i < num_derivs; ++i) + args[num_addr++] = ac_to_integer(ctx, a->derivs[i]); + } + for (unsigned i = 0; i < num_coords; ++i) + args[num_addr++] = ac_to_integer(ctx, a->coords[i]); + if (a->lod) + args[num_addr++] = ac_to_integer(ctx, a->lod); + + unsigned pad_goal = util_next_power_of_two(num_addr); + while (num_addr < pad_goal) + args[num_addr++] = LLVMGetUndef(ctx->i32); + + addr = ac_build_gather_values(ctx, args, num_addr); + + unsigned num_args = 0; if (sample) - args[num_args++] = ac_to_float(ctx, a->addr); + args[num_args++] = ac_to_float(ctx, addr); else - args[num_args++] = a->addr; + args[num_args++] = ac_to_integer(ctx, addr); args[num_args++] = a->resource; if (sample) @@ -1505,12 +1581,15 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type, sizeof(type)); + bool lod_suffix = + a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4); + snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32", name, a->compare ? ".c" : "", a->bias ? ".b" : - a->lod ? ".l" : - a->deriv ? ".d" : + lod_suffix ? ".l" : + a->derivs[0] ? ".d" : a->level_zero ? ".lz" : "", a->offset ? ".o" : "", type); @@ -2481,12 +2560,10 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, fmask_load.dmask = 0xf; fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d; - LLVMValueRef fmask_addr[4]; - memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3); - fmask_addr[3] = LLVMGetUndef(ac->i32); - - fmask_load.addr = ac_build_gather_values(ac, fmask_addr, - is_array_tex ? 4 : 2); + fmask_load.coords[0] = addr[0]; + fmask_load.coords[1] = addr[1]; + if (is_array_tex) + fmask_load.coords[2] = addr[2]; LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 328eddc9a7..1691a80938 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -331,18 +331,18 @@ enum ac_image_dim { struct ac_image_args { enum ac_image_opcode opcode; enum ac_image_dim dim; - bool level_zero; - bool bias; - bool lod; - bool deriv; - bool compare; - bool offset; LLVMValueRef resource; LLVMValueRef sampler; - LLVMValueRef addr; + LLVMValueRef offset; + LLVMValueRef bias; + LLVMValueRef compare; + LLVMValueRef derivs[6]; + LLVMValueRef coords[4]; + LLVMValueRef lod; // also used by ac_image_get_resinfo unsigned dmask; bool unorm; + bool level_zero; }; LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index de3754d72b..a0e1837999 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1152,12 +1152,9 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, const nir_tex_instr *instr) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); - LLVMValueRef coord = args->addr; LLVMValueRef half_texel[2]; LLVMValueRef compare_cube_wa = NULL; LLVMValueRef result; - int c; - unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare; //TODO Rect { @@ -1166,11 +1163,11 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array); txq_args.opcode = ac_image_get_resinfo; txq_args.dmask = 0xf; - txq_args.addr = ctx->i32_0; + txq_args.lod = ctx->i32_0; txq_args.resource = args->resource; LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); @@ -1180,19 +1177,14 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, } } - LLVMValueRef orig_coords = args->addr; + LLVMValueRef orig_coords[2] = { args->coords[0], args->coords[1] }; - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); - tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); - coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); + tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, ""); + args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); } - /* * Apparantly cube has issue with integer types that the workaround doesn't solve, * so this tests if the format is 8_8_8_8 and an integer type do an alternate @@ -1236,16 +1228,18 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, ""); /* don't modify the coordinates for this case */ - coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); + for (unsigned c = 0; c < 2; ++c) + args->coords[c] = LLVMBuildSelect( + ctx->builder, compare_cube_wa, + orig_coords[c], args->coords[c], ""); } - args->addr = coord; result = ac_build_image_opcode(ctx, args); if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { LLVMValueRef tmp, tmp2; /* if the cube workaround is in place, f2i the result. */ - for (c = 0; c < 4; c++) { + for (unsigned c = 0; c < 4; c++) { tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); if (stype == GLSL_TYPE_UINT) tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); @@ -1263,7 +1257,6 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr, - bool lod_is_zero, struct ac_image_args *args) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { @@ -1272,14 +1265,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (ctx->abi->gfx9_stride_size_workaround) { return ac_build_buffer_load_format_gfx9_safe(&ctx->ac, args->resource, - args->addr, + args->coords[0], ctx->ac.i32_0, util_last_bit(mask), false, true); } else { return ac_build_buffer_load_format(&ctx->ac, args->resource, - args->addr, + args->coords[0], ctx->ac.i32_0, util_last_bit(mask), false, true); @@ -1287,37 +1280,28 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, } args->opcode = ac_image_sample; - args->compare = instr->is_shadow; switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: case nir_texop_samples_identical: - args->opcode = lod_is_zero || + args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? ac_image_load : ac_image_load_mip; - args->compare = false; - args->offset = false; - break; - case nir_texop_txb: - args->bias = true; - break; - case nir_texop_txl: - if (lod_is_zero) - args->level_zero = true; - else - args->lod = true; + args->level_zero = false; break; case nir_texop_txs: case nir_texop_query_levels: args->opcode = ac_image_get_resinfo; + if (!args->lod) + args->lod = ctx->ac.i32_0; + args->level_zero = false; break; case nir_texop_tex: - if (ctx->stage != MESA_SHADER_FRAGMENT) + if (ctx->stage != MESA_SHADER_FRAGMENT) { + assert(!args->lod); args->level_zero = true; - break; - case nir_texop_txd: - args->deriv = true; + } break; case nir_texop_tg4: args->opcode = ac_image_gather4; @@ -1325,8 +1309,6 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, break; case nir_texop_lod: args->opcode = ac_image_get_lod; - args->compare = false; - args->offset = false; break; default: break; @@ -2081,23 +2063,18 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, LLVMValueRef sample_index, LLVMValueRef fmask_desc_ptr) { - LLVMValueRef fmask_load_address[4]; + struct ac_image_args args = {0}; LLVMValueRef res; - fmask_load_address[0] = coord_x; - fmask_load_address[1] = coord_y; - if (coord_z) { - fmask_load_address[2] = coord_z; - fmask_load_address[3] = LLVMGetUndef(ctx->i32); - } - - struct ac_image_args args = {0}; + args.coords[0] = coord_x; + args.coords[1] = coord_y; + if (coord_z) + args.coords[2] = coord_z; args.opcode = ac_image_load; args.dim = coord_z ? ac_image_2darray : ac_image_2d; args.resource = fmask_desc_ptr; args.dmask = 0xf; - args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2); res = ac_build_image_opcode(ctx, &args); @@ -2447,7 +2424,7 @@ static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; + args.lod = ctx->ac.i32_0; return ac_build_image_opcode(&ctx->ac, &args); } @@ -2471,7 +2448,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, args.dmask = 0xf; args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; + args.lod = ctx->ac.i32_0; res = ac_build_image_opcode(&ctx->ac, &args); @@ -3217,38 +3194,6 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, desc_type, image, write, bindless); } -static void set_tex_fetch_args(struct ac_llvm_context *ctx, - struct ac_image_args *args, - const nir_tex_instr *instr, - nir_texop op, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) -{ - unsigned is_rect = 0; - - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); - - if (count > 1) - args->addr = ac_build_gather_values(ctx, param, count); - else - args->addr = param[0]; - - args->resource = res_ptr; - args->sampler = samp_ptr; - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { - args->addr = param[0]; - return; - } - - args->dmask = dmask; - args->unorm = is_rect; - args->dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array); -} - /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * * SI-CI: @@ -3313,43 +3258,41 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) { LLVMValueRef result = NULL; struct ac_image_args args = { 0 }; - unsigned dmask = 0xf; - LLVMValueRef address[16]; - LLVMValueRef coords[5]; - LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; - LLVMValueRef bias = NULL, offsets = NULL; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; + LLVMValueRef fmask_ptr = NULL, sample_index = NULL; LLVMValueRef ddx = NULL, ddy = NULL; - LLVMValueRef derivs[6]; - unsigned chan, count = 0; - unsigned const_src = 0, num_deriv_comp = 0; - bool lod_is_zero = false; + unsigned offset_src = 0; - tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr); for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { - case nir_tex_src_coord: - coord = get_src(ctx, instr->src[i].src); + case nir_tex_src_coord: { + LLVMValueRef coord = get_src(ctx, instr->src[i].src); + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); break; + } case nir_tex_src_projector: break; case nir_tex_src_comparator: - comparator = get_src(ctx, instr->src[i].src); + if (instr->is_shadow) + args.compare = get_src(ctx, instr->src[i].src); break; case nir_tex_src_offset: - offsets = get_src(ctx, instr->src[i].src); - const_src = i; + args.offset = get_src(ctx, instr->src[i].src); + offset_src = i; break; case nir_tex_src_bias: - bias = get_src(ctx, instr->src[i].src); + if (instr->op == nir_texop_txb) + args.bias = get_src(ctx, instr->src[i].src); break; case nir_tex_src_lod: { nir_const_value *val = nir_src_as_const_value(instr->src[i].src); if (val && val->i32[0] == 0) - lod_is_zero = true; - lod = get_src(ctx, instr->src[i].src); + args.level_zero = true; + else + args.lod = get_src(ctx, instr->src[i].src); break; } case nir_tex_src_ms_index: @@ -3359,7 +3302,6 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) break; case nir_tex_src_ddx: ddx = get_src(ctx, instr->src[i].src); - num_deriv_comp = instr->src[i].src.ssa->num_components; break; case nir_tex_src_ddy: ddy = get_src(ctx, instr->src[i].src); @@ -3373,13 +3315,13 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - result = get_buffer_size(ctx, res_ptr, true); + result = get_buffer_size(ctx, args.resource, true); goto write_result; } if (instr->op == nir_texop_texture_samples) { LLVMValueRef res, samples, is_msaa; - res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); + res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, ""); samples = LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), ""); is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, @@ -3401,18 +3343,14 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) goto write_result; } - if (coord) - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); - - if (offsets && instr->op != nir_texop_txf) { + if (args.offset && instr->op != nir_texop_txf) { LLVMValueRef offset[3], pack; - for (chan = 0; chan < 3; ++chan) + for (unsigned chan = 0; chan < 3; ++chan) offset[chan] = ctx->ac.i32_0; - args.offset = true; - for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { - offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); + unsigned num_components = ac_get_llvm_num_components(args.offset); + for (unsigned chan = 0; chan < num_components; chan++) { + offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan); offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); if (chan) @@ -3421,31 +3359,18 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; - + args.offset = pack; } - /* pack LOD bias value */ - if (instr->op == nir_texop_txb && bias) { - address[count++] = bias; - } - - /* Pack depth comparison value */ - if (instr->is_shadow && comparator) { - LLVMValueRef z = ac_to_float(&ctx->ac, - ac_llvm_extract_elem(&ctx->ac, comparator, 0)); - - /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, - * so the depth comparison value isn't clamped for Z16 and - * Z24 anymore. Do it manually here. - * - * It's unnecessary if the original texture format was - * Z32_FLOAT, but we don't know that here. - */ - if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) - z = ac_build_clamp(&ctx->ac, z); - address[count++] = z; - } + /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, + * so the depth comparison value isn't clamped for Z16 and + * Z24 anymore. Do it manually here. + * + * It's unnecessary if the original texture format was + * Z32_FLOAT, but we don't know that here. + */ + if (args.compare && ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) + args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare)); /* pack derivatives */ if (ddx || ddy) { @@ -3453,7 +3378,6 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_3D: case GLSL_SAMPLER_DIM_CUBE: - num_deriv_comp = 3; num_src_deriv_channels = 3; num_dest_deriv_channels = 3; break; @@ -3461,121 +3385,76 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) default: num_src_deriv_channels = 2; num_dest_deriv_channels = 2; - num_deriv_comp = 2; break; case GLSL_SAMPLER_DIM_1D: num_src_deriv_channels = 1; if (ctx->ac.chip_class >= GFX9) { num_dest_deriv_channels = 2; - num_deriv_comp = 2; } else { num_dest_deriv_channels = 1; - num_deriv_comp = 1; } break; } for (unsigned i = 0; i < num_src_deriv_channels; i++) { - derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); - derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); + args.derivs[i] = ac_to_float(&ctx->ac, + ac_llvm_extract_elem(&ctx->ac, ddx, i)); + args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, + ac_llvm_extract_elem(&ctx->ac, ddy, i)); } for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { - derivs[i] = ctx->ac.f32_0; - derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; + args.derivs[i] = ctx->ac.f32_0; + args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; } } - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_to_float(&ctx->ac, coords[chan]); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) { + for (unsigned chan = 0; chan < instr->coord_components; chan++) + args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]); if (instr->coord_components == 3) - coords[3] = LLVMGetUndef(ctx->ac.f32); + args.coords[3] = LLVMGetUndef(ctx->ac.f32); ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array, - instr->op == nir_texop_lod, coords, derivs); - if (num_deriv_comp) - num_deriv_comp--; + instr->op == nir_texop_lod, args.coords, args.derivs); } - if (ddx || ddy) { - for (unsigned i = 0; i < num_deriv_comp * 2; i++) - address[count++] = derivs[i]; - } - - /* Pack texture coordinates */ - if (coord) { - address[count++] = coords[0]; - if (instr->coord_components > 1) { - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { - coords[1] = apply_round_slice(&ctx->ac, coords[1]); - } - address[count++] = coords[1]; - } - if (instr->coord_components > 2) { - if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && - instr->is_array && - instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { - coords[2] = apply_round_slice(&ctx->ac, coords[2]); - } - address[count++] = coords[2]; - } - - if (ctx->ac.chip_class >= GFX9) { - LLVMValueRef filler; - if (instr->op == nir_texop_txf) - filler = ctx->ac.i32_0; - else - filler = LLVMConstReal(ctx->ac.f32, 0.5); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) { - /* No nir_texop_lod, because it does not take a slice - * even with array textures. */ - if (instr->is_array && instr->op != nir_texop_lod ) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; - } else - address[count++] = filler; - } - } + /* Texture coordinates fixups */ + if (instr->coord_components > 2 && + (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || + instr->sampler_dim == GLSL_SAMPLER_DIM_MS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && + instr->is_array && + instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { + args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]); } - /* Pack LOD */ - if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) { - address[count++] = lod; - } else if (instr->op == nir_texop_txf_ms && sample_index) { - address[count++] = sample_index; - } else if(instr->op == nir_texop_txs) { - count = 0; - if (lod) - address[count++] = lod; + if (ctx->ac.chip_class >= GFX9 && + instr->sampler_dim == GLSL_SAMPLER_DIM_1D && + instr->op != nir_texop_lod) { + LLVMValueRef filler; + if (instr->op == nir_texop_txf) + filler = ctx->ac.i32_0; else - address[count++] = ctx->ac.i32_0; - } + filler = LLVMConstReal(ctx->ac.f32, 0.5); - for (chan = 0; chan < count; chan++) { - address[chan] = LLVMBuildBitCast(ctx->ac.builder, - address[chan], ctx->ac.i32, ""); + if (instr->is_array) + args.coords[2] = args.coords[1]; + args.coords[1] = filler; } + /* Pack sample index */ + if (instr->op == nir_texop_txf_ms && sample_index) + args.coords[instr->coord_components] = sample_index; + if (instr->op == nir_texop_samples_identical) { - LLVMValueRef txf_address[4]; struct ac_image_args txf_args = { 0 }; - unsigned txf_count = count; - memcpy(txf_address, address, sizeof(txf_address)); - - if (!instr->is_array) - txf_address[2] = ctx->ac.i32_0; - txf_address[3] = ctx->ac.i32_0; - - set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, - fmask_ptr, NULL, - txf_address, txf_count, 0xf); + memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords)); - result = build_tex_intrinsic(ctx, instr, false, &txf_args); + txf_args.dmask = 0xf; + txf_args.resource = fmask_ptr; + txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d; + result = build_tex_intrinsic(ctx, instr, &txf_args); result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); @@ -3585,42 +3464,38 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && instr->op != nir_texop_txs) { unsigned sample_chan = instr->is_array ? 3 : 2; - address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, - address[0], - address[1], - instr->is_array ? address[2] : NULL, - address[sample_chan], - fmask_ptr); + args.coords[sample_chan] = adjust_sample_index_using_fmask( + &ctx->ac, args.coords[0], args.coords[1], + instr->is_array ? args.coords[2] : NULL, + args.coords[sample_chan], fmask_ptr); } - if (offsets && instr->op == nir_texop_txf) { + if (args.offset && instr->op == nir_texop_txf) { nir_const_value *const_offset = - nir_src_as_const_value(instr->src[const_src].src); - int num_offsets = instr->src[const_src].src.ssa->num_components; + nir_src_as_const_value(instr->src[offset_src].src); + int num_offsets = instr->src[offset_src].src.ssa->num_components; assert(const_offset); num_offsets = MIN2(num_offsets, instr->coord_components); - if (num_offsets > 2) - address[2] = LLVMBuildAdd(ctx->ac.builder, - address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), ""); - if (num_offsets > 1) - address[1] = LLVMBuildAdd(ctx->ac.builder, - address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), ""); - address[0] = LLVMBuildAdd(ctx->ac.builder, - address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), ""); - + for (unsigned i = 0; i < num_offsets; ++i) { + args.coords[i] = LLVMBuildAdd( + ctx->ac.builder, args.coords[i], + LLVMConstInt(ctx->ac.i32, const_offset->i32[i], false), ""); + } + args.offset = NULL; } /* TODO TG4 support */ + args.dmask = 0xf; if (instr->op == nir_texop_tg4) { if (instr->is_shadow) - dmask = 1; + args.dmask = 1; else - dmask = 1 << instr->component; + args.dmask = 1 << instr->component; } - set_tex_fetch_args(&ctx->ac, &args, instr, instr->op, - res_ptr, samp_ptr, address, count, dmask); - result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args); + if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) + args.dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array); + result = build_tex_intrinsic(ctx, instr, &args); if (instr->op == nir_texop_query_levels) result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); |