amd/common: pass address components individually to ac_build_image_intrinsic

This is in preparation for the new image intrinsics.

Acked-by: Marek Olšák <marek.olsak@amd.com>
author: Nicolai Hähnle <nicolai.haehnle@amd.com> 2018-03-23 11:20:24 +0100
committer: Nicolai Hähnle <nicolai.haehnle@amd.com> 2018-04-20 09:23:52 +0200
commit: 625dcbbc45665459737c9d028f268fd6782472f3 (patch)
tree: 2350e1d94b1f6ab348d755867ec7b5c910b42305 /src/amd
parent: f931583828f0ca9a3b135da0f2cda6a36ebbc877 (diff)
3 files changed, 216 insertions, 264 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 77b0798943..22aac7cbdb 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -37,6 +37,7 @@
 #include "util/bitscan.h"
 #include "util/macros.h"
 #include "util/u_atomic.h"
+#include "util/u_math.h"
 #include "sid.h"
 
 #include "shader_enums.h"
@@ -1445,14 +1446,61 @@ void ac_build_export_null(struct ac_llvm_context *ctx)
 	ac_build_export(ctx, &args);
 }
 
+static unsigned ac_num_coords(enum ac_image_dim dim) /* Number of a->coords[] entries that ac_build_image_opcode packs into the address vector for the given image dimension. */
+{
+	switch (dim) {
+	case ac_image_1d:
+		return 1;
+	case ac_image_2d:
+	case ac_image_1darray:
+		 return 2;
+	case ac_image_3d:
+	case ac_image_cube:
+	case ac_image_2darray:
+	case ac_image_2dmsaa: /* visit_tex stores the sample index in coords[2] for non-array MSAA */
+		return 3;
+	case ac_image_2darraymsaa: /* visit_tex stores the sample index in coords[3] for array MSAA */
+		return 4;
+	default: /* any enum value not listed above is treated as a programming error */
+		unreachable("ac_num_coords: bad dim");
+	}
+}
+
+static unsigned ac_num_derivs(enum ac_image_dim dim) /* Number of a->derivs[] entries that ac_build_image_opcode packs into the address vector; visit_tex fills the ddx channels first, then the ddy channels. */
+{
+	switch (dim) {
+	case ac_image_1d:
+	case ac_image_1darray:
+		return 2; /* 1 channel each for ddx and ddy */
+	case ac_image_2d:
+	case ac_image_2darray:
+	case ac_image_cube: /* NOTE(review): presumably 2 channels because ac_prepare_cube_coords rewrites derivs per face - confirm */
+		return 4; /* 2 channels each for ddx and ddy */
+	case ac_image_3d:
+		return 6; /* 3 channels each for ddx and ddy */
+	case ac_image_2dmsaa: /* MSAA dims deliberately share the default: no derivative count is defined for them */
+	case ac_image_2darraymsaa:
+	default:
+		unreachable("derivatives not supported");
+	}
+}
+
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 				   struct ac_image_args *a)
 {
-	LLVMValueRef args[11];
-	unsigned num_args = 0;
+	LLVMValueRef args[16];
 	const char *name = NULL;
 	char intr_name[128], type[64];
 
+	assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
+	       !a->level_zero);
+	assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip) ||
+	       a->lod);
+	assert((a->bias ? 1 : 0) +
+	       (a->lod ? 1 : 0) +
+	       (a->level_zero ? 1 : 0) +
+	       (a->derivs[0] ? 1 : 0) <= 1);
+
 	bool sample = a->opcode == ac_image_sample ||
 		      a->opcode == ac_image_gather4 ||
 		      a->opcode == ac_image_get_lod;
@@ -1463,10 +1511,38 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 	if (a->opcode == ac_image_get_lod)
 		da = false;
 
+	unsigned num_coords =
+		a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0;
+	LLVMValueRef addr;
+	unsigned num_addr = 0;
+
+	if (a->offset)
+		args[num_addr++] = ac_to_integer(ctx, a->offset);
+	if (a->bias)
+		args[num_addr++] = ac_to_integer(ctx, a->bias);
+	if (a->compare)
+		args[num_addr++] = ac_to_integer(ctx, a->compare);
+	if (a->derivs[0]) {
+		unsigned num_derivs = ac_num_derivs(a->dim);
+		for (unsigned i = 0; i < num_derivs; ++i)
+			args[num_addr++] = ac_to_integer(ctx, a->derivs[i]);
+	}
+	for (unsigned i = 0; i < num_coords; ++i)
+		args[num_addr++] = ac_to_integer(ctx, a->coords[i]);
+	if (a->lod)
+		args[num_addr++] = ac_to_integer(ctx, a->lod);
+
+	unsigned pad_goal = util_next_power_of_two(num_addr);
+	while (num_addr < pad_goal)
+		args[num_addr++] = LLVMGetUndef(ctx->i32);
+
+	addr = ac_build_gather_values(ctx, args, num_addr);
+
+	unsigned num_args = 0;
 	if (sample)
-		args[num_args++] = ac_to_float(ctx, a->addr);
+		args[num_args++] = ac_to_float(ctx, addr);
 	else
-		args[num_args++] = a->addr;
+		args[num_args++] = ac_to_integer(ctx, addr);
 
 	args[num_args++] = a->resource;
 	if (sample)
@@ -1505,12 +1581,15 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 	ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
 				    sizeof(type));
 
+	bool lod_suffix =
+		a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
+
 	snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
 		name,
 		a->compare ? ".c" : "",
 		a->bias ? ".b" :
-		a->lod ? ".l" :
-		a->deriv ? ".d" :
+		lod_suffix ? ".l" :
+		a->derivs[0] ? ".d" :
 		a->level_zero ? ".lz" : "",
 		a->offset ? ".o" : "",
 		type);
@@ -2481,12 +2560,10 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
 	fmask_load.dmask = 0xf;
 	fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
 
-	LLVMValueRef fmask_addr[4];
-	memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3);
-	fmask_addr[3] = LLVMGetUndef(ac->i32);
-
-	fmask_load.addr = ac_build_gather_values(ac, fmask_addr,
-						 is_array_tex ? 4 : 2);
+	fmask_load.coords[0] = addr[0];
+	fmask_load.coords[1] = addr[1];
+	if (is_array_tex)
+		fmask_load.coords[2] = addr[2];
 
 	LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
 	fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 328eddc9a7..1691a80938 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -331,18 +331,18 @@ enum ac_image_dim {
 struct ac_image_args {
 	enum ac_image_opcode opcode;
 	enum ac_image_dim dim;
-	bool level_zero;
-	bool bias;
-	bool lod;
-	bool deriv;
-	bool compare;
-	bool offset;
 
 	LLVMValueRef resource;
 	LLVMValueRef sampler;
-	LLVMValueRef addr;
+	LLVMValueRef offset; /* optional (NULL if unused); packed first into the address and selects the ".o" intrinsic suffix */
+	LLVMValueRef bias; /* optional; selects the ".b" suffix; ac_build_image_opcode asserts at most one of bias, lod, level_zero, derivs is set */
+	LLVMValueRef compare; /* optional; selects the ".c" suffix */
+	LLVMValueRef derivs[6]; /* optional; derivs[0] != NULL selects ".d"; ac_num_derivs(dim) entries are read */
+	LLVMValueRef coords[4]; /* ac_num_coords(dim) entries are read, except for ac_image_get_resinfo which reads none */
+	LLVMValueRef lod; // also used by ac_image_get_resinfo
 	unsigned dmask;
 	bool unorm;
+	bool level_zero; /* selects the ".lz" suffix when bias, lod and derivs are all unset */
 };
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index de3754d72b..a0e1837999 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1152,12 +1152,9 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 					  const nir_tex_instr *instr)
 {
 	enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
-	LLVMValueRef coord = args->addr;
 	LLVMValueRef half_texel[2];
 	LLVMValueRef compare_cube_wa = NULL;
 	LLVMValueRef result;
-	int c;
-	unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
 
 	//TODO Rect
 	{
@@ -1166,11 +1163,11 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 		txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array);
 		txq_args.opcode = ac_image_get_resinfo;
 		txq_args.dmask = 0xf;
-		txq_args.addr = ctx->i32_0;
+		txq_args.lod = ctx->i32_0;
 		txq_args.resource = args->resource;
 		LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
 
-		for (c = 0; c < 2; c++) {
+		for (unsigned c = 0; c < 2; c++) {
 			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
 								LLVMConstInt(ctx->i32, c, false), "");
 			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
@@ -1180,19 +1177,14 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 		}
 	}
 
-	LLVMValueRef orig_coords = args->addr;
+	LLVMValueRef orig_coords[2] = { args->coords[0], args->coords[1] };
 
-	for (c = 0; c < 2; c++) {
+	for (unsigned c = 0; c < 2; c++) {
 		LLVMValueRef tmp;
-		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
-		tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
-		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
-		tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
-		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
-		coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
+		tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
+		args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
 	}
 
-
 	/*
 	 * Apparantly cube has issue with integer types that the workaround doesn't solve,
 	 * so this tests if the format is 8_8_8_8 and an integer type do an alternate
@@ -1236,16 +1228,18 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 		args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");
 
 		/* don't modify the coordinates for this case */
-		coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
+		for (unsigned c = 0; c < 2; ++c)
+			args->coords[c] = LLVMBuildSelect(
+				ctx->builder, compare_cube_wa,
+				orig_coords[c], args->coords[c], "");
 	}
-	args->addr = coord;
 	result = ac_build_image_opcode(ctx, args);
 
 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
 		LLVMValueRef tmp, tmp2;
 
 		/* if the cube workaround is in place, f2i the result. */
-		for (c = 0; c < 4; c++) {
+		for (unsigned c = 0; c < 4; c++) {
 			tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
 			if (stype == GLSL_TYPE_UINT)
 				tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
@@ -1263,7 +1257,6 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 
 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 					const nir_tex_instr *instr,
-					bool lod_is_zero,
 					struct ac_image_args *args)
 {
 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
@@ -1272,14 +1265,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 		if (ctx->abi->gfx9_stride_size_workaround) {
 			return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
 			                                             args->resource,
-			                                             args->addr,
+			                                             args->coords[0],
 			                                             ctx->ac.i32_0,
 			                                             util_last_bit(mask),
 			                                             false, true);
 		} else {
 			return ac_build_buffer_load_format(&ctx->ac,
 			                                   args->resource,
-			                                   args->addr,
+			                                   args->coords[0],
 			                                   ctx->ac.i32_0,
 			                                   util_last_bit(mask),
 			                                   false, true);
@@ -1287,37 +1280,28 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 	}
 
 	args->opcode = ac_image_sample;
-	args->compare = instr->is_shadow;
 
 	switch (instr->op) {
 	case nir_texop_txf:
 	case nir_texop_txf_ms:
 	case nir_texop_samples_identical:
-		args->opcode = lod_is_zero ||
+		args->opcode = args->level_zero ||
 			       instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
 					ac_image_load : ac_image_load_mip;
-		args->compare = false;
-		args->offset = false;
-		break;
-	case nir_texop_txb:
-		args->bias = true;
-		break;
-	case nir_texop_txl:
-		if (lod_is_zero)
-			args->level_zero = true;
-		else
-			args->lod = true;
+		args->level_zero = false;
 		break;
 	case nir_texop_txs:
 	case nir_texop_query_levels:
 		args->opcode = ac_image_get_resinfo;
+		if (!args->lod)
+			args->lod = ctx->ac.i32_0;
+		args->level_zero = false;
 		break;
 	case nir_texop_tex:
-		if (ctx->stage != MESA_SHADER_FRAGMENT)
+		if (ctx->stage != MESA_SHADER_FRAGMENT) {
+			assert(!args->lod);
 			args->level_zero = true;
-		break;
-	case nir_texop_txd:
-		args->deriv = true;
+		}
 		break;
 	case nir_texop_tg4:
 		args->opcode = ac_image_gather4;
@@ -1325,8 +1309,6 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 		break;
 	case nir_texop_lod:
 		args->opcode = ac_image_get_lod;
-		args->compare = false;
-		args->offset = false;
 		break;
 	default:
 		break;
@@ -2081,23 +2063,18 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
 						    LLVMValueRef sample_index,
 						    LLVMValueRef fmask_desc_ptr)
 {
-	LLVMValueRef fmask_load_address[4];
+	struct ac_image_args args = {0};
 	LLVMValueRef res;
 
-	fmask_load_address[0] = coord_x;
-	fmask_load_address[1] = coord_y;
-	if (coord_z) {
-		fmask_load_address[2] = coord_z;
-		fmask_load_address[3] = LLVMGetUndef(ctx->i32);
-	}
-
-	struct ac_image_args args = {0};
+	args.coords[0] = coord_x;
+	args.coords[1] = coord_y;
+	if (coord_z)
+		args.coords[2] = coord_z;
 
 	args.opcode = ac_image_load;
 	args.dim = coord_z ? ac_image_2darray : ac_image_2d;
 	args.resource = fmask_desc_ptr;
 	args.dmask = 0xf;
-	args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
 
 	res = ac_build_image_opcode(ctx, &args);
 
@@ -2447,7 +2424,7 @@ static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
 	args.resource = get_sampler_desc(ctx, instr->variables[0],
 					 AC_DESC_IMAGE, NULL, true, false);
 	args.opcode = ac_image_get_resinfo;
-	args.addr = ctx->ac.i32_0;
+	args.lod = ctx->ac.i32_0;
 
 	return ac_build_image_opcode(&ctx->ac, &args);
 }
@@ -2471,7 +2448,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
 	args.dmask = 0xf;
 	args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
 	args.opcode = ac_image_get_resinfo;
-	args.addr = ctx->ac.i32_0;
+	args.lod = ctx->ac.i32_0;
 
 	res = ac_build_image_opcode(&ctx->ac, &args);
 
@@ -3217,38 +3194,6 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
 					  desc_type, image, write, bindless);
 }
 
-static void set_tex_fetch_args(struct ac_llvm_context *ctx,
-			       struct ac_image_args *args,
-			       const nir_tex_instr *instr,
-			       nir_texop op,
-			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
-			       LLVMValueRef *param, unsigned count,
-			       unsigned dmask)
-{
-	unsigned is_rect = 0;
-
-	/* Pad to power of two vector */
-	while (count < util_next_power_of_two(count))
-		param[count++] = LLVMGetUndef(ctx->i32);
-
-	if (count > 1)
-		args->addr = ac_build_gather_values(ctx, param, count);
-	else
-		args->addr = param[0];
-
-	args->resource = res_ptr;
-	args->sampler = samp_ptr;
-
-	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
-		args->addr = param[0];
-		return;
-	}
-
-	args->dmask = dmask;
-	args->unorm = is_rect;
-	args->dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array);
-}
-
 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
  *
  * SI-CI:
@@ -3313,43 +3258,41 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 {
 	LLVMValueRef result = NULL;
 	struct ac_image_args args = { 0 };
-	unsigned dmask = 0xf;
-	LLVMValueRef address[16];
-	LLVMValueRef coords[5];
-	LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
-	LLVMValueRef bias = NULL, offsets = NULL;
-	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
+	LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
 	LLVMValueRef ddx = NULL, ddy = NULL;
-	LLVMValueRef derivs[6];
-	unsigned chan, count = 0;
-	unsigned const_src = 0, num_deriv_comp = 0;
-	bool lod_is_zero = false;
+	unsigned offset_src = 0;
 
-	tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
+	tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr);
 
 	for (unsigned i = 0; i < instr->num_srcs; i++) {
 		switch (instr->src[i].src_type) {
-		case nir_tex_src_coord:
-			coord = get_src(ctx, instr->src[i].src);
+		case nir_tex_src_coord: {
+			LLVMValueRef coord = get_src(ctx, instr->src[i].src);
+			for (unsigned chan = 0; chan < instr->coord_components; ++chan)
+				args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
 			break;
+		}
 		case nir_tex_src_projector:
 			break;
 		case nir_tex_src_comparator:
-			comparator = get_src(ctx, instr->src[i].src);
+			if (instr->is_shadow)
+				args.compare = get_src(ctx, instr->src[i].src);
 			break;
 		case nir_tex_src_offset:
-			offsets = get_src(ctx, instr->src[i].src);
-			const_src = i;
+			args.offset = get_src(ctx, instr->src[i].src);
+			offset_src = i;
 			break;
 		case nir_tex_src_bias:
-			bias = get_src(ctx, instr->src[i].src);
+			if (instr->op == nir_texop_txb)
+				args.bias = get_src(ctx, instr->src[i].src);
 			break;
 		case nir_tex_src_lod: {
 			nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
 
 			if (val && val->i32[0] == 0)
-				lod_is_zero = true;
-			lod = get_src(ctx, instr->src[i].src);
+				args.level_zero = true;
+			else
+				args.lod = get_src(ctx, instr->src[i].src);
 			break;
 		}
 		case nir_tex_src_ms_index:
@@ -3359,7 +3302,6 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 			break;
 		case nir_tex_src_ddx:
 			ddx = get_src(ctx, instr->src[i].src);
-			num_deriv_comp = instr->src[i].src.ssa->num_components;
 			break;
 		case nir_tex_src_ddy:
 			ddy = get_src(ctx, instr->src[i].src);
@@ -3373,13 +3315,13 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 	}
 
 	if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-		result = get_buffer_size(ctx, res_ptr, true);
+		result = get_buffer_size(ctx, args.resource, true);
 		goto write_result;
 	}
 
 	if (instr->op == nir_texop_texture_samples) {
 		LLVMValueRef res, samples, is_msaa;
-		res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, "");
+		res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
 		samples = LLVMBuildExtractElement(ctx->ac.builder, res,
 						  LLVMConstInt(ctx->ac.i32, 3, false), "");
 		is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
@@ -3401,18 +3343,14 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 		goto write_result;
 	}
 
-	if (coord)
-		for (chan = 0; chan < instr->coord_components; chan++)
-			coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
-
-	if (offsets && instr->op != nir_texop_txf) {
+	if (args.offset && instr->op != nir_texop_txf) {
 		LLVMValueRef offset[3], pack;
-		for (chan = 0; chan < 3; ++chan)
+		for (unsigned chan = 0; chan < 3; ++chan)
 			offset[chan] = ctx->ac.i32_0;
 
-		args.offset = true;
-		for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
-			offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
+		unsigned num_components = ac_get_llvm_num_components(args.offset);
+		for (unsigned chan = 0; chan < num_components; chan++) {
+			offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
 			offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
 						    LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
 			if (chan)
@@ -3421,31 +3359,18 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 		}
 		pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
 		pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
-		address[count++] = pack;
-
+		args.offset = pack;
 	}
-	/* pack LOD bias value */
-	if (instr->op == nir_texop_txb && bias) {
-		address[count++] = bias;
-	}
-
-	/* Pack depth comparison value */
-	if (instr->is_shadow && comparator) {
-		LLVMValueRef z = ac_to_float(&ctx->ac,
-		                             ac_llvm_extract_elem(&ctx->ac, comparator, 0));
-
-		/* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
-		 * so the depth comparison value isn't clamped for Z16 and
-		 * Z24 anymore. Do it manually here.
-		 *
-		 * It's unnecessary if the original texture format was
-		 * Z32_FLOAT, but we don't know that here.
-		 */
-		if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
-			z = ac_build_clamp(&ctx->ac, z);
 
-		address[count++] = z;
-	}
+	/* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
+	 * so the depth comparison value isn't clamped for Z16 and
+	 * Z24 anymore. Do it manually here.
+	 *
+	 * It's unnecessary if the original texture format was
+	 * Z32_FLOAT, but we don't know that here.
+	 */
+	if (args.compare && ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
+		args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));
 
 	/* pack derivatives */
 	if (ddx || ddy) {
@@ -3453,7 +3378,6 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 		switch (instr->sampler_dim) {
 		case GLSL_SAMPLER_DIM_3D:
 		case GLSL_SAMPLER_DIM_CUBE:
-			num_deriv_comp = 3;
 			num_src_deriv_channels = 3;
 			num_dest_deriv_channels = 3;
 			break;
@@ -3461,121 +3385,76 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 		default:
 			num_src_deriv_channels = 2;
 			num_dest_deriv_channels = 2;
-			num_deriv_comp = 2;
 			break;
 		case GLSL_SAMPLER_DIM_1D:
 			num_src_deriv_channels = 1;
 			if (ctx->ac.chip_class >= GFX9) {
 				num_dest_deriv_channels = 2;
-				num_deriv_comp = 2;
 			} else {
 				num_dest_deriv_channels = 1;
-				num_deriv_comp = 1;
 			}
 			break;
 		}
 
 		for (unsigned i = 0; i < num_src_deriv_channels; i++) {
-			derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
-			derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
+			args.derivs[i] = ac_to_float(&ctx->ac,
+				ac_llvm_extract_elem(&ctx->ac, ddx, i));
+			args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
+				ac_llvm_extract_elem(&ctx->ac, ddy, i));
 		}
 		for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
-			derivs[i] = ctx->ac.f32_0;
-			derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
+			args.derivs[i] = ctx->ac.f32_0;
+			args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
 		}
 	}
 
-	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
-		for (chan = 0; chan < instr->coord_components; chan++)
-			coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
+	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
+		for (unsigned chan = 0; chan < instr->coord_components; chan++)
+			args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
 		if (instr->coord_components == 3)
-			coords[3] = LLVMGetUndef(ctx->ac.f32);
+			args.coords[3] = LLVMGetUndef(ctx->ac.f32);
 		ac_prepare_cube_coords(&ctx->ac,
 			instr->op == nir_texop_txd, instr->is_array,
-			instr->op == nir_texop_lod, coords, derivs);
-		if (num_deriv_comp)
-			num_deriv_comp--;
+			instr->op == nir_texop_lod, args.coords, args.derivs);
 	}
 
-	if (ddx || ddy) {
-		for (unsigned i = 0; i < num_deriv_comp * 2; i++)
-			address[count++] = derivs[i];
-	}
-
-	/* Pack texture coordinates */
-	if (coord) {
-		address[count++] = coords[0];
-		if (instr->coord_components > 1) {
-			if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
-				coords[1] = apply_round_slice(&ctx->ac, coords[1]);
-			}
-			address[count++] = coords[1];
-		}
-		if (instr->coord_components > 2) {
-			if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
-			     instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
-			     instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
-			     instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
-			    instr->is_array &&
-			    instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
-				coords[2] = apply_round_slice(&ctx->ac, coords[2]);
-			}
-			address[count++] = coords[2];
-		}
-
-		if (ctx->ac.chip_class >= GFX9) {
-			LLVMValueRef filler;
-			if (instr->op == nir_texop_txf)
-				filler = ctx->ac.i32_0;
-			else
-				filler = LLVMConstReal(ctx->ac.f32, 0.5);
-
-			if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
-				/* No nir_texop_lod, because it does not take a slice
-				 * even with array textures. */
-				if (instr->is_array && instr->op != nir_texop_lod ) {
-					address[count] = address[count - 1];
-					address[count - 1] = filler;
-					count++;
-				} else
-					address[count++] = filler;
-			}
-		}
+	/* Texture coordinates fixups */
+	if (instr->coord_components > 2 &&
+	    (instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
+	     instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
+	     instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
+	     instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
+	    instr->is_array &&
+	    instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
+		args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
 	}
 
-	/* Pack LOD */
-	if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) {
-		address[count++] = lod;
-	} else if (instr->op == nir_texop_txf_ms && sample_index) {
-		address[count++] = sample_index;
-	} else if(instr->op == nir_texop_txs) {
-		count = 0;
-		if (lod)
-			address[count++] = lod;
+	if (ctx->ac.chip_class >= GFX9 &&
+	    instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+	    instr->op != nir_texop_lod) {
+		LLVMValueRef filler;
+		if (instr->op == nir_texop_txf)
+			filler = ctx->ac.i32_0;
 		else
-			address[count++] = ctx->ac.i32_0;
-	}
+			filler = LLVMConstReal(ctx->ac.f32, 0.5);
 
-	for (chan = 0; chan < count; chan++) {
-		address[chan] = LLVMBuildBitCast(ctx->ac.builder,
-						 address[chan], ctx->ac.i32, "");
+		if (instr->is_array)
+			args.coords[2] = args.coords[1];
+		args.coords[1] = filler;
 	}
 
+	/* Pack sample index */
+	if (instr->op == nir_texop_txf_ms && sample_index)
+		args.coords[instr->coord_components] = sample_index;
+
 	if (instr->op == nir_texop_samples_identical) {
-		LLVMValueRef txf_address[4];
 		struct ac_image_args txf_args = { 0 };
-		unsigned txf_count = count;
-		memcpy(txf_address, address, sizeof(txf_address));
-
-		if (!instr->is_array)
-			txf_address[2] = ctx->ac.i32_0;
-		txf_address[3] = ctx->ac.i32_0;
-
-		set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf,
-				   fmask_ptr, NULL,
-				   txf_address, txf_count, 0xf);
+		memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
 
-		result = build_tex_intrinsic(ctx, instr, false, &txf_args);
+		txf_args.dmask = 0xf;
+		txf_args.resource = fmask_ptr;
+		txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
+		result = build_tex_intrinsic(ctx, instr, &txf_args);
 
 		result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
 		result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
@@ -3585,42 +3464,38 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
 	    instr->op != nir_texop_txs) {
 		unsigned sample_chan = instr->is_array ? 3 : 2;
-		address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac,
-								       address[0],
-								       address[1],
-								       instr->is_array ? address[2] : NULL,
-								       address[sample_chan],
-								       fmask_ptr);
+		args.coords[sample_chan] = adjust_sample_index_using_fmask(
+			&ctx->ac, args.coords[0], args.coords[1],
+			instr->is_array ? args.coords[2] : NULL,
+			args.coords[sample_chan], fmask_ptr);
 	}
 
-	if (offsets && instr->op == nir_texop_txf) {
+	if (args.offset && instr->op == nir_texop_txf) {
 		nir_const_value *const_offset =
-			nir_src_as_const_value(instr->src[const_src].src);
-		int num_offsets = instr->src[const_src].src.ssa->num_components;
+			nir_src_as_const_value(instr->src[offset_src].src);
+		int num_offsets = instr->src[offset_src].src.ssa->num_components;
 		assert(const_offset);
 		num_offsets = MIN2(num_offsets, instr->coord_components);
-		if (num_offsets > 2)
-			address[2] = LLVMBuildAdd(ctx->ac.builder,
-						  address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
-		if (num_offsets > 1)
-			address[1] = LLVMBuildAdd(ctx->ac.builder,
-						  address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
-		address[0] = LLVMBuildAdd(ctx->ac.builder,
-					  address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
-
+		for (unsigned i = 0; i < num_offsets; ++i) {
+			args.coords[i] = LLVMBuildAdd(
+				ctx->ac.builder, args.coords[i],
+				LLVMConstInt(ctx->ac.i32, const_offset->i32[i], false), "");
+		}
+		args.offset = NULL;
 	}
 
 	/* TODO TG4 support */
+	args.dmask = 0xf;
 	if (instr->op == nir_texop_tg4) {
 		if (instr->is_shadow)
-			dmask = 1;
+			args.dmask = 1;
 		else
-			dmask = 1 << instr->component;
+			args.dmask = 1 << instr->component;
 	}
-	set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
-			   res_ptr, samp_ptr, address, count, dmask);
 
-	result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
+	if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
+		args.dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array);
+	result = build_tex_intrinsic(ctx, instr, &args);
 
 	if (instr->op == nir_texop_query_levels)
 		result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
author	Nicolai Hähnle <nicolai.haehnle@amd.com>	2018-03-23 11:20:24 +0100
committer	Nicolai Hähnle <nicolai.haehnle@amd.com>	2018-04-20 09:23:52 +0200
commit	625dcbbc45665459737c9d028f268fd6782472f3 (patch)
tree	2350e1d94b1f6ab348d755867ec7b5c910b42305 /src/amd
parent	f931583828f0ca9a3b135da0f2cda6a36ebbc877 (diff)