diff options
-rw-r--r-- | src/gallium/auxiliary/Makefile.sources | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 409 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 341 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 1182 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h | 138 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 553 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 1981 |
7 files changed, 2954 insertions, 1652 deletions
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index f55a4eb8451..740e3016452 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -176,6 +176,8 @@ GALLIVM_SOURCES := \ gallivm/lp_bld_sample_soa.c \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ + gallivm/lp_bld_tgsi.c \ + gallivm/lp_bld_tgsi_action.c \ gallivm/lp_bld_tgsi_aos.c \ gallivm/lp_bld_tgsi_info.c \ gallivm/lp_bld_tgsi_soa.c \ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c new file mode 100644 index 00000000000..261301ce542 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -0,0 +1,409 @@ +/************************************************************************** + * + * Copyright 2011-2012 Advanced Micro Devices, Inc. + * Copyright 2010 VMware, Inc. + * Copyright 2009 VMware, Inc. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "gallivm/lp_bld_tgsi.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_gather.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "util/u_memory.h" + +/** + * Allocate the instruction buffer of bld_base with room for + * LP_MAX_INSTRUCTIONS entries and mark it as empty. + * + * The user is responsible for freeing bld_base->instructions. + * + * Returns 1 on success and 0 if the allocation failed. + */ +unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base) +{ + bld_base->instructions = (struct tgsi_full_instruction *) + MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); + if (!bld_base->instructions) { + return 0; + } + bld_base->max_instructions = LP_MAX_INSTRUCTIONS; + /* A freshly allocated buffer holds no instructions; without this reset a + * stale count would make lp_bld_tgsi_add_instruction() append at the + * wrong offset. + */ + bld_base->num_instructions = 0; + return 1; +} + + +/** + * Append a copy of inst_to_add to the instruction buffer, growing the + * buffer by LP_MAX_INSTRUCTIONS entries when it is full. + * + * Returns 1 on success and 0 if the buffer could not be grown, in which + * case bld_base->instructions is not modified. + */ +unsigned lp_bld_tgsi_add_instruction( + struct lp_build_tgsi_context * bld_base, + struct tgsi_full_instruction *inst_to_add) +{ + + if (bld_base->num_instructions == bld_base->max_instructions) { + struct tgsi_full_instruction *instructions; + instructions = REALLOC(bld_base->instructions, bld_base->max_instructions + * sizeof(struct tgsi_full_instruction), + (bld_base->max_instructions + LP_MAX_INSTRUCTIONS) + * sizeof(struct tgsi_full_instruction)); + if (!instructions) { + return 0; + } + bld_base->instructions = instructions; + bld_base->max_instructions += LP_MAX_INSTRUCTIONS; + } + memcpy(bld_base->instructions + bld_base->num_instructions, inst_to_add, + sizeof(bld_base->instructions[0])); + + bld_base->num_instructions++; + + return 1; +} + + +/** + * This function assumes that all the args in emit_data have been set.
+ */ +static void +lp_build_action_set_dst_type( + struct lp_build_emit_data * emit_data, + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode) +{ + if (emit_data->arg_count == 0) { + emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context); + } else { + /* XXX: Not all opcodes have the same src and dst types. */ + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]); + } +} + +void +lp_build_tgsi_intrinsic( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_context * base = &bld_base->base; + emit_data->output[emit_data->chan] = lp_build_intrinsic( + base->gallivm->builder, action->intr_name, + emit_data->dst_type, emit_data->args, emit_data->arg_count); +} + +LLVMValueRef +lp_build_emit_llvm( + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_action * action = &bld_base->op_actions[tgsi_opcode]; + /* XXX: Assert that this is a componentwise or replicate instruction */ + + lp_build_action_set_dst_type(emit_data, bld_base, tgsi_opcode); + emit_data->chan = 0; + assert(action->emit); + action->emit(action, bld_base, emit_data); + return emit_data->output[0]; +} + +/** + * Emit tgsi_opcode on a single, already fetched operand and return + * the value the action stored in output[0]. + */ +LLVMValueRef +lp_build_emit_llvm_unary( + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + LLVMValueRef arg0) +{ + struct lp_build_emit_data emit_data; + /* Zero the fields the emit callback may read but that are not set here + * (inst, info, output[]), as lp_build_tgsi_inst_llvm() does. + */ + memset(&emit_data, 0, sizeof(emit_data)); + emit_data.arg_count = 1; + emit_data.args[0] = arg0; + return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data); +} + +/** + * Emit tgsi_opcode on two already fetched operands and return the + * value the action stored in output[0]. + */ +LLVMValueRef +lp_build_emit_llvm_binary( + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + LLVMValueRef arg0, + LLVMValueRef arg1) +{ + struct lp_build_emit_data emit_data; + /* See lp_build_emit_llvm_unary(): avoid handing indeterminate fields to + * the emit callback. + */ + memset(&emit_data, 0, sizeof(emit_data)); + emit_data.arg_count = 2; + emit_data.args[0] = arg0; + emit_data.args[1] = arg1; + return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data); +} + +/** + * Emit tgsi_opcode on three already fetched operands and return the + * value the action stored in output[0]. + */ +LLVMValueRef +lp_build_emit_llvm_ternary( + struct
lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + LLVMValueRef arg0, + LLVMValueRef arg1, + LLVMValueRef arg2) +{ + struct lp_build_emit_data emit_data; + /* See lp_build_emit_llvm_unary(): avoid handing indeterminate fields to + * the emit callback. + */ + memset(&emit_data, 0, sizeof(emit_data)); + emit_data.arg_count = 3; + emit_data.args[0] = arg0; + emit_data.args[1] = arg1; + emit_data.args[2] = arg2; + return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data); +} + +/** + * The default fetch implementation. + */ +void lp_build_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + unsigned src; + for (src = 0; src < emit_data->info->num_src; src++) { + emit_data->args[src] = lp_build_emit_fetch(bld_base, emit_data->inst, src, + emit_data->chan); + } + emit_data->arg_count = emit_data->info->num_src; + lp_build_action_set_dst_type(emit_data, bld_base, + emit_data->inst->Instruction.Opcode); +} + +/* XXX: COMMENT + * It should be assumed that this function ignores writemasks + */ +boolean +lp_build_tgsi_inst_llvm( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_instruction * inst) +{ + unsigned tgsi_opcode = inst->Instruction.Opcode; + const struct tgsi_opcode_info * info = tgsi_get_opcode_info(tgsi_opcode); + const struct lp_build_tgsi_action * action = + &bld_base->op_actions[tgsi_opcode]; + struct lp_build_emit_data emit_data; + unsigned chan_index; + LLVMValueRef val; + + bld_base->pc++; + + /* Ignore deprecated instructions */ + switch (inst->Instruction.Opcode) { + + case TGSI_OPCODE_RCC: + case TGSI_OPCODE_UP2H: + case TGSI_OPCODE_UP2US: + case TGSI_OPCODE_UP4B: + case TGSI_OPCODE_UP4UB: + case TGSI_OPCODE_X2D: + case TGSI_OPCODE_ARA: + case TGSI_OPCODE_BRA: + case TGSI_OPCODE_DIV: + case TGSI_OPCODE_PUSHA: + case TGSI_OPCODE_POPA: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_MOD: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TXQ: + /* deprecated?
*/ + assert(0); + return FALSE; + break; + } + + /* Check if the opcode has been implemented */ + if (!action->emit) { + return FALSE; + } + + memset(&emit_data, 0, sizeof(emit_data)); + + assert(info->num_dst <= 1); + if (info->num_dst) { + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_data.output[chan_index] = bld_base->base.undef; + } + } + + emit_data.inst = inst; + emit_data.info = info; + + /* Emit the instructions */ + if (info->output_mode == TGSI_OUTPUT_COMPONENTWISE && bld_base->soa) { + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { + emit_data.chan = chan_index; + if (!action->fetch_args) { + lp_build_fetch_args(bld_base, &emit_data); + } else { + action->fetch_args(bld_base, &emit_data); + } + action->emit(action, bld_base, &emit_data); + } + } else { + emit_data.chan = LP_CHAN_ALL; + if (action->fetch_args) { + action->fetch_args(bld_base, &emit_data); + } + /* Make sure the output value is stored in emit_data.output[0], unless + * the opcode is channel dependent */ + if (info->output_mode != TGSI_OUTPUT_CHAN_DEPENDENT) { + emit_data.chan = 0; + } + action->emit(action, bld_base, &emit_data); + + /* Replicate the output values */ + if (info->output_mode == TGSI_OUTPUT_REPLICATE && bld_base->soa) { + val = emit_data.output[0]; + memset(emit_data.output, 0, sizeof(emit_data.output)); + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { + emit_data.output[chan_index] = val; + } + } + } + + if (info->num_dst > 0) { + bld_base->emit_store(bld_base, inst, info, emit_data.output); + } + return TRUE; +} + + +LLVMValueRef +lp_build_emit_fetch( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_instruction *inst, + unsigned src_op, + const unsigned chan_index) +{ + const struct tgsi_full_src_register *reg = &inst->Src[src_op]; + unsigned swizzle; + LLVMValueRef res; + + if (chan_index == LP_CHAN_ALL) { + swizzle = ~0; + } else { + swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); + if (swizzle > 3) 
{ + assert(0 && "invalid swizzle in emit_fetch()"); + return bld_base->base.undef; + } + } + + assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]); + + if (bld_base->emit_fetch_funcs[reg->Register.File]) { + res = bld_base->emit_fetch_funcs[reg->Register.File](bld_base, reg, + swizzle); + } else { + assert(0 && "invalid src register in emit_fetch()"); + return bld_base->base.undef; + } + + if (reg->Register.Absolute) { + res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res); + } + + if (reg->Register.Negate) { + res = lp_build_negate( &bld_base->base, res ); + } + + /* + * Swizzle the argument + */ + + if (swizzle == ~0) { + res = bld_base->emit_swizzle(bld_base, res, + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW); + } + + return res; + +} + +/** + * Translate a complete TGSI token stream to LLVM IR. + * + * Declarations and immediates are forwarded to the emit_declaration and + * emit_immediate callbacks as they are parsed; instructions are first + * collected in bld_base->instructions and then translated starting at + * bld_base->pc until an instruction sets pc to -1. + * + * Returns TRUE on success. Returns FALSE if the instruction buffer cannot + * be allocated or grown, or if an instruction cannot be translated; the + * instruction buffer is freed on every path past its allocation. + */ +boolean +lp_build_tgsi_llvm( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + if (bld_base->emit_prologue) { + bld_base->emit_prologue(bld_base); + } + + if (!lp_bld_tgsi_list_init(bld_base)) { + return FALSE; + } + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Inputs already interpolated */ + bld_base->emit_declaration(bld_base, &parse.FullToken.FullDeclaration); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!lp_bld_tgsi_add_instruction(bld_base, + &parse.FullToken.FullInstruction)) { + /* Growing the buffer failed: do not translate a truncated + * program, release what was collected so far. + */ + tgsi_parse_free(&parse); + FREE(bld_base->instructions); + return FALSE; + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + bld_base->emit_immediate(bld_base, &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_PROPERTY: + break; + + default: + assert( 0 ); + } + } + + while (bld_base->pc != -1) { + struct tgsi_full_instruction *instr = bld_base->instructions + + bld_base->pc; + const struct tgsi_opcode_info *opcode_info = + tgsi_get_opcode_info(instr->Instruction.Opcode); + if
(!lp_build_tgsi_inst_llvm(bld_base, instr)) { + _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", + opcode_info->mnemonic); + /* Release the parser and the instruction buffer on failure too */ + tgsi_parse_free(&parse); + FREE(bld_base->instructions); + return FALSE; + } + } + + tgsi_parse_free(&parse); + + FREE(bld_base->instructions); + + if (bld_base->emit_epilogue) { + bld_base->emit_epilogue(bld_base); + } + + return TRUE; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 9713d100484..b03eefc9ee5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -1,5 +1,6 @@ /************************************************************************** * + * Copyright 2011-2012 Advanced Micro Devices, Inc. * Copyright 2009 VMware, Inc. * All Rights Reserved. * @@ -30,21 +31,33 @@ * TGSI to LLVM IR translation. * * @author Jose Fonseca <jfonseca@vmware.com> + * @author Tom Stellard <thomas.stellard@amd.com> */ #ifndef LP_BLD_TGSI_H #define LP_BLD_TGSI_H #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_tgsi_action.h" +#include "gallivm/lp_bld_limits.h" +#include "lp_bld_type.h" #include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_scan.h" +#define LP_CHAN_ALL ~0 + +#define LP_MAX_INSTRUCTIONS 256 + +struct tgsi_full_declaration; +struct tgsi_full_immediate; +struct tgsi_full_instruction; +struct tgsi_full_src_register; +struct tgsi_opcode_info; struct tgsi_token; struct tgsi_shader_info; -struct lp_type; -struct lp_build_context; struct lp_build_mask_context; struct gallivm_state; @@ -207,4 +220,328 @@ lp_build_system_values_array(struct gallivm_state *gallivm, LLVMValueRef facing); +struct lp_exec_mask { + struct lp_build_context *bld; + + boolean has_mask; + + LLVMTypeRef int_vec_type; + + LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; + int cond_stack_size; + LLVMValueRef cond_mask; + + LLVMBasicBlockRef loop_block; + LLVMValueRef cont_mask; + LLVMValueRef break_mask; + LLVMValueRef break_var; + struct { +
LLVMBasicBlockRef loop_block; + LLVMValueRef cont_mask; + LLVMValueRef break_mask; + LLVMValueRef break_var; + } loop_stack[LP_MAX_TGSI_NESTING]; + int loop_stack_size; + + LLVMValueRef ret_mask; + struct { + int pc; + LLVMValueRef ret_mask; + } call_stack[LP_MAX_TGSI_NESTING]; + int call_stack_size; + + LLVMValueRef exec_mask; +}; + +struct lp_build_tgsi_inst_list +{ + struct tgsi_full_instruction *instructions; + uint max_instructions; + uint num_instructions; +}; + +unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base); + + +unsigned lp_bld_tgsi_add_instruction( + struct lp_build_tgsi_context * bld_base, + struct tgsi_full_instruction *inst_to_add); + + +struct lp_build_tgsi_context; + + +typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *, + const struct tgsi_full_src_register *, + unsigned); + +struct lp_build_tgsi_context +{ + struct lp_build_context base; + + /** This array stores functions that are used to transform TGSI opcodes to + * LLVM instructions. + */ + struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST]; + + /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action + * should compute 1 / sqrt (src0.x) */ + struct lp_build_tgsi_action rsq_action; + + const struct tgsi_shader_info *info; + + lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT]; + + LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *, + LLVMValueRef, unsigned, unsigned, unsigned, unsigned); + + void (*emit_store)(struct lp_build_tgsi_context *, + const struct tgsi_full_instruction *, + const struct tgsi_opcode_info *, + LLVMValueRef dst[4]); + + void (*emit_declaration)(struct lp_build_tgsi_context *, + const struct tgsi_full_declaration *decl); + + void (*emit_immediate)(struct lp_build_tgsi_context *, + const struct tgsi_full_immediate *imm); + + + /* Allow the user to store data in this structure rather than passing it + * to every function. 
+ */ + void * userdata; + + boolean soa; + + /* Index into instructions[] of the next instruction to translate; + * lp_build_tgsi_llvm() stops once this becomes -1. + */ + int pc; + + /* Instruction buffer filled by lp_bld_tgsi_add_instruction(): + * max_instructions is its capacity, num_instructions the number of + * entries in use. + */ + struct tgsi_full_instruction *instructions; + uint max_instructions; + uint num_instructions; + + /** This function allows the user to insert some instructions at the + * beginning of the program. It is optional and does not need to be + * implemented. + */ + void (*emit_prologue)(struct lp_build_tgsi_context*); + + /** This function allows the user to insert some instructions at the end of + * the program. This callback is intended to be used for emitting + * instructions to handle the export for the output registers, but it can + * be used for any purpose. Implementing this function is optional, but + * recommended. + */ + void (*emit_epilogue)(struct lp_build_tgsi_context*); +}; + +struct lp_build_tgsi_soa_context +{ + struct lp_build_tgsi_context bld_base; + + /* Builder for vector integer masks and indices */ + struct lp_build_context uint_bld; + + /* Builder for scalar elements of shader's data type (float) */ + struct lp_build_context elem_bld; + + LLVMValueRef consts_ptr; + const LLVMValueRef *pos; + const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; + + const struct lp_build_sampler_soa *sampler; + + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS]; + LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS]; + LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS]; + LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS]; + + /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is + * set in the indirect_files field. + * The temps[] array above is unused then. + */ + LLVMValueRef temps_array; + + /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is + * set in the indirect_files field. + * The outputs[] array above is unused then. + */ + LLVMValueRef outputs_array; + + /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is + * set in the indirect_files field.
+ * The inputs[] array above is unused then. + */ + LLVMValueRef inputs_array; + + LLVMValueRef system_values_array; + + /** bitmask indicating which register files are accessed indirectly */ + unsigned indirect_files; + + struct lp_build_mask_context *mask; + struct lp_exec_mask exec_mask; + + uint num_immediates; + +}; + +void +lp_emit_declaration_soa( + struct lp_build_tgsi_context *bld, + const struct tgsi_full_declaration *decl); + +void lp_emit_immediate_soa( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_immediate *imm); + +boolean +lp_emit_instruction_soa( + struct lp_build_tgsi_soa_context *bld, + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info); + + +LLVMValueRef +lp_get_temp_ptr_soa( + struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned chan); + +LLVMValueRef +lp_get_output_ptr( + struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned chan); + +struct lp_build_tgsi_aos_context +{ + struct lp_build_tgsi_context bld_base; + + /* Builder for integer masks and indices */ + struct lp_build_context int_bld; + + /* + * AoS swizzle used: + * - swizzles[0] = red index + * - swizzles[1] = green index + * - swizzles[2] = blue index + * - swizzles[3] = alpha index + */ + unsigned char swizzles[4]; + unsigned char inv_swizzles[4]; + + LLVMValueRef consts_ptr; + const LLVMValueRef *inputs; + LLVMValueRef *outputs; + + struct lp_build_sampler_aos *sampler; + + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES]; + LLVMValueRef temps[LP_MAX_TGSI_TEMPS]; + LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; + LLVMValueRef preds[LP_MAX_TGSI_PREDS]; + + /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is + * set in the indirect_files field. + * The temps[] array above is unused then. 
+ */ + LLVMValueRef temps_array; + + /** bitmask indicating which register files are accessed indirectly */ + unsigned indirect_files; + +}; + +static INLINE struct lp_build_tgsi_soa_context * +lp_soa_context(struct lp_build_tgsi_context *bld_base) +{ + return (struct lp_build_tgsi_soa_context *)bld_base; +} + +static INLINE struct lp_build_tgsi_aos_context * +lp_aos_context(struct lp_build_tgsi_context *bld_base) +{ + return (struct lp_build_tgsi_aos_context *)bld_base; +} + +void +lp_emit_declaration_aos( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_declaration *decl); + + +boolean +lp_emit_instruction_aos( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + const struct tgsi_opcode_info *info, + int *pc); + +void +lp_emit_store_aos( + struct lp_build_tgsi_aos_context *bld, + const struct tgsi_full_instruction *inst, + unsigned index, + LLVMValueRef value); + +void lp_build_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data); + +LLVMValueRef +lp_build_tgsi_inst_llvm_aos( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_instruction *inst); + +void +lp_build_tgsi_intrinsic( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data); + +LLVMValueRef +lp_build_emit_llvm( + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + struct lp_build_emit_data * emit_data); + +LLVMValueRef +lp_build_emit_llvm_unary( + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + LLVMValueRef arg0); + +LLVMValueRef +lp_build_emit_llvm_binary( + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + LLVMValueRef arg0, + LLVMValueRef arg1); + +LLVMValueRef +lp_build_emit_llvm_ternary( + struct lp_build_tgsi_context *bld_base, + unsigned tgsi_opcode, + LLVMValueRef arg0, + LLVMValueRef arg1, + LLVMValueRef arg2); + +boolean +lp_build_tgsi_inst_llvm( + struct 
lp_build_tgsi_context * bld_base, + const struct tgsi_full_instruction *inst); + +LLVMValueRef +lp_build_emit_fetch( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_instruction *inst, + unsigned src_op, + const unsigned chan_index); + +boolean +lp_build_tgsi_llvm( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_token *tokens); + #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c new file mode 100644 index 00000000000..9688acc3229 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -0,0 +1,1182 @@ +/************************************************************************** + * + * Copyright 2011-2012 Advanced Micro Devices, Inc. + * Copyright 2009 VMware, Inc. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * TGSI to LLVM IR translation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + * @author Tom Stellard <thomas.stellard@amd.com> + * + * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, + * Brian Paul, and others. + */ + + +#include "lp_bld_tgsi_action.h" + +#include "lp_bld_tgsi.h" +#include "lp_bld_arit.h" +#include "lp_bld_const.h" +#include "lp_bld_gather.h" +#include "lp_bld_logic.h" + +#include "tgsi/tgsi_exec.h" + +/* XXX: The CPU only defaults should be replaced by generic ones. In most + * cases, the CPU defaults are just wrappers around a function in + * lp_bld_arit.c and these functions should be inlined here and the CPU + * generic code should be removed and placed elsewhere.
+ */ + +/* Default actions */ + +/* Generic fetch_arg functions */ + +static void scalar_unary_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0); + emit_data->arg_count = 1; + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]); +} + +static void scalar_binary_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); + /* src1.x */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 1, TGSI_CHAN_X); + emit_data->arg_count = 2; + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]); +} + +/* TGSI_OPCODE_ADD */ +static void +add_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = LLVMBuildFAdd( + bld_base->base.gallivm->builder, + emit_data->args[0], emit_data->args[1], ""); +} + +/* TGSI_OPCODE_ARR */ +static void +arr_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_ROUND, emit_data->args[0]); +} + +/* TGSI_OPCODE_CLAMP */ +static void +clamp_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, + emit_data->args[0], + emit_data->args[1]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_MIN, tmp, emit_data->args[2]); +} + +/* DP* Helper */ + +static void +dp_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data, + unsigned 
dp_components) +{ + unsigned chan, src; + for (src = 0; src < 2; src++) { + for (chan = 0; chan < dp_components; chan++) { + emit_data->args[(src * dp_components) + chan] = + lp_build_emit_fetch(bld_base, emit_data->inst, src, chan); + } + } + emit_data->dst_type = bld_base->base.elem_type; +} + +/* TGSI_OPCODE_DP2 */ +static void +dp2_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 2); +} + +static void +dp2_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp1; + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.x */, + emit_data->args[2] /* src1.x */); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[1] /* src0.y */, + emit_data->args[3] /* src1.y */); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp0, tmp1); +} + +static struct lp_build_tgsi_action dp2_action = { + .fetch_args = dp2_fetch_args, + .emit = dp2_emit +}; + +/* TGSI_OPCODE_DP2A */ +static void +dp2a_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 2); + emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst, + 2, TGSI_CHAN_X); +} + +static void +dp2a_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, + emit_data->args[5], tmp); +} + +static struct lp_build_tgsi_action dp2a_action = { + .fetch_args = dp2a_fetch_args, + .emit = dp2a_emit +}; + +/* TGSI_OPCODE_DP3 */ +static void +dp3_fetch_args( + struct lp_build_tgsi_context * 
bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 3); +} + +/* Emits src0.x*src1.x + src0.y*src1.y + src0.z*src1.z; the operands are + * expected in args[0..2] (src0.xyz) and args[3..5] (src1.xyz), which is + * the layout dp_fetch_args() produces for 3 components. + */ +static void +dp3_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp1; + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.x */, + emit_data->args[3] /* src1.x */); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[1] /* src0.y */, + emit_data->args[4] /* src1.y */); + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[2] /* src0.z */, + emit_data->args[5] /* src1.z */); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp0, tmp1); +} + +static struct lp_build_tgsi_action dp3_action = { + .fetch_args = dp3_fetch_args, + .emit = dp3_emit +}; + +/* TGSI_OPCODE_DP4 */ + +static void +dp4_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 4); +} + +static void +dp4_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp1; + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.x */, + emit_data->args[4] /* src1.x */); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[1] /* src0.y */, + emit_data->args[5] /* src1.y */); + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[2] /* src0.z */, + emit_data->args[6] /* src1.z */); + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[3] /* src0.w */, +
emit_data->args[7] /* src1.w */); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp0, tmp1); +} + +static struct lp_build_tgsi_action dp4_action = { + .fetch_args = dp4_fetch_args, + .emit = dp4_emit +}; + +/* TGSI_OPCODE_DPH */ +static void +dph_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 4); + /* src0.w */ + emit_data->args[3] = bld_base->base.one; +} + +const struct lp_build_tgsi_action dph_action = { + .fetch_args = dph_fetch_args, + .emit = dp4_emit +}; + +/* TGSI_OPCODE_DST */ +static void +dst_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.y */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Y); + /* src0.z */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Z); + /* src1.y */ + emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, + 1, TGSI_CHAN_Y); + /* src1.w */ + emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst, + 1, TGSI_CHAN_W); +} + +static void +dst_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = bld_base->base.one; + + /* dst.y */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.y */, + emit_data->args[2] /* src1.y */); + /* dst.z */ + emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */ + + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */ +} + +static struct lp_build_tgsi_action dst_action = { + .fetch_args = dst_fetch_args, + .emit = dst_emit +}; + +/* TGSI_OPCODE_END */ +static void +end_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct 
lp_build_emit_data * emit_data) +{ + bld_base->pc = -1; +} + +/* TGSI_OPCODE_EXP */ + +static void +exp_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef floor_x; + + /* floor( src0.x ) */ + floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, + emit_data->args[0]); + + /* 2 ^ floor( src0.x ) */ + emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_EX2, floor_x); + + /* src0.x - floor( src0.x ) */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_SUB, emit_data->args[0] /* src0.x */, floor_x); + + /* 2 ^ src0.x */ + emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */); + + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +const struct lp_build_tgsi_action exp_action = { + .fetch_args = scalar_unary_fetch_args, + .emit = exp_emit +}; + +/* TGSI_OPCODE_FRC */ + +static void +frc_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, + emit_data->args[0]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_SUB, emit_data->args[0], tmp); +} + +/* TGSI_OPCODE_KIL */ + +static void +kil_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); + /* src0.y */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Y); + /* src0.z */ + emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Z); + /* src0.w */ + emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_W); + emit_data->arg_count = 4; + 
emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context); +} + +/* TGSI_OPCODE_KILP */ + +static void +kilp_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context); +} + +/* TGSI_OPCODE_LIT */ + +static void +lit_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); + /* src0.y */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y); + /* src0.w */ + emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + emit_data->arg_count = 3; +} + +static void +lit_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp2; + + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = bld_base->base.one; + + /* dst. 
y */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_MAX, + emit_data->args[0] /* src0.x */, + bld_base->base.zero); + + /* dst.z */ + /* XMM[1] = SrcReg[0].yyyy */ + LLVMValueRef tmp1 = emit_data->args[1]; + /* XMM[1] = max(XMM[1], 0) */ + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, + tmp1, bld_base->base.zero); + /* XMM[2] = SrcReg[0].wwww */ + tmp2 = emit_data->args[2]; + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW, + tmp1, tmp2); + tmp0 = emit_data->args[0]; + emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base, + TGSI_OPCODE_CMP, + tmp0, bld_base->base.zero, tmp1); + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +static struct lp_build_tgsi_action lit_action = { + .fetch_args = lit_fetch_args, + .emit = lit_emit +}; + +/* TGSI_OPCODE_LOG */ + +static void +log_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + + LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x; + + /* abs( src0.x) */ + abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, + emit_data->args[0] /* src0.x */); + + /* lg2( abs( src0.x ) ) */ + log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2, + abs_x); + + /* floor( lg2( abs( src0.x ) ) ) */ + flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, + log_abs_x); + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = flr_log_abs_x; + + /* dst.y */ + ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, + flr_log_abs_x); + + /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x); + + /* dst.z */ + emit_data->output[TGSI_CHAN_Z] = log_abs_x; + + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +static struct lp_build_tgsi_action log_action = { + 
.fetch_args = scalar_unary_fetch_args, + .emit = log_emit +}; + +/* TGSI_OPCODE_LRP */ + +static void +lrp_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, + emit_data->args[1], + emit_data->args[2]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_ternary(bld_base, + TGSI_OPCODE_MAD, emit_data->args[0], tmp, emit_data->args[2]); +} + +/* TGSI_OPCODE_MAD */ + +static void +mad_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0], + emit_data->args[1]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp, emit_data->args[2]); +} + +/* TGSI_OPCODE_MOV */ + +static void +mov_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = emit_data->args[0]; +} + +/* TGSI_OPCODE_MUL */ +static void +mul_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_POW */ + +static void +pow_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +static struct lp_build_tgsi_action pow_action = { + .fetch_args = scalar_binary_fetch_args, + .emit = pow_emit +}; + +/* TGSI_OPCODE_RSQ */ + +static void +rsq_emit( + const struct lp_build_tgsi_action * 
action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, + emit_data->args[0]); + if (bld_base->rsq_action.emit) { + bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data); + } else { + emit_data->output[emit_data->chan] = bld_base->base.undef; + } +} + +const struct lp_build_tgsi_action rsq_action = { + .fetch_args = scalar_unary_fetch_args, + .emit = rsq_emit + +}; + +/* TGSI_OPCODE_SCS */ +static void +scs_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_COS, emit_data->args[0]); + /* dst.y */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_SIN, emit_data->args[0]); + /* dst.z */ + emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero; + + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +const struct lp_build_tgsi_action scs_action = { + .fetch_args = scalar_unary_fetch_args, + .emit = scs_emit +}; + +/* TGSI_OPCODE_SFL */ + +static void +sfl_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = bld_base->base.zero; +} + +/* TGSI_OPCODE_STR */ + +static void +str_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = bld_base->base.one; +} + +/* TGSI_OPCODE_SUB */ +static void +sub_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = LLVMBuildFSub( + bld_base->base.gallivm->builder, + emit_data->args[0], + emit_data->args[1], 
""); +} + +/* TGSI_OPCODE_XPD */ + +static void +xpd_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 3); +} + +/** + * (a * b) - (c * d) + */ +static LLVMValueRef +xpd_helper( + struct lp_build_tgsi_context * bld_base, + LLVMValueRef a, + LLVMValueRef b, + LLVMValueRef c, + LLVMValueRef d) +{ + LLVMValueRef tmp0, tmp1; + + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d); + + return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1); +} + +static void +xpd_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base, + emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */, + emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */); + + emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base, + emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */, + emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */); + + emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base, + emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */, + emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */); + + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +const struct lp_build_tgsi_action xpd_action = { + .fetch_args = xpd_fetch_args, + .emit = xpd_emit +}; + +void +lp_set_default_actions(struct lp_build_tgsi_context * bld_base) +{ + bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action; + bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action; + bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action; + bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action; + bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action; + bld_base->op_actions[TGSI_OPCODE_DST] = dst_action; + bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action; + 
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action; + bld_base->op_actions[TGSI_OPCODE_LOG] = log_action; + bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action; + bld_base->op_actions[TGSI_OPCODE_POW] = pow_action; + bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action; + bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action; + + bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args; + bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args; + bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args; + + bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit; + bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit; + bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit; + bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit; + bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit; + bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit; + bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit; + bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit; + bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit; + bld_base->op_actions[TGSI_OPCODE_SFL].emit = sfl_emit; + bld_base->op_actions[TGSI_OPCODE_STR].emit = str_emit; + bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit; +} + +/* CPU Only default actions */ + +/* These actions are CPU only, because they could potentially output SSE + * intrinsics. 
+ */ + +/* TGSI_OPCODE_ABS (CPU Only)*/ + +static void +abs_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_ADD (CPU Only) */ +static void +add_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_CEIL (CPU Only): must use the ceiling helper, not truncation, which differs for positive non-integers */ +static void +ceil_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_CMP (CPU Only) */ +static void +cmp_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS, + emit_data->args[0], bld_base->base.zero); + emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base, + cond, emit_data->args[1], emit_data->args[2]); +} + +/* TGSI_OPCODE_CND (CPU Only) */ +static void +cnd_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef half, tmp; + half = lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type, 0.5); + tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_GREATER, + emit_data->args[2], half); + emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base, + tmp, + emit_data->args[0], + emit_data->args[1]); +} + +/* TGSI_OPCODE_COS (CPU Only) */ +static void +cos_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * 
bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_DIV (CPU Only) */ +static void +div_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_EX2 (CPU Only) */ +static void +ex2_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_EXP (CPU Only) */ +static void +exp_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + lp_build_exp2_approx(&bld_base->base, emit_data->args[0], + &emit_data->output[TGSI_CHAN_X], + &emit_data->output[TGSI_CHAN_Y], + &emit_data->output[TGSI_CHAN_Z]); + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +/* TGSI_OPCODE_FLR (CPU Only) */ + +static void +flr_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_LG2 (CPU Only) */ +static void +lg2_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_log2(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_LOG (CPU Only) */ +static void +log_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef p_floor_log2; + 
LLVMValueRef p_exp; + LLVMValueRef p_log2; + LLVMValueRef src0 = emit_data->args[0]; + + lp_build_log2_approx(&bld_base->base, src0, + &p_exp, &p_floor_log2, &p_log2); + + emit_data->output[TGSI_CHAN_X] = p_floor_log2; + + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, + src0, p_exp); + emit_data->output[TGSI_CHAN_Z] = p_log2; + + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; + +} + +/* TGSI_OPCODE_MAX (CPU Only) */ + +static void +max_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_max(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_MIN (CPU Only) */ +static void +min_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_min(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_POW (CPU Only) */ +static void +pow_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + + +/* TGSI_OPCODE_RCP (CPU Only) */ + +static void +rcp_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base, + emit_data->args[0]); +} + +/* Reciprocal square root (CPU Only) */ + +/* This is not the same as TGSI_OPCODE_RSQ, which requires the argument to be + * greater than or equal to 0 */ +static void +recip_sqrt_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + 
emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_ROUND (CPU Only) */ +static void +round_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_SET Helper (CPU Only) */ + +static void +set_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data, + unsigned pipe_func) +{ + LLVMValueRef cond = lp_build_cmp(&bld_base->base, pipe_func, + emit_data->args[0], emit_data->args[1]); + emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base, + cond, + bld_base->base.one, + bld_base->base.zero); +} + +/* TGSI_OPCODE_SEQ (CPU Only) */ + +static void +seq_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL); +} + +/* TGSI_OPCODE_SGE (CPU Only) */ +static void +sge_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL); +} + +/* TGSI_OPCODE_SGT (CPU Only)*/ + +static void +sgt_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER); +} + +/* TGSI_OPCODE_SIN (CPU Only) */ +static void +sin_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_SLE (CPU Only) */ +static void 
+sle_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL); +} + +/* TGSI_OPCODE_SLT (CPU Only) */ + +static void +slt_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS); +} + +/* TGSI_OPCODE_SNE (CPU Only) */ + +static void +sne_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL); +} + +/* TGSI_OPCODE_SSG (CPU Only) */ + +static void +ssg_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_SUB (CPU Only) */ + +static void +sub_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base, + emit_data->args[0], + emit_data->args[1]); +} + +/* TGSI_OPCODE_TRUNC (CPU Only) */ + +static void +trunc_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base, + emit_data->args[0]); +} + +void +lp_set_default_actions_cpu( + struct lp_build_tgsi_context * bld_base) +{ + lp_set_default_actions(bld_base); + bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_ARL].emit = flr_emit_cpu; + 
bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_CND].emit = cnd_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu; + + bld_base->rsq_action.emit = recip_sqrt_emit_cpu; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h new file mode 100644 index 00000000000..818ff6c72a3 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h @@ -0,0 +1,138 @@ +/* + * Copyright 2011-2012 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * + * @author Tom Stellard <thomas.stellard@amd.com> + * + */ + + +#ifndef LP_BLD_TGSI_ACTION_H +#define LP_BLD_TGSI_ACTION_H + +#include <llvm-c/Core.h> + +struct lp_build_tgsi_context; + +struct lp_build_emit_data { + /** Arguments that are passed to lp_build_tgsi_action::emit. The + * order of the arguments should be as follows: + * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.z, s2.w + * AOS: s0.xyzw, s1.xyzw, s2.xyzw + * TEXTURE Instructions: coord.xyzw + * + * Arguments should be packed into the args array. For example an SOA + * instruction that reads s0.x and s1.x args should look like this: + * args[0] = s0.x; + * args[1] = s1.x; + */ + LLVMValueRef args[12]; + + /** + * Number of arguments in the args array. 
+ */ + unsigned arg_count; + + /** + * The output type of the opcode. This should be set in the + * lp_build_tgsi_action::fetch_args function. + */ + LLVMTypeRef dst_type; + + /** This is used by the lp_build_tgsi_action::fetch_args function to + * determine which channel to read from the opcode arguments. It also + * specifies which index of the output array should be written to by + * the lp_build_tgsi_action::emit function. However, this value is + * usually ignored by any opcodes that are not TGSI_OUTPUT_COMPONENTWISE. + */ + unsigned chan; + + /** The lp_build_tgsi_action::emit 'executes' the opcode and writes the + * results to this array. + */ + LLVMValueRef output[4]; + + /** + * The current instruction that is being 'executed'. + */ + const struct tgsi_full_instruction * inst; + const struct tgsi_opcode_info * info; +}; + +struct lp_build_tgsi_action +{ + + /** + * This function is responsible for doing 2-3 things: + * 1. Fetching the instruction arguments into the emit_data->args array. + * 2. Setting the number of arguments in emit_data->arg_count. + * 3. Setting the destination type in emit_data->dst_type (usually only + * necessary for opcodes that are TGSI_OUTPUT_COMPONENTWISE). + */ + void (*fetch_args)(struct lp_build_tgsi_context *, + struct lp_build_emit_data *); + + + /** + * This function is responsible for emitting LLVM IR for a TGSI opcode. + * It should store the values it generates in the emit_data->output array + * and for TGSI_OUTPUT_COMPONENTWISE and TGSI_OUTPUT_REPLICATE instructions + * (and possibly others depending on the specific implementation), it should + * make sure to store the values in the array slot indexed by emit_data->chan. + */ + void (*emit)(const struct lp_build_tgsi_action *, + struct lp_build_tgsi_context *, + struct lp_build_emit_data *); + + /** + * This variable can be used to store an intrinsic name, in case the TGSI + * opcode will be replaced by a target specific intrinsic. 
(There is a + * convenience function in lp_bld_tgsi.c called lp_build_tgsi_intrinsic() + * that can be assigned to lp_build_tgsi_action::emit and used for + * generating intrinsics). + */ + const char * intr_name; +}; + +/** + * This function initializes the bld_base->op_actions array with some + * generic operand actions. + */ +void +lp_set_default_actions( + struct lp_build_tgsi_context * bld_base); + +/** + * This function initializes the bld_base->op_actions array with some + * operand actions that are intended only for use when generating + * instructions to be executed on a CPU. + */ +void +lp_set_default_actions_cpu( + struct lp_build_tgsi_context * bld_base); + +#endif /* LP_BLD_TGSI_ACTION_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 74b3b75039a..53013f75f2e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -55,61 +55,15 @@ #include "lp_bld_flow.h" #include "lp_bld_quad.h" #include "lp_bld_tgsi.h" -#include "lp_bld_limits.h" #include "lp_bld_debug.h" -#define LP_MAX_INSTRUCTIONS 256 - - -struct lp_build_tgsi_aos_context -{ - struct lp_build_context base; - - /* Builder for integer masks and indices */ - struct lp_build_context int_bld; - - /* - * AoS swizzle used: - * - swizzles[0] = red index - * - swizzles[1] = green index - * - swizzles[2] = blue index - * - swizzles[3] = alpha index - */ - unsigned char swizzles[4]; - unsigned char inv_swizzles[4]; - - LLVMValueRef consts_ptr; - const LLVMValueRef *inputs; - LLVMValueRef *outputs; - - struct lp_build_sampler_aos *sampler; - - LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES]; - LLVMValueRef temps[LP_MAX_TGSI_TEMPS]; - LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; - LLVMValueRef preds[LP_MAX_TGSI_PREDS]; - - /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is - * set in the indirect_files field. - * The temps[] array above is unused then. 
- */ - LLVMValueRef temps_array; - - /** bitmask indicating which register files are accessed indirectly */ - unsigned indirect_files; - - struct tgsi_full_instruction *instructions; - uint max_instructions; -}; - - /** * Wrapper around lp_build_swizzle_aos which translates swizzles to another * ordering. */ static LLVMValueRef -swizzle_aos(struct lp_build_tgsi_aos_context *bld, +swizzle_aos(struct lp_build_tgsi_context *bld_base, LLVMValueRef a, unsigned swizzle_x, unsigned swizzle_y, @@ -117,6 +71,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld, unsigned swizzle_w) { unsigned char swizzles[4]; + struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base); assert(swizzle_x < 4); assert(swizzle_y < 4); @@ -128,7 +83,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld, swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; - return lp_build_swizzle_aos(&bld->base, a, swizzles); + return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles); } @@ -138,149 +93,133 @@ swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, unsigned chan) { chan = bld->swizzles[chan]; - return lp_build_swizzle_scalar_aos(&bld->base, a, chan); + return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan); } -/** - * Register fetch. 
- */ static LLVMValueRef -emit_fetch( - struct lp_build_tgsi_aos_context *bld, - const struct tgsi_full_instruction *inst, - unsigned src_op) +emit_fetch_constant( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) { - LLVMBuilderRef builder = bld->base.gallivm->builder; - struct lp_type type = bld->base.type; - const struct tgsi_full_src_register *reg = &inst->Src[src_op]; + struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct lp_type type = bld_base->base.type; LLVMValueRef res; unsigned chan; assert(!reg->Register.Indirect); /* - * Fetch the from the register file. + * Get the constants components */ - switch (reg->Register.File) { - case TGSI_FILE_CONSTANT: - /* - * Get the constants components - */ - - res = bld->base.undef; - for (chan = 0; chan < 4; ++chan) { - LLVMValueRef index; - LLVMValueRef scalar_ptr; - LLVMValueRef scalar; - LLVMValueRef swizzle; - - index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan); - - scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, - &index, 1, ""); + res = bld->bld_base.base.undef; + for (chan = 0; chan < 4; ++chan) { + LLVMValueRef index; + LLVMValueRef scalar_ptr; + LLVMValueRef scalar; + LLVMValueRef swizzle; - scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + index = lp_build_const_int32(bld->bld_base.base.gallivm, + reg->Register.Index * 4 + chan); - lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); + scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); - /* - * NOTE: constants array is always assumed to be RGBA - */ + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); - swizzle = lp_build_const_int32(bld->base.gallivm, bld->swizzles[chan]); - - res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); - } + lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); /* - * Broadcast the 
first quaternion to all others. - * - * XXX: could be factored into a reusable function. + * NOTE: constants array is always assumed to be RGBA */ - if (type.length > 4) { - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - unsigned i; + swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, + bld->swizzles[chan]); - for (chan = 0; chan < 4; ++chan) { - shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan); - } - - for (i = 4; i < type.length; ++i) { - shuffles[i] = shuffles[i % 4]; - } + res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); + } - res = LLVMBuildShuffleVector(builder, - res, bld->base.undef, - LLVMConstVector(shuffles, type.length), - ""); - } - break; + /* + * Broadcast the first quaternion to all others. + * + * XXX: could be factored into a reusable function. + */ - case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->Register.Index]; - assert(res); - break; + if (type.length > 4) { + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + unsigned i; - case TGSI_FILE_INPUT: - res = bld->inputs[reg->Register.Index]; - assert(res); - break; - - case TGSI_FILE_TEMPORARY: - { - LLVMValueRef temp_ptr; - temp_ptr = bld->temps[reg->Register.Index]; - res = LLVMBuildLoad(builder, temp_ptr, ""); - if (!res) - return bld->base.undef; + for (chan = 0; chan < 4; ++chan) { + shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan); } - break; - default: - assert(0 && "invalid src register in emit_fetch()"); - return bld->base.undef; - } - - /* - * Apply sign modifier. 
- */ + for (i = 4; i < type.length; ++i) { + shuffles[i] = shuffles[i % 4]; + } - if (reg->Register.Absolute) { - res = lp_build_abs(&bld->base, res); + res = LLVMBuildShuffleVector(builder, + res, bld->bld_base.base.undef, + LLVMConstVector(shuffles, type.length), + ""); } + return res; +} - if(reg->Register.Negate) { - res = lp_build_negate(&bld->base, res); - } +static LLVMValueRef +emit_fetch_immediate( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) +{ + struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); + LLVMValueRef res = bld->immediates[reg->Register.Index]; + assert(res); + return res; +} - /* - * Swizzle the argument - */ +static LLVMValueRef +emit_fetch_input( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) +{ + struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); + LLVMValueRef res = bld->inputs[reg->Register.Index]; + assert(!reg->Register.Indirect); + assert(res); + return res; +} - res = swizzle_aos(bld, res, - reg->Register.SwizzleX, - reg->Register.SwizzleY, - reg->Register.SwizzleZ, - reg->Register.SwizzleW); +static LLVMValueRef +emit_fetch_temporary( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) +{ + struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef temp_ptr = bld->temps[reg->Register.Index]; + LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, ""); + assert(!reg->Register.Indirect); + if (!res) + return bld->bld_base.base.undef; return res; } - /** * Register store. 
*/ -static void -emit_store( +void +lp_emit_store_aos( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_instruction *inst, unsigned index, LLVMValueRef value) { - LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; const struct tgsi_full_dst_register *reg = &inst->Dst[index]; LLVMValueRef mask = NULL; LLVMValueRef ptr; @@ -294,13 +233,13 @@ emit_store( break; case TGSI_SAT_ZERO_ONE: - value = lp_build_max(&bld->base, value, bld->base.zero); - value = lp_build_min(&bld->base, value, bld->base.one); + value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); + value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); break; case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); - value = lp_build_min(&bld->base, value, bld->base.one); + value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); + value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); break; default: @@ -335,6 +274,8 @@ emit_store( return; } + if (!ptr) + return; /* * Predicate */ @@ -350,17 +291,17 @@ emit_store( /* * Convert the value to an integer mask. 
*/ - pred = lp_build_compare(bld->base.gallivm, - bld->base.type, + pred = lp_build_compare(bld->bld_base.base.gallivm, + bld->bld_base.base.type, PIPE_FUNC_NOTEQUAL, pred, - bld->base.zero); + bld->bld_base.base.zero); if (inst->Predicate.Negate) { pred = LLVMBuildNot(builder, pred, ""); } - pred = swizzle_aos(bld, pred, + pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred, inst->Predicate.SwizzleX, inst->Predicate.SwizzleY, inst->Predicate.SwizzleZ, @@ -380,7 +321,7 @@ emit_store( if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { LLVMValueRef writemask; - writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type, + writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type, reg->Register.WriteMask); if (mask) { @@ -394,7 +335,7 @@ emit_store( LLVMValueRef orig_value; orig_value = LLVMBuildLoad(builder, ptr, ""); - value = lp_build_select(&bld->base, + value = lp_build_select(&bld->bld_base.base, mask, value, orig_value); } @@ -419,44 +360,44 @@ emit_tex(struct lp_build_tgsi_aos_context *bld, if (!bld->sampler) { _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); - return bld->base.undef; + return bld->bld_base.base.undef; } target = inst->Texture.Texture; - coords = emit_fetch( bld, inst, 0 ); + coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { - ddx = emit_fetch( bld, inst, 1 ); - ddy = emit_fetch( bld, inst, 2 ); + ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); + ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); unit = inst->Src[3].Register.Index; } else { #if 0 - ddx = lp_build_ddx( &bld->base, coords ); - ddy = lp_build_ddy( &bld->base, coords ); + ddx = lp_build_ddx( &bld->bld_base.base, coords ); + ddy = lp_build_ddy( &bld->bld_base.base, coords ); #else /* TODO */ - ddx = bld->base.one; - ddy = bld->base.one; + ddx = bld->bld_base.base.one; + ddy = 
bld->bld_base.base.one; #endif unit = inst->Src[1].Register.Index; } return bld->sampler->emit_fetch_texel(bld->sampler, - &bld->base, + &bld->bld_base.base, target, unit, coords, ddx, ddy, modifier); } -static void -emit_declaration( +void +lp_emit_declaration_aos( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_declaration *decl) { - struct gallivm_state *gallivm = bld->base.gallivm; - LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type); unsigned first = decl->Range.First; unsigned last = decl->Range.Last; @@ -468,7 +409,7 @@ emit_declaration( assert(idx < LP_MAX_TGSI_TEMPS); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); - bld->temps_array = lp_build_array_alloca(bld->base.gallivm, + bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm, vec_type, array_size, ""); } else { bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); @@ -501,8 +442,8 @@ emit_declaration( * Emit LLVM for one TGSI instruction. 
* \param return TRUE for success, FALSE otherwise */ -static boolean -emit_instruction( +boolean +lp_emit_instruction_aos( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_instruction *inst, const struct tgsi_opcode_info *info, @@ -527,17 +468,17 @@ emit_instruction( assert(info->num_dst <= 1); if (info->num_dst) { - dst0 = bld->base.undef; + dst0 = bld->bld_base.base.undef; } switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_floor(&bld->base, src0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_floor(&bld->bld_base.base, src0); break; case TGSI_OPCODE_MOV: - dst0 = emit_fetch(bld, inst, 0); + dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); break; case TGSI_OPCODE_LIT: @@ -545,15 +486,15 @@ emit_instruction( case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_rcp(&bld->base, src0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_rcp(&bld->bld_base.base, src0); break; case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ - src0 = emit_fetch(bld, inst, 0); - tmp0 = lp_build_abs(&bld->base, src0); - dst0 = lp_build_rsqrt(&bld->base, tmp0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0); + dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_EXP: @@ -563,15 +504,15 @@ emit_instruction( return FALSE; case TGSI_OPCODE_MUL: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - dst0 = lp_build_mul(&bld->base, src0, src1); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + dst0 = lp_build_mul(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_ADD: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - dst0 = 
lp_build_add(&bld->base, src0, src1); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + dst0 = lp_build_add(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_DP3: @@ -586,121 +527,116 @@ emit_instruction( return FALSE; case TGSI_OPCODE_MIN: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - dst0 = lp_build_max(&bld->base, src0, src1); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + dst0 = lp_build_max(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_MAX: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - dst0 = lp_build_max(&bld->base, src0, src1); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + dst0 = lp_build_max(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_SLT: /* TGSI_OPCODE_SETLT */ - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1); - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SGE: /* TGSI_OPCODE_SETGE */ - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1); - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + tmp0 = lp_build_cmp(&bld->bld_base.base, 
PIPE_FUNC_GEQUAL, src0, src1); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_MAD: /* TGSI_OPCODE_MADD */ - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - src2 = emit_fetch(bld, inst, 2); - tmp0 = lp_build_mul(&bld->base, src0, src1); - dst0 = lp_build_add(&bld->base, tmp0, src2); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); + tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); + dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_SUB: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - dst0 = lp_build_sub(&bld->base, src0, src1); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_LRP: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - src2 = emit_fetch(bld, inst, 2); - tmp0 = lp_build_sub(&bld->base, src1, src2); - tmp0 = lp_build_mul(&bld->base, src0, tmp0); - dst0 = lp_build_add(&bld->base, tmp0, src2); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); + tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); + tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); + dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_CND: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - src2 = emit_fetch(bld, inst, 2); - tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1); - dst0 = lp_build_select(&bld->base, tmp0, 
src0, src1); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); + tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5); + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1); break; case TGSI_OPCODE_DP2A: return FALSE; case TGSI_OPCODE_FRC: - src0 = emit_fetch(bld, inst, 0); - tmp0 = lp_build_floor(&bld->base, src0); - dst0 = lp_build_sub(&bld->base, src0, tmp0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + tmp0 = lp_build_floor(&bld->bld_base.base, src0); + dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); break; case TGSI_OPCODE_CLAMP: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - src2 = emit_fetch(bld, inst, 2); - tmp0 = lp_build_max(&bld->base, src0, src1); - dst0 = lp_build_min(&bld->base, tmp0, src2); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); + tmp0 = lp_build_max(&bld->bld_base.base, src0, src1); + dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_FLR: - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_floor(&bld->base, src0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_floor(&bld->bld_base.base, src0); break; case TGSI_OPCODE_ROUND: - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_round(&bld->base, src0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_round(&bld->bld_base.base, src0); break; case TGSI_OPCODE_EX2: - src0 = emit_fetch(bld, inst, 0); - tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); - dst0 = lp_build_exp2(&bld->base, tmp0); + src0 = 
lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X); + dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_LG2: - src0 = emit_fetch(bld, inst, 0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); - dst0 = lp_build_log2(&bld->base, tmp0); + dst0 = lp_build_log2(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_POW: - src0 = emit_fetch(bld, inst, 0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); - src1 = emit_fetch(bld, inst, 1); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); - dst0 = lp_build_pow(&bld->base, src0, src1); + dst0 = lp_build_pow(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_XPD: return FALSE; - case TGSI_OPCODE_ABS: - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_abs(&bld->base, src0); - break; - case TGSI_OPCODE_RCC: /* deprecated? 
*/ assert(0); @@ -710,9 +646,9 @@ emit_instruction( return FALSE; case TGSI_OPCODE_COS: - src0 = emit_fetch(bld, inst, 0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); - dst0 = lp_build_cos(&bld->base, tmp0); + dst0 = lp_build_cos(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_DDX: @@ -748,45 +684,45 @@ emit_instruction( return FALSE; case TGSI_OPCODE_SEQ: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1); - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SFL: - dst0 = bld->base.zero; + dst0 = bld->bld_base.base.zero; break; case TGSI_OPCODE_SGT: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1); - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SIN: - src0 = emit_fetch(bld, inst, 0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); - dst0 = lp_build_sin(&bld->base, tmp0); + dst0 = lp_build_sin(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_SLE: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - tmp0 = 
lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1); - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SNE: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1); - dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_STR: - dst0 = bld->base.one; + dst0 = bld->bld_base.base.one; break; case TGSI_OPCODE_TEX: @@ -834,8 +770,8 @@ emit_instruction( break; case TGSI_OPCODE_ARR: - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_round(&bld->base, src0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_round(&bld->bld_base.base, src0); break; case TGSI_OPCODE_BRA: @@ -856,16 +792,16 @@ emit_instruction( case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ - tmp0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_sgn(&bld->base, tmp0); + tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_sgn(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_CMP: - src0 = emit_fetch(bld, inst, 0); - src1 = emit_fetch(bld, inst, 1); - src2 = emit_fetch(bld, inst, 2); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero); - dst0 = lp_build_select(&bld->base, tmp0, src1, src2); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 
0, LP_CHAN_ALL); + src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); + src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); + tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero); + dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2); break; case TGSI_OPCODE_SCS: @@ -934,8 +870,8 @@ emit_instruction( break; case TGSI_OPCODE_CEIL: - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_ceil(&bld->base, src0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_ceil(&bld->bld_base.base, src0); break; case TGSI_OPCODE_I2F: @@ -951,8 +887,8 @@ emit_instruction( break; case TGSI_OPCODE_TRUNC: - src0 = emit_fetch(bld, inst, 0); - dst0 = lp_build_trunc(&bld->base, src0); + src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); + dst0 = lp_build_trunc(&bld->bld_base.base, src0); break; case TGSI_OPCODE_SHL: @@ -1028,7 +964,7 @@ emit_instruction( } if (info->num_dst) { - emit_store(bld, inst, 0, dst0); + lp_emit_store_aos(bld, inst, 0, dst0); } return TRUE; @@ -1049,13 +985,12 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, struct lp_build_tgsi_aos_context bld; struct tgsi_parse_context parse; uint num_immediates = 0; - uint num_instructions = 0; unsigned chan; int pc = 0; /* Setup build context */ memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, gallivm, type); + lp_build_context_init(&bld.bld_base.base, gallivm, type); lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); for (chan = 0; chan < 4; ++chan) { @@ -1068,11 +1003,18 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, bld.consts_ptr = consts_ptr; bld.sampler = sampler; bld.indirect_files = info->indirect_files; - bld.instructions = (struct tgsi_full_instruction *) - MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction)); - bld.max_instructions = LP_MAX_INSTRUCTIONS; + bld.bld_base.emit_swizzle = swizzle_aos; + bld.bld_base.info = info; + + 
bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; - if (!bld.instructions) { + /* Set opcode actions */ + lp_set_default_actions_cpu(&bld.bld_base); + + if (!lp_bld_tgsi_list_init(&bld.bld_base)) { return; } @@ -1084,33 +1026,13 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, switch(parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_DECLARATION: /* Inputs already interpolated */ - emit_declaration(&bld, &parse.FullToken.FullDeclaration); + lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration); break; case TGSI_TOKEN_TYPE_INSTRUCTION: - { - /* save expanded instruction */ - if (num_instructions == bld.max_instructions) { - struct tgsi_full_instruction *instructions; - instructions = REALLOC(bld.instructions, - bld.max_instructions - * sizeof(struct tgsi_full_instruction), - (bld.max_instructions + LP_MAX_INSTRUCTIONS) - * sizeof(struct tgsi_full_instruction)); - if (!instructions) { - break; - } - bld.instructions = instructions; - bld.max_instructions += LP_MAX_INSTRUCTIONS; - } - - memcpy(bld.instructions + num_instructions, - &parse.FullToken.FullInstruction, - sizeof(bld.instructions[0])); - - num_instructions++; - } - + /* save expanded instruction */ + lp_bld_tgsi_add_instruction(&bld.bld_base, + &parse.FullToken.FullInstruction); break; case TGSI_TOKEN_TYPE_IMMEDIATE: @@ -1144,10 +1066,10 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, } while (pc != -1) { - struct tgsi_full_instruction *instr = bld.instructions + pc; + struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc; const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(instr->Instruction.Opcode); - if (!emit_instruction(&bld, instr, opcode_info, &pc)) + if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc)) 
_debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", opcode_info->mnemonic); } @@ -1168,6 +1090,5 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, LLVMDumpModule(module); } - FREE(bld.instructions); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 1ad0b74b00e..2be41950853 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -47,6 +47,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_scan.h" +#include "lp_bld_tgsi_action.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" @@ -63,97 +64,6 @@ #include "lp_bld_printf.h" -#define NUM_CHANNELS 4 - -#define LP_MAX_INSTRUCTIONS 256 - - -struct lp_exec_mask { - struct lp_build_context *bld; - - boolean has_mask; - - LLVMTypeRef int_vec_type; - - LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; - int cond_stack_size; - LLVMValueRef cond_mask; - - LLVMBasicBlockRef loop_block; - LLVMValueRef cont_mask; - LLVMValueRef break_mask; - LLVMValueRef break_var; - struct { - LLVMBasicBlockRef loop_block; - LLVMValueRef cont_mask; - LLVMValueRef break_mask; - LLVMValueRef break_var; - } loop_stack[LP_MAX_TGSI_NESTING]; - int loop_stack_size; - - LLVMValueRef ret_mask; - struct { - int pc; - LLVMValueRef ret_mask; - } call_stack[LP_MAX_TGSI_NESTING]; - int call_stack_size; - - LLVMValueRef exec_mask; -}; - -struct lp_build_tgsi_soa_context -{ - struct lp_build_context base; - - /* Builder for vector integer masks and indices */ - struct lp_build_context uint_bld; - - /* Builder for scalar elements of shader's data type (float) */ - struct lp_build_context elem_bld; - - LLVMValueRef consts_ptr; - const LLVMValueRef *pos; - const LLVMValueRef (*inputs)[NUM_CHANNELS]; - LLVMValueRef (*outputs)[NUM_CHANNELS]; - - const struct lp_build_sampler_soa *sampler; - - LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; - 
LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; - LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; - LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; - - /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is - * set in the indirect_files field. - * The temps[] array above is unused then. - */ - LLVMValueRef temps_array; - - /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is - * set in the indirect_files field. - * The outputs[] array above is unused then. - */ - LLVMValueRef outputs_array; - - /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is - * set in the indirect_files field. - * The inputs[] array above is unused then. - */ - LLVMValueRef inputs_array; - - LLVMValueRef system_values_array; - - const struct tgsi_shader_info *info; - /** bitmask indicating which register files are accessed indirectly */ - unsigned indirect_files; - - struct lp_build_mask_context *mask; - struct lp_exec_mask exec_mask; - - struct tgsi_full_instruction *instructions; - uint max_instructions; -}; - static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) { mask->bld = bld; @@ -438,15 +348,15 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) * \param index which temporary register * \param chan which channel of the temp register. 
*/ -static LLVMValueRef -get_temp_ptr(struct lp_build_tgsi_soa_context *bld, +LLVMValueRef +lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld, unsigned index, unsigned chan) { - LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; assert(chan < 4); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { - LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan); + LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, ""); } else { @@ -460,15 +370,15 @@ get_temp_ptr(struct lp_build_tgsi_soa_context *bld, * \param index which output register * \param chan which channel of the output register. */ -static LLVMValueRef -get_output_ptr(struct lp_build_tgsi_soa_context *bld, +LLVMValueRef +lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, unsigned index, unsigned chan) { - LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; assert(chan < 4); if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { - LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, + LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, ""); } @@ -487,15 +397,15 @@ build_gather(struct lp_build_tgsi_soa_context *bld, LLVMValueRef base_ptr, LLVMValueRef indexes) { - LLVMBuilderRef builder = bld->base.gallivm->builder; - LLVMValueRef res = bld->base.undef; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef res = bld->bld_base.base.undef; unsigned i; /* * Loop over elements of index_vec, load scalar value, insert it into 'res'. 
*/ - for (i = 0; i < bld->base.type.length; i++) { - LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i); + for (i = 0; i < bld->bld_base.base.type.length; i++) { + LLVMValueRef ii = lp_build_const_int32(bld->bld_base.base.gallivm, i); LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, @@ -520,7 +430,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, struct lp_exec_mask *mask, LLVMValueRef pred) { - struct gallivm_state *gallivm = bld->base.gallivm; + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; unsigned i; @@ -537,7 +447,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, /* * Loop over elements of index_vec, store scalar value. */ - for (i = 0; i < bld->base.type.length; i++) { + for (i = 0; i < bld->bld_base.base.type.length; i++) { LLVMValueRef ii = lp_build_const_int32(gallivm, i); LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); @@ -573,7 +483,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld, unsigned reg_file, unsigned reg_index, const struct tgsi_src_register *indirect_reg) { - LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *uint_bld = &bld->uint_bld; /* always use X component of address register */ unsigned swizzle = indirect_reg->SwizzleX; @@ -584,7 +494,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld, assert(bld->indirect_files & (1 << reg_file)); - base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index); + base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index); assert(swizzle < 4); rel = LLVMBuildLoad(builder, @@ -598,9 +508,9 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld, index = 
lp_build_add(uint_bld, base, rel); - max_index = lp_build_const_int_vec(bld->base.gallivm, + max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, - bld->info->file_max[reg_file]); + bld->bld_base.info->file_max[reg_file]); assert(!uint_bld->type.sign); index = lp_build_min(uint_bld, index, max_index); @@ -608,176 +518,198 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld, return index; } - -/** - * Register fetch. - */ static LLVMValueRef -emit_fetch( - struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - unsigned src_op, - const unsigned chan_index ) +emit_fetch_constant( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) { - struct gallivm_state *gallivm = bld->base.gallivm; + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *uint_bld = &bld->uint_bld; - const struct tgsi_full_src_register *reg = &inst->Src[src_op]; - const unsigned swizzle = - tgsi_util_get_full_src_register_swizzle(reg, chan_index); - LLVMValueRef res; LLVMValueRef indirect_index = NULL; - if (swizzle > 3) { - assert(0 && "invalid swizzle in emit_fetch()"); - return bld->base.undef; - } + /* XXX: Handle fetching xyzw components as a vector */ + assert(swizzle != ~0); if (reg->Register.Indirect) { indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, &reg->Indirect); - } else { - assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); } - switch (reg->Register.File) { - case TGSI_FILE_CONSTANT: - if (reg->Register.Indirect) { - LLVMValueRef swizzle_vec = - lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); - LLVMValueRef index_vec; /* index into the const buffer */ + if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = +
lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle); + LLVMValueRef index_vec; /* index into the const buffer */ - /* index_vec = indirect_index * 4 + swizzle */ - index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); - index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + /* index_vec = indirect_index * 4 + swizzle */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); - /* Gather values from the constant buffer */ - res = build_gather(bld, bld->consts_ptr, index_vec); - } - else { - LLVMValueRef index; /* index into the const buffer */ - LLVMValueRef scalar, scalar_ptr; + /* Gather values from the constant buffer */ + return build_gather(bld, bld->consts_ptr, index_vec); + } + else { + LLVMValueRef index; /* index into the const buffer */ + LLVMValueRef scalar, scalar_ptr; - index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); + index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); - scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, + scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); - scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); - res = lp_build_broadcast_scalar(&bld->base, scalar); - } - break; + return lp_build_broadcast_scalar(&bld->bld_base.base, scalar); + } +} - case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->Register.Index][swizzle]; - assert(res); - break; +static LLVMValueRef +emit_fetch_immediate( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle]; + assert(res); + return res; +} - case TGSI_FILE_INPUT: - if (reg->Register.Indirect) { - LLVMValueRef swizzle_vec = - lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); - 
LLVMValueRef length_vec = - lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); - LLVMValueRef index_vec; /* index into the const buffer */ - LLVMValueRef inputs_array; - LLVMTypeRef float4_ptr_type; +static LLVMValueRef +emit_fetch_input( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld->uint_bld; + LLVMValueRef indirect_index = NULL; + LLVMValueRef res; - /* index_vec = (indirect_index * 4 + swizzle) * length */ - index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); - index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); - index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + if (reg->Register.Indirect) { + indirect_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + ®->Indirect); + } - /* cast inputs_array pointer to float* */ - float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); - inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, + if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); + LLVMValueRef length_vec = + lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length); + LLVMValueRef index_vec; /* index into the const buffer */ + LLVMValueRef inputs_array; + LLVMTypeRef float4_ptr_type; + + /* index_vec = (indirect_index * 4 + swizzle) * length */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + + /* cast inputs_array pointer to float* */ + float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); + inputs_array = 
LLVMBuildBitCast(builder, bld->inputs_array, float4_ptr_type, ""); - /* Gather values from the temporary register array */ - res = build_gather(bld, inputs_array, index_vec); - } else { - if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { - LLVMValueRef lindex = lp_build_const_int32(gallivm, - reg->Register.Index * 4 + swizzle); - LLVMValueRef input_ptr = LLVMBuildGEP(builder, - bld->inputs_array, &lindex, 1, ""); - res = LLVMBuildLoad(builder, input_ptr, ""); - } - else { - res = bld->inputs[reg->Register.Index][swizzle]; - } - } - assert(res); - break; - - case TGSI_FILE_TEMPORARY: - if (reg->Register.Indirect) { - LLVMValueRef swizzle_vec = - lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); - LLVMValueRef length_vec = - lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, - bld->base.type.length); - LLVMValueRef index_vec; /* index into the const buffer */ - LLVMValueRef temps_array; - LLVMTypeRef float4_ptr_type; - - /* index_vec = (indirect_index * 4 + swizzle) * length */ - index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); - index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); - index_vec = lp_build_mul(uint_bld, index_vec, length_vec); - - /* cast temps_array pointer to float* */ - float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0); - temps_array = LLVMBuildBitCast(builder, bld->temps_array, - float4_ptr_type, ""); - - /* Gather values from the temporary register array */ - res = build_gather(bld, temps_array, index_vec); + /* Gather values from the temporary register array */ + res = build_gather(bld, inputs_array, index_vec); + } else { + if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { + LLVMValueRef lindex = lp_build_const_int32(gallivm, + reg->Register.Index * 4 + swizzle); + LLVMValueRef input_ptr = LLVMBuildGEP(builder, + bld->inputs_array, &lindex, 1, ""); + res = LLVMBuildLoad(builder, input_ptr, ""); } else { - LLVMValueRef temp_ptr; - temp_ptr = get_temp_ptr(bld, 
reg->Register.Index, swizzle); - res = LLVMBuildLoad(builder, temp_ptr, ""); - if (!res) - return bld->base.undef; + res = bld->inputs[reg->Register.Index][swizzle]; } - break; - - case TGSI_FILE_SYSTEM_VALUE: - assert(!reg->Register.Indirect); - { - LLVMValueRef index; /* index into the system value array */ - LLVMValueRef scalar, scalar_ptr; - - index = lp_build_const_int32(gallivm, - reg->Register.Index * 4 + swizzle); - - scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, - &index, 1, ""); - scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + } + assert(res); + return res; +} - res = lp_build_broadcast_scalar(&bld->base, scalar); - } - break; +static LLVMValueRef +emit_fetch_temporary( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld->uint_bld; + LLVMValueRef indirect_index = NULL; + LLVMValueRef res; - default: - assert(0 && "invalid src register in emit_fetch()"); - return bld->base.undef; + if (reg->Register.Indirect) { + indirect_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + ®->Indirect); } - if (reg->Register.Absolute) { - res = lp_build_abs( &bld->base, res ); + if (reg->Register.Indirect) { + LLVMValueRef swizzle_vec = + lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle); + LLVMValueRef length_vec = + lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, + bld->bld_base.base.type.length); + LLVMValueRef index_vec; /* index into the const buffer */ + LLVMValueRef temps_array; + LLVMTypeRef float4_ptr_type; + + /* index_vec = (indirect_index * 4 + swizzle) * length */ + index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); + index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); + 
index_vec = lp_build_mul(uint_bld, index_vec, length_vec); + + /* cast temps_array pointer to float* */ + float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0); + temps_array = LLVMBuildBitCast(builder, bld->temps_array, + float4_ptr_type, ""); + + /* Gather values from the temporary register array */ + res = build_gather(bld, temps_array, index_vec); } - - if (reg->Register.Negate) { - res = lp_build_negate( &bld->base, res ); + else { + LLVMValueRef temp_ptr; + temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); + res = LLVMBuildLoad(builder, temp_ptr, ""); + if (!res) + return bld->bld_base.base.undef; } return res; } +static LLVMValueRef +emit_fetch_system_value( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + const unsigned swizzle) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef index; /* index into the system value array */ + LLVMValueRef scalar, scalar_ptr; + + assert(!reg->Register.Indirect); + + index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); + + scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, ""); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + + return lp_build_broadcast_scalar(&bld->bld_base.base, scalar); +} /** * Register fetch with derivatives. 
@@ -785,27 +717,21 @@ emit_fetch( static void emit_fetch_deriv( struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - unsigned index, - const unsigned chan_index, + LLVMValueRef src, LLVMValueRef *res, LLVMValueRef *ddx, LLVMValueRef *ddy) { - LLVMValueRef src; - - src = emit_fetch(bld, inst, index, chan_index); - if(res) *res = src; /* TODO: use interpolation coeffs for inputs */ if(ddx) - *ddx = lp_build_ddx(&bld->base, src); + *ddx = lp_build_ddx(&bld->bld_base.base, src); if(ddy) - *ddy = lp_build_ddy(&bld->base, src); + *ddy = lp_build_ddy(&bld->bld_base.base, src); } @@ -818,7 +744,7 @@ emit_fetch_predicate( const struct tgsi_full_instruction *inst, LLVMValueRef *pred) { - LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; unsigned index; unsigned char swizzles[4]; LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; @@ -858,11 +784,11 @@ emit_fetch_predicate( * is needlessly causing two comparisons due to storing the intermediate * result as float vector instead of an integer mask vector. */ - value = lp_build_compare(bld->base.gallivm, - bld->base.type, + value = lp_build_compare(bld->bld_base.base.gallivm, + bld->bld_base.base.type, PIPE_FUNC_NOTEQUAL, value, - bld->base.zero); + bld->bld_base.base.zero); if (inst->Predicate.Negate) { value = LLVMBuildNot(builder, value, ""); } @@ -881,15 +807,16 @@ emit_fetch_predicate( * Register store. 
*/ static void -emit_store( - struct lp_build_tgsi_soa_context *bld, +emit_store_chan( + struct lp_build_tgsi_context *bld_base, const struct tgsi_full_instruction *inst, unsigned index, unsigned chan_index, LLVMValueRef pred, LLVMValueRef value) { - struct gallivm_state *gallivm = bld->base.gallivm; + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; const struct tgsi_full_dst_register *reg = &inst->Dst[index]; struct lp_build_context *uint_bld = &bld->uint_bld; @@ -900,13 +827,13 @@ emit_store( break; case TGSI_SAT_ZERO_ONE: - value = lp_build_max(&bld->base, value, bld->base.zero); - value = lp_build_min(&bld->base, value, bld->base.one); + value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); + value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); break; case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); - value = lp_build_min(&bld->base, value, bld->base.one); + value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); + value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); break; default: @@ -919,7 +846,8 @@ emit_store( reg->Register.Index, ®->Indirect); } else { - assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); + assert(reg->Register.Index <= + bld->bld_base.info->file_max[reg->Register.File]); } switch( reg->Register.File ) { @@ -928,7 +856,7 @@ emit_store( LLVMValueRef chan_vec = lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); LLVMValueRef length_vec = - lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); + lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length); LLVMValueRef index_vec; /* indexes into the temp registers */ LLVMValueRef 
outputs_array; LLVMValueRef pixel_offsets; @@ -937,7 +865,7 @@ emit_store( /* build pixel offset vector: {0, 1, 2, 3, ...} */ pixel_offsets = uint_bld->undef; - for (i = 0; i < bld->base.type.length; i++) { + for (i = 0; i < bld->bld_base.base.type.length; i++) { LLVMValueRef ii = lp_build_const_int32(gallivm, i); pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, ii, ii, ""); @@ -959,7 +887,7 @@ emit_store( &bld->exec_mask, pred); } else { - LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, + LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, chan_index); lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); } @@ -971,7 +899,7 @@ emit_store( lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); LLVMValueRef length_vec = lp_build_const_int_vec(gallivm, uint_bld->type, - bld->base.type.length); + bld->bld_base.base.type.length); LLVMValueRef index_vec; /* indexes into the temp registers */ LLVMValueRef temps_array; LLVMValueRef pixel_offsets; @@ -980,7 +908,7 @@ emit_store( /* build pixel offset vector: {0, 1, 2, 3, ...} */ pixel_offsets = uint_bld->undef; - for (i = 0; i < bld->base.type.length; i++) { + for (i = 0; i < bld->bld_base.base.type.length; i++) { LLVMValueRef ii = lp_build_const_int32(gallivm, i); pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, ii, ii, ""); @@ -1002,7 +930,7 @@ emit_store( &bld->exec_mask, pred); } else { - LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + LLVMValueRef temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index); lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); } @@ -1023,6 +951,27 @@ emit_store( } } +static void +emit_store( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_instruction * inst, + const struct tgsi_opcode_info * info, + LLVMValueRef dst[4]) + +{ + unsigned chan_index; + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + + if(info->num_dst) { + LLVMValueRef 
pred[TGSI_NUM_CHANNELS]; + + emit_fetch_predicate( bld, inst, pred ); + + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]); + } + } +} /** * High-level instruction translators. @@ -1034,7 +983,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, enum lp_build_tex_modifier modifier, LLVMValueRef *texel) { - LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; unsigned unit; LLVMValueRef lod_bias, explicit_lod; LLVMValueRef oow = NULL; @@ -1047,7 +996,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, if (!bld->sampler) { _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); for (i = 0; i < 4; i++) { - texel[i] = bld->base.undef; + texel[i] = bld->bld_base.base.undef; } return; } @@ -1079,12 +1028,12 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { - lod_bias = emit_fetch( bld, inst, 0, 3 ); + lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); explicit_lod = NULL; } else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { lod_bias = NULL; - explicit_lod = emit_fetch( bld, inst, 0, 3 ); + explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); } else { lod_bias = NULL; @@ -1092,43 +1041,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, } if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { - oow = emit_fetch( bld, inst, 0, 3 ); - oow = lp_build_rcp(&bld->base, oow); + oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); + oow = lp_build_rcp(&bld->bld_base.base, oow); } for (i = 0; i < num_coords; i++) { - coords[i] = emit_fetch( bld, inst, 0, i ); + coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i ); if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) - coords[i] = lp_build_mul(&bld->base, coords[i], oow); + coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow); } for (i = num_coords; 
i < 3; i++) { - coords[i] = bld->base.undef; + coords[i] = bld->bld_base.base.undef; } if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { - LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0); + LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0); for (i = 0; i < num_coords; i++) { - LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); - LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); + LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i ); + LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i ); ddx[i] = LLVMBuildExtractElement(builder, src1, index0, ""); ddy[i] = LLVMBuildExtractElement(builder, src2, index0, ""); } unit = inst->Src[3].Register.Index; } else { for (i = 0; i < num_coords; i++) { - ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); - ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); + ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] ); + ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] ); } unit = inst->Src[1].Register.Index; } for (i = num_coords; i < 3; i++) { - ddx[i] = LLVMGetUndef(bld->base.elem_type); - ddy[i] = LLVMGetUndef(bld->base.elem_type); + ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type); + ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type); } bld->sampler->emit_fetch_texel(bld->sampler, - bld->base.gallivm, - bld->base.type, + bld->bld_base.base.gallivm, + bld->bld_base.base.type, unit, num_coords, coords, ddx, ddy, lod_bias, explicit_lod, @@ -1144,10 +1093,10 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld, for (i = 0; i < 5; i++) { unsigned opcode; - if (pc + i >= bld->info->num_instructions) + if (pc + i >= bld->bld_base.info->num_instructions) return TRUE; - opcode = bld->instructions[pc + i].Instruction.Opcode; + opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode; if (opcode == TGSI_OPCODE_END) return TRUE; @@ -1182,9 +1131,9 @@ emit_kil( const struct tgsi_full_instruction *inst, int pc) { - 
LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; const struct tgsi_full_src_register *reg = &inst->Src[0]; - LLVMValueRef terms[NUM_CHANNELS]; + LLVMValueRef terms[TGSI_NUM_CHANNELS]; LLVMValueRef mask; unsigned chan_index; @@ -1197,10 +1146,10 @@ emit_kil( swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); /* Check if the component has not been already tested. */ - assert(swizzle < NUM_CHANNELS); + assert(swizzle < TGSI_NUM_CHANNELS); if( !terms[swizzle] ) /* TODO: change the comparison operator instead of setting the sign */ - terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); + terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index ); } mask = NULL; @@ -1211,7 +1160,7 @@ emit_kil( /* * If term < 0 then mask = 0 else mask = ~0. */ - chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); + chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); if(mask) mask = LLVMBuildAnd(builder, mask, chan_mask, ""); @@ -1237,10 +1186,9 @@ emit_kil( */ static void emit_kilp(struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - int pc) + int pc) { - LLVMBuilderRef builder = bld->base.gallivm->builder; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; LLVMValueRef mask; /* For those channels which are "alive", disable fragment shader @@ -1250,7 +1198,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); } else { - LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); + LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type); mask = zero; } @@ -1268,7 +1216,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, static void emit_dump_temps(struct lp_build_tgsi_soa_context *bld) { - struct gallivm_state *gallivm = bld->base.gallivm; + struct gallivm_state *gallivm = 
bld->bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef temp_ptr; LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); @@ -1276,7 +1224,7 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld) LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); int index; - int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; + int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY]; for (index = 0; index < n; index++) { LLVMValueRef idx = lp_build_const_int32(gallivm, index); @@ -1286,7 +1234,7 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld) lp_build_printf(gallivm, "TEMP[%d]:\n", idx); for (chan = 0; chan < 4; chan++) { - temp_ptr = get_temp_ptr(bld, index, chan); + temp_ptr = lp_get_temp_ptr_soa(bld, index, chan); res = LLVMBuildLoad(builder, temp_ptr, ""); v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); @@ -1307,31 +1255,32 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld) -static void -emit_declaration( - struct lp_build_tgsi_soa_context *bld, +void +lp_emit_declaration_soa( + struct lp_build_tgsi_context *bld_base, const struct tgsi_full_declaration *decl) { - struct gallivm_state *gallivm = bld->base.gallivm; - LLVMTypeRef vec_type = bld->base.vec_type; + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMTypeRef vec_type = bld->bld_base.base.vec_type; const unsigned first = decl->Range.First; const unsigned last = decl->Range.Last; unsigned idx, i; for (idx = first; idx <= last; ++idx) { - assert(last <= bld->info->file_max[decl->Declaration.File]); + assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { - for (i = 0; i < NUM_CHANNELS; i++) + for (i = 0; i < 
TGSI_NUM_CHANNELS; i++) bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); } break; case TGSI_FILE_OUTPUT: if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { - for (i = 0; i < NUM_CHANNELS; i++) + for (i = 0; i < TGSI_NUM_CHANNELS; i++) bld->outputs[idx][i] = lp_build_alloca(gallivm, vec_type, "output"); } @@ -1339,13 +1288,13 @@ emit_declaration( case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); - for (i = 0; i < NUM_CHANNELS; i++) + for (i = 0; i < TGSI_NUM_CHANNELS; i++) bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr"); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); - for (i = 0; i < NUM_CHANNELS; i++) + for (i = 0; i < TGSI_NUM_CHANNELS; i++) bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, "predicate"); break; @@ -1358,965 +1307,427 @@ emit_declaration( } -/** - * Emit LLVM for one TGSI instruction. - * \param return TRUE for success, FALSE otherwise - */ -static boolean -emit_instruction( - struct lp_build_tgsi_soa_context *bld, - const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info, - int *pc) +void lp_emit_immediate_soa( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_immediate *imm) { - unsigned chan_index; - LLVMValueRef src0, src1, src2; - LLVMValueRef tmp0, tmp1, tmp2; - LLVMValueRef tmp3 = NULL; - LLVMValueRef tmp4 = NULL; - LLVMValueRef tmp5 = NULL; - LLVMValueRef tmp6 = NULL; - LLVMValueRef tmp7 = NULL; - LLVMValueRef res; - LLVMValueRef dst0[NUM_CHANNELS]; - - /* - * Stores and write masks are handled in a general fashion after the long - * instruction opcode switch statement. - * - * Although not stricitly necessary, we avoid generating instructions for - * channels which won't be stored, in cases where's that easy. For some - * complex instructions, like texture sampling, it is more convenient to - * assume a full writemask and then let LLVM optimization passes eliminate - * redundant code. 
- */ - - (*pc)++; - - assert(info->num_dst <= 1); - if (info->num_dst) { - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.undef; - } - } - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_floor(&bld->base, tmp0); - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_MOV: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); - } - break; + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct gallivm_state * gallivm = bld_base->base.gallivm; - case TGSI_OPCODE_LIT: - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) { - dst0[TGSI_CHAN_X] = bld->base.one; - } - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) { - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); - } - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) { - /* XMM[1] = SrcReg[0].yyyy */ - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); - /* XMM[1] = max(XMM[1], 0) */ - tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); - /* XMM[2] = SrcReg[0].wwww */ - tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W ); - tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); - dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); - } - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) { - dst0[TGSI_CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - res = lp_build_rcp(&bld->base, src0); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = res; - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT 
*/ - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - src0 = lp_build_abs(&bld->base, src0); - res = lp_build_rsqrt(&bld->base, src0); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = res; - } - break; - - case TGSI_OPCODE_EXP: - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) || - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) { - LLVMValueRef *p_exp2_int_part = NULL; - LLVMValueRef *p_frac_part = NULL; - LLVMValueRef *p_exp2 = NULL; - - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) - p_exp2_int_part = &tmp0; - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) - p_frac_part = &tmp1; - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) - p_exp2 = &tmp2; - - lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); - - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) - dst0[TGSI_CHAN_X] = tmp0; - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) - dst0[TGSI_CHAN_Y] = tmp1; - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) - dst0[TGSI_CHAN_Z] = tmp2; - } - /* dst.w = 1.0 */ - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) { - dst0[TGSI_CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_LOG: - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) || - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) { - LLVMValueRef *p_floor_log2 = NULL; - LLVMValueRef *p_exp = NULL; - LLVMValueRef *p_log2 = NULL; - - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - src0 = lp_build_abs( &bld->base, src0 ); - - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) - p_floor_log2 = &tmp0; - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) - p_exp = &tmp1; - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) - p_log2 = &tmp2; - - lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); - - 
/* dst.x = floor(lg2(abs(src.x))) */ - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) - dst0[TGSI_CHAN_X] = tmp0; - /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) { - dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); - } - /* dst.z = lg2(abs(src.x)) */ - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) - dst0[TGSI_CHAN_Z] = tmp2; - } - /* dst.w = 1.0 */ - if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) { - dst0[TGSI_CHAN_W] = bld->base.one; - } - break; - - case TGSI_OPCODE_MUL: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); - } - break; - - case TGSI_OPCODE_ADD: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_add(&bld->base, src0, src1); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); - 
tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W ); - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - - case TGSI_OPCODE_DST: - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) { - dst0[TGSI_CHAN_X] = bld->base.one; - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) { - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); - dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) { - dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) { - dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); - } - break; - - case TGSI_OPCODE_MAX: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); - } - break; + /* simply copy the immediate values into the next immediates[] slot */ + unsigned i; + const uint size = imm->Immediate.NrTokens - 1; + assert(size <= 4); + assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES); - case TGSI_OPCODE_SLT: - /* 
TGSI_OPCODE_SETLT */ - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; + for( i = 0; i < size; ++i ) + bld->immediates[bld->num_immediates][i] = + lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; + for( i = size; i < 4; ++i ) + bld->immediates[bld->num_immediates][i] = bld_base->base.undef; - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp1 = emit_fetch( bld, inst, 1, chan_index ); - tmp2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp0 = lp_build_add( &bld->base, tmp0, tmp2); - dst0[chan_index] = tmp0; - } - break; + bld->num_immediates++; +} - case TGSI_OPCODE_SUB: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp1 = emit_fetch( bld, inst, 1, chan_index ); - dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); - } - break; +static void +ddx_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_LRP: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = 
emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_sub( &bld->base, src1, src2 ); - tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); - dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); - } - break; + emit_fetch_deriv(bld, emit_data->args[0], NULL, + &emit_data->output[emit_data->chan], NULL); +} - case TGSI_OPCODE_CND: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); - } - break; +static void +ddy_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_DP2A: - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */ - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */ - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */ - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */ - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ - tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X ); /* xmm1 = src[2].x */ - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ - } - break; + emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL, + &emit_data->output[emit_data->chan]); +} - case TGSI_OPCODE_FRC: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_floor(&bld->base, src0); - tmp0 = lp_build_sub(&bld->base, src0, tmp0); - dst0[chan_index] = tmp0; - } - break; +static void +kilp_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_CLAMP: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_max(&bld->base, tmp0, src1); - tmp0 = lp_build_min(&bld->base, tmp0, src2); - dst0[chan_index] = tmp0; - } - break; + emit_kilp(bld, bld_base->pc - 1); +} - case TGSI_OPCODE_FLR: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_floor(&bld->base, tmp0); - } - break; +static void +kil_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_ROUND: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_round(&bld->base, tmp0); - } - break; + emit_kil(bld, emit_data->inst, bld_base->pc - 1); +} - case TGSI_OPCODE_EX2: { - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp0 = lp_build_exp2( &bld->base, tmp0); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; - } +static void +tex_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_LG2: - tmp0 = 
emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp0 = lp_build_log2( &bld->base, tmp0); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output); +} - case TGSI_OPCODE_POW: - src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); - res = lp_build_pow( &bld->base, src0, src1 ); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = res; - } - break; +static void +txb_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_XPD: - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) { - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); - tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); - } - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) { - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); - tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) { - tmp2 = tmp0; - tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); - tmp5 = tmp3; - tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); - tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); - dst0[TGSI_CHAN_X] = tmp2; - } - if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) || - TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) { - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); - tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) { - tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); - tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); - dst0[TGSI_CHAN_Y] = tmp3; - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) { - tmp5 
= lp_build_mul( &bld->base, tmp5, tmp4); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); - tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); - dst0[TGSI_CHAN_Z] = tmp5; - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) { - dst0[TGSI_CHAN_W] = bld->base.one; - } - break; + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, + emit_data->output); +} - case TGSI_OPCODE_ABS: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); - } - break; +static void +txd_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_RCC: - /* deprecated? */ - assert(0); - return FALSE; - - case TGSI_OPCODE_DPH: - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W ); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, + emit_data->output); +} - case TGSI_OPCODE_COS: - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp0 = lp_build_cos( &bld->base, tmp0 ); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; +static void +txl_emit( + const struct lp_build_tgsi_action * 
action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_DDX: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); - } - break; + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, + emit_data->output); +} - case TGSI_OPCODE_DDY: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); - } - break; +static void +txp_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_KILP: - /* predicated kill */ - emit_kilp( bld, inst, (*pc)-1 ); - break; + emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED, + emit_data->output); +} - case TGSI_OPCODE_KIL: - /* conditional kill */ - emit_kil( bld, inst, (*pc)-1 ); - break; +static void +cal_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_PK2H: - return FALSE; - break; + lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label, + &bld_base->pc); +} - case TGSI_OPCODE_PK2US: - return FALSE; - break; +static void +ret_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_PK4B: - return FALSE; - break; + lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); +} - case TGSI_OPCODE_PK4UB: - return FALSE; - break; +static void +brk_emit( + const struct lp_build_tgsi_action * action, + struct 
lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_RFL: - return FALSE; - break; + lp_exec_break(&bld->exec_mask); +} - case TGSI_OPCODE_SEQ: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; +static void +if_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_SFL: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.zero; - } - break; + tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL, + emit_data->args[0], bld->bld_base.base.zero); + lp_exec_mask_cond_push(&bld->exec_mask, tmp); +} - case TGSI_OPCODE_SGT: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; +static void +bgnloop_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_SIN: - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - tmp0 = lp_build_sin( &bld->base, tmp0 ); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; - } - break; + lp_exec_bgnloop(&bld->exec_mask); +} - case TGSI_OPCODE_SLE: - 
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; +static void +bgnsub_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_SNE: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); - } - break; + lp_exec_mask_bgnsub(&bld->exec_mask); +} - case TGSI_OPCODE_STR: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.one; - } - break; +static void +else_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_TEX: - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); - break; + lp_exec_mask_cond_invert(&bld->exec_mask); +} - case TGSI_OPCODE_TXD: - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); - break; +static void +endif_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_UP2H: - /* deprecated */ - assert (0); - return FALSE; - break; + lp_exec_mask_cond_pop(&bld->exec_mask); +} - case TGSI_OPCODE_UP2US: - /* deprecated */ - assert(0); - return FALSE; - break; +static void 
+endloop_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_UP4B: - /* deprecated */ - assert(0); - return FALSE; - break; + lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); +} - case TGSI_OPCODE_UP4UB: - /* deprecated */ - assert(0); - return FALSE; - break; +static void +endsub_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_X2D: - /* deprecated? */ - assert(0); - return FALSE; - break; + lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); +} - case TGSI_OPCODE_ARA: - /* deprecated */ - assert(0); - return FALSE; - break; +static void +cont_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_ARR: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - tmp0 = lp_build_round(&bld->base, tmp0); - dst0[chan_index] = tmp0; - } - break; + lp_exec_continue(&bld->exec_mask); +} - case TGSI_OPCODE_BRA: - /* deprecated */ - assert(0); - return FALSE; - break; +/* XXX: Refactor and move it to lp_bld_tgsi_action.c + * + * XXX: What do the comments about xmm registers mean? Maybe they are left over + * from old code, but there is no garauntee that LLVM will use those registers + * for this code. + * + * XXX: There should be no calls to lp_build_emit_fetch in this function. This + * should be handled by the emit_data->fetch_args function. 
*/ +static void +nrm_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp1; + LLVMValueRef tmp4 = NULL; + LLVMValueRef tmp5 = NULL; + LLVMValueRef tmp6 = NULL; + LLVMValueRef tmp7 = NULL; + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - case TGSI_OPCODE_CAL: - lp_exec_mask_call(&bld->exec_mask, - inst->Label.Label, - pc); + uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; - break; + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) || + TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) || + TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) || + (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) { - case TGSI_OPCODE_RET: - lp_exec_mask_ret(&bld->exec_mask, pc); - break; + /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ - case TGSI_OPCODE_END: - if (0) { - /* for debugging */ - emit_dump_temps(bld); + /* xmm4 = src.x */ + /* xmm0 = src.x * src.x */ + tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X); + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) { + tmp4 = tmp0; } - *pc = -1; - break; + tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0); - case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); + /* xmm5 = src.y */ + /* xmm0 = xmm0 + src.y * src.y */ + tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y); + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) { + tmp5 = tmp1; } - break; + tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); - case TGSI_OPCODE_CMP: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - src0 = emit_fetch( 
bld, inst, 0, chan_index ); - src1 = emit_fetch( bld, inst, 1, chan_index ); - src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); - dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); + /* xmm6 = src.z */ + /* xmm0 = xmm0 + src.z * src.z */ + tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z); + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) { + tmp6 = tmp1; } - break; + tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); - case TGSI_OPCODE_SCS: - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) { - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 ); - } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) { - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); - dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); + if (dims == 4) { + /* xmm7 = src.w */ + /* xmm0 = xmm0 + src.w * src.w */ + tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W); + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) { + tmp7 = tmp1; + } + tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) { - dst0[TGSI_CHAN_Z] = bld->base.zero; + /* xmm1 = 1 / sqrt(xmm0) */ + tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0); + /* dst.x = xmm1 * src.x */ + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) { + emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1); } - TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) { - dst0[TGSI_CHAN_W] = bld->base.one; + /* dst.y = xmm1 * src.y */ + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) { + emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1); } - break; - - case TGSI_OPCODE_TXB: - 
emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); - break; - case TGSI_OPCODE_NRM: - /* fall-through */ - case TGSI_OPCODE_NRM4: - /* 3 or 4-component normalization */ - { - uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; - - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) || - TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) || - TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) || - (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 4)) { - - /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ - - /* xmm4 = src.x */ - /* xmm0 = src.x * src.x */ - tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X); - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) { - tmp4 = tmp0; - } - tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); - - /* xmm5 = src.y */ - /* xmm0 = xmm0 + src.y * src.y */ - tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y); - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { - tmp5 = tmp1; - } - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - - /* xmm6 = src.z */ - /* xmm0 = xmm0 + src.z * src.z */ - tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z); - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { - tmp6 = tmp1; - } - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - - if (dims == 4) { - /* xmm7 = src.w */ - /* xmm0 = xmm0 + src.w * src.w */ - tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W); - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) { - tmp7 = tmp1; - } - tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); - } - - /* xmm1 = 1 / sqrt(xmm0) */ - tmp1 = lp_build_rsqrt( &bld->base, tmp0); - - /* dst.x = xmm1 * src.x */ - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) { - dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); - } - - /* dst.y = xmm1 * src.y */ - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { - dst0[TGSI_CHAN_Y] = lp_build_mul( 
&bld->base, tmp5, tmp1); - } - - /* dst.z = xmm1 * src.z */ - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { - dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); - } - - /* dst.w = xmm1 * src.w */ - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) && dims == 4) { - dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); - } - } - - /* dst.w = 1.0 */ - if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 3) { - dst0[TGSI_CHAN_W] = bld->base.one; - } + /* dst.z = xmm1 * src.z */ + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) { + emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1); } - break; - - case TGSI_OPCODE_DIV: - /* deprecated */ - assert( 0 ); - return FALSE; - break; - - case TGSI_OPCODE_DP2: - tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */ - tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */ - tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ - tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */ - tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */ - tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ - tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ + /* dst.w = xmm1 * src.w */ + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) { + emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1); } - break; - - case TGSI_OPCODE_TXL: - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); - break; - - case TGSI_OPCODE_TXP: - emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); - break; - - case TGSI_OPCODE_BRK: - lp_exec_break(&bld->exec_mask); - break; - - case TGSI_OPCODE_IF: - tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X); - tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 
- tmp0, bld->base.zero); - lp_exec_mask_cond_push(&bld->exec_mask, tmp0); - break; - - case TGSI_OPCODE_BGNLOOP: - lp_exec_bgnloop(&bld->exec_mask); - break; - - case TGSI_OPCODE_BGNSUB: - lp_exec_mask_bgnsub(&bld->exec_mask); - break; - - case TGSI_OPCODE_ELSE: - lp_exec_mask_cond_invert(&bld->exec_mask); - break; - - case TGSI_OPCODE_ENDIF: - lp_exec_mask_cond_pop(&bld->exec_mask); - break; - - case TGSI_OPCODE_ENDLOOP: - lp_exec_endloop(bld->base.gallivm, &bld->exec_mask); - break; + } - case TGSI_OPCODE_ENDSUB: - lp_exec_mask_endsub(&bld->exec_mask, pc); - break; + /* dst.w = 1.0 */ + if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) { + emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one; + } +} - case TGSI_OPCODE_PUSHA: - /* deprecated? */ - assert(0); - return FALSE; - break; +static void emit_prologue(struct lp_build_tgsi_context * bld_base) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state * gallivm = bld_base->base.gallivm; - case TGSI_OPCODE_POPA: - /* deprecated? */ - assert(0); - return FALSE; - break; + if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + LLVMValueRef array_size = + lp_build_const_int32(gallivm, + bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); + bld->temps_array = lp_build_array_alloca(gallivm, + bld_base->base.vec_type, array_size, + "temp_array"); + } - case TGSI_OPCODE_CEIL: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); - } - break; + if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { + LLVMValueRef array_size = + lp_build_const_int32(gallivm, + bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); + bld->outputs_array = lp_build_array_alloca(gallivm, + bld_base->base.vec_type, array_size, + "output_array"); + } - case TGSI_OPCODE_I2F: - /* deprecated? 
*/ - assert(0); - return FALSE; - break; + /* If we have indirect addressing in inputs we need to copy them into + * our alloca array to be able to iterate over them */ + if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { + unsigned index, chan; + LLVMTypeRef vec_type = bld_base->base.vec_type; + LLVMValueRef array_size = lp_build_const_int32(gallivm, + bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4); + bld->inputs_array = lp_build_array_alloca(gallivm, + vec_type, array_size, + "input_array"); - case TGSI_OPCODE_NOT: - /* deprecated? */ - assert(0); - return FALSE; - break; + assert(bld_base->info->num_inputs + <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1); - case TGSI_OPCODE_TRUNC: - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - tmp0 = emit_fetch( bld, inst, 0, chan_index ); - dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); + for (index = 0; index < bld_base->info->num_inputs; ++index) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + LLVMValueRef lindex = + lp_build_const_int32(gallivm, index * 4 + chan); + LLVMValueRef input_ptr = + LLVMBuildGEP(gallivm->builder, bld->inputs_array, + &lindex, 1, ""); + LLVMValueRef value = bld->inputs[index][chan]; + if (value) + LLVMBuildStore(gallivm->builder, value, input_ptr); + } } - break; - - case TGSI_OPCODE_SHL: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_ISHR: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_AND: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_OR: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_MOD: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_XOR: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_SAD: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_TXF: - /* deprecated? */ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_TXQ: - /* deprecated? 
*/ - assert(0); - return FALSE; - break; - - case TGSI_OPCODE_CONT: - lp_exec_continue(&bld->exec_mask); - break; - - case TGSI_OPCODE_EMIT: - return FALSE; - break; - - case TGSI_OPCODE_ENDPRIM: - return FALSE; - break; + } +} - case TGSI_OPCODE_NOP: - break; +static void emit_epilogue(struct lp_build_tgsi_context * bld_base) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - default: - return FALSE; + if (0) { + /* for debugging */ + emit_dump_temps(bld); } - - if(info->num_dst) { - LLVMValueRef pred[NUM_CHANNELS]; - - emit_fetch_predicate( bld, inst, pred ); - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); + /* If we have indirect addressing in outputs we need to copy our alloca array + * to the outputs slots specified by the called */ + if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { + unsigned index, chan; + assert(bld_base->info->num_outputs <= + bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1); + for (index = 0; index < bld_base->info->num_outputs; ++index) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); + } } } - - return TRUE; } - void lp_build_tgsi_soa(struct gallivm_state *gallivm, const struct tgsi_token *tokens, @@ -2325,17 +1736,12 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, LLVMValueRef consts_ptr, LLVMValueRef system_values_array, const LLVMValueRef *pos, - const LLVMValueRef (*inputs)[NUM_CHANNELS], - LLVMValueRef (*outputs)[NUM_CHANNELS], + const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], struct lp_build_sampler_soa *sampler, const struct tgsi_shader_info *info) { struct lp_build_tgsi_soa_context bld; - struct tgsi_parse_context parse; - uint num_immediates = 0; - uint num_instructions = 0; - unsigned i; - int pc = 0; struct lp_type res_type; @@ -2347,7 +1753,7 @@ lp_build_tgsi_soa(struct gallivm_state 
*gallivm, /* Setup build context */ memset(&bld, 0, sizeof bld); - lp_build_context_init(&bld.base, gallivm, type); + lp_build_context_init(&bld.bld_base.base, gallivm, type); lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type)); lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); bld.mask = mask; @@ -2356,145 +1762,55 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; - bld.info = info; + bld.bld_base.info = info; bld.indirect_files = info->indirect_files; - bld.instructions = (struct tgsi_full_instruction *) - MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); - bld.max_instructions = LP_MAX_INSTRUCTIONS; - - if (!bld.instructions) { - return; - } - - lp_exec_mask_init(&bld.exec_mask, &bld.base); - - if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { - LLVMValueRef array_size = - lp_build_const_int32(gallivm, - info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); - bld.temps_array = lp_build_array_alloca(gallivm, - bld.base.vec_type, array_size, - "temp_array"); - } - - if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { - LLVMValueRef array_size = - lp_build_const_int32(gallivm, - info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); - bld.outputs_array = lp_build_array_alloca(gallivm, - bld.base.vec_type, array_size, - "output_array"); - } - - /* If we have indirect addressing in inputs we need to copy them into - * our alloca array to be able to iterate over them */ - if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) { - unsigned index, chan; - LLVMTypeRef vec_type = bld.base.vec_type; - LLVMValueRef array_size = - lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4); - bld.inputs_array = lp_build_array_alloca(gallivm, - vec_type, array_size, - "input_array"); - assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1); + bld.bld_base.soa = TRUE; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; + 
bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value; + bld.bld_base.emit_store = emit_store; + + bld.bld_base.emit_declaration = lp_emit_declaration_soa; + bld.bld_base.emit_immediate = lp_emit_immediate_soa; + + bld.bld_base.emit_prologue = emit_prologue; + bld.bld_base.emit_epilogue = emit_epilogue; + + /* Set opcode actions */ + lp_set_default_actions_cpu(&bld.bld_base); + + bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; + bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit; + bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit; + bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit; + bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit; + bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit; + bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit; + bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit; + bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; + bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; + bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit; + bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit; + bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit; + bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit; + bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit; + bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit; + bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit; + bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit; + bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit; + bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit; + bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit; + bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit; + + 
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); - for (index = 0; index < info->num_inputs; ++index) { - for (chan = 0; chan < NUM_CHANNELS; ++chan) { - LLVMValueRef lindex = - lp_build_const_int32(gallivm, index * 4 + chan); - LLVMValueRef input_ptr = - LLVMBuildGEP(gallivm->builder, bld.inputs_array, - &lindex, 1, ""); - LLVMValueRef value = bld.inputs[index][chan]; - if (value) - LLVMBuildStore(gallivm->builder, value, input_ptr); - } - } - } bld.system_values_array = system_values_array; - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Inputs already interpolated */ - emit_declaration( &bld, &parse.FullToken.FullDeclaration ); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - /* save expanded instruction */ - if (num_instructions == bld.max_instructions) { - struct tgsi_full_instruction *instructions; - instructions = REALLOC(bld.instructions, - bld.max_instructions - * sizeof(struct tgsi_full_instruction), - (bld.max_instructions + LP_MAX_INSTRUCTIONS) - * sizeof(struct tgsi_full_instruction)); - if (!instructions) { - break; - } - bld.instructions = instructions; - bld.max_instructions += LP_MAX_INSTRUCTIONS; - } - - memcpy(bld.instructions + num_instructions, - &parse.FullToken.FullInstruction, - sizeof(bld.instructions[0])); - - num_instructions++; - } - - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* simply copy the immediate values into the next immediates[] slot */ - { - const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; - assert(size <= 4); - assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); - for( i = 0; i < size; ++i ) - bld.immediates[num_immediates][i] = - lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float); - for( i = size; i < 4; ++i ) - bld.immediates[num_immediates][i] = bld.base.undef; - num_immediates++; - } - break; - - case 
TGSI_TOKEN_TYPE_PROPERTY: - break; - - default: - assert( 0 ); - } - } - - while (pc != -1) { - struct tgsi_full_instruction *instr = bld.instructions + pc; - const struct tgsi_opcode_info *opcode_info = - tgsi_get_opcode_info(instr->Instruction.Opcode); - if (!emit_instruction( &bld, instr, opcode_info, &pc )) - _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - opcode_info->mnemonic); - } - - /* If we have indirect addressing in outputs we need to copy our alloca array - * to the outputs slots specified by the called */ - if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { - unsigned index, chan; - assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1); - for (index = 0; index < info->num_outputs; ++index) { - for (chan = 0; chan < NUM_CHANNELS; ++chan) { - bld.outputs[index][chan] = get_output_ptr(&bld, index, chan); - } - } - } + lp_build_tgsi_llvm(&bld.bld_base, tokens); if (0) { LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); @@ -2504,7 +1820,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, lp_debug_dump_value(function); debug_printf("2222222222222222222222222222 \n"); } - tgsi_parse_free( &parse ); if (0) { LLVMModuleRef module = LLVMGetGlobalParent( @@ -2512,8 +1827,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, LLVMDumpModule(module); } - - FREE( bld.instructions ); } |