diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2012-08-24 15:15:22 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2012-08-24 18:24:30 +0000 |
commit | 586b3461078e28df60547b160fb816f3768adb81 (patch) | |
tree | 22f8e62bf4c32a3961fddd1fac0f8437951e337e | |
parent | 167ecf5ba358f750aecb07439ef5110e72895f25 (diff) |
radeon/llvm: Add native encoding for ALU instructions. [mike-rebase]
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 43 |
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 2 |
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 60 |
-rw-r--r-- | src/gallium/drivers/r600/r700_asm.c | 43 |
-rw-r--r-- | src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp | 204 |
-rw-r--r-- | src/gallium/drivers/radeon/R600Instructions.td | 111 |
6 files changed, 305 insertions, 158 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f01de41e129..77a9c3d8dcf 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2875,3 +2875,46 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6 return 0; } + +void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) +{ + /* WORD0 */ + alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0); + alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0); + alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0); + alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0); + alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0); + alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0); + alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0); + alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0); + alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0); + alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0); + alu->last = G_SQ_ALU_WORD0_LAST(word0); + + /* WORD1 */ + alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1); + alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1); + alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1); + alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1); + alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1); + if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ + { + alu->is_op3 = 1; + alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); + alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); + alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1); + alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1); + alu->inst = G_SQ_ALU_WORD1_OP3_ALU_INST(word1); + } + else /*ALU_DWORD1_OP2*/ + { + alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1); + alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1); + alu->inst = G_SQ_ALU_WORD1_OP2_ALU_INST(word1); + alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1); + alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1); + alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1); + 
alu->execute_mask = + G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); + } +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 87e751adc78..403365ba07b 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -233,6 +233,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst); int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type); void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg); void r600_bytecode_dump(struct r600_bytecode *bc); +void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); int cm_bytecode_add_cf_end(struct r600_bytecode *bc); @@ -241,5 +242,6 @@ int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r6 /* r700_asm.c */ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf); int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); +void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 834c0b32989..4758eda5755 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -292,32 +292,37 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned bytes_read) { unsigned src_idx; - unsigned inst0, inst1; - unsigned push_modifier; struct r600_bytecode_alu alu; + unsigned src_const_reg[3]; + uint32_t word0, word1; + memset(&alu, 0, sizeof(alu)); for(src_idx = 0; src_idx < 3; src_idx++) { - bytes_read = r600_src_from_byte_stream(bytes, bytes_read, - &alu, src_idx); - } - - alu.dst.sel = bytes[bytes_read++]; - alu.dst.chan = bytes[bytes_read++]; - alu.dst.clamp = bytes[bytes_read++]; - alu.dst.write = bytes[bytes_read++]; - 
alu.dst.rel = bytes[bytes_read++]; - inst0 = bytes[bytes_read++]; - inst1 = bytes[bytes_read++]; - alu.inst = inst0 | (inst1 << 8); - alu.last = bytes[bytes_read++]; - alu.is_op3 = bytes[bytes_read++]; - push_modifier = bytes[bytes_read++]; - alu.pred_sel = bytes[bytes_read++]; - alu.bank_swizzle = bytes[bytes_read++]; - alu.bank_swizzle_force = bytes[bytes_read++]; - alu.omod = bytes[bytes_read++]; - alu.index_mode = bytes[bytes_read++]; + unsigned i; + src_const_reg[src_idx] = bytes[bytes_read++]; + for (i = 0; i < 4; i++) { + alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8); + } + } + word0 = i32_from_byte_stream(bytes, &bytes_read); + word1 = i32_from_byte_stream(bytes, &bytes_read); + + switch(ctx->bc->chip_class) { + case R600: + r600_bytecode_alu_read(&alu, word0, word1); + break; + case R700: + case EVERGREEN: + case CAYMAN: + r700_bytecode_alu_read(&alu, word0, word1); + break; + } + + for(src_idx = 0; src_idx < 3; src_idx++) { + if (src_const_reg[src_idx]) + alu.src[src_idx].sel += 512; + } if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) || alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) || @@ -328,15 +333,14 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; alu.last = 1; - } + } - if (push_modifier) { - alu.pred_sel = 0; - alu.execute_mask = 1; + if (alu.execute_mask) { + alu.pred_sel = 0; r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); - } else + } else { r600_bytecode_add_alu(ctx->bc, &alu); - + } /* XXX: Handle other KILL instructions */ if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) { diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index ea37c63525f..818933a4dbd 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -74,3 +74,46 @@ int r700_bytecode_alu_build(struct r600_bytecode *bc, 
struct r600_bytecode_alu * } return 0; } + +void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) +{ + /* WORD0 */ + alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0); + alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0); + alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0); + alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0); + alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0); + alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0); + alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0); + alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0); + alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0); + alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0); + alu->last = G_SQ_ALU_WORD0_LAST(word0); + + /* WORD1 */ + alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1); + alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1); + alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1); + alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1); + alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1); + if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ + { + alu->is_op3 = 1; + alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); + alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); + alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1); + alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1); + alu->inst = G_SQ_ALU_WORD1_OP3_ALU_INST(word1); + } + else /*ALU_DWORD1_OP2*/ + { + alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1); + alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1); + alu->inst = G_SQ_ALU_WORD1_OP2_ALU_INST(word1); + alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1); + alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1); + alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1); + alu->execute_mask = + G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); + } +} diff --git a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp index 34c4b396c77..0e50d1e7e3f 100644 --- 
a/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/src/gallium/drivers/radeon/MCTargetDesc/R600MCCodeEmitter.cpp @@ -61,6 +61,8 @@ private: void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const; void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; + void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, + raw_ostream &OS) const; void EmitDst(const MCInst &MI, raw_ostream &OS) const; void EmitALU(const MCInst &MI, unsigned numSrc, SmallVectorImpl<MCFixup> &Fixups, @@ -210,7 +212,9 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, } // Emit instruction type - EmitByte(0, OS); + EmitByte(INSTR_ALU, OS); + + uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); unsigned int OpIndex; for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) { @@ -218,17 +222,67 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) { break; } - EmitSrc(MI, OpIndex, OS); + EmitSrcISA(MI, OpIndex, InstWord01, OS); } // Emit zeros for unused sources for ( ; OpIndex < 4; OpIndex++) { - EmitNullBytes(SRC_BYTE_COUNT, OS); + EmitNullBytes(SRC_BYTE_COUNT - 6, OS); + } + + // Emit destination register + const MCOperand &dstOp = MI.getOperand(0); + if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) { + //dst register index + InstWord01 |= uint64_t(getHWReg(dstOp.getReg())) << 53; + + //element of destination register + InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61; + + // isClamped + if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) { + InstWord01 |= 1ULL << 63; + } + + // write mask + if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) { + InstWord01 |= 1ULL << 36; + } + + // XXX: Emit relative addressing mode } - EmitDst(MI, OS); + // Emit ALU - EmitALU(MI, NumOperands - 1, Fixups, OS); + // Emit IsLast (for this instruction group) (1 byte) + if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) { + InstWord01 |= 1ULL << 31; + 
} + + // XXX: Emit push modifier + if(isFlagSet(MI, 1, MO_FLAG_PUSH)) { + InstWord01 |= 1ULL << 34; + } + + // XXX: Emit predicate (1 byte) + int PredIdx = MCDesc.findFirstPredOperandIdx(); + if (PredIdx != -1) { + switch(MI.getOperand(PredIdx).getReg()) { + case AMDGPU::PRED_SEL_ZERO: + InstWord01 |= 2ULL << 29; + break; + case AMDGPU::PRED_SEL_ONE: + InstWord01 |= 3ULL << 29; + break; + } + } + + //XXX: predicate + //XXX: bank swizzle + //XXX: OMOD + //XXX: index mode + + Emit(InstWord01, OS); } void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, @@ -295,99 +349,87 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, } -void R600MCCodeEmitter::EmitDst(const MCInst &MI, raw_ostream &OS) const { - - const MCOperand &MO = MI.getOperand(0); - if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) { - // Emit the destination register index (1 byte) - EmitByte(getHWReg(MO.getReg()), OS); - - // Emit the element of the destination register (1 byte) - EmitByte(getHWRegChan(MO.getReg()), OS); - - // Emit isClamped (1 byte) - if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) { +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, + uint64_t &Value, raw_ostream &OS) const { + const MCOperand &MO = MI.getOperand(OpIdx); + union { + float f; + uint32_t i; + } InlineConstant; + InlineConstant.i = 0; + // Emit the source select (2 bytes). For GPRs, this is the register index. + // For other potential instruction operands, (e.g. constant registers) the + // value of the source select is defined in the r600isa docs. + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + // source select + if (OpIdx == 1) + Value |= uint64_t(getHWRegIndex(Reg)); + if (OpIdx == 2) + Value |= uint64_t(getHWRegIndex(Reg)) << 13; + if (OpIdx == 3) + Value |= uint64_t(getHWRegIndex(Reg)) << 32; + + if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { EmitByte(1, OS); } else { EmitByte(0, OS); } - // Emit writemask (1 byte). 
- if (isFlagSet(MI, 0, MO_FLAG_MASK)) { - EmitByte(0, OS); - } else { - EmitByte(1, OS); + if (Reg == AMDGPU::ALU_LITERAL_X) { + unsigned ImmOpIndex = MI.getNumOperands() - 1; + MCOperand ImmOp = MI.getOperand(ImmOpIndex); + if (ImmOp.isFPImm()) { + InlineConstant.f = ImmOp.getFPImm(); + } else { + assert(ImmOp.isImm()); + InlineConstant.i = ImmOp.getImm(); + } } - - // XXX: Emit relative addressing mode - EmitByte(0, OS); } else { - // XXX: Handle other operand types. Are there any for destination regs? - EmitNullBytes(DST_BYTE_COUNT, OS); - } -} - -void R600MCCodeEmitter::EmitALU(const MCInst &MI, unsigned numSrc, - SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - - // Emit the instruction (2 bytes) - EmitTwoBytes(getBinaryCodeForInstr(MI, Fixups), OS); - - // Emit IsLast (for this instruction group) (1 byte) - if (isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) { - EmitByte(0, OS); - } else { - EmitByte(1, OS); + // XXX: Handle other operand types. 
+ EmitTwoBytes(0, OS); } - // Emit isOp3 (1 byte) - if (numSrc == 3) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } + // source channel + uint64_t sourceChannelValue = getHWRegChan(MO.getReg()); + if (OpIdx == 1) + Value |= sourceChannelValue << 10; + if (OpIdx == 2) + Value |= sourceChannelValue << 23; + if (OpIdx == 3) + Value |= sourceChannelValue << 42; - // XXX: Emit push modifier - if(isFlagSet(MI, 1, MO_FLAG_PUSH)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); + // isNegated + if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) + && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || + (MO.isReg() && + (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ + if (OpIdx == 1) + Value |= 1ULL << 12; + else if (OpIdx == 2) + Value |= 1ULL << 25; + else if (OpIdx == 3) + Value |= 1ULL << 44; } - // XXX: Emit predicate (1 byte) - int PredIdx = MCDesc.findFirstPredOperandIdx(); - if (PredIdx > -1) - switch(MI.getOperand(PredIdx).getReg()) { - case AMDGPU::PRED_SEL_ZERO: - EmitByte(2, OS); - break; - case AMDGPU::PRED_SEL_ONE: - EmitByte(3, OS); - break; - default: - EmitByte(0, OS); - break; - } - else { - EmitByte(0, OS); + // isAbsolute + if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { + assert(OpIdx < 3); + Value |= 1ULL << (32+OpIdx-1); } + // XXX: relative addressing mode + // XXX: kc_bank - // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like - // r600_asm.c sets it. - EmitByte(0, OS); - - // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. - EmitByte(0, OS); + // Emit the literal value, if applicable (4 bytes). + Emit(InlineConstant.i, OS); - // XXX: Emit OMOD (1 byte) Not implemented. - EmitByte(0, OS); +} - // XXX: Emit index_mode. I think this is for indirect addressing, so we - // don't need to worry about it. 
- EmitByte(0, OS); +void R600MCCodeEmitter::EmitALU(const MCInst &MI, unsigned numSrc, + SmallVectorImpl<MCFixup> &Fixups, + raw_ostream &OS) const { } void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 519f384cdeb..0d863630dd3 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -13,17 +13,18 @@ include "R600Intrinsics.td" -class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern, +class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin> : AMDGPUInst <outs, ins, asm, pattern> { - field bits<32> Inst; + field bits<64> Inst; bit Trig = 0; bit Op3 = 0; bit isVector = 0; bits<2> FlagOperandIdx = 0; - let Inst = inst; + bits<11> op_code = inst; + //let Inst = inst; let Namespace = "AMDGPU"; let OutOperandList = outs; let InOperandList = ins; @@ -75,27 +76,31 @@ def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), (ops PRED_SEL_OFF)>; -class R600_1OP <bits<32> inst, string opName, list<dag> pattern, +class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, (outs R600_Reg32:$dst), (ins R600_Reg32:$src, R600_Pred:$p, variable_ops), !strconcat(opName, " $dst, $src ($p)"), pattern, - itin - >; + itin>{ + let Inst{49-39} = inst; + + } -class R600_2OP <bits<32> inst, string opName, list<dag> pattern, +class R600_2OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, (outs R600_Reg32:$dst), (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops), !strconcat(opName, " $dst, $src0, $src1"), pattern, - itin - >; + itin>{ + let Inst{49-39} = inst; + + } -class R600_3OP <bits<32> inst, string opName, list<dag> pattern, +class R600_3OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : 
InstR600 <inst, (outs R600_Reg32:$dst), @@ -105,6 +110,8 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern, itin>{ let Op3 = 1; + let Inst{49-45} = inst{4-0}; + } @@ -115,10 +122,10 @@ def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst), [], NullALU> { let DisableEncoding = "$src0"; - field bits<32> Inst; - bits<32> src1; + field bits<64> Inst; + bits<11> src1; - let Inst = src1; + let Inst{49-39} = src1; let FlagOperandIdx = 3; } @@ -131,26 +138,27 @@ def JUMP : InstR600 <0x10, >; } -class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern, +class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, InstrItinClass itin = VecALU> : InstR600 <inst, (outs R600_Reg32:$dst), ins, asm, pattern, - itin - - >; + itin>{ + let Inst{49-39} = inst; + } -class R600_TEX <bits<32> inst, string opName, list<dag> pattern, +class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2), !strconcat(opName, "$dst, $src0, $src1, $src2"), pattern, - itin - >; + itin>{ + let Inst {10-0} = inst; + } def TEX_SHADOW : PatLeaf< (imm), @@ -326,6 +334,7 @@ def MOV : InstR600 <0x19, (outs R600_Reg32:$dst), R600_Pred:$p), "MOV $dst, $src0", [], AnyALU> { let FlagOperandIdx = 2; + let Inst{49-39} = op_code; } class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19, @@ -333,7 +342,9 @@ class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19, (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm), "MOV_IMM $dst, $imm", [], AnyALU ->; +>{ + let Inst{49-39} = op_code; +} def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; def : Pat < @@ -355,6 +366,7 @@ def KILLGT : InstR600 <0x2D, [], NullALU>{ let FlagOperandIdx = 3; + let Inst{49-39} = op_code; } def AND_INT : R600_2OP < @@ -528,34 +540,34 @@ def TEX_SAMPLE_C_G : R600_TEX < // Helper classes for common instructions 
//===----------------------------------------------------------------------===// -class MUL_LIT_Common <bits<32> inst> : R600_3OP < +class MUL_LIT_Common <bits<11> inst> : R600_3OP < inst, "MUL_LIT", [] >; -class MULADD_Common <bits<32> inst> : R600_3OP < +class MULADD_Common <bits<11> inst> : R600_3OP < inst, "MULADD", [(set (f32 R600_Reg32:$dst), (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] >; -class CNDE_Common <bits<32> inst> : R600_3OP < +class CNDE_Common <bits<11> inst> : R600_3OP < inst, "CNDE", [(set (f32 R600_Reg32:$dst), (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))] >; -class CNDGT_Common <bits<32> inst> : R600_3OP < +class CNDGT_Common <bits<11> inst> : R600_3OP < inst, "CNDGT", [] >; -class CNDGE_Common <bits<32> inst> : R600_3OP < +class CNDGE_Common <bits<11> inst> : R600_3OP < inst, "CNDGE", [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] >; -class DOT4_Common <bits<32> inst> : R600_REDUCTION < +class DOT4_Common <bits<11> inst> : R600_REDUCTION < inst, (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), "DOT4 $dst $src0, $src1", @@ -569,7 +581,7 @@ class DOT4_Pat <Instruction dot4> : Pat < (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) >; -multiclass CUBE_Common <bits<32> inst> { +multiclass CUBE_Common <bits<11> inst> { def _pseudo : InstR600 < inst, @@ -588,110 +600,111 @@ multiclass CUBE_Common <bits<32> inst> { [], VecALU >{ let FlagOperandIdx = 3; + let Inst{49-39} = inst; } } -class EXP_IEEE_Common <bits<32> inst> : R600_1OP < +class EXP_IEEE_Common <bits<11> inst> : R600_1OP < inst, "EXP_IEEE", [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))] >; -class FLT_TO_INT_Common <bits<32> inst> : R600_1OP < +class FLT_TO_INT_Common <bits<11> inst> : R600_1OP < inst, "FLT_TO_INT", [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] >; -class INT_TO_FLT_Common <bits<32> inst> : R600_1OP < +class INT_TO_FLT_Common 
<bits<11> inst> : R600_1OP < inst, "INT_TO_FLT", [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] >; -class FLT_TO_UINT_Common <bits<32> inst> : R600_1OP < +class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP < inst, "FLT_TO_UINT", [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))] >; -class UINT_TO_FLT_Common <bits<32> inst> : R600_1OP < +class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP < inst, "UINT_TO_FLT", [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))] >; -class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP < +class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "LOG_CLAMPED", [] >; -class LOG_IEEE_Common <bits<32> inst> : R600_1OP < +class LOG_IEEE_Common <bits<11> inst> : R600_1OP < inst, "LOG_IEEE", [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))] >; -class LSHL_Common <bits<32> inst> : R600_2OP < +class LSHL_Common <bits<11> inst> : R600_2OP < inst, "LSHL $dst, $src0, $src1", [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))] >; -class LSHR_Common <bits<32> inst> : R600_2OP < +class LSHR_Common <bits<11> inst> : R600_2OP < inst, "LSHR $dst, $src0, $src1", [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))] >; -class ASHR_Common <bits<32> inst> : R600_2OP < +class ASHR_Common <bits<11> inst> : R600_2OP < inst, "ASHR $dst, $src0, $src1", [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULHI_INT_Common <bits<32> inst> : R600_2OP < +class MULHI_INT_Common <bits<11> inst> : R600_2OP < inst, "MULHI_INT $dst, $src0, $src1", [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULHI_UINT_Common <bits<32> inst> : R600_2OP < +class MULHI_UINT_Common <bits<11> inst> : R600_2OP < inst, "MULHI $dst, $src0, $src1", [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULLO_INT_Common <bits<32> inst> : R600_2OP < +class MULLO_INT_Common <bits<11> inst> : R600_2OP < inst, "MULLO_INT $dst, $src0, $src1", [(set R600_Reg32:$dst, (mul 
R600_Reg32:$src0, R600_Reg32:$src1))] >; -class MULLO_UINT_Common <bits<32> inst> : R600_2OP < +class MULLO_UINT_Common <bits<11> inst> : R600_2OP < inst, "MULLO_UINT $dst, $src0, $src1", [] >; -class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP < +class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIP_CLAMPED", [] >; -class RECIP_IEEE_Common <bits<32> inst> : R600_1OP < +class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))] >; -class RECIP_UINT_Common <bits<32> inst> : R600_1OP < +class RECIP_UINT_Common <bits<11> inst> : R600_1OP < inst, "RECIP_INT $dst, $src", [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] >; -class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP < +class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_CLAMPED", [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))] >; -class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP < +class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_IEEE", [] >; -class SIN_Common <bits<32> inst> : R600_1OP < +class SIN_Common <bits<11> inst> : R600_1OP < inst, "SIN", []>{ let Trig = 1; } -class COS_Common <bits<32> inst> : R600_1OP < +class COS_Common <bits<11> inst> : R600_1OP < inst, "COS", []> { let Trig = 1; } |