diff options
author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2012-04-28 17:06:59 +0200 |
---|---|---|
committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2012-04-29 18:03:11 +0200 |
commit | 1f4c154f0253ed8fb448402532cfa670f74e69cd (patch) | |
tree | 10c485cfad921e6d962c2146efc468ff0aec63a8 /src/gallium/drivers | |
parent | d6ab3106cf7475cdaddf788a3e650bdd5833f73c (diff) |
nv50/ir/opt: try to convert ABS(SUB) to SAD
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir.cpp | 25 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp | 33 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp | 107 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp | 16 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp | 2 |
7 files changed, 179 insertions, 16 deletions
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp index 1006985ab9..335e9e01e6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp @@ -658,6 +658,31 @@ Instruction::swapSources(int a, int b) srcs[b].mod = m; } +// TODO: extend for delta < 0 +void +Instruction::moveSources(int s, int delta) +{ + if (delta == 0) + return; + assert(delta > 0); + + int k; + for (k = 0; srcExists(k); ++k) { + for (int i = 0; i < 2; ++i) { + if (src(k).indirect[i] >= s) + src(k).indirect[i] += delta; + } + } + if (predSrc >= s) + predSrc += delta; + if (flagsSrc >= s) + flagsSrc += delta; + + --k; + for (int p = k + delta; k >= s; --k, --p) + setSrc(p, src(k)); +} + void Instruction::takeExtraSources(int s, Value *values[3]) { diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index e544d071b5..9b47e3e13c 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -603,6 +603,7 @@ public: void setSrc(int s, Value *); void setSrc(int s, const ValueRef&); void swapSources(int a, int b); + void moveSources(int s, int delta); // NOTE: only delta > 0 implemented bool setIndirect(int s, int dim, Value *); inline ValueRef& src(int s) { return srcs[s]; } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp index c534d4a0c5..7542b84d17 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp @@ -99,6 +99,7 @@ private: void emitFMUL(const Instruction *); void emitFMAD(const Instruction *); void emitIMAD(const Instruction *); + void emitISAD(const Instruction *); void emitMINMAX(const Instruction *); @@ -1023,6 +1024,35 @@ CodeEmitterNV50::emitIMAD(const Instruction *i) } void +CodeEmitterNV50::emitISAD(const Instruction *i) +{ + if (i->encSize == 8) { + code[0] = 0x50000000; + switch (i->sType) { + case TYPE_U32: code[1] = 0x04000000; break; + case TYPE_S32: code[1] = 0x0c000000; break; + case TYPE_U16: code[1] = 0x00000000; break; + case TYPE_S16: code[1] = 0x08000000; break; + default: + assert(0); + break; + } + emitForm_MAD(i); + } else { + switch (i->sType) { + case TYPE_U32: code[0] = 0x50008000; break; + case TYPE_S32: code[0] = 0x50008100; break; + case TYPE_U16: code[0] = 0x50000000; break; + case TYPE_S16: code[0] = 0x50000100; break; + default: + assert(0); + break; + } + emitForm_MUL(i); + } +} + +void CodeEmitterNV50::emitSET(const Instruction *i) { code[0] = 0x30000000; @@ -1543,6 +1573,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn) else emitIMAD(insn); break; + case OP_SAD: + emitISAD(insn); + break; case OP_NOT: emitNOT(insn); break; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h index 93e502ea60..b62431f1e3 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h @@ -114,6 +114,17 @@ static inline bool isSignedType(DataType ty) } } +static inline DataType intTypeToSigned(DataType ty) +{ + switch (ty) { + case TYPE_U32: return TYPE_S32; + case TYPE_U16: return TYPE_S16; + case TYPE_U8: return TYPE_S8; + default: + return ty; + } +} + const ValueRef *ValueRef::getIndirect(int dim) const { return isIndirect(dim) ? &insn->src(indirect[dim]) : NULL; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp index 5bc3a45077..8613d7f2ef 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp @@ -915,57 +915,129 @@ class AlgebraicOpt : public Pass private: virtual bool visit(BasicBlock *); - void handleADD(Instruction *); + void handleABS(Instruction *); + bool handleADD(Instruction *); + bool tryADDToMADOrSAD(Instruction *, operation toOp); void handleMINMAX(Instruction *); void handleRCP(Instruction *); void handleSLCT(Instruction *); void handleLOGOP(Instruction *); void handleCVT(Instruction *); + + BuildUtil bld; }; void +AlgebraicOpt::handleABS(Instruction *abs) +{ + Instruction *sub = abs->getSrc(0)->getInsn(); + DataType ty; + if (!sub || + !prog->getTarget()->isOpSupported(OP_SAD, abs->dType)) + return; + // expect not to have mods yet, if we do, bail + if (sub->src(0).mod || sub->src(1).mod) + return; + // hidden conversion ? + ty = intTypeToSigned(sub->dType); + if (abs->dType != abs->sType || ty != abs->sType) + return; + + if ((sub->op != OP_ADD && sub->op != OP_SUB) || + sub->src(0).getFile() != FILE_GPR || sub->src(0).mod || + sub->src(1).getFile() != FILE_GPR || sub->src(1).mod) + return; + + Value *src0 = sub->getSrc(0); + Value *src1 = sub->getSrc(1); + + if (sub->op == OP_ADD) { + Instruction *neg = sub->getSrc(1)->getInsn(); + if (neg && neg->op != OP_NEG) { + neg = sub->getSrc(0)->getInsn(); + src0 = sub->getSrc(1); + } + if (!neg || neg->op != OP_NEG || + neg->dType != neg->sType || neg->sType != ty) + return; + src1 = neg->getSrc(0); + } + + // found ABS(SUB)) + abs->moveSources(1, 2); // move sources >=1 up by 2 + abs->op = OP_SAD; + abs->setType(sub->dType); + abs->setSrc(0, src0); + abs->setSrc(1, src1); + bld.setPosition(abs, false); + abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0)); +} + +bool AlgebraicOpt::handleADD(Instruction *add) { Value *src0 = add->getSrc(0); Value *src1 = add->getSrc(1); + + if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) + return false; + + bool changed = false; + if (!changed && prog->getTarget()->isOpSupported(OP_MAD, add->dType)) + changed = tryADDToMADOrSAD(add, OP_MAD); + if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) + changed = tryADDToMADOrSAD(add, OP_SAD); + return changed; +} + +// ADD(SAD(a,b,0), c) -> SAD(a,b,c) +// ADD(MUL(a,b), c) -> MAD(a,b,c) +bool +AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) +{ + Value *src0 = add->getSrc(0); + Value *src1 = add->getSrc(1); Value *src; int s; + const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL; + const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0)); Modifier mod[4]; - if (!prog->getTarget()->isOpSupported(OP_MAD, add->dType)) - return; - - if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) - return; - if (src0->refCount() == 1 && - src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_MUL) + src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp) s = 0; else if (src1->refCount() == 1 && - src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_MUL) + src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp) s = 1; else - return; + return false; if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) || (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb)) - return; + return false; src = add->getSrc(s); if (src->getInsn()->postFactor) - return; + return false; + if (toOp == OP_SAD) { + ImmediateValue imm; + if (!src->getInsn()->src(2).getImmediate(imm)) + return false; + if (!imm.isInteger(0)) + return false; + } mod[0] = add->src(0).mod; mod[1] = add->src(1).mod; mod[2] = src->getUniqueInsn()->src(0).mod; mod[3] = src->getUniqueInsn()->src(1).mod; - if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & Modifier(~NV50_IR_MOD_NEG)) - return; + if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) + return false; - add->op = OP_MAD; + add->op = toOp; add->subOp = src->getInsn()->subOp; // potentially mul-high add->setSrc(2, add->src(s ? 0 : 1)); @@ -974,6 +1046,8 @@ AlgebraicOpt::handleADD(Instruction *add) add->src(0).mod = mod[2] ^ mod[s]; add->setSrc(1, src->getInsn()->getSrc(1)); add->src(1).mod = mod[3]; + + return true; } void @@ -1140,6 +1214,9 @@ AlgebraicOpt::visit(BasicBlock *bb) for (Instruction *i = bb->getEntry(); i; i = next) { next = i->next; switch (i->op) { + case OP_ABS: + handleABS(i); + break; case OP_ADD: handleADD(i); break; diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index 026a6a0ee9..2ca4979dc7 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -87,6 +87,7 @@ private: void emitUMUL(const Instruction *); void emitFMUL(const Instruction *); void emitIMAD(const Instruction *); + void emitISAD(const Instruction *); void emitFMAD(const Instruction *); void emitNOT(Instruction *); @@ -621,6 +622,18 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i) } void +CodeEmitterNVC0::emitISAD(const Instruction *i) +{ + assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); + assert(i->encSize == 8); + + emitForm_A(i, HEX64(38000000, 00000003)); + + if (i->dType == TYPE_S32) + code[0] |= 1 << 5; +} + +void CodeEmitterNVC0::emitNOT(Instruction *i) { assert(i->encSize == 8); @@ -1608,6 +1621,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) else emitIMAD(insn); break; + case OP_SAD: + emitISAD(insn); + break; case OP_NOT: emitNOT(insn); break; diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index ffa40dd93f..10c2d09d65 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -456,7 +456,7 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const { if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32)) return false; - if (op == OP_SAD && ty != TYPE_S32) + if (op == OP_SAD && ty != TYPE_S32 && ty != TYPE_U32) return false; if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD) return false; |