diff options
author | Pan Xiuli <xiuli.pan@intel.com> | 2015-11-24 15:51:41 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2015-11-24 16:58:54 +0800 |
commit | e7189e6f2b85eae01416e9e728b09241701dd78a (patch) | |
tree | ea5352e2e16fcdc43626ce0ea29130e6cceaed0a | |
parent | 6b13eaaa93b3e1089b17e7996624fa3017e8a60e (diff) |
Backend: add debugwait function
Use the wait instruction to provide a new debug function:
void debugwait(void)
This function can hang the GPU until the GPU is reset
or the host sends something to let it go.
EXTREMELY DANGEROUS for machines that have hangcheck turned off.
v2:
Fix some bugs, add setting of the predicate and execwidth,
and also modify some instruction scheduling
v3:
Add push and pop in instruction selection, and set nomask
together with execwidth.
v4:
Fix barrier predicate setting bugs, and rebase the patch
Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/backend/gen_context.cpp | 3 | ||||
-rw-r--r-- | backend/src/backend/gen_encoder.cpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_scheduling.cpp | 3 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 28 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hpp | 1 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 32 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 10 | ||||
-rw-r--r-- | backend/src/ir/instruction.hxx | 1 | ||||
-rw-r--r-- | backend/src/libocl/include/ocl_sync.h | 1 | ||||
-rw-r--r-- | backend/src/libocl/src/ocl_barrier.ll | 6 | ||||
-rw-r--r-- | backend/src/libocl/src/ocl_sync.cl | 1 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 6 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 3 |
13 files changed, 91 insertions, 5 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 41fe72dc..43fa7fa0 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -1804,7 +1804,7 @@ namespace gbe } void GenContext::emitWaitInstruction(const SelectionInstruction &insn) { - p->WAIT(); + p->WAIT(insn.extra.waitType); } void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) { @@ -1829,6 +1829,7 @@ namespace gbe p->BARRIER(src); p->curr.execWidth = 1; // Now we wait for the other threads + p->curr.predicate = GEN_PREDICATE_NONE; p->WAIT(); p->pop(); } diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 1ad4f012..7c4357ab 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -996,6 +996,7 @@ namespace gbe void GenEncoder::WAIT(uint32_t n) { GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT); + GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE); GenRegister src = GenRegister::notification0(n); this->setDst(insn, GenRegister::null()); this->setSrc0(insn, src); diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp index 43f67c9a..8111e0c5 100644 --- a/backend/src/backend/gen_insn_scheduling.cpp +++ b/backend/src/backend/gen_insn_scheduling.cpp @@ -591,7 +591,8 @@ namespace gbe || node->insn.opcode == SEL_OP_READ_ARF || node->insn.opcode == SEL_OP_BARRIER || node->insn.opcode == SEL_OP_CALC_TIMESTAMP - || node->insn.opcode == SEL_OP_STORE_PROFILING) + || node->insn.opcode == SEL_OP_STORE_PROFILING + || node->insn.opcode == SEL_OP_WAIT) tracker.makeBarrier(insnID, insnNum); } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index e1cf6f74..ed7514c1 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -619,7 +619,7 @@ namespace gbe /*! No-op */ void NOP(void); /*! 
Wait instruction (used for the barrier) */ - void WAIT(void); + void WAIT(uint32_t n = 0); /*! Atomic instruction */ void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector<GenRegister> temps); /*! Read 64 bits float/int array */ @@ -1293,7 +1293,11 @@ namespace gbe void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); } void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); } - void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); } + void Selection::Opaque::WAIT(uint32_t n) + { + SelectionInstruction *insn = this->appendInsn(SEL_OP_WAIT, 0, 0); + insn->extra.waitType = n; + } void Selection::Opaque::READ64(Reg addr, const GenRegister *dst, @@ -3465,6 +3469,25 @@ namespace gbe DECL_CTOR(SyncInstruction, 1,1); }; + /*! Wait instruction */ + DECL_PATTERN(WaitInstruction) + { + INLINE bool emitOne(Selection::Opaque &sel, const ir::WaitInstruction &insn, bool &markChildren) const + { + using namespace ir; + // Debugwait will use reg 1, which is different from barrier + sel.push(); + sel.curr.noMask = 1; + sel.curr.execWidth = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.WAIT(1); + sel.pop(); + return true; + } + + DECL_CTOR(WaitInstruction, 1,1); + }; + INLINE uint32_t getByteScatterGatherSize(Selection::Opaque &sel, ir::Type type) { using namespace ir; switch (type) { @@ -5978,6 +6001,7 @@ namespace gbe this->insert<CalcTimestampInstructionPattern>(); this->insert<StoreProfilingInstructionPattern>(); this->insert<NullaryInstructionPattern>(); + this->insert<WaitInstructionPattern>(); // Sort all the patterns with the number of instructions they output for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 32e5ce2a..0070ac20 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -136,6 +136,7 @@ 
namespace gbe uint16_t lut_sub:2; }; uint32_t barrierType; + uint32_t waitType; bool longjmp; uint32_t indirect_offset; struct { diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 6ed0b891..c7facfbd 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -949,6 +949,21 @@ namespace ir { Register dst[0], src[0]; }; + /*! Wait instructions */ + class ALIGNED_INSTRUCTION WaitInstruction : + public BasePolicy, + public NSrcPolicy<WaitInstruction, 0>, + public NDstPolicy<WaitInstruction, 0> + { + public: + INLINE WaitInstruction() { + this->opcode = OP_WAIT; + } + INLINE bool wellFormed(const Function &fn, std::string &why) const; + INLINE void out(std::ostream &out, const Function &fn) const; + Register dst[0], src[0]; + }; + #undef ALIGNED_INSTRUCTION ///////////////////////////////////////////////////////////////////////// @@ -1247,6 +1262,8 @@ namespace ir { { return true; } INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn, std::string &why) const { return true; } + INLINE bool WaitInstruction::wellFormed(const Function &fn, std::string &why) const + { return true; } // Ensure that types and register family match @@ -1531,6 +1548,9 @@ namespace ir { out << "." 
<< syncStr[field]; } + INLINE void WaitInstruction::out(std::ostream &out, const Function &fn) const { + this->outOpcode(out); + } } /* namespace internal */ @@ -1680,6 +1700,10 @@ START_INTROSPECTION(LabelInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(LabelInstruction) +START_INTROSPECTION(WaitInstruction) +#include "ir/instruction.hxx" +END_INTROSPECTION(WaitInstruction) + START_INTROSPECTION(VmeInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(VmeInstruction) @@ -1829,7 +1853,8 @@ END_FUNCTION(Instruction, Register) opcode == OP_SYNC || opcode == OP_ATOMIC || opcode == OP_CALC_TIMESTAMP || - opcode == OP_STORE_PROFILING; + opcode == OP_STORE_PROFILING || + opcode == OP_WAIT; } #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \ @@ -2174,6 +2199,11 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg)) return internal::StoreProfilingInstruction(bti, profilingType).convert(); } + // WAIT + Instruction WAIT(void) { + return internal::WaitInstruction().convert(); + } + std::ostream &operator<< (std::ostream &out, const Instruction &insn) { const Function &fn = insn.getFunction(); const BasicBlock *bb = insn.getParent(); diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 7862bbf5..76ffd778 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -576,6 +576,13 @@ namespace ir { static bool isClassOf(const Instruction &insn); }; + /*! Indirect Move instruction */ + class WaitInstruction : public Instruction { + public: + /*! Return true if the given instruction is an instance of this class */ + static bool isClassOf(const Instruction &insn); + }; + /*! Specialize the instruction. Also performs typechecking first based on the * opcode. Crashes if it fails */ @@ -797,6 +804,9 @@ namespace ir { Instruction CALC_TIMESTAMP(uint32_t pointNum, uint32_t tsType); /*! 
calculate the execute timestamp for profiling */ Instruction STORE_PROFILING(uint32_t bti, uint32_t Type); + /*! wait */ + Instruction WAIT(void); + } /* namespace ir */ } /* namespace gbe */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 12827471..efdd4c5e 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -109,3 +109,4 @@ DECL_INSN(ELSE, BranchInstruction) DECL_INSN(WHILE, BranchInstruction) DECL_INSN(CALC_TIMESTAMP, CalcTimestampInstruction) DECL_INSN(STORE_PROFILING, StoreProfilingInstruction) +DECL_INSN(WAIT, WaitInstruction) diff --git a/backend/src/libocl/include/ocl_sync.h b/backend/src/libocl/include/ocl_sync.h index 18090d52..1d90caee 100644 --- a/backend/src/libocl/include/ocl_sync.h +++ b/backend/src/libocl/include/ocl_sync.h @@ -31,5 +31,6 @@ OVERLOADABLE void barrier(cl_mem_fence_flags flags); void mem_fence(cl_mem_fence_flags flags); void read_mem_fence(cl_mem_fence_flags flags); void write_mem_fence(cl_mem_fence_flags flags); +OVERLOADABLE void debugwait(void); #endif /* __OCL_SYNC_H__ */ diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll index 2765a714..9416f801 100644 --- a/backend/src/libocl/src/ocl_barrier.ll +++ b/backend/src/libocl/src/ocl_barrier.ll @@ -12,6 +12,7 @@ declare i32 @_get_global_mem_fence() nounwind alwaysinline declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate +declare void @__gen_ocl_debugwait() nounwind alwaysinline noduplicate define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline { %1 = icmp eq i32 %flags, 3 @@ -40,3 +41,8 @@ barrier_global: done: ret void } + +define void @_Z9debugwaitv() nounwind noduplicate alwaysinline { + call void @__gen_ocl_debugwait() + ret void +} diff --git a/backend/src/libocl/src/ocl_sync.cl 
b/backend/src/libocl/src/ocl_sync.cl index d008639a..70d6f262 100644 --- a/backend/src/libocl/src/ocl_sync.cl +++ b/backend/src/libocl/src/ocl_sync.cl @@ -20,6 +20,7 @@ void __gen_ocl_barrier_local(void); void __gen_ocl_barrier_global(void); void __gen_ocl_barrier_local_and_global(void); +void __gen_ocl_debugwait(void); void mem_fence(cl_mem_fence_flags flags) { } diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index d1b6f986..a0b22626 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -3601,6 +3601,7 @@ namespace gbe case GEN_OCL_PRINTF: case GEN_OCL_CALC_TIMESTAMP: case GEN_OCL_STORE_PROFILING: + case GEN_OCL_DEBUGWAIT: break; case GEN_OCL_NOT_FOUND: default: @@ -4394,6 +4395,11 @@ namespace gbe ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1); break; } + case GEN_OCL_DEBUGWAIT: + { + ctx.WAIT(); + break; + } default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 65bf0c1c..d0e36144 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -177,3 +177,6 @@ DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf) DECL_LLVM_GEN_FUNCTION(CALC_TIMESTAMP, __gen_ocl_calc_timestamp) // store profiling info to the mem. DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling) + +// debug wait function +DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait) |