summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pan Xiuli <xiuli.pan@intel.com> 2015-11-24 15:51:41 +0800
committer: Yang Rong <rong.r.yang@intel.com> 2015-11-24 16:58:54 +0800
commit: e7189e6f2b85eae01416e9e728b09241701dd78a (patch)
tree: ea5352e2e16fcdc43626ce0ea29130e6cceaed0a
parent: 6b13eaaa93b3e1089b17e7996624fa3017e8a60e (diff)
Backend: add debugwait function
Use the wait function to add a debug function: void debugwait(void). This function can hang the GPU unless the GPU is reset or the host sends something to let it go. EXTREMELY DANGEROUS for machines that turn off hangcheck. v2: Fix some bugs, add predicate and execwidth settings, and modify some instruction scheduling. v3: Add push and pop in instruction selection, and set nomask with execwidth. v4: Fix barrier predicate setting bugs, and rebase the patch. Signed-off-by: Pan Xiuli <xiuli.pan@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- backend/src/backend/gen_context.cpp 3
-rw-r--r-- backend/src/backend/gen_encoder.cpp 1
-rw-r--r-- backend/src/backend/gen_insn_scheduling.cpp 3
-rw-r--r-- backend/src/backend/gen_insn_selection.cpp 28
-rw-r--r-- backend/src/backend/gen_insn_selection.hpp 1
-rw-r--r-- backend/src/ir/instruction.cpp 32
-rw-r--r-- backend/src/ir/instruction.hpp 10
-rw-r--r-- backend/src/ir/instruction.hxx 1
-rw-r--r-- backend/src/libocl/include/ocl_sync.h 1
-rw-r--r-- backend/src/libocl/src/ocl_barrier.ll 6
-rw-r--r-- backend/src/libocl/src/ocl_sync.cl 1
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp 6
-rw-r--r-- backend/src/llvm/llvm_gen_ocl_function.hxx 3
13 files changed, 91 insertions, 5 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 41fe72dc..43fa7fa0 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1804,7 +1804,7 @@ namespace gbe
}
void GenContext::emitWaitInstruction(const SelectionInstruction &insn) {
- p->WAIT();
+ p->WAIT(insn.extra.waitType);
}
void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) {
@@ -1829,6 +1829,7 @@ namespace gbe
p->BARRIER(src);
p->curr.execWidth = 1;
// Now we wait for the other threads
+ p->curr.predicate = GEN_PREDICATE_NONE;
p->WAIT();
p->pop();
}
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 1ad4f012..7c4357ab 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -996,6 +996,7 @@ namespace gbe
void GenEncoder::WAIT(uint32_t n) {
GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
+ GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
GenRegister src = GenRegister::notification0(n);
this->setDst(insn, GenRegister::null());
this->setSrc0(insn, src);
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 43f67c9a..8111e0c5 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -591,7 +591,8 @@ namespace gbe
|| node->insn.opcode == SEL_OP_READ_ARF
|| node->insn.opcode == SEL_OP_BARRIER
|| node->insn.opcode == SEL_OP_CALC_TIMESTAMP
- || node->insn.opcode == SEL_OP_STORE_PROFILING)
+ || node->insn.opcode == SEL_OP_STORE_PROFILING
+ || node->insn.opcode == SEL_OP_WAIT)
tracker.makeBarrier(insnID, insnNum);
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e1cf6f74..ed7514c1 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -619,7 +619,7 @@ namespace gbe
/*! No-op */
void NOP(void);
/*! Wait instruction (used for the barrier) */
- void WAIT(void);
+ void WAIT(uint32_t n = 0);
/*! Atomic instruction */
void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector<GenRegister> temps);
/*! Read 64 bits float/int array */
@@ -1293,7 +1293,11 @@ namespace gbe
void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
- void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
+ void Selection::Opaque::WAIT(uint32_t n)
+ {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_WAIT, 0, 0);
+ insn->extra.waitType = n;
+ }
void Selection::Opaque::READ64(Reg addr,
const GenRegister *dst,
@@ -3465,6 +3469,25 @@ namespace gbe
DECL_CTOR(SyncInstruction, 1,1);
};
+ /*! Wait instruction */
+ DECL_PATTERN(WaitInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::WaitInstruction &insn, bool &markChildren) const
+ {
+ using namespace ir;
+ // Debugwait will use reg 1, which is different from barrier
+ sel.push();
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.WAIT(1);
+ sel.pop();
+ return true;
+ }
+
+ DECL_CTOR(WaitInstruction, 1,1);
+ };
+
INLINE uint32_t getByteScatterGatherSize(Selection::Opaque &sel, ir::Type type) {
using namespace ir;
switch (type) {
@@ -5978,6 +6001,7 @@ namespace gbe
this->insert<CalcTimestampInstructionPattern>();
this->insert<StoreProfilingInstructionPattern>();
this->insert<NullaryInstructionPattern>();
+ this->insert<WaitInstructionPattern>();
// Sort all the patterns with the number of instructions they output
for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 32e5ce2a..0070ac20 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -136,6 +136,7 @@ namespace gbe
uint16_t lut_sub:2;
};
uint32_t barrierType;
+ uint32_t waitType;
bool longjmp;
uint32_t indirect_offset;
struct {
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 6ed0b891..c7facfbd 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -949,6 +949,21 @@ namespace ir {
Register dst[0], src[0];
};
+ /*! Wait instructions */
+ class ALIGNED_INSTRUCTION WaitInstruction :
+ public BasePolicy,
+ public NSrcPolicy<WaitInstruction, 0>,
+ public NDstPolicy<WaitInstruction, 0>
+ {
+ public:
+ INLINE WaitInstruction() {
+ this->opcode = OP_WAIT;
+ }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Register dst[0], src[0];
+ };
+
#undef ALIGNED_INSTRUCTION
/////////////////////////////////////////////////////////////////////////
@@ -1247,6 +1262,8 @@ namespace ir {
{ return true; }
INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn, std::string &why) const
{ return true; }
+ INLINE bool WaitInstruction::wellFormed(const Function &fn, std::string &why) const
+ { return true; }
// Ensure that types and register family match
@@ -1531,6 +1548,9 @@ namespace ir {
out << "." << syncStr[field];
}
+ INLINE void WaitInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ }
} /* namespace internal */
@@ -1680,6 +1700,10 @@ START_INTROSPECTION(LabelInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(LabelInstruction)
+START_INTROSPECTION(WaitInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(WaitInstruction)
+
START_INTROSPECTION(VmeInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(VmeInstruction)
@@ -1829,7 +1853,8 @@ END_FUNCTION(Instruction, Register)
opcode == OP_SYNC ||
opcode == OP_ATOMIC ||
opcode == OP_CALC_TIMESTAMP ||
- opcode == OP_STORE_PROFILING;
+ opcode == OP_STORE_PROFILING ||
+ opcode == OP_WAIT;
}
#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
@@ -2174,6 +2199,11 @@ DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), setBtiReg(reg))
return internal::StoreProfilingInstruction(bti, profilingType).convert();
}
+ // WAIT
+ Instruction WAIT(void) {
+ return internal::WaitInstruction().convert();
+ }
+
std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
const Function &fn = insn.getFunction();
const BasicBlock *bb = insn.getParent();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 7862bbf5..76ffd778 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -576,6 +576,13 @@ namespace ir {
static bool isClassOf(const Instruction &insn);
};
+ /*! Wait instruction */
+ class WaitInstruction : public Instruction {
+ public:
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
/*! Specialize the instruction. Also performs typechecking first based on the
* opcode. Crashes if it fails
*/
@@ -797,6 +804,9 @@ namespace ir {
Instruction CALC_TIMESTAMP(uint32_t pointNum, uint32_t tsType);
/*! calculate the execute timestamp for profiling */
Instruction STORE_PROFILING(uint32_t bti, uint32_t Type);
+ /*! wait */
+ Instruction WAIT(void);
+
} /* namespace ir */
} /* namespace gbe */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 12827471..efdd4c5e 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -109,3 +109,4 @@ DECL_INSN(ELSE, BranchInstruction)
DECL_INSN(WHILE, BranchInstruction)
DECL_INSN(CALC_TIMESTAMP, CalcTimestampInstruction)
DECL_INSN(STORE_PROFILING, StoreProfilingInstruction)
+DECL_INSN(WAIT, WaitInstruction)
diff --git a/backend/src/libocl/include/ocl_sync.h b/backend/src/libocl/include/ocl_sync.h
index 18090d52..1d90caee 100644
--- a/backend/src/libocl/include/ocl_sync.h
+++ b/backend/src/libocl/include/ocl_sync.h
@@ -31,5 +31,6 @@ OVERLOADABLE void barrier(cl_mem_fence_flags flags);
void mem_fence(cl_mem_fence_flags flags);
void read_mem_fence(cl_mem_fence_flags flags);
void write_mem_fence(cl_mem_fence_flags flags);
+OVERLOADABLE void debugwait(void);
#endif /* __OCL_SYNC_H__ */
diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll
index 2765a714..9416f801 100644
--- a/backend/src/libocl/src/ocl_barrier.ll
+++ b/backend/src/libocl/src/ocl_barrier.ll
@@ -12,6 +12,7 @@ declare i32 @_get_global_mem_fence() nounwind alwaysinline
declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate
declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate
declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate
+declare void @__gen_ocl_debugwait() nounwind alwaysinline noduplicate
define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline {
%1 = icmp eq i32 %flags, 3
@@ -40,3 +41,8 @@ barrier_global:
done:
ret void
}
+
+define void @_Z9debugwaitv() nounwind noduplicate alwaysinline {
+ call void @__gen_ocl_debugwait()
+ ret void
+}
diff --git a/backend/src/libocl/src/ocl_sync.cl b/backend/src/libocl/src/ocl_sync.cl
index d008639a..70d6f262 100644
--- a/backend/src/libocl/src/ocl_sync.cl
+++ b/backend/src/libocl/src/ocl_sync.cl
@@ -20,6 +20,7 @@
void __gen_ocl_barrier_local(void);
void __gen_ocl_barrier_global(void);
void __gen_ocl_barrier_local_and_global(void);
+void __gen_ocl_debugwait(void);
void mem_fence(cl_mem_fence_flags flags) {
}
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index d1b6f986..a0b22626 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3601,6 +3601,7 @@ namespace gbe
case GEN_OCL_PRINTF:
case GEN_OCL_CALC_TIMESTAMP:
case GEN_OCL_STORE_PROFILING:
+ case GEN_OCL_DEBUGWAIT:
break;
case GEN_OCL_NOT_FOUND:
default:
@@ -4394,6 +4395,11 @@ namespace gbe
ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_DEBUGWAIT:
+ {
+ ctx.WAIT();
+ break;
+ }
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 65bf0c1c..d0e36144 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -177,3 +177,6 @@ DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
DECL_LLVM_GEN_FUNCTION(CALC_TIMESTAMP, __gen_ocl_calc_timestamp)
// store profiling info to the mem.
DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling)
+
+// debug wait function
+DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait)