summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Segovia <benjamin.segovia@intel.com>2012-11-09 20:50:58 -0800
committerBenjamin Segovia <benjamin.segovia@intel.com>2012-11-09 20:50:58 -0800
commitc422c2fa2a8a6f4e98f2328ec4441867c373b500 (patch)
tree858b48e7697ef967468162bb8f5203108ff1ce2e
parent3ddd475ba5bc4b795aed1f89d3f941b2eaa26eb6 (diff)
Started the boiler plate for barrier (and fences) instructions
Improved the comment on the instruction selection
-rw-r--r--backend/src/backend/gen_insn_selection.cpp30
-rw-r--r--backend/src/ir/instruction.cpp20
-rw-r--r--backend/src/ir/instruction.hpp10
-rw-r--r--backend/src/ir/instruction.hxx2
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx5
-rw-r--r--backend/src/ocl_stdlib.h19
-rw-r--r--backend/src/ocl_stdlib_str.cpp19
7 files changed, 89 insertions, 16 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 2e091e9d..e525a675 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -57,6 +57,27 @@
* of the pattern): this creates a library of patterns that may be used in
* run-time.
*
+ * Predication / Masking and CFG linearization
+ * ===========================================
+ *
+ * The current version is based on an unfortunate choice. Basically, the problem
+ * to solve is how to map unstructured branches (i.e. regular gotos) onto Gen.
+ * Gen has native support for structured branches (if/else/endif/while...) but
+ * nothing really native for unstructured branches.
+ *
+ * The idea we implemented is simple. We stole one flag register (here f0.0) to
+ * mask all the instructions (and only activate the proper SIMD lanes) and we
+ * use the CFG linearization technique to properly handle the control flow. This
+ * is not really good for one particular reason: Gen instructions must use the
+ * *same* flag register for the predicates (used for masking) and the
+ * conditional modifier (used as a destination for CMP). This leads to extra
+ * complications with compare instructions and select instructions. Basically,
+ * we need to insert extra MOVs.
+ *
+ * Also, there is some extra kludge to handle the predicates for JMPI.
+ *
+ * See TODO for a better idea for branching and masking
+ *
* TODO:
* =====
*
@@ -70,6 +91,15 @@
* matched with other instructions in the dominated block. This leads to the
* interesting approach which consists in traversing the dominator tree in post
* order
+ *
+ * About masking and branching, a much better idea (that I found later unfortunately)
+ * is to replace the use of the flag by uses of if/endif to enclose the basic
+ * block. So, instead of using predication, we use auto-masking. The very cool
+ * consequence is that we can reintegrate back the structured branches.
+ * Basically, we will be able to identify branches that can be mapped to
+ * structured branches and mix nicely unstructured branches (which will use
+ * jmpi, if/endif to mask the blocks) and structured branches (which are pretty
+ * fast)
*/
#include "backend/gen_insn_selection.hpp"
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 31f0fd0a..74124575 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -430,14 +430,14 @@ namespace ir {
Type type; //!< Type of the immediate
};
- class ALIGNED_INSTRUCTION FenceInstruction :
+ class ALIGNED_INSTRUCTION SyncInstruction :
public BasePolicy,
- public NSrcPolicy<FenceInstruction, 0>,
- public NDstPolicy<FenceInstruction, 0>
+ public NSrcPolicy<SyncInstruction, 0>,
+ public NDstPolicy<SyncInstruction, 0>
{
public:
- INLINE FenceInstruction(AddressSpace addrSpace) {
- this->opcode = OP_FENCE;
+ INLINE SyncInstruction(AddressSpace addrSpace) {
+ this->opcode = OP_SYNC;
this->addrSpace = addrSpace;
}
bool wellFormed(const Function &fn, std::string &why) const;
@@ -718,7 +718,7 @@ namespace ir {
}
// Nothing can go wrong here
- INLINE bool FenceInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ INLINE bool SyncInstruction::wellFormed(const Function &fn, std::string &whyNot) const
{
return true;
}
@@ -915,9 +915,9 @@ START_INTROSPECTION(StoreInstruction)
#include "ir/instruction.hxx"
END_INTROSPECTION(StoreInstruction)
-START_INTROSPECTION(FenceInstruction)
+START_INTROSPECTION(SyncInstruction)
#include "ir/instruction.hxx"
-END_INTROSPECTION(FenceInstruction)
+END_INTROSPECTION(SyncInstruction)
START_INTROSPECTION(LabelInstruction)
#include "ir/instruction.hxx"
@@ -1056,7 +1056,7 @@ END_FUNCTION(Instruction, Register)
bool Instruction::hasSideEffect(void) const {
return opcode == OP_STORE ||
opcode == OP_TYPED_WRITE ||
- opcode == OP_FENCE;
+ opcode == OP_SYNC;
}
#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
@@ -1205,7 +1205,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
// FENCE
Instruction FENCE(AddressSpace space) {
- return internal::FenceInstruction(space).convert();
+ return internal::SyncInstruction(space).convert();
}
// LABEL
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 7034ae42..70889243 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -347,10 +347,10 @@ namespace ir {
static bool isClassOf(const Instruction &insn);
};
- /*! Fence instructions are used to order loads and stores for a given memory
- * space
+ /*! Sync instructions are used to order loads and stores for a given memory
+ * space and/or to serialize threads at a given point in the program
*/
- class FenceInstruction : public Instruction {
+ class SyncInstruction : public Instruction {
public:
/*! Return true if the given instruction is an instance of this class */
static bool isClassOf(const Instruction &insn);
@@ -488,8 +488,8 @@ namespace ir {
Instruction TYPED_WRITE(void);
/*! sample TODO */
Instruction SAMPLE(void);
- /*! fence.space */
- Instruction FENCE(AddressSpace space);
+ /*! sync.space */
+ Instruction SYNC(AddressSpace space);
/*! label labelIndex */
Instruction LABEL(LabelIndex labelIndex);
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 57e6f03f..6aedc1f9 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -66,6 +66,6 @@ DECL_INSN(LOAD, LoadInstruction)
DECL_INSN(STORE, StoreInstruction)
DECL_INSN(TYPED_WRITE, TypedWriteInstruction)
DECL_INSN(SAMPLE, SampleInstruction)
-DECL_INSN(FENCE, FenceInstruction)
+DECL_INSN(SYNC, SyncInstruction)
DECL_INSN(LABEL, LabelInstruction)
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 6de7810e..551db3cb 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -31,6 +31,11 @@ DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde)
DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu)
DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd)
+// Barrier function
+DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local)
+DECL_LLVM_GEN_FUNCTION(GBARRIER, __gen_ocl_barrier_global)
+DECL_LLVM_GEN_FUNCTION(LGBARRIER, __gen_ocl_barrier_local_and_global)
+
// To force SIMD8/16 compilation
DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8)
DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index 561599ec..bf22af5f 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -420,6 +420,25 @@ INLINE OVERLOADABLE float8 mix(float8 x, float8 y, float a) { return mix(x,y,(fl
INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}
/////////////////////////////////////////////////////////////////////////////
+// Synchronization functions
+/////////////////////////////////////////////////////////////////////////////
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+extern void __gen_ocl_barrier_local(void);
+extern void __gen_ocl_barrier_global(void);
+extern void __gen_ocl_barrier_local_and_global(void);
+
+INLINE void barrier(cl_mem_fence_flags flags) { // dispatch on the exact fence-flag value
+ if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)) // parens needed: '==' binds tighter than '|'
+ __gen_ocl_barrier_local_and_global();
+ else if (flags == CLK_LOCAL_MEM_FENCE)
+ __gen_ocl_barrier_local();
+ else if (flags == CLK_GLOBAL_MEM_FENCE)
+ __gen_ocl_barrier_global();
+}
+
+/////////////////////////////////////////////////////////////////////////////
// Force the compilation to SIMD8 or SIMD16
/////////////////////////////////////////////////////////////////////////////
diff --git a/backend/src/ocl_stdlib_str.cpp b/backend/src/ocl_stdlib_str.cpp
index 32291786..9828d962 100644
--- a/backend/src/ocl_stdlib_str.cpp
+++ b/backend/src/ocl_stdlib_str.cpp
@@ -423,6 +423,25 @@ std::string ocl_stdlib_str =
"INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}\n"
"\n"
"/////////////////////////////////////////////////////////////////////////////\n"
+"// Synchronization functions\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
+"#define CLK_LOCAL_MEM_FENCE (1 << 0)\n"
+"#define CLK_GLOBAL_MEM_FENCE (1 << 1)\n"
+"\n"
+"extern void __gen_ocl_barrier_local(void);\n"
+"extern void __gen_ocl_barrier_global(void);\n"
+"extern void __gen_ocl_barrier_local_and_global(void);\n"
+"\n"
+"INLINE void barrier(cl_mem_fence_flags flags) {\n"
+" if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))\n"
+" __gen_ocl_barrier_local_and_global();\n"
+" else if (flags == CLK_LOCAL_MEM_FENCE)\n"
+" __gen_ocl_barrier_local();\n"
+" else if (flags == CLK_GLOBAL_MEM_FENCE)\n"
+" __gen_ocl_barrier_global();\n"
+"}\n"
+"\n"
+"/////////////////////////////////////////////////////////////////////////////\n"
"// Force the compilation to SIMD8 or SIMD16\n"
"/////////////////////////////////////////////////////////////////////////////\n"
"\n"