diff options
author | Benjamin Segovia <benjamin.segovia@intel.com> | 2012-11-02 21:12:41 -0700 |
---|---|---|
committer | Benjamin Segovia <benjamin.segovia@intel.com> | 2012-11-02 21:12:41 -0700 |
commit | 96c13a11afdbbc61a8da60f2ec8d6527386541c2 (patch) | |
tree | b09c6cb13a81d92555ddee8f28e38e7ab7fa9f10 | |
parent | 2c49b307dfb9fca7943632d5b9de704c036da4ba (diff) |
- Added reference images for all the remaining tests that need them
- Added a lengthy comment on instruction selection
- Added some new patters to improve immediate matching
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 117 | ||||
-rw-r--r-- | kernels/compiler_clod_ref.bmp | bin | 0 -> 196662 bytes | |||
-rw-r--r-- | kernels/compiler_mandelbrot_alternate_ref.bmp | bin | 0 -> 196662 bytes | |||
-rw-r--r-- | kernels/compiler_mandelbrot_ref.bmp | bin | 0 -> 196662 bytes | |||
-rw-r--r-- | kernels/compiler_ribbon_ref.bmp | bin | 0 -> 196662 bytes | |||
-rw-r--r-- | utests/compiler_clod.cpp | 19 | ||||
-rw-r--r-- | utests/compiler_mandelbrot.cpp | 20 | ||||
-rw-r--r-- | utests/compiler_mandelbrot_alternate.cpp | 28 | ||||
-rw-r--r-- | utests/compiler_ribbon.cpp | 20 |
9 files changed, 154 insertions, 50 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 71175522..c429a96c 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -22,6 +22,56 @@ * \author Benjamin Segovia <benjamin.segovia@intel.com> */ +/* This is the instruction selection code. First of all, this is a bunch of c++ + * crap. Sorry if this is not that readable. Anyway, the goal here is to take + * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with + * virtual registers (i.e. regular GenIR registers). + * + * Overall idea: + * ============= + * + * There is a lot of papers and research about that but I tried to keep it + * simple. No dynamic programming, nothing like this. Just a recursive maximal + * munch. + * + * Basically, the code is executed per basic block from bottom to top. Patterns + * of GenIR instructions are defined and each instruction is matched against the + * best pattern i.e. the pattern that catches the largest number of + * instructions. Once matched, a sequence of instructions is output. + * + * Each instruction the match depends on is then marked as "root" i.e. we + * indicate that each of these instructions must be generated: we indeed need their + * destinations for the next instructions (remember that we generate the code in + * reverse order) + * + * Patterns: + * ========= + * + * There is a lot of patterns and I did not implement all of them obviously. I + * just quickly gather the complete code to make pattern implementation kind of + * easy. This is pretty verbose to add a pattern but it should be not too hard + * to add new ones. + * + * To create and register patterns, I just abused C++ pre-main. A bunch of + * patterns is then created and sorted per opcode (i.e. the opcode of the root + * of the pattern): this creates a library of patterns that may be used in + * run-time. + * + * TODO: + * ===== + * + * Sadly, I recreated here a new DAG class. This is just a bad idea since we + * already have the DAG per basic block with the Function graph i.e. the + * complete graph of uses and definitions. I think we should be able to save a + * lot of code here if we can simply reuse the code from UD / DU chains. + * + * Finally, cross-block instruction selection is quite possible with this simple + * approach. Basically, instructions from dominating blocks could be merged and + * matched with other instructions in the dominated block. This leads to the + * interesting approach which consists in traversing the dominator tree in post + * order + */ + #include "backend/gen_insn_selection.hpp" #include "backend/gen_context.hpp" #include "ir/function.hpp" @@ -1497,17 +1547,23 @@ namespace gbe }; /*! Compare instruction pattern */ - DECL_PATTERN(CompareInstruction) + class CompareInstructionPattern : public SelectionPattern { - INLINE bool emitOne(Selection::Opaque &sel, const ir::CompareInstruction &insn) const + public: + CompareInstructionPattern(void) : SelectionPattern(1,1) { + for (uint32_t op = 0; op < ir::OP_INVALID; ++op) + if (ir::isOpcodeFrom<ir::CompareInstruction>(ir::Opcode(op)) == true) + this->opcodes.push_back(ir::Opcode(op)); + } + + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; + const ir::CompareInstruction &insn = cast<CompareInstruction>(dag.insn); const Opcode opcode = insn.getOpcode(); const Type type = insn.getType(); const uint32_t genCmp = getGenCompare(opcode); const Register dst = insn.getDst(0); - const GenRegister src0 = sel.selReg(insn.getSrc(0), type); - const GenRegister src1 = sel.selReg(insn.getSrc(1), type); // Limit the compare to the active lanes. Use the same compare as for f0.0 sel.push(); @@ -1520,15 +1576,30 @@ namespace gbe sel.CMP(GEN_CONDITIONAL_LE, blockip, labelReg); sel.pop(); + // Look for immediate values for the right source + GenRegister src0, src1; + SelectionDAG *dag0 = dag.child[0]; + SelectionDAG *dag1 = dag.child[1]; + + // Right source can always be an immediate + if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) { + const auto &childInsn = cast<LoadImmInstruction>(dag1->insn); + src0 = sel.selReg(insn.getSrc(0), type); + src1 = getRegisterFromImmediate(childInsn.getImmediate()); + if (dag0) dag0->isRoot = 1; + } else { + src0 = sel.selReg(insn.getSrc(0), type); + src1 = sel.selReg(insn.getSrc(1), type); + this->markAllChildren(dag); + } + sel.push(); sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(dst); - printf("%i\n",(int) dst); sel.CMP(genCmp, src0, src1); sel.pop(); return true; } - DECL_CTOR(CompareInstruction, 1, 1); }; /*! Convert instruction pattern */ @@ -1566,17 +1637,42 @@ namespace gbe }; /*! Select instruction pattern */ - DECL_PATTERN(SelectInstruction) + class SelectInstructionPattern : public SelectionPattern { - INLINE bool emitOne(Selection::Opaque &sel, const ir::SelectInstruction &insn) const + public: + SelectInstructionPattern(void) : SelectionPattern(1,1) { + for (uint32_t op = 0; op < ir::OP_INVALID; ++op) + if (ir::isOpcodeFrom<ir::SelectInstruction>(ir::Opcode(op)) == true) + this->opcodes.push_back(ir::Opcode(op)); + } + + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { using namespace ir; + const ir::SelectInstruction &insn = cast<SelectInstruction>(dag.insn); // Get all registers for the instruction const Type type = insn.getType(); const GenRegister dst = sel.selReg(insn.getDst(0), type); - const GenRegister src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type); - const GenRegister src1 = sel.selReg(insn.getSrc(SelectInstruction::src1Index), type); + + // Look for immediate values for the right source + GenRegister src0, src1; + SelectionDAG *dag0 = dag.child[0]; // source 0 is the predicate! + SelectionDAG *dag1 = dag.child[1]; + SelectionDAG *dag2 = dag.child[2]; + + // Right source can always be an immediate + if (OCL_OPTIMIZE_IMMEDIATE && dag2 != NULL && dag2->insn.getOpcode() == OP_LOADI) { + const auto &childInsn = cast<LoadImmInstruction>(dag2->insn); + src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type); + src1 = getRegisterFromImmediate(childInsn.getImmediate()); + if (dag0) dag0->isRoot = 1; + if (dag1) dag1->isRoot = 1; + } else { + src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type); + src1 = sel.selReg(insn.getSrc(SelectInstruction::src1Index), type); + this->markAllChildren(dag); + } // Since we cannot predicate the select instruction with our current mask, // we need to perform the selection in two steps (one to select, one to @@ -1598,7 +1694,6 @@ namespace gbe sel.MOV(dst, tmp); return true; } - DECL_CTOR(SelectInstruction, 1, 1); }; /*! Label instruction pattern */ diff --git a/kernels/compiler_clod_ref.bmp b/kernels/compiler_clod_ref.bmp Binary files differnew file mode 100644 index 00000000..71afda90 --- /dev/null +++ b/kernels/compiler_clod_ref.bmp diff --git a/kernels/compiler_mandelbrot_alternate_ref.bmp b/kernels/compiler_mandelbrot_alternate_ref.bmp Binary files differnew file mode 100644 index 00000000..011d5836 --- /dev/null +++ b/kernels/compiler_mandelbrot_alternate_ref.bmp diff --git a/kernels/compiler_mandelbrot_ref.bmp b/kernels/compiler_mandelbrot_ref.bmp Binary files differnew file mode 100644 index 00000000..494bf8b7 --- /dev/null +++ b/kernels/compiler_mandelbrot_ref.bmp diff --git a/kernels/compiler_ribbon_ref.bmp b/kernels/compiler_ribbon_ref.bmp Binary files differnew file mode 100644 index 00000000..2225f454 --- /dev/null +++ b/kernels/compiler_ribbon_ref.bmp diff --git a/utests/compiler_clod.cpp b/utests/compiler_clod.cpp index 360d197b..9ea570a0 100644 --- a/utests/compiler_clod.cpp +++ b/utests/compiler_clod.cpp @@ -20,8 +20,8 @@ #include "utest_helper.hpp" static int *dst = NULL; -static const int w = 1024; -static const int h = 1024; +static const int w = 256; +static const int h = 256; static void compiler_clod(void) { @@ -32,17 +32,20 @@ static void compiler_clod(void) const float fy = float(h); OCL_CREATE_KERNEL("compiler_clod"); - cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL); - OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst); + OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); + OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &fx); OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &fy); OCL_CALL (clSetKernelArg, kernel, 3, sizeof(int), &w); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); - dst = (int *) clIntelMapBuffer(cl_dst, NULL); + OCL_MAP_BUFFER(0); + dst = (int*) buf_data[0]; - cl_write_bmp(dst, w, h, "clod.bmp"); - OCL_CALL (clIntelUnmapBuffer, cl_dst); - OCL_CALL (clReleaseMemObject, cl_dst); + /* Save the image (for debug purpose) */ + cl_write_bmp(dst, w, h, "compiler_clod.bmp"); + + /* Compare with the golden image */ + OCL_CHECK_IMAGE(dst, w, h, "compiler_clod_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_clod); diff --git a/utests/compiler_mandelbrot.cpp b/utests/compiler_mandelbrot.cpp index aa70f3de..7758dae3 100644 --- a/utests/compiler_mandelbrot.cpp +++ b/utests/compiler_mandelbrot.cpp @@ -20,9 +20,8 @@ #include "utest_helper.hpp" static int *dst = NULL; -static const size_t w = 64; -static const size_t h = 64; -static const size_t iter = 4; +static const size_t w = 256; +static const size_t h = 256; static void compiler_mandelbrot(void) { @@ -32,14 +31,17 @@ static void compiler_mandelbrot(void) OCL_CREATE_KERNEL("compiler_mandelbrot"); - cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL); - OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst); + OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); + OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); - dst = (int *) clIntelMapBuffer(cl_dst, NULL); + OCL_MAP_BUFFER(0); + dst = (int *) buf_data[0]; - cl_write_bmp(dst, w, h, "mandelbrot.bmp"); - OCL_CALL (clIntelUnmapBuffer, cl_dst); - OCL_CALL (clReleaseMemObject, cl_dst); + /* Save the image (for debug purpose) */ + cl_write_bmp(dst, w, h, "compiler_mandelbrot.bmp"); + + /* Compare with the golden image */ + OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot); diff --git a/utests/compiler_mandelbrot_alternate.cpp b/utests/compiler_mandelbrot_alternate.cpp index 74fc7164..2e5d59fc 100644 --- a/utests/compiler_mandelbrot_alternate.cpp +++ b/utests/compiler_mandelbrot_alternate.cpp @@ -20,9 +20,8 @@ #include "utest_helper.hpp" static int *dst = NULL; -static const size_t w = 64; -static const size_t h = 64; -static const size_t iter = 4; +static const size_t w = 256; +static const size_t h = 256; static const float criterium = 4.f; static void compiler_mandelbrot_alternate(void) @@ -30,22 +29,25 @@ static void compiler_mandelbrot_alternate(void) const size_t global[2] = {w, h}; const size_t local[2] = {16, 1}; const size_t sz = w * h * sizeof(char[4]); - const float rcpW = 1.f / float(w); - const float rcpH = 1.f / float(h); + const float rcpWidth = 1.f / float(w); + const float rcpHeight = 1.f / float(h); OCL_CREATE_KERNEL("compiler_mandelbrot_alternate"); - cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL); - OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst); - OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpW); - OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpH); + OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); + OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); + OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpWidth); + OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpHeight); OCL_CALL (clSetKernelArg, kernel, 3, sizeof(float), &criterium); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); - dst = (int *) clIntelMapBuffer(cl_dst, NULL); + OCL_MAP_BUFFER(0); + dst = (int *) buf_data[0]; - cl_write_bmp(dst, w, h, "mandelbrot.bmp"); - OCL_CALL (clIntelUnmapBuffer, cl_dst); - OCL_CALL (clReleaseMemObject, cl_dst); + /* Save the image (for debug purpose) */ + cl_write_bmp(dst, w, h, "compiler_mandelbrot_alternate.bmp"); + + /* Compare with the golden image */ + OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_alternate_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot_alternate); diff --git a/utests/compiler_ribbon.cpp b/utests/compiler_ribbon.cpp index 733b8ac9..1ef41a98 100644 --- a/utests/compiler_ribbon.cpp +++ b/utests/compiler_ribbon.cpp @@ -20,8 +20,8 @@ #include "utest_helper.hpp" static int *dst = NULL; -static const int w = 1024; -static const int h = 1024; +static const int w = 256; +static const int h = 256; static void compiler_ribbon(void) { @@ -32,19 +32,21 @@ static void compiler_ribbon(void) const float fy = float(h); OCL_CREATE_KERNEL("compiler_ribbon"); - cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL); - OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst); + OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); + OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &fx); OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &fy); OCL_CALL (clSetKernelArg, kernel, 3, sizeof(int), &w); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); - dst = (int *) clIntelMapBuffer(cl_dst, NULL); + OCL_MAP_BUFFER(0); + dst = (int*) buf_data[0]; - cl_write_bmp(dst, w, h, "ribbon.bmp"); - OCL_CALL (clIntelUnmapBuffer, cl_dst); - OCL_CALL (clReleaseMemObject, cl_dst); + /* Save the image (for debug purpose) */ + cl_write_bmp(dst, w, h, "compiler_ribbon.bmp"); + + /* Compare with the golden image */ + OCL_CHECK_IMAGE(dst, w, h, "compiler_ribbon_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_ribbon); - |