diff options
author | David Li <davidxli@google.com> | 2011-02-03 10:10:59 -0800 |
---|---|---|
committer | David Li <davidxli@google.com> | 2011-02-03 10:10:59 -0800 |
commit | 3225321119408735f16b72b539c9fb7d80683552 (patch) | |
tree | 37f619c525b2b804cabb7fead3a1cb2d88b9410d | |
parent | e82376d380005c21cb70637d42104fcd4d652843 (diff) |
Checkpoint: scanline codegen.
Signed-off-by: David Li <davidxli@google.com>
-rw-r--r-- | Android.mk | 1 | ||||
-rw-r--r-- | src/glsl/ir_to_llvm_helper.cpp | 155 | ||||
-rw-r--r-- | src/glsl/linker.cpp | 42 | ||||
-rw-r--r-- | src/glsl/main.cpp | 75 | ||||
-rw-r--r-- | src/mesa/main/mtypes.h | 11 | ||||
-rw-r--r-- | src/pixelflinger2/llvm_helper.h | 218 | ||||
-rw-r--r-- | src/pixelflinger2/llvm_scanline.cpp | 687 | ||||
-rw-r--r-- | src/pixelflinger2/pixelflinger2.h | 2 | ||||
-rw-r--r-- | src/pixelflinger2/raster.cpp | 2 | ||||
-rw-r--r-- | src/pixelflinger2/scanline.cpp | 104 | ||||
-rw-r--r-- | src/pixelflinger2/shader.cpp | 21 |
11 files changed, 1078 insertions, 240 deletions
@@ -107,6 +107,7 @@ mesa_SRC_FILES := \ src/mesa/program/symbol_table.c \ src/pixelflinger2/buffer.cpp \ src/pixelflinger2/format.cpp \ + src/pixelflinger2/llvm_scanline.cpp \ src/pixelflinger2/pixelflinger2.cpp \ src/pixelflinger2/raster.cpp \ src/pixelflinger2/scanline.cpp \ diff --git a/src/glsl/ir_to_llvm_helper.cpp b/src/glsl/ir_to_llvm_helper.cpp index 9794240..9392990 100644 --- a/src/glsl/ir_to_llvm_helper.cpp +++ b/src/glsl/ir_to_llvm_helper.cpp @@ -1,3 +1,20 @@ +/** + ** + ** Copyright 2011, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ */ + #include <stack> #include <stdio.h> @@ -6,93 +23,13 @@ #include <llvm/Support/IRBuilder.h> #include <llvm/Module.h> -using namespace llvm; - -static const char * name(const char * str) -{ - return str; -} - -static Value * minIntScalar(IRBuilder<> &builder, Value * in1, Value * in2) -{ - Value * cmp = builder.CreateICmpSLT(in1, in2); - return builder.CreateSelect(cmp, in1, in2); -} - -static Value * maxIntScalar(IRBuilder<> &builder, Value * in1, Value * in2) -{ - Value * cmp = builder.CreateICmpSGT(in1, in2); - return builder.CreateSelect(cmp, in1, in2); -} - -static Constant * constFloat(IRBuilder<> & builder, float x) -{ - return ConstantFP::get(builder.getContext(), APFloat(x)); -} - -static VectorType * intVecType(IRBuilder<> & builder) -{ - return VectorType::get(Type::getInt32Ty(builder.getContext()), 4); -} - -static VectorType * floatVecType(IRBuilder<> & builder) -{ - return VectorType::get(Type::getFloatTy(builder.getContext()), 4); -} - -static Value * constIntVec(IRBuilder<> & builder, int x, int y, int z, int w) -{ - std::vector<Constant *> vec(4); - vec[0] = builder.getInt32(x); - vec[1] = builder.getInt32(y); - vec[2] = builder.getInt32(z); - vec[3] = builder.getInt32(w); - return ConstantVector::get(intVecType(builder), vec); -} - -static Value * intVec(IRBuilder<> & builder, Value * x, Value * y, Value * z, Value * w) -{ - Value * res = Constant::getNullValue(intVecType(builder)); - res = builder.CreateInsertElement(res, x, builder.getInt32(0), name("vecx")); - res = builder.CreateInsertElement(res, y, builder.getInt32(1), name("vecy")); - res = builder.CreateInsertElement(res, z, builder.getInt32(2), name("vecz")); - if (w) - res = builder.CreateInsertElement(res, w, builder.getInt32(3), name("vecw")); - return res; -} - -static Value * constFloatVec(IRBuilder<> & builder, float x, float y, float z, float w) -{ - std::vector<Constant *> vec(4); - vec[0] = constFloat(builder, x); - vec[1] = constFloat(builder, y); - vec[2] = 
constFloat(builder, z); - vec[3] = constFloat(builder, w); - return ConstantVector::get(floatVecType(builder), vec); -} - -std::vector<Value *> extractVector(IRBuilder<> & builder, Value *vec) -{ - std::vector<Value*> elems(4); - elems[0] = builder.CreateExtractElement(vec, builder.getInt32(0), name("x")); - elems[1] = builder.CreateExtractElement(vec, builder.getInt32(1), name("y")); - elems[2] = builder.CreateExtractElement(vec, builder.getInt32(2), name("z")); - elems[3] = builder.CreateExtractElement(vec, builder.getInt32(3), name("w")); - return elems; -} +#include "src/pixelflinger2/llvm_helper.h" -// <4 x i32> [0, 255] to <4 x float> [0.0, 1.0] -static Value * intColorVecToFloatColorVec(IRBuilder<> & builder, Value * vec) -{ -// return builder.CreateBitCast(vec, floatVecType(builder)); - vec = builder.CreateUIToFP(vec, floatVecType(builder)); - return builder.CreateFMul(vec, constFloatVec(builder, 1 / 255.0f, 1 / 255.0f, - 1 / 255.0f, 1 / 255.0f)); -} +using namespace llvm; // texture data is int pointer to surface (will cast to short for 16bpp), index is linear texel index, // format is GGLPixelFormat for surface, return type is <4 x i32> rgba -Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * index, const GGLPixelFormat format) +static Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * index, const GGLPixelFormat format) { Value * texel = NULL; switch (format) { @@ -176,7 +113,7 @@ Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * index, c static const unsigned SHIFT = 16; // w = width - 1, h = height - 1; similar to pointSample; returns <4 x i32> rgba -Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOffset, +static Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOffset, Value * x0, Value * y0, Value * xLerp, Value * yLerp, Value * w, Value * h, Value * width, Value * height, const GGLPixelFormat format/*, const RegDesc * dstDesc*/) 
@@ -252,56 +189,6 @@ Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOf // assert(0); } -class CondBranch -{ - IRBuilder<> & m_builder; - std::stack<BasicBlock *> m_ifStack; - -public: - CondBranch(IRBuilder<> & builder) : m_builder(builder) {} - ~CondBranch() { - assert(m_ifStack.empty()); - } - - void ifCond(Value * cmp, const char * trueBlock = "ifT", const char * falseBlock = "ifF") { - Function * function = m_builder.GetInsertBlock()->getParent(); - BasicBlock * ifthen = BasicBlock::Create(m_builder.getContext(), name(trueBlock), function, NULL); - BasicBlock * ifend = BasicBlock::Create(m_builder.getContext(), name(falseBlock), function, NULL); - m_builder.CreateCondBr(cmp, ifthen, ifend); - m_builder.SetInsertPoint(ifthen); - m_ifStack.push(ifend); - } - - void elseop() { - assert(!m_ifStack.empty()); - BasicBlock *ifend = BasicBlock::Create(m_builder.getContext(), name("else_end"), m_builder.GetInsertBlock()->getParent(),0); - if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator - m_builder.CreateBr(ifend); // branch is also a block terminator - else { - debug_printf("Instructions::elseop block alread has terminator \n"); - m_builder.GetInsertBlock()->getTerminator()->dump(); - assert(0); - } - m_builder.SetInsertPoint(m_ifStack.top()); - m_builder.GetInsertBlock()->setName(name("else_then")); - m_ifStack.pop(); - m_ifStack.push(ifend); - } - - void endif() { - assert(!m_ifStack.empty()); - if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator - m_builder.CreateBr(m_ifStack.top()); // branch is also a block terminator - else { - debug_printf("Instructions::endif block alread has terminator"); - m_builder.GetInsertBlock()->getTerminator()->dump(); - assert(0); - } - m_builder.SetInsertPoint(m_ifStack.top()); - m_ifStack.pop(); - } -}; - // dim is size - 1, since [0.0f,1.0f]->[0, size - 1] static Value * texcoordWrap(IRBuilder<> & builder, const unsigned wrap, /*const 
ChannelType type,*/ Value * r, Value * size, Value * dim, diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 9c57339..5aecb86 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1174,7 +1174,7 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index gl_shader *const sh = prog->_LinkedShaders[0]; assert(sh->Type == GL_VERTEX_SHADER); - + prog->VaryingSlots = 0; /* Operate in a total of four passes. * * 1. Invalidate the location assignments for all vertex shader inputs, @@ -1358,6 +1358,13 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index if (0 <= paramIndex) prog->Attributes->Parameters[paramIndex].Location = location; } + + for (int i = sizeof(used_locations) * 8 - 1; i >= 0; i--) + if (used_locations & (1 << i)) + { + prog->AttributeSlots = i + 1; + break; + } return true; } @@ -1385,15 +1392,16 @@ demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode) } } -#define VertexOutputOffset(FIELD) (offsetof(VertexOutput,FIELD)/sizeof(Vector4)) - void assign_varying_locations(struct gl_shader_program *prog, gl_shader *producer, gl_shader *consumer) { + prog->VaryingSlots = 0; + prog->UsesFragCoord = false; + prog->UsesPointCoord = false; /* FINISHME: Set dynamically when geometry shader support is added. */ - unsigned output_index = VertexOutputOffset(varyings); /*VERT_RESULT_VAR0*/; - unsigned input_index = VertexOutputOffset(varyings); + unsigned output_index = offsetof(VertexOutput,varyings) / sizeof(Vector4); /*VERT_RESULT_VAR0*/; + unsigned input_index = offsetof(VertexOutput,varyings) / sizeof(Vector4); /* Operate in a total of three passes. 
* @@ -1410,9 +1418,9 @@ assign_varying_locations(struct gl_shader_program *prog, if (!var || ir_var_out != var->mode) continue; if (!strcmp("gl_Position", var->name)) - var->location = VertexOutputOffset(position); + var->location = offsetof(VertexOutput,position) / sizeof(Vector4); else if (!strcmp("gl_PointSize", var->name)) - var->location = VertexOutputOffset(pointSize); + var->location = offsetof(VertexOutput,pointSize) / sizeof(Vector4); else var->location = -1; } @@ -1421,11 +1429,17 @@ assign_varying_locations(struct gl_shader_program *prog, if (!var || ir_var_in != var->mode) continue; if (!strcmp("gl_FragCoord", var->name)) - var->location = VertexOutputOffset(position); + { + var->location = offsetof(VertexOutput,position)/sizeof(Vector4); + prog->UsesFragCoord = true; + } else if (!strcmp("gl_FrontFacing", var->name)) - var->location = VertexOutputOffset(frontFacingPointCoord); + var->location = offsetof(VertexOutput,frontFacingPointCoord)/sizeof(Vector4); else if (!strcmp("gl_PointCoord", var->name)) - var->location = VertexOutputOffset(frontFacingPointCoord); + { + var->location = offsetof(VertexOutput,frontFacingPointCoord)/sizeof(Vector4); + prog->UsesPointCoord = true; + } else var->location = -1; } @@ -1465,11 +1479,13 @@ assign_varying_locations(struct gl_shader_program *prog, output_index += slots; input_index += slots; + prog->VaryingSlots += slots; } else { const unsigned slots = output_var->type->matrix_columns; output_index += slots; input_index += slots; + prog->VaryingSlots += slots; } } @@ -1717,7 +1733,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) int paramIndex = _mesa_get_parameter(prog->Varying, var->name); if (0 > paramIndex) paramIndex = _mesa_add_parameter(prog->Varying, var->name); - var->location= VertexOutputOffset(fragColor); + var->location= offsetof(VertexOutput,fragColor)/sizeof(Vector4); prog->Varying->Parameters[paramIndex].Location = var->location; } else @@ -1745,6 +1761,4 @@ done: } 
//hieralloc_free(mem_ctx); -} - -#undef VertexOutputOffset +}
\ No newline at end of file diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 2ce7ed6..c131021 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -190,6 +190,7 @@ compile_shader(struct gl_context *ctx, struct gl_shader *shader) return; } +#if 1 // build executable #define DRAW_TO_SCREEN 1 #include "image_file.h" @@ -216,7 +217,11 @@ void execute(const GGLContext * ctx) //const unsigned scale = 16, portWidth = 80, portHeight = 50; unsigned scale = 1, portWidth = width / scale, portHeight = height / scale; //unsigned scale = 1, portWidth = width / 4, portHeight = height / 4; - + + GGLSurface colorSurface = {width, height, GGL_PIXEL_FORMAT_RGBA_8888, frameSurface, width, sizeof(GGLSurface)}; + ggl->SetBuffer(ggl, GL_COLOR_BUFFER_BIT, &colorSurface); + ggl->EnableDisable(ggl, GL_DEPTH_TEST, false); + float * uniform = (float *)ctx->glCtx->CurrentProgram->ValuesUniform; float * attribute = (float *)ctx->glCtx->CurrentProgram->ValuesVertexInput; float * varying = (float *)ctx->glCtx->CurrentProgram->ValuesVertexOutput; @@ -258,36 +263,42 @@ void execute(const GGLContext * ctx) clock_t c0 = clock(); //while(true) - for (frames = 1; frames <= 10; frames++) + for (frames = 1; frames <= 20; frames++) { - for (unsigned y = 0; y < portHeight; y++) - for (unsigned x = 0; x < portWidth; x++) { - if (vTexCoordLocation > -1) - { - varying[vTexCoordLocation * 4 + 0] = ((float)x) / (portWidth - 1); - varying[vTexCoordLocation * 4 + 1] = ((float)y) / (portHeight - 1); - varying[vTexCoordLocation * 4 + 2] = 0; - varying[vTexCoordLocation * 4 + 3] = 1; - } - if (vNormalLocation > -1) - { - varying[vNormalLocation * 4 + 0] = 0; - varying[vNormalLocation * 4 + 1] = 1; - varying[vNormalLocation * 4 + 2] = 0; - varying[vNormalLocation * 4 + 3] = 1; - } - shader->function(); - unsigned r = output[0] * 255; - unsigned g = output[1] * 255; - unsigned b = output[2] * 255; - unsigned a = output[3] * 255; -// unsigned r = *(unsigned *)(outputs + 0); -// unsigned g = *(unsigned 
*)(outputs + 1); -// unsigned b = *(unsigned *)(outputs + 2); -// unsigned a = *(unsigned *)(outputs + 3); - frameSurface[y * width + x] = (a << 24) | (b << 16) | (g << 8) | r; -// frameSurface[y * width + x] = *(unsigned *)outputs; + for (unsigned y = 0; y < portHeight; y++) { + VertexOutput v0, v1; + v0.position = Vector4(0, y, 0, 0); + v1.position = Vector4(portWidth - 1, y ,0 ,0); + if (vTexCoordLocation > -1) + { + v0.varyings[vTexCoordLocation - 2] = Vector4(0, (float)y / (portHeight - 1), 0, 1); + v1.varyings[vTexCoordLocation - 2] = Vector4(1, (float)y / (portHeight - 1), 0, 1); } + ggl->ScanLine(ggl, &v0, &v1); + +// for (unsigned x = 0; x < portWidth; x++) { +// if (vTexCoordLocation > -1) +// { +// varying[vTexCoordLocation * 4 + 0] = ((float)x) / (portWidth - 1); +// varying[vTexCoordLocation * 4 + 1] = ((float)y) / (portHeight - 1); +// varying[vTexCoordLocation * 4 + 2] = 0; +// varying[vTexCoordLocation * 4 + 3] = 1; +// } +//// if (vNormalLocation > -1) +//// { +//// varying[vNormalLocation * 4 + 0] = 0; +//// varying[vNormalLocation * 4 + 1] = 1; +//// varying[vNormalLocation * 4 + 2] = 0; +//// varying[vNormalLocation * 4 + 3] = 1; +//// } +// shader->function(); +// unsigned r = output[0] * 255; +// unsigned g = output[1] * 255; +// unsigned b = output[2] * 255; +// unsigned a = output[3] * 255; +// frameSurface[y * width + x] = (a << 24) | (b << 16) | (g << 8) | r; +// } + } //* if (scale > 1) for (int y = portHeight - 1; y >= 0; y--) @@ -301,6 +312,8 @@ void execute(const GGLContext * ctx) //*/ #if defined __arm__ && DRAW_TO_SCREEN frameSurface = (unsigned *)PresentDrawingSurface(); + colorSurface.data = frameSurface; + ggl->SetBuffer(ggl, GL_COLOR_BUFFER_BIT, &colorSurface); #endif } @@ -308,12 +321,12 @@ void execute(const GGLContext * ctx) printf ("\n *** test_scan elapsed CPU time: %fs \n *** fps=%.2f, tpf=%.2fms \n", elapsed, frames / elapsed, elapsed / frames * 1000); printf("gl_FragColor=%.2f, %.2f, %.2f %.2f \n", output[0], output[1], 
output[2], output[3]); - //assert(0.1f < outputs[3]); #if defined __arm__ SaveBMP("/sdcard/mesa.bmp", frameSurface, width, height); #else SaveBMP("mesa.bmp", frameSurface, width, height); #endif + assert(0.1f < output[3]); #if DRAW_TO_SCREEN void DisposeDrawingSurface(); #else @@ -480,3 +493,5 @@ main(int argc, char **argv) hieralloc_report_brief(NULL, stdout); return status; } + +#endif // build executable diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index b6eba89..5614907 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2078,12 +2078,13 @@ struct gl_shader struct exec_list *ir; struct glsl_symbol_table *symbols; - struct Executable * executable; - void (*function)(); /**< the active function */ - unsigned SamplersUsed; /**< bitfield of samplers used by shader */ /** Shaders containing built-in functions that are used for linking. */ struct gl_shader *builtins_to_link[16]; unsigned num_builtins_to_link; + + struct Executable * executable; + void (*function)(); /**< the active function */ + unsigned SamplersUsed; /**< bitfield of samplers used by shader */ }; @@ -2143,6 +2144,10 @@ struct gl_shader_program GLfloat (*ValuesVertexInput)[4]; /**< actually a VertexInput */ GLfloat (*ValuesVertexOutput)[4]; /**< actually a VertexOutput */ void * InputOuputBase; /**< allocation base for Values* */ + + unsigned AttributeSlots;/**< [0,AttributeSlots-1] read by vertex shader */ + unsigned VaryingSlots; /**< [0,VaryingSlots-1] read by fragment shader */ + unsigned UsesFragCoord : 1, UsesPointCoord : 1; }; diff --git a/src/pixelflinger2/llvm_helper.h b/src/pixelflinger2/llvm_helper.h new file mode 100644 index 0000000..3ad083b --- /dev/null +++ b/src/pixelflinger2/llvm_helper.h @@ -0,0 +1,218 @@ +/** + ** + ** Copyright 2011, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#ifndef _PIXELFLINGER2_LLVM_HELPER_H_ +#define _PIXELFLINGER2_LLVM_HELPER_H_ + +#include <stack> +#include <stdio.h> +#include <llvm/Support/IRBuilder.h> + +using namespace llvm; + +static const char * name(const char * str) +{ + return str; +} + +static Value * minIntScalar(IRBuilder<> &builder, Value * in1, Value * in2) +{ + Value * cmp = builder.CreateICmpSLT(in1, in2); + return builder.CreateSelect(cmp, in1, in2); +} + +static Value * maxIntScalar(IRBuilder<> &builder, Value * in1, Value * in2) +{ + Value * cmp = builder.CreateICmpSGT(in1, in2); + return builder.CreateSelect(cmp, in1, in2); +} + +static Constant * constFloat(IRBuilder<> & builder, float x) +{ + return ConstantFP::get(builder.getContext(), APFloat(x)); +} + +static VectorType * intVecType(IRBuilder<> & builder) +{ + return VectorType::get(Type::getInt32Ty(builder.getContext()), 4); +} + +static VectorType * floatVecType(IRBuilder<> & builder) +{ + return VectorType::get(Type::getFloatTy(builder.getContext()), 4); +} + +static Value * constIntVec(IRBuilder<> & builder, int x, int y, int z, int w) +{ + std::vector<Constant *> vec(4); + vec[0] = builder.getInt32(x); + vec[1] = builder.getInt32(y); + vec[2] = builder.getInt32(z); + vec[3] = builder.getInt32(w); + return ConstantVector::get(intVecType(builder), vec); +} + +static Value * intVec(IRBuilder<> & builder, Value * x, Value * y, Value * z, Value * w) +{ + Value * res = Constant::getNullValue(intVecType(builder)); + res = builder.CreateInsertElement(res, x, builder.getInt32(0), name("vecx")); + res = 
builder.CreateInsertElement(res, y, builder.getInt32(1), name("vecy")); + res = builder.CreateInsertElement(res, z, builder.getInt32(2), name("vecz")); + if (w) + res = builder.CreateInsertElement(res, w, builder.getInt32(3), name("vecw")); + return res; +} + +static Value * constFloatVec(IRBuilder<> & builder, float x, float y, float z, float w) +{ + std::vector<Constant *> vec(4); + vec[0] = constFloat(builder, x); + vec[1] = constFloat(builder, y); + vec[2] = constFloat(builder, z); + vec[3] = constFloat(builder, w); + return ConstantVector::get(floatVecType(builder), vec); +} + +static std::vector<Value *> extractVector(IRBuilder<> & builder, Value *vec) +{ + std::vector<Value*> elems(4); + elems[0] = builder.CreateExtractElement(vec, builder.getInt32(0), name("x")); + elems[1] = builder.CreateExtractElement(vec, builder.getInt32(1), name("y")); + elems[2] = builder.CreateExtractElement(vec, builder.getInt32(2), name("z")); + elems[3] = builder.CreateExtractElement(vec, builder.getInt32(3), name("w")); + return elems; +} + +static Value * intVecMax(IRBuilder<> & builder, Value * in1, Value * in2) +{ + std::vector<Value *> vec1 = extractVector(builder, in1); + std::vector<Value *> vec2 = extractVector(builder, in2); + for (unsigned i = 0; i < 4; i++) { + Value * cmp = builder.CreateICmpSGT(vec1[i], vec2[i], name("iVecSelCmp")); + vec1[i] = builder.CreateSelect(cmp, vec1[i], vec2[i], name("iVecSel")); + } + return intVec(builder, vec1[0], vec1[1], vec1[2], vec1[3]); +} + +static Value * intVecMin(IRBuilder<> & builder, Value * in1, Value * in2) +{ + std::vector<Value *> vec1 = extractVector(builder, in1); + std::vector<Value *> vec2 = extractVector(builder, in2); + for (unsigned i = 0; i < 4; i++) { + Value * cmp = builder.CreateICmpSLT(vec1[i], vec2[i], name("iVecSelCmp")); + vec1[i] = builder.CreateSelect(cmp, vec1[i], vec2[i], name("iVecSel")); + } + return intVec(builder, vec1[0], vec1[1], vec1[2], vec1[3]); +} + +// <4 x i32> [0, 255] to <4 x float> [0.0, 
1.0] +static Value * intColorVecToFloatColorVec(IRBuilder<> & builder, Value * vec) +{ + vec = builder.CreateUIToFP(vec, floatVecType(builder)); + return builder.CreateFMul(vec, constFloatVec(builder, 1 / 255.0f, 1 / 255.0f, + 1 / 255.0f, 1 / 255.0f)); +} + +class CondBranch +{ + IRBuilder<> & m_builder; + std::stack<BasicBlock *> m_ifStack; + + struct Loop { + BasicBlock *begin; + BasicBlock *end; + }; + std::stack<Loop> m_loopStack; + + CondBranch(); + +public: + CondBranch(IRBuilder<> & builder) : m_builder(builder) {} + ~CondBranch() { + assert(m_ifStack.empty()); + assert(m_loopStack.empty()); + } + + void ifCond(Value * cmp, const char * trueBlock = "ifT", const char * falseBlock = "ifF") { + Function * function = m_builder.GetInsertBlock()->getParent(); + BasicBlock * ifthen = BasicBlock::Create(m_builder.getContext(), name(trueBlock), function, NULL); + BasicBlock * ifend = BasicBlock::Create(m_builder.getContext(), name(falseBlock), function, NULL); + m_builder.CreateCondBr(cmp, ifthen, ifend); + m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); + } + + void elseop() { + assert(!m_ifStack.empty()); + BasicBlock *ifend = BasicBlock::Create(m_builder.getContext(), name("else_end"), m_builder.GetInsertBlock()->getParent(),0); + if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator + m_builder.CreateBr(ifend); // branch is also a block terminator + else { + debug_printf("Instructions::elseop block alread has terminator \n"); + m_builder.GetInsertBlock()->getTerminator()->dump(); + assert(0); + } + m_builder.SetInsertPoint(m_ifStack.top()); + m_builder.GetInsertBlock()->setName(name("else_then")); + m_ifStack.pop(); + m_ifStack.push(ifend); + } + + void endif() { + assert(!m_ifStack.empty()); + if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator + m_builder.CreateBr(m_ifStack.top()); // branch is also a block terminator + else { + debug_printf("Instructions::endif block alread has 
terminator"); + m_builder.GetInsertBlock()->getTerminator()->dump(); + assert(0); + } + m_builder.SetInsertPoint(m_ifStack.top()); + m_ifStack.pop(); + } + + void beginLoop() { + Function * function = m_builder.GetInsertBlock()->getParent(); + BasicBlock *begin = BasicBlock::Create(m_builder.getContext(), name("loop"), function,0); + BasicBlock *end = BasicBlock::Create(m_builder.getContext(), name("endloop"), function,0); + + m_builder.CreateBr(begin); + Loop loop; + loop.begin = begin; + loop.end = end; + m_builder.SetInsertPoint(begin); + m_loopStack.push(loop); + } + + void endLoop() { + assert(!m_loopStack.empty()); + Loop loop = m_loopStack.top(); + m_builder.CreateBr(loop.begin); + loop.end->moveAfter(m_builder.GetInsertBlock()); + m_builder.SetInsertPoint(loop.end); + m_loopStack.pop(); + } + + void brk() { + assert(!m_loopStack.empty()); + BasicBlock *unr = BasicBlock::Create(m_builder.getContext(), name("unreachable"), m_builder.GetInsertBlock()->getParent(),0); + m_builder.CreateBr(m_loopStack.top().end); + m_builder.SetInsertPoint(unr); + } +}; + +#endif // #ifndef _PIXELFLINGER2_LLVM_HELPER_H_ diff --git a/src/pixelflinger2/llvm_scanline.cpp b/src/pixelflinger2/llvm_scanline.cpp new file mode 100644 index 0000000..461f8a8 --- /dev/null +++ b/src/pixelflinger2/llvm_scanline.cpp @@ -0,0 +1,687 @@ +/** + ** + ** Copyright 2011, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ */ + +#include "src/pixelflinger2/pixelflinger2.h" +#include "src/pixelflinger2/llvm_helper.h" +#include "src/mesa/main/mtypes.h" + +#include <llvm/Module.h> + +using namespace llvm; + +static void StencilOp(IRBuilder<> &builder, const unsigned char op, + Value * sPtr, Value * sRef) +{ + CondBranch condBranch(builder); + Value * s = builder.CreateLoad(sPtr, "stenciOpS"); + switch (op) { + case 0 : // GL_ZERO + builder.CreateStore(builder.getInt8(0), sPtr); + break; + case 1 : // GL_KEEP + builder.CreateStore(s, sPtr); + break; + case 2 : // GL_REPLACE + builder.CreateStore(sRef, sPtr); + break; + case 3 : // GL_INCR + condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(255))); + builder.CreateStore(s, sPtr); + condBranch.elseop(); + builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr); + condBranch.endif(); + break; + case 4 : // GL_DECR + condBranch.ifCond(builder.CreateICmpEQ(s, builder.getInt8(0))); + builder.CreateStore(s, sPtr); + condBranch.elseop(); + builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr); + condBranch.endif(); + break; + case 5 : // GL_INVERT + builder.CreateStore(builder.CreateNot(s), sPtr); + break; + case 6 : // GL_INCR_WRAP + builder.CreateStore(builder.CreateAdd(s, builder.getInt8(1)), sPtr); + break; + case 7 : // GL_DECR_WRAP + builder.CreateStore(builder.CreateSub(s, builder.getInt8(1)), sPtr); + break; + default: + assert(0); + break; + } +} + +static Value * StencilOp(IRBuilder<> & builder, Value * face, + const unsigned char frontOp, const unsigned char backOp, + Value * sPtr, Value * sRef) +{ + CondBranch condBranch(builder); + if (frontOp != backOp) + condBranch.ifCond(builder.CreateICmpEQ(face, builder.getInt8(0))); + + StencilOp(builder, frontOp, sPtr, sRef); + + if (frontOp != backOp) { + condBranch.elseop(); + StencilOp(builder, backOp, sPtr, sRef); + condBranch.endif(); + } + return builder.CreateLoad(sPtr); +} + +static void StencilFunc(IRBuilder<> & builder, const unsigned char func, + 
Value * s, Value * sRef, Value * sCmpPtr)
+{
+    switch (func) {
+    case GL_NEVER & 0x7:
+        builder.CreateStore(builder.getFalse(), sCmpPtr);
+        break;
+    case GL_LESS & 0x7:
+        builder.CreateStore(builder.CreateICmpULT(sRef, s), sCmpPtr);
+        break;
+    case GL_EQUAL & 0x7:
+        builder.CreateStore(builder.CreateICmpEQ(sRef, s), sCmpPtr);
+        break;
+    case GL_LEQUAL & 0x7:
+        builder.CreateStore(builder.CreateICmpULE(sRef, s), sCmpPtr);
+        break;
+    case GL_GREATER & 0x7:
+        builder.CreateStore(builder.CreateICmpUGT(sRef, s), sCmpPtr);
+        break;
+    case GL_NOTEQUAL & 0x7:
+        builder.CreateStore(builder.CreateICmpNE(sRef, s), sCmpPtr);
+        break;
+    case GL_GEQUAL & 0x7:
+        builder.CreateStore(builder.CreateICmpUGE(sRef, s), sCmpPtr);
+        break;
+    case GL_ALWAYS & 0x7:
+        builder.CreateStore(builder.getTrue(), sCmpPtr);
+        break;
+    default:
+        assert(0);
+        break;
+    }
+}
+
+// Returns the IR value of the blend factor selected by mode; the comment on
+// each case names the GL factor that index stands for. src, dst, constant,
+// one and zero are returned or combined exactly as passed in, so the caller
+// decides whether they are vectors (colour factor) or scalars (alpha factor).
+// With isVector set, the alpha-derived factors are expanded to four lanes
+// with intVec; otherwise they are returned as they are. An unknown mode
+// asserts and yields NULL.
+static Value * BlendFactor(const unsigned mode, Value * src, Value * dst,
+                           Value * constant, Value * one, Value * zero,
+                           Value * srcA, Value * dstA, Value * constantA,
+                           Value * sOne, const bool isVector, IRBuilder<> & builder)
+{
+    Value * factor = NULL;
+    switch (mode) {
+    case 0: // GL_ZERO
+        factor = zero;
+        break;
+    case 1: // GL_ONE
+        factor = one;
+        break;
+    case 2: // GL_SRC_COLOR:
+        factor = src;
+        break;
+    case 3: // GL_ONE_MINUS_SRC_COLOR:
+        factor = builder.CreateSub(one, src);
+        break;
+    case 4: // GL_DST_COLOR:
+        factor = dst;
+        break;
+    case 5: // GL_ONE_MINUS_DST_COLOR:
+        factor = builder.CreateSub(one, dst);
+        break;
+    case 6: // GL_SRC_ALPHA:
+        factor = srcA;
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 7: // GL_ONE_MINUS_SRC_ALPHA:
+        factor = builder.CreateSub(sOne, srcA);
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 8: // GL_DST_ALPHA:
+        factor = dstA;
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 9: // GL_ONE_MINUS_DST_ALPHA:
+        factor = builder.CreateSub(sOne, dstA);
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 10: // GL_SRC_ALPHA_SATURATE: // valid only for source color and alpha
+        factor = minIntScalar(builder, srcA, builder.CreateSub(sOne, dstA));
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, sOne);
+        else
+            factor = sOne; // when it's used for source alpha, it's just 1
+        break;
+    case 11: // GL_CONSTANT_COLOR:
+        factor = constant;
+        break;
+    case 12: // GL_ONE_MINUS_CONSTANT_COLOR:
+        factor = builder.CreateSub(one, constant);
+        break;
+    case 13: // GL_CONSTANT_ALPHA:
+        factor = constantA;
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 14: // GL_ONE_MINUS_CONSTANT_ALPHA:
+        factor = builder.CreateSub(sOne, constantA);
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    return factor;
+}
+
+// Clamps every lane of intVector to [0, 255]: first raised to at least 0 with
+// intVecMax, then limited to at most 255 with intVecMin.
+static Value * Saturate(IRBuilder<> & builder, Value * intVector)
+{
+    intVector = intVecMax(builder, intVector, constIntVec(builder, 0,0,0,0));
+    return intVecMin(builder, intVector, constIntVec(builder, 255,255,255,255));
+}
+
+// src is int32x4 [0,255] rgba vector, and combines them into int32
+static Value * IntVectorToColor(IRBuilder<> & builder, Value * src)
+{
+    //src = builder.CreateBitCast(src, inst->GetIntVectorType());
+    // lane i is shifted left by 8 * i, then the four lanes are OR-ed together
+    src = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24));
+    std::vector<Value *> comps = extractVector(builder, src);
+    comps[0] = builder.CreateOr(comps[0], comps[1]);
+    comps[0] = builder.CreateOr(comps[0], comps[2]);
+    comps[0] = builder.CreateOr(comps[0], comps[3]);
+    return comps[0];
+}
+
+// Emits the IR that turns a fragment shader output into a packed frame buffer
+// pixel and returns the resulting i32 value.
+//
+// src is <4 x float> approx [0,1]; dst is <4 x i32> [0,255] from frame buffer;
+// return is i32.
+//
+// With blending disabled, src is scaled by 255, converted to integers, clamped
+// to [0,255] and packed; dst is not used. With blending enabled, the source
+// and destination factors come from BlendFactor (blendState.scf/saf/dcf/daf),
+// the colour equation from blendState.ce and, when it differs, the alpha
+// equation from blendState.ae. The blended value is shifted back down by 8,
+// clamped and packed. All blend state is read from gglCtx while generating
+// code, so it is fixed in the emitted IR.
+Value * GenerateFSBlend(const GGLContext * gglCtx, /*const RegDesc * regDesc,*/
+                        IRBuilder<> & builder, Value * src, Value * dst)
+{
+    const Type * const intType = Type::getInt32Ty(*gglCtx->llvmCtx);
+
+    // TODO cast the outputs pointer type to int for writing to minimize bandwidth
+    if (!gglCtx->blendState.enable) {
+//        if (regDesc->IsInt32Color())
+//        {
+//            debug_printf("GenerateFixedFS dst is already scalar fixed0 \n");
+//            src = builder.CreateExtractElement(src, builder.getInt32(0));
+//            src = builder.CreateBitCast(src, intType); // it's already RGBA int32
+//        }
+//        else if (regDesc->IsVectorType(Float))
+//        {
+        src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
+        // Signed conversion, as in the blended path below: fptoui has no
+        // defined result for a negative input, so a slightly negative shader
+        // output could not be relied on to clamp to 0 in Saturate.
+        src = builder.CreateFPToSI(src, intVecType(builder));
+        src = Saturate(builder, src);
+        src = IntVectorToColor(builder, src);
+//        }
+//        else if (regDesc->IsVectorType(Fixed8))
+//        {
+//            src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//            src = Saturate(instr, src);
+//            src = IntVectorToColor(instr, storage, src);
+//        }
+//        else if (regDesc->IsVectorType(Fixed16))
+//        {
+//            src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//            src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
+//            src = Saturate(instr, src);
+//            src = IntVectorToColor(instr, storage, src);
+//        }
+//        else
+//            assert(0);
+        return src;
+    }
+
+    // blending, so convert src to <4 x i32>
+//    if (regDesc->IsInt32Color())
+//    {
+//        src = builder.CreateExtractElement(src, builder.getInt32(0));
+//        src = builder.CreateBitCast(src, intType); // it's already RGBA int32
+//
+//        Value * channels = Constant::getNullValue(instr->GetIntVectorType());
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(0));
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(1));
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(2));
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(3));
+//        channels = builder.CreateLShr(channels, constIntVec(builder,0, 8, 16, 24));
+//        channels = builder.CreateAnd(channels, constIntVec(builder,0xff, 0xff, 0xff, 0xff));
+//        src = channels;
+//    }
+//    else if (regDesc->IsVectorType(Fixed8)) // it's already int32x4 RGBA
+//        src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//    else if (regDesc->IsVectorType(Fixed16))
+//    {
+//        src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//        // TODO DXL consider shl dst by 8 and ashr by 16 in the end for more precision
+//        src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
+//    }
+//    else if (regDesc->IsVectorType(Float))
+//    {
+    src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
+    src = builder.CreateFPToSI(src, intVecType(builder));
+//    }
+//    else
+//        assert(0);
+
+    Value * const one = constIntVec(builder,255,255,255,255);
+    Value * const zero = constIntVec(builder,0,0,0,0);
+    Value * const sOne = builder.getInt32(255);
+    Value * const sZero = builder.getInt32(0);
+
+#if USE_LLVM_SCANLINE
+    Value * constant = constIntVec(builder,gglCtx->blendState.color[0],
+                                   gglCtx->blendState.color[1],
+                                   gglCtx->blendState.color[2],
+                                   gglCtx->blendState.color[3]);
+#else
+    Value * constant = NULL;
+    assert(0);
+#endif
+
+    Value * srcA = extractVector(builder,src)[3];
+    Value * dstA = extractVector(builder,dst)[3];
+    Value * constantA = extractVector(builder,constant)[3];
+
+    // colour factor for all four lanes; lane 3 is replaced below when the
+    // alpha factor is a different mode
+    Value * sf = BlendFactor(gglCtx->blendState.scf, src, dst,
+                             constant, one, zero, srcA, dstA,
+                             constantA, sOne, true, builder);
+    if (gglCtx->blendState.scf != gglCtx->blendState.saf) {
+        Value * sfA = BlendFactor(gglCtx->blendState.saf, srcA, dstA,
+                                  constantA, sOne, sZero, srcA, dstA,
+                                  constantA, sOne, false, builder);
+        sf = builder.CreateInsertElement(sf, sfA, builder.getInt32(3),
+                                         name("sfAStore"));
+    }
+
+    Value * df = BlendFactor(gglCtx->blendState.dcf, src, dst,
+                             constant, one, zero, srcA, dstA,
+                             constantA, sOne, true, builder);
+    if (gglCtx->blendState.dcf != gglCtx->blendState.daf) {
+        Value * dfA = BlendFactor(gglCtx->blendState.daf, srcA, dstA,
+                                  constantA, sOne, sZero, srcA, dstA,
+                                  constantA, sOne, false, builder);
+        df = builder.CreateInsertElement(df, dfA, builder.getInt32(3),
+                                         name("dfAStore"));
+    }
+
+    // this is factor *= 256 / 255; factors have a chance of constant folding
+    sf = builder.CreateAdd(sf, builder.CreateLShr(sf, constIntVec(builder,7,7,7,7)));
+    df = builder.CreateAdd(df, builder.CreateLShr(df, constIntVec(builder,7,7,7,7)));
+
+    src = builder.CreateMul(src, sf);
+    dst = builder.CreateMul(dst, df);
+
+    // colour equation, applied to all four lanes
+    Value * res = NULL;
+    switch (gglCtx->blendState.ce + GL_FUNC_ADD) {
+    case GL_FUNC_ADD:
+        res = builder.CreateAdd(src, dst);
+        break;
+    case GL_FUNC_SUBTRACT:
+        res = builder.CreateSub(src, dst);
+        break;
+    case GL_FUNC_REVERSE_SUBTRACT:
+        res = builder.CreateSub(dst, src);
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    if (gglCtx->blendState.ce != gglCtx->blendState.ae) {
+        srcA = extractVector(builder,src)[3];
+        dstA = extractVector(builder,dst)[3];
+        Value * resA = NULL;
+        // Select on the alpha equation here. This branch only runs when it
+        // differs from the colour equation; switching on ce again would just
+        // recompute the value lane 3 of res already holds.
+        switch (gglCtx->blendState.ae + GL_FUNC_ADD) {
+        case GL_FUNC_ADD:
+            resA = builder.CreateAdd(srcA, dstA);
+            break;
+        case GL_FUNC_SUBTRACT:
+            resA = builder.CreateSub(srcA, dstA);
+            break;
+        case GL_FUNC_REVERSE_SUBTRACT:
+            resA = builder.CreateSub(dstA, srcA);
+            break;
+        default:
+            assert(0);
+            break;
+        }
+        res = builder.CreateInsertElement(res, resA, builder.getInt32(3),
+                                          name("resAStore"));
+    }
+
+    // undo the 8-bit scale introduced by multiplying with the factors
+    res = builder.CreateAShr(res, constIntVec(builder,8,8,8,8));
+    res = Saturate(builder, res);
+    res = IntVectorToColor(builder, res);
+    return res;
+}
+
+static FunctionType * ScanLineFunctionType(IRBuilder<> & builder)
+{
+    std::vector<const Type*> funcArgs;
+    const VectorType * vectorType = floatVecType(builder);
+    const PointerType * vectorPtr = PointerType::get(vectorType, 0);
+    const Type * intType = builder.getInt32Ty();
+    const PointerType * intPointerType = PointerType::get(intType, 0);
+    const PointerType * bytePointerType = PointerType::get(builder.getInt8Ty(), 0);
+
+    funcArgs.push_back(vectorPtr); // start
+    funcArgs.push_back(vectorPtr); // step
+    funcArgs.push_back(intPointerType); // frame
+    funcArgs.push_back(intPointerType); // depth
+    funcArgs.push_back(bytePointerType);
// stencil
+    funcArgs.push_back(bytePointerType); // stencil state
+    funcArgs.push_back(intType); // count
+
+    FunctionType *functionType = FunctionType::get(/*Result=*/builder.getVoidTy(),
+                                 /*Params=*/funcArgs,
+                                 /*isVarArg=*/false);
+
+    return functionType;
+}
+
+// True for the BlendFactor modes whose value is computed from the frame buffer
+// colour or alpha: 4 (DST_COLOR), 5 (ONE_MINUS_DST_COLOR), 8 (DST_ALPHA),
+// 9 (ONE_MINUS_DST_ALPHA) and 10 (SRC_ALPHA_SATURATE).
+static bool BlendFactorReadsDst(const unsigned mode)
+{
+    switch (mode) {
+    case 4:
+    case 5:
+    case 8:
+    case 9:
+    case 10:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Emits into mod a function called scanlineName (type from
+// ScanLineFunctionType) that shades one scanline by calling the fragment
+// shader function shaderName for each pixel. Does nothing if mod already
+// contains a function with that name.
+//
+// generated scanline function parameters are VertexOutput * start, VertexOutput * step,
+// unsigned * frame, int * depth, unsigned char * stencil,
+// ActiveStencilState * stencilState, unsigned count
+//
+// Each iteration of the emitted loop runs the stencil test, then the depth
+// test, then on success calls the shader, blends its output with
+// GenerateFSBlend and stores the pixel. It updates the stencil value for the
+// pass / depth-fail / stencil-fail outcome, advances the buffer pointers and
+// adds step to the interpolated inputs held in start. Which tests, loads and
+// increments are emitted is decided here from gglCtx and program, not at run
+// time.
+void GenerateScanLine(const GGLContext * gglCtx, const gl_shader_program * program, Module * mod,
+                      const char * shaderName, const char * scanlineName)
+{
+    IRBuilder<> builder(mod->getContext());
+    debug_printf("GenerateScanLine %s \n", scanlineName);
+
+    const Type * intType = Type::getInt32Ty(*gglCtx->llvmCtx);
+    const PointerType * intPointerType = PointerType::get(intType, 0);
+    const Type * byteType = Type::getInt8Ty(*gglCtx->llvmCtx);
+    const PointerType * bytePointerType = PointerType::get(byteType, 0);
+
+    Function * func = mod->getFunction(scanlineName);
+    if (func)
+        return;
+
+    func = llvm::cast<Function>(mod->getOrInsertFunction(scanlineName,
+                                ScanLineFunctionType(builder)));
+
+    BasicBlock *label_entry = BasicBlock::Create(builder.getContext(), "entry", func, 0);
+    builder.SetInsertPoint(label_entry);
+    CondBranch condBranch(builder);
+
+    Function::arg_iterator args = func->arg_begin();
+    Value * start = args++;
+    start->setName("start");
+    Value * step = args++;
+    step->setName("step");
+
+    // need alloc to be able to assign to it by using store
+    Value * framePtr = builder.CreateAlloca(intPointerType);
+    builder.CreateStore(args++, framePtr);
+    Value * depthPtr = builder.CreateAlloca(intPointerType);
+    builder.CreateStore(args++, depthPtr);
+    Value * stencilPtr = builder.CreateAlloca(bytePointerType);
+    builder.CreateStore(args++, stencilPtr);
+    Value * stencilState = args++;
+    stencilState->setName("stencilState");
+    Value * countPtr = builder.CreateAlloca(intType);
+    builder.CreateStore(args++, countPtr);
+
+    // Stencil parameters: a value that is the same for front and back faces
+    // is baked in as a constant, otherwise it is loaded from stencilState.
+    Value * sFace = NULL, * sRef = NULL, *sMask = NULL, * sFunc = NULL;
+    if (gglCtx->bufferState.stencilTest) {
+        sFace = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 0), "sFace");
+        if (gglCtx->frontStencil.ref == gglCtx->backStencil.ref)
+            sRef = builder.getInt8(gglCtx->frontStencil.ref);
+        else
+            sRef = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 1), "sRef");
+        if (gglCtx->frontStencil.mask == gglCtx->backStencil.mask)
+            sMask = builder.getInt8(gglCtx->frontStencil.mask);
+        else
+            sMask = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 2), "sMask");
+        if (gglCtx->frontStencil.func == gglCtx->backStencil.func)
+            sFunc = builder.getInt8(gglCtx->frontStencil.func);
+        else
+            sFunc = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 3), "sFunc");
+    }
+
+    // Scratch slots for the per-pixel stencil and depth tests. They are
+    // allocated once here, ahead of the loop: an alloca emitted inside the
+    // loop body would reserve new stack space on every iteration.
+    Value * sCmpPtr = NULL, * sPtr = NULL, * zPtr = NULL;
+    if (gglCtx->bufferState.stencilTest) {
+        sCmpPtr = builder.CreateAlloca(Type::getInt1Ty(*gglCtx->llvmCtx));
+        sCmpPtr->setName("sCmpPtr");
+        sPtr = builder.CreateAlloca(byteType);
+        sPtr->setName("sPtr");
+    }
+    if (gglCtx->bufferState.depthTest) {
+        zPtr = builder.CreateAlloca(intType); // temp store for modifying incoming z
+        zPtr->setName("zPtr");
+    }
+
+    condBranch.beginLoop(); // while (count > 0)
+
+    // get values
+    Value * frame = builder.CreateLoad(framePtr);
+    frame->setName("frame");
+    Value * depth = NULL, * stencil = NULL;
+    if (gglCtx->bufferState.depthTest) {
+        depth = builder.CreateLoad(depthPtr);
+        depth->setName("depth");
+    }
+
+    Value * count = builder.CreateLoad(countPtr);
+    count->setName("count");
+
+    Value * cmp = builder.CreateICmpEQ(count, builder.getInt32(0));
+    condBranch.ifCond(cmp, "if_break_loop"); // if (count == 0)
+    condBranch.brk(); // break;
+    condBranch.endif();
+
+    Value * sCmp = NULL, * s = NULL;
+    if (gglCtx->bufferState.stencilTest) {
+        stencil = builder.CreateLoad(stencilPtr);
+        stencil->setName("stencil");
+
+        // masked stencil value; kept in sPtr for the StencilOp calls below
+        s = builder.CreateLoad(stencil);
+        s = builder.CreateAnd(s, sMask);
+        builder.CreateStore(s, sPtr);
+
+        // when front and back use different functions, pick one by sFace
+        if (gglCtx->frontStencil.func != gglCtx->backStencil.func)
+            condBranch.ifCond(builder.CreateICmpEQ(sFace, builder.getInt8(0)));
+
+        StencilFunc(builder, gglCtx->frontStencil.func, s, sRef, sCmpPtr);
+
+        if (gglCtx->frontStencil.func != gglCtx->backStencil.func) {
+            condBranch.elseop();
+            StencilFunc(builder, gglCtx->backStencil.func, s, sRef, sCmpPtr);
+            condBranch.endif();
+        }
+
+        sCmp = builder.CreateLoad(sCmpPtr);
+    } else
+        sCmp = ConstantInt::getTrue(mod->getContext());
+    sCmp->setName("sCmp");
+
+    Value * depthZ = NULL, * z = NULL, * zCmp = NULL;
+    if (gglCtx->bufferState.depthTest) {
+        depthZ = builder.CreateLoad(depth, "depthZ"); // z stored in buffer
+
+        // modified incoming z
+        z = builder.CreateBitCast(start, intPointerType);
+        z = builder.CreateConstInBoundsGEP1_32(z, (GGL_FS_INPUT_OFFSET +
+                                               GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
+        z = builder.CreateLoad(z, "z");
+
+        builder.CreateStore(z, zPtr);
+
+        Value * zNegative = builder.CreateICmpSLT(z, builder.getInt32(0));
+        condBranch.ifCond(zNegative);
+        // if (0x80000000 & z) z ^= 0x7fffffff since smaller -ve float means bigger -ve int
+        z = builder.CreateXor(z, builder.getInt32(0x7fffffff));
+        builder.CreateStore(z, zPtr);
+
+        condBranch.endif();
+
+        z = builder.CreateLoad(zPtr, "z");
+
+        switch (0x200 | gglCtx->bufferState.depthFunc) {
+        case GL_NEVER:
+            zCmp = ConstantInt::getFalse(mod->getContext());
+            break;
+        case GL_LESS:
+            zCmp = builder.CreateICmpSLT(z, depthZ);
+            break;
+        case GL_EQUAL:
+            zCmp = builder.CreateICmpEQ(z, depthZ);
+            break;
+        case GL_LEQUAL:
+            zCmp = builder.CreateICmpSLE(z, depthZ);
+            break;
+        case GL_GREATER:
+            zCmp = builder.CreateICmpSGT(z, depthZ);
+            break;
+        case GL_NOTEQUAL:
+            zCmp = builder.CreateICmpNE(z, depthZ);
+            break;
+        case GL_GEQUAL:
+            zCmp = builder.CreateICmpSGE(z, depthZ);
+            break;
+        case GL_ALWAYS:
+            zCmp = ConstantInt::getTrue(mod->getContext());
+            break;
+        default:
+            assert(0);
+            break;
+        }
+    } else // no depth test means always pass
+        zCmp = ConstantInt::getTrue(mod->getContext());
+    zCmp->setName("zCmp");
+
+    condBranch.ifCond(sCmp, "if_sCmp", "sCmp_fail");
+    condBranch.ifCond(zCmp, "if_zCmp", "zCmp_fail");
+
+    Value * fsInputs = builder.CreateConstInBoundsGEP1_32(start,
+                       offsetof(VertexOutput,position)/sizeof(Vector4));
+    Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start,
+                        offsetof(VertexOutput,fragColor)/sizeof(Vector4));
+
+    Function * fsFunction = mod->getFunction(shaderName);
+    assert(fsFunction);
+    CallInst *call = builder.CreateCall(fsFunction);
+    call->setCallingConv(CallingConv::C);
+    call->setTailCall(false);
+
+    // The frame buffer colour is needed when a destination factor is not
+    // GL_ZERO (the destination term survives) and also when a source factor
+    // is itself computed from dst, e.g. source factor DST_COLOR with
+    // destination factor ZERO. Otherwise dst stays zero and the load is
+    // skipped.
+    const bool blendReadsDst = gglCtx->blendState.enable &&
+                               (0 != gglCtx->blendState.dcf || 0 != gglCtx->blendState.daf ||
+                                BlendFactorReadsDst(gglCtx->blendState.scf) ||
+                                BlendFactorReadsDst(gglCtx->blendState.saf));
+
+    Value * dst = Constant::getNullValue(intVecType(builder));
+    if (blendReadsDst) {
+        // unpack the 32-bit pixel into four [0,255] lanes
+        Value * frameColor = builder.CreateLoad(frame, "frameColor");
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(0));
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(1));
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(2));
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(3));
+        dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24));
+        dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
+    }
+
+    Value * src = builder.CreateConstInBoundsGEP1_32(fsOutputs, 0);
+    src = builder.CreateLoad(src);
+
+    Value * color = GenerateFSBlend(gglCtx, /*&prog->outputRegDesc,*/ builder, src, dst);
+    builder.CreateStore(color, frame);
+
+    // TODO DXL depthmask check
+    if (gglCtx->bufferState.depthTest) {
+        z = builder.CreateBitCast(z, intType);
+        builder.CreateStore(z, depth); // store z
+    }
+
+    if (gglCtx->bufferState.stencilTest)
+        builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dPass,
+                                      gglCtx->backStencil.dPass, sPtr, sRef), stencil);
+
+    condBranch.elseop(); // failed z test
+
+    if (gglCtx->bufferState.stencilTest)
+        builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dFail,
+                                      gglCtx->backStencil.dFail, sPtr, sRef), stencil);
+
+    condBranch.endif();
+    condBranch.elseop(); // failed s test
+
+    if (gglCtx->bufferState.stencilTest)
+        builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.sFail,
+                                      gglCtx->backStencil.sFail, sPtr, sRef), stencil);
+
+    condBranch.endif();
+
+    frame = builder.CreateConstInBoundsGEP1_32(frame, 1); // frame++
+    builder.CreateStore(frame, framePtr);
+
+    if (gglCtx->bufferState.depthTest) {
+        depth = builder.CreateConstInBoundsGEP1_32(depth, 1); // depth++
+        builder.CreateStore(depth, depthPtr);
+    }
+    if (gglCtx->bufferState.stencilTest) {
+        stencil = builder.CreateConstInBoundsGEP1_32(stencil, 1); // stencil++
+        builder.CreateStore(stencil, stencilPtr);
+    }
+
+    // advance the interpolated inputs in start by step: the whole frag coord
+    // when the shader uses it, otherwise only its z when depth testing
+    Value * vPtr = NULL, * v = NULL, * dx = NULL;
+    if (program->UsesFragCoord) {
+        vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
+                                                 GGL_FS_INPUT_FRAGCOORD_INDEX);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
+                                               GGL_FS_INPUT_FRAGCOORD_INDEX);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    } else if (gglCtx->bufferState.depthTest) {
+        const Type * floatType = Type::getFloatTy(*gglCtx->llvmCtx);
+        const PointerType * floatPointerType = PointerType::get(floatType, 0);
+        vPtr = builder.CreateBitCast(start, floatPointerType);
+        vPtr = builder.CreateConstInBoundsGEP1_32(vPtr,
+                (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateBitCast(step, floatPointerType);
+        dx = builder.CreateConstInBoundsGEP1_32(dx,
+                (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    }
+
+    if (program->UsesPointCoord) {
+        vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
+                                                 GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
+                                               GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    }
+
+    for (unsigned i = 0; i < program->VaryingSlots; ++i) {
+        vPtr = builder.CreateConstInBoundsGEP1_32(start, offsetof(VertexOutput,varyings)/sizeof(Vector4) + i);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
+                                               GGL_FS_INPUT_VARYINGS_INDEX + i);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    }
+
+    count = builder.CreateSub(count, builder.getInt32(1));
+    builder.CreateStore(count, countPtr); // count--;
+
+    condBranch.endLoop();
+
+    builder.CreateRetVoid();
+}
diff --git a/src/pixelflinger2/pixelflinger2.h b/src/pixelflinger2/pixelflinger2.h index cdc2b9c..8f8a4d5 100644 --- a/src/pixelflinger2/pixelflinger2.h +++ b/src/pixelflinger2/pixelflinger2.h @@ -19,7 +19,7 @@ #define _PIXELFLINGER2_H_ #define USE_LLVM_TEXTURE_SAMPLER 1 -#define USE_LLVM_SCANLINE 0 +#define USE_LLVM_SCANLINE 1 #ifndef USE_LLVM_EXECUTIONENGINE #define USE_LLVM_EXECUTIONENGINE 0 // 1 to use llvm::Execution, 0 to use libBCC, requires modifying makefile diff --git a/src/pixelflinger2/raster.cpp b/src/pixelflinger2/raster.cpp index 23a5cef..19212d7 100644 --- a/src/pixelflinger2/raster.cpp +++ b/src/pixelflinger2/raster.cpp @@ -85,7 +85,7 @@ static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, assert(fabs(tl->position.y - tr->position.y) < 1 && fabs(bl->position.y - br->position.y) < 1); const unsigned width = ctx->frameSurface.width, height = ctx->frameSurface.height; - const unsigned varyingCount = 8;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + const unsigned varyingCount = ctx->glCtx->CurrentProgram->VaryingSlots; // tlv-trv and blv-brv are parallel and
horizontal diff --git a/src/pixelflinger2/scanline.cpp b/src/pixelflinger2/scanline.cpp index 5ff12a6..106582a 100644 --- a/src/pixelflinger2/scanline.cpp +++ b/src/pixelflinger2/scanline.cpp @@ -186,7 +186,7 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO // assert(ctx->frameSurface.width == ctx->depthSurface.width); // assert(ctx->frameSurface.height == ctx->depthSurface.height); - const unsigned int varyingCount = 8;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + const unsigned int varyingCount = ctx->glCtx->CurrentProgram->VaryingSlots; const unsigned y = v1->position.y, startX = v1->position.x, endX = v2->position.x; @@ -198,6 +198,7 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO const VectorComp_t div = VectorComp_t_CTR(1 / (float)(endX - startX)); memcpy(ctx->glCtx->CurrentProgram->ValuesVertexOutput, v1, sizeof(*v1)); + // shader symbols are mapped to gl_shader_program_Values* VertexOutput & vertex(*(VertexOutput*)ctx->glCtx->CurrentProgram->ValuesVertexOutput); VertexOutput vertexDx(*v2); @@ -236,17 +237,13 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO #if USE_LLVM_SCANLINE typedef void (* ScanLineFunction_t)(VertexOutput * start, VertexOutput * step, - Vector4 * constants, unsigned * frame, - int * depth, unsigned char * stencil, - GGLContext::ActiveStencilState *, - unsigned count); + unsigned * frame, int * depth, unsigned char * stencil, + GGLContext::ActiveStencilState *, unsigned count); -// ScanLineFunction_t scanLineFunction = (ScanLineFunction_t) -// ctx->glCtx->Shader.CurrentProgram->GLVMFP->function; + ScanLineFunction_t scanLineFunction = (ScanLineFunction_t) + ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function; if (endX >= startX) { -// scanLineFunction(&vertex, &vertexDx, (Vector4 *) -// ctx->glCtx->Shader.CurrentProgram->FragmentProgram->Parameters->ParameterValues, -// frame, depth, stencil, 
&ctx->activeStencil, endX - startX + 1); + scanLineFunction(&vertex, &vertexDx, frame, depth, stencil, &ctx->activeStencil, endX - startX + 1); } #else @@ -304,37 +301,43 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO z = vertex.position.i[2]; if (z & 0x80000000) // negative float has leading 1 z ^= 0x7fffffff; // bigger negative is smaller - bool zCmp = false; - switch (0x200 | ctx->bufferState.depthFunc) { - case GL_NEVER: - zCmp = false; - break; - case GL_LESS: - zCmp = z < *depth; - break; - case GL_EQUAL: - zCmp = z == *depth; - break; - case GL_LEQUAL: - zCmp = z <= *depth; - break; - case GL_GREATER: - zCmp = z > *depth; - break; - case GL_NOTEQUAL: - zCmp = z != *depth; - break; - case GL_GEQUAL: - zCmp = z >= *depth; - break; - case GL_ALWAYS: - zCmp = true; - break; - default: - assert(0); - break; + bool zCmp = true; + if (DepthTest) + { + switch (0x200 | ctx->bufferState.depthFunc) { + case GL_NEVER: + zCmp = false; + break; + case GL_LESS: + zCmp = z < *depth; + break; + case GL_EQUAL: + zCmp = z == *depth; + break; + case GL_LEQUAL: + zCmp = z <= *depth; + break; + case GL_GREATER: + zCmp = z > *depth; + break; + case GL_NOTEQUAL: + zCmp = z != *depth; + break; + case GL_GEQUAL: + zCmp = z >= *depth; + break; + case GL_ALWAYS: + zCmp = true; + break; + default: + assert(0); + break; + } } if (!DepthTest || zCmp) { + float * varying = (float *)ctx->glCtx->CurrentProgram->ValuesVertexOutput; + + assert((void *)&(vertex.varyings[0]) == &(varying[2 * 4])); ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function(); if (BlendEnable) { BlendComp_t sOne = 255, sZero = 0; @@ -478,14 +481,14 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO vertex.frontFacingPointCoord.i[3] = vertexDx.frontFacingPointCoord.i[3]; } #else -// if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord) + if (ctx->glCtx->CurrentProgram->UsesFragCoord) vertex.position += 
vertexDx.position; -// else if (ctx->bufferState.depthTest) + else if (ctx->bufferState.depthTest) vertex.position.z += vertexDx.position.z; for (unsigned i = 0; i < varyingCount; i++) vertex.varyings[i] += vertexDx.varyings[i]; -// if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) + if (ctx->glCtx->CurrentProgram->UsesPointCoord) { vertex.frontFacingPointCoord.z += vertexDx.frontFacingPointCoord.z; vertex.frontFacingPointCoord.w += vertexDx.frontFacingPointCoord.w; @@ -505,30 +508,29 @@ static void PickScanLine(GGLInterface * iface) GGL_GET_CONTEXT(ctx, iface); ctx->interface.ScanLine = NULL; - const bool DepthWrite = true; if (ctx->bufferState.stencilTest) { if (ctx->bufferState.depthTest) { if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, true>; + ctx->interface.ScanLine = ScanLine<true, true, true, true>; else - ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, false>; + ctx->interface.ScanLine = ScanLine<true, true, true, false>; } else { if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, true>; + ctx->interface.ScanLine = ScanLine<true, false, false, true>; else - ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, false>; + ctx->interface.ScanLine = ScanLine<true, false, false, false>; } } else { if (ctx->bufferState.depthTest) { if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, true>; + ctx->interface.ScanLine = ScanLine<false, true, true, true>; else - ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, false>; + ctx->interface.ScanLine = ScanLine<false, true, true, false>; } else { if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, true>; + ctx->interface.ScanLine = ScanLine<false, false, false, true>; else - ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, false>; + ctx->interface.ScanLine = ScanLine<false, false, false, false>; } } diff 
--git a/src/pixelflinger2/shader.cpp b/src/pixelflinger2/shader.cpp index 782759c..7537f2f 100644 --- a/src/pixelflinger2/shader.cpp +++ b/src/pixelflinger2/shader.cpp @@ -54,7 +54,6 @@ struct Instance { bccDisposeScript(script); else if (module) delete module; - getchar(); } }; @@ -95,10 +94,9 @@ extern "C" void _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh shader->DeletePending = true; return; } - if (shader->executable) - { + if (shader->executable) { for (std::map<ShaderKey, Instance *>::iterator it=shader->executable->instances.begin(); - it != shader->executable->instances.end(); it++) + it != shader->executable->instances.end(); it++) (*it).second->~Instance(); shader->executable->instances.~map(); } @@ -363,9 +361,12 @@ static void CodeGen(Instance * instance, const char * mainName, gl_shader * shad if (result != BCC_NO_ERROR) fprintf(stderr, "Could not find '%s': %d\n", "main", result); else - printf("bcc_compile %s=%p \n", "main", shader->function); + printf("bcc_compile %s=%p \n", mainName, instance->function); } +void GenerateScanLine(const GGLContext * gglCtx, const gl_shader_program * program, llvm::Module * mod, + const char * shaderName, const char * scanlineName); + static void ShaderUse(GGLInterface * iface, gl_shader_program * program) { GGL_GET_CONST_CONTEXT(ctx, iface); @@ -404,7 +405,15 @@ static void ShaderUse(GGLInterface * iface, gl_shader_program * program) llvm::Module * module = glsl_ir_to_llvm_module(shader->ir, instance->module, ctx, shaderName); if (!module) assert(0); // ir to llvm failed - CodeGen(instance, mainName, shader, program, ctx); +#if USE_LLVM_SCANLINE + if (GL_FRAGMENT_SHADER == shader->Type) { + char scanlineName [SCANLINE_KEY_STRING_LEN] = {0}; + GetScanlineKeyString(&shaderKey, scanlineName, sizeof scanlineName / sizeof *scanlineName); + GenerateScanLine(ctx, program, module, mainName, scanlineName); + CodeGen(instance, scanlineName, shader, program, ctx); + } else +#endif + CodeGen(instance, 
mainName, shader, program, ctx); shader->executable->instances[shaderKey] = instance; debug_printf("jit new shader '%s'(%p) \n", mainName, instance->function); //getchar(); } else |