summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Li <davidxli@google.com>2011-02-03 10:10:59 -0800
committerDavid Li <davidxli@google.com>2011-02-03 10:10:59 -0800
commit3225321119408735f16b72b539c9fb7d80683552 (patch)
tree37f619c525b2b804cabb7fead3a1cb2d88b9410d
parente82376d380005c21cb70637d42104fcd4d652843 (diff)
Checkpoint: scanline codegen.
Signed-off-by: David Li <davidxli@google.com>
-rw-r--r--Android.mk1
-rw-r--r--src/glsl/ir_to_llvm_helper.cpp155
-rw-r--r--src/glsl/linker.cpp42
-rw-r--r--src/glsl/main.cpp75
-rw-r--r--src/mesa/main/mtypes.h11
-rw-r--r--src/pixelflinger2/llvm_helper.h218
-rw-r--r--src/pixelflinger2/llvm_scanline.cpp687
-rw-r--r--src/pixelflinger2/pixelflinger2.h2
-rw-r--r--src/pixelflinger2/raster.cpp2
-rw-r--r--src/pixelflinger2/scanline.cpp104
-rw-r--r--src/pixelflinger2/shader.cpp21
11 files changed, 1078 insertions, 240 deletions
diff --git a/Android.mk b/Android.mk
index f627284..94edae6 100644
--- a/Android.mk
+++ b/Android.mk
@@ -107,6 +107,7 @@ mesa_SRC_FILES := \
src/mesa/program/symbol_table.c \
src/pixelflinger2/buffer.cpp \
src/pixelflinger2/format.cpp \
+ src/pixelflinger2/llvm_scanline.cpp \
src/pixelflinger2/pixelflinger2.cpp \
src/pixelflinger2/raster.cpp \
src/pixelflinger2/scanline.cpp \
diff --git a/src/glsl/ir_to_llvm_helper.cpp b/src/glsl/ir_to_llvm_helper.cpp
index 9794240..9392990 100644
--- a/src/glsl/ir_to_llvm_helper.cpp
+++ b/src/glsl/ir_to_llvm_helper.cpp
@@ -1,3 +1,20 @@
+/**
+ **
+ ** Copyright 2011, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
#include <stack>
#include <stdio.h>
@@ -6,93 +23,13 @@
#include <llvm/Support/IRBuilder.h>
#include <llvm/Module.h>
-using namespace llvm;
-
-static const char * name(const char * str)
-{
- return str;
-}
-
-static Value * minIntScalar(IRBuilder<> &builder, Value * in1, Value * in2)
-{
- Value * cmp = builder.CreateICmpSLT(in1, in2);
- return builder.CreateSelect(cmp, in1, in2);
-}
-
-static Value * maxIntScalar(IRBuilder<> &builder, Value * in1, Value * in2)
-{
- Value * cmp = builder.CreateICmpSGT(in1, in2);
- return builder.CreateSelect(cmp, in1, in2);
-}
-
-static Constant * constFloat(IRBuilder<> & builder, float x)
-{
- return ConstantFP::get(builder.getContext(), APFloat(x));
-}
-
-static VectorType * intVecType(IRBuilder<> & builder)
-{
- return VectorType::get(Type::getInt32Ty(builder.getContext()), 4);
-}
-
-static VectorType * floatVecType(IRBuilder<> & builder)
-{
- return VectorType::get(Type::getFloatTy(builder.getContext()), 4);
-}
-
-static Value * constIntVec(IRBuilder<> & builder, int x, int y, int z, int w)
-{
- std::vector<Constant *> vec(4);
- vec[0] = builder.getInt32(x);
- vec[1] = builder.getInt32(y);
- vec[2] = builder.getInt32(z);
- vec[3] = builder.getInt32(w);
- return ConstantVector::get(intVecType(builder), vec);
-}
-
-static Value * intVec(IRBuilder<> & builder, Value * x, Value * y, Value * z, Value * w)
-{
- Value * res = Constant::getNullValue(intVecType(builder));
- res = builder.CreateInsertElement(res, x, builder.getInt32(0), name("vecx"));
- res = builder.CreateInsertElement(res, y, builder.getInt32(1), name("vecy"));
- res = builder.CreateInsertElement(res, z, builder.getInt32(2), name("vecz"));
- if (w)
- res = builder.CreateInsertElement(res, w, builder.getInt32(3), name("vecw"));
- return res;
-}
-
-static Value * constFloatVec(IRBuilder<> & builder, float x, float y, float z, float w)
-{
- std::vector<Constant *> vec(4);
- vec[0] = constFloat(builder, x);
- vec[1] = constFloat(builder, y);
- vec[2] = constFloat(builder, z);
- vec[3] = constFloat(builder, w);
- return ConstantVector::get(floatVecType(builder), vec);
-}
-
-std::vector<Value *> extractVector(IRBuilder<> & builder, Value *vec)
-{
- std::vector<Value*> elems(4);
- elems[0] = builder.CreateExtractElement(vec, builder.getInt32(0), name("x"));
- elems[1] = builder.CreateExtractElement(vec, builder.getInt32(1), name("y"));
- elems[2] = builder.CreateExtractElement(vec, builder.getInt32(2), name("z"));
- elems[3] = builder.CreateExtractElement(vec, builder.getInt32(3), name("w"));
- return elems;
-}
+#include "src/pixelflinger2/llvm_helper.h"
-// <4 x i32> [0, 255] to <4 x float> [0.0, 1.0]
-static Value * intColorVecToFloatColorVec(IRBuilder<> & builder, Value * vec)
-{
-// return builder.CreateBitCast(vec, floatVecType(builder));
- vec = builder.CreateUIToFP(vec, floatVecType(builder));
- return builder.CreateFMul(vec, constFloatVec(builder, 1 / 255.0f, 1 / 255.0f,
- 1 / 255.0f, 1 / 255.0f));
-}
+using namespace llvm;
// texture data is int pointer to surface (will cast to short for 16bpp), index is linear texel index,
// format is GGLPixelFormat for surface, return type is <4 x i32> rgba
-Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * index, const GGLPixelFormat format)
+static Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * index, const GGLPixelFormat format)
{
Value * texel = NULL;
switch (format) {
@@ -176,7 +113,7 @@ Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * index, c
static const unsigned SHIFT = 16;
// w = width - 1, h = height - 1; similar to pointSample; returns <4 x i32> rgba
-Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOffset,
+static Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOffset,
Value * x0, Value * y0, Value * xLerp, Value * yLerp,
Value * w, Value * h, Value * width, Value * height,
const GGLPixelFormat format/*, const RegDesc * dstDesc*/)
@@ -252,56 +189,6 @@ Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOf
// assert(0);
}
-class CondBranch
-{
- IRBuilder<> & m_builder;
- std::stack<BasicBlock *> m_ifStack;
-
-public:
- CondBranch(IRBuilder<> & builder) : m_builder(builder) {}
- ~CondBranch() {
- assert(m_ifStack.empty());
- }
-
- void ifCond(Value * cmp, const char * trueBlock = "ifT", const char * falseBlock = "ifF") {
- Function * function = m_builder.GetInsertBlock()->getParent();
- BasicBlock * ifthen = BasicBlock::Create(m_builder.getContext(), name(trueBlock), function, NULL);
- BasicBlock * ifend = BasicBlock::Create(m_builder.getContext(), name(falseBlock), function, NULL);
- m_builder.CreateCondBr(cmp, ifthen, ifend);
- m_builder.SetInsertPoint(ifthen);
- m_ifStack.push(ifend);
- }
-
- void elseop() {
- assert(!m_ifStack.empty());
- BasicBlock *ifend = BasicBlock::Create(m_builder.getContext(), name("else_end"), m_builder.GetInsertBlock()->getParent(),0);
- if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator
- m_builder.CreateBr(ifend); // branch is also a block terminator
- else {
- debug_printf("Instructions::elseop block alread has terminator \n");
- m_builder.GetInsertBlock()->getTerminator()->dump();
- assert(0);
- }
- m_builder.SetInsertPoint(m_ifStack.top());
- m_builder.GetInsertBlock()->setName(name("else_then"));
- m_ifStack.pop();
- m_ifStack.push(ifend);
- }
-
- void endif() {
- assert(!m_ifStack.empty());
- if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator
- m_builder.CreateBr(m_ifStack.top()); // branch is also a block terminator
- else {
- debug_printf("Instructions::endif block alread has terminator");
- m_builder.GetInsertBlock()->getTerminator()->dump();
- assert(0);
- }
- m_builder.SetInsertPoint(m_ifStack.top());
- m_ifStack.pop();
- }
-};
-
// dim is size - 1, since [0.0f,1.0f]->[0, size - 1]
static Value * texcoordWrap(IRBuilder<> & builder, const unsigned wrap,
/*const ChannelType type,*/ Value * r, Value * size, Value * dim,
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 9c57339..5aecb86 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1174,7 +1174,7 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index
gl_shader *const sh = prog->_LinkedShaders[0];
assert(sh->Type == GL_VERTEX_SHADER);
-
+ prog->VaryingSlots = 0;
/* Operate in a total of four passes.
*
* 1. Invalidate the location assignments for all vertex shader inputs,
@@ -1358,6 +1358,13 @@ assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index
if (0 <= paramIndex)
prog->Attributes->Parameters[paramIndex].Location = location;
}
+
+ for (int i = sizeof(used_locations) * 8 - 1; i >= 0; i--)
+ if (used_locations & (1 << i))
+ {
+ prog->AttributeSlots = i + 1;
+ break;
+ }
return true;
}
@@ -1385,15 +1392,16 @@ demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode)
}
}
-#define VertexOutputOffset(FIELD) (offsetof(VertexOutput,FIELD)/sizeof(Vector4))
-
void
assign_varying_locations(struct gl_shader_program *prog,
gl_shader *producer, gl_shader *consumer)
{
+ prog->VaryingSlots = 0;
+ prog->UsesFragCoord = false;
+ prog->UsesPointCoord = false;
/* FINISHME: Set dynamically when geometry shader support is added. */
- unsigned output_index = VertexOutputOffset(varyings); /*VERT_RESULT_VAR0*/;
- unsigned input_index = VertexOutputOffset(varyings);
+ unsigned output_index = offsetof(VertexOutput,varyings) / sizeof(Vector4); /*VERT_RESULT_VAR0*/;
+ unsigned input_index = offsetof(VertexOutput,varyings) / sizeof(Vector4);
/* Operate in a total of three passes.
*
@@ -1410,9 +1418,9 @@ assign_varying_locations(struct gl_shader_program *prog,
if (!var || ir_var_out != var->mode)
continue;
if (!strcmp("gl_Position", var->name))
- var->location = VertexOutputOffset(position);
+ var->location = offsetof(VertexOutput,position) / sizeof(Vector4);
else if (!strcmp("gl_PointSize", var->name))
- var->location = VertexOutputOffset(pointSize);
+ var->location = offsetof(VertexOutput,pointSize) / sizeof(Vector4);
else
var->location = -1;
}
@@ -1421,11 +1429,17 @@ assign_varying_locations(struct gl_shader_program *prog,
if (!var || ir_var_in != var->mode)
continue;
if (!strcmp("gl_FragCoord", var->name))
- var->location = VertexOutputOffset(position);
+ {
+ var->location = offsetof(VertexOutput,position)/sizeof(Vector4);
+ prog->UsesFragCoord = true;
+ }
else if (!strcmp("gl_FrontFacing", var->name))
- var->location = VertexOutputOffset(frontFacingPointCoord);
+ var->location = offsetof(VertexOutput,frontFacingPointCoord)/sizeof(Vector4);
else if (!strcmp("gl_PointCoord", var->name))
- var->location = VertexOutputOffset(frontFacingPointCoord);
+ {
+ var->location = offsetof(VertexOutput,frontFacingPointCoord)/sizeof(Vector4);
+ prog->UsesPointCoord = true;
+ }
else
var->location = -1;
}
@@ -1465,11 +1479,13 @@ assign_varying_locations(struct gl_shader_program *prog,
output_index += slots;
input_index += slots;
+ prog->VaryingSlots += slots;
} else {
const unsigned slots = output_var->type->matrix_columns;
output_index += slots;
input_index += slots;
+ prog->VaryingSlots += slots;
}
}
@@ -1717,7 +1733,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
int paramIndex = _mesa_get_parameter(prog->Varying, var->name);
if (0 > paramIndex)
paramIndex = _mesa_add_parameter(prog->Varying, var->name);
- var->location= VertexOutputOffset(fragColor);
+ var->location= offsetof(VertexOutput,fragColor)/sizeof(Vector4);
prog->Varying->Parameters[paramIndex].Location = var->location;
}
else
@@ -1745,6 +1761,4 @@ done:
}
//hieralloc_free(mem_ctx);
-}
-
-#undef VertexOutputOffset
+} \ No newline at end of file
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 2ce7ed6..c131021 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -190,6 +190,7 @@ compile_shader(struct gl_context *ctx, struct gl_shader *shader)
return;
}
+#if 1 // build executable
#define DRAW_TO_SCREEN 1
#include "image_file.h"
@@ -216,7 +217,11 @@ void execute(const GGLContext * ctx)
//const unsigned scale = 16, portWidth = 80, portHeight = 50;
unsigned scale = 1, portWidth = width / scale, portHeight = height / scale;
//unsigned scale = 1, portWidth = width / 4, portHeight = height / 4;
-
+
+ GGLSurface colorSurface = {width, height, GGL_PIXEL_FORMAT_RGBA_8888, frameSurface, width, sizeof(GGLSurface)};
+ ggl->SetBuffer(ggl, GL_COLOR_BUFFER_BIT, &colorSurface);
+ ggl->EnableDisable(ggl, GL_DEPTH_TEST, false);
+
float * uniform = (float *)ctx->glCtx->CurrentProgram->ValuesUniform;
float * attribute = (float *)ctx->glCtx->CurrentProgram->ValuesVertexInput;
float * varying = (float *)ctx->glCtx->CurrentProgram->ValuesVertexOutput;
@@ -258,36 +263,42 @@ void execute(const GGLContext * ctx)
clock_t c0 = clock();
//while(true)
- for (frames = 1; frames <= 10; frames++)
+ for (frames = 1; frames <= 20; frames++)
{
- for (unsigned y = 0; y < portHeight; y++)
- for (unsigned x = 0; x < portWidth; x++) {
- if (vTexCoordLocation > -1)
- {
- varying[vTexCoordLocation * 4 + 0] = ((float)x) / (portWidth - 1);
- varying[vTexCoordLocation * 4 + 1] = ((float)y) / (portHeight - 1);
- varying[vTexCoordLocation * 4 + 2] = 0;
- varying[vTexCoordLocation * 4 + 3] = 1;
- }
- if (vNormalLocation > -1)
- {
- varying[vNormalLocation * 4 + 0] = 0;
- varying[vNormalLocation * 4 + 1] = 1;
- varying[vNormalLocation * 4 + 2] = 0;
- varying[vNormalLocation * 4 + 3] = 1;
- }
- shader->function();
- unsigned r = output[0] * 255;
- unsigned g = output[1] * 255;
- unsigned b = output[2] * 255;
- unsigned a = output[3] * 255;
-// unsigned r = *(unsigned *)(outputs + 0);
-// unsigned g = *(unsigned *)(outputs + 1);
-// unsigned b = *(unsigned *)(outputs + 2);
-// unsigned a = *(unsigned *)(outputs + 3);
- frameSurface[y * width + x] = (a << 24) | (b << 16) | (g << 8) | r;
-// frameSurface[y * width + x] = *(unsigned *)outputs;
+ for (unsigned y = 0; y < portHeight; y++) {
+ VertexOutput v0, v1;
+ v0.position = Vector4(0, y, 0, 0);
+ v1.position = Vector4(portWidth - 1, y ,0 ,0);
+ if (vTexCoordLocation > -1)
+ {
+ v0.varyings[vTexCoordLocation - 2] = Vector4(0, (float)y / (portHeight - 1), 0, 1);
+ v1.varyings[vTexCoordLocation - 2] = Vector4(1, (float)y / (portHeight - 1), 0, 1);
}
+ ggl->ScanLine(ggl, &v0, &v1);
+
+// for (unsigned x = 0; x < portWidth; x++) {
+// if (vTexCoordLocation > -1)
+// {
+// varying[vTexCoordLocation * 4 + 0] = ((float)x) / (portWidth - 1);
+// varying[vTexCoordLocation * 4 + 1] = ((float)y) / (portHeight - 1);
+// varying[vTexCoordLocation * 4 + 2] = 0;
+// varying[vTexCoordLocation * 4 + 3] = 1;
+// }
+//// if (vNormalLocation > -1)
+//// {
+//// varying[vNormalLocation * 4 + 0] = 0;
+//// varying[vNormalLocation * 4 + 1] = 1;
+//// varying[vNormalLocation * 4 + 2] = 0;
+//// varying[vNormalLocation * 4 + 3] = 1;
+//// }
+// shader->function();
+// unsigned r = output[0] * 255;
+// unsigned g = output[1] * 255;
+// unsigned b = output[2] * 255;
+// unsigned a = output[3] * 255;
+// frameSurface[y * width + x] = (a << 24) | (b << 16) | (g << 8) | r;
+// }
+ }
//*
if (scale > 1)
for (int y = portHeight - 1; y >= 0; y--)
@@ -301,6 +312,8 @@ void execute(const GGLContext * ctx)
//*/
#if defined __arm__ && DRAW_TO_SCREEN
frameSurface = (unsigned *)PresentDrawingSurface();
+ colorSurface.data = frameSurface;
+ ggl->SetBuffer(ggl, GL_COLOR_BUFFER_BIT, &colorSurface);
#endif
}
@@ -308,12 +321,12 @@ void execute(const GGLContext * ctx)
printf ("\n *** test_scan elapsed CPU time: %fs \n *** fps=%.2f, tpf=%.2fms \n",
elapsed, frames / elapsed, elapsed / frames * 1000);
printf("gl_FragColor=%.2f, %.2f, %.2f %.2f \n", output[0], output[1], output[2], output[3]);
- //assert(0.1f < outputs[3]);
#if defined __arm__
SaveBMP("/sdcard/mesa.bmp", frameSurface, width, height);
#else
SaveBMP("mesa.bmp", frameSurface, width, height);
#endif
+ assert(0.1f < output[3]);
#if DRAW_TO_SCREEN
void DisposeDrawingSurface();
#else
@@ -480,3 +493,5 @@ main(int argc, char **argv)
hieralloc_report_brief(NULL, stdout);
return status;
}
+
+#endif // build executable
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b6eba89..5614907 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2078,12 +2078,13 @@ struct gl_shader
struct exec_list *ir;
struct glsl_symbol_table *symbols;
- struct Executable * executable;
- void (*function)(); /**< the active function */
- unsigned SamplersUsed; /**< bitfield of samplers used by shader */
/** Shaders containing built-in functions that are used for linking. */
struct gl_shader *builtins_to_link[16];
unsigned num_builtins_to_link;
+
+ struct Executable * executable;
+ void (*function)(); /**< the active function */
+ unsigned SamplersUsed; /**< bitfield of samplers used by shader */
};
@@ -2143,6 +2144,10 @@ struct gl_shader_program
GLfloat (*ValuesVertexInput)[4]; /**< actually a VertexInput */
GLfloat (*ValuesVertexOutput)[4]; /**< actually a VertexOutput */
void * InputOuputBase; /**< allocation base for Values* */
+
+ unsigned AttributeSlots;/**< [0,AttributeSlots-1] read by vertex shader */
+ unsigned VaryingSlots; /**< [0,VaryingSlots-1] read by fragment shader */
+ unsigned UsesFragCoord : 1, UsesPointCoord : 1;
};
diff --git a/src/pixelflinger2/llvm_helper.h b/src/pixelflinger2/llvm_helper.h
new file mode 100644
index 0000000..3ad083b
--- /dev/null
+++ b/src/pixelflinger2/llvm_helper.h
@@ -0,0 +1,218 @@
+/**
+ **
+ ** Copyright 2011, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#ifndef _PIXELFLINGER2_LLVM_HELPER_H_
+#define _PIXELFLINGER2_LLVM_HELPER_H_
+
+#include <stack>
+#include <stdio.h>
+#include <llvm/Support/IRBuilder.h>
+
+using namespace llvm;
+
+// Pass-through for IR labels: returns str unchanged. The helpers in this
+// header route every value/block label through this one function.
+static const char * name(const char * str)
+{
+ return str;
+}
+
+// Emits a signed less-than compare followed by a select, yielding the
+// smaller of the two integer operands.
+static Value * minIntScalar(IRBuilder<> &builder, Value * in1, Value * in2)
+{
+    Value * const firstIsSmaller = builder.CreateICmpSLT(in1, in2);
+    Value * const smaller = builder.CreateSelect(firstIsSmaller, in1, in2);
+    return smaller;
+}
+
+// Emits a signed greater-than compare followed by a select, yielding the
+// larger of the two integer operands.
+static Value * maxIntScalar(IRBuilder<> &builder, Value * in1, Value * in2)
+{
+    Value * const firstIsLarger = builder.CreateICmpSGT(in1, in2);
+    Value * const larger = builder.CreateSelect(firstIsLarger, in1, in2);
+    return larger;
+}
+
+// Builds a floating point constant holding x in the builder's LLVM context.
+static Constant * constFloat(IRBuilder<> & builder, float x)
+{
+    const APFloat value(x);
+    return ConstantFP::get(builder.getContext(), value);
+}
+
+// Returns the <4 x i32> vector type for the builder's LLVM context.
+static VectorType * intVecType(IRBuilder<> & builder)
+{
+    LLVMContext & context = builder.getContext();
+    return VectorType::get(Type::getInt32Ty(context), 4);
+}
+
+// Returns the <4 x float> vector type for the builder's LLVM context.
+static VectorType * floatVecType(IRBuilder<> & builder)
+{
+    LLVMContext & context = builder.getContext();
+    return VectorType::get(Type::getFloatTy(context), 4);
+}
+
+// Builds a constant <4 x i32> whose lanes are x, y, z and w, in that order.
+static Value * constIntVec(IRBuilder<> & builder, int x, int y, int z, int w)
+{
+    const int lanes[4] = { x, y, z, w };
+    std::vector<Constant *> elements;
+    elements.reserve(4);
+    for (unsigned i = 0; i < 4; i++)
+        elements.push_back(builder.getInt32(lanes[i]));
+    return ConstantVector::get(intVecType(builder), elements);
+}
+
+// Assembles a <4 x i32> from scalar values by inserting them one lane at a
+// time into a zero vector. x, y and z are always inserted; w may be NULL, in
+// which case lane 3 keeps its zero value.
+static Value * intVec(IRBuilder<> & builder, Value * x, Value * y, Value * z, Value * w)
+{
+    Value * const lanes[4] = { x, y, z, w };
+    const char * const labels[4] = { "vecx", "vecy", "vecz", "vecw" };
+    Value * result = Constant::getNullValue(intVecType(builder));
+    for (unsigned i = 0; i < 4; i++) {
+        if (i == 3 && !lanes[i])
+            continue; // optional w omitted
+        result = builder.CreateInsertElement(result, lanes[i], builder.getInt32(i), name(labels[i]));
+    }
+    return result;
+}
+
+// Builds a constant <4 x float> whose lanes are x, y, z and w, in that order.
+static Value * constFloatVec(IRBuilder<> & builder, float x, float y, float z, float w)
+{
+    const float lanes[4] = { x, y, z, w };
+    std::vector<Constant *> elements;
+    elements.reserve(4);
+    for (unsigned i = 0; i < 4; i++)
+        elements.push_back(constFloat(builder, lanes[i]));
+    return ConstantVector::get(floatVecType(builder), elements);
+}
+
+// Splits a 4-lane vector into its scalar lanes; the result holds lanes 0..3
+// in order, labelled "x", "y", "z" and "w".
+static std::vector<Value *> extractVector(IRBuilder<> & builder, Value *vec)
+{
+    const char * const labels[4] = { "x", "y", "z", "w" };
+    std::vector<Value *> lanes;
+    lanes.reserve(4);
+    for (unsigned i = 0; i < 4; i++)
+        lanes.push_back(builder.CreateExtractElement(vec, builder.getInt32(i), name(labels[i])));
+    return lanes;
+}
+
+// Lane-wise signed maximum of two <4 x i32> values: each lane is extracted,
+// compared and selected as a scalar, then the four results are repacked.
+static Value * intVecMax(IRBuilder<> & builder, Value * in1, Value * in2)
+{
+    const std::vector<Value *> lhs = extractVector(builder, in1);
+    const std::vector<Value *> rhs = extractVector(builder, in2);
+    Value * picked[4];
+    for (unsigned lane = 0; lane < 4; lane++) {
+        Value * const lhsIsGreater = builder.CreateICmpSGT(lhs[lane], rhs[lane], name("iVecSelCmp"));
+        picked[lane] = builder.CreateSelect(lhsIsGreater, lhs[lane], rhs[lane], name("iVecSel"));
+    }
+    return intVec(builder, picked[0], picked[1], picked[2], picked[3]);
+}
+
+// Lane-wise signed minimum of two <4 x i32> values: each lane is extracted,
+// compared and selected as a scalar, then the four results are repacked.
+static Value * intVecMin(IRBuilder<> & builder, Value * in1, Value * in2)
+{
+    const std::vector<Value *> lhs = extractVector(builder, in1);
+    const std::vector<Value *> rhs = extractVector(builder, in2);
+    Value * picked[4];
+    for (unsigned lane = 0; lane < 4; lane++) {
+        Value * const lhsIsSmaller = builder.CreateICmpSLT(lhs[lane], rhs[lane], name("iVecSelCmp"));
+        picked[lane] = builder.CreateSelect(lhsIsSmaller, lhs[lane], rhs[lane], name("iVecSel"));
+    }
+    return intVec(builder, picked[0], picked[1], picked[2], picked[3]);
+}
+
+// Converts a <4 x i32> color with lanes in [0, 255] into a <4 x float> color
+// with lanes in [0.0, 1.0]: unsigned int-to-float, then scale by 1/255.
+static Value * intColorVecToFloatColorVec(IRBuilder<> & builder, Value * vec)
+{
+    const float scale = 1 / 255.0f;
+    Value * const asFloat = builder.CreateUIToFP(vec, floatVecType(builder));
+    Value * const normalize = constFloatVec(builder, scale, scale, scale, scale);
+    return builder.CreateFMul(asFloat, normalize);
+}
+
+// Emits structured control flow (if / else / endif and loop / break) through
+// an IRBuilder, keeping the pending continuation blocks on internal stacks.
+// Every ifCond() must be closed by endif() and every beginLoop() by endLoop()
+// before the object is destroyed; the destructor asserts both stacks are empty.
+class CondBranch
+{
+    IRBuilder<> & m_builder;
+    // block that becomes the insert point when the innermost open branch closes
+    std::stack<BasicBlock *> m_ifStack;
+
+    struct Loop {
+        BasicBlock *begin; // loop body entry, target of the back edge
+        BasicBlock *end;   // block following the loop, target of brk()
+    };
+    std::stack<Loop> m_loopStack;
+
+    // Declared but never defined: the class cannot be default-constructed, and
+    // it cannot be copied because a copy would duplicate the pending stacks.
+    CondBranch();
+    CondBranch(const CondBranch &);
+
+public:
+    explicit CondBranch(IRBuilder<> & builder) : m_builder(builder) {}
+    ~CondBranch() {
+        assert(m_ifStack.empty());
+        assert(m_loopStack.empty());
+    }
+
+    // Opens an if: branches on cmp, continues emitting into the true block and
+    // remembers the false block for elseop()/endif().
+    void ifCond(Value * cmp, const char * trueBlock = "ifT", const char * falseBlock = "ifF") {
+        Function * function = m_builder.GetInsertBlock()->getParent();
+        BasicBlock * ifthen = BasicBlock::Create(m_builder.getContext(), name(trueBlock), function, NULL);
+        BasicBlock * ifend = BasicBlock::Create(m_builder.getContext(), name(falseBlock), function, NULL);
+        m_builder.CreateCondBr(cmp, ifthen, ifend);
+        m_builder.SetInsertPoint(ifthen);
+        m_ifStack.push(ifend);
+    }
+
+    // Closes the true branch and starts the else branch: the current block
+    // jumps to a new join block, and emission moves to the remembered false
+    // block (renamed "else_then").
+    void elseop() {
+        assert(!m_ifStack.empty());
+        BasicBlock *ifend = BasicBlock::Create(m_builder.getContext(), name("else_end"), m_builder.GetInsertBlock()->getParent(), NULL);
+        if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator
+            m_builder.CreateBr(ifend); // branch is also a block terminator
+        else {
+            debug_printf("CondBranch::elseop block already has terminator \n");
+            m_builder.GetInsertBlock()->getTerminator()->dump();
+            assert(0);
+        }
+        m_builder.SetInsertPoint(m_ifStack.top());
+        m_builder.GetInsertBlock()->setName(name("else_then"));
+        m_ifStack.pop();
+        m_ifStack.push(ifend);
+    }
+
+    // Closes the innermost if/else: the current block jumps to the pending
+    // block on top of the stack, which becomes the new insert point.
+    void endif() {
+        assert(!m_ifStack.empty());
+        if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator
+            m_builder.CreateBr(m_ifStack.top()); // branch is also a block terminator
+        else {
+            debug_printf("CondBranch::endif block already has terminator \n");
+            m_builder.GetInsertBlock()->getTerminator()->dump();
+            assert(0);
+        }
+        m_builder.SetInsertPoint(m_ifStack.top());
+        m_ifStack.pop();
+    }
+
+    // Opens a loop: creates the "loop" and "endloop" blocks, jumps into the
+    // loop body and continues emitting there.
+    void beginLoop() {
+        Function * function = m_builder.GetInsertBlock()->getParent();
+        BasicBlock *begin = BasicBlock::Create(m_builder.getContext(), name("loop"), function, NULL);
+        BasicBlock *end = BasicBlock::Create(m_builder.getContext(), name("endloop"), function, NULL);
+
+        m_builder.CreateBr(begin);
+        Loop loop;
+        loop.begin = begin;
+        loop.end = end;
+        m_builder.SetInsertPoint(begin);
+        m_loopStack.push(loop);
+    }
+
+    // Closes the innermost loop: emits the back edge, moves the end block
+    // after the current block and continues emitting in it.
+    void endLoop() {
+        assert(!m_loopStack.empty());
+        Loop loop = m_loopStack.top();
+        m_builder.CreateBr(loop.begin);
+        loop.end->moveAfter(m_builder.GetInsertBlock());
+        m_builder.SetInsertPoint(loop.end);
+        m_loopStack.pop();
+    }
+
+    // Breaks out of the innermost loop. Anything emitted afterwards goes into
+    // a fresh block named "unreachable" that no branch targets.
+    void brk() {
+        assert(!m_loopStack.empty());
+        BasicBlock *unr = BasicBlock::Create(m_builder.getContext(), name("unreachable"), m_builder.GetInsertBlock()->getParent(), NULL);
+        m_builder.CreateBr(m_loopStack.top().end);
+        m_builder.SetInsertPoint(unr);
+    }
+};
+
+#endif // #ifndef _PIXELFLINGER2_LLVM_HELPER_H_
diff --git a/src/pixelflinger2/llvm_scanline.cpp b/src/pixelflinger2/llvm_scanline.cpp
new file mode 100644
index 0000000..461f8a8
--- /dev/null
+++ b/src/pixelflinger2/llvm_scanline.cpp
@@ -0,0 +1,687 @@
+/**
+ **
+ ** Copyright 2011, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#include "src/pixelflinger2/pixelflinger2.h"
+#include "src/pixelflinger2/llvm_helper.h"
+#include "src/mesa/main/mtypes.h"
+
+#include <llvm/Module.h>
+
+using namespace llvm;
+
+// Emits IR that applies one stencil operation to the byte at sPtr.
+// op is the operation selector 0..7 annotated on each branch below; sRef is
+// the value stored by the replace operation. The increment/decrement
+// operations clamp at 255 and 0, their _WRAP variants do not.
+static void StencilOp(IRBuilder<> &builder, const unsigned char op,
+                      Value * sPtr, Value * sRef)
+{
+    CondBranch condBranch(builder);
+    Value * const current = builder.CreateLoad(sPtr, "stenciOpS");
+    if (op == 0) { // GL_ZERO
+        builder.CreateStore(builder.getInt8(0), sPtr);
+    } else if (op == 1) { // GL_KEEP
+        builder.CreateStore(current, sPtr);
+    } else if (op == 2) { // GL_REPLACE
+        builder.CreateStore(sRef, sPtr);
+    } else if (op == 3) { // GL_INCR
+        condBranch.ifCond(builder.CreateICmpEQ(current, builder.getInt8(255)));
+        builder.CreateStore(current, sPtr); // already at the maximum
+        condBranch.elseop();
+        Value * const incremented = builder.CreateAdd(current, builder.getInt8(1));
+        builder.CreateStore(incremented, sPtr);
+        condBranch.endif();
+    } else if (op == 4) { // GL_DECR
+        condBranch.ifCond(builder.CreateICmpEQ(current, builder.getInt8(0)));
+        builder.CreateStore(current, sPtr); // already at the minimum
+        condBranch.elseop();
+        Value * const decremented = builder.CreateSub(current, builder.getInt8(1));
+        builder.CreateStore(decremented, sPtr);
+        condBranch.endif();
+    } else if (op == 5) { // GL_INVERT
+        builder.CreateStore(builder.CreateNot(current), sPtr);
+    } else if (op == 6) { // GL_INCR_WRAP
+        builder.CreateStore(builder.CreateAdd(current, builder.getInt8(1)), sPtr);
+    } else if (op == 7) { // GL_DECR_WRAP
+        builder.CreateStore(builder.CreateSub(current, builder.getInt8(1)), sPtr);
+    } else {
+        assert(0);
+    }
+}
+
+// Emits the stencil operation for the current face and returns the stencil
+// value reloaded from sPtr afterwards. When the front and back operations
+// are the same no branch is emitted; otherwise face == 0 selects frontOp and
+// any other value selects backOp.
+static Value * StencilOp(IRBuilder<> & builder, Value * face,
+                         const unsigned char frontOp, const unsigned char backOp,
+                         Value * sPtr, Value * sRef)
+{
+    const bool sameForBothFaces = (frontOp == backOp);
+    CondBranch condBranch(builder);
+    if (sameForBothFaces) {
+        StencilOp(builder, frontOp, sPtr, sRef);
+    } else {
+        condBranch.ifCond(builder.CreateICmpEQ(face, builder.getInt8(0)));
+        StencilOp(builder, frontOp, sPtr, sRef);
+        condBranch.elseop();
+        StencilOp(builder, backOp, sPtr, sRef);
+        condBranch.endif();
+    }
+    return builder.CreateLoad(sPtr);
+}
+
+// Emits the stencil comparison selected by func (the low three bits of the GL
+// comparison enum) and stores the boolean outcome to sCmpPtr. The comparison
+// is unsigned with sRef on the left and s on the right. An unknown func
+// asserts and stores nothing.
+static void StencilFunc(IRBuilder<> & builder, const unsigned char func,
+                        Value * s, Value * sRef, Value * sCmpPtr)
+{
+    Value * outcome = NULL;
+    switch (func) {
+    case GL_NEVER & 0x7:
+        outcome = builder.getFalse();
+        break;
+    case GL_LESS & 0x7:
+        outcome = builder.CreateICmpULT(sRef, s);
+        break;
+    case GL_EQUAL & 0x7:
+        outcome = builder.CreateICmpEQ(sRef, s);
+        break;
+    case GL_LEQUAL & 0x7:
+        outcome = builder.CreateICmpULE(sRef, s);
+        break;
+    case GL_GREATER & 0x7:
+        outcome = builder.CreateICmpUGT(sRef, s);
+        break;
+    case GL_NOTEQUAL & 0x7:
+        outcome = builder.CreateICmpNE(sRef, s);
+        break;
+    case GL_GEQUAL & 0x7:
+        outcome = builder.CreateICmpUGE(sRef, s);
+        break;
+    case GL_ALWAYS & 0x7:
+        outcome = builder.getTrue();
+        break;
+    default:
+        assert(0);
+        return;
+    }
+    builder.CreateStore(outcome, sCmpPtr);
+}
+
+// Returns the blend factor selected by mode, emitting any arithmetic it needs.
+//
+// mode      factor selector, 0 (GL_ZERO) through 14
+//           (GL_ONE_MINUS_CONSTANT_ALPHA), as annotated on each case
+// src, dst, constant
+//           source, destination and constant color operands, used by the
+//           *_COLOR modes
+// one, zero operands returned for GL_ONE / GL_ZERO and used as the minuend of
+//           the ONE_MINUS_*_COLOR modes
+// srcA, dstA, constantA
+//           scalar alpha values used by the *_ALPHA modes
+// sOne      scalar one, minuend of the ONE_MINUS_*_ALPHA modes
+// isVector  when true, alpha-derived factors are replicated into a 4-lane
+//           vector with intVec(); when false they stay scalar
+//           NOTE(review): presumably true for the color factor and false for
+//           the alpha factor - confirm against the caller
+//
+// An unknown mode asserts and yields NULL.
+static Value * BlendFactor(const unsigned mode, Value * src, Value * dst,
+                           Value * constant, Value * one, Value * zero,
+                           Value * srcA, Value * dstA, Value * constantA,
+                           Value * sOne, const bool isVector, IRBuilder<> & builder)
+{
+    Value * factor = NULL;
+    switch (mode) {
+    case 0: // GL_ZERO
+        factor = zero;
+        break;
+    case 1: // GL_ONE
+        factor = one;
+        break;
+    case 2: // GL_SRC_COLOR:
+        factor = src;
+        break;
+    case 3: // GL_ONE_MINUS_SRC_COLOR:
+        factor = builder.CreateSub(one, src);
+        break;
+    case 4: // GL_DST_COLOR:
+        factor = dst;
+        break;
+    case 5: // GL_ONE_MINUS_DST_COLOR:
+        factor = builder.CreateSub(one, dst);
+        break;
+    case 6: // GL_SRC_ALPHA:
+        factor = srcA;
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 7: // GL_ONE_MINUS_SRC_ALPHA:
+        factor = builder.CreateSub(sOne, srcA);
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 8: // GL_DST_ALPHA:
+        factor = dstA;
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 9: // GL_ONE_MINUS_DST_ALPHA:
+        factor = builder.CreateSub(sOne, dstA);
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 10: // GL_SRC_ALPHA_SATURATE: // valid only for source color and alpha
+        if (isVector) {
+            // rgb = min(srcA, 1 - dstA), alpha lane = 1
+            factor = minIntScalar(builder, srcA, builder.CreateSub(sOne, dstA));
+            factor = intVec(builder, factor, factor, factor, sOne);
+        } else {
+            // when it's used for source alpha, it's just 1; emit no
+            // instructions whose result would be discarded
+            factor = sOne;
+        }
+        break;
+    case 11: // GL_CONSTANT_COLOR:
+        factor = constant;
+        break;
+    case 12: // GL_ONE_MINUS_CONSTANT_COLOR:
+        factor = builder.CreateSub(one, constant);
+        break;
+    case 13: // GL_CONSTANT_ALPHA:
+        factor = constantA;
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    case 14: // GL_ONE_MINUS_CONSTANT_ALPHA:
+        factor = builder.CreateSub(sOne, constantA);
+        if (isVector)
+            factor = intVec(builder, factor, factor, factor, factor);
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    return factor;
+}
+
+// Clamps every lane of a <4 x i32> to [0, 255]: lower bound first, then upper.
+static Value * Saturate(IRBuilder<> & builder, Value * intVector)
+{
+    Value * const lowerBound = constIntVec(builder, 0, 0, 0, 0);
+    Value * const atLeastZero = intVecMax(builder, intVector, lowerBound);
+    Value * const upperBound = constIntVec(builder, 255, 255, 255, 255);
+    return intVecMin(builder, atLeastZero, upperBound);
+}
+
+// Packs a <4 x i32> rgba vector with lanes in [0, 255] into a single i32:
+// lane 0 stays in bits 0-7, lane 1 moves to bits 8-15, lane 2 to bits 16-23
+// and lane 3 to bits 24-31, and the four shifted lanes are ORed together.
+static Value * IntVectorToColor(IRBuilder<> & builder, Value * src)
+{
+    Value * const shifted = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24));
+    const std::vector<Value *> lanes = extractVector(builder, shifted);
+    Value * packed = lanes[0];
+    for (unsigned i = 1; i < 4; i++)
+        packed = builder.CreateOr(packed, lanes[i]);
+    return packed;
+}
+
+// Emits "src <op> dst" for one blend equation; the operands may be scalars or
+// vectors. equation uses the blend state's encoding, i.e. it is added to
+// GL_FUNC_ADD to recover the GL enum. Returns NULL for an unknown equation.
+static Value * ApplyBlendEquation(IRBuilder<> & builder, const unsigned equation,
+                                  Value * src, Value * dst)
+{
+    switch (equation + GL_FUNC_ADD) {
+    case GL_FUNC_ADD:
+        return builder.CreateAdd(src, dst);
+    case GL_FUNC_SUBTRACT:
+        return builder.CreateSub(src, dst);
+    case GL_FUNC_REVERSE_SUBTRACT:
+        return builder.CreateSub(dst, src);
+    default:
+        assert(0);
+        return NULL;
+    }
+}
+
+// src is <4 x float> approx [0,1]; dst is <4 x i32> [0,255] from frame buffer; return is i32
+//
+// Emits the IR that turns the fragment colour into the packed value written to
+// the frame buffer (r in bits 0-7, g 8-15, b 16-23, a 24-31).
+// With blending disabled src is only scaled to [0,255], clamped and packed.
+// With blending enabled the result is
+//     saturate((src * srcFactor <equation> dst * dstFactor) >> 8)
+// where the colour factors/equation (scf, dcf, ce) and the alpha
+// factors/equation (saf, daf, ae) are taken from gglCtx->blendState.
+Value * GenerateFSBlend(const GGLContext * gglCtx, /*const RegDesc * regDesc,*/
+                        IRBuilder<> & builder, Value * src, Value * dst)
+{
+    const Type * const intType = Type::getInt32Ty(*gglCtx->llvmCtx);
+    (void)intType; // only referenced by the disabled regDesc paths below
+
+    // TODO cast the outputs pointer type to int for writing to minimize bandwidth
+    if (!gglCtx->blendState.enable) {
+//        if (regDesc->IsInt32Color())
+//        {
+//            debug_printf("GenerateFixedFS dst is already scalar fixed0 \n");
+//            src = builder.CreateExtractElement(src, builder.getInt32(0));
+//            src = builder.CreateBitCast(src, intType); // it's already RGBA int32
+//        }
+//        else if (regDesc->IsVectorType(Float))
+//        {
+        src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
+        // Signed conversion, same as the blending path below: src is only
+        // approximately in [0,1], and Saturate clamps against 0, so negative
+        // values have to survive the conversion (they are out of range for fptoui).
+        src = builder.CreateFPToSI(src, intVecType(builder));
+        src = Saturate(builder, src);
+        src = IntVectorToColor(builder, src);
+//        }
+//        else if (regDesc->IsVectorType(Fixed8))
+//        {
+//            src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//            src = Saturate(instr, src);
+//            src = IntVectorToColor(instr, storage, src);
+//        }
+//        else if (regDesc->IsVectorType(Fixed16))
+//        {
+//            src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//            src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
+//            src = Saturate(instr, src);
+//            src = IntVectorToColor(instr, storage, src);
+//        }
+//        else
+//            assert(0);
+        return src;
+    }
+
+    // blending, so convert src to <4 x i32>
+//    if (regDesc->IsInt32Color())
+//    {
+//        src = builder.CreateExtractElement(src, builder.getInt32(0));
+//        src = builder.CreateBitCast(src, intType); // it's already RGBA int32
+//
+//        Value * channels = Constant::getNullValue(instr->GetIntVectorType());
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(0));
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(1));
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(2));
+//        channels = builder.CreateInsertElement(channels, src, builder.getInt32(3));
+//        channels = builder.CreateLShr(channels, constIntVec(builder,0, 8, 16, 24));
+//        channels = builder.CreateAnd(channels, constIntVec(builder,0xff, 0xff, 0xff, 0xff));
+//        src = channels;
+//    }
+//    else if (regDesc->IsVectorType(Fixed8)) // it's already int32x4 RGBA
+//        src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//    else if (regDesc->IsVectorType(Fixed16))
+//    {
+//        src = builder.CreateBitCast(src, instr->GetIntVectorType());
+//        // TODO DXL consider shl dst by 8 and ashr by 16 in the end for more precision
+//        src = builder.CreateAShr(src, constIntVec(builder,8,8,8,8));
+//    }
+//    else if (regDesc->IsVectorType(Float))
+//    {
+    src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255));
+    src = builder.CreateFPToSI(src, intVecType(builder));
+//    }
+//    else
+//        assert(0);
+
+    // 255 is "1.0" in this fixed point representation (vector and scalar forms)
+    Value * const one = constIntVec(builder,255,255,255,255);
+    Value * const zero = constIntVec(builder,0,0,0,0);
+    Value * const sOne = builder.getInt32(255);
+    Value * const sZero = builder.getInt32(0);
+
+#if USE_LLVM_SCANLINE
+    Value * constant = constIntVec(builder,gglCtx->blendState.color[0],
+                                   gglCtx->blendState.color[1],
+                                   gglCtx->blendState.color[2],
+                                   gglCtx->blendState.color[3]);
+#else
+    Value * constant = NULL;
+    assert(0);
+#endif
+
+    Value * srcA = extractVector(builder,src)[3];
+    Value * dstA = extractVector(builder,dst)[3];
+    Value * constantA = extractVector(builder,constant)[3];
+
+    // source factor; the alpha lane is replaced when alpha uses its own factor
+    Value * sf = BlendFactor(gglCtx->blendState.scf, src, dst,
+                             constant, one, zero, srcA, dstA,
+                             constantA, sOne, true, builder);
+    if (gglCtx->blendState.scf != gglCtx->blendState.saf) {
+        Value * sfA = BlendFactor(gglCtx->blendState.saf, srcA, dstA,
+                                  constantA, sOne, sZero, srcA, dstA,
+                                  constantA, sOne, false, builder);
+        sf = builder.CreateInsertElement(sf, sfA, builder.getInt32(3),
+                                         name("sfAStore"));
+    }
+
+    // destination factor, same scheme
+    Value * df = BlendFactor(gglCtx->blendState.dcf, src, dst,
+                             constant, one, zero, srcA, dstA,
+                             constantA, sOne, true, builder);
+    if (gglCtx->blendState.dcf != gglCtx->blendState.daf) {
+        Value * dfA = BlendFactor(gglCtx->blendState.daf, srcA, dstA,
+                                  constantA, sOne, sZero, srcA, dstA,
+                                  constantA, sOne, false, builder);
+        df = builder.CreateInsertElement(df, dfA, builder.getInt32(3),
+                                         name("dfAStore"));
+    }
+
+    // this is factor *= 256 / 255; factors have a chance of constant folding
+    sf = builder.CreateAdd(sf, builder.CreateLShr(sf, constIntVec(builder,7,7,7,7)));
+    df = builder.CreateAdd(df, builder.CreateLShr(df, constIntVec(builder,7,7,7,7)));
+
+    src = builder.CreateMul(src, sf);
+    dst = builder.CreateMul(dst, df);
+
+    Value * res = ApplyBlendEquation(builder, gglCtx->blendState.ce, src, dst);
+    if (gglCtx->blendState.ce != gglCtx->blendState.ae) {
+        // Alpha has its own equation: recompute lane 3 from the weighted
+        // alphas with the ALPHA equation (ae) and overwrite it in the result.
+        srcA = extractVector(builder,src)[3];
+        dstA = extractVector(builder,dst)[3];
+        Value * resA = ApplyBlendEquation(builder, gglCtx->blendState.ae, srcA, dstA);
+        res = builder.CreateInsertElement(res, resA, builder.getInt32(3),
+                                          name("resAStore"));
+    }
+
+    // drop the 8 fractional bits introduced by the factor multiply
+    res = builder.CreateAShr(res, constIntVec(builder,8,8,8,8));
+    res = Saturate(builder, res);
+    res = IntVectorToColor(builder, res);
+    return res;
+}
+
+// Builds the LLVM signature shared by all generated scanline functions:
+//   void (floatVec * start, floatVec * step, i32 * frame, i32 * depth,
+//         i8 * stencil, i8 * stencilState, i32 count)
+// where floatVec is the type returned by floatVecType().
+static FunctionType * ScanLineFunctionType(IRBuilder<> & builder)
+{
+    const PointerType * const vecPtrTy = PointerType::get(floatVecType(builder), 0);
+    const Type * const int32Ty = builder.getInt32Ty();
+    const PointerType * const int32PtrTy = PointerType::get(int32Ty, 0);
+    const PointerType * const int8PtrTy = PointerType::get(builder.getInt8Ty(), 0);
+
+    std::vector<const Type*> params;
+    params.push_back(vecPtrTy);   // start
+    params.push_back(vecPtrTy);   // step
+    params.push_back(int32PtrTy); // frame
+    params.push_back(int32PtrTy); // depth
+    params.push_back(int8PtrTy);  // stencil
+    params.push_back(int8PtrTy);  // stencil state
+    params.push_back(int32Ty);    // count
+
+    // void return, fixed argument list
+    return FunctionType::get(builder.getVoidTy(), params, false);
+}
+
+// Emits into mod a function named scanlineName that processes `count`
+// consecutive pixels. For each pixel the generated code runs the stencil and
+// depth tests enabled in gglCtx->bufferState, calls the fragment shader
+// function shaderName, converts/blends its colour with GenerateFSBlend, stores
+// it to the frame buffer, applies the stencil ops, and finally advances the
+// buffer pointers by one element and the interpolated inputs in `start` by `step`.
+// Returns without doing anything if mod already has a function called scanlineName.
+//
+// generated scanline function parameters are VertexOutput * start, VertexOutput * step,
+// unsigned * frame, int * depth, unsigned char * stencil,
+// ActiveStencilState * stencilState, unsigned count
+void GenerateScanLine(const GGLContext * gglCtx, const gl_shader_program * program, Module * mod,
+                      const char * shaderName, const char * scanlineName)
+{
+    IRBuilder<> builder(mod->getContext());
+    debug_printf("GenerateScanLine %s \n", scanlineName);
+
+    const Type * intType = Type::getInt32Ty(*gglCtx->llvmCtx);
+    const PointerType * intPointerType = PointerType::get(intType, 0);
+    const Type * byteType = Type::getInt8Ty(*gglCtx->llvmCtx);
+    const PointerType * bytePointerType = PointerType::get(byteType, 0);
+
+    Function * func = mod->getFunction(scanlineName);
+    if (func)
+        return;
+
+    func = llvm::cast<Function>(mod->getOrInsertFunction(scanlineName,
+                                ScanLineFunctionType(builder)));
+
+    BasicBlock *label_entry = BasicBlock::Create(builder.getContext(), "entry", func, 0);
+    builder.SetInsertPoint(label_entry);
+    CondBranch condBranch(builder);
+
+    Function::arg_iterator args = func->arg_begin();
+    Value * start = args++;
+    start->setName("start");
+    Value * step = args++;
+    step->setName("step");
+
+    // need alloc to be able to assign to it by using store
+    Value * framePtr = builder.CreateAlloca(intPointerType);
+    builder.CreateStore(args++, framePtr);
+    Value * depthPtr = builder.CreateAlloca(intPointerType);
+    builder.CreateStore(args++, depthPtr);
+    Value * stencilPtr = builder.CreateAlloca(bytePointerType);
+    builder.CreateStore(args++, stencilPtr);
+    Value * stencilState = args++;
+    stencilState->setName("stencilState");
+    Value * countPtr = builder.CreateAlloca(intType);
+    builder.CreateStore(args++, countPtr);
+
+    // Per-pixel temporaries. They are allocated here, together with the other
+    // allocas and before beginLoop(), so that the alloca is not part of the
+    // code emitted for the loop body (where it would be repeated per pixel).
+    // Every iteration stores to them before loading, so sharing is safe.
+    Value * sCmpPtr = NULL, * sPtr = NULL, * zPtr = NULL;
+    if (gglCtx->bufferState.stencilTest) {
+        sCmpPtr = builder.CreateAlloca(Type::getInt1Ty(*gglCtx->llvmCtx));
+        sCmpPtr->setName("sCmpPtr");
+        sPtr = builder.CreateAlloca(byteType);
+        sPtr->setName("sPtr");
+    }
+    if (gglCtx->bufferState.depthTest) {
+        zPtr = builder.CreateAlloca(intType); // temp store for modifying incoming z
+        zPtr->setName("zPtr");
+    }
+
+    // Stencil parameters: baked in as constants when front and back state
+    // agree, otherwise read from bytes 1..3 of stencilState; byte 0 is the face.
+    Value * sFace = NULL, * sRef = NULL, *sMask = NULL, * sFunc = NULL;
+    if (gglCtx->bufferState.stencilTest) {
+        sFace = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 0), "sFace");
+        if (gglCtx->frontStencil.ref == gglCtx->backStencil.ref)
+            sRef = builder.getInt8(gglCtx->frontStencil.ref);
+        else
+            sRef = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 1), "sRef");
+        if (gglCtx->frontStencil.mask == gglCtx->backStencil.mask)
+            sMask = builder.getInt8(gglCtx->frontStencil.mask);
+        else
+            sMask = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 2), "sMask");
+        if (gglCtx->frontStencil.func == gglCtx->backStencil.func)
+            sFunc = builder.getInt8(gglCtx->frontStencil.func);
+        else
+            sFunc = builder.CreateLoad(builder.CreateConstInBoundsGEP1_32(stencilState, 3), "sFunc");
+    }
+
+    condBranch.beginLoop(); // while (count > 0)
+
+    // get values
+    Value * frame = builder.CreateLoad(framePtr);
+    frame->setName("frame");
+    Value * depth = NULL, * stencil = NULL;
+    if (gglCtx->bufferState.depthTest) {
+        depth = builder.CreateLoad(depthPtr);
+        depth->setName("depth");
+    }
+
+    Value * count = builder.CreateLoad(countPtr);
+    count->setName("count");
+
+    Value * cmp = builder.CreateICmpEQ(count, builder.getInt32(0));
+    condBranch.ifCond(cmp, "if_break_loop"); // if (count == 0)
+    condBranch.brk(); // break;
+    condBranch.endif();
+
+    // stencil test: sCmp is the i1 result, constant true when the test is off
+    Value * sCmp = NULL, * s = NULL;
+    if (gglCtx->bufferState.stencilTest) {
+        stencil = builder.CreateLoad(stencilPtr);
+        stencil->setName("stencil");
+
+        s = builder.CreateLoad(stencil);
+        s = builder.CreateAnd(s, sMask);
+        builder.CreateStore(s, sPtr);
+
+        // when front and back functions differ, pick one at run time by face
+        if (gglCtx->frontStencil.func != gglCtx->backStencil.func)
+            condBranch.ifCond(builder.CreateICmpEQ(sFace, builder.getInt8(0)));
+
+        StencilFunc(builder, gglCtx->frontStencil.func, s, sRef, sCmpPtr);
+
+        if (gglCtx->frontStencil.func != gglCtx->backStencil.func) {
+            condBranch.elseop();
+            StencilFunc(builder, gglCtx->backStencil.func, s, sRef, sCmpPtr);
+            condBranch.endif();
+        }
+
+        sCmp = builder.CreateLoad(sCmpPtr);
+    } else
+        sCmp = ConstantInt::getTrue(mod->getContext());
+    sCmp->setName("sCmp");
+
+    // depth test: zCmp is the i1 result, constant true when the test is off
+    Value * depthZ = NULL, * z = NULL, * zCmp = NULL;
+    if (gglCtx->bufferState.depthTest) {
+        depthZ = builder.CreateLoad(depth, "depthZ"); // z stored in buffer
+
+        // modified incoming z: the float bits of the fragcoord z, read as int
+        z = builder.CreateBitCast(start, intPointerType);
+        z = builder.CreateConstInBoundsGEP1_32(z, (GGL_FS_INPUT_OFFSET +
+                                               GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
+        z = builder.CreateLoad(z, "z");
+
+        builder.CreateStore(z, zPtr);
+
+        Value * zNegative = builder.CreateICmpSLT(z, builder.getInt32(0));
+        condBranch.ifCond(zNegative);
+        // if (0x80000000 & z) z ^= 0x7fffffff since smaller -ve float means bigger -ve int
+        z = builder.CreateXor(z, builder.getInt32(0x7fffffff));
+        builder.CreateStore(z, zPtr);
+
+        condBranch.endif();
+
+        z = builder.CreateLoad(zPtr, "z");
+
+        switch (0x200 | gglCtx->bufferState.depthFunc) {
+        case GL_NEVER:
+            zCmp = ConstantInt::getFalse(mod->getContext());
+            break;
+        case GL_LESS:
+            zCmp = builder.CreateICmpSLT(z, depthZ);
+            break;
+        case GL_EQUAL:
+            zCmp = builder.CreateICmpEQ(z, depthZ);
+            break;
+        case GL_LEQUAL:
+            zCmp = builder.CreateICmpSLE(z, depthZ);
+            break;
+        case GL_GREATER:
+            zCmp = builder.CreateICmpSGT(z, depthZ);
+            break;
+        case GL_NOTEQUAL:
+            zCmp = builder.CreateICmpNE(z, depthZ);
+            break;
+        case GL_GEQUAL:
+            zCmp = builder.CreateICmpSGE(z, depthZ);
+            break;
+        case GL_ALWAYS:
+            zCmp = ConstantInt::getTrue(mod->getContext());
+            break;
+        default:
+            assert(0);
+            break;
+        }
+    } else // no depth test means always pass
+        zCmp = ConstantInt::getTrue(mod->getContext());
+    zCmp->setName("zCmp");
+
+    condBranch.ifCond(sCmp, "if_sCmp", "sCmp_fail");
+    condBranch.ifCond(zCmp, "if_zCmp", "zCmp_fail");
+
+    Value * fsInputs = builder.CreateConstInBoundsGEP1_32(start,
+                       offsetof(VertexOutput,position)/sizeof(Vector4));
+    (void)fsInputs; // not consumed yet; the shader is called without arguments
+    Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start,
+                        offsetof(VertexOutput,fragColor)/sizeof(Vector4));
+
+    Function * fsFunction = mod->getFunction(shaderName);
+    assert(fsFunction);
+    CallInst *call = builder.CreateCall(fsFunction);
+    call->setCallingConv(CallingConv::C);
+    call->setTailCall(false);
+
+    // Unpack the current frame buffer colour into <4 x i32> [0,255] whenever
+    // blending is on. The source factors can reference the destination colour
+    // too (e.g. a DST_COLOR source factor with a ZERO destination factor), so
+    // the load must not depend on the destination factors alone.
+    Value * dst = Constant::getNullValue(intVecType(builder));
+    if (gglCtx->blendState.enable) {
+        Value * frameColor = builder.CreateLoad(frame, "frameColor");
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(0));
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(1));
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(2));
+        dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(3));
+        dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24));
+        dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
+    }
+
+    Value * src = builder.CreateConstInBoundsGEP1_32(fsOutputs, 0);
+    src = builder.CreateLoad(src);
+
+    Value * color = GenerateFSBlend(gglCtx, /*&prog->outputRegDesc,*/ builder, src, dst);
+    builder.CreateStore(color, frame);
+
+    // TODO DXL depthmask check
+    if (gglCtx->bufferState.depthTest) {
+        z = builder.CreateBitCast(z, intType);
+        builder.CreateStore(z, depth); // store z
+    }
+
+    if (gglCtx->bufferState.stencilTest)
+        builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dPass,
+                                      gglCtx->backStencil.dPass, sPtr, sRef), stencil);
+
+    condBranch.elseop(); // failed z test
+
+    if (gglCtx->bufferState.stencilTest)
+        builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dFail,
+                                      gglCtx->backStencil.dFail, sPtr, sRef), stencil);
+
+    condBranch.endif();
+    condBranch.elseop(); // failed s test
+
+    if (gglCtx->bufferState.stencilTest)
+        builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.sFail,
+                                      gglCtx->backStencil.sFail, sPtr, sRef), stencil);
+
+    condBranch.endif();
+
+    frame = builder.CreateConstInBoundsGEP1_32(frame, 1); // frame++
+    builder.CreateStore(frame, framePtr);
+
+    if (gglCtx->bufferState.depthTest) {
+        depth = builder.CreateConstInBoundsGEP1_32(depth, 1); // depth++
+        builder.CreateStore(depth, depthPtr);
+    }
+    if (gglCtx->bufferState.stencilTest) {
+        stencil = builder.CreateConstInBoundsGEP1_32(stencil, 1); // stencil++
+        builder.CreateStore(stencil, stencilPtr);
+    }
+
+    // advance the interpolated inputs: start[slot] += step[slot]
+    Value * vPtr = NULL, * v = NULL, * dx = NULL;
+    if (program->UsesFragCoord) {
+        vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
+                GGL_FS_INPUT_FRAGCOORD_INDEX);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
+                                                GGL_FS_INPUT_FRAGCOORD_INDEX);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    } else if (gglCtx->bufferState.depthTest) {
+        // shader does not read fragcoord, but the depth test still needs z stepped
+        const Type * floatType = Type::getFloatTy(*gglCtx->llvmCtx);
+        const PointerType * floatPointerType = PointerType::get(floatType, 0);
+        vPtr = builder.CreateBitCast(start, floatPointerType);
+        vPtr = builder.CreateConstInBoundsGEP1_32(vPtr,
+                (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateBitCast(step, floatPointerType);
+        dx = builder.CreateConstInBoundsGEP1_32(dx,
+                                                (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRAGCOORD_INDEX) * 4 + 2);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    }
+
+    if (program->UsesPointCoord) {
+        vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET +
+                GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
+                                                GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    }
+
+    for (unsigned i = 0; i < program->VaryingSlots; ++i) {
+        vPtr = builder.CreateConstInBoundsGEP1_32(start, offsetof(VertexOutput,varyings)/sizeof(Vector4) + i);
+        v = builder.CreateLoad(vPtr);
+        dx = builder.CreateConstInBoundsGEP1_32(step, GGL_FS_INPUT_OFFSET +
+                                                GGL_FS_INPUT_VARYINGS_INDEX + i);
+        dx = builder.CreateLoad(dx);
+        v = builder.CreateFAdd(v, dx);
+        builder.CreateStore(v, vPtr);
+    }
+
+    count = builder.CreateSub(count, builder.getInt32(1));
+    builder.CreateStore(count, countPtr); // count--;
+
+    condBranch.endLoop();
+
+    builder.CreateRetVoid();
+}
diff --git a/src/pixelflinger2/pixelflinger2.h b/src/pixelflinger2/pixelflinger2.h
index cdc2b9c..8f8a4d5 100644
--- a/src/pixelflinger2/pixelflinger2.h
+++ b/src/pixelflinger2/pixelflinger2.h
@@ -19,7 +19,7 @@
#define _PIXELFLINGER2_H_
#define USE_LLVM_TEXTURE_SAMPLER 1
-#define USE_LLVM_SCANLINE 0
+#define USE_LLVM_SCANLINE 1
#ifndef USE_LLVM_EXECUTIONENGINE
#define USE_LLVM_EXECUTIONENGINE 0 // 1 to use llvm::Execution, 0 to use libBCC, requires modifying makefile
diff --git a/src/pixelflinger2/raster.cpp b/src/pixelflinger2/raster.cpp
index 23a5cef..19212d7 100644
--- a/src/pixelflinger2/raster.cpp
+++ b/src/pixelflinger2/raster.cpp
@@ -85,7 +85,7 @@ static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl,
assert(fabs(tl->position.y - tr->position.y) < 1 && fabs(bl->position.y - br->position.y) < 1);
const unsigned width = ctx->frameSurface.width, height = ctx->frameSurface.height;
- const unsigned varyingCount = 8;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters;
+ const unsigned varyingCount = ctx->glCtx->CurrentProgram->VaryingSlots;
// tlv-trv and blv-brv are parallel and horizontal
diff --git a/src/pixelflinger2/scanline.cpp b/src/pixelflinger2/scanline.cpp
index 5ff12a6..106582a 100644
--- a/src/pixelflinger2/scanline.cpp
+++ b/src/pixelflinger2/scanline.cpp
@@ -186,7 +186,7 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO
// assert(ctx->frameSurface.width == ctx->depthSurface.width);
// assert(ctx->frameSurface.height == ctx->depthSurface.height);
- const unsigned int varyingCount = 8;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters;
+ const unsigned int varyingCount = ctx->glCtx->CurrentProgram->VaryingSlots;
const unsigned y = v1->position.y, startX = v1->position.x,
endX = v2->position.x;
@@ -198,6 +198,7 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO
const VectorComp_t div = VectorComp_t_CTR(1 / (float)(endX - startX));
memcpy(ctx->glCtx->CurrentProgram->ValuesVertexOutput, v1, sizeof(*v1));
+ // shader symbols are mapped to gl_shader_program_Values*
VertexOutput & vertex(*(VertexOutput*)ctx->glCtx->CurrentProgram->ValuesVertexOutput);
VertexOutput vertexDx(*v2);
@@ -236,17 +237,13 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO
#if USE_LLVM_SCANLINE
typedef void (* ScanLineFunction_t)(VertexOutput * start, VertexOutput * step,
- Vector4 * constants, unsigned * frame,
- int * depth, unsigned char * stencil,
- GGLContext::ActiveStencilState *,
- unsigned count);
+ unsigned * frame, int * depth, unsigned char * stencil,
+ GGLContext::ActiveStencilState *, unsigned count);
-// ScanLineFunction_t scanLineFunction = (ScanLineFunction_t)
-// ctx->glCtx->Shader.CurrentProgram->GLVMFP->function;
+ ScanLineFunction_t scanLineFunction = (ScanLineFunction_t)
+ ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function;
if (endX >= startX) {
-// scanLineFunction(&vertex, &vertexDx, (Vector4 *)
-// ctx->glCtx->Shader.CurrentProgram->FragmentProgram->Parameters->ParameterValues,
-// frame, depth, stencil, &ctx->activeStencil, endX - startX + 1);
+ scanLineFunction(&vertex, &vertexDx, frame, depth, stencil, &ctx->activeStencil, endX - startX + 1);
}
#else
@@ -304,37 +301,43 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO
z = vertex.position.i[2];
if (z & 0x80000000) // negative float has leading 1
z ^= 0x7fffffff; // bigger negative is smaller
- bool zCmp = false;
- switch (0x200 | ctx->bufferState.depthFunc) {
- case GL_NEVER:
- zCmp = false;
- break;
- case GL_LESS:
- zCmp = z < *depth;
- break;
- case GL_EQUAL:
- zCmp = z == *depth;
- break;
- case GL_LEQUAL:
- zCmp = z <= *depth;
- break;
- case GL_GREATER:
- zCmp = z > *depth;
- break;
- case GL_NOTEQUAL:
- zCmp = z != *depth;
- break;
- case GL_GEQUAL:
- zCmp = z >= *depth;
- break;
- case GL_ALWAYS:
- zCmp = true;
- break;
- default:
- assert(0);
- break;
+ bool zCmp = true;
+ if (DepthTest)
+ {
+ switch (0x200 | ctx->bufferState.depthFunc) {
+ case GL_NEVER:
+ zCmp = false;
+ break;
+ case GL_LESS:
+ zCmp = z < *depth;
+ break;
+ case GL_EQUAL:
+ zCmp = z == *depth;
+ break;
+ case GL_LEQUAL:
+ zCmp = z <= *depth;
+ break;
+ case GL_GREATER:
+ zCmp = z > *depth;
+ break;
+ case GL_NOTEQUAL:
+ zCmp = z != *depth;
+ break;
+ case GL_GEQUAL:
+ zCmp = z >= *depth;
+ break;
+ case GL_ALWAYS:
+ zCmp = true;
+ break;
+ default:
+ assert(0);
+ break;
+ }
}
if (!DepthTest || zCmp) {
+ float * varying = (float *)ctx->glCtx->CurrentProgram->ValuesVertexOutput;
+
+ assert((void *)&(vertex.varyings[0]) == &(varying[2 * 4]));
ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function();
if (BlendEnable) {
BlendComp_t sOne = 255, sZero = 0;
@@ -478,14 +481,14 @@ void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexO
vertex.frontFacingPointCoord.i[3] = vertexDx.frontFacingPointCoord.i[3];
}
#else
-// if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord)
+ if (ctx->glCtx->CurrentProgram->UsesFragCoord)
vertex.position += vertexDx.position;
-// else if (ctx->bufferState.depthTest)
+ else if (ctx->bufferState.depthTest)
vertex.position.z += vertexDx.position.z;
for (unsigned i = 0; i < varyingCount; i++)
vertex.varyings[i] += vertexDx.varyings[i];
-// if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord)
+ if (ctx->glCtx->CurrentProgram->UsesPointCoord)
{
vertex.frontFacingPointCoord.z += vertexDx.frontFacingPointCoord.z;
vertex.frontFacingPointCoord.w += vertexDx.frontFacingPointCoord.w;
@@ -505,30 +508,29 @@ static void PickScanLine(GGLInterface * iface)
GGL_GET_CONTEXT(ctx, iface);
ctx->interface.ScanLine = NULL;
- const bool DepthWrite = true;
if (ctx->bufferState.stencilTest) {
if (ctx->bufferState.depthTest) {
if (ctx->blendState.enable)
- ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, true>;
+ ctx->interface.ScanLine = ScanLine<true, true, true, true>;
else
- ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, false>;
+ ctx->interface.ScanLine = ScanLine<true, true, true, false>;
} else {
if (ctx->blendState.enable)
- ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, true>;
+ ctx->interface.ScanLine = ScanLine<true, false, false, true>;
else
- ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, false>;
+ ctx->interface.ScanLine = ScanLine<true, false, false, false>;
}
} else {
if (ctx->bufferState.depthTest) {
if (ctx->blendState.enable)
- ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, true>;
+ ctx->interface.ScanLine = ScanLine<false, true, true, true>;
else
- ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, false>;
+ ctx->interface.ScanLine = ScanLine<false, true, true, false>;
} else {
if (ctx->blendState.enable)
- ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, true>;
+ ctx->interface.ScanLine = ScanLine<false, false, false, true>;
else
- ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, false>;
+ ctx->interface.ScanLine = ScanLine<false, false, false, false>;
}
}
diff --git a/src/pixelflinger2/shader.cpp b/src/pixelflinger2/shader.cpp
index 782759c..7537f2f 100644
--- a/src/pixelflinger2/shader.cpp
+++ b/src/pixelflinger2/shader.cpp
@@ -54,7 +54,6 @@ struct Instance {
bccDisposeScript(script);
else if (module)
delete module;
- getchar();
}
};
@@ -95,10 +94,9 @@ extern "C" void _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh
shader->DeletePending = true;
return;
}
- if (shader->executable)
- {
+ if (shader->executable) {
for (std::map<ShaderKey, Instance *>::iterator it=shader->executable->instances.begin();
- it != shader->executable->instances.end(); it++)
+ it != shader->executable->instances.end(); it++)
(*it).second->~Instance();
shader->executable->instances.~map();
}
@@ -363,9 +361,12 @@ static void CodeGen(Instance * instance, const char * mainName, gl_shader * shad
if (result != BCC_NO_ERROR)
fprintf(stderr, "Could not find '%s': %d\n", "main", result);
else
- printf("bcc_compile %s=%p \n", "main", shader->function);
+ printf("bcc_compile %s=%p \n", mainName, instance->function);
}
+void GenerateScanLine(const GGLContext * gglCtx, const gl_shader_program * program, llvm::Module * mod,
+ const char * shaderName, const char * scanlineName);
+
static void ShaderUse(GGLInterface * iface, gl_shader_program * program)
{
GGL_GET_CONST_CONTEXT(ctx, iface);
@@ -404,7 +405,15 @@ static void ShaderUse(GGLInterface * iface, gl_shader_program * program)
llvm::Module * module = glsl_ir_to_llvm_module(shader->ir, instance->module, ctx, shaderName);
if (!module)
assert(0); // ir to llvm failed
- CodeGen(instance, mainName, shader, program, ctx);
+#if USE_LLVM_SCANLINE
+ if (GL_FRAGMENT_SHADER == shader->Type) {
+ char scanlineName [SCANLINE_KEY_STRING_LEN] = {0};
+ GetScanlineKeyString(&shaderKey, scanlineName, sizeof scanlineName / sizeof *scanlineName);
+ GenerateScanLine(ctx, program, module, mainName, scanlineName);
+ CodeGen(instance, scanlineName, shader, program, ctx);
+ } else
+#endif
+ CodeGen(instance, mainName, shader, program, ctx);
shader->executable->instances[shaderKey] = instance;
debug_printf("jit new shader '%s'(%p) \n", mainName, instance->function); //getchar();
} else