diff options
-rw-r--r-- | Android.mk | 4 | ||||
-rw-r--r-- | include/pixelflinger2/pixelflinger2_format.h | 38 | ||||
-rw-r--r-- | include/pixelflinger2/pixelflinger2_interface.h | 44 | ||||
-rw-r--r-- | libMesa.project | 2 | ||||
-rw-r--r-- | src/glsl/ir_to_llvm.cpp | 19 | ||||
-rw-r--r-- | src/glsl/linker.cpp | 16 | ||||
-rw-r--r-- | src/mesa/main/mtypes.h | 2 | ||||
-rw-r--r-- | src/mesa/program/prog_uniform.h | 3 | ||||
-rw-r--r-- | src/pixelflinger2/buffer.cpp | 354 | ||||
-rw-r--r-- | src/pixelflinger2/llvm_scanline.cpp | 152 | ||||
-rw-r--r-- | src/pixelflinger2/llvm_texture.cpp (renamed from src/glsl/ir_to_llvm_helper.cpp) | 95 | ||||
-rw-r--r-- | src/pixelflinger2/pixelflinger2.cpp | 83 | ||||
-rw-r--r-- | src/pixelflinger2/pixelflinger2.h | 51 | ||||
-rw-r--r-- | src/pixelflinger2/raster.cpp | 234 | ||||
-rw-r--r-- | src/pixelflinger2/scanline.cpp | 30 | ||||
-rw-r--r-- | src/pixelflinger2/shader.cpp | 226 |
16 files changed, 889 insertions, 464 deletions
@@ -3,7 +3,7 @@ USE_LLVM_EXECUTIONENGINE := false # if using libLLVMExecutionEngine, # need to add files to several Android.mk in external/llvm, and comment out some stuff in llvm DynamicLibrary.cpp and Intercept.cpp -DEBUG_BUILD := true +DEBUG_BUILD := false ifneq ($(TARGET_SIMULATOR),true) @@ -100,7 +100,6 @@ libMesa_SRC_FILES := \ src/glsl/s_expression.cpp \ src/glsl/strtod.c \ src/glsl/ir_to_llvm.cpp \ - src/glsl/ir_to_llvm_helper.cpp \ src/mesa/main/shaderobj.c \ src/mesa/program/hash_table.c \ src/mesa/program/prog_parameter.cpp \ @@ -108,6 +107,7 @@ libMesa_SRC_FILES := \ src/pixelflinger2/buffer.cpp \ src/pixelflinger2/format.cpp \ src/pixelflinger2/llvm_scanline.cpp \ + src/pixelflinger2/llvm_texture.cpp \ src/pixelflinger2/pixelflinger2.cpp \ src/pixelflinger2/raster.cpp \ src/pixelflinger2/scanline.cpp \ diff --git a/include/pixelflinger2/pixelflinger2_format.h b/include/pixelflinger2/pixelflinger2_format.h index 28eba8e..f95a88d 100644 --- a/include/pixelflinger2/pixelflinger2_format.h +++ b/include/pixelflinger2/pixelflinger2_format.h @@ -28,38 +28,38 @@ enum GGLPixelFormat { GGL_PIXEL_FORMAT_RGBA_8888 = 1, // 4x8-bit ARGB GGL_PIXEL_FORMAT_RGBX_8888 = 2, // 3x8-bit RGB stored in 32-bit chunks -// GGL_PIXEL_FORMAT_RGB_888 = 3, // 3x8-bit RGB + GGL_PIXEL_FORMAT_RGB_888 = 3, // 3x8-bit RGB GGL_PIXEL_FORMAT_RGB_565 = 4, // 16-bit RGB -// GGL_PIXEL_FORMAT_BGRA_8888 = 5, // 4x8-bit BGRA -// GGL_PIXEL_FORMAT_RGBA_5551 = 6, // 16-bit RGBA -// GGL_PIXEL_FORMAT_RGBA_4444 = 7, // 16-bit RGBA + GGL_PIXEL_FORMAT_BGRA_8888 = 5, // 4x8-bit BGRA + GGL_PIXEL_FORMAT_RGBA_5551 = 6, // 16-bit RGBA + GGL_PIXEL_FORMAT_RGBA_4444 = 7, // 16-bit RGBA GGL_PIXEL_FORMAT_A_8 = 8, // 8-bit A -// GGL_PIXEL_FORMAT_L_8 = 9, // 8-bit L (R=G=B = L) -// GGL_PIXEL_FORMAT_LA_88 = 0xA, // 16-bit LA -// GGL_PIXEL_FORMAT_RGB_332 = 0xB, // 8-bit RGB (non paletted) + GGL_PIXEL_FORMAT_L_8 = 9, // 8-bit L (R=G=B = L) + GGL_PIXEL_FORMAT_LA_88 = 0xA, // 16-bit LA + GGL_PIXEL_FORMAT_RGB_332 = 0xB, // 8-bit RGB (non paletted) // reserved range. don't use. -// GGL_PIXEL_FORMAT_RESERVED_10 = 0x10, -// GGL_PIXEL_FORMAT_RESERVED_11 = 0x11, -// GGL_PIXEL_FORMAT_RESERVED_12 = 0x12, -// GGL_PIXEL_FORMAT_RESERVED_13 = 0x13, -// GGL_PIXEL_FORMAT_RESERVED_14 = 0x14, -// GGL_PIXEL_FORMAT_RESERVED_15 = 0x15, -// GGL_PIXEL_FORMAT_RESERVED_16 = 0x16, -// GGL_PIXEL_FORMAT_RESERVED_17 = 0x17, + GGL_PIXEL_FORMAT_RESERVED_10 = 0x10, + GGL_PIXEL_FORMAT_RESERVED_11 = 0x11, + GGL_PIXEL_FORMAT_RESERVED_12 = 0x12, + GGL_PIXEL_FORMAT_RESERVED_13 = 0x13, + GGL_PIXEL_FORMAT_RESERVED_14 = 0x14, + GGL_PIXEL_FORMAT_RESERVED_15 = 0x15, + GGL_PIXEL_FORMAT_RESERVED_16 = 0x16, + GGL_PIXEL_FORMAT_RESERVED_17 = 0x17, // reserved/special formats GGL_PIXEL_FORMAT_Z_16 = 0x18, GGL_PIXEL_FORMAT_S_8 = 0x19, -// GGL_PIXEL_FORMAT_SZ_24 = 0x1A, -// GGL_PIXEL_FORMAT_SZ_8 = 0x1B, + GGL_PIXEL_FORMAT_SZ_24 = 0x1A, + GGL_PIXEL_FORMAT_SZ_8 = 0x1B, GGL_PIXEL_FORMAT_Z_32 = 0x1C, // reserved range. don't use. -// GGL_PIXEL_FORMAT_RESERVED_20 = 0x20, -// GGL_PIXEL_FORMAT_RESERVED_21 = 0x21, + GGL_PIXEL_FORMAT_RESERVED_20 = 0x20, + GGL_PIXEL_FORMAT_RESERVED_21 = 0x21, // must be last diff --git a/include/pixelflinger2/pixelflinger2_interface.h b/include/pixelflinger2/pixelflinger2_interface.h index 99b18cf..f2db05f 100644 --- a/include/pixelflinger2/pixelflinger2_interface.h +++ b/include/pixelflinger2/pixelflinger2_interface.h @@ -103,6 +103,7 @@ typedef struct GGLActiveStencil { // do not change layout, used in GenerateScanL } GGLActiveStencil_t; typedef struct GGLBufferState { // all affect scanline jit + enum GGLPixelFormat colorFormat, depthFormat, stencilFormat; unsigned stencilTest : 1; unsigned depthTest : @@ -220,7 +221,7 @@ struct GGLInterface { // creates empty shader gl_shader_t * (* ShaderCreate)(const GGLInterface_t * iface, GLenum type); - + void (* ShaderSource)(gl_shader_t * shader, GLsizei count, const char ** string, const int * length); // compiles a shader given glsl; returns GL_TRUE on success; glsl only used during call @@ -248,9 +249,13 @@ struct GGLInterface { // LLVM JIT and set as active program void (* ShaderUse)(GGLInterface_t * iface, gl_shader_program_t * program); - void (* ShaderGetiv)(gl_shader_t * shader, const GLenum pname, GLint * params); + void (* ShaderGetiv)(const gl_shader_t * shader, const GLenum pname, GLint * params); + + void (* ShaderGetInfoLog)(const gl_shader_t * shader, GLsizei bufsize, GLsizei* length, GLchar* infolog); - void (* ShaderProgramGetiv)(gl_shader_program_t * program, const GLenum pname, GLint * params); + void (* ShaderProgramGetiv)(const gl_shader_program_t * program, const GLenum pname, GLint * params); + + void (* ShaderProgramGetInfoLog)(const gl_shader_program_t * program, GLsizei bufsize, GLsizei* length, GLchar* infolog); // bind attribute location before linking void (* ShaderAttributeBind)(const gl_shader_program_t * program, @@ -267,15 +272,15 @@ struct GGLInterface { GLint location, GLfloat * params); void (* ShaderUniformGetiv)(gl_shader_program_t * program, GLint location, GLint * params); - + // retrieves the tmu each sampler is set to, sampler2tmu[sampler] == -1 means not used - void (* ShaderUniformGetSamplers)(const gl_shader_program_t * program, - int sampler2tmu[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]); + void (* ShaderUniformGetSamplers)(const gl_shader_program_t * program, + int sampler2tmu[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]); // updates linked program uniform value by location; return >= 0 indicates sampler assigned GLint (* ShaderUniform)(gl_shader_program_t * program, GLint location, GLsizei count, const GLvoid *values, GLenum type); - + // updates linked program uniform matrix value by location void (* ShaderUniformMatrix)(gl_shader_program_t * program, GLint cols, GLint rows, GLint location, GLsizei count, @@ -317,9 +322,13 @@ extern "C" // LLVM JIT and set as active program, also call after gglState change to re-JIT void GGLShaderUse(void * llvmCtx, const GGLState_t * gglState, gl_shader_program_t * program); - void GGLShaderGetiv(gl_shader_t * shader, const GLenum pname, GLint * params); + void GGLShaderGetiv(const gl_shader_t * shader, const GLenum pname, GLint * params); - void GGLShaderProgramGetiv(gl_shader_program_t * program, const GLenum pname, GLint * params); + void GGLShaderGetInfoLog(const gl_shader_t * shader, GLsizei bufsize, GLsizei* length, GLchar* infolog); + + void GGLShaderProgramGetiv(const gl_shader_program_t * program, const GLenum pname, GLint * params); + + void GGLShaderProgramGetInfoLog(const gl_shader_program_t * program, GLsizei bufsize, GLsizei* length, GLchar* infolog); // bind attribute location before linking void GGLShaderAttributeBind(const gl_shader_program_t * program, @@ -332,20 +341,23 @@ extern "C" // gets uniform location for linked program GLint GGLShaderUniformLocation(const gl_shader_program_t * program, const char * name); - + + void GGLShaderUniformMatrix(gl_shader_program_t * program, GLint cols, GLint rows, + GLint location, GLsizei count, GLboolean transpose, const GLfloat *values); + // retrieves the tmu each sampler is set to, sampler2tmu[sampler] == -1 means not used - void GGLShaderUniformGetSamplers(const gl_shader_program_t * program, - int sampler2tmu[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]); + void GGLShaderUniformGetSamplers(const gl_shader_program_t * program, + int sampler2tmu[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]); void GGLProcessVertex(const gl_shader_program_t * program, const VertexInput_t * input, VertexOutput_t * output, const float (*constants)[4]); // scan line given left and right processed and scizored vertices // depth value bitcast float->int, if negative then ^= 0x7fffffff - void GGLScanLine(const gl_shader_program_t * program, unsigned * frameBuffer, - int * depthBuffer, unsigned char * stencilBuffer, unsigned bufferWidth, - unsigned bufferHeight, GGLActiveStencil_t * activeStencil, const VertexOutput_t * start, - const VertexOutput_t * end, const float (*constants)[4]); + void GGLScanLine(const gl_shader_program_t * program, const enum GGLPixelFormat colorFormat, + void * frameBuffer, int * depthBuffer, unsigned char * stencilBuffer, + unsigned bufferWidth, unsigned bufferHeight, GGLActiveStencil_t * activeStencil, + const VertexOutput_t * start, const VertexOutput_t * end, const float (*constants)[4]); // void GGLProcessFragment(const VertexOutput_t * inputs, VertexOutput_t * outputs, // const float (*constants[4])); diff --git a/libMesa.project b/libMesa.project index 1ff6bcb..66b0723 100644 --- a/libMesa.project +++ b/libMesa.project @@ -134,7 +134,6 @@ <File Name="src/glsl/opt_constant_folding.cpp"/> <File Name="src/glsl/ast_function.cpp"/> <File Name="src/glsl/lower_jumps.cpp"/> - <File Name="src/glsl/ir_to_llvm_helper.cpp"/> <File Name="src/glsl/ast_expr.cpp"/> <File Name="src/glsl/ir_print_visitor.cpp"/> <File Name="src/glsl/opt_noop_swizzle.cpp"/> @@ -188,6 +187,7 @@ <File Name="src/pixelflinger2/buffer.cpp"/> <File Name="src/pixelflinger2/llvm_helper.h"/> <File Name="src/pixelflinger2/scanline.cpp"/> + <File Name="src/pixelflinger2/llvm_texture.cpp"/> </VirtualDirectory> </VirtualDirectory> <Description/> diff --git a/src/glsl/ir_to_llvm.cpp b/src/glsl/ir_to_llvm.cpp index 5c8b2ff..250237e 100644 --- a/src/glsl/ir_to_llvm.cpp +++ b/src/glsl/ir_to_llvm.cpp @@ -1255,22 +1255,23 @@ public: if (!(ir->write_mask & mask)) return; - if(ir->rhs->type->vector_elements < width) - { + if (ir->rhs->type->vector_elements < width) { int expand_mask[4] = {-1, -1, -1, -1}; - for(unsigned i = 0; i < ir->lhs->type->vector_elements; ++i) + for (unsigned i = 0; i < ir->lhs->type->vector_elements; ++i) expand_mask[i] = i; // printf("ve: %u w %u issw: %i\n", ir->rhs->type->vector_elements, width, !!ir->rhs->as_swizzle()); rhs = llvm_shuffle(rhs, expand_mask, width, "assign.expand"); } - if(width > 1 && (ir->write_mask & mask) != mask) - { + if (width > 1 && (ir->write_mask & mask) != mask) { llvm::Constant* blend_mask[4]; - for(unsigned i = 0; i < width; ++i) - { - if(ir->write_mask & (1 << i)) - blend_mask[i] = llvm_int(width + i); + // refer to ir.h: ir_assignment::write_mask + // A partially-set write mask means that each enabled channel gets + // the value from a consecutive channel of the rhs. + unsigned rhsChannel = 0; + for (unsigned i = 0; i < width; ++i) { + if (ir->write_mask & (1 << i)) + blend_mask[i] = llvm_int(width + rhsChannel++); else blend_mask[i] = llvm_int(i); } diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 14d1050..f8b6962 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1124,6 +1124,7 @@ assign_uniform_locations(struct gl_shader_program *prog) prog->Uniforms = ul; prog->Uniforms->Slots = next_position; + prog->Uniforms->SamplerSlots = next_sampler_pos; hieralloc_free(mem_ctx); } @@ -1741,20 +1742,13 @@ link_shaders(const struct gl_context *ctx, struct gl_shader_program *prog) //prog->InputOuputBase = malloc(1024 * 8); //memset(prog->InputOuputBase, 0xdd, 1024 * 8); prog->InputOuputBase = hieralloc_realloc(prog, prog->InputOuputBase, char, - prog->Uniforms->Slots * 16 + sizeof(VertexInput) + sizeof(VertexOutput) + 16); - prog->ValuesVertexInput = (float (*)[4])((((unsigned long)prog->InputOuputBase) + 15) & (~15L)); + (prog->Uniforms->Slots + prog->Uniforms->SamplerSlots) * sizeof(float) * 4 + sizeof(VertexInput) + sizeof(VertexOutput) + 16); + prog->ValuesVertexInput = (float (*)[4])((((unsigned long)prog->InputOuputBase) + 15L) & (~15L)); prog->ValuesVertexOutput = (float (*)[4])((unsigned long)prog->ValuesVertexInput + sizeof(VertexInput)); prog->ValuesUniform = (float (*)[4])((unsigned long)prog->ValuesVertexOutput + sizeof(VertexOutput)); - // default mapping of tmu to sampler - for (unsigned i = 0; i < prog->Uniforms->NumUniforms; i++) - { - const gl_uniform & uniform = prog->Uniforms->Uniforms[i]; - if (uniform.Type->is_sampler()) - prog->ValuesUniform[uniform.Pos][0] = uniform.Pos; - else if (uniform.Type->is_array() && uniform.Type->fields.array->is_sampler()) - assert(0); - } + // initialize uniforms to zero after link + memset(prog->ValuesUniform, 0, sizeof(float) * 4 * (prog->Uniforms->Slots + prog->Uniforms->SamplerSlots)); done: free(vert_shader_list); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 52a98f9..9aa1c52 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2140,7 +2140,7 @@ struct gl_shader_program * \c NULL. */ struct gl_shader *_LinkedShaders[MESA_SHADER_TYPES]; - GLfloat (*ValuesUniform)[4]; + GLfloat (*ValuesUniform)[4]; /** < samplers are at ValuesUniform[gl_uniform_list::Slots + sampler.Pos]*/ GLfloat (*ValuesVertexInput)[4]; /**< actually a VertexInput */ GLfloat (*ValuesVertexOutput)[4]; /**< actually a VertexOutput */ void * InputOuputBase; /**< allocation base for Values* */ diff --git a/src/mesa/program/prog_uniform.h b/src/mesa/program/prog_uniform.h index 18c5c71..c6c92b0 100644 --- a/src/mesa/program/prog_uniform.h +++ b/src/mesa/program/prog_uniform.h @@ -63,7 +63,8 @@ struct gl_uniform_list { GLuint Size; /**< allocated size of Uniforms array */ GLuint NumUniforms; /**< number of uniforms in the array */ - GLuint Slots; /**< number of float[4] slots uniforms will occupy */ + GLuint Slots; /**< number of float[4] slots non-sampler uniforms occupy */ + GLuint SamplerSlots; /**< number of float[4] slots samplers occupy */ struct gl_uniform *Uniforms; /**< Array [Size] */ }; diff --git a/src/pixelflinger2/buffer.cpp b/src/pixelflinger2/buffer.cpp index 957061f..1169fe1 100644 --- a/src/pixelflinger2/buffer.cpp +++ b/src/pixelflinger2/buffer.cpp @@ -1,23 +1,22 @@ -/** +/** ** ** Copyright 2010, The Android Open Source Project ** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at ** - ** http://www.apache.org/licenses/LICENSE-2.0 + ** http://www.apache.org/licenses/LICENSE-2.0 ** - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and ** limitations under the License. */ #include "src/pixelflinger2/pixelflinger2.h" -#include <assert.h> #include <string.h> #include <stdio.h> @@ -25,202 +24,221 @@ void SetShaderVerifyFunctions(GGLInterface *); static void DepthFunc(GGLInterface * iface, GLenum func) { - GGL_GET_CONTEXT(ctx, iface); - if (GL_NEVER > func || GL_ALWAYS < func) - return gglError(GL_INVALID_ENUM); - ctx->state.bufferState.depthFunc = func & 0x7; - SetShaderVerifyFunctions(iface); + GGL_GET_CONTEXT(ctx, iface); + if (GL_NEVER > func || GL_ALWAYS < func) + return gglError(GL_INVALID_ENUM); + ctx->state.bufferState.depthFunc = func & 0x7; + SetShaderVerifyFunctions(iface); } static void StencilFuncSeparate(GGLInterface * iface, GLenum face, GLenum func, GLint ref, GLuint mask) { - GGL_GET_CONTEXT(ctx, iface); - if (GL_FRONT > face || GL_FRONT_AND_BACK < face) - return gglError(GL_INVALID_ENUM); - if (GL_NEVER > func || GL_ALWAYS < func) - return gglError(GL_INVALID_ENUM); - mask &= 0xff; - ref = MAX2(MIN2(ref, 0xff), 0); - ref &= mask; - if (GL_FRONT == face || GL_FRONT_AND_BACK == face) - { - ctx->state.frontStencil.ref = ref; - ctx->state.frontStencil.mask = mask; - ctx->state.frontStencil.func = func & 0x7; - } - if (GL_BACK == face || GL_FRONT_AND_BACK == face) - { - ctx->state.backStencil.ref = ref; - ctx->state.backStencil.mask = mask; - ctx->state.backStencil.func = func & 0x7; - } - SetShaderVerifyFunctions(iface); + GGL_GET_CONTEXT(ctx, iface); + if (GL_FRONT > face || GL_FRONT_AND_BACK < face) + return gglError(GL_INVALID_ENUM); + if (GL_NEVER > func || GL_ALWAYS < func) + return gglError(GL_INVALID_ENUM); + mask &= 0xff; + ref = MAX2(MIN2(ref, 0xff), 0); + ref &= mask; + if (GL_FRONT == face || GL_FRONT_AND_BACK == face) { + ctx->state.frontStencil.ref = ref; + ctx->state.frontStencil.mask = mask; + ctx->state.frontStencil.func = func & 0x7; + } + if (GL_BACK == face || GL_FRONT_AND_BACK == face) { + ctx->state.backStencil.ref = ref; + ctx->state.backStencil.mask = mask; + ctx->state.backStencil.func = func & 0x7; + } + SetShaderVerifyFunctions(iface); } static unsigned StencilOpEnum(GLenum func, unsigned oldValue) { - switch (func) - { - case GL_ZERO: return 0; - case GL_KEEP: // fall through - case GL_REPLACE: // fall through - case GL_INCR: // fall through - case GL_DECR: return func - GL_KEEP + 1; break; - case GL_INVERT: return 5; - case GL_INCR_WRAP: return 6; - case GL_DECR_WRAP: return 7; - default: gglError(GL_INVALID_ENUM); return oldValue; - } + switch (func) { + case GL_ZERO: + return 0; + case GL_KEEP: // fall through + case GL_REPLACE: // fall through + case GL_INCR: // fall through + case GL_DECR: + return func - GL_KEEP + 1; + break; + case GL_INVERT: + return 5; + case GL_INCR_WRAP: + return 6; + case GL_DECR_WRAP: + return 7; + default: + gglError(GL_INVALID_ENUM); + return oldValue; + } } static void StencilOpSeparate(GGLInterface * iface, GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) { - GGL_GET_CONTEXT(ctx, iface); - if (GL_FRONT > face || GL_FRONT_AND_BACK < face) - return gglError(GL_INVALID_ENUM); - if (GL_FRONT == face || GL_FRONT_AND_BACK == face) - { - ctx->state.frontStencil.sFail = StencilOpEnum(sfail, ctx->state.frontStencil.sFail); - ctx->state.frontStencil.dFail = StencilOpEnum(dpfail, ctx->state.frontStencil.dFail); - ctx->state.frontStencil.dPass = StencilOpEnum(dppass, ctx->state.frontStencil.dPass); - } - if (GL_BACK == face || GL_FRONT_AND_BACK == face) - { - ctx->state.backStencil.sFail = StencilOpEnum(sfail, ctx->state.backStencil.sFail); - ctx->state.backStencil.dFail = StencilOpEnum(dpfail, ctx->state.backStencil.dFail); - ctx->state.backStencil.dPass = StencilOpEnum(dppass, ctx->state.backStencil.dPass); - } - SetShaderVerifyFunctions(iface); + GGL_GET_CONTEXT(ctx, iface); + if (GL_FRONT > face || GL_FRONT_AND_BACK < face) + return gglError(GL_INVALID_ENUM); + if (GL_FRONT == face || GL_FRONT_AND_BACK == face) { + ctx->state.frontStencil.sFail = StencilOpEnum(sfail, ctx->state.frontStencil.sFail); + ctx->state.frontStencil.dFail = StencilOpEnum(dpfail, ctx->state.frontStencil.dFail); + ctx->state.frontStencil.dPass = StencilOpEnum(dppass, ctx->state.frontStencil.dPass); + } + if (GL_BACK == face || GL_FRONT_AND_BACK == face) { + ctx->state.backStencil.sFail = StencilOpEnum(sfail, ctx->state.backStencil.sFail); + ctx->state.backStencil.dFail = StencilOpEnum(dpfail, ctx->state.backStencil.dFail); + ctx->state.backStencil.dPass = StencilOpEnum(dppass, ctx->state.backStencil.dPass); + } + SetShaderVerifyFunctions(iface); } static void StencilSelect(const GGLInterface * iface, GLenum face) { - GGL_GET_CONTEXT(ctx, iface); - if (GL_FRONT == face) - { - ctx->activeStencil.face = 0; - ctx->activeStencil.ref = ctx->state.frontStencil.ref; - ctx->activeStencil.mask = ctx->state.frontStencil.mask; - } - else if (GL_BACK == face) - { - ctx->activeStencil.face = 1; - ctx->activeStencil.ref = ctx->state.backStencil.ref; - ctx->activeStencil.mask = ctx->state.backStencil.mask; - } + GGL_GET_CONTEXT(ctx, iface); + if (GL_FRONT == face) { + ctx->activeStencil.face = 0; + ctx->activeStencil.ref = ctx->state.frontStencil.ref; + ctx->activeStencil.mask = ctx->state.frontStencil.mask; + } else if (GL_BACK == face) { + ctx->activeStencil.face = 1; + ctx->activeStencil.ref = ctx->state.backStencil.ref; + ctx->activeStencil.mask = ctx->state.backStencil.mask; + } } static void ClearStencil(GGLInterface * iface, GLint s) { - GGL_GET_CONTEXT(ctx, iface); - ctx->clearState.stencil = 0x01010101 * ((unsigned &)s & 0xff); + GGL_GET_CONTEXT(ctx, iface); + ctx->clearState.stencil = 0x01010101 * ((unsigned &)s & 0xff); } static void ClearColor(GGLInterface * iface, GLclampf r, GLclampf g, GLclampf b, GLclampf a) { - GGL_GET_CONTEXT(ctx, iface); - r = MAX2(MIN2(r, 1.0f), 0); - g = MAX2(MIN2(g, 1.0f), 0); - b = MAX2(MIN2(b, 1.0f), 0); - a = MAX2(MIN2(a, 1.0f), 0); - ctx->clearState.color = (unsigned(a * 255) << 24) | (unsigned(b * 255) << 16) | - (unsigned(g * 255) << 8) | unsigned(r * 255); + GGL_GET_CONTEXT(ctx, iface); + r = MAX2(MIN2(r, 1.0f), 0); + g = MAX2(MIN2(g, 1.0f), 0); + b = MAX2(MIN2(b, 1.0f), 0); + a = MAX2(MIN2(a, 1.0f), 0); + ctx->clearState.color = (unsigned(a * 255) << 24) | (unsigned(b * 255) << 16) | + (unsigned(g * 255) << 8) | unsigned(r * 255); } static void ClearDepthf(GGLInterface * iface, GLclampf d) { - GGL_GET_CONTEXT(ctx, iface); - // assuming ieee 754 32 bit float and 32 bit 2's complement int - assert(sizeof(d) == sizeof(ctx->clearState.depth)); - ctx->clearState.depth = (int &)d; // bit reinterpretation - if (0x80000000 & ctx->clearState.depth) // smaller negative float has bigger int representation, so flip - ctx->clearState.depth ^= 0x7fffffff; // since -FLT_MAX is close to -1 when bitcasted + GGL_GET_CONTEXT(ctx, iface); + // assuming ieee 754 32 bit float and 32 bit 2's complement int + assert(sizeof(d) == sizeof(ctx->clearState.depth)); + ctx->clearState.depth = (int &)d; // bit reinterpretation + if (0x80000000 & ctx->clearState.depth) // smaller negative float has bigger int representation, so flip + ctx->clearState.depth ^= 0x7fffffff; // since -FLT_MAX is close to -1 when bitcasted } static void Clear(const GGLInterface * iface, GLbitfield buf) { - GGL_GET_CONST_CONTEXT(ctx, iface); - - // TODO DXL scissor test - if (GL_COLOR_BUFFER_BIT & buf && ctx->frameSurface.data) - { - assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format); - unsigned * const end = (unsigned *)ctx->frameSurface.data + - ctx->frameSurface.width * ctx->frameSurface.height; - const unsigned color = ctx->clearState.color; - for (unsigned * start = (unsigned *)ctx->frameSurface.data; start < end; start++) + GGL_GET_CONST_CONTEXT(ctx, iface); + + // TODO DXL scissor test + if (GL_COLOR_BUFFER_BIT & buf && ctx->frameSurface.data) { + if (GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format) { + unsigned * const end = (unsigned *)ctx->frameSurface.data + + ctx->frameSurface.width * ctx->frameSurface.height; + const unsigned color = ctx->clearState.color; + for (unsigned * start = (unsigned *)ctx->frameSurface.data; start < end; start++) + *start = color; + } else if (GGL_PIXEL_FORMAT_RGB_565 == ctx->frameSurface.format) { + short * const end = (short *)ctx->frameSurface.data + + ctx->frameSurface.width * ctx->frameSurface.height; + unsigned r = ctx->clearState.color & 0xf8, g = ctx->clearState.color & 0xfc00, + b = ctx->clearState.color & 0xf80000; + const short color = (b >> 19) | (g >> 5) | (r >> 3); + for (short * start = (short *)ctx->frameSurface.data; start < end; start++) *start = color; - } - if (GL_DEPTH_BUFFER_BIT & buf && ctx->depthSurface.data) - { - assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); - unsigned * const end = (unsigned *)ctx->depthSurface.data + - ctx->depthSurface.width * ctx->depthSurface.height; - const unsigned depth = ctx->clearState.depth; - for (unsigned * start = (unsigned *)ctx->depthSurface.data; start < end; start++) - *start = depth; - } - if (GL_STENCIL_BUFFER_BIT & buf && ctx->stencilSurface.data) - { - assert(GGL_PIXEL_FORMAT_S_8 == ctx->stencilSurface.format); - unsigned * const end = (unsigned *)((unsigned char *)ctx->stencilSurface.data + - ctx->stencilSurface.width * ctx->stencilSurface.height); - unsigned * start = (unsigned *)ctx->stencilSurface.data; - const unsigned stencil = ctx->clearState.stencil; - for (start; start < end; start++) - *start = stencil; - start--; - for (unsigned char * i = (unsigned char *)start; i < (unsigned char *)end; i++) - *i = stencil & 0xff; - } + } else + assert(0); + } + if (GL_DEPTH_BUFFER_BIT & buf && ctx->depthSurface.data) { + assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); + unsigned * const end = (unsigned *)ctx->depthSurface.data + + ctx->depthSurface.width * ctx->depthSurface.height; + const unsigned depth = ctx->clearState.depth; + for (unsigned * start = (unsigned *)ctx->depthSurface.data; start < end; start++) + *start = depth; + } + if (GL_STENCIL_BUFFER_BIT & buf && ctx->stencilSurface.data) { + assert(GGL_PIXEL_FORMAT_S_8 == ctx->stencilSurface.format); + unsigned * const end = (unsigned *)((unsigned char *)ctx->stencilSurface.data + + ctx->stencilSurface.width * ctx->stencilSurface.height); + unsigned * start = (unsigned *)ctx->stencilSurface.data; + const unsigned stencil = ctx->clearState.stencil; + for (start; start < end; start++) + *start = stencil; + start--; + for (unsigned char * i = (unsigned char *)start; i < (unsigned char *)end; i++) + *i = stencil & 0xff; + } } static void SetBuffer(GGLInterface * iface, const GLenum type, GGLSurface * surface) { - GGL_GET_CONTEXT(ctx, iface); - if (GL_COLOR_BUFFER_BIT == type) - { - if (surface) - { - ctx->frameSurface = *surface; - assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format); - } - else - memset(&ctx->frameSurface, 0, sizeof(ctx->frameSurface)); - } - else if (GL_DEPTH_BUFFER_BIT == type) - { - if (surface) - { - ctx->depthSurface = *surface; - assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); - } - else - memset(&ctx->depthSurface, 0, sizeof(ctx->depthSurface)); - } - else if (GL_STENCIL_BUFFER_BIT == type) - { - if (surface) - { - ctx->stencilSurface = *surface; - assert(GGL_PIXEL_FORMAT_S_8 == ctx->stencilSurface.format); - } - else - memset(&ctx->stencilSurface, 0, sizeof(ctx->stencilSurface)); - } - else - gglError(GL_INVALID_ENUM); + GGL_GET_CONTEXT(ctx, iface); + bool changed = false; + if (GL_COLOR_BUFFER_BIT == type) { + if (surface) { + ctx->frameSurface = *surface; + changed |= ctx->frameSurface.format ^ surface->format; + switch (surface->format) { + case GGL_PIXEL_FORMAT_RGBA_8888: + case GGL_PIXEL_FORMAT_RGB_565: + break; + case GGL_PIXEL_FORMAT_RGBX_8888: + default: + LOGD("pf2: SetBuffer 0x%.04X format=0x%.02X \n", type, surface ? surface->format : 0); + assert(0); + } + } else { + memset(&ctx->frameSurface, 0, sizeof(ctx->frameSurface)); + changed = true; + } + ctx->state.bufferState.colorFormat = ctx->frameSurface.format; + } else if (GL_DEPTH_BUFFER_BIT == type) { + if (surface) { + ctx->depthSurface = *surface; + changed |= ctx->depthSurface.format ^ surface->format; + assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); + } else { + memset(&ctx->depthSurface, 0, sizeof(ctx->depthSurface)); + changed = true; + } + ctx->state.bufferState.depthFormat = ctx->depthSurface.format; + } else if (GL_STENCIL_BUFFER_BIT == type) { + if (surface) { + ctx->stencilSurface = *surface; + changed |= ctx->stencilSurface.format ^ surface->format; + assert(GGL_PIXEL_FORMAT_S_8 == ctx->stencilSurface.format); + } else { + memset(&ctx->stencilSurface, 0, sizeof(ctx->stencilSurface)); + changed = true; + } + ctx->state.bufferState.stencilFormat = ctx->stencilSurface.format; + } else + gglError(GL_INVALID_ENUM); + if (changed) { + SetShaderVerifyFunctions(iface); + } } void InitializeBufferFunctions(GGLInterface * iface) { - iface->DepthFunc = DepthFunc; - iface->StencilFuncSeparate = StencilFuncSeparate; - iface->StencilOpSeparate = StencilOpSeparate; - iface->StencilSelect = StencilSelect; - iface->ClearStencil = ClearStencil; - iface->ClearColor = ClearColor; - iface->ClearDepthf = ClearDepthf; - iface->Clear = Clear; - iface->SetBuffer = SetBuffer; -}
\ No newline at end of file + iface->DepthFunc = DepthFunc; + iface->StencilFuncSeparate = StencilFuncSeparate; + iface->StencilOpSeparate = StencilOpSeparate; + iface->StencilSelect = StencilSelect; + iface->ClearStencil = ClearStencil; + iface->ClearColor = ClearColor; + iface->ClearDepthf = ClearDepthf; + iface->Clear = Clear; + iface->SetBuffer = SetBuffer; +} diff --git a/src/pixelflinger2/llvm_scanline.cpp b/src/pixelflinger2/llvm_scanline.cpp index e9b9efe..26c62cd 100644 --- a/src/pixelflinger2/llvm_scanline.cpp +++ b/src/pixelflinger2/llvm_scanline.cpp @@ -21,6 +21,9 @@ #include <llvm/Module.h> +//#undef LOGD +//#define LOGD(...) + using namespace llvm; static void StencilOp(IRBuilder<> &builder, const unsigned char op, @@ -126,63 +129,64 @@ static Value * BlendFactor(const unsigned mode, Value * src, Value * dst, { Value * factor = NULL; switch (mode) { - case 0: // GL_ZERO + case GGLBlendState::GGL_ZERO: factor = zero; break; - case 1: // GL_ONE + case GGLBlendState::GGL_ONE: factor = one; break; - case 2: // GL_SRC_COLOR: + case GGLBlendState::GGL_SRC_COLOR: factor = src; break; - case 3: // GL_ONE_MINUS_SRC_COLOR: + case GGLBlendState::GGL_ONE_MINUS_SRC_COLOR: factor = builder.CreateSub(one, src); break; - case 4: // GL_DST_COLOR: + case GGLBlendState::GGL_DST_COLOR: factor = dst; break; - case 5: // GL_ONE_MINUS_DST_COLOR: + case GGLBlendState::GGL_ONE_MINUS_DST_COLOR: factor = builder.CreateSub(one, dst); break; - case 6: // GL_SRC_ALPHA: + case GGLBlendState::GGL_SRC_ALPHA: factor = srcA; if (isVector) factor = intVec(builder, factor, factor, factor, factor); break; - case 7: // GL_ONE_MINUS_SRC_ALPHA: + case GGLBlendState::GGL_ONE_MINUS_SRC_ALPHA: factor = builder.CreateSub(sOne, srcA); if (isVector) factor = intVec(builder, factor, factor, factor, factor); break; - case 8: // GL_DST_ALPHA: + case GGLBlendState::GGL_DST_ALPHA: factor = dstA; if (isVector) factor = intVec(builder, factor, factor, factor, factor); break; - case 9: // GL_ONE_MINUS_DST_ALPHA: + case GGLBlendState::GGL_ONE_MINUS_DST_ALPHA: factor = builder.CreateSub(sOne, dstA); if (isVector) factor = intVec(builder, factor, factor, factor, factor); break; - case 10: // GL_SRC_ALPHA_SATURATE: // valid only for source color and alpha + case GGLBlendState::GGL_SRC_ALPHA_SATURATE: + // valid only for source color and alpha factor = minIntScalar(builder, srcA, builder.CreateSub(sOne, dstA)); if (isVector) factor = intVec(builder, factor, factor, factor, sOne); else factor = sOne; // when it's used for source alpha, it's just 1 break; - case 11: // GL_CONSTANT_COLOR: + case GGLBlendState::GGL_CONSTANT_COLOR: factor = constant; break; - case 12: // GL_ONE_MINUS_CONSTANT_COLOR: + case GGLBlendState::GGL_ONE_MINUS_CONSTANT_COLOR: factor = builder.CreateSub(one, constant); break; - case 13: // GL_CONSTANT_ALPHA: + case GGLBlendState::GGL_CONSTANT_ALPHA: factor = constantA; if (isVector) factor = intVec(builder, factor, factor, factor, factor); break; - case 14: // GL_ONE_MINUS_CONSTANT_ALPHA: + case GGLBlendState::GGL_ONE_MINUS_CONSTANT_ALPHA: factor = builder.CreateSub(sOne, constantA); if (isVector) factor = intVec(builder, factor, factor, factor, factor); @@ -201,19 +205,60 @@ static Value * Saturate(IRBuilder<> & builder, Value * intVector) } // src is int32x4 [0,255] rgba vector, and combines them into int32 -static Value * IntVectorToColor(IRBuilder<> & builder, Value * src) +// RGB_565 channel order is weird +static Value * IntVectorToScreenColor(IRBuilder<> & builder, const GGLPixelFormat format, Value * src) { - //src = builder.CreateBitCast(src, inst->GetIntVectorType()); - src = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24)); - std::vector<Value *> comps = extractVector(builder, src); - comps[0] = builder.CreateOr(comps[0], comps[1]); - comps[0] = builder.CreateOr(comps[0], comps[2]); - comps[0] = builder.CreateOr(comps[0], comps[3]); - return comps[0]; + if (GGL_PIXEL_FORMAT_RGBA_8888 == format) { + src = builder.CreateShl(src, constIntVec(builder, 0, 8, 16, 24)); + std::vector<Value *> comps = extractVector(builder, src); + comps[0] = builder.CreateOr(comps[0], comps[1]); + comps[0] = builder.CreateOr(comps[0], comps[2]); + comps[0] = builder.CreateOr(comps[0], comps[3]); + return comps[0]; + } else if (GGL_PIXEL_FORMAT_RGB_565 == format) { + src = builder.CreateAnd(src, constIntVec(builder, 0xf8, 0xfc, 0xf8, 0)); + std::vector<Value *> comps = extractVector(builder, src); + // channel order is weird + for (unsigned i = 0; i < 4; i++) + comps[i] = builder.CreateTrunc(comps[i], builder.getInt16Ty()); + comps[2] = builder.CreateLShr(comps[2], 3); + comps[1] = builder.CreateShl(comps[1], 3); + comps[0] = builder.CreateShl(comps[0], 8); + + comps[0] = builder.CreateOr(comps[0], comps[1]); + comps[0] = builder.CreateOr(comps[0], comps[2]); + return comps[0]; + } else if (GGL_PIXEL_FORMAT_UNKNOWN == format) + return builder.getInt32(0); + else + assert(0); + return NULL; +} + +// src is int32 or int16, return is int32x4 [0,255] rgba +// RGB_565 channel order is weird +static Value * ScreenColorToIntVector(IRBuilder<> & builder, const GGLPixelFormat format, Value * src) +{ + src = builder.CreateZExt(src, builder.getInt32Ty()); + Value * dst = intVec(builder, src, src, src, src); + if (GGL_PIXEL_FORMAT_RGBA_8888 == format) { + dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24)); + dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff)); + } else if (GGL_PIXEL_FORMAT_RGB_565 == format) { + // channel order is weird + dst = builder.CreateAnd(dst, constIntVec(builder, 0xf800, 0x7e0, 0x1f, 0)); + dst = builder.CreateLShr(dst, constIntVec(builder, 8, 3, 0, 0)); + dst = builder.CreateShl(dst, constIntVec(builder, 0, 0, 3, 0)); + dst = builder.CreateOr(dst, constIntVec(builder, 0, 0, 0, 0xff)); + } else if (GGL_PIXEL_FORMAT_UNKNOWN == format) + LOGD("pf2: ScreenColorToIntVector GGL_PIXEL_FORMAT_UNKNOWN"); // not set yet, do nothing + else + assert(0); + return dst; } // src is <4 x float> approx [0,1]; dst is <4 x i32> [0,255] from frame buffer; return is i32 -Value * GenerateFSBlend(const GGLState * gglCtx, /*const RegDesc * regDesc,*/ +Value * GenerateFSBlend(const GGLState * gglCtx, const GGLPixelFormat format, /*const RegDesc * regDesc,*/ IRBuilder<> & builder, Value * src, Value * dst) { const Type * const intType = builder.getInt32Ty(); @@ -229,9 +274,9 @@ Value * GenerateFSBlend(const GGLState * gglCtx, /*const RegDesc * regDesc,*/ // else if (regDesc->IsVectorType(Float)) // { src = builder.CreateFMul(src, constFloatVec(builder,255,255,255,255)); - src = builder.CreateFPToUI(src, intVecType(builder)); + src = builder.CreateFPToSI(src, intVecType(builder)); src = Saturate(builder, src); - src = IntVectorToColor(builder, src); + src = IntVectorToScreenColor(builder, format, src); // } // else if (regDesc->IsVectorType(Fixed8)) // { @@ -250,7 +295,6 @@ Value * GenerateFSBlend(const GGLState * gglCtx, /*const RegDesc * regDesc,*/ // assert(0); return src; } - // blending, so convert src to <4 x i32> // if (regDesc->IsInt32Color()) // { @@ -349,7 +393,7 @@ Value * GenerateFSBlend(const GGLState * gglCtx, /*const RegDesc * regDesc,*/ srcA = extractVector(builder,src)[3]; dstA = extractVector(builder,dst)[3]; Value * resA = NULL; - switch (gglCtx->blendState.ce + GL_FUNC_ADD) { + switch (gglCtx->blendState.ae + GL_FUNC_ADD) { case GL_FUNC_ADD: resA = builder.CreateAdd(srcA, dstA); break; @@ -369,7 +413,7 @@ Value * GenerateFSBlend(const GGLState * gglCtx, /*const RegDesc * regDesc,*/ res = builder.CreateAShr(res, constIntVec(builder,8,8,8,8)); res = Saturate(builder, res); - res = IntVectorToColor(builder, res); + res = IntVectorToScreenColor(builder, format, res); return res; } @@ -398,14 +442,14 @@ static FunctionType * ScanLineFunctionType(IRBuilder<> & builder) return functionType; } -// generated scanline function parameters are VertexOutput * start, VertexOutput * step, +// generated scanline function parameters are VertexOutput * start, VertexOutput * step, // unsigned * frame, int * depth, unsigned char * stencil, // GGLActiveStencilState * stencilState, unsigned count void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program, Module * mod, const char * shaderName, const char * scanlineName) { IRBuilder<> builder(mod->getContext()); - debug_printf("GenerateScanLine %s \n", scanlineName); +// debug_printf("GenerateScanLine %s \n", scanlineName); const Type * intType = builder.getInt32Ty(); const PointerType * intPointerType = PointerType::get(intType, 0); @@ -422,7 +466,7 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program BasicBlock *label_entry = BasicBlock::Create(builder.getContext(), "entry", func, 0); builder.SetInsertPoint(label_entry); CondBranch condBranch(builder); - + Function::arg_iterator args = func->arg_begin(); Value * start = args++; start->setName("start"); @@ -462,11 +506,23 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program condBranch.beginLoop(); // while (count > 0) + assert(framePtr && gglCtx); // get values - Value * frame = builder.CreateLoad(framePtr); + Value * frame = NULL; + if (GGL_PIXEL_FORMAT_RGBA_8888 == gglCtx->bufferState.colorFormat) + frame = builder.CreateLoad(framePtr); + else if (GGL_PIXEL_FORMAT_RGB_565 == gglCtx->bufferState.colorFormat) { + frame = builder.CreateLoad(framePtr); + frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt16Ty(), 0)); + } else if (GGL_PIXEL_FORMAT_UNKNOWN == gglCtx->bufferState.colorFormat) + frame = builder.CreateLoad(framePtr); // color buffer not set yet + else + assert(0); + frame->setName("frame"); Value * depth = NULL, * stencil = NULL; if (gglCtx->bufferState.depthTest) { + assert(GGL_PIXEL_FORMAT_Z_32 == gglCtx->bufferState.depthFormat); depth = builder.CreateLoad(depthPtr); depth->setName("depth"); } @@ -570,17 +626,14 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program condBranch.ifCond(sCmp, "if_sCmp", "sCmp_fail"); condBranch.ifCond(zCmp, "if_zCmp", "zCmp_fail"); -// Value * fsInputs = builder.CreateConstInBoundsGEP1_32(start, -// offsetof(VertexOutput,position)/sizeof(Vector4)); Value * inputs = start; - Value * outputs = inputs; - - Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start, - offsetof(VertexOutput,fragColor)/sizeof(Vector4)); - + Value * outputs = start; + + Value * fsOutputs = builder.CreateConstInBoundsGEP1_32(start, + offsetof(VertexOutput,fragColor)/sizeof(Vector4)); + Function * fsFunction = mod->getFunction(shaderName); assert(fsFunction); -// CallInst *call = builder.CreateCall(fsFunction); CallInst *call = builder.CreateCall3(fsFunction,inputs, outputs, constants); call->setCallingConv(CallingConv::C); call->setTailCall(false); @@ -588,20 +641,14 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program Value * dst = Constant::getNullValue(intVecType(builder)); if (gglCtx->blendState.enable && (0 != gglCtx->blendState.dcf || 0 != gglCtx->blendState.daf)) { Value * frameColor = builder.CreateLoad(frame, "frameColor"); - dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(0)); - dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(1)); - dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(2)); - dst = builder.CreateInsertElement(dst, frameColor, builder.getInt32(3)); - dst = builder.CreateLShr(dst, constIntVec(builder, 0, 8, 16, 24)); - dst = builder.CreateAnd(dst, constIntVec(builder, 0xff, 0xff, 0xff, 0xff)); + dst = ScreenColorToIntVector(builder, gglCtx->bufferState.colorFormat, frameColor); } Value * src = builder.CreateConstInBoundsGEP1_32(fsOutputs, 0); src = builder.CreateLoad(src); - Value * color = GenerateFSBlend(gglCtx, /*&prog->outputRegDesc,*/ builder, src, dst); + Value * color = GenerateFSBlend(gglCtx, gglCtx->bufferState.colorFormat,/*&prog->outputRegDesc,*/ builder, src, dst); builder.CreateStore(color, frame); - // TODO DXL depthmask check if (gglCtx->bufferState.depthTest) { z = builder.CreateBitCast(z, intType); @@ -617,7 +664,6 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program if (gglCtx->bufferState.stencilTest) builder.CreateStore(StencilOp(builder, sFace, gglCtx->frontStencil.dFail, gglCtx->backStencil.dFail, sPtr, sRef), stencil); - condBranch.endif(); condBranch.elseop(); // failed s test @@ -626,10 +672,11 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program gglCtx->backStencil.sFail, sPtr, sRef), stencil); condBranch.endif(); - + assert(frame); frame = builder.CreateConstInBoundsGEP1_32(frame, 1); // frame++ + // frame may have been casted to short* from int*, so cast back + frame = builder.CreateBitCast(frame, PointerType::get(builder.getInt32Ty(), 0)); builder.CreateStore(frame, framePtr); - if (gglCtx->bufferState.depthTest) { depth = builder.CreateConstInBoundsGEP1_32(depth, 1); // depth++ builder.CreateStore(depth, depthPtr); @@ -638,7 +685,6 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program stencil = builder.CreateConstInBoundsGEP1_32(stencil, 1); // stencil++ builder.CreateStore(stencil, stencilPtr); } - Value * vPtr = NULL, * v = NULL, * dx = NULL; if (program->UsesFragCoord) { vPtr = builder.CreateConstInBoundsGEP1_32(start, GGL_FS_INPUT_OFFSET + diff --git a/src/glsl/ir_to_llvm_helper.cpp b/src/pixelflinger2/llvm_texture.cpp index ece6653..f3d2bea 100644 --- a/src/glsl/ir_to_llvm_helper.cpp +++ b/src/pixelflinger2/llvm_texture.cpp @@ -1,22 +1,21 @@ -/** +/** ** ** Copyright 2011, The Android Open Source Project ** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at ** - ** http://www.apache.org/licenses/LICENSE-2.0 + ** http://www.apache.org/licenses/LICENSE-2.0 ** - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and ** limitations under the License. */ - + #include <stack> -#include <stdio.h> #include "src/pixelflinger2/pixelflinger2.h" @@ -43,39 +42,67 @@ static Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * i texel = builder.CreateOr(texel, builder.getInt32(0xff000000)); break; case GGL_PIXEL_FORMAT_RGB_565: { - textureData = builder.CreateBitCast(textureData, PointerType::get( - Type::getInt16Ty(builder.getContext()),0)); + textureData = builder.CreateBitCast(textureData, PointerType::get(builder.getInt16Ty(), 0)); textureData = builder.CreateGEP(textureData, index); texel = builder.CreateLoad(textureData, "texel565"); texel = builder.CreateZExt(texel, Type::getInt32Ty(builder.getContext())); - Value * r = builder.CreateAnd(texel, builder.getInt32(0x1f)); - r = builder.CreateShl(r, builder.getInt32(3)); - r = builder.CreateOr(r, builder.CreateLShr(r, builder.getInt32(5))); + Value * b = builder.CreateAnd(texel, builder.getInt32(0x1f)); + b = builder.CreateShl(b, builder.getInt32(3)); + b = builder.CreateOr(b, builder.CreateLShr(b, builder.getInt32(5))); Value * g = builder.CreateAnd(texel, builder.getInt32(0x7e0)); g = builder.CreateShl(g, builder.getInt32(5)); g = builder.CreateOr(g, builder.CreateLShr(g, builder.getInt32(6))); g = builder.CreateAnd(g, builder.getInt32(0xff00)); - Value * b = builder.CreateAnd(texel, builder.getInt32(0xF800)); - b = builder.CreateShl(b, builder.getInt32(8)); - b = builder.CreateOr(b, builder.CreateLShr(b, builder.getInt32(5))); - b = builder.CreateAnd(b, builder.getInt32(0xff0000)); + Value * r = builder.CreateAnd(texel, builder.getInt32(0xF800)); + r = builder.CreateShl(r, builder.getInt32(8)); + r = builder.CreateOr(r, builder.CreateLShr(r, builder.getInt32(5))); + r = builder.CreateAnd(r, builder.getInt32(0xff0000)); texel = builder.CreateOr(r, builder.CreateOr(g, b)); texel = builder.CreateOr(texel, builder.getInt32(0xff000000), name("texel")); break; } + case GGL_PIXEL_FORMAT_A_8: { + textureData = builder.CreateBitCast(textureData, PointerType::get(builder.getInt8Ty(),0)); + textureData = builder.CreateGEP(textureData, index); + texel = builder.CreateLoad(textureData, "texel_a8"); + texel = builder.CreateZExt(texel, builder.getInt32Ty()); + texel = builder.CreateShl(texel, builder.getInt32(24)); + break; + } + case GGL_PIXEL_FORMAT_L_8: { + textureData = builder.CreateBitCast(textureData, PointerType::get(builder.getInt8Ty(),0)); + textureData = builder.CreateGEP(textureData, index); + texel = builder.CreateLoad(textureData, "texel_l8"); + texel = builder.CreateZExt(texel, builder.getInt32Ty()); + texel = builder.CreateOr(texel, builder.CreateShl(texel, 8)); + texel = builder.CreateOr(texel, builder.CreateShl(texel, 8)); + texel = builder.CreateOr(texel, builder.getInt32(0xff000000)); + break; + } + case GGL_PIXEL_FORMAT_LA_88: { + textureData = builder.CreateBitCast(textureData, PointerType::get(builder.getInt16Ty(),0)); + textureData = builder.CreateGEP(textureData, index); + texel = builder.CreateLoad(textureData, "texel_la8"); + texel = builder.CreateZExt(texel, builder.getInt32Ty()); + Value * alpha = builder.CreateAnd(texel, builder.getInt32(0xff00)); + texel = builder.CreateAnd(texel, builder.getInt32(0xff)); + texel = builder.CreateOr(texel, builder.CreateShl(texel, 8)); + texel = builder.CreateOr(texel, builder.CreateShl(texel, 8)); + texel = builder.CreateOr(texel, builder.CreateShl(alpha, 16)); + break; + } case GGL_PIXEL_FORMAT_UNKNOWN: // usually means texture not set yet - debug_printf("pointSample: unknown format, default to 0xff0000ff \n"); - texel = builder.getInt32(0xff0000ff); + LOGD("pf2: pointSample: unknown format, default to 0xffff00ff \n"); + texel = builder.getInt32(0xffff00ff); break; default: assert(0); break; } - Value * channels = Constant::getNullValue(intVecType(builder)); // if (dstDesc && dstDesc->IsInt32Color()) { @@ -83,10 +110,10 @@ static Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * i // channels = builder.CreateBitCast(channels, floatVecType(builder)); // return channels; // } else if (!dstDesc || dstDesc->IsVectorType()) { - channels = builder.CreateInsertElement(channels, texel, builder.getInt32(0)); - channels = builder.CreateInsertElement(channels, texel, builder.getInt32(1)); - channels = builder.CreateInsertElement(channels, texel, builder.getInt32(2)); - channels = builder.CreateInsertElement(channels, texel, builder.getInt32(3)); + channels = builder.CreateInsertElement(channels, texel, builder.getInt32(0)); + channels = builder.CreateInsertElement(channels, texel, builder.getInt32(1)); + channels = builder.CreateInsertElement(channels, texel, builder.getInt32(2)); + channels = builder.CreateInsertElement(channels, texel, builder.getInt32(3)); // if (dstDesc && dstDesc->IsVectorType(Fixed8)) { // channels = builder.CreateLShr(channels, constIntVec(builder, 0, 8, 16, 24)); // channels = builder.CreateAnd(channels, constIntVec(builder, 0xff, 0xff, 0xff, 0xff)); @@ -114,9 +141,9 @@ static const unsigned SHIFT = 16; // w = width - 1, h = height - 1; similar to pointSample; returns <4 x i32> rgba static Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOffset, - Value * x0, Value * y0, Value * xLerp, Value * yLerp, - Value * w, Value * h, Value * width, Value * height, - const GGLPixelFormat format/*, const RegDesc * dstDesc*/) + Value * x0, Value * y0, Value * xLerp, Value * yLerp, + Value * w, Value * h, Value * width, Value * height, + const GGLPixelFormat format/*, const RegDesc * dstDesc*/) { // TODO: linear filtering needs to be fixed for texcoord outside of [0,1] Value * x1 = builder.CreateAdd(x0, builder.getInt32(1)); @@ -489,14 +516,14 @@ Value * texCube(IRBuilder<> & builder, Value * in1, const unsigned sampler, if (0 == gglCtx->textureState.textures[sampler].minFilter && 0 == gglCtx->textureState.textures[sampler].magFilter) { // GL_NEAREST textureData = pointSample(builder, textureData, builder.CreateAdd(indexOffset, index), - gglCtx->textureState.textures[sampler].format/*, dstDesc*/); + gglCtx->textureState.textures[sampler].format/*, dstDesc*/); return intColorVecToFloatColorVec(builder, textureData); - + } else if (1 == gglCtx->textureState.textures[sampler].minFilter && 1 == gglCtx->textureState.textures[sampler].magFilter) { // GL_LINEAR textureData = linearSample(builder, textureData, indexOffset, x, y, xLerp, yLerp, - textureW, textureH, textureWidth, textureHeight, - gglCtx->textureState.textures[sampler].format/*, dstDesc*/); + textureW, textureH, textureWidth, textureHeight, + gglCtx->textureState.textures[sampler].format/*, dstDesc*/); return intColorVecToFloatColorVec(builder, textureData); } else assert(!"unsupported texture filter"); diff --git a/src/pixelflinger2/pixelflinger2.cpp b/src/pixelflinger2/pixelflinger2.cpp index 9fa3aa7..c5ee3a6 100644 --- a/src/pixelflinger2/pixelflinger2.cpp +++ b/src/pixelflinger2/pixelflinger2.cpp @@ -17,17 +17,15 @@ #include "pixelflinger2.h" -#include <stdlib.h> -#include <stdio.h> -#include <assert.h> - #include "src/talloc/hieralloc.h" +#include <string> void gglError(unsigned error) { + std::string str; if (GL_NO_ERROR == error) return; - printf("pf2: gglError 0x%.4X \n", error); + LOGD("\n*\n*\n pf2: gglError 0x%.4X \n*\n*\n", error); assert(0); } @@ -89,6 +87,31 @@ static void BlendEquationSeparate(GGLInterface * iface, GLenum modeRGB, GLenum m SetShaderVerifyFunctions(iface); } +static inline GGLBlendState::GGLBlendFactor GLBlendFactor(const GLenum factor) +{ +#define SWITCH_LINE(c) case c: return GGLBlendState::G##c; + switch (factor) + { + SWITCH_LINE(GL_ZERO); + SWITCH_LINE(GL_ONE); + SWITCH_LINE(GL_SRC_COLOR); + SWITCH_LINE(GL_ONE_MINUS_SRC_COLOR); + SWITCH_LINE(GL_DST_COLOR); + SWITCH_LINE(GL_ONE_MINUS_DST_COLOR); + SWITCH_LINE(GL_SRC_ALPHA); + SWITCH_LINE(GL_ONE_MINUS_SRC_ALPHA); + SWITCH_LINE(GL_DST_ALPHA); + SWITCH_LINE(GL_ONE_MINUS_DST_ALPHA); + SWITCH_LINE(GL_SRC_ALPHA_SATURATE); + SWITCH_LINE(GL_CONSTANT_COLOR); + SWITCH_LINE(GL_ONE_MINUS_CONSTANT_COLOR); + SWITCH_LINE(GL_CONSTANT_ALPHA); + SWITCH_LINE(GL_ONE_MINUS_CONSTANT_ALPHA); + default: assert(0); return GGLBlendState::GGL_ZERO; + } +#undef SWITCH_LINE +} + static void BlendFuncSeparate(GGLInterface * iface, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha) { GGL_GET_CONTEXT(ctx, iface); @@ -112,22 +135,10 @@ static void BlendFuncSeparate(GGLInterface * iface, GLenum srcRGB, GLenum dstRGB srcAlpha = GL_ONE; // in c++ it's templated function for color and alpha, // so it requires setting srcAlpha to GL_ONE to run template again only for alpha - ctx->state.blendState.scf = (GGLBlendState::GGLBlendFactor)(srcRGB <= GL_ONE ? srcRGB : - (srcRGB <= GL_SRC_ALPHA_SATURATE ? srcRGB - GL_SRC_COLOR + 2 - : srcRGB - GL_CONSTANT_COLOR + 11)); - - ctx->state.blendState.saf = (GGLBlendState::GGLBlendFactor)(srcAlpha <= GL_ONE ? srcAlpha : - (srcAlpha <= GL_SRC_ALPHA_SATURATE ? srcAlpha - GL_SRC_COLOR + 2 - : srcAlpha - GL_CONSTANT_COLOR + 11)); - - ctx->state.blendState.dcf = (GGLBlendState::GGLBlendFactor)(dstRGB <= GL_ONE ? dstRGB : - (dstRGB <= GL_SRC_ALPHA_SATURATE ? dstRGB - GL_SRC_COLOR + 2 - : dstRGB - GL_CONSTANT_COLOR + 11)); - - ctx->state.blendState.daf = (GGLBlendState::GGLBlendFactor)(dstAlpha <= GL_ONE ? dstAlpha : - (dstAlpha <= GL_SRC_ALPHA_SATURATE ? dstAlpha - GL_SRC_COLOR + 2 - : dstAlpha - GL_CONSTANT_COLOR + 11)); - + ctx->state.blendState.scf = GLBlendFactor(srcRGB); + ctx->state.blendState.saf = GLBlendFactor(srcAlpha); + ctx->state.blendState.dcf = GLBlendFactor(dstRGB); + ctx->state.blendState.daf = GLBlendFactor(dstAlpha); SetShaderVerifyFunctions(iface); } @@ -153,8 +164,19 @@ static void EnableDisable(GGLInterface * iface, GLenum cap, GLboolean enable) changed |= ctx->state.bufferState.stencilTest ^ enable; ctx->state.bufferState.stencilTest = enable; break; + case GL_DITHER: +// LOGD("pf2: EnableDisable GL_DITHER \n"); + break; + case GL_SCISSOR_TEST: +// LOGD("pf2: EnableDisable GL_SCISSOR_TEST \n"); + break; + case GL_TEXTURE_2D: +// LOGD("pf2: EnableDisable GL_SCISSOR_TEST %d", enable); + break; default: - gglError(GL_INVALID_ENUM); + LOGD("pf2: EnableDisable 0x%.4X causes GL_INVALID_ENUM (maybe not implemented or ES 1.0) \n", cap); +// gglError(GL_INVALID_ENUM); + assert(0); break; } if (changed) @@ -198,6 +220,10 @@ void InitializeGGLState(GGLInterface * iface) for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) iface->SetSampler(iface, i, NULL); + + iface->SetBuffer(iface, GL_COLOR_BUFFER_BIT, NULL); + iface->SetBuffer(iface, GL_DEPTH_BUFFER_BIT, NULL); + iface->SetBuffer(iface, GL_STENCIL_BUFFER_BIT, NULL); SetShaderVerifyFunctions(iface); } @@ -224,6 +250,14 @@ void UninitializeGGLState(GGLInterface * iface) GGLContext * ctx = (GGLContext *)iface; assert((void *)ctx == (void *)iface); +#if USE_DUAL_THREAD + ctx->worker.hasWork = false; + ctx->worker.quit = true; + pthread_mutex_lock(&ctx->worker.lock); + pthread_cond_signal(&ctx->worker.cond); + pthread_mutex_unlock(&ctx->worker.lock); +#endif + DestroyShaderFunctions(iface); #if USE_LLVM_TEXTURE_SAMPLER @@ -235,6 +269,9 @@ void UninitializeGGLState(GGLInterface * iface) #if USE_LLVM_EXECUTIONENGINE puts("USE_LLVM_EXECUTIONENGINE"); #endif +#if USE_DUAL_THREAD + puts("USE_DUAL_THREAD"); +#endif hieralloc_report_brief(NULL, stdout); } @@ -244,4 +281,4 @@ void DestroyGGLInterface(GGLInterface * iface) assert((void *)ctx == (void *)iface); UninitializeGGLState(iface); free(ctx); -}
\ No newline at end of file +} diff --git a/src/pixelflinger2/pixelflinger2.h b/src/pixelflinger2/pixelflinger2.h index 691a387..bd77a2e 100644 --- a/src/pixelflinger2/pixelflinger2.h +++ b/src/pixelflinger2/pixelflinger2.h @@ -20,13 +20,37 @@ #define USE_LLVM_TEXTURE_SAMPLER 1 #define USE_LLVM_SCANLINE 1 - #ifndef USE_LLVM_EXECUTIONENGINE #define USE_LLVM_EXECUTIONENGINE 0 // 1 to use llvm::Execution, 0 to use libBCC, requires modifying makefile #endif +#define USE_DUAL_THREAD 1 #define debug_printf printf +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> + +#ifdef __arm__ +#include <cutils/log.h> + +#ifndef __location__ +#define __HIERALLOC_STRING_0__(s) #s +#define __HIERALLOC_STRING_1__(s) __HIERALLOC_STRING_0__(s) +#define __HIERALLOC_STRING_2__ __HIERALLOC_STRING_1__(__LINE__) +#define __location__ __FILE__ ":" __HIERALLOC_STRING_2__ +#endif +#undef assert +#define assert(EXPR) { do { if (!(EXPR)) {LOGD("\n*\n*\n*\n* assert fail: '"#EXPR"' at "__location__"\n*\n*\n*"); exit(EXIT_FAILURE); } } while (false); } + +#else // #ifdef __arm__ + +#ifndef LOGD +#define LOGD printf +#endif //#include <stdio.h> + +#endif // #ifdef __arm__ + #include "pixelflinger2/pixelflinger2_interface.h" #include <string.h> @@ -47,8 +71,12 @@ class LLVMContext; typedef int BlendComp_t; #endif +#if USE_DUAL_THREAD +#include <pthread.h> +#endif + typedef void (*ShaderFunction_t)(const void*,void*,const void*); - + #define GGL_GET_CONTEXT(context, interface) GGLContext * context = (GGLContext *)interface; #define GGL_GET_CONST_CONTEXT(context, interface) const GGLContext * context = \ (const GGLContext *)interface; (void)context; @@ -69,11 +97,28 @@ struct GGLContext { } clearState; gl_shader_program * CurrentProgram; - + mutable GGLActiveStencil activeStencil; // after primitive assembly, call StencilSelect GGLState state; // states affecting jit +#if USE_DUAL_THREAD + mutable struct Worker { + const GGLInterface * iface; + unsigned startY, endY, varyingCount; + VertexOutput bV, cV, bDx, cDx; + int width, height; + volatile bool hasWork; + bool quit; + + pthread_cond_t cond; + pthread_mutex_t lock; + pthread_t thread; + + Worker() : cond(PTHREAD_COND_INITIALIZER), lock(PTHREAD_MUTEX_INITIALIZER), thread(NULL) {} + } worker; +#endif + // called by ShaderUse to set to proper rendering functions void (* PickScanLine)(GGLInterface * iface); void (* PickRaster)(GGLInterface * iface); diff --git a/src/pixelflinger2/raster.cpp b/src/pixelflinger2/raster.cpp index eeee8e7..bd69f59 100644 --- a/src/pixelflinger2/raster.cpp +++ b/src/pixelflinger2/raster.cpp @@ -16,17 +16,18 @@ */ #include <stdlib.h> -#include <assert.h> #include <math.h> #include <string.h> #include <stdio.h> #include "pixelflinger2.h" #include "src/mesa/main/mtypes.h" +#include "src/mesa/program/prog_parameter.h" +#include "src/mesa/program/prog_uniform.h" +#include "src/glsl/glsl_types.h" -#ifdef SHADER_SOA -static struct tgsi_exec_machine machine; -#endif +//#undef LOGD +//#define LOGD(...) static inline void LerpVector4(const Vector4 * a, const Vector4 * b, const VectorComp_t x, Vector4 * d) __attribute__((always_inline)); @@ -85,59 +86,59 @@ static void ProcessVertex(const GGLInterface * iface, const VertexInput * input, //#endif } -#include <pthread.h> - -struct WorkerArgs { - const GGLInterface * iface; - unsigned startY, endY, varyingCount; - VertexOutput bV, cV, bDx, cDx; - int width, height; - volatile bool hasWork; - bool quit; - - static void * RasterTrapezoidWorker(void * threadArgs) { - WorkerArgs * args = (WorkerArgs *)threadArgs; - VertexOutput clip0, clip1, * left, * right; - while (!args->quit) { - if (!args->hasWork) - continue; - for (unsigned y = args->startY; y <= args->endY; y += 2) { - do { - if (args->bV.position.x < 0) { - if (args->cV.position.x < 0) - break; - InterpolateVertex(&args->bV, &args->cV, -args->bV.position.x / - (args->cV.position.x - args->bV.position.x), - &clip0, args->varyingCount); - left = &clip0; - } else - left = &args->bV; - if ((int)args->cV.position.x >= (int)args->width) { - if (args->bV.position.x >= (int)args->width) - break; - InterpolateVertex(&args->bV, &args->cV, (args->width - 1 - args->bV.position.x) / - (args->cV.position.x - args->bV.position.x), - &clip1, args->varyingCount); - right = &clip1; - } else - right = &args->cV; - args->iface->ScanLine(args->iface, left, right); - } while (false); - for (unsigned i = 0; i < args->varyingCount; i++) { - args->bV.varyings[i] += args->bDx.varyings[i]; - args->cV.varyings[i] += args->cDx.varyings[i]; - } - args->bV.position += args->bDx.position; - args->cV.position += args->cDx.position; - args->bV.frontFacingPointCoord += args->bDx.frontFacingPointCoord; - args->cV.frontFacingPointCoord += args->cDx.frontFacingPointCoord; +#if USE_DUAL_THREAD +static void * RasterTrapezoidWorker(void * threadArgs) +{ + GGLContext::Worker * args = (GGLContext::Worker *)threadArgs; + VertexOutput clip0, clip1, * left, * right; + while (!args->quit) { + pthread_mutex_lock(&args->lock); + while (!args->hasWork && !args->quit) + pthread_cond_wait(&args->cond, &args->lock); + pthread_mutex_unlock(&args->lock); + + if (args->quit) + break; +// if (!args->hasWork) +// continue; + + for (unsigned y = args->startY; y <= args->endY; y += 2) { + do { + if (args->bV.position.x < 0) { + if (args->cV.position.x < 0) + break; + InterpolateVertex(&args->bV, &args->cV, -args->bV.position.x / + (args->cV.position.x - args->bV.position.x), + &clip0, args->varyingCount); + left = &clip0; + } else + left = &args->bV; + if ((int)args->cV.position.x >= (int)args->width) { + if (args->bV.position.x >= (int)args->width) + break; + InterpolateVertex(&args->bV, &args->cV, (args->width - 1 - args->bV.position.x) / + (args->cV.position.x - args->bV.position.x), + &clip1, args->varyingCount); + right = &clip1; + } else + right = &args->cV; + args->iface->ScanLine(args->iface, left, right); + } while (false); + for (unsigned i = 0; i < args->varyingCount; i++) { + args->bV.varyings[i] += args->bDx.varyings[i]; + args->cV.varyings[i] += args->cDx.varyings[i]; } - args->hasWork = false; + args->bV.position += args->bDx.position; + args->cV.position += args->cDx.position; + args->bV.frontFacingPointCoord += args->bDx.frontFacingPointCoord; + args->cV.frontFacingPointCoord += args->cDx.frontFacingPointCoord; } - pthread_exit(NULL); - return NULL; + args->hasWork = false; } -}; + pthread_exit(NULL); + return NULL; +} +#endif static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, const VertexOutput * tr, const VertexOutput * bl, @@ -236,14 +237,10 @@ static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, cDx.frontFacingPointCoord *= yDistInv; cDx.frontFacingPointCoord.y = VectorComp_t_Zero; // gl_FrontFacing not interpolated - static WorkerArgs args; // TODO: fix this static - -#define DUAL_THREAD 1 - -#if DUAL_THREAD - static pthread_t thread; - if (!thread) { - int rc = pthread_create(&thread, NULL, WorkerArgs::RasterTrapezoidWorker, &args); +#if USE_DUAL_THREAD + GGLContext::Worker & args = ctx->worker; + if (!ctx->worker.thread) { + int rc = pthread_create(&ctx->worker.thread, NULL, RasterTrapezoidWorker, &args); assert(!rc); } args.bV = bV; @@ -270,14 +267,18 @@ static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, args.endY = endY; args.width = width; args.height = height; - if (args.startY <= args.endY) + if (args.startY <= args.endY) { + pthread_mutex_lock(&args.lock); args.hasWork = true; + pthread_cond_signal(&args.cond); + pthread_mutex_unlock(&args.lock); + } #endif VertexOutput * left, * right; VertexOutput clip0, clip1; - for (unsigned y = startY; y <= endY; y += 1 + DUAL_THREAD) { + for (unsigned y = startY; y <= endY; y += 1 + USE_DUAL_THREAD) { do { if (bV.position.x < 0) { if (cV.position.x < 0) @@ -307,8 +308,10 @@ static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, cV.frontFacingPointCoord += cDx.frontFacingPointCoord; } +#if USE_DUAL_THREAD while (args.hasWork) ; // wait +#endif } static void RasterTriangle(const GGLInterface * iface, const VertexOutput * v1, @@ -365,24 +368,116 @@ static void DrawTriangle(const GGLInterface * iface, const VertexInput * vin1, GGL_GET_CONST_CONTEXT(ctx, iface); VertexOutput vouts[3]; + memset(vouts, 0, sizeof(vouts)); VertexOutput * v1 = vouts + 0, * v2 = vouts + 1, * v3 = vouts + 2; -#ifdef SHADER_SOA - assert(0); // not implemented -#endif +// LOGD("pf2: DrawTriangle"); + + const gl_shader_program * program = ctx->CurrentProgram; + +// if (!strstr(program->Shaders[MESA_SHADER_FRAGMENT]->Source, +// "gl_FragColor = color * texture2D(sampler, outTexCoords).a;")) +// return; + +// for (unsigned i = 0; i < program->NumShaders; i++) +// if (program->Shaders[i]->Source) +// LOGD("%s", program->Shaders[i]->Source); + +// if (!strstr(program->Shaders[MESA_SHADER_FRAGMENT]->Source, ").a;")) +// return; + +// LOGD("%s", program->Shaders[MESA_SHADER_VERTEX]->Source); +// LOGD("%s", program->Shaders[MESA_SHADER_FRAGMENT]->Source); + +// for (unsigned i = 0; i < program->Attributes->NumParameters; i++) { +// const gl_program_parameter & attribute = program->Attributes->Parameters[i]; +// LOGD("attribute '%s': location=%d slots=%d \n", attribute.Name, attribute.Location, attribute.Slots); +// } +// for (unsigned i = 0; i < program->Varying->NumParameters; i++) { +// const gl_program_parameter & varying = program->Varying->Parameters[i]; +// LOGD("varying '%s': vs_location=%d fs_location=%d \n", varying.Name, varying.BindLocation, varying.Location); +// } +// for (unsigned i = 0; i < program->Uniforms->NumUniforms; i++) { +// const gl_uniform & uniform = program->Uniforms->Uniforms[i]; +// LOGD("uniform '%s': location=%d type=%s \n", uniform.Name, uniform.Pos, uniform.Type->name); +// } + +// __attribute__ ((aligned (16))) +// static const float matrix[16] = { +// 1,0,0,0, +// 0,1,0,0, +// 0,0,1,0, +// 0,0,0,1 +// }; +// +// iface->ShaderUniformMatrix((gl_shader_program *)program, 4, 4, 0, 1, GL_FALSE, matrix); iface->ProcessVertex(iface, vin1, v1); iface->ProcessVertex(iface, vin2, v2); iface->ProcessVertex(iface, vin3, v3); +// __attribute__ ((aligned (16))) +// static const float matrix[16] = { +// 2,0,0,0, +// 0,-2,0,0, +// 0,0,-1,0, +// -1,1,0,1 +// }; + + +// float * matrix = program->ValuesUniform[0]; +// for (unsigned i = 0; i < 4; i++) +// LOGD("pf2: DrawTriangle %.2f \t %.2f \t %.2f \t %.2f \n", matrix[i * 4 + 0], +// matrix[i * 4 + 1], matrix[i * 4 + 2], matrix[i * 4 + 3]); +//// LOGD("color %.02f %.02f %.02f %.02f", program->ValuesUniform[4][0], program->ValuesUniform[4][1], +//// program->ValuesUniform[4][2], program->ValuesUniform[4][3]); +// LOGD("vin1 position %.02f %.02f %.02f %.02f", vin1->attributes[1].x, vin1->attributes[1].y, +// vin1->attributes[1].z, vin1->attributes[1].w); +// LOGD("vin2 position %.02f %.02f %.02f %.02f", vin2->attributes[1].x, vin2->attributes[1].y, +// vin2->attributes[1].z, vin2->attributes[1].w); +// LOGD("vin3 position %.02f %.02f %.02f %.02f", vin3->attributes[1].x, vin3->attributes[1].y, +// vin3->attributes[1].z, vin3->attributes[1].w); + +// GGLProcessVertex(program, vin1, v1, (const float (*)[4])matrix); +// GGLProcessVertex(program, vin2, v2, (const float (*)[4])matrix); +// GGLProcessVertex(program, vin3, v3, (const float (*)[4])matrix); + +// LOGD("pf2: DrawTriangle processed %.02f %.02f %.2f %.2f \t %.02f %.02f %.2f %.2f \t %.02f %.02f %.2f %.2f", +// v1->position.x, v1->position.y, v1->position.z, v1->position.w, +// v2->position.x, v2->position.y, v2->position.z, v2->position.w, +// v3->position.x, v3->position.y, v3->position.z, v3->position.w); + v1->position /= v1->position.w; v2->position /= v2->position.w; v3->position /= v3->position.w; +// LOGD("pf2: DrawTriangle divided %.02f,%.02f \t %.02f,%.02f \t %.02f,%.02f", v1->position.x, v1->position.y, +// v2->position.x, v2->position.y, v3->position.x, v3->position.y); + iface->ViewportTransform(iface, &v1->position); iface->ViewportTransform(iface, &v2->position); iface->ViewportTransform(iface, &v3->position); +// if (strstr(program->Shaders[MESA_SHADER_FRAGMENT]->Source, +// "gl_FragColor = color * texture2D(sampler, outTexCoords).a;")) { +//// LOGD("%s", program->Shaders[MESA_SHADER_FRAGMENT]->Source); +// v1->position = vin1->attributes[0]; +// v2->position = vin2->attributes[0]; +// v3->position = vin3->attributes[0]; +// +// v1->varyings[0] = vin1->attributes[1]; +// v2->varyings[0] = vin2->attributes[1]; +// v3->varyings[0] = vin3->attributes[1]; +// } + +// LOGD("pf2: DrawTriangle transformed %.0f,%.0f \t %.0f,%.0f \t %.0f,%.0f", v1->position.x, v1->position.y, +// v2->position.x, v2->position.y, v3->position.x, v3->position.y); + +// LOGD("pf2: DrawTriangle varying %.02f %.02f %.2f %.2f \t %.02f %.02f %.2f %.2f \t %.02f %.02f %.2f %.2f", +// v1->varyings[0].x, v1->varyings[0].y, v1->varyings[0].z, v1->varyings[0].w, +// v2->varyings[0].x, v2->varyings[0].y, v2->varyings[0].z, v2->varyings[0].w, +// v3->varyings[0].x, v3->varyings[0].y, v3->varyings[0].z, v3->varyings[0].w); + VectorComp_t area; area = v1->position.x * v2->position.y - v2->position.x * v1->position.y; area += v2->position.x * v3->position.y - v3->position.x * v2->position.y; @@ -392,7 +487,7 @@ static void DrawTriangle(const GGLInterface * iface, const VertexInput * vin1, if (GL_CCW == ctx->cullState.frontFace + GL_CW) (unsigned &)area ^= 0x80000000; - if (ctx->cullState.enable) { + if (false && ctx->cullState.enable) { // TODO: turn off for now switch (ctx->cullState.cullFace + GL_FRONT) { case GL_FRONT: if (!((unsigned &)area & 0x80000000)) // +ve, front facing @@ -442,6 +537,8 @@ static void DrawTriangle(const GGLInterface * iface, const VertexInput * vin1, // TODO DXL view frustum clipping iface->RasterTriangle(iface, v1, v2, v3); +// LOGD("pf2: DrawTriangle end"); + } static void PickRaster(GGLInterface * iface) @@ -456,6 +553,7 @@ static void ViewportTransform(const GGLInterface * iface, Vector4 * v) { GGL_GET_CONST_CONTEXT(ctx, iface); v->x = v->x * ctx->viewport.w + ctx->viewport.x; + v->y *= -1; v->y = v->y * ctx->viewport.h + ctx->viewport.y; v->z = v->z * ctx->viewport.f + ctx->viewport.n; } diff --git a/src/pixelflinger2/scanline.cpp b/src/pixelflinger2/scanline.cpp index a7f8475..db05be5 100644 --- a/src/pixelflinger2/scanline.cpp +++ b/src/pixelflinger2/scanline.cpp @@ -178,28 +178,37 @@ unsigned char StencilOp(const unsigned op, unsigned char s, const unsigned char #ifdef USE_LLVM_SCANLINE typedef void (* ScanLineFunction_t)(VertexOutput * start, VertexOutput * step, - const float (*constants)[4], unsigned * frame, + const float (*constants)[4], void * frame, int * depth, unsigned char * stencil, GGLActiveStencil *, unsigned count); #endif -void GGLScanLine(const gl_shader_program * program, unsigned * frameBuffer, - int * depthBuffer, unsigned char * stencilBuffer, unsigned bufferWidth, - unsigned bufferHeight, GGLActiveStencil * activeStencil, const VertexOutput_t * start, - const VertexOutput_t * end, const float (*constants)[4]) +void GGLScanLine(const gl_shader_program * program, const GGLPixelFormat colorFormat, + void * frameBuffer, int * depthBuffer, unsigned char * stencilBuffer, + unsigned bufferWidth, unsigned bufferHeight, GGLActiveStencil * activeStencil, + const VertexOutput_t * start, const VertexOutput_t * end, const float (*constants)[4]) { #if !USE_LLVM_SCANLINE assert(!"only for USE_LLVM_SCANLINE"); #endif +// LOGD("pf2: GGLScanLine program=%p format=0x%.2X frameBuffer=%p depthBuffer=%p stencilBuffer=%p ", +// program, colorFormat, frameBuffer, depthBuffer, stencilBuffer); + const unsigned int varyingCount = program->VaryingSlots; const unsigned y = start->position.y, startX = start->position.x, endX = end->position.x; - //assert(ctx->frameSurface.width > startX && ctx->frameSurface.width > endX); - //assert(ctx->frameSurface.height > y); + assert(bufferWidth > startX && bufferWidth > endX); + assert(bufferHeight > y); - unsigned * frame = frameBuffer + y * bufferWidth + startX; + char * frame = (char *)frameBuffer; + if (GGL_PIXEL_FORMAT_RGBA_8888 == colorFormat) + frame += (y * bufferWidth + startX) * 4; + else if (GGL_PIXEL_FORMAT_RGB_565 == colorFormat) + frame += (y * bufferWidth + startX) * 2; + else + assert(0); const VectorComp_t div = VectorComp_t_CTR(1 / (float)(endX - startX)); //memcpy(ctx->glCtx->CurrentProgram->ValuesVertexOutput, start, sizeof(*start)); @@ -225,16 +234,19 @@ void GGLScanLine(const gl_shader_program * program, unsigned * frameBuffer, // TODO DXL consider inverting gl_FragCoord.y ScanLineFunction_t scanLineFunction = (ScanLineFunction_t) program->_LinkedShaders[MESA_SHADER_FRAGMENT]->function; +// LOGD("pf2 GGLScanLine scanline=%p start=%p constants=%p", scanLineFunction, &vertex, constants); if (endX >= startX) scanLineFunction(&vertex, &vertexDx, constants, frame, depth, stencil, activeStencil, endX - startX + 1); +// LOGD("pf2: GGLScanLine end"); + } template <bool StencilTest, bool DepthTest, bool DepthWrite, bool BlendEnable> void ScanLine(const GGLInterface * iface, const VertexOutput * start, const VertexOutput * end) { GGL_GET_CONST_CONTEXT(ctx, iface); - GGLScanLine(ctx->CurrentProgram, (unsigned *)ctx->frameSurface.data, + GGLScanLine(ctx->CurrentProgram, ctx->frameSurface.format, ctx->frameSurface.data, (int *)ctx->depthSurface.data, (unsigned char *)ctx->stencilSurface.data, ctx->frameSurface.width, ctx->frameSurface.height, &ctx->activeStencil, start, end, ctx->CurrentProgram->ValuesUniform); diff --git a/src/pixelflinger2/shader.cpp b/src/pixelflinger2/shader.cpp index 629d90b..5cca627 100644 --- a/src/pixelflinger2/shader.cpp +++ b/src/pixelflinger2/shader.cpp @@ -32,6 +32,10 @@ #include "src/mesa/program/prog_uniform.h" #include "src/glsl/glsl_types.h" #include "src/glsl/ir_to_llvm.h" +#include "src/glsl/ir_print_visitor.h" + +//#undef LOGD +//#define LOGD(...) static void InitializeGLContext(struct gl_context *ctx) { @@ -147,14 +151,13 @@ static gl_shader * ShaderCreate(const GGLInterface * iface, GLenum type) void GGLShaderSource(gl_shader_t * shader, GLsizei count, const char ** string, const int * length) { hieralloc_free(const_cast<GLchar *>(shader->Source)); - for (unsigned i = 0; i < count; i++) - { + for (unsigned i = 0; i < count; i++) { int len = strlen(string[i]); if (length && length[i] >= 0) len = length[i]; shader->Source = hieralloc_strndup_append(const_cast<GLchar *>(shader->Source), string[i], len); } - printf("pf2: GGLShaderSource: \n '%s' \n", shader->Source); +// LOGD("pf2: GGLShaderSource: \n '%s' \n", shader->Source); } GLboolean GGLShaderCompile(gl_shader * shader, const char * glsl, const char ** infoLog) @@ -163,7 +166,8 @@ GLboolean GGLShaderCompile(gl_shader * shader, const char * glsl, const char ** shader->Source = glsl; assert(shader->Source); compile_shader(glContext.ctx, shader); - shader->Source = NULL; + if (glsl) + shader->Source = NULL; if (infoLog) *infoLog = shader->InfoLog; return shader->CompileStatus; @@ -257,18 +261,18 @@ GLboolean GGLShaderProgramLink(gl_shader_program * program, const char ** infoLo *infoLog = program->InfoLog; if (!program->LinkStatus) return program->LinkStatus; - printf("slots: attribute=%d varying=%d uniforms=%d \n", program->AttributeSlots, program->VaryingSlots, program->Uniforms->Slots); - for (unsigned i = 0; i < program->Attributes->NumParameters; i++) { - const gl_program_parameter & attribute = program->Attributes->Parameters[i]; - printf("attribute '%s': location=%d slots=%d \n", attribute.Name, attribute.Location, attribute.Slots); - } - for (unsigned i = 0; i < program->Varying->NumParameters; i++) { - const gl_program_parameter & varying = program->Varying->Parameters[i]; - printf("varying '%s': vs_location=%d fs_location=%d \n", varying.Name, varying.BindLocation, varying.Location); - } + LOGD("slots: attribute=%d varying=%d uniforms=%d \n", program->AttributeSlots, program->VaryingSlots, program->Uniforms->Slots); +// for (unsigned i = 0; i < program->Attributes->NumParameters; i++) { +// const gl_program_parameter & attribute = program->Attributes->Parameters[i]; +// LOGD("attribute '%s': location=%d slots=%d \n", attribute.Name, attribute.Location, attribute.Slots); +// } +// for (unsigned i = 0; i < program->Varying->NumParameters; i++) { +// const gl_program_parameter & varying = program->Varying->Parameters[i]; +// LOGD("varying '%s': vs_location=%d fs_location=%d \n", varying.Name, varying.BindLocation, varying.Location); +// } for (unsigned i = 0; i < program->Uniforms->NumUniforms; i++) { const gl_uniform & uniform = program->Uniforms->Uniforms[i]; - printf("uniform '%s': location=%d type=%s \n", uniform.Name, uniform.Pos, uniform.Type->name); + LOGD("uniform '%s': location=%d type=%s \n", uniform.Name, uniform.Pos, uniform.Type->name); } return program->LinkStatus; } @@ -361,8 +365,6 @@ struct SymbolLookupContext { static void* SymbolLookup(void* pContext, const char* name) { SymbolLookupContext * ctx = (SymbolLookupContext *)pContext; -// const gl_shader * shader = ctx->shader; -// const gl_shader_program * program = ctx->program; const GGLState * gglCtx = ctx->gglCtx; const void * symbol = (void*)dlsym(RTLD_DEFAULT, name); if (NULL == symbol) { @@ -371,9 +373,12 @@ static void* SymbolLookup(void* pContext, const char* name) else if (!strcmp(_PF2_TEXTURE_DIMENSIONS_NAME_, name)) symbol = (void *)gglCtx->textureState.textureDimensions; else // attributes, varyings and uniforms are mapped to locations in pointers + { + LOGD("pf2: SymbolLookup unknown symbol: '%s'", name); assert(0); + } } - printf("symbolLookup '%s'=%p \n", name, symbol); +// printf("symbolLookup '%s'=%p \n", name, symbol); assert(symbol); return (void *)symbol; } @@ -384,6 +389,8 @@ static void CodeGen(Instance * instance, const char * mainName, gl_shader * shad SymbolLookupContext ctx = {gglCtx, program, shader}; int result = 0; +// instance->module->dump(); + BCCScriptRef & script = instance->script; script = bccCreateScript(); result = bccReadModule(script, "glsl", (LLVMModuleRef)instance->module, 0); @@ -394,7 +401,7 @@ static void CodeGen(Instance * instance, const char * mainName, gl_shader * shad result = bccGetError(script); if (result != 0) { - puts("failed bcc_compile"); + LOGD("failed bcc_compile"); assert(0); return; } @@ -403,9 +410,11 @@ static void CodeGen(Instance * instance, const char * mainName, gl_shader * shad assert(instance->function); result = bccGetError(script); if (result != BCC_NO_ERROR) - fprintf(stderr, "Could not find '%s': %d\n", "main", result); - else - printf("bcc_compile %s=%p \n", mainName, instance->function); + LOGD("Could not find '%s': %d\n", mainName, result); +// else +// printf("bcc_compile %s=%p \n", mainName, instance->function); + +// assert(0); } void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program, llvm::Module * mod, @@ -413,6 +422,7 @@ void GenerateScanLine(const GGLState * gglCtx, const gl_shader_program * program void GGLShaderUse(void * llvmCtx, const GGLState * gglState, gl_shader_program * program) { +// LOGD("%s", program->Shaders[MESA_SHADER_FRAGMENT]->Source); for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { if (!program->_LinkedShaders[i]) continue; @@ -427,7 +437,7 @@ void GGLShaderUse(void * llvmCtx, const GGLState * gglState, gl_shader_program * GetShaderKey(gglState, shader, &shaderKey); Instance * instance = shader->executable->instances[shaderKey]; if (!instance) { - puts("begin jit new shader"); +// puts("begin jit new shader"); instance = hieralloc_zero(shader->executable, Instance); instance->module = new llvm::Module("glsl", *(llvm::LLVMContext *)llvmCtx); @@ -438,10 +448,72 @@ void GGLShaderUse(void * llvmCtx, const GGLState * gglState, gl_shader_program * strcat(mainName, shaderName); do_mat_op_to_vec(shader->ir); // TODO: move these passes to link? - +//#ifdef __arm__ +// static const char fileName[] = "/data/pf2.txt"; +// FILE * file = freopen(fileName, "w", stdout); +// assert(file); +// *stdout = *file; +// std::ios_base::sync_with_stdio(true); +//#endif +// _mesa_print_ir(shader->ir, NULL); +//#ifdef __arm__ +// fclose(file); +// file = fopen(fileName, "r"); +// assert(file); +// static char str[256]; +// while (!feof(file)) { +// fgets(str, sizeof(str) - 1, file); +// str[sizeof(str) - 1] = 0; +// LOGD("%s", str); +// } +// fclose(file); +//#endif llvm::Module * module = glsl_ir_to_llvm_module(shader->ir, instance->module, gglState, shaderName); if (!module) assert(0); +//#ifdef __arm__ +// static const char fileName[] = "/data/pf2.txt"; +// FILE * file = freopen(fileName, "w", stderr); +// assert(file); +// *stderr = *file; +// std::ios_base::sync_with_stdio(true); +//#endif + +// if (strstr(program->Shaders[MESA_SHADER_FRAGMENT]->Source, +// "gl_FragColor = color * texture2D(sampler, outTexCoords).a;")) { +// if (i == MESA_SHADER_VERTEX) { +// for (unsigned i = 0; i < program->Attributes->NumParameters; i++) { +// const gl_program_parameter & attribute = program->Attributes->Parameters[i]; +// LOGD("attribute '%s': location=%d slots=%d \n", attribute.Name, attribute.Location, attribute.Slots); +// } +// for (unsigned i = 0; i < program->Varying->NumParameters; i++) { +// const gl_program_parameter & varying = program->Varying->Parameters[i]; +// LOGD("varying '%s': vs_location=%d fs_location=%d \n", varying.Name, varying.BindLocation, varying.Location); +// } +// LOGD("%s", program->Shaders[MESA_SHADER_VERTEX]->Source); +// module->dump(); +// } +// } + +//#ifdef __arm__ +// fputs("end of bcc disassembly", stderr); +// fclose(stderr); +// +// file = fopen(fileName, "r"); +// assert(file); +// fseek(file , 0 , SEEK_END); +// long lSize = ftell(file); +// rewind(file); +// assert(0 <= lSize); +// static char str[256]; +// while (!feof(file)) { +// fgets(str, sizeof(str) - 1, file); +// str[sizeof(str) - 1] = 0; +// LOGD("%s", str); +// } +// fclose(file); +//#endif + #if USE_LLVM_SCANLINE if (GL_FRAGMENT_SHADER == shader->Type) { char scanlineName [SCANLINE_KEY_STRING_LEN] = {0}; @@ -451,15 +523,18 @@ void GGLShaderUse(void * llvmCtx, const GGLState * gglState, gl_shader_program * } else #endif CodeGen(instance, mainName, shader, program, gglState); + shader->executable->instances[shaderKey] = instance; - debug_printf("jit new shader '%s'(%p) \n", mainName, instance->function); +// debug_printf("jit new shader '%s'(%p) \n", mainName, instance->function); } else // debug_printf("use cached shader %p \n", instance->function); ; shader->function = instance->function; } - puts("pf2: GGLShaderUse end"); +// puts("pf2: GGLShaderUse end"); + +// assert(0); } static void ShaderUse(GGLInterface * iface, gl_shader_program * program) @@ -471,7 +546,7 @@ static void ShaderUse(GGLInterface * iface, gl_shader_program * program) ctx->CurrentProgram = NULL; return; } - + GGLShaderUse(ctx->llvmCtx, &ctx->state, program); for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { if (!program->_LinkedShaders[i]) @@ -535,7 +610,7 @@ static void ShaderProgramDelete(GGLInterface * iface, gl_shader_program * progra GGLShaderProgramDelete(program); } -void GGLShaderGetiv(gl_shader_t * shader, const GLenum pname, GLint * params) +void GGLShaderGetiv(const gl_shader_t * shader, const GLenum pname, GLint * params) { switch (pname) { case GL_SHADER_TYPE: @@ -559,7 +634,21 @@ void GGLShaderGetiv(gl_shader_t * shader, const GLenum pname, GLint * params) } } -void GGLShaderProgramGetiv(gl_shader_program_t * program, const GLenum pname, GLint * params) +void GGLShaderGetInfoLog(const gl_shader_t * shader, GLsizei bufsize, GLsizei* length, GLchar* infolog) +{ + unsigned len = 0; + infolog[0] = 0; + if (shader->InfoLog) + { + len = strlen(shader->InfoLog); + strncpy(infolog, shader->InfoLog, bufsize); + infolog[bufsize] = 0; + } + if (length) + *length = strlen(infolog); +} + +void GGLShaderProgramGetiv(const gl_shader_program_t * program, const GLenum pname, GLint * params) { switch (pname) { case GL_DELETE_STATUS: @@ -592,6 +681,20 @@ void GGLShaderProgramGetiv(gl_shader_program_t * program, const GLenum pname, GL } } +void GGLShaderProgramGetInfoLog(const gl_shader_program_t * program, GLsizei bufsize, GLsizei* length, GLchar* infolog) +{ + unsigned len = 0; + infolog[0] = 0; + if (program->InfoLog) + { + len = strlen(program->InfoLog); + strncpy(infolog, program->InfoLog, bufsize); + infolog[bufsize] = 0; + } + if (length) + *length = strlen(infolog); +} + void GGLShaderAttributeBind(const gl_shader_program * program, GLuint index, const GLchar * name) { int i = _mesa_add_parameter(program->Attributes, name); @@ -647,16 +750,21 @@ void GGLShaderUniformGetiv(gl_shader_program * program, GLint location, GLint * params[3] = uniform[3]; } -void GGLShaderUniformGetSamplers(const gl_shader_program_t * program, - int sampler2tmu[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]) +void GGLShaderUniformGetSamplers(const gl_shader_program_t * program, + int sampler2tmu[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]) { - memset(sampler2tmu, 0xff, sizeof sampler2tmu); - for (unsigned i = 0; i < program->Uniforms->NumUniforms; i++) - { +// LOGD("%s", program->Shaders[MESA_SHADER_FRAGMENT]->Source); +// for (unsigned i = 0; i < program->Uniforms->Slots + program->Uniforms->SamplerSlots; i++) +// LOGD("%d: %.2f \t %.2f \t %.2f \t %.2f", i, program->ValuesUniform[i][0], program->ValuesUniform[i][1], +// program->ValuesUniform[i][2], program->ValuesUniform[i][3]); + for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) + sampler2tmu[i] = -1; + for (unsigned i = 0; i < program->Uniforms->NumUniforms; i++) { const gl_uniform & uniform = program->Uniforms->Uniforms[i]; - if (uniform.Type->is_sampler()) - sampler2tmu[uniform.Pos] = program->ValuesUniform[i][0]; - else if (uniform.Type->is_array() && uniform.Type->fields.array->is_sampler()) + if (uniform.Type->is_sampler()) { +// LOGD("%d uniform.Pos=%d tmu=%d", program->Uniforms->Slots, uniform.Pos, (int)program->ValuesUniform[program->Uniforms->Slots + uniform.Pos][0]); + sampler2tmu[uniform.Pos] = program->ValuesUniform[program->Uniforms->Slots + uniform.Pos][0]; + } else if (uniform.Type->is_array() && uniform.Type->fields.array->is_sampler()) assert(0); } } @@ -664,14 +772,28 @@ void GGLShaderUniformGetSamplers(const gl_shader_program_t * program, GLint GGLShaderUniform(gl_shader_program * program, GLint location, GLsizei count, const GLvoid *values, GLenum type) { +// LOGD("pf2: GGLShaderUniform location=%d count=%d type=0x%.4X", location, count, type); // TODO: sampler uniform and type checking if (!program) { //gglError(GL_INVALID_OPERATION); return -2; } + if (-1 == location) + return -1; assert(0 <= location && program->Uniforms->NumUniforms > location); - const gl_uniform & unifrom = program->Uniforms->Uniforms[location]; - int start = unifrom.Pos; + const gl_uniform & uniform = program->Uniforms->Uniforms[location]; + int start = -1; + if (uniform.Type->is_sampler()) + { + start = uniform.Pos + program->Uniforms->Slots; + assert(GL_INT == type && 1 == count); + program->ValuesUniform[start][0] = *(float *)values; + return uniform.Pos; + } + else if (uniform.Type->is_array() && uniform.Type->fields.array->is_sampler()) { + assert(0); // not implemented + } else + start = uniform.Pos; int slots = 0, elems = 0; switch (type) { case GL_INT: @@ -701,16 +823,14 @@ GLint GGLShaderUniform(gl_shader_program * program, GLint location, GLsizei coun default: assert(0); } - if (0 < start) - return -2; +// LOGD("pf2: GGLShaderUniform start=%d slots=%d elems=%d", start, slots, elems); + if (0 > start) + assert(0); if (start + slots > program->Uniforms->Slots) - return -2; + assert(0); for (int i = 0; i < slots; i++) memcpy(program->ValuesUniform + start + i, values, elems * sizeof(float)); - if (unifrom.Type->is_sampler()) - return program->ValuesUniform[start][0]; - else if (unifrom.Type->is_array() && unifrom.Type->fields.array->is_sampler()) - assert(0); +// LOGD("pf2: GGLShaderUniform copied"); return -2; } @@ -719,6 +839,7 @@ void GGLShaderUniformMatrix(gl_shader_program * program, GLint cols, GLint rows, { if (location == -1) return; + assert(!transpose); assert(cols == rows); assert(0 <= location && program->Uniforms->NumUniforms > location); int start = program->Uniforms->Uniforms[location].Pos; @@ -728,8 +849,19 @@ void GGLShaderUniformMatrix(gl_shader_program * program, GLint cols, GLint rows, for (unsigned i = 0; i < slots; i++) { float * column = program->ValuesUniform[start + i]; for (unsigned j = 0; j < rows; j++) - column[j] = *(values++); + column[j] = values[i * 4 + j]; } + +// if (!strstr(program->Shaders[MESA_SHADER_FRAGMENT]->Source, +// "gl_FragColor = color * texture2D(sampler, outTexCoords).a;")) +// return; +// +// LOGD("pf2: GGLShaderUniformMatrix location=%d cols=%d count=%d", location, cols, count); +// +// for (unsigned i = 0; i < 4; i++) +// LOGD("pf2: GGLShaderUniformMatrix %.2f \t %.2f \t %.2f \t %.2f \n", values[i * 4 + 0], +// values[i * 4 + 1], values[i * 4 + 2], values[i * 4 + 3]); + } static void ShaderVerifyProcessVertex(const GGLInterface * iface, const VertexInput * input, @@ -814,7 +946,9 @@ void InitializeShaderFunctions(struct GGLInterface * iface) iface->ShaderUse = ShaderUse; iface->ShaderProgramDelete = ShaderProgramDelete; iface->ShaderGetiv = GGLShaderGetiv; + iface->ShaderGetInfoLog = GGLShaderGetInfoLog; iface->ShaderProgramGetiv = GGLShaderProgramGetiv; + iface->ShaderProgramGetInfoLog = GGLShaderProgramGetInfoLog; iface->ShaderAttributeBind = GGLShaderAttributeBind; iface->ShaderAttributeLocation = GGLShaderAttributeLocation; iface->ShaderVaryingLocation = GGLShaderVaryingLocation; |