diff options
author | David Li <davidxli@google.com> | 2011-02-02 12:17:56 -0800 |
---|---|---|
committer | David Li <davidxli@google.com> | 2011-02-02 12:17:56 -0800 |
commit | e82376d380005c21cb70637d42104fcd4d652843 (patch) | |
tree | be9d0953aba7bac2c09564e0955d24c71e92653c | |
parent | d274f94df69a016386195efcf0640802c7d7d2dc (diff) |
Checkpoint on functional shader functions in pixelflinger2.
Remaining TODO: state change, scanline codegen.
Signed-off-by: David Li <davidxli@google.com>
-rw-r--r-- | include/pixelflinger2/pixelflinger2_interface.h | 6 | ||||
-rw-r--r-- | src/glsl/ir_to_llvm.cpp | 46 | ||||
-rw-r--r-- | src/glsl/ir_to_llvm.h | 3 | ||||
-rw-r--r-- | src/glsl/linker.cpp | 13 | ||||
-rw-r--r-- | src/glsl/main.cpp | 214 | ||||
-rw-r--r-- | src/mesa/main/mtypes.h | 6 | ||||
-rw-r--r-- | src/pixelflinger2/pixelflinger2.h | 215 | ||||
-rw-r--r-- | src/pixelflinger2/raster.cpp | 526 | ||||
-rw-r--r-- | src/pixelflinger2/scanline.cpp | 879 | ||||
-rw-r--r-- | src/pixelflinger2/shader.cpp | 367 |
10 files changed, 1119 insertions, 1156 deletions
diff --git a/include/pixelflinger2/pixelflinger2_interface.h b/include/pixelflinger2/pixelflinger2_interface.h index 0be62a6..e597d07 100644 --- a/include/pixelflinger2/pixelflinger2_interface.h +++ b/include/pixelflinger2/pixelflinger2_interface.h @@ -152,13 +152,13 @@ struct GGLInterface { // creates empty program gl_shader_program_t * (* ShaderProgramCreate)(const GGLInterface_t * iface); - + // attaches a shader to program void (* ShaderAttach)(const GGLInterface * iface, gl_shader_program_t * program, gl_shader_t * shader); - + // detaches a shader from program void (* ShaderDetach)(const GGLInterface * iface, gl_shader_program_t * program, gl_shader_t * shader); - + // duplicates shaders to program, and links varyings / attributes; can link 1 shader GLboolean (* ShaderProgramLink)(const GGLInterface_t * iface, gl_shader_program_t * program, char ** infoLog); diff --git a/src/glsl/ir_to_llvm.cpp b/src/glsl/ir_to_llvm.cpp index 482d898..c069712 100644 --- a/src/glsl/ir_to_llvm.cpp +++ b/src/glsl/ir_to_llvm.cpp @@ -61,6 +61,7 @@ using namespace tr1; #include "ir.h" #include "ir_visitor.h" #include "glsl_types.h" +#include "src/mesa/main/mtypes.h" struct GGLContext; @@ -84,10 +85,11 @@ public: llvm::IRBuilder<> bld; const GGLContext * gglCtx; - - ir_to_llvm_visitor(llvm::LLVMContext& p_ctx, llvm::Module* p_mod, const GGLContext * GGLCtx) - : ctx(p_ctx), mod(p_mod), fun(0), loop(std::make_pair((llvm::BasicBlock*)0, - (llvm::BasicBlock*)0)), bb(0), bld(ctx), gglCtx(GGLCtx) + const char * shaderSuffix; + + ir_to_llvm_visitor(llvm::Module* p_mod, const GGLContext * GGLCtx, const char * suffix) + : ctx(p_mod->getContext()), mod(p_mod), fun(0), loop(std::make_pair((llvm::BasicBlock*)0, + (llvm::BasicBlock*)0)), bb(0), bld(ctx), gglCtx(GGLCtx), shaderSuffix(suffix) { } @@ -181,18 +183,24 @@ public: } } - typedef std::map<ir_function_signature*, llvm::Function*> llvm_functions_t; + //typedef std::map<ir_function_signature*, llvm::Function*> llvm_functions_t; //typedef 
std::unordered_map<ir_function_signature*, llvm::Function*> llvm_functions_t; - llvm_functions_t llvm_functions; + //llvm_functions_t llvm_functions; llvm::Function* llvm_function(class ir_function_signature* sig) { - llvm_functions_t::iterator funi = llvm_functions.find(sig); - if(funi != llvm_functions.end()) - return funi->second; + const char* name = sig->function_name(); + char * functionName = (char *)malloc(strlen(name) + strlen(shaderSuffix) + 1); + strcpy(functionName, name); + strcat(functionName, shaderSuffix); + llvm::Function * function = mod->getFunction(functionName); + if (function) + { + free(functionName); + return function; + } else { - const char* name = sig->function_name(); llvm::Function::LinkageTypes linkage; if(!strcmp(name, "main") || !sig->is_defined) linkage = llvm::Function::ExternalLinkage; @@ -205,12 +213,10 @@ public: } llvm::FunctionType* ft = llvm::FunctionType::get(llvm_type(sig->return_type), params, false); - - llvm::Function* f = llvm::Function::Create(ft, linkage, name, mod); - llvm_functions[sig] = f; - return f; + function = llvm::Function::Create(ft, linkage, functionName, mod); + free(functionName); + return function; } - } llvm::Value* llvm_value(class ir_instruction* ir) @@ -1257,20 +1263,18 @@ public: }; struct llvm::Module * -glsl_ir_to_llvm_module(struct exec_list *ir, const GGLContext * gglCtx) +glsl_ir_to_llvm_module(struct exec_list *ir, llvm::Module * mod, + const struct GGLContext * gglCtx, const char * shaderSuffix) { - llvm::LLVMContext& ctx = llvm::getGlobalContext(); - llvm::Module* mod = new llvm::Module("glsl", ctx); - ir_to_llvm_visitor v(ctx, mod, gglCtx); + ir_to_llvm_visitor v(mod, gglCtx, shaderSuffix); visit_exec_list(ir, &v); // mod->dump(); if(llvm::verifyModule(*mod, llvm::PrintMessageAction, 0)) { - delete mod; assert(0); - return 0; + return NULL; } return mod; diff --git a/src/glsl/ir_to_llvm.h b/src/glsl/ir_to_llvm.h index fa2154c..5a3cc44 100644 --- a/src/glsl/ir_to_llvm.h +++ 
b/src/glsl/ir_to_llvm.h @@ -4,6 +4,7 @@ #include "llvm/Module.h" #include "ir.h" -struct llvm::Module * glsl_ir_to_llvm_module(struct exec_list *ir, const struct GGLContext * gglCtx); +struct llvm::Module * glsl_ir_to_llvm_module(struct exec_list *ir, llvm::Module * mod, + const struct GGLContext * gglCtx, const char * shaderSuffix); #endif /* IR_TO_LLVM_H_ */ diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 18e5154..9c57339 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -989,7 +989,7 @@ update_array_sizes(struct gl_shader_program *prog) static int // returns location assigned add_uniform(void *mem_ctx, exec_list *uniforms, struct hash_table *ht, const char *name, const glsl_type *type, GLenum shader_type, - unsigned *next_shader_pos, unsigned *total_uniforms, unsigned *next_sampler_pos) + unsigned *next_shader_pos, unsigned *total_uniforms, unsigned *next_sampler_pos, unsigned * samplers_used) { int index = -1; if (type->is_record()) { @@ -999,7 +999,7 @@ add_uniform(void *mem_ctx, exec_list *uniforms, struct hash_table *ht, type->fields.structure[i].name); int firstIndex = add_uniform(mem_ctx, uniforms, ht, field_name, field_type, - shader_type, next_shader_pos, total_uniforms, next_sampler_pos); + shader_type, next_shader_pos, total_uniforms, next_sampler_pos, samplers_used); if (i == 0) index = firstIndex; } @@ -1015,7 +1015,7 @@ add_uniform(void *mem_ctx, exec_list *uniforms, struct hash_table *ht, for (unsigned int i = 0; i < type->length; i++) { char *elem_name = hieralloc_asprintf(mem_ctx, "%s[%d]", name, i); int firstIndex = add_uniform(mem_ctx, uniforms, ht, elem_name, array_elem_type, - shader_type, next_shader_pos, total_uniforms, next_sampler_pos); + shader_type, next_shader_pos, total_uniforms, next_sampler_pos, samplers_used); if (i == 0) index = firstIndex; } @@ -1056,6 +1056,9 @@ add_uniform(void *mem_ctx, exec_list *uniforms, struct hash_table *ht, hash_table_insert(ht, n, name); uniforms->push_tail(&n->link); } + + if 
(type->is_sampler() || (array_elem_type && array_elem_type->is_sampler())) + (*samplers_used) |= 1 << n->u->Pos; index = n->u->Pos; } return index; @@ -1078,7 +1081,7 @@ assign_uniform_locations(struct gl_shader_program *prog) if (prog->_LinkedShaders[i] == NULL) continue; - + prog->_LinkedShaders[i]->SamplersUsed = 0; foreach_list(node, prog->_LinkedShaders[i]->ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); @@ -1095,7 +1098,7 @@ assign_uniform_locations(struct gl_shader_program *prog) var->location = add_uniform(mem_ctx, &uniforms, ht, var->name, var->type, prog->_LinkedShaders[i]->Type, - &next_position, &total_uniforms, &next_sampler_pos); + &next_position, &total_uniforms, &next_sampler_pos, &prog->_LinkedShaders[i]->SamplersUsed); } } diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 2b06280..2ce7ed6 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -190,12 +190,6 @@ compile_shader(struct gl_context *ctx, struct gl_shader *shader) return; } -struct SymbolLookupContext -{ - const GGLContext * gglCtx; - const gl_shader_program * program; - const gl_shader * shader; -}; #define DRAW_TO_SCREEN 1 #include "image_file.h" @@ -206,9 +200,9 @@ extern "C" void * PresentDrawingSurface(); extern "C" void DisposeDrawingSurface(); #endif -void execute(SymbolLookupContext * ctx) +void execute(const GGLContext * ctx) { - const gl_shader * shader = ctx->shader; + const gl_shader * shader = ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; #if defined __arm__ && DRAW_TO_SCREEN unsigned width = 0, height = 0, bpp = 0; int err = SetupDrawingSurface(&width, &height, &bpp); @@ -220,13 +214,13 @@ void execute(SymbolLookupContext * ctx) unsigned * frameSurface = new unsigned [width * height]; #endif //const unsigned scale = 16, portWidth = 80, portHeight = 50; - //unsigned scale = 1, portWidth = width / scale, portHeight = height / scale; - unsigned scale = 1, portWidth = width / 4, portHeight = height / 4; + unsigned scale = 
1, portWidth = width / scale, portHeight = height / scale; + //unsigned scale = 1, portWidth = width / 4, portHeight = height / 4; - float * uniform = (float *)ctx->program->ValuesUniform; - float * attribute = (float *)ctx->program->ValuesVertexInput; - float * varying = (float *)ctx->program->ValuesVertexOutput; - float * output = ((VertexOutput*)ctx->program->ValuesVertexOutput)->fragColor[0].f; + float * uniform = (float *)ctx->glCtx->CurrentProgram->ValuesUniform; + float * attribute = (float *)ctx->glCtx->CurrentProgram->ValuesVertexInput; + float * varying = (float *)ctx->glCtx->CurrentProgram->ValuesVertexOutput; + float * output = ((VertexOutput*)ctx->glCtx->CurrentProgram->ValuesVertexOutput)->fragColor[0].f; int glFragColorLocation = 0; int vTexCoordLocation = -1; if (shader->symbols->get_variable("vTexCoord")) @@ -328,144 +322,6 @@ void execute(SymbolLookupContext * ctx) } -#if USE_LLVM_EXECUTIONENGINE - -#include <llvm/ExecutionEngine/JIT.h> -#include <llvm/Target/TargetSelect.h> - -/*void jit(llvm::Module * mod, gl_shader * shader) -{ -#ifndef __arm__ - __attribute__ ((aligned (16))) // LLVM generates movaps on X86, needs 16 bytes align -#endif - float data [64]; - memset(data, 0xff, sizeof(data)); - - llvm::InitializeNativeTarget(); - - std::string errorString; - llvm::EngineBuilder engineBuilder(mod); - engineBuilder.setEngineKind(llvm::EngineKind::JIT); - engineBuilder.setErrorStr(&errorString); -#ifdef __arm__ - engineBuilder.setMAttrs(llvm::SmallVector<std::string, 1>(1,"vfp3")); - mod->setTargetTriple("armv7-none-linux-gnueabi"); -#endif - - llvm::ExecutionEngine * ee = engineBuilder.create(); - if (!ee) - puts(errorString.c_str()); - assert(ee); - - ee->DisableLazyCompilation(); - - if ((mod->getFunction("putchar"))) - ee->updateGlobalMapping(mod->getFunction("putchar"), (void *)putchar); - if ((mod->getFunction("sinf"))) - ee->updateGlobalMapping(mod->getFunction("sinf"), (void *)sinf); - if ((mod->getFunction("cosf"))) - 
ee->updateGlobalMapping(mod->getFunction("cosf"), (void *)cosf); - if ((mod->getFunction("powf"))) - ee->updateGlobalMapping(mod->getFunction("powf"), (void *)cosf); - - ee->updateGlobalMapping(mod->getGlobalVariable("gl_FragColor"), (void *)(data + 0)); - ee->updateGlobalMapping(mod->getGlobalVariable("gl_FragCoord"), (void *)(data + 4)); - ee->updateGlobalMapping(mod->getGlobalVariable("gl_FrontFacing"), (void *)(data + 8)); - ee->updateGlobalMapping(mod->getGlobalVariable("vTexCoord"), (void *)(data + 12)); - ee->updateGlobalMapping(mod->getGlobalVariable("t"), (void *)(data + 36)); - - llvm::Function * func = mod->getFunction("main"); - assert(func); - - void (* function)() = (void (*)())ee->getPointerToFunction(func); - execute(function, data); - puts("USE_LLVM_EXECUTIONENGINE"); -}*/ - -#else - -#include <bcc/bcc.h> -#include <dlfcn.h> - -static void* symbolLookup(void* pContext, const char* name) -{ - SymbolLookupContext * ctx = (SymbolLookupContext *)pContext; - const gl_shader * shader = ctx->shader; - const gl_shader_program * program = ctx->program; - const GGLContext * gglCtx = ctx->gglCtx; - const void * symbol = (void*)dlsym(RTLD_DEFAULT, name); - if (NULL == symbol) { - if (!strcmp(_PF2_TEXTURE_DATA_NAME_, name)) - symbol = (void *)gglCtx->textureState.textureData; - else if (!strcmp(_PF2_TEXTURE_DIMENSIONS_NAME_, name)) - symbol = (void *)gglCtx->textureState.textureDimensions; - else - { - for (unsigned i = 0; i < program->Uniforms->NumUniforms && !symbol; i++) - if (!strcmp(program->Uniforms->Uniforms[i].Name, name)) - symbol = program->ValuesUniform + program->Uniforms->Uniforms[i].Pos; - for (unsigned i = 0; i < program->Attributes->NumParameters && !symbol; i++) - if (!strcmp(program->Attributes->Parameters[i].Name, name)) - { - assert(program->Attributes->Parameters[i].Location - < sizeof(VertexInput) / sizeof(float[4])); - symbol = program->ValuesVertexInput + program->Attributes->Parameters[i].Location; - } - for (unsigned i = 0; i < 
program->Varying->NumParameters && !symbol; i++) - if (!strcmp(program->Varying->Parameters[i].Name, name)) - { - int index = -1; - if (GL_VERTEX_SHADER == shader->Type) - index = program->Varying->Parameters[i].BindLocation; - else if (GL_FRAGMENT_SHADER == shader->Type) - index = program->Varying->Parameters[i].Location; - else - assert(0); - assert(index >= 0); - assert(index < sizeof(VertexOutput) / sizeof(float[4])); - symbol = program->ValuesVertexOutput + index; - } - assert(symbol >= program->ValuesVertexInput && - symbol < (char *)program->ValuesUniform + 16 * program->Uniforms->Slots - 3); - }; - } - printf("symbolLookup '%s'=%p \n", name, symbol); - //getchar(); - assert(symbol); - return (void *)symbol; -} - -void jit(gl_shader * shader, gl_shader_program * program, const GGLContext * gglCtx) -{ - SymbolLookupContext ctx = {gglCtx, program, shader}; - - BCCScriptRef script = bccCreateScript(); - bccReadModule(script, "glsl", (LLVMModuleRef)shader->module, 0); - int result = 0; - assert(0 == bccGetError(script)); - bccRegisterSymbolCallback(script, symbolLookup, &ctx); - assert(0 == bccGetError(script)); - bccPrepareExecutable(script, NULL, 0); - result = bccGetError(script); - if (result != 0) { - puts("failed bcc_compile"); - assert(0); - return; - } - - shader->function = (void (*)())bccGetFuncAddr(script, "main"); - result = bccGetError(script); - if (result != BCC_NO_ERROR) - fprintf(stderr, "Could not find '%s': %d\n", "main", result); - else - printf("bcc_compile %s=%p \n", "main", shader->function); - - if (GL_FRAGMENT_SHADER == shader->Type) - execute(&ctx); -} - -#endif - int main(int argc, char **argv) { @@ -571,46 +427,48 @@ main(int argc, char **argv) } puts("jit"); - + GGLTexture texture = {0}; LoadTGA(texturePath, &texture.width, &texture.height, &texture.levels); texture.format = GGL_PIXEL_FORMAT_RGBA_8888; texture.type = GL_TEXTURE_2D; texture.levelCount = 1; texture.wrapS = texture.wrapT = 0; // repeat = 0 fastest, clamp = 1, mirrored = 
2 + texture.minFilter = texture.magFilter = 0; // nearest = 0, linear = 1 + ggl->SetSampler(ggl, 0, &texture); + + ggl->ShaderUse(ggl, program); + texture.minFilter = texture.magFilter = 1; // nearest = 0, linear = 1 ggl->SetSampler(ggl, 0, &texture); + ggl->ShaderUse(ggl, program); + static unsigned cubeTextureSurface [6] = {0xff0000ff, 0xff00ff00, 0xffff0000, 0xff00ffff, 0xffffff00, 0xffff00ff}; GGLTexture cubeTexture = {GL_TEXTURE_CUBE_MAP, GGL_PIXEL_FORMAT_RGBA_8888, 1, 1, 1, cubeTextureSurface, 1, 2, 1, 1}; - for (unsigned i = 0; do_jit && i < MESA_SHADER_TYPES; i++) { - struct gl_shader *shader = program->_LinkedShaders[i]; - if (!shader) - continue; - ir_variable * sampler = NULL; - if ((sampler = shader->symbols->get_variable("samp2D")) && sampler->location >= 0) - ggl->SetSampler(ggl, sampler->location, &texture); - if ((sampler = shader->symbols->get_variable("samp2DA")) && sampler->location >= 0) - ggl->SetSampler(ggl, sampler->location, &texture); - if ((sampler = shader->symbols->get_variable("sampCube")) && sampler->location >= 0) - ggl->SetSampler(ggl, sampler->location, &cubeTexture); - - do_mat_op_to_vec(shader->ir); - - puts("\n *** IR for JIT *** \n"); - //_mesa_print_ir(ir, NULL); - llvm::Module * module = glsl_ir_to_llvm_module(shader->ir, (GGLContext *)ggl); - assert(module); - shader->module = module; - puts("\n *** Module for JIT *** \n"); - //module->dump(); - jit(shader, program, (GGLContext *)ggl); - - puts("jitted"); - } + int samplerLocation = -1; + if (0 <= (samplerLocation = ggl->ShaderUniformLocation(ggl, program, "samp2D"))) + ggl->SetSampler(ggl, samplerLocation, &texture); + if (0 <= (samplerLocation = ggl->ShaderUniformLocation(ggl, program, "samp2DA"))) + ggl->SetSampler(ggl, samplerLocation, &texture); + if (0 <= (samplerLocation = ggl->ShaderUniformLocation(ggl, program, "sampCube"))) + ggl->SetSampler(ggl, samplerLocation, &texture); + + execute((GGLContext *)ggl); +// puts("\n *** IR for JIT *** \n"); +// 
//_mesa_print_ir(ir, NULL); +// +// shader->executable = hieralloc_zero(shader, Executable); +// llvm::Module * module = glsl_ir_to_llvm_module(shader->ir, (GGLContext *)ggl); +// assert(module); +// shader->executable->module = module; +// puts("\n *** Module for JIT *** \n"); +// //module->dump(); +// jit(shader, program, (GGLContext *)ggl); +// puts("jitted"); free(texture.levels); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 095835c..b6eba89 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2078,9 +2078,9 @@ struct gl_shader struct exec_list *ir; struct glsl_symbol_table *symbols; - void * module; - void (*function)(); - + struct Executable * executable; + void (*function)(); /**< the active function */ + unsigned SamplersUsed; /**< bitfield of samplers used by shader */ /** Shaders containing built-in functions that are used for linking. */ struct gl_shader *builtins_to_link[16]; unsigned num_builtins_to_link; diff --git a/src/pixelflinger2/pixelflinger2.h b/src/pixelflinger2/pixelflinger2.h index 54e7a29..cdc2b9c 100644 --- a/src/pixelflinger2/pixelflinger2.h +++ b/src/pixelflinger2/pixelflinger2.h @@ -1,17 +1,17 @@ -/** +/** ** ** Copyright 2010, The Android Open Source Project ** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at ** - ** http://www.apache.org/licenses/LICENSE-2.0 + ** http://www.apache.org/licenses/LICENSE-2.0 ** - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and ** limitations under the License. */ @@ -19,7 +19,7 @@ #define _PIXELFLINGER2_H_ #define USE_LLVM_TEXTURE_SAMPLER 1 -#define USE_LLVM_SCANLINE 1 +#define USE_LLVM_SCANLINE 0 #ifndef USE_LLVM_EXECUTIONENGINE #define USE_LLVM_EXECUTIONENGINE 0 // 1 to use llvm::Execution, 0 to use libBCC, requires modifying makefile @@ -29,6 +29,8 @@ #include "pixelflinger2/pixelflinger2_interface.h" +#include <string.h> + #ifndef MIN2 # define MIN2(a, b) ((a) < (b) ? (a) : (b)) #endif @@ -36,8 +38,9 @@ # define MAX2(a, b) ((a) > (b) ? (a) : (b)) #endif -namespace llvm { - class LLVMContext; +namespace llvm +{ +class LLVMContext; }; #if !USE_LLVM_SCANLINE @@ -48,97 +51,104 @@ typedef int BlendComp_t; #define GGL_GET_CONST_CONTEXT(context, interface) const GGLContext * context = \ (const GGLContext *)interface; (void)context; -struct GGLContext -{ - GGLInterface interface; // must be first member so that GGLContext * == GGLInterface * - - GGLSurface frameSurface; - GGLSurface depthSurface; - GGLSurface stencilSurface; - - struct gl_context * glCtx; // hieralloc; mesa constants and others used for shader compiling and executing - llvm::LLVMContext * llvmCtx; - - struct - { - int depth; // assuming ieee 754 32 bit float and 32 bit 2's complement int; z_32 - unsigned color; // clear value; rgba_8888 - unsigned stencil; // s_8; repeated to clear 4 pixels at a time - } clearState; - - struct StencilState - { - unsigned char ref, mask; // ref is masked during StencilFuncSeparate - - // GL_NEVER = 0, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, - // GL_ALWAYS; value = GLenum & 0x7 (GLenum is 0x200-0x207) - 
unsigned char func; // compare function - - // GL_ZERO = 0, GL_KEEP = 1, GL_REPLACE, GL_INCR, GL_DECR, GL_INVERT, GL_INCR_WRAP, - // GL_DECR_WRAP = 7; value = 0 | GLenum - GL_KEEP | GL_INVERT | GLenum - GL_INCR_WRAP - unsigned char sFail, dFail, dPass; // operations - } frontStencil, backStencil; // all affect scanline jit - - mutable struct ActiveStencilState // do not change layout, used in GenerateScanLine - { - unsigned char face; // FRONT = 0, BACK = 1 - unsigned char ref, mask; - } activeStencil; // after primitive assembly, call StencilSelect - - struct BufferState // all affect scanline jit - { - unsigned stencilTest : 1; - unsigned depthTest : 1; - // same as sf/bFunc; GL_NEVER = 0, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, - // GL_GEQUAL, GL_ALWAYS = 7; value = GLenum & 0x7 (GLenum is 0x200-0x207) - unsigned depthFunc : 3; - } bufferState; - - struct BlendState // all values affect scanline jit - { +struct GGLContext { + GGLInterface interface; // must be first member so that GGLContext * == GGLInterface * + + GGLSurface frameSurface; + GGLSurface depthSurface; + GGLSurface stencilSurface; + + struct gl_context * glCtx; // hieralloc; mesa constants and others used for shader compiling and executing + llvm::LLVMContext * llvmCtx; + + struct { + int depth; // assuming ieee 754 32 bit float and 32 bit 2's complement int; z_32 + unsigned color; // clear value; rgba_8888 + unsigned stencil; // s_8; repeated to clear 4 pixels at a time + } clearState; + + struct StencilState { + unsigned char ref, mask; // ref is masked during StencilFuncSeparate + + // GL_NEVER = 0, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, + // GL_ALWAYS; value = GLenum & 0x7 (GLenum is 0x200-0x207) + unsigned char func; // compare function + + // GL_ZERO = 0, GL_KEEP = 1, GL_REPLACE, GL_INCR, GL_DECR, GL_INVERT, GL_INCR_WRAP, + // GL_DECR_WRAP = 7; value = 0 | GLenum - GL_KEEP | GL_INVERT | GLenum - GL_INCR_WRAP + unsigned char sFail, dFail, dPass; // 
operations + } frontStencil, backStencil; // all affect scanline jit + + mutable struct ActiveStencilState { // do not change layout, used in GenerateScanLine + unsigned char face; // FRONT = 0, BACK = 1 + unsigned char ref, mask; + } activeStencil; // after primitive assembly, call StencilSelect + + struct BufferState { // all affect scanline jit +unsigned stencilTest : + 1; +unsigned depthTest : + 1; + // same as sf/bFunc; GL_NEVER = 0, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, + // GL_GEQUAL, GL_ALWAYS = 7; value = GLenum & 0x7 (GLenum is 0x200-0x207) +unsigned depthFunc : + 3; + } bufferState; + + struct BlendState { // all values affect scanline jit #if USE_LLVM_SCANLINE - unsigned char color[4]; // rgba[0,255] + unsigned char color[4]; // rgba[0,255] #else - Vec4<BlendComp_t> color; -#endif - - unsigned scf : 4, saf : 4, dcf : 4, daf : 4; // GL_ZERO = 0, GL_ONE, GL_SRC_COLOR = 2, - // GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, - // GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA, GL_DST_COLOR, GL_ONE_MINUS_DST_COLOR, - // GL_SRC_ALPHA_SATURATE, GL_CONSTANT_COLOR = 11, GL_ONE_MINUS_CONSTANT_COLOR, - // GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA; - // value = 0,1 | GLenum - GL_SRC_COLOR + 2 | GLenum - GL_CONSTANT_COLOR + 11 - - unsigned ce : 3, ae : 3; // GL_FUNC_ADD = 0, GL_FUNC_SUBTRACT = 4, - // GL_FUNC_REVERSE_SUBTRACT = 5; value = GLenum - GL_FUNC_ADD - - unsigned enable : 1; - } blendState; - - struct - { - // format affects vs and fs jit - GGLTexture textures[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; // the active samplers - // array of pointers to texture surface data; used by LLVM generated texture sampler - void * textureData[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; - // array of texture dimensions; used by LLVM generated texture sampler - unsigned textureDimensions[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS * 2]; - } textureState; - - // called by ShaderUse to set to proper rendering functions - void (* PickScanLine)(GGLInterface * iface); - void 
(* PickRaster)(GGLInterface * iface); - - // viewport params are transformed so that Zw = Zd * f + n - // and Xw/Yw = x/y + Xd/Yd * w/h - struct { VectorComp_t x, y, w, h, n, f; } viewport; // should be moved into libAgl2 - - struct // should be moved into libAgl2 - { - unsigned enable : 1; - unsigned frontFace : 1; // GL_CW = 0, GL_CCW, actual value is GLenum - GL_CW - unsigned cullFace : 2; // GL_FRONT = 0, GL_BACK, GL_FRONT_AND_BACK, value = GLenum - GL_FRONT - } cullState; + Vec4<BlendComp_t> color; +#endif + +unsigned scf : +4, saf : +4, dcf : +4, daf : + 4; // GL_ZERO = 0, GL_ONE, GL_SRC_COLOR = 2, + // GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, + // GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA, GL_DST_COLOR, GL_ONE_MINUS_DST_COLOR, + // GL_SRC_ALPHA_SATURATE, GL_CONSTANT_COLOR = 11, GL_ONE_MINUS_CONSTANT_COLOR, + // GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA; + // value = 0,1 | GLenum - GL_SRC_COLOR + 2 | GLenum - GL_CONSTANT_COLOR + 11 + +unsigned ce : +3, ae : + 3; // GL_FUNC_ADD = 0, GL_FUNC_SUBTRACT = 4, + // GL_FUNC_REVERSE_SUBTRACT = 5; value = GLenum - GL_FUNC_ADD + +unsigned enable : + 1; + } blendState; + + struct { + // format affects vs and fs jit + GGLTexture textures[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; // the active samplers + // array of pointers to texture surface data; used by LLVM generated texture sampler + void * textureData[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; + // array of texture dimensions; used by LLVM generated texture sampler + unsigned textureDimensions[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS * 2]; + } textureState; + + // called by ShaderUse to set to proper rendering functions + void (* PickScanLine)(GGLInterface * iface); + void (* PickRaster)(GGLInterface * iface); + + // viewport params are transformed so that Zw = Zd * f + n + // and Xw/Yw = x/y + Xd/Yd * w/h + struct { + VectorComp_t x, y, w, h, n, f; + } viewport; // should be moved into libAgl2 + + struct { // should be moved into libAgl2 +unsigned enable : + 1; 
+unsigned frontFace : + 1; // GL_CW = 0, GL_CCW, actual value is GLenum - GL_CW +unsigned cullFace : + 2; // GL_FRONT = 0, GL_BACK, GL_FRONT_AND_BACK, value = GLenum - GL_FRONT + } cullState; }; #define _PF2_TEXTURE_DATA_NAME_ "gl_PF2TEXTURE_DATA" /* sampler data pointers used by LLVM */ @@ -155,7 +165,6 @@ void InitializeTextureFunctions(GGLInterface * iface); void InitializeShaderFunctions(GGLInterface * iface); // set function pointers and create needed objects void SetShaderVerifyFunctions(GGLInterface * iface); // called by state change functions void DestroyShaderFunctions(GGLInterface * iface); // destroy needed objects -// actual gl_shader and gl_shader_program is created and destroyed by ShaderCreate/Free, -// and ShaderProgramCreate/Free. +// actual gl_shader and gl_shader_program is created and destroyed by Shader(Program)Create/Delete, #endif // #ifndef _PIXELFLINGER2_H_ diff --git a/src/pixelflinger2/raster.cpp b/src/pixelflinger2/raster.cpp index cc2695c..23a5cef 100644 --- a/src/pixelflinger2/raster.cpp +++ b/src/pixelflinger2/raster.cpp @@ -1,25 +1,27 @@ -/** +/** ** ** Copyright 2010, The Android Open Source Project ** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at ** - ** http://www.apache.org/licenses/LICENSE-2.0 + ** http://www.apache.org/licenses/LICENSE-2.0 ** - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- ** See the License for the specific language governing permissions and + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and ** limitations under the License. */ #include <stdlib.h> #include <assert.h> #include <math.h> +#include <string.h> #include "pixelflinger2.h" +#include "src/mesa/main/mtypes.h" #ifdef SHADER_SOA static struct tgsi_exec_machine machine; @@ -27,283 +29,265 @@ static struct tgsi_exec_machine machine; static inline void LerpVector4(const Vector4 * a, const Vector4 * b, const VectorComp_t x, Vector4 * d) __attribute__((always_inline)); -static inline void LerpVector4(const Vector4 * a, const Vector4 * b, +static inline void LerpVector4(const Vector4 * a, const Vector4 * b, const VectorComp_t x, Vector4 * d) { - assert(a != d && b != d); - //d = (b - a) * x + a; - (*d) = (*b); - (*d) -= (*a); - (*d) *= x; - (*d) += (*a); + assert(a != d && b != d); + //d = (b - a) * x + a; + (*d) = (*b); + (*d) -= (*a); + (*d) *= x; + (*d) += (*a); } -static inline void InterpolateVertex(const VertexOutput * a, const VertexOutput * b, const VectorComp_t x, - VertexOutput * v, const unsigned varyingCount) +static inline void InterpolateVertex(const VertexOutput * a, const VertexOutput * b, const VectorComp_t x, + VertexOutput * v, const unsigned varyingCount) { - LerpVector4(&a->position, &b->position, x, &v->position); - for (unsigned i = 0; i < varyingCount; i++) - LerpVector4(a->varyings + i, b->varyings + i, x, v->varyings + i); - LerpVector4(&a->frontFacingPointCoord, &b->frontFacingPointCoord, - x, &v->frontFacingPointCoord); // gl_PointCoord - v->frontFacingPointCoord.y = a->frontFacingPointCoord.y; // gl_FrontFacing not interpolated - + LerpVector4(&a->position, &b->position, x, &v->position); + for (unsigned i = 0; i 
< varyingCount; i++) + LerpVector4(a->varyings + i, b->varyings + i, x, v->varyings + i); + LerpVector4(&a->frontFacingPointCoord, &b->frontFacingPointCoord, + x, &v->frontFacingPointCoord); // gl_PointCoord + v->frontFacingPointCoord.y = a->frontFacingPointCoord.y; // gl_FrontFacing not interpolated + } -static void ProcessVertex(const GGLInterface * iface, const VertexInput * input, +static void ProcessVertex(const GGLInterface * iface, const VertexInput * input, VertexOutput * output) -{ - GGL_GET_CONST_CONTEXT(ctx, iface); - +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + //#if !USE_LLVM_TEXTURE_SAMPLER // extern const GGLContext * textureGGLContext; // textureGGLContext = ctx; //#endif -// -// const Vector4 * constants = (Vector4 *) +// + memcpy(ctx->glCtx->CurrentProgram->ValuesVertexInput, input, sizeof(*input)); + ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_VERTEX]->function(); + memcpy(output, ctx->glCtx->CurrentProgram->ValuesVertexOutput, sizeof(*output)); +// const Vector4 * constants = (Vector4 *) // ctx->glCtx->Shader.CurrentProgram->VertexProgram->Parameters->ParameterValues; // ctx->glCtx->Shader.CurrentProgram->GLVMVP->function(input, output, constants); -// +// //#if !USE_LLVM_TEXTURE_SAMPLER // textureGGLContext = NULL; //#endif } -static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, +static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, const VertexOutput * tr, const VertexOutput * bl, const VertexOutput * br) { - GGL_GET_CONST_CONTEXT(ctx, iface); - - assert(tl->position.x <= tr->position.x && bl->position.x <= br->position.x); - assert(tl->position.y <= bl->position.y && tr->position.y <= br->position.y); - assert(fabs(tl->position.y - tr->position.y) < 1 && fabs(bl->position.y - br->position.y) < 1); - - const unsigned width = ctx->frameSurface.width, height = ctx->frameSurface.height; - const unsigned varyingCount = 0;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; - - 
- // tlv-trv and blv-brv are parallel and horizontal - VertexOutput tlv(*tl), trv(*tr), blv(*bl), brv(*br); - VertexOutput tmp; - - // vertically clip - if ((int)tlv.position.y < 0) - { - InterpolateVertex(&tlv, &blv, (0 - tlv.position.y) / (blv.position.y - tlv.position.y), - &tmp, varyingCount); - tlv = tmp; - } - if ((int)trv.position.y < 0) - { - InterpolateVertex(&trv, &brv, (0 - trv.position.y) / (brv.position.y - trv.position.y), - &tmp, varyingCount); - trv = tmp; - } - if ((int)blv.position.y >= (int)height) - { - InterpolateVertex(&tlv, &blv, (height - 1 - tlv.position.y) / (blv.position.y - tlv.position.y), - &tmp, varyingCount); - blv = tmp; - } - if ((int)brv.position.y >= (int)height) - { - InterpolateVertex(&trv, &brv, (height - 1 - trv.position.y) / (brv.position.y - trv.position.y), - &tmp, varyingCount); - brv = tmp; - } - - // horizontally clip - if ((int)tlv.position.x < 0) - { - InterpolateVertex(&tlv, &trv, (0 - tlv.position.x) / (trv.position.x - tlv.position.x), - &tmp, varyingCount); - tlv = tmp; - } - if ((int)blv.position.x < 0) - { - InterpolateVertex(&blv, &brv, (0 - blv.position.x) / (brv.position.x - blv.position.x), - &tmp, varyingCount); - blv = tmp; - } - if ((int)trv.position.x >= (int)width) - { - InterpolateVertex(&tlv, &trv, (width - 1 - tlv.position.x) / (trv.position.x - tlv.position.x), - &tmp, varyingCount); - trv = tmp; - } - if ((int)brv.position.x >= (int)width) - { - InterpolateVertex(&blv, &brv, (width - 1 - blv.position.x) / (brv.position.x - blv.position.x), - &tmp, varyingCount); - brv = tmp; - } - - const unsigned int startY = tlv.position.y; - const unsigned int endY = blv.position.y; - - if (endY < startY) - return; - - const VectorComp_t yDistInv = VectorComp_t_CTR(1.0f / (endY - startY)); - - // bV and cV are left and right vertices on a horizontal line in quad - // bDx and cDx are iterators from tlv to blv, trv to brv for bV and cV - - VertexOutput bV(tlv), cV(trv); - VertexOutput bDx(blv), cDx(brv); - - for 
(unsigned i = 0; i < varyingCount; i++) - { - bDx.varyings[i] -= tlv.varyings[i]; - bDx.varyings[i] *= yDistInv; - - cDx.varyings[i] -= trv.varyings[i]; - cDx.varyings[i] *= yDistInv; - } - - bDx.position -= tlv.position; - bDx.position *= yDistInv; - - cDx.position -= trv.position; - cDx.position *= yDistInv; - - bDx.frontFacingPointCoord -= tlv.frontFacingPointCoord; // gl_PointCoord - bDx.frontFacingPointCoord *= yDistInv; - bDx.frontFacingPointCoord.y = VectorComp_t_Zero; // gl_FrontFacing not interpolated - cDx.frontFacingPointCoord -= trv.frontFacingPointCoord; // gl_PointCoord - cDx.frontFacingPointCoord *= yDistInv; - cDx.frontFacingPointCoord.y = VectorComp_t_Zero; // gl_FrontFacing not interpolated - - for (unsigned y = startY; y <= endY; y++) - { - iface->ScanLine(iface, &bV, &cV); - - for (unsigned i = 0; i < varyingCount; i++) - { - bV.varyings[i] += bDx.varyings[i]; - cV.varyings[i] += cDx.varyings[i]; - } - - bV.position += bDx.position; - cV.position += cDx.position; - - bV.frontFacingPointCoord += bDx.frontFacingPointCoord; - cV.frontFacingPointCoord += cDx.frontFacingPointCoord; - } + GGL_GET_CONST_CONTEXT(ctx, iface); + + assert(tl->position.x <= tr->position.x && bl->position.x <= br->position.x); + assert(tl->position.y <= bl->position.y && tr->position.y <= br->position.y); + assert(fabs(tl->position.y - tr->position.y) < 1 && fabs(bl->position.y - br->position.y) < 1); + + const unsigned width = ctx->frameSurface.width, height = ctx->frameSurface.height; + const unsigned varyingCount = 8;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + + + // tlv-trv and blv-brv are parallel and horizontal + VertexOutput tlv(*tl), trv(*tr), blv(*bl), brv(*br); + VertexOutput tmp; + + // vertically clip + if ((int)tlv.position.y < 0) { + InterpolateVertex(&tlv, &blv, (0 - tlv.position.y) / (blv.position.y - tlv.position.y), + &tmp, varyingCount); + tlv = tmp; + } + if ((int)trv.position.y < 0) { + InterpolateVertex(&trv, &brv, (0 - 
trv.position.y) / (brv.position.y - trv.position.y), + &tmp, varyingCount); + trv = tmp; + } + if ((int)blv.position.y >= (int)height) { + InterpolateVertex(&tlv, &blv, (height - 1 - tlv.position.y) / (blv.position.y - tlv.position.y), + &tmp, varyingCount); + blv = tmp; + } + if ((int)brv.position.y >= (int)height) { + InterpolateVertex(&trv, &brv, (height - 1 - trv.position.y) / (brv.position.y - trv.position.y), + &tmp, varyingCount); + brv = tmp; + } + + // horizontally clip + if ((int)tlv.position.x < 0) { + InterpolateVertex(&tlv, &trv, (0 - tlv.position.x) / (trv.position.x - tlv.position.x), + &tmp, varyingCount); + tlv = tmp; + } + if ((int)blv.position.x < 0) { + InterpolateVertex(&blv, &brv, (0 - blv.position.x) / (brv.position.x - blv.position.x), + &tmp, varyingCount); + blv = tmp; + } + if ((int)trv.position.x >= (int)width) { + InterpolateVertex(&tlv, &trv, (width - 1 - tlv.position.x) / (trv.position.x - tlv.position.x), + &tmp, varyingCount); + trv = tmp; + } + if ((int)brv.position.x >= (int)width) { + InterpolateVertex(&blv, &brv, (width - 1 - blv.position.x) / (brv.position.x - blv.position.x), + &tmp, varyingCount); + brv = tmp; + } + + const unsigned int startY = tlv.position.y; + const unsigned int endY = blv.position.y; + + if (endY < startY) + return; + + const VectorComp_t yDistInv = VectorComp_t_CTR(1.0f / (endY - startY)); + + // bV and cV are left and right vertices on a horizontal line in quad + // bDx and cDx are iterators from tlv to blv, trv to brv for bV and cV + + VertexOutput bV(tlv), cV(trv); + VertexOutput bDx(blv), cDx(brv); + + for (unsigned i = 0; i < varyingCount; i++) { + bDx.varyings[i] -= tlv.varyings[i]; + bDx.varyings[i] *= yDistInv; + + cDx.varyings[i] -= trv.varyings[i]; + cDx.varyings[i] *= yDistInv; + } + + bDx.position -= tlv.position; + bDx.position *= yDistInv; + + cDx.position -= trv.position; + cDx.position *= yDistInv; + + bDx.frontFacingPointCoord -= tlv.frontFacingPointCoord; // gl_PointCoord + 
bDx.frontFacingPointCoord *= yDistInv; + bDx.frontFacingPointCoord.y = VectorComp_t_Zero; // gl_FrontFacing not interpolated + cDx.frontFacingPointCoord -= trv.frontFacingPointCoord; // gl_PointCoord + cDx.frontFacingPointCoord *= yDistInv; + cDx.frontFacingPointCoord.y = VectorComp_t_Zero; // gl_FrontFacing not interpolated + + for (unsigned y = startY; y <= endY; y++) { + iface->ScanLine(iface, &bV, &cV); + for (unsigned i = 0; i < varyingCount; i++) { + bV.varyings[i] += bDx.varyings[i]; + cV.varyings[i] += cDx.varyings[i]; + } + bV.position += bDx.position; + cV.position += cDx.position; + bV.frontFacingPointCoord += bDx.frontFacingPointCoord; + cV.frontFacingPointCoord += cDx.frontFacingPointCoord; + } } static void RasterTriangle(const GGLInterface * iface, const VertexOutput * v1, const VertexOutput * v2, const VertexOutput * v3) { - GGL_GET_CONST_CONTEXT(ctx, iface); - const unsigned varyingCount = 0;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; - const unsigned height = ctx->frameSurface.height; - const VertexOutput * a = v1, * b = v2, * d = v3; - //abc is a triangle, bcd is another triangle, they share bc as horizontal edge - //c is between a and d, xy is screen coord - - //first sort 3 vertices by MIN y first - if (v2->position.y < v1->position.y) - { - a = v2; - b = v1; - } - if (v3->position.y < a->position.y) - { - d = b; - b = a; - a = v3; - } - else if (v3->position.y < b->position.y) - { - d = b; - b = v3; - } - - assert(a->position.y <= b->position.y && b->position.y <= d->position.y); - - VertexOutput cVertex; - const VertexOutput* c = &cVertex; - - const VectorComp_t cLerp = (b->position.y - a->position.y) / - MAX2(VectorComp_t_One, (d->position.y - a->position.y)); - // create 4th vertex, same y as b to form two triangles/trapezoids sharing horizontal edge - InterpolateVertex(a, d, cLerp, &cVertex, varyingCount); - - if (c->position.x < b->position.x) - { - const VertexOutput * tmp = c; - c = b; - b = tmp; - } - - if 
((int)a->position.y < (int)height && (int)b->position.y >= 0) - RasterTrapezoid(iface, a, a, b, c); - //b->position.y += VectorComp_t_One; - //c->position.y += VectorComp_t_One; - if ((int)b->position.y < (int)height && (int)d->position.y >= 0) - RasterTrapezoid(iface, b, c, d, d); + GGL_GET_CONST_CONTEXT(ctx, iface); + const unsigned varyingCount = 0;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + const unsigned height = ctx->frameSurface.height; + const VertexOutput * a = v1, * b = v2, * d = v3; + //abc is a triangle, bcd is another triangle, they share bc as horizontal edge + //c is between a and d, xy is screen coord + + //first sort 3 vertices by MIN y first + if (v2->position.y < v1->position.y) { + a = v2; + b = v1; + } + if (v3->position.y < a->position.y) { + d = b; + b = a; + a = v3; + } else if (v3->position.y < b->position.y) { + d = b; + b = v3; + } + + assert(a->position.y <= b->position.y && b->position.y <= d->position.y); + + VertexOutput cVertex; + const VertexOutput* c = &cVertex; + + const VectorComp_t cLerp = (b->position.y - a->position.y) / + MAX2(VectorComp_t_One, (d->position.y - a->position.y)); + // create 4th vertex, same y as b to form two triangles/trapezoids sharing horizontal edge + InterpolateVertex(a, d, cLerp, &cVertex, varyingCount); + + if (c->position.x < b->position.x) { + const VertexOutput * tmp = c; + c = b; + b = tmp; + } + + if ((int)a->position.y < (int)height && (int)b->position.y >= 0) + RasterTrapezoid(iface, a, a, b, c); + //b->position.y += VectorComp_t_One; + //c->position.y += VectorComp_t_One; + if ((int)b->position.y < (int)height && (int)d->position.y >= 0) + RasterTrapezoid(iface, b, c, d, d); } -static void DrawTriangle(const GGLInterface * iface, const VertexInput * vin1, - const VertexInput * vin2, const VertexInput * vin3) +static void DrawTriangle(const GGLInterface * iface, const VertexInput * vin1, + const VertexInput * vin2, const VertexInput * vin3) { - GGL_GET_CONST_CONTEXT(ctx, 
iface); - - VertexOutput vouts[3]; - VertexOutput * v1 = vouts + 0, * v2 = vouts + 1, * v3 = vouts + 2; + GGL_GET_CONST_CONTEXT(ctx, iface); + + VertexOutput vouts[3]; + VertexOutput * v1 = vouts + 0, * v2 = vouts + 1, * v3 = vouts + 2; #ifdef SHADER_SOA - assert(0); // not implemented + assert(0); // not implemented #endif - - iface->ProcessVertex(iface, vin1, v1); - iface->ProcessVertex(iface, vin2, v2); - iface->ProcessVertex(iface, vin3, v3); - - v1->position /= v1->position.w; - v2->position /= v2->position.w; - v3->position /= v3->position.w; - - iface->ViewportTransform(iface, &v1->position); - iface->ViewportTransform(iface, &v2->position); - iface->ViewportTransform(iface, &v3->position); - - VectorComp_t area; - area = v1->position.x * v2->position.y - v2->position.x * v1->position.y; - area += v2->position.x * v3->position.y - v3->position.x * v2->position.y; - area += v3->position.x * v1->position.y - v1->position.x * v3->position.y; - area *= 0.5f; - - if (GL_CCW == ctx->cullState.frontFace + GL_CW) - (unsigned &)area ^= 0x80000000; - - if (ctx->cullState.enable) - { - switch(ctx->cullState.cullFace + GL_FRONT) - { - case GL_FRONT: - if (!((unsigned &)area & 0x80000000)) // +ve, front facing - return; - break; - case GL_BACK: - if ((unsigned &)area & 0x80000000) // -ve, back facing - return; - break; - case GL_FRONT_AND_BACK: - return; - default: - assert(0); - } - } - - v1->frontFacingPointCoord.y = v2->frontFacingPointCoord.y = - v3->frontFacingPointCoord.y = !((unsigned &)area & 0x80000000) ? - VectorComp_t_One : VectorComp_t_Zero; - - iface->StencilSelect(iface, ((unsigned &)area & 0x80000000) ? 
GL_BACK : GL_FRONT); - + + iface->ProcessVertex(iface, vin1, v1); + iface->ProcessVertex(iface, vin2, v2); + iface->ProcessVertex(iface, vin3, v3); + + v1->position /= v1->position.w; + v2->position /= v2->position.w; + v3->position /= v3->position.w; + + iface->ViewportTransform(iface, &v1->position); + iface->ViewportTransform(iface, &v2->position); + iface->ViewportTransform(iface, &v3->position); + + VectorComp_t area; + area = v1->position.x * v2->position.y - v2->position.x * v1->position.y; + area += v2->position.x * v3->position.y - v3->position.x * v2->position.y; + area += v3->position.x * v1->position.y - v1->position.x * v3->position.y; + area *= 0.5f; + + if (GL_CCW == ctx->cullState.frontFace + GL_CW) + (unsigned &)area ^= 0x80000000; + + if (ctx->cullState.enable) { + switch (ctx->cullState.cullFace + GL_FRONT) { + case GL_FRONT: + if (!((unsigned &)area & 0x80000000)) // +ve, front facing + return; + break; + case GL_BACK: + if ((unsigned &)area & 0x80000000) // -ve, back facing + return; + break; + case GL_FRONT_AND_BACK: + return; + default: + assert(0); + } + } + + v1->frontFacingPointCoord.y = v2->frontFacingPointCoord.y = + v3->frontFacingPointCoord.y = !((unsigned &)area & 0x80000000) ? + VectorComp_t_One : VectorComp_t_Zero; + + iface->StencilSelect(iface, ((unsigned &)area & 0x80000000) ? 
GL_BACK : GL_FRONT); + // if (0) // { // GGLContext * ctx =(GGLContext *)iface; @@ -324,35 +308,35 @@ static void DrawTriangle(const GGLInterface * iface, const VertexInput * vin1, // ctx->textureState.textureDimensions[sampler * 2] /= 2; // ctx->textureState.textureDimensions[sampler * 2 + 1] += 1; // ctx->textureState.textureDimensions[sampler * 2 + 1] /= 2; -// } +// } // } // } - - // TODO DXL view frustum clipping - iface->RasterTriangle(iface, v1, v2, v3); - + + // TODO DXL view frustum clipping + iface->RasterTriangle(iface, v1, v2, v3); + } static void PickRaster(GGLInterface * iface) { - iface->ProcessVertex = ProcessVertex; - iface->DrawTriangle = DrawTriangle; - iface->RasterTriangle = RasterTriangle; - iface->RasterTrapezoid = RasterTrapezoid; + iface->ProcessVertex = ProcessVertex; + iface->DrawTriangle = DrawTriangle; + iface->RasterTriangle = RasterTriangle; + iface->RasterTrapezoid = RasterTrapezoid; } -static void ViewportTransform(const GGLInterface * iface, Vector4 * v) +static void ViewportTransform(const GGLInterface * iface, Vector4 * v) { - GGL_GET_CONST_CONTEXT(ctx, iface); - v->x = v->x * ctx->viewport.w + ctx->viewport.x; - v->y = v->y * ctx->viewport.h + ctx->viewport.y; - v->z = v->z * ctx->viewport.f + ctx->viewport.n; + GGL_GET_CONST_CONTEXT(ctx, iface); + v->x = v->x * ctx->viewport.w + ctx->viewport.x; + v->y = v->y * ctx->viewport.h + ctx->viewport.y; + v->z = v->z * ctx->viewport.f + ctx->viewport.n; } void InitializeRasterFunctions(GGLInterface * iface) { - GGL_GET_CONTEXT(ctx, iface); - ctx->PickRaster = PickRaster; - iface->ViewportTransform = ViewportTransform; -}
\ No newline at end of file + GGL_GET_CONTEXT(ctx, iface); + ctx->PickRaster = PickRaster; + iface->ViewportTransform = ViewportTransform; +} diff --git a/src/pixelflinger2/scanline.cpp b/src/pixelflinger2/scanline.cpp index 52ff2d2..5ff12a6 100644 --- a/src/pixelflinger2/scanline.cpp +++ b/src/pixelflinger2/scanline.cpp @@ -1,60 +1,59 @@ -/** +/** ** ** Copyright 2010, The Android Open Source Project ** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at ** - ** http://www.apache.org/licenses/LICENSE-2.0 + ** http://www.apache.org/licenses/LICENSE-2.0 ** - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and ** limitations under the License. 
*/ -#include "src/pixelflinger2/pixelflinger2.h" -#include "src/pixelflinger2/texture.h" - -//#include "src/gallivm/gallivm_p.h" -//#include "src/shader/prog_parameter.h" - #include <assert.h> #include <stdio.h> +#include <string.h> + +#include "src/pixelflinger2/pixelflinger2.h" +#include "src/pixelflinger2/texture.h" +#include "src/mesa/main/mtypes.h" #if !USE_LLVM_SCANLINE static void Saturate(Vec4<BlendComp_t> * color) { - color->r = MIN2(MAX2(color->r, 0), 255); - color->g = MIN2(MAX2(color->g, 0), 255); - color->b = MIN2(MAX2(color->b, 0), 255); - color->a = MIN2(MAX2(color->a, 0), 255); + color->r = MIN2(MAX2(color->r, 0), 255); + color->g = MIN2(MAX2(color->g, 0), 255); + color->b = MIN2(MAX2(color->b, 0), 255); + color->a = MIN2(MAX2(color->a, 0), 255); } static inline void RGBAIntToRGBAIntx4(unsigned rgba, Vec4<BlendComp_t> * color) __attribute__((always_inline)); static inline void RGBAIntToRGBAIntx4(unsigned rgba, Vec4<BlendComp_t> * color) { - color->r = rgba & 0xff; - color->g = (rgba >>= 8) & 0xff; - color->b = (rgba >>= 8) & 0xff; - color->a = (rgba >>= 8); + color->r = rgba & 0xff; + color->g = (rgba >>= 8) & 0xff; + color->b = (rgba >>= 8) & 0xff; + color->a = (rgba >>= 8); } static inline void RGBAFloatx4ToRGBAIntx4(Vector4 * v, Vec4<BlendComp_t> * color) { - color->r = v->r * 255; - color->g = v->g * 255; - color->b = v->b * 255; - color->a = v->a * 255; + color->r = v->r * 255; + color->g = v->g * 255; + color->b = v->b * 255; + color->a = v->a * 255; } static inline unsigned RGBAIntx4ToRGBAInt(const Vec4<BlendComp_t> * color); static inline unsigned RGBAIntx4ToRGBAInt(const Vec4<BlendComp_t> * color) { - return color->r | (color->g << 8) | (color->b << 16) | (color->a << 24); + return color->r | (color->g << 8) | (color->b << 16) | (color->a << 24); } @@ -81,455 +80,463 @@ static inline unsigned RGBAIntx4ToRGBAInt(const Vec4<BlendComp_t> * color) //} template<typename T> -static inline void BlendFactor(const unsigned mode, T & factor, const T 
& src, +static inline void BlendFactor(const unsigned mode, T & factor, const T & src, const T & dst, const T & constant, const T & one, - const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA, + const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA, const BlendComp_t & constantA, const BlendComp_t & sOne) __attribute__((always_inline)); template<typename T> -static inline void BlendFactor(const unsigned mode, T & factor, const T & src, +static inline void BlendFactor(const unsigned mode, T & factor, const T & src, const T & dst, const T & constant, const T & one, - const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA, + const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA, const BlendComp_t & constantA, const BlendComp_t & sOne) { - switch (mode) - { - case 0: // GL_ZERO - factor = zero; - return; - case 1: // GL_ONE - factor = one; - return; - case 2: // GL_SRC_COLOR: - factor = src; - return; - case 3: // GL_ONE_MINUS_SRC_COLOR: - factor = one; - factor -= src; - return; - case 4: // GL_DST_COLOR: - factor = dst; - return; - case 5: // GL_ONE_MINUS_DST_COLOR: - factor = one; - factor -= dst; - return; - case 6: // GL_SRC_ALPHA: - factor = srcA; - return; - case 7: // GL_ONE_MINUS_SRC_ALPHA: - factor = sOne - srcA; - return; - case 8: // GL_DST_ALPHA: - factor = dstA; - return; - case 9: // GL_ONE_MINUS_DST_ALPHA: - factor = sOne - dstA; - return; - case 10: // GL_SRC_ALPHA_SATURATE: // valid only for source color; src alpha = 1 - factor = MIN2(srcA, sOne - dstA); - return; - case 11: // GL_CONSTANT_COLOR: - factor = constant; - return; - case 12: // GL_ONE_MINUS_CONSTANT_COLOR: - factor = one; - factor -= constant; - return; - case 13: // GL_CONSTANT_ALPHA: - factor = constantA; - return; - case 14: // GL_ONE_MINUS_CONSTANT_ALPHA: - factor = sOne - constantA; - return; - default: - assert(0); - return; - } + switch (mode) { + case 0: // GL_ZERO + factor = zero; + return; + case 1: // GL_ONE + factor = one; + 
return; + case 2: // GL_SRC_COLOR: + factor = src; + return; + case 3: // GL_ONE_MINUS_SRC_COLOR: + factor = one; + factor -= src; + return; + case 4: // GL_DST_COLOR: + factor = dst; + return; + case 5: // GL_ONE_MINUS_DST_COLOR: + factor = one; + factor -= dst; + return; + case 6: // GL_SRC_ALPHA: + factor = srcA; + return; + case 7: // GL_ONE_MINUS_SRC_ALPHA: + factor = sOne - srcA; + return; + case 8: // GL_DST_ALPHA: + factor = dstA; + return; + case 9: // GL_ONE_MINUS_DST_ALPHA: + factor = sOne - dstA; + return; + case 10: // GL_SRC_ALPHA_SATURATE: // valid only for source color; src alpha = 1 + factor = MIN2(srcA, sOne - dstA); + return; + case 11: // GL_CONSTANT_COLOR: + factor = constant; + return; + case 12: // GL_ONE_MINUS_CONSTANT_COLOR: + factor = one; + factor -= constant; + return; + case 13: // GL_CONSTANT_ALPHA: + factor = constantA; + return; + case 14: // GL_ONE_MINUS_CONSTANT_ALPHA: + factor = sOne - constantA; + return; + default: + assert(0); + return; + } } #endif // #if !USE_LLVM_SCANLINE unsigned char StencilOp(const unsigned op, unsigned char s, const unsigned char ref) { - switch (op) - { - case 0: // GL_ZERO - return 0; - case 1: // GL_KEEP - return s; - case 2: // GL_REPLACE - return ref; - case 3: // GL_INCR - if (s < 255) - return ++s; - return s; - case 4: // GL_DECR - if (s > 0) - return --s; - return 0; - case 5: // GL_INVERT - return ~s; - case 6: // GL_INCR_WRAP - return ++s; - case 7: // GL_DECR_WRAP - return --s; - default: assert(0); return s; - } + switch (op) { + case 0: // GL_ZERO + return 0; + case 1: // GL_KEEP + return s; + case 2: // GL_REPLACE + return ref; + case 3: // GL_INCR + if (s < 255) + return ++s; + return s; + case 4: // GL_DECR + if (s > 0) + return --s; + return 0; + case 5: // GL_INVERT + return ~s; + case 6: // GL_INCR_WRAP + return ++s; + case 7: // GL_DECR_WRAP + return --s; + default: + assert(0); + return s; + } } template <bool StencilTest, bool DepthTest, bool DepthWrite, bool BlendEnable> void 
ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexOutput * v2) { - GGL_GET_CONST_CONTEXT(ctx, iface); - // assert((unsigned)v1->position.y == (unsigned)v2->position.y); - // - // assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format); - // assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); - // assert(ctx->frameSurface.width == ctx->depthSurface.width); - // assert(ctx->frameSurface.height == ctx->depthSurface.height); - - const unsigned int varyingCount = 0;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; - const unsigned y = v1->position.y, startX = v1->position.x, - endX = v2->position.x; - - //assert(ctx->frameSurface.width > startX && ctx->frameSurface.width > endX); - //assert(ctx->frameSurface.height > y); - - unsigned * frame = (unsigned *)ctx->frameSurface.data - + y * ctx->frameSurface.width + startX; - const VectorComp_t div = VectorComp_t_CTR(1 / (float)(endX - startX)); - - VertexOutput vertex(*v1); - VertexOutput vertexDx(*v2); - - vertexDx.position -= v1->position; - vertexDx.position *= div; - //printf("vertexDx.position.z=%.8g \n", vertexDx.position.z); - for (unsigned i = 0; i < varyingCount; i++) - { - vertexDx.varyings[i] -= v1->varyings[i]; - vertexDx.varyings[i] *= div; - } - vertexDx.frontFacingPointCoord -= v1->frontFacingPointCoord; - vertexDx.frontFacingPointCoord *= div; // gl_PointCoord, only zw - vertexDx.frontFacingPointCoord.y = 0; // gl_FrontFacing not interpolated - + GGL_GET_CONST_CONTEXT(ctx, iface); + // assert((unsigned)v1->position.y == (unsigned)v2->position.y); + // + // assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format); + // assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); + // assert(ctx->frameSurface.width == ctx->depthSurface.width); + // assert(ctx->frameSurface.height == ctx->depthSurface.height); + + const unsigned int varyingCount = 8;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + const unsigned y = v1->position.y, startX = 
v1->position.x, + endX = v2->position.x; + + //assert(ctx->frameSurface.width > startX && ctx->frameSurface.width > endX); + //assert(ctx->frameSurface.height > y); + + unsigned * frame = (unsigned *)ctx->frameSurface.data + + y * ctx->frameSurface.width + startX; + const VectorComp_t div = VectorComp_t_CTR(1 / (float)(endX - startX)); + + memcpy(ctx->glCtx->CurrentProgram->ValuesVertexOutput, v1, sizeof(*v1)); + VertexOutput & vertex(*(VertexOutput*)ctx->glCtx->CurrentProgram->ValuesVertexOutput); + VertexOutput vertexDx(*v2); + + vertexDx.position -= v1->position; + vertexDx.position *= div; + //printf("vertexDx.position.z=%.8g \n", vertexDx.position.z); + for (unsigned i = 0; i < varyingCount; i++) { + vertexDx.varyings[i] -= v1->varyings[i]; + vertexDx.varyings[i] *= div; + } + vertexDx.frontFacingPointCoord -= v1->frontFacingPointCoord; + vertexDx.frontFacingPointCoord *= div; // gl_PointCoord, only zw + vertexDx.frontFacingPointCoord.y = 0; // gl_FrontFacing not interpolated + #if USE_FORCED_FIXEDPOINT - for (unsigned j = 0; j < 4; j++) - { - for (unsigned i = 0; i < varyingCount; i++) - { - vertex.varyings[i].i[j] = vertex.varyings[i].f[j] * 65536; - vertexDx.varyings[i].i[j] = vertexDx.varyings[i].f[j] * 65536; - } - vertex.position.i[j] = vertex.position.f[j] * 65536; - vertexDx.position.i[j] = vertexDx.position.f[j] * 65536; - vertex.frontFacingPointCoord.i[j] = vertex.frontFacingPointCoord.f[j] * 65536; - } + for (unsigned j = 0; j < 4; j++) { + for (unsigned i = 0; i < varyingCount; i++) { + vertex.varyings[i].i[j] = vertex.varyings[i].f[j] * 65536; + vertexDx.varyings[i].i[j] = vertexDx.varyings[i].f[j] * 65536; + } + vertex.position.i[j] = vertex.position.f[j] * 65536; + vertexDx.position.i[j] = vertexDx.position.f[j] * 65536; + vertex.frontFacingPointCoord.i[j] = vertex.frontFacingPointCoord.f[j] * 65536; + } #endif - - int * depth = (int *)ctx->depthSurface.data + y * ctx->frameSurface.width + startX; - unsigned char * stencil = (unsigned char 
*)ctx->stencilSurface.data + y * ctx->frameSurface.width + startX; - + + int * depth = (int *)ctx->depthSurface.data + y * ctx->frameSurface.width + startX; + unsigned char * stencil = (unsigned char *)ctx->stencilSurface.data + y * ctx->frameSurface.width + startX; + #if !USE_LLVM_TEXTURE_SAMPLER - extern const GGLContext * textureGGLContext; - textureGGLContext = ctx; + extern const GGLContext * textureGGLContext; + textureGGLContext = ctx; #endif - - // TODO DXL consider inverting gl_FragCoord.y - + + // TODO DXL consider inverting gl_FragCoord.y + #if USE_LLVM_SCANLINE - typedef void (* ScanLineFunction_t)(VertexOutput * start, VertexOutput * step, - Vector4 * constants, unsigned * frame, - int * depth, unsigned char * stencil, - GGLContext::ActiveStencilState *, - unsigned count); - + typedef void (* ScanLineFunction_t)(VertexOutput * start, VertexOutput * step, + Vector4 * constants, unsigned * frame, + int * depth, unsigned char * stencil, + GGLContext::ActiveStencilState *, + unsigned count); + // ScanLineFunction_t scanLineFunction = (ScanLineFunction_t) // ctx->glCtx->Shader.CurrentProgram->GLVMFP->function; - if (endX >= startX) - { + if (endX >= startX) { // scanLineFunction(&vertex, &vertexDx, (Vector4 *) // ctx->glCtx->Shader.CurrentProgram->FragmentProgram->Parameters->ParameterValues, // frame, depth, stencil, &ctx->activeStencil, endX - startX + 1); - } + } #else - - int z; - bool sCmp = true; // default passed, unless failed by stencil test - unsigned char s; // masked stored stencil value - const unsigned char sMask = ctx->activeStencil.mask; - const unsigned char sRef = ctx->activeStencil.ref; - const unsigned sFunc = ctx->activeStencil.face ? 0x200 | ctx->backStencil.func : - 0x200 | ctx->frontStencil.func; - const unsigned ssFail = ctx->activeStencil.face ? ctx->backStencil.sFail : - ctx->frontStencil.sFail; - const unsigned sdFail = ctx->activeStencil.face ? 
ctx->backStencil.dFail : - ctx->frontStencil.dFail; - const unsigned sdPass = ctx->activeStencil.face ? ctx->backStencil.dPass : - ctx->frontStencil.dPass; - - for (unsigned x = startX; x <= endX; x++) - { - //assert(abs((int)(vertex.position.x) - (int)x) < 2); - //assert((unsigned)vertex.position.y == y); - if (StencilTest) - { - s = *stencil & sMask; - switch (sFunc) - { - case GL_NEVER: sCmp = false; break; - case GL_LESS: sCmp = sRef < s; break; - case GL_EQUAL: sCmp = sRef == s; break; - case GL_LEQUAL: sCmp = sRef <= s; break; - case GL_GREATER: sCmp = sRef > s; break; - case GL_NOTEQUAL: sCmp = sRef != s; break; - case GL_GEQUAL: sCmp = sRef >= s; break; - case GL_ALWAYS: sCmp = true; break; - default: assert(0); break; - } - } - - if (!StencilTest || sCmp) - { - z = vertex.position.i[2]; - if (z & 0x80000000) // negative float has leading 1 - z ^= 0x7fffffff; // bigger negative is smaller - bool zCmp = false; - switch (0x200 | ctx->bufferState.depthFunc) - { - case GL_NEVER: zCmp = false; break; - case GL_LESS: zCmp = z < *depth; break; - case GL_EQUAL: zCmp = z == *depth; break; - case GL_LEQUAL: zCmp = z <= *depth; break; - case GL_GREATER: zCmp = z > *depth; break; - case GL_NOTEQUAL: zCmp = z != *depth; break; - case GL_GEQUAL: zCmp = z >= *depth; break; - case GL_ALWAYS: zCmp = true; break; - default: assert(0); break; - } - if (!DepthTest || zCmp) - { - ShaderFunction_t function = ctx->glCtx->Shader.CurrentProgram->GLVMFP->function; - const Vector4 * inputs = &vertex.position; - const Vector4 * constants = (Vector4 *)ctx->glCtx->Shader.CurrentProgram->FragmentProgram->Parameters->ParameterValues; - Vector4 * outputs = vertex.fragColor; - function(inputs, outputs, constants); - - const RegDesc & outputRegDesc = ctx->glCtx->Shader.CurrentProgram-> - GLVMFP->outputRegDesc; - if (BlendEnable) - { - BlendComp_t sOne = 255, sZero = 0; - Vec4<BlendComp_t> one = sOne, zero = sZero; - - Vec4<BlendComp_t> src; - if (outputRegDesc.IsInt32Color()) - 
RGBAIntToRGBAIntx4(vertex.fragColor[0].u[0], &src); - else if (outputRegDesc.IsVectorType(Float)) - RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src); - else if (outputRegDesc.IsVectorType(Fixed8)) - { - src.u[0] = vertex.fragColor[0].u[0]; - src.u[1] = vertex.fragColor[0].u[1]; - src.u[2] = vertex.fragColor[0].u[2]; - src.u[3] = vertex.fragColor[0].u[3]; - } - else - assert(0); - - Vec4<BlendComp_t> dst; - unsigned dc = *frame; - dst.r = dc & 255; - dst.g = (dc >>= 8) & 255; - dst.b = (dc >>= 8) & 255; - dst.a = (dc >>= 8) & 255; - - Vec4<BlendComp_t> sf, df; - - BlendFactor(ctx->blendState.scf, sf, src, dst, - ctx->blendState.color, one, zero, src.a, dst.a, - ctx->blendState.color.a, sOne); - if (ctx->blendState.scf != ctx->blendState.saf) - BlendFactor(ctx->blendState.saf, sf.a, src.a, dst.a, - ctx->blendState.color.a, sOne, sZero, src.a, dst.a, - ctx->blendState.color.a, sOne); - BlendFactor(ctx->blendState.dcf, df, src, dst, - ctx->blendState.color, one, zero, src.a, dst.a, - ctx->blendState.color.a, sOne); - if (ctx->blendState.dcf != ctx->blendState.daf) - BlendFactor(ctx->blendState.daf, df.a, src.a, dst.a, - ctx->blendState.color.a, sOne, sZero, src.a, dst.a, - ctx->blendState.color.a, sOne); - - Vec4<BlendComp_t> sfs(sf), dfs(df); - sfs.LShr(7); sf += sfs; - dfs.LShr(7); df += dfs; - - src *= sf; - dst *= df; - Vec4<BlendComp_t> res(src); - switch (ctx->blendState.ce + GL_FUNC_ADD) - { - case GL_FUNC_ADD: - res += dst; - break; - case GL_FUNC_SUBTRACT: - res -= dst; - break; - case GL_FUNC_REVERSE_SUBTRACT: - res = dst; - res -= src; - break; - default: assert(0); break; - } - if (ctx->blendState.ce != ctx->blendState.ae) - switch (ctx->blendState.ce + GL_FUNC_ADD) - { - case GL_FUNC_ADD: - res.a = src.a + dst.a; - break; - case GL_FUNC_SUBTRACT: - res.a = src.a - dst.a; - break; - case GL_FUNC_REVERSE_SUBTRACT: - res.a = dst.a - src.a; - break; - default: assert(0); break; - } - - res.AShr(8); - Saturate(&res); - *frame = RGBAIntx4ToRGBAInt(&res); - } - 
else - { - if (outputRegDesc.IsInt32Color()) - *frame = vertex.fragColor[0].u[0]; - else if (outputRegDesc.IsVectorType(Float)) - { - Vec4<BlendComp_t> src; - RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src); - Saturate(&src); - *frame = RGBAIntx4ToRGBAInt(&src); - } - else if (outputRegDesc.IsVectorType(Fixed16)) - { - Vec4<BlendComp_t> & src = (Vec4<BlendComp_t> &)vertex.fragColor[0]; - src.r = (src.r * 255 >> 16); - src.g = (src.g * 255 >> 16); - src.b = (src.b * 255 >> 16); - src.a = (src.a * 255 >> 16); - Saturate(&src); - *frame = RGBAIntx4ToRGBAInt(&src); - } - else if (outputRegDesc.IsVectorType(Fixed8)) - { - Vec4<BlendComp_t> & src = (Vec4<BlendComp_t> &)vertex.fragColor[0]; - Saturate(&src); - *frame = RGBAIntx4ToRGBAInt(&src); - } - else - assert(0); - } - - if (DepthWrite) - *depth = z; - if (StencilTest) - *stencil = StencilOp(sdPass, s, sRef); + + int z; + bool sCmp = true; // default passed, unless failed by stencil test + unsigned char s; // masked stored stencil value + const unsigned char sMask = ctx->activeStencil.mask; + const unsigned char sRef = ctx->activeStencil.ref; + const unsigned sFunc = ctx->activeStencil.face ? 0x200 | ctx->backStencil.func : + 0x200 | ctx->frontStencil.func; + const unsigned ssFail = ctx->activeStencil.face ? ctx->backStencil.sFail : + ctx->frontStencil.sFail; + const unsigned sdFail = ctx->activeStencil.face ? ctx->backStencil.dFail : + ctx->frontStencil.dFail; + const unsigned sdPass = ctx->activeStencil.face ? 
ctx->backStencil.dPass : + ctx->frontStencil.dPass; + + for (unsigned x = startX; x <= endX; x++) { + //assert(abs((int)(vertex.position.x) - (int)x) < 2); + //assert((unsigned)vertex.position.y == y); + if (StencilTest) { + s = *stencil & sMask; + switch (sFunc) { + case GL_NEVER: + sCmp = false; + break; + case GL_LESS: + sCmp = sRef < s; + break; + case GL_EQUAL: + sCmp = sRef == s; + break; + case GL_LEQUAL: + sCmp = sRef <= s; + break; + case GL_GREATER: + sCmp = sRef > s; + break; + case GL_NOTEQUAL: + sCmp = sRef != s; + break; + case GL_GEQUAL: + sCmp = sRef >= s; + break; + case GL_ALWAYS: + sCmp = true; + break; + default: + assert(0); + break; + } + } + + if (!StencilTest || sCmp) { + z = vertex.position.i[2]; + if (z & 0x80000000) // negative float has leading 1 + z ^= 0x7fffffff; // bigger negative is smaller + bool zCmp = false; + switch (0x200 | ctx->bufferState.depthFunc) { + case GL_NEVER: + zCmp = false; + break; + case GL_LESS: + zCmp = z < *depth; + break; + case GL_EQUAL: + zCmp = z == *depth; + break; + case GL_LEQUAL: + zCmp = z <= *depth; + break; + case GL_GREATER: + zCmp = z > *depth; + break; + case GL_NOTEQUAL: + zCmp = z != *depth; + break; + case GL_GEQUAL: + zCmp = z >= *depth; + break; + case GL_ALWAYS: + zCmp = true; + break; + default: + assert(0); + break; + } + if (!DepthTest || zCmp) { + ctx->glCtx->CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->function(); + if (BlendEnable) { + BlendComp_t sOne = 255, sZero = 0; + Vec4<BlendComp_t> one = sOne, zero = sZero; + + Vec4<BlendComp_t> src; +// if (outputRegDesc.IsInt32Color()) +// RGBAIntToRGBAIntx4(vertex.fragColor[0].u[0], &src); +// else if (outputRegDesc.IsVectorType(Float)) + RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src); +// else if (outputRegDesc.IsVectorType(Fixed8)) +// { +// src.u[0] = vertex.fragColor[0].u[0]; +// src.u[1] = vertex.fragColor[0].u[1]; +// src.u[2] = vertex.fragColor[0].u[2]; +// src.u[3] = vertex.fragColor[0].u[3]; +// } +// else +// assert(0); 
+ + Vec4<BlendComp_t> dst; + unsigned dc = *frame; + dst.r = dc & 255; + dst.g = (dc >>= 8) & 255; + dst.b = (dc >>= 8) & 255; + dst.a = (dc >>= 8) & 255; + + Vec4<BlendComp_t> sf, df; + + BlendFactor(ctx->blendState.scf, sf, src, dst, + ctx->blendState.color, one, zero, src.a, dst.a, + ctx->blendState.color.a, sOne); + if (ctx->blendState.scf != ctx->blendState.saf) + BlendFactor(ctx->blendState.saf, sf.a, src.a, dst.a, + ctx->blendState.color.a, sOne, sZero, src.a, dst.a, + ctx->blendState.color.a, sOne); + BlendFactor(ctx->blendState.dcf, df, src, dst, + ctx->blendState.color, one, zero, src.a, dst.a, + ctx->blendState.color.a, sOne); + if (ctx->blendState.dcf != ctx->blendState.daf) + BlendFactor(ctx->blendState.daf, df.a, src.a, dst.a, + ctx->blendState.color.a, sOne, sZero, src.a, dst.a, + ctx->blendState.color.a, sOne); + + Vec4<BlendComp_t> sfs(sf), dfs(df); + sfs.LShr(7); + sf += sfs; + dfs.LShr(7); + df += dfs; + + src *= sf; + dst *= df; + Vec4<BlendComp_t> res(src); + switch (ctx->blendState.ce + GL_FUNC_ADD) { + case GL_FUNC_ADD: + res += dst; + break; + case GL_FUNC_SUBTRACT: + res -= dst; + break; + case GL_FUNC_REVERSE_SUBTRACT: + res = dst; + res -= src; + break; + default: + assert(0); + break; + } + if (ctx->blendState.ce != ctx->blendState.ae) + switch (ctx->blendState.ce + GL_FUNC_ADD) { + case GL_FUNC_ADD: + res.a = src.a + dst.a; + break; + case GL_FUNC_SUBTRACT: + res.a = src.a - dst.a; + break; + case GL_FUNC_REVERSE_SUBTRACT: + res.a = dst.a - src.a; + break; + default: + assert(0); + break; + } + + res.AShr(8); + Saturate(&res); + *frame = RGBAIntx4ToRGBAInt(&res); + } else { +// if (outputRegDesc.IsInt32Color()) +// *frame = vertex.fragColor[0].u[0]; +// else if (outputRegDesc.IsVectorType(Float)) + { + Vec4<BlendComp_t> src; + RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src); + Saturate(&src); + *frame = RGBAIntx4ToRGBAInt(&src); + } +// else if (outputRegDesc.IsVectorType(Fixed16)) +// { +// Vec4<BlendComp_t> & src = 
(Vec4<BlendComp_t> &)vertex.fragColor[0]; +// src.r = (src.r * 255 >> 16); +// src.g = (src.g * 255 >> 16); +// src.b = (src.b * 255 >> 16); +// src.a = (src.a * 255 >> 16); +// Saturate(&src); +// *frame = RGBAIntx4ToRGBAInt(&src); +// } +// else if (outputRegDesc.IsVectorType(Fixed8)) +// { +// Vec4<BlendComp_t> & src = (Vec4<BlendComp_t> &)vertex.fragColor[0]; +// Saturate(&src); +// *frame = RGBAIntx4ToRGBAInt(&src); +// } +// else +// assert(0); } - else if (StencilTest) - *stencil = StencilOp(sdFail, s, sRef); - } - else if (StencilTest) - *stencil = StencilOp(ssFail, s, sRef); - - frame++; - depth++; - stencil++; - + + if (DepthWrite) + *depth = z; + if (StencilTest) + *stencil = StencilOp(sdPass, s, sRef); + } else if (StencilTest) + *stencil = StencilOp(sdFail, s, sRef); + } else if (StencilTest) + *stencil = StencilOp(ssFail, s, sRef); + + frame++; + depth++; + stencil++; + #if USE_FORCED_FIXEDPOINT - for (unsigned j = 0; j < 4; j++) - { - if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord) - vertex.position.i[j] += vertexDx.position.i[j]; - for (unsigned i = 0; i < varyingCount; i++) - vertex.varyings[i].i[j] += vertexDx.varyings[i].i[j]; - } - vertex.position.i[2] += vertexDx.position.i[2]; - if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) - { - vertex.frontFacingPointCoord.i[2] = vertexDx.frontFacingPointCoord.i[2]; - vertex.frontFacingPointCoord.i[3] = vertexDx.frontFacingPointCoord.i[3]; - } + for (unsigned j = 0; j < 4; j++) { + if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord) + vertex.position.i[j] += vertexDx.position.i[j]; + for (unsigned i = 0; i < varyingCount; i++) + vertex.varyings[i].i[j] += vertexDx.varyings[i].i[j]; + } + vertex.position.i[2] += vertexDx.position.i[2]; + if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) { + vertex.frontFacingPointCoord.i[2] = vertexDx.frontFacingPointCoord.i[2]; + vertex.frontFacingPointCoord.i[3] = 
vertexDx.frontFacingPointCoord.i[3]; + } #else - if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord) - vertex.position += vertexDx.position; - else if (ctx->bufferState.depthTest) - vertex.position.z += vertexDx.position.z; - - for (unsigned i = 0; i < varyingCount; i++) - vertex.varyings[i] += vertexDx.varyings[i]; - if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) - { - vertex.frontFacingPointCoord.z += vertexDx.frontFacingPointCoord.z; - vertex.frontFacingPointCoord.w += vertexDx.frontFacingPointCoord.w; - } +// if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord) + vertex.position += vertexDx.position; +// else if (ctx->bufferState.depthTest) + vertex.position.z += vertexDx.position.z; + + for (unsigned i = 0; i < varyingCount; i++) + vertex.varyings[i] += vertexDx.varyings[i]; +// if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) + { + vertex.frontFacingPointCoord.z += vertexDx.frontFacingPointCoord.z; + vertex.frontFacingPointCoord.w += vertexDx.frontFacingPointCoord.w; + } #endif // #if USE_FORCED_FIXEDPOINT - } + } #endif // #if USE_LLVM_SCANLINE #if !USE_LLVM_TEXTURE_SAMPLER - textureGGLContext = NULL; + textureGGLContext = NULL; #endif } static void PickScanLine(GGLInterface * iface) { - GGL_GET_CONTEXT(ctx, iface); - - ctx->interface.ScanLine = NULL; - const bool DepthWrite = true; - if (ctx->bufferState.stencilTest) - { - if (ctx->bufferState.depthTest) - { - if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, true>; - else - ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, false>; - } - else - { - if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, true>; - else - ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, false>; - } - } - else - { - if (ctx->bufferState.depthTest) - { - if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, true>; - 
else - ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, false>; - } - else - { - if (ctx->blendState.enable) - ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, true>; - else - ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, false>; - } - } - - assert(ctx->interface.ScanLine); + GGL_GET_CONTEXT(ctx, iface); + + ctx->interface.ScanLine = NULL; + const bool DepthWrite = true; + if (ctx->bufferState.stencilTest) { + if (ctx->bufferState.depthTest) { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, false>; + } else { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, false>; + } + } else { + if (ctx->bufferState.depthTest) { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, false>; + } else { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, false>; + } + } + + assert(ctx->interface.ScanLine); } void InitializeScanLineFunctions(GGLInterface * iface) { - GGL_GET_CONTEXT(ctx, iface); - ctx->PickScanLine = PickScanLine; -}
\ No newline at end of file + GGL_GET_CONTEXT(ctx, iface); + ctx->PickScanLine = PickScanLine; +} diff --git a/src/pixelflinger2/shader.cpp b/src/pixelflinger2/shader.cpp index ef171f2..782759c 100644 --- a/src/pixelflinger2/shader.cpp +++ b/src/pixelflinger2/shader.cpp @@ -18,18 +18,56 @@ #include <assert.h> #include <stdio.h> #include <string.h> +#include <map> #include <llvm/LLVMContext.h> +#include <llvm/Module.h> +#include <bcc/bcc.h> #include "src/talloc/hieralloc.h" #include "src/mesa/main/mtypes.h" #include "src/mesa/program/prog_parameter.h" #include "src/mesa/program/prog_uniform.h" #include "src/glsl/glsl_types.h" -#include "src/glsl/ir.h" +#include "src/glsl/ir_to_llvm.h" + +struct ShaderKey { + struct ScanLineKey { + GGLContext::StencilState frontStencil, backStencil; + GGLContext::BufferState bufferState; + GGLContext::BlendState blendState; + } scanLineKey; + GGLPixelFormat textureFormats[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; + unsigned char textureParameters[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; // wrap and filter + bool operator <(const ShaderKey & rhs) const { + return memcmp(this, &rhs, sizeof(*this)) < 0; + } +}; + +struct Instance { + llvm::Module * module; + struct BCCOpaqueScript * script; + void (* function)(); + ~Instance() { + // TODO: check bccDisposeScript, which seems to dispose llvm::Module + if (script) + bccDisposeScript(script); + else if (module) + delete module; + getchar(); + } +}; + +struct Executable { // codegen info + std::map<ShaderKey, Instance *> instances; +}; + +bool do_mat_op_to_vec(exec_list *instructions); extern void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog); +extern "C" void compile_shader(struct gl_context *ctx, struct gl_shader *shader); + extern "C" void _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, struct gl_shader *sh) { @@ -53,10 +91,18 @@ extern "C" void _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh { if (!shader) return; - if (1 == 
shader->RefCount) - hieralloc_free(shader); - else + if (shader->RefCount > 1) { shader->DeletePending = true; + return; + } + if (shader->executable) + { + for (std::map<ShaderKey, Instance *>::iterator it=shader->executable->instances.begin(); + it != shader->executable->instances.end(); it++) + (*it).second->~Instance(); + shader->executable->instances.~map(); + } + hieralloc_free(shader); } static gl_shader * ShaderCreate(const GGLInterface * iface, GLenum type) @@ -73,8 +119,6 @@ static gl_shader * ShaderCreate(const GGLInterface * iface, GLenum type) return shader; } -extern "C" void compile_shader(struct gl_context *ctx, struct gl_shader *shader); - static GLboolean ShaderCompile(const GGLInterface * iface, gl_shader * shader, const char * glsl, char ** infoLog) { @@ -164,34 +208,29 @@ static GLboolean ShaderProgramLink(const GGLInterface * iface, gl_shader_program return program->LinkStatus; } -struct gl_program; -struct ShaderKey; - -static void GetShaderKey(const GGLContext * ctx, const gl_program * shader, ShaderKey * key) +static void GetShaderKey(const GGLContext * ctx, const gl_shader * shader, ShaderKey * key) { -// memset(key, 0, sizeof(*key)); -// if (GL_FRAGMENT_SHADER == shader->Target) -// { -// key->scanLineKey.frontStencil = ctx->frontStencil; -// key->scanLineKey.backStencil = ctx->backStencil; -// key->scanLineKey.bufferState = ctx->bufferState; -// key->scanLineKey.blendState = ctx->blendState; -// } -// -// for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) -// if (shader->SamplersUsed & (1 << i)) -// { -// const GGLTexture & texture = ctx->textureState.textures[i]; -// key->textureFormats[i] = texture.format; -// assert((1 << 2) > texture.wrapS); -// key->textureParameters[i] |= texture.wrapS; -// assert((1 << 2) > texture.wrapT); -// key->textureParameters[i] |= texture.wrapT << 2; -// assert((1 << 3) > texture.minFilter); -// key->textureParameters[i] |= texture.minFilter << (2 + 2); -// assert((1 << 1) > 
texture.magFilter); -// key->textureParameters[i] |= texture.magFilter << (2 + 2 + 3); -// } + memset(key, 0, sizeof(*key)); + if (GL_FRAGMENT_SHADER == shader->Type) { + key->scanLineKey.frontStencil = ctx->frontStencil; + key->scanLineKey.backStencil = ctx->backStencil; + key->scanLineKey.bufferState = ctx->bufferState; + key->scanLineKey.blendState = ctx->blendState; + } + + for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) + if (shader->SamplersUsed & (1 << i)) { + const GGLTexture & texture = ctx->textureState.textures[i]; + key->textureFormats[i] = texture.format; + assert((1 << 2) > texture.wrapS); + key->textureParameters[i] |= texture.wrapS; + assert((1 << 2) > texture.wrapT); + key->textureParameters[i] |= texture.wrapT << 2; + assert((1 << 3) > texture.minFilter); + key->textureParameters[i] |= texture.minFilter << (2 + 2); + assert((1 << 1) > texture.magFilter); + key->textureParameters[i] |= texture.magFilter << (2 + 2 + 3); + } } static inline char HexDigit(unsigned char d) @@ -204,47 +243,127 @@ static const unsigned SHADER_KEY_STRING_LEN = GGL_MAXCOMBINEDTEXTUREIMAGEUNITS * static void GetShaderKeyString(const GLenum type, const ShaderKey * key, char * buffer, const unsigned bufferSize) { -// assert(1 == sizeof(char)); -// assert(0xff >= GGL_PIXEL_FORMAT_COUNT); -// assert(SHADER_KEY_STRING_LEN <= bufferSize); -// char * str = buffer; -// if (GL_VERTEX_SHADER == type) -// *str++ = 'v'; -// else if (GL_FRAGMENT_SHADER == type) -// *str++ = 'f'; -// else -// assert(0); -// for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) -// { -// *str++ = HexDigit(key->textureFormats[i] / 16); -// *str++ = HexDigit(key->textureFormats[i] % 16); -// *str++ = HexDigit(key->textureParameters[i] / 16); -// *str++ = HexDigit(key->textureParameters[i] % 16); -// } -// *str++ = '\0'; + assert(1 == sizeof(char)); + assert(0xff >= GGL_PIXEL_FORMAT_COUNT); + assert(SHADER_KEY_STRING_LEN <= bufferSize); + char * str = buffer; + if (GL_VERTEX_SHADER 
== type) + *str++ = 'v'; + else if (GL_FRAGMENT_SHADER == type) + *str++ = 'f'; + else + assert(0); + for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) { + *str++ = HexDigit(key->textureFormats[i] / 16); + *str++ = HexDigit(key->textureFormats[i] % 16); + *str++ = HexDigit(key->textureParameters[i] / 16); + *str++ = HexDigit(key->textureParameters[i] % 16); + } + *str++ = '\0'; } -//static const unsigned SCANLINE_KEY_STRING_LEN = 2 * sizeof(((ShaderKey *)0)->scanLineKey) + -// 3 + SHADER_KEY_STRING_LEN; +static const unsigned SCANLINE_KEY_STRING_LEN = 2 * sizeof(ShaderKey::scanLineKey) + 3 + SHADER_KEY_STRING_LEN; static char * GetScanlineKeyString(const ShaderKey * key, char * buffer, const unsigned bufferSize) { -// assert(1 == sizeof(char)); -// assert(0xff >= GGL_PIXEL_FORMAT_COUNT); -// assert(SCANLINE_KEY_STRING_LEN <= bufferSize); -// char * str = buffer; -// *str++ = 's'; -// const unsigned char * start = (const unsigned char *)&key->scanLineKey; -// const unsigned char * const end = start + sizeof(key->scanLineKey); -// for (; start < end; start++) -// { -// *str++ = HexDigit(*start / 16); -// *str++ = HexDigit(*start % 16); -// } -// GetShaderKeyString(GL_FRAGMENT_SHADER, key, str, bufferSize - (str - buffer)); -// return buffer; - return NULL; + assert(1 == sizeof(char)); + assert(0xff >= GGL_PIXEL_FORMAT_COUNT); + assert(SCANLINE_KEY_STRING_LEN <= bufferSize); + char * str = buffer; + *str++ = 's'; + const unsigned char * start = (const unsigned char *)&key->scanLineKey; + const unsigned char * const end = start + sizeof(key->scanLineKey); + for (; start < end; start++) { + *str++ = HexDigit(*start / 16); + *str++ = HexDigit(*start % 16); + } + GetShaderKeyString(GL_FRAGMENT_SHADER, key, str, bufferSize - (str - buffer)); + return buffer; +} + +#include <bcc/bcc.h> +#include <dlfcn.h> + +struct SymbolLookupContext { + const GGLContext * gglCtx; + const gl_shader_program * program; + const gl_shader * shader; +}; + +static void* 
SymbolLookup(void* pContext, const char* name) +{ + SymbolLookupContext * ctx = (SymbolLookupContext *)pContext; + const gl_shader * shader = ctx->shader; + const gl_shader_program * program = ctx->program; + const GGLContext * gglCtx = ctx->gglCtx; + const void * symbol = (void*)dlsym(RTLD_DEFAULT, name); + if (NULL == symbol) { + if (!strcmp(_PF2_TEXTURE_DATA_NAME_, name)) + symbol = (void *)gglCtx->textureState.textureData; + else if (!strcmp(_PF2_TEXTURE_DIMENSIONS_NAME_, name)) + symbol = (void *)gglCtx->textureState.textureDimensions; + else { + for (unsigned i = 0; i < program->Uniforms->NumUniforms && !symbol; i++) + if (!strcmp(program->Uniforms->Uniforms[i].Name, name)) + symbol = program->ValuesUniform + program->Uniforms->Uniforms[i].Pos; + for (unsigned i = 0; i < program->Attributes->NumParameters && !symbol; i++) + if (!strcmp(program->Attributes->Parameters[i].Name, name)) { + assert(program->Attributes->Parameters[i].Location + < sizeof(VertexInput) / sizeof(float[4])); + symbol = program->ValuesVertexInput + program->Attributes->Parameters[i].Location; + } + for (unsigned i = 0; i < program->Varying->NumParameters && !symbol; i++) + if (!strcmp(program->Varying->Parameters[i].Name, name)) { + int index = -1; + if (GL_VERTEX_SHADER == shader->Type) + index = program->Varying->Parameters[i].BindLocation; + else if (GL_FRAGMENT_SHADER == shader->Type) + index = program->Varying->Parameters[i].Location; + else + assert(0); + assert(index >= 0); + assert(index < sizeof(VertexOutput) / sizeof(float[4])); + symbol = program->ValuesVertexOutput + index; + } + assert(symbol >= program->ValuesVertexInput && + symbol < (char *)program->ValuesUniform + 16 * program->Uniforms->Slots - 3); + }; + } + printf("symbolLookup '%s'=%p \n", name, symbol); + //getchar(); + assert(symbol); + return (void *)symbol; +} + +static void CodeGen(Instance * instance, const char * mainName, gl_shader * shader, + gl_shader_program * program, const GGLContext * gglCtx) +{ + 
SymbolLookupContext ctx = {gglCtx, program, shader}; + int result = 0; + + BCCScriptRef & script = instance->script; + script = bccCreateScript(); + result = bccReadModule(script, "glsl", (LLVMModuleRef)instance->module, 0); + assert(0 == result); + result = bccRegisterSymbolCallback(script, SymbolLookup, &ctx); + assert(0 == result); + result = bccPrepareExecutable(script, NULL, 0); + + result = bccGetError(script); + if (result != 0) { + puts("failed bcc_compile"); + assert(0); + return; + } + + instance->function = (void (*)())bccGetFuncAddr(script, mainName); + assert(instance->function); + result = bccGetError(script); + if (result != BCC_NO_ERROR) + fprintf(stderr, "Could not find '%s': %d\n", "main", result); + else + printf("bcc_compile %s=%p \n", "main", shader->function); } static void ShaderUse(GGLInterface * iface, gl_shader_program * program) @@ -258,72 +377,51 @@ static void ShaderUse(GGLInterface * iface, gl_shader_program * program) return; } -// if (program->VertexProgram) -// { -// if (!program->STVP) -// { -// program->STVP = CALLOC_STRUCT(st_vertex_program); -// program->STVP->Base = *program->VertexProgram; -// st_translate_vertex_program(ctx->glCtx, program->STVP, NULL, NULL, NULL); -// } -// -// _mesa_update_shader_textures_used(program->VertexProgram); -// -// ShaderKey shaderKey; -// GetShaderKey(ctx, program->VertexProgram, &shaderKey); -// ShaderFunction_t function = NULL; -// if (!program->GLVMVP || NULL == (function = program->GLVMVP->functions[shaderKey])) -// { -// char shaderName [SHADER_KEY_STRING_LEN] = {0}; -// GetShaderKeyString(GL_VERTEX_SHADER, &shaderKey, shaderName, Elements(shaderName)); -// create_program(program->STVP->state.tokens, GALLIVM_VS, &program->GLVMVP, -// &ctx->glCtx->Shader.cpu, ctx, program->VertexProgram, -// shaderName, NULL); -// program->GLVMVP->functions[shaderKey] = program->GLVMVP->function; -// debug_printf("jit new vertex shader %p \n", program->GLVMVP->function); //getchar(); -// } -// else -// { -// 
program->GLVMVP->function = function; -// //debug_printf("use cached vertex shader %p \n", function); -// } -// ctx->PickRaster(iface); -// } -// if (program->FragmentProgram) -// { -// if (!program->STFP) -// { -// program->STFP = CALLOC_STRUCT(st_fragment_program); -// program->STFP->Base = *program->FragmentProgram; -// st_translate_fragment_program(ctx->glCtx, program->STFP, NULL); -// } -// -// _mesa_update_shader_textures_used(program->FragmentProgram); -// -// ShaderKey shaderKey; -// GetShaderKey(ctx, program->FragmentProgram, &shaderKey); -// ShaderFunction_t function = NULL; -// if (!program->GLVMFP || NULL == (function = program->GLVMFP->functions[shaderKey])) -// { -// char shaderName [SHADER_KEY_STRING_LEN] = {0}; -// GetShaderKeyString(GL_FRAGMENT_SHADER, &shaderKey, shaderName, Elements(shaderName)); -// -// char scanlineName [SCANLINE_KEY_STRING_LEN] = {0}; -// GetScanlineKeyString(&shaderKey, scanlineName, Elements(scanlineName)); -// create_program(program->STFP->state.tokens, GALLIVM_FS, &program->GLVMFP, -// &ctx->glCtx->Shader.cpu, ctx, program->FragmentProgram, -// shaderName, scanlineName); -// program->GLVMFP->functions[shaderKey] = program->GLVMFP->function; -// debug_printf("jit new fragment shader %p \n", program->GLVMFP->function); -// } -// else -// { -// program->GLVMFP->function = function; -// //debug_printf("use cached fragment shader %p \n", function); -// } -// ctx->PickScanLine(iface); -// } -// ctx->glCtx->CurrentProgram = program; + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (!program->_LinkedShaders[i]) + continue; + gl_shader * shader = program->_LinkedShaders[i]; + if (!shader->executable) { + shader->executable = hieralloc_zero(shader, Executable); + shader->executable->instances = std::map<ShaderKey, Instance *>(); + } + + ShaderKey shaderKey; + GetShaderKey(ctx, shader, &shaderKey); + Instance * instance = shader->executable->instances[shaderKey]; + if (!instance) { + instance = 
hieralloc_zero(shader->executable, Instance); + instance->module = new llvm::Module("glsl", *ctx->llvmCtx); + + char shaderName [SHADER_KEY_STRING_LEN] = {0}; + GetShaderKeyString(shader->Type, &shaderKey, shaderName, sizeof shaderName / sizeof *shaderName); + + char mainName [SHADER_KEY_STRING_LEN + 6] = {"main"}; + strcat(mainName, shaderName); + + do_mat_op_to_vec(shader->ir); + + llvm::Module * module = glsl_ir_to_llvm_module(shader->ir, instance->module, ctx, shaderName); + if (!module) + assert(0); // ir to llvm failed + CodeGen(instance, mainName, shader, program, ctx); + shader->executable->instances[shaderKey] = instance; + debug_printf("jit new shader '%s'(%p) \n", mainName, instance->function); //getchar(); + } else + debug_printf("use cached shader %p \n", instance->function); + + + shader->function = instance->function; + + if (GL_VERTEX_SHADER == shader->Type) + ctx->PickRaster(iface); + else if (GL_FRAGMENT_SHADER == shader->Type) + ctx->PickScanLine(iface); + else + assert(0); + } + + ctx->glCtx->CurrentProgram = program; } static void ShaderProgramDelete(const GGLInterface * iface, gl_shader_program * program) @@ -368,7 +466,6 @@ static GLint ShaderUniformLocation(const GGLInterface * iface, const gl_shader_p for (unsigned i = 0; i < program->Uniforms->NumUniforms; i++) if (!strcmp(program->Uniforms->Uniforms[i].Name, name)) return program->Uniforms->Uniforms[i].Pos; -// return _mesa_get_shader_uniform_location(ctx->glCtx, program, name); return -2; } |