diff options
author | Hui Qi Tay <tayhuiqithq@gmail.com> | 2010-09-28 17:49:25 +0100 |
---|---|---|
committer | Keith Whitwell <keithw@vmware.com> | 2010-09-29 10:10:09 +0100 |
commit | 94f65d095a1365b69a041302b473e40c6ccae3c3 (patch) | |
tree | ecb69a784ca58b75b0808450bd9ff42abe0ff164 | |
parent | 6bf12c8b7366a9db8c88b9cacaa06266b41a73b5 (diff) |
draw: cliptest and viewport done in a single loop in vertex shader
Cliptesting now done at the end of vs in draw_llvm instead of
draw_pt_post_vs.
Added viewport mapping transformation and further cliptesting to
vertex shader in draw_llvm.c
Alternative path where vertex header setup, clip coordinates store,
cliptesting and viewport mapping are done earlier in the vertex
shader.
Still need to hook this up properly according to the return value of
"draw_llvm_shader" function.
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 314 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.h | 6 |
2 files changed, 293 insertions, 27 deletions
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 5154c1f3c5..36254d359c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -31,6 +31,9 @@ #include "draw_vs.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_flow.h" @@ -549,19 +552,28 @@ static void store_aos(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef index, - LLVMValueRef value) + LLVMValueRef value, + LLVMValueRef clipmask) { LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); LLVMValueRef indices[3]; + LLVMValueRef val, shift; indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indices[1] = index; indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); - /* undefined vertex */ - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), - 0xffff, 0), id_ptr); + /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ + val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); + shift = LLVMConstInt(LLVMInt32Type(), 12, 0); + val = LLVMBuildShl(builder, val, shift, ""); + /* add clipmask:12 */ + val = LLVMBuildOr(builder, val, clipmask, ""); + + /* store vertex header */ + LLVMBuildStore(builder, val, id_ptr); + #if DEBUG_STORE lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); @@ -616,7 +628,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef aos[NUM_CHANNELS], int attrib, - int num_outputs) + int num_outputs, + LLVMValueRef clipmask) { LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); @@ -624,7 +637,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - + LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; + debug_assert(NUM_CHANNELS == 4); io0_ptr = LLVMBuildGEP(builder, io_ptr, @@ -636,21 +650,31 @@ store_aos_array(LLVMBuilderRef builder, io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + clipmask0 = LLVMBuildExtractElement(builder, clipmask, + ind0, ""); + clipmask1 = LLVMBuildExtractElement(builder, clipmask, + ind1, ""); + clipmask2 = LLVMBuildExtractElement(builder, clipmask, + ind2, ""); + clipmask3 = LLVMBuildExtractElement(builder, clipmask, + ind3, ""); + #if DEBUG_STORE - lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", - io_ptr, ind0, ind1, ind2, ind3); + lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", + io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); #endif - - store_aos(builder, io0_ptr, attr_index, aos[0]); - store_aos(builder, io1_ptr, attr_index, aos[1]); - store_aos(builder, io2_ptr, attr_index, aos[2]); - store_aos(builder, io3_ptr, attr_index, aos[3]); + /* store for each of the 4 vertices */ + store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); + store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); + store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); + store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); } static void convert_to_aos(LLVMBuilderRef builder, LLVMValueRef io, LLVMValueRef (*outputs)[NUM_CHANNELS], + LLVMValueRef clipmask, int num_outputs, int max_vertices) { @@ -679,13 +703,214 @@ convert_to_aos(LLVMBuilderRef builder, io, aos, attrib, - num_outputs); + num_outputs, + clipmask); } #if DEBUG_STORE lp_build_printf(builder, " # storing end\n"); #endif } +/* + * Stores original vertex positions in clip coordinates + * There is probably a more efficient way to do this, 4 floats at once + * rather than extracting each element one by one. + */ +static void +store_clip(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ + LLVMValueRef out[4]; + LLVMValueRef indices[2]; + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; + LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; + LLVMValueRef out0elem, out1elem, out2elem, out3elem; + + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 y0 z0 w0*/ + out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*x1 y1 z1 w1*/ + out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*x2 y2 z2 w2*/ + out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*x3 y3 z3 w3*/ + + io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + + clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); + clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); + clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); + clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); + + for (int i = 0; i<4; i++){ + clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, + indices, 2, ""); //x1 + clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, + indices, 2, ""); //y1 + clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, + indices, 2, ""); //z1 + clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, + indices, 2, ""); //w1 + + out0elem = LLVMBuildExtractElement(builder, out[0], + indices[1], ""); //x1 + out1elem = LLVMBuildExtractElement(builder, out[1], + indices[1], ""); //y1 + out2elem = LLVMBuildExtractElement(builder, out[2], + indices[1], ""); //z1 + out3elem = LLVMBuildExtractElement(builder, out[3], + indices[1], ""); //w1 + + LLVMBuildStore(builder, out0elem, clip0_ptr); + LLVMBuildStore(builder, out1elem, clip1_ptr); + LLVMBuildStore(builder, out2elem, clip2_ptr); + LLVMBuildStore(builder, out3elem, clip3_ptr); + + indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); + } + +} + +/* + * Transforms the outputs for viewport mapping + */ +static void +generate_viewport(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ + int i; + const float *scaleA = llvm->draw->viewport.scale; + const float *transA = llvm->draw->viewport.translate; + struct lp_type f32_type = lp_type_float_vec(32); + LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + + /* for 1/w convention*/ + out3 = LLVMBuildFDiv(builder, const1, out3, ""); + + /* Viewport Mapping */ + for (i=0; i<4; i++){ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ + LLVMValueRef scale = lp_build_const_vec(f32_type, scaleA[i]); /*sx sx sx sx*/ + LLVMValueRef trans = lp_build_const_vec(f32_type, transA[i]); /*tx tx tx tx*/ + + /* divide by w */ + out = LLVMBuildMul(builder, out, out3, ""); + /* mult by scale */ + out = LLVMBuildMul(builder, out, scale, ""); + /* add translation */ + out = LLVMBuildAdd(builder, out, trans, ""); + + /* store transformed outputs */ + LLVMBuildStore(builder, out, outputs[0][i]); + } + +} + +/* + * Returns clipmask as 4xi32 bitmask for the 4 vertices + */ +static LLVMValueRef +generate_clipmask(LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ + LLVMValueRef mask; /* stores the <4xi32> clipmasks */ + LLVMValueRef test, temp; + LLVMValueRef zero, shift; + LLVMValueRef pos_x, pos_y, pos_z, pos_w; + + struct lp_type f32_type = lp_type_float_vec(32); + + zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ + shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ + + /* Assuming position stored at output[0] */ + pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ + pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ + pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ + pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + + /* Cliptest, for hardwired planes */ + /* plane 1 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); + temp = shift; + test = LLVMBuildAnd(builder, test, temp, ""); + mask = test; + + /* plane 2 */ + test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 3 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 4 */ + test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 5 */ + test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 6 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + return mask; + +} + +/* + * Returns boolean if any clipping has occurred + * Used zero/non-zero i32 value to represent boolean + */ +static void +clipmask_bool(LLVMBuilderRef builder, + LLVMValueRef clipmask, + LLVMValueRef ret_ptr) +{ + LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); + LLVMValueRef temp; + int i; + + LLVMDumpValue(clipmask); + + for (i=0; i<4; i++){ + temp = LLVMBuildExtractElement(builder, clipmask, + LLVMConstInt(LLVMInt32Type(), i, 0) , ""); + ret = LLVMBuildOr(builder, ret, temp, ""); + LLVMDumpValue(ret); + } + + LLVMBuildStore(builder, ret, ret_ptr); + LLVMDumpValue(ret_ptr); + +} + static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { @@ -706,6 +931,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) void *code; struct lp_build_sampler_soa *sampler = 0; + LLVMValueRef ret, ret_ptr; + arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ @@ -715,7 +942,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); @@ -755,6 +982,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* function will return non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create( draw_llvm_variant_key_samplers(&variant->key), @@ -769,6 +1000,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); @@ -805,10 +1037,22 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs); + clipmask_bool(builder, clipmask, ret_ptr); + + /* do viewport mapping */ + generate_viewport(llvm, builder, outputs); + + /* store clipmask in vertex header and positions in data */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); @@ -818,8 +1062,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); #endif - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* @@ -869,6 +1114,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef fetch_max; void *code; struct lp_build_sampler_soa *sampler = 0; + LLVMValueRef ret, ret_ptr; arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ @@ -879,10 +1125,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); - variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", - func_type); + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) @@ -928,11 +1173,16 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMConstInt(LLVMInt32Type(), 1, 0), "fetch_max"); + /* function returns non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = lp_loop.counter; @@ -979,10 +1229,25 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs); + clipmask_bool(builder, clipmask, ret_ptr); + + /* do viewport mapping */ + generate_viewport(llvm, builder, outputs); + + /* store clipmask in vertex header, + * original positions in clip + * and transformed positions in data + */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); @@ -992,8 +1257,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); #endif - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index b881ef6113..1142ea51cf 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -120,7 +120,7 @@ struct draw_jit_context lp_build_struct_get_ptr(_builder, _ptr, 0, "id") #define draw_jit_header_clip(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "clip") + lp_build_struct_get_ptr(_builder, _ptr, 1, "clip") #define draw_jit_header_data(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, 2, "data") @@ -136,7 +136,7 @@ struct draw_jit_context lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") -typedef void +typedef int (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, const char *vbuffers[PIPE_MAX_ATTRIBS], @@ -147,7 +147,7 @@ typedef void unsigned instance_id); -typedef void +typedef int (*draw_jit_vert_func_elts)(struct draw_jit_context *context, struct vertex_header *io, const char *vbuffers[PIPE_MAX_ATTRIBS], |