diff options
Diffstat (limited to 'src/gallium/auxiliary')
23 files changed, 935 insertions, 537 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index d6c16d37b2..eeeed9813f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -398,9 +398,9 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; - texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.depth[0] = 1; + texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth0 = 1; pf_get_block(texTemp.format, &texTemp.block); aaline->texture = screen->texture_create(screen, &texTemp); @@ -413,11 +413,11 @@ aaline_create_texture(struct aaline_stage *aaline) */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; - const uint size = aaline->texture->width[level]; + const uint size = u_minify(aaline->texture->width0, level); ubyte *data; uint i, j; - assert(aaline->texture->width[level] == aaline->texture->height[level]); + assert(aaline->texture->width0 == aaline->texture->height0); /* This texture is new, no need to flush. */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 283502cdf3..27d89721b1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -427,9 +427,9 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = 0; - texTemp.width[0] = 32; - texTemp.height[0] = 32; - texTemp.depth[0] = 1; + texTemp.width0 = 32; + texTemp.height0 = 32; + texTemp.depth0 = 1; pf_get_block(texTemp.format, &texTemp.block); pstip->texture = screen->texture_create(screen, &texTemp); diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index dbb5ac7182..4865a2d854 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -192,7 +192,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) prim, start, count); for (i = 0; i < count; i++) { - uint ii, j; + uint ii = 0; + uint j; if (draw->pt.user.elts) { /* indexed arrays */ diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index d3f179ced1..757c487454 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -346,7 +346,8 @@ vcache_check_run( struct draw_pt_front_end *frontend, vcache->fetch_max, draw_count); - if (max_index == 0xffffffff || + if (max_index >= DRAW_PIPE_MAX_VERTICES || + fetch_count >= UNDEFINED_VERTEX_ID || fetch_count > draw_count) { if (0) debug_printf("fail\n"); goto fail; diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 2ef4293d4d..2f973684f6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -80,11 +80,27 @@ struct fenced_buffer_list */ struct fenced_buffer { + /* + * Immutable members. + */ + struct pb_buffer base; - struct pb_buffer *buffer; + struct fenced_buffer_list *list; + + /** + * Protected by fenced_buffer_list::mutex + */ + struct list_head head; - /* FIXME: protect access with mutex */ + /** + * Following members are mutable and protected by this mutex. + * + * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex + * held, but in order to prevent deadlocks you must never lock + * fenced_buffer_list::mutex with this mutex held. + */ + pipe_mutex mutex; /** * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current @@ -96,9 +112,6 @@ struct fenced_buffer struct pb_validate *vl; unsigned validation_flags; struct pipe_fence_handle *fence; - - struct list_head head; - struct fenced_buffer_list *list; }; @@ -110,15 +123,24 @@ fenced_buffer(struct pb_buffer *buf) } +/** + * Add the buffer to the fenced list. + * + * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this + * order before calling this function. + * + * Reference count should be incremented before calling this function. + */ static INLINE void -_fenced_buffer_add(struct fenced_buffer *fenced_buf) +fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); + /* TODO: Move the reference count increment here */ + #ifdef DEBUG LIST_DEL(&fenced_buf->head); assert(fenced_list->numUnfenced); @@ -130,32 +152,16 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) /** - * Actually destroy the buffer. + * Remove the buffer from the fenced list. + * + * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this + * order before calling this function. + * + * Reference count should be decremented after calling this function. */ static INLINE void -_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) -{ - struct fenced_buffer_list *fenced_list = fenced_buf->list; - - assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(!fenced_buf->fence); -#ifdef DEBUG - assert(fenced_buf->head.prev); - assert(fenced_buf->head.next); - LIST_DEL(&fenced_buf->head); - assert(fenced_list->numUnfenced); - --fenced_list->numUnfenced; -#else - (void)fenced_list; -#endif - pb_reference(&fenced_buf->buffer, NULL); - FREE(fenced_buf); -} - - -static INLINE void -_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, - struct fenced_buffer *fenced_buf) +fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { struct pb_fence_ops *ops = fenced_list->ops; @@ -177,37 +183,53 @@ _fenced_buffer_remove(struct fenced_buffer_list *fenced_list, ++fenced_list->numUnfenced; #endif - /** - * FIXME!!! - */ - - if(!pipe_is_referenced(&fenced_buf->base.base.reference)) - _fenced_buffer_destroy(fenced_buf); + /* TODO: Move the reference count decrement and destruction here */ } +/** + * Wait for the fence to expire, and remove it from the fenced list. + * + * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be + * held -- it will + */ static INLINE enum pipe_error -_fenced_buffer_finish(struct fenced_buffer *fenced_buf) +fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; + enum pipe_error ret = PIPE_ERROR; #if 0 debug_warning("waiting for GPU"); #endif + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); + + /* Acquire the global lock */ + pipe_mutex_unlock(fenced_buf->mutex); + pipe_mutex_lock(fenced_list->mutex); + pipe_mutex_lock(fenced_buf->mutex); + if(fenced_buf->fence) { - if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { - return PIPE_ERROR; + if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) { + /* Remove from the fenced list */ + /* TODO: remove consequents */ + fenced_buffer_remove_locked(fenced_list, fenced_buf); + + p_atomic_dec(&fenced_buf->base.base.reference.count); + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + ret = PIPE_OK; } - /* Remove from the fenced list */ - /* TODO: remove consequents */ - _fenced_buffer_remove(fenced_list, fenced_buf); } - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return PIPE_OK; + pipe_mutex_unlock(fenced_list->mutex); + + return ret; } @@ -215,7 +237,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf) * Free as many fenced buffers from the list head as possible. */ static void -_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, +fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, int wait) { struct pb_fence_ops *ops = fenced_list->ops; @@ -228,21 +250,28 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, while(curr != &fenced_list->delayed) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + pipe_mutex_lock(fenced_buf->mutex); + if(fenced_buf->fence != prev_fence) { int signaled; if (wait) signaled = ops->fence_finish(ops, fenced_buf->fence, 0); else signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); - if (signaled != 0) + if (signaled != 0) { + pipe_mutex_unlock(fenced_buf->mutex); break; + } prev_fence = fenced_buf->fence; } else { assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } - _fenced_buffer_remove(fenced_list, fenced_buf); + fenced_buffer_remove_locked(fenced_list, fenced_buf); + pipe_mutex_unlock(fenced_buf->mutex); + + pb_reference((struct pb_buffer **)&fenced_buf, NULL); curr = next; next = curr->next; @@ -256,30 +285,25 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; - pipe_mutex_lock(fenced_list->mutex); assert(!pipe_is_referenced(&fenced_buf->base.base.reference)); - if (fenced_buf->fence) { - struct pb_fence_ops *ops = fenced_list->ops; - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { - struct list_head *curr, *prev; - curr = &fenced_buf->head; - prev = curr->prev; - do { - fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); - _fenced_buffer_remove(fenced_list, fenced_buf); - curr = prev; - prev = curr->prev; - } while (curr != &fenced_list->delayed); - } - else { - /* delay destruction */ - } - } - else { - _fenced_buffer_destroy(fenced_buf); - } + assert(!fenced_buf->fence); + +#ifdef DEBUG + pipe_mutex_lock(fenced_list->mutex); + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); + assert(fenced_list->numUnfenced); + --fenced_list->numUnfenced; pipe_mutex_unlock(fenced_list->mutex); +#else + (void)fenced_list; +#endif + + pb_reference(&fenced_buf->buffer, NULL); + + pipe_mutex_destroy(fenced_buf->mutex); + FREE(fenced_buf); } @@ -290,24 +314,23 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pb_fence_ops *ops = fenced_list->ops; - void *map; + void *map = NULL; + + pipe_mutex_lock(fenced_buf->mutex); assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); /* Serialize writes */ if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { - if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) { + if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && + ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { /* Don't wait for the GPU to finish writing */ - if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) - _fenced_buffer_remove(fenced_list, fenced_buf); - else - return NULL; - } - else { - /* Wait for the GPU to finish writing */ - _fenced_buffer_finish(fenced_buf); + goto finish; } + + /* Wait for the GPU to finish writing */ + fenced_buffer_finish_locked(fenced_list, fenced_buf); } #if 0 @@ -324,6 +347,9 @@ fenced_buffer_map(struct pb_buffer *buf, fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } +finish: + pipe_mutex_unlock(fenced_buf->mutex); + return map; } @@ -332,6 +358,9 @@ static void fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + + pipe_mutex_lock(fenced_buf->mutex); + assert(fenced_buf->mapcount); if(fenced_buf->mapcount) { pb_unmap(fenced_buf->buffer); @@ -339,6 +368,8 @@ fenced_buffer_unmap(struct pb_buffer *buf) if(!fenced_buf->mapcount) fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } + + pipe_mutex_unlock(fenced_buf->mutex); } @@ -350,11 +381,14 @@ fenced_buffer_validate(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); enum pipe_error ret; + pipe_mutex_lock(fenced_buf->mutex); + if(!vl) { /* invalidate */ fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; - return PIPE_OK; + ret = PIPE_OK; + goto finish; } assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); @@ -362,14 +396,17 @@ fenced_buffer_validate(struct pb_buffer *buf, flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) - return PIPE_ERROR_RETRY; + if(fenced_buf->vl && fenced_buf->vl != vl) { + ret = PIPE_ERROR_RETRY; + goto finish; + } #if 0 /* Do not validate if buffer is still mapped */ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* TODO: wait for the thread that mapped the buffer to unmap it */ - return PIPE_ERROR_RETRY; + ret = PIPE_ERROR_RETRY; + goto finish; } /* Final sanity checking */ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); @@ -379,17 +416,21 @@ fenced_buffer_validate(struct pb_buffer *buf, if(fenced_buf->vl == vl && (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ - return PIPE_OK; + ret = PIPE_OK; + goto finish; } ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - return ret; + goto finish; fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; - return PIPE_OK; +finish: + pipe_mutex_unlock(fenced_buf->mutex); + + return ret; } @@ -404,29 +445,36 @@ fenced_buffer_fence(struct pb_buffer *buf, fenced_buf = fenced_buffer(buf); fenced_list = fenced_buf->list; ops = fenced_list->ops; - - if(fence == fenced_buf->fence) { - /* Nothing to do */ - return; - } - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - ops->fence_reference(ops, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - _fenced_buffer_add(fenced_buf); - } - pipe_mutex_unlock(fenced_list->mutex); + pipe_mutex_lock(fenced_buf->mutex); + + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + + if(fence != fenced_buf->fence) { + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + if (fenced_buf->fence) { + fenced_buffer_remove_locked(fenced_list, fenced_buf); + p_atomic_dec(&fenced_buf->base.base.reference.count); + assert(pipe_is_referenced(&fenced_buf->base.base.reference)); + } + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + p_atomic_inc(&fenced_buf->base.base.reference.count); + fenced_buffer_add_locked(fenced_list, fenced_buf); + } + + pb_fence(fenced_buf->buffer, fence); - pb_fence(fenced_buf->buffer, fence); + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + } - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; + pipe_mutex_unlock(fenced_buf->mutex); + pipe_mutex_unlock(fenced_list->mutex); } @@ -436,6 +484,7 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, pb_size *offset) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); + /* NOTE: accesses immutable members only -- mutex not necessary */ pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); } @@ -475,6 +524,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, buf->buffer = buffer; buf->list = fenced_list; + pipe_mutex_init(buf->mutex); + #ifdef DEBUG pipe_mutex_lock(fenced_list->mutex); LIST_ADDTAIL(&buf->head, &fenced_list->unfenced); @@ -516,7 +567,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { pipe_mutex_lock(fenced_list->mutex); - _fenced_buffer_list_check_free(fenced_list, wait); + fenced_buffer_list_check_free_locked(fenced_list, wait); pipe_mutex_unlock(fenced_list->mutex); } @@ -538,11 +589,13 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) next = curr->next; while(curr != &fenced_list->unfenced) { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + pipe_mutex_lock(fenced_buf->mutex); assert(!fenced_buf->fence); debug_printf("%10p %7u %7u\n", (void *) fenced_buf, fenced_buf->base.base.size, p_atomic_read(&fenced_buf->base.base.reference.count)); + pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -552,6 +605,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) while(curr != &fenced_list->delayed) { int signaled; fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + pipe_mutex_lock(fenced_buf->mutex); signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); debug_printf("%10p %7u %7u %10p %s\n", (void *) fenced_buf, @@ -559,6 +613,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) p_atomic_read(&fenced_buf->base.base.reference.count), (void *) fenced_buf->fence, signaled == 0 ? "y" : "n"); + pipe_mutex_unlock(fenced_buf->mutex); curr = next; next = curr->next; } @@ -579,8 +634,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) sched_yield(); #endif - _fenced_buffer_list_check_free(fenced_list, 1); pipe_mutex_lock(fenced_list->mutex); + fenced_buffer_list_check_free_locked(fenced_list, 1); } #ifdef DEBUG @@ -588,6 +643,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) #endif pipe_mutex_unlock(fenced_list->mutex); + pipe_mutex_destroy(fenced_list->mutex); fenced_list->ops->destroy(fenced_list->ops); diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index eb492076b7..080fd4c731 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -1129,3 +1129,35 @@ TGSI Instruction Specification target Label of target instruction. + +3 Other tokens +=============== + + +3.1 Declaration Semantic +------------------------- + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + + +3.1.1 FACE + + Valid only in a fragment shader INPUT declaration. + + FACE.x is negative when the primitive is back facing. FACE.x is positive + when the primitive is front facing. diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 4fa10e2f7e..9791e58db3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -419,6 +419,7 @@ tgsi_default_instruction( void ) instruction.NrTokens = 1; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = TGSI_SAT_NONE; + instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; instruction.Padding = 0; @@ -428,12 +429,12 @@ tgsi_default_instruction( void ) } struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ) +tgsi_build_instruction(unsigned opcode, + unsigned saturate, + unsigned predicate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header) { struct tgsi_instruction instruction; @@ -445,6 +446,7 @@ tgsi_build_instruction( instruction = tgsi_default_instruction(); instruction.Opcode = opcode; instruction.Saturate = saturate; + instruction.Predicate = predicate; instruction.NumDstRegs = num_dst_regs; instruction.NumSrcRegs = num_src_regs; @@ -472,9 +474,9 @@ tgsi_default_full_instruction( void ) unsigned i; full_instruction.Instruction = tgsi_default_instruction(); + full_instruction.InstructionPredicate = tgsi_default_instruction_predicate(); full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); - full_instruction.InstructionExtPredicate = tgsi_default_instruction_ext_predicate(); for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); } @@ -504,14 +506,34 @@ tgsi_build_full_instruction( instruction = (struct tgsi_instruction *) &tokens[size]; size++; - *instruction = tgsi_build_instruction( - full_inst->Instruction.Opcode, - full_inst->Instruction.Saturate, - full_inst->Instruction.NumDstRegs, - full_inst->Instruction.NumSrcRegs, - header ); + *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.Predicate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header); prev_token = (struct tgsi_token *) instruction; + if (full_inst->Instruction.Predicate) { + struct tgsi_instruction_predicate *instruction_predicate; + + if (maxsize <= size) { + return 0; + } + instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size]; + size++; + + *instruction_predicate = + tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index, + full_inst->InstructionPredicate.Negate, + full_inst->InstructionPredicate.SwizzleX, + full_inst->InstructionPredicate.SwizzleY, + full_inst->InstructionPredicate.SwizzleZ, + full_inst->InstructionPredicate.SwizzleW, + instruction, + header); + } + if( tgsi_compare_instruction_ext_label( full_inst->InstructionExtLabel, tgsi_default_instruction_ext_label() ) ) { @@ -550,29 +572,6 @@ tgsi_build_full_instruction( prev_token = (struct tgsi_token *) instruction_ext_texture; } - if (tgsi_compare_instruction_ext_predicate(full_inst->InstructionExtPredicate, - tgsi_default_instruction_ext_predicate())) { - struct tgsi_instruction_ext_predicate *instruction_ext_predicate; - - if (maxsize <= size) { - return 0; - } - instruction_ext_predicate = (struct tgsi_instruction_ext_predicate *)&tokens[size]; - size++; - - *instruction_ext_predicate = - tgsi_build_instruction_ext_predicate(full_inst->InstructionExtPredicate.SrcIndex, - full_inst->InstructionExtPredicate.Negate, - full_inst->InstructionExtPredicate.SwizzleX, - full_inst->InstructionExtPredicate.SwizzleY, - full_inst->InstructionExtPredicate.SwizzleZ, - full_inst->InstructionExtPredicate.SwizzleW, - prev_token, - instruction, - header); - prev_token = (struct tgsi_token *)instruction_ext_predicate; - } - for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; struct tgsi_dst_register *dst_register; @@ -746,6 +745,47 @@ tgsi_build_full_instruction( return size; } +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate.SwizzleX = TGSI_SWIZZLE_X; + instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y; + instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z; + instruction_predicate.SwizzleW = TGSI_SWIZZLE_W; + instruction_predicate.Negate = 0; + instruction_predicate.Index = 0; + instruction_predicate.Padding = 0; + + return instruction_predicate; +} + +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header) +{ + struct tgsi_instruction_predicate instruction_predicate; + + instruction_predicate = tgsi_default_instruction_predicate(); + instruction_predicate.SwizzleX = swizzleX; + instruction_predicate.SwizzleY = swizzleY; + instruction_predicate.SwizzleZ = swizzleZ; + instruction_predicate.SwizzleW = swizzleW; + instruction_predicate.Negate = negate; + instruction_predicate.Index = index; + + instruction_grow(instruction, header); + + return instruction_predicate; +} + /** test for inequality of 32-bit values pointed to by a and b */ static INLINE boolean compare32(const void *a, const void *b) @@ -835,60 +875,6 @@ tgsi_build_instruction_ext_texture( return instruction_ext_texture; } -struct tgsi_instruction_ext_predicate -tgsi_default_instruction_ext_predicate(void) -{ - struct tgsi_instruction_ext_predicate instruction_ext_predicate; - - instruction_ext_predicate.Type = TGSI_INSTRUCTION_EXT_TYPE_PREDICATE; - instruction_ext_predicate.SwizzleX = TGSI_SWIZZLE_X; - instruction_ext_predicate.SwizzleY = TGSI_SWIZZLE_Y; - instruction_ext_predicate.SwizzleZ = TGSI_SWIZZLE_Z; - instruction_ext_predicate.SwizzleW = TGSI_SWIZZLE_W; - instruction_ext_predicate.Negate = 0; - instruction_ext_predicate.SrcIndex = 0; - instruction_ext_predicate.Padding = 0; - instruction_ext_predicate.Extended = 0; - - return instruction_ext_predicate; -} - -unsigned -tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a, - struct tgsi_instruction_ext_predicate b) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_instruction_ext_predicate -tgsi_build_instruction_ext_predicate(unsigned index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header) -{ - struct tgsi_instruction_ext_predicate instruction_ext_predicate; - - instruction_ext_predicate = tgsi_default_instruction_ext_predicate(); - instruction_ext_predicate.SwizzleX = swizzleX; - instruction_ext_predicate.SwizzleY = swizzleY; - instruction_ext_predicate.SwizzleZ = swizzleZ; - instruction_ext_predicate.SwizzleW = swizzleW; - instruction_ext_predicate.Negate = negate; - instruction_ext_predicate.SrcIndex = index; - - prev_token->Extended = 1; - instruction_grow(instruction, header); - - return instruction_ext_predicate; -} - struct tgsi_src_register tgsi_default_src_register( void ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 669712eb8f..0fe5f229d3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -143,6 +143,7 @@ struct tgsi_instruction tgsi_build_instruction( unsigned opcode, unsigned saturate, + unsigned predicate, unsigned num_dst_regs, unsigned num_src_regs, struct tgsi_header *header ); @@ -157,6 +158,19 @@ tgsi_build_full_instruction( struct tgsi_header *header, unsigned maxsize ); +struct tgsi_instruction_predicate +tgsi_default_instruction_predicate(void); + +struct tgsi_instruction_predicate +tgsi_build_instruction_predicate(int index, + unsigned negate, + unsigned swizzleX, + unsigned swizzleY, + unsigned swizzleZ, + unsigned swizzleW, + struct tgsi_instruction *instruction, + struct tgsi_header *header); + struct tgsi_instruction_ext_label tgsi_default_instruction_ext_label( void ); @@ -187,24 +201,6 @@ tgsi_build_instruction_ext_texture( struct tgsi_instruction *instruction, struct tgsi_header *header ); -struct tgsi_instruction_ext_predicate -tgsi_default_instruction_ext_predicate(void); - -unsigned -tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a, - struct tgsi_instruction_ext_predicate b); - -struct tgsi_instruction_ext_predicate -tgsi_build_instruction_ext_predicate(unsigned index, - unsigned negate, - unsigned swizzleX, - unsigned swizzleY, - unsigned swizzleZ, - unsigned swizzleW, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header); - struct tgsi_src_register tgsi_default_src_register( void ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ade5e7cbbd..2bf367ba50 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -62,9 +62,6 @@ #define FAST_MATH 1 -/** for tgsi_full_instruction::Flags */ -#define SOA_DEPENDENCY_FLAG 0x1 - #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 #define TILE_BOTTOM_LEFT 2 @@ -340,20 +337,6 @@ tgsi_exec_machine_bind_shader( maxInstructions += 10; } - if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { - uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; - /* XXX we only handle SOA dependencies properly for MOV/SWZ - * at this time! - */ - if (opcode != TGSI_OPCODE_MOV) { - debug_printf("Warning: SOA dependency in instruction" - " is not handled:\n"); - tgsi_dump_instruction(&parse.FullToken.FullInstruction, - numInstructions); - } - } - memcpy(instructions + numInstructions, &parse.FullToken.FullInstruction, sizeof(instructions[0])); @@ -395,6 +378,7 @@ tgsi_exec_machine_create( void ) mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; + mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { @@ -526,7 +510,7 @@ micro_ddy( dst->f[0] = dst->f[1] = dst->f[2] = - dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; } static void @@ -604,6 +588,24 @@ micro_exp2( dst->f[2] = util_fast_exp2( src->f[2] ); dst->f[3] = util_fast_exp2( src->f[3] ); #else + +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif + dst->f[0] = powf( 2.0f, src->f[0] ); dst->f[1] = powf( 2.0f, src->f[1] ); dst->f[2] = powf( 2.0f, src->f[2] ); @@ -1202,10 +1204,10 @@ fetch_src_file_channel( assert(index->i[1] < TGSI_EXEC_NUM_PREDS); assert(index->i[2] < TGSI_EXEC_NUM_PREDS); assert(index->i[3] < TGSI_EXEC_NUM_PREDS); - chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3]; + chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; + chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; + chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; + chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; break; case TGSI_FILE_OUTPUT: @@ -1496,10 +1498,17 @@ store_dest( dst = &mach->Addrs[index].xyzw[chan_index]; break; + case TGSI_FILE_LOOP: + assert(reg->DstRegister.Index == 0); + assert(mach->LoopCounterStackTop > 0); + assert(chan_index == CHAN_X); + dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; + break; + case TGSI_FILE_PREDICATE: index = reg->DstRegister.Index; assert(index < TGSI_EXEC_NUM_PREDS); - dst = &mach->Addrs[index].xyzw[chan_index]; + dst = &mach->Predicates[index].xyzw[chan_index]; break; default: @@ -1507,6 +1516,47 @@ store_dest( return; } + if (inst->Instruction.Predicate) { + uint swizzle; + union tgsi_exec_channel *pred; + + switch (chan_index) { + case CHAN_X: + swizzle = inst->InstructionPredicate.SwizzleX; + break; + case CHAN_Y: + swizzle = inst->InstructionPredicate.SwizzleY; + break; + case CHAN_Z: + swizzle = inst->InstructionPredicate.SwizzleZ; + break; + case CHAN_W: + swizzle = inst->InstructionPredicate.SwizzleW; + break; + default: + assert(0); + return; + } + + assert(inst->InstructionPredicate.Index == 0); + + pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle]; + + if (inst->InstructionPredicate.Negate) { + for (i = 0; i < QUAD_SIZE; i++) { + if (pred->u[i]) { + execmask &= ~(1 << i); + } + } + } else { + for (i = 0; i < QUAD_SIZE; i++) { + if (!pred->u[i]) { + execmask &= ~(1 << i); + } + } + } + } + switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: for (i = 0; i < QUAD_SIZE; i++) @@ -1762,6 +1812,64 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_txd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index; + union tgsi_exec_channel r[4]; + uint chan_index; + + /* + * XXX: This is fake TXD -- the derivatives are not taken into account, yet. + */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[unit], + &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); + } +} + /** * Evaluate a constant-valued coefficient at the position of the @@ -1831,53 +1939,58 @@ typedef void (* eval_coef_func)( unsigned chan ); static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) +exec_declaration(struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl) { - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (decl->Declaration.File == TGSI_FILE_INPUT) { + uint first, last, mask; first = decl->DeclarationRange.First; last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.SemanticIndex == 0); + assert(first == last); + assert(mask = TGSI_WRITEMASK_XYZW); - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; + mach->Inputs[first] = mach->QuadPos; + } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) { + uint i; - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; + assert(decl->Semantic.SemanticIndex == 0); + assert(first == last); - default: - assert( 0 ); - return; - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } + for (i = 0; i < QUAD_SIZE; i++) { + mach->Inputs[first].xyzw[0].f[i] = mach->Face; + } + } else { + eval_coef_func eval; + uint i, j; + + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert(0); + return; } - } - else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); + for (j = 0; j < NUM_CHANNELS; j++) { + if (mask & (1 << j)) { + for (i = first; i <= last; i++) { + eval(mach, i, j); } } } @@ -1894,6 +2007,7 @@ exec_instruction( { uint chan_index; union tgsi_exec_channel r[10]; + union tgsi_exec_channel d[8]; (*pc)++; @@ -1902,42 +2016,27 @@ exec_instruction( case TGSI_OPCODE_FLR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_flr(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_MOV: - if (inst->Flags & SOA_DEPENDENCY_FLAG) { - /* Do all fetches into temp regs, then do all stores to avoid - * intermediate/accidental clobbering. This could be done all the - * time for MOV but for other instructions we'll need more temps... - */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[chan_index], 0, chan_index ); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&d[chan_index], 0, chan_index); } - else { - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { FETCH( &r[0], 0, CHAN_X ); if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); + micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { @@ -1948,11 +2047,19 @@ exec_instruction( micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); + micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); } - } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } @@ -2020,14 +2127,13 @@ exec_instruction( break; case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_mul( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_mul(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2035,8 +2141,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2092,25 +2200,29 @@ exec_instruction( break; case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { FETCH( &r[0], 0, CHAN_Y ); FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); + micro_mul(&d[CHAN_Y], &r[0], &r[1]); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); + FETCH(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); + FETCH(&d[CHAN_W], 1, CHAN_W); + } + + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_W], 0, CHAN_W); } break; @@ -2120,9 +2232,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_min()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2132,9 +2245,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); /* XXX use micro_max()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - - STORE(&r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2143,8 +2257,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2153,9 +2269,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - - micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2166,8 +2283,10 @@ exec_instruction( FETCH( &r[1], 1, chan_index ); micro_mul( &r[0], &r[0], &r[1] ); FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_add(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2175,10 +2294,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); - - micro_sub( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); + micro_sub(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2187,12 +2306,12 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); micro_mul( &r[0], &r[0], &r[1] ); - micro_add( &r[0], &r[0], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_add(&d[chan_index], &r[0], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2201,8 +2320,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2227,8 +2348,10 @@ exec_instruction( case TGSI_OPCODE_FRC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_frc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_frc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2238,8 +2361,10 @@ exec_instruction( FETCH(&r[1], 1, chan_index); micro_max(&r[0], &r[0], &r[1]); FETCH(&r[1], 2, chan_index); - micro_min(&r[0], &r[0], &r[1]); - STORE(&r[0], 0, chan_index); + micro_min(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2247,19 +2372,17 @@ exec_instruction( case TGSI_OPCODE_ARR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_rnd( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_rnd(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_EX2: FETCH(&r[0], 0, CHAN_X); -#if FAST_MATH micro_exp2( &r[0], &r[0] ); -#else - micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); -#endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); @@ -2295,11 +2418,7 @@ exec_instruction( FETCH(&r[4], 1, CHAN_Y); micro_mul( &r[5], &r[3], &r[4] ); - micro_sub( &r[2], &r[2], &r[5] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } + micro_sub(&d[CHAN_X], &r[2], &r[5]); FETCH(&r[2], 1, CHAN_X); @@ -2308,20 +2427,21 @@ exec_instruction( FETCH(&r[5], 0, CHAN_X); micro_mul( &r[1], &r[1], &r[5] ); - micro_sub( &r[3], &r[3], &r[1] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } + micro_sub(&d[CHAN_Y], &r[3], &r[1]); micro_mul( &r[5], &r[5], &r[4] ); micro_mul( &r[0], &r[0], &r[2] ); - micro_sub( &r[5], &r[5], &r[0] ); + micro_sub(&d[CHAN_Z], &r[5], &r[0]); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_Z], 0, CHAN_Z); } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } @@ -2330,11 +2450,11 @@ exec_instruction( case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); - - micro_abs( &r[0], &r[0] ); - - STORE(&r[0], 0, chan_index); + micro_abs(&d[chan_index], &r[0]); } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); + } break; case TGSI_OPCODE_RCC: @@ -2386,16 +2506,20 @@ exec_instruction( case TGSI_OPCODE_DDX: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddx( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddx(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_DDY: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ddy( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ddy(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2476,10 +2600,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], - &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2493,8 +2617,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2510,8 +2636,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); + micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2519,8 +2647,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); + micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2550,7 +2680,7 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + exec_txd(mach, inst); break; case TGSI_OPCODE_TXL: @@ -2594,13 +2724,8 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_X); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&r[2], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&r[2], 0, CHAN_Z); - } + micro_add(&d[CHAN_X], &r[2], &r[3]); + } if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || IS_CHANNEL_ENABLED(*inst, CHAN_W)) { @@ -2610,13 +2735,20 @@ exec_instruction( micro_mul(&r[3], &r[3], &r[1]); micro_add(&r[2], &r[2], &r[3]); FETCH(&r[3], 0, CHAN_Y); - micro_add(&r[2], &r[2], &r[3]); - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&r[2], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&r[2], 0, CHAN_W); - } + micro_add(&d[CHAN_Y], &r[2], &r[3]); + + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&d[CHAN_X], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&d[CHAN_Y], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&d[CHAN_X], 0, CHAN_Z); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&d[CHAN_Y], 0, CHAN_W); } break; @@ -2701,8 +2833,10 @@ exec_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_sgn( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_sgn(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2711,10 +2845,10 @@ exec_instruction( FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); FETCH(&r[2], 2, chan_index); - - micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); - - STORE(&r[0], 0, chan_index); + micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2889,32 +3023,40 @@ exec_instruction( case TGSI_OPCODE_CEIL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_ceil( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_ceil(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_I2F: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_i2f( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_i2f(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_NOT: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_not( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_not(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; case TGSI_OPCODE_TRUNC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + micro_trunc(&d[chan_index], &r[0]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2922,8 +3064,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_shl( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_shl(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2931,8 +3075,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_ishr( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_ishr(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2940,8 +3086,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_and( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_and(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2949,8 +3097,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_or( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_or(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2962,8 +3112,10 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_xor( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); + micro_xor(&d[chan_index], &r[0], &r[1]); + } + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&d[chan_index], 0, chan_index); } break; @@ -2992,8 +3144,23 @@ exec_instruction( for (chan_index = 0; chan_index < 3; chan_index++) { FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); } - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); ++mach->LoopCounterStackTop; + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); + /* update LoopMask */ + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { + mach->LoopMask &= ~0x1; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { + mach->LoopMask &= ~0x2; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { + mach->LoopMask &= ~0x4; + } + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { + mach->LoopMask &= ~0x8; + } + /* TODO: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ @@ -3007,28 +3174,28 @@ exec_instruction( case TGSI_OPCODE_ENDFOR: assert(mach->LoopCounterStackTop > 0); - micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); /* update LoopMask */ - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { mach->LoopMask &= ~0x1; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { mach->LoopMask &= ~0x2; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { mach->LoopMask &= ~0x4; } - if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { + if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { mach->LoopMask &= ~0x8; } - micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); + micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], + &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); assert(mach->LoopLabelStackTop > 0); inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); + STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; @@ -3095,7 +3262,28 @@ exec_instruction( break; case TGSI_OPCODE_ENDSUB: - /* no-op */ + /* + * XXX: This really should be a no-op. We should never reach this opcode. + */ + + assert(mach->CallStackTop > 0); + mach->CallStackTop--; + + mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; + mach->CondMask = mach->CondStack[mach->CondStackTop]; + + mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; + mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; + + mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; + mach->ContMask = mach->ContStack[mach->ContStackTop]; + + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; + + UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_NOP: @@ -3106,6 +3294,8 @@ exec_instruction( } } +#define DEBUG_EXECUTION 0 + /** * Run TGSI interpreter. @@ -3149,10 +3339,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) exec_declaration( mach, mach->Declarations+i ); } - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - assert(pc < (int) mach->NumInstructions); - exec_instruction( mach, mach->Instructions + pc, &pc ); + { +#if DEBUG_EXECUTION + struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; + uint inst = 1; + + memcpy(temps, mach->Temps, sizeof(temps)); + memcpy(outputs, mach->Outputs, sizeof(outputs)); +#endif + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + +#if DEBUG_EXECUTION + uint i; + + tgsi_dump_instruction(&mach->Instructions[pc], inst++); +#endif + + assert(pc < (int) mach->NumInstructions); + exec_instruction(mach, mach->Instructions + pc, &pc); + +#if DEBUG_EXECUTION + for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { + if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { + uint j; + + memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); + debug_printf("TEMP[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("(%6f, %6f, %6f, %6f)\n", + temps[i].xyzw[0].f[j], + temps[i].xyzw[1].f[j], + temps[i].xyzw[2].f[j], + temps[i].xyzw[3].f[j]); + } + } + } + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { + uint j; + + memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); + debug_printf("OUT[%2u] = ", i); + for (j = 0; j < 4; j++) { + if (j > 0) { + debug_printf(" "); + } + debug_printf("{%6f, %6f, %6f, %6f}\n", + outputs[i].xyzw[0].f[j], + outputs[i].xyzw[1].f[j], + outputs[i].xyzw[2].f[j], + outputs[i].xyzw[3].f[j]); + } + } + } +#endif + } } #if 0 @@ -3166,5 +3413,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) } #endif + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 71fd300cbb..afaf5c39c4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -225,6 +225,7 @@ struct tgsi_exec_machine struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; struct tgsi_exec_vector *Addrs; + struct tgsi_exec_vector *Predicates; struct tgsi_sampler **Samplers; @@ -241,6 +242,7 @@ struct tgsi_exec_machine /* FRAGMENT processor only. */ const struct tgsi_interp_coef *InterpCoefs; struct tgsi_exec_vector QuadPos; + float Face; /**< +1 if front facing, -1 if back facing */ /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ @@ -261,7 +263,7 @@ struct tgsi_exec_machine uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopLabelStackTop; - /** Loop counter stack (x = count, y = current, z = step) */ + /** Loop counter stack (x = index, y = counter, z = step) */ struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopCounterStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 83f9df1183..9ca2993452 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -175,6 +175,10 @@ tgsi_parse_token( copy_token(&inst->Instruction, &token); extended = inst->Instruction.Extended; + if (inst->Instruction.Predicate) { + next_token(ctx, &inst->InstructionPredicate); + } + while( extended ) { struct tgsi_src_register_ext token; @@ -189,10 +193,6 @@ tgsi_parse_token( copy_token(&inst->InstructionExtTexture, &token); break; - case TGSI_INSTRUCTION_EXT_TYPE_PREDICATE: - copy_token(&inst->InstructionExtPredicate, &token); - break; - default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 76f1676d85..cb4772ade8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -80,9 +80,9 @@ struct tgsi_full_immediate struct tgsi_full_instruction { struct tgsi_instruction Instruction; + struct tgsi_instruction_predicate InstructionPredicate; struct tgsi_instruction_ext_label InstructionExtLabel; struct tgsi_instruction_ext_texture InstructionExtTexture; - struct tgsi_instruction_ext_predicate InstructionExtPredicate; struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; uint Flags; /**< user-defined usage */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 80e6ff2b79..557c885a8a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -47,9 +47,9 @@ union tgsi_any_token { struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; + struct tgsi_instruction_predicate insn_predicate; struct tgsi_instruction_ext_label insn_ext_label; struct tgsi_instruction_ext_texture insn_ext_texture; - struct tgsi_instruction_ext_predicate insn_ext_predicate; struct tgsi_src_register src; struct tgsi_src_register_ext_mod src_ext_mod; struct tgsi_dimension dim; @@ -72,6 +72,7 @@ struct ureg_tokens { #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 +#define UREG_MAX_LOOP 1 #define UREG_MAX_PRED 1 #define DOMAIN_DECL 0 @@ -123,6 +124,7 @@ struct ureg_program unsigned nr_addrs; unsigned nr_preds; + unsigned nr_loops; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -383,6 +385,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, i = ureg->nr_constant_ranges++; ureg->constant_range[i].first = index; ureg->constant_range[i].last = index; + goto out; } /* Collapse all ranges down to one: @@ -450,6 +453,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } +/* Allocate a new loop register. + */ +struct ureg_dst +ureg_DECL_loop(struct ureg_program *ureg) +{ + if (ureg->nr_loops < UREG_MAX_LOOP) { + return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++); + } + + assert(0); + return ureg_dst_register(TGSI_FILE_LOOP, 0); +} + /* Allocate a new predicate register. */ struct ureg_dst @@ -694,35 +710,27 @@ ureg_emit_insn(struct ureg_program *ureg, validate( opcode, num_dst, num_src ); out = get_tokens( ureg, DOMAIN_INSN, count ); - out[0].value = 0; - out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - out[0].insn.NrTokens = 0; + out[0].insn = tgsi_default_instruction(); out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; - out[0].insn.Padding = 0; result.insn_token = ureg->domain[DOMAIN_INSN].count - count; + result.extended_token = result.insn_token; if (predicate) { - out[0].insn.Extended = 1; - out[1].insn_ext_predicate = tgsi_default_instruction_ext_predicate(); - out[1].insn_ext_predicate.Negate = pred_negate; - out[1].insn_ext_predicate.SwizzleX = pred_swizzle_x; - out[1].insn_ext_predicate.SwizzleY = pred_swizzle_y; - out[1].insn_ext_predicate.SwizzleZ = pred_swizzle_z; - out[1].insn_ext_predicate.SwizzleW = pred_swizzle_w; - - result.extended_token = result.insn_token + 1; - } else { - out[0].insn.Extended = 0; - - result.extended_token = result.insn_token; + out[0].insn.Predicate = 1; + out[1].insn_predicate = tgsi_default_instruction_predicate(); + out[1].insn_predicate.Negate = pred_negate; + out[1].insn_predicate.SwizzleX = pred_swizzle_x; + out[1].insn_predicate.SwizzleY = pred_swizzle_y; + out[1].insn_predicate.SwizzleZ = pred_swizzle_z; + out[1].insn_predicate.SwizzleW = pred_swizzle_w; } ureg->nr_instructions++; - + return result; } @@ -1065,6 +1073,13 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } + if (ureg->nr_loops) { + emit_decl_range(ureg, + TGSI_FILE_LOOP, + 0, + ureg->nr_loops); + } + if (ureg->nr_preds) { emit_decl_range(ureg, TGSI_FILE_PREDICATE, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index e6fad1f0f4..b89f55d5a8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -163,6 +163,9 @@ struct ureg_dst ureg_DECL_address( struct ureg_program * ); struct ureg_dst +ureg_DECL_loop( struct ureg_program * ); + +struct ureg_dst ureg_DECL_predicate(struct ureg_program *); /* Supply an index to the sampler declaration as this is the hook to diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 5038642599..5372df5735 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -354,9 +354,9 @@ util_blit_pixels_writemask(struct blit_state *ctx, texTemp.target = PIPE_TEXTURE_2D; texTemp.format = src->format; texTemp.last_level = 0; - texTemp.width[0] = srcW; - texTemp.height[0] = srcH; - texTemp.depth[0] = 1; + texTemp.width0 = srcW; + texTemp.height0 = srcH; + texTemp.depth0 = 1; pf_get_block(src->format, &texTemp.block); tex = screen->texture_create(screen, &texTemp); @@ -389,10 +389,10 @@ util_blit_pixels_writemask(struct blit_state *ctx, } else { pipe_texture_reference(&tex, src->texture); - s0 = srcX0 / (float)tex->width[0]; - s1 = srcX1 / (float)tex->width[0]; - t0 = srcY0 / (float)tex->height[0]; - t1 = srcY1 / (float)tex->height[0]; + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; } @@ -518,13 +518,13 @@ util_blit_pixels_tex(struct blit_state *ctx, assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = srcX0 / (float)tex->width[0]; - s1 = srcX1 / (float)tex->width[0]; - t0 = srcY0 / (float)tex->height[0]; - t1 = srcY1 / (float)tex->height[0]; + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format, PIPE_TEXTURE_2D, diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index facbe69173..a08241971c 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -80,7 +80,7 @@ static int has_cpuid(void); #if defined(PIPE_ARCH_X86) /* The sigill handlers */ -#if defined(PIPE_OS_LINUX) //&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC) +#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/ static void sigill_handler_sse(int signal, struct sigcontext sc) { @@ -240,7 +240,7 @@ check_os_katmai_support(void) __asm __volatile ("xorps %xmm0, %xmm0"); #elif defined(PIPE_CC_MSVC) __asm { - xorps xmm0, xmm0 // executing SSE instruction + xorps xmm0, xmm0 /* executing SSE instruction */ } #else #error Unsupported compiler @@ -283,7 +283,7 @@ check_os_katmai_support(void) * and therefore to be safe I'm going to leave this test in here. */ if (util_cpu_caps.has_sse) { - // test_os_katmai_exception_support(); + /* test_os_katmai_exception_support(); */ } /* Restore the original signal handlers. diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index f1bf94f17d..b9cc2aa716 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -97,3 +97,13 @@ PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw, PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb +PIPE_FORMAT_YCBCR , yuv , 2, 1, x32 , , , , xyz1, yuv +PIPE_FORMAT_YCBCR_REV , yuv , 2, 1, x32 , , , , xyz1, yuv +PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb +PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb +PIPE_FORMAT_DXT3_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT5_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT1_SRGBA , dxt , 4, 4, x64 , , , , xyzw, srgb +PIPE_FORMAT_DXT1_SRGB , dxt , 4, 4, x64 , , , , xyz1, srgb +PIPE_FORMAT_DXT3_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_DXT5_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index aa823aa218..f67f1e458d 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -45,6 +45,7 @@ #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" +#include "util/u_math.h" #include "cso_cache/cso_context.h" @@ -1125,12 +1126,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1168,12 +1169,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1213,12 +1214,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1575,8 +1576,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * Setup framebuffer / dest surface */ fb.cbufs[0] = surf; - fb.width = pt->width[dstLevel]; - fb.height = pt->height[dstLevel]; + fb.width = u_minify(pt->width0, dstLevel); + fb.height = u_minify(pt->height0, dstLevel); cso_set_framebuffer(ctx->cso, &fb); /* @@ -1597,8 +1598,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, offset = set_vertex_data(ctx, pt->target, face, - (float) pt->width[dstLevel], - (float) pt->height[dstLevel]); + (float) u_minify(pt->width0, dstLevel), + (float) u_minify(pt->height0, dstLevel)); util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 75b075f160..7e75702701 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -491,6 +491,43 @@ util_next_power_of_two(unsigned x) /** + * Return number of bits set in n. + */ +static INLINE unsigned +util_bitcount(unsigned n) +{ +#if defined(PIPE_CC_GCC) + return __builtin_popcount(n); +#else + /* XXX there are more clever ways of doing this */ + unsigned bits = 0; + while (n) { + bits += (n & 1); + n = n >> 1; + } + return bits; +#endif +} + + +/** + * Reverse byte order of a 32 bit word. + */ +static INLINE uint32_t +util_bswap32(uint32_t n) +{ +#if defined(PIPE_CC_GCC) + return __builtin_bswap32(n); +#else + return (n >> 24) | + ((n >> 8) & 0x0000ff00) | + ((n << 8) & 0x00ff0000) | + (n << 24); +#endif +} + + +/** * Clamp X to [MIN, MAX]. * This is a macro to allow float, int, uint, etc. types. */ @@ -499,6 +536,9 @@ util_next_power_of_two(unsigned x) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) #define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) +#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C ) +#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) + static INLINE int align(int value, int alignment) @@ -507,9 +547,9 @@ align(int value, int alignment) } static INLINE unsigned -minify(unsigned value) +u_minify(unsigned value, unsigned levels) { - return MAX2(1, value >> 1); + return MAX2(1, value >> levels); } #ifndef COPY_4V diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index eda883b3b9..9dacc6d83d 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -302,7 +302,10 @@ util_unpack_color_ub(enum pipe_format format, const void *src, static INLINE void util_pack_color(const float rgba[4], enum pipe_format format, void *dest) { - ubyte r, g, b, a; + ubyte r = 0; + ubyte g = 0; + ubyte b = 0; + ubyte a = 0; if (pf_size_x(format) <= 8) { /* format uses 8-bit components or less */ diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 85e443204e..de8c266db8 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -79,9 +79,9 @@ util_create_rgba_surface(struct pipe_screen *screen, templ.target = target; templ.format = format; templ.last_level = 0; - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; pf_get_block(format, &templ.block); templ.tex_usage = usage; diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index cda6dc134a..592dd17421 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -455,8 +455,8 @@ void vl_compositor_render(struct vl_compositor *compositor, assert(dst_area); assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME); - compositor->fb_state.width = dst_surface->width[0]; - compositor->fb_state.height = dst_surface->height[0]; + compositor->fb_state.width = dst_surface->width0; + compositor->fb_state.height = dst_surface->height0; compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface ( compositor->pipe->screen, @@ -504,12 +504,12 @@ void vl_compositor_render(struct vl_compositor *compositor, vs_consts->dst_trans.z = 0; vs_consts->dst_trans.w = 0; - vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0]; - vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0]; + vs_consts->src_scale.x = src_area->w / (float)src_surface->width0; + vs_consts->src_scale.y = src_area->h / (float)src_surface->height0; vs_consts->src_scale.z = 1; vs_consts->src_scale.w = 1; - vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0]; - vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0]; + vs_consts->src_trans.x = src_area->x / (float)src_surface->width0; + vs_consts->src_trans.y = src_area->y / (float)src_surface->height0; vs_consts->src_trans.z = 0; vs_consts->src_trans.w = 0; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index c4ba69817f..1934965995 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -681,7 +681,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) ( r->pipe->screen, r->textures.all[i], 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, - r->textures.all[i]->width[0], r->textures.all[i]->height[0] + r->textures.all[i]->width0, r->textures.all[i]->height0 ); r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); @@ -835,26 +835,26 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ template.format = PIPE_FORMAT_R16_SNORM; template.last_level = 0; - template.width[0] = r->pot_buffers ? + template.width0 = r->pot_buffers ? util_next_power_of_two(r->picture_width) : r->picture_width; - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height) : r->picture_height; - template.depth[0] = 1; + template.depth0 = 1; pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { - template.width[0] = r->pot_buffers ? + template.width0 = r->pot_buffers ? util_next_power_of_two(r->picture_width / 2) : r->picture_width / 2; - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height / 2) : r->picture_height / 2; } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height / 2) : r->picture_height / 2; @@ -1283,8 +1283,8 @@ flush(struct vl_mpeg12_mc_renderer *r) PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); - vs_consts->denorm.x = r->surface->width[0]; - vs_consts->denorm.y = r->surface->height[0]; + vs_consts->denorm.x = r->surface->width0; + vs_consts->denorm.y = r->surface->height0; pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); @@ -1633,8 +1633,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer renderer->past = past; renderer->future = future; renderer->fence = fence; - renderer->surface_tex_inv_size.x = 1.0f / surface->width[0]; - renderer->surface_tex_inv_size.y = 1.0f / surface->height[0]; + renderer->surface_tex_inv_size.x = 1.0f / surface->width0; + renderer->surface_tex_inv_size.y = 1.0f / surface->height0; } while (num_macroblocks) { |