summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c10
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c3
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c280
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c166
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c706
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c53
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h3
-rw-r--r--src/gallium/auxiliary/util/u_blit.c26
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c6
-rw-r--r--src/gallium/auxiliary/util/u_format.csv10
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c33
-rw-r--r--src/gallium/auxiliary/util/u_math.h44
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h5
-rw-r--r--src/gallium/auxiliary/util/u_surface.c6
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.c12
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c22
23 files changed, 935 insertions, 537 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index d6c16d37b2..eeeed9813f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -398,9 +398,9 @@ aaline_create_texture(struct aaline_stage *aaline)
texTemp.target = PIPE_TEXTURE_2D;
texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
texTemp.last_level = MAX_TEXTURE_LEVEL;
- texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL;
- texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL;
- texTemp.depth[0] = 1;
+ texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
+ texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
+ texTemp.depth0 = 1;
pf_get_block(texTemp.format, &texTemp.block);
aaline->texture = screen->texture_create(screen, &texTemp);
@@ -413,11 +413,11 @@ aaline_create_texture(struct aaline_stage *aaline)
*/
for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
struct pipe_transfer *transfer;
- const uint size = aaline->texture->width[level];
+ const uint size = u_minify(aaline->texture->width0, level);
ubyte *data;
uint i, j;
- assert(aaline->texture->width[level] == aaline->texture->height[level]);
+ assert(aaline->texture->width0 == aaline->texture->height0);
/* This texture is new, no need to flush.
*/
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 283502cdf3..27d89721b1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -427,9 +427,9 @@ pstip_create_texture(struct pstip_stage *pstip)
texTemp.target = PIPE_TEXTURE_2D;
texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
texTemp.last_level = 0;
- texTemp.width[0] = 32;
- texTemp.height[0] = 32;
- texTemp.depth[0] = 1;
+ texTemp.width0 = 32;
+ texTemp.height0 = 32;
+ texTemp.depth0 = 1;
pf_get_block(texTemp.format, &texTemp.block);
pstip->texture = screen->texture_create(screen, &texTemp);
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index dbb5ac7182..4865a2d854 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -192,7 +192,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
prim, start, count);
for (i = 0; i < count; i++) {
- uint ii, j;
+ uint ii = 0;
+ uint j;
if (draw->pt.user.elts) {
/* indexed arrays */
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index d3f179ced1..757c487454 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -346,7 +346,8 @@ vcache_check_run( struct draw_pt_front_end *frontend,
vcache->fetch_max,
draw_count);
- if (max_index == 0xffffffff ||
+ if (max_index >= DRAW_PIPE_MAX_VERTICES ||
+ fetch_count >= UNDEFINED_VERTEX_ID ||
fetch_count > draw_count) {
if (0) debug_printf("fail\n");
goto fail;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 2ef4293d4d..2f973684f6 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -80,11 +80,27 @@ struct fenced_buffer_list
*/
struct fenced_buffer
{
+ /*
+ * Immutable members.
+ */
+
struct pb_buffer base;
-
struct pb_buffer *buffer;
+ struct fenced_buffer_list *list;
+
+ /**
+ * Protected by fenced_buffer_list::mutex
+ */
+ struct list_head head;
- /* FIXME: protect access with mutex */
+ /**
+ * Following members are mutable and protected by this mutex.
+ *
+ * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex
+ * held, but in order to prevent deadlocks you must never lock
+ * fenced_buffer_list::mutex with this mutex held.
+ */
+ pipe_mutex mutex;
/**
* A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current
@@ -96,9 +112,6 @@ struct fenced_buffer
struct pb_validate *vl;
unsigned validation_flags;
struct pipe_fence_handle *fence;
-
- struct list_head head;
- struct fenced_buffer_list *list;
};
@@ -110,15 +123,24 @@ fenced_buffer(struct pb_buffer *buf)
}
+/**
+ * Add the buffer to the fenced list.
+ *
+ * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this
+ * order before calling this function.
+ *
+ * Reference count should be incremented before calling this function.
+ */
static INLINE void
-_fenced_buffer_add(struct fenced_buffer *fenced_buf)
+fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list,
+ struct fenced_buffer *fenced_buf)
{
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
-
assert(pipe_is_referenced(&fenced_buf->base.base.reference));
assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
assert(fenced_buf->fence);
+ /* TODO: Move the reference count increment here */
+
#ifdef DEBUG
LIST_DEL(&fenced_buf->head);
assert(fenced_list->numUnfenced);
@@ -130,32 +152,16 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf)
/**
- * Actually destroy the buffer.
+ * Remove the buffer from the fenced list.
+ *
+ * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this
+ * order before calling this function.
+ *
+ * Reference count should be decremented after calling this function.
*/
static INLINE void
-_fenced_buffer_destroy(struct fenced_buffer *fenced_buf)
-{
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
-
- assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
- assert(!fenced_buf->fence);
-#ifdef DEBUG
- assert(fenced_buf->head.prev);
- assert(fenced_buf->head.next);
- LIST_DEL(&fenced_buf->head);
- assert(fenced_list->numUnfenced);
- --fenced_list->numUnfenced;
-#else
- (void)fenced_list;
-#endif
- pb_reference(&fenced_buf->buffer, NULL);
- FREE(fenced_buf);
-}
-
-
-static INLINE void
-_fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
- struct fenced_buffer *fenced_buf)
+fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list,
+ struct fenced_buffer *fenced_buf)
{
struct pb_fence_ops *ops = fenced_list->ops;
@@ -177,37 +183,53 @@ _fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
++fenced_list->numUnfenced;
#endif
- /**
- * FIXME!!!
- */
-
- if(!pipe_is_referenced(&fenced_buf->base.base.reference))
- _fenced_buffer_destroy(fenced_buf);
+ /* TODO: Move the reference count decrement and destruction here */
}
+/**
+ * Wait for the fence to expire, and remove it from the fenced list.
+ *
+ * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be
+ * held -- it will
+ */
static INLINE enum pipe_error
-_fenced_buffer_finish(struct fenced_buffer *fenced_buf)
+fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list,
+ struct fenced_buffer *fenced_buf)
{
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
struct pb_fence_ops *ops = fenced_list->ops;
+ enum pipe_error ret = PIPE_ERROR;
#if 0
debug_warning("waiting for GPU");
#endif
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
assert(fenced_buf->fence);
+
+ /* Acquire the global lock */
+ pipe_mutex_unlock(fenced_buf->mutex);
+ pipe_mutex_lock(fenced_list->mutex);
+ pipe_mutex_lock(fenced_buf->mutex);
+
if(fenced_buf->fence) {
- if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) {
- return PIPE_ERROR;
+ if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) {
+ /* Remove from the fenced list */
+ /* TODO: remove consequents */
+ fenced_buffer_remove_locked(fenced_list, fenced_buf);
+
+ p_atomic_dec(&fenced_buf->base.base.reference.count);
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+
+ fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+
+ ret = PIPE_OK;
}
- /* Remove from the fenced list */
- /* TODO: remove consequents */
- _fenced_buffer_remove(fenced_list, fenced_buf);
}
- fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
- return PIPE_OK;
+ pipe_mutex_unlock(fenced_list->mutex);
+
+ return ret;
}
@@ -215,7 +237,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf)
* Free as many fenced buffers from the list head as possible.
*/
static void
-_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
+fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list,
int wait)
{
struct pb_fence_ops *ops = fenced_list->ops;
@@ -228,21 +250,28 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
while(curr != &fenced_list->delayed) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ pipe_mutex_lock(fenced_buf->mutex);
+
if(fenced_buf->fence != prev_fence) {
int signaled;
if (wait)
signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
else
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
- if (signaled != 0)
+ if (signaled != 0) {
+ pipe_mutex_unlock(fenced_buf->mutex);
break;
+ }
prev_fence = fenced_buf->fence;
}
else {
assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
}
- _fenced_buffer_remove(fenced_list, fenced_buf);
+ fenced_buffer_remove_locked(fenced_list, fenced_buf);
+ pipe_mutex_unlock(fenced_buf->mutex);
+
+ pb_reference((struct pb_buffer **)&fenced_buf, NULL);
curr = next;
next = curr->next;
@@ -256,30 +285,25 @@ fenced_buffer_destroy(struct pb_buffer *buf)
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_buffer_list *fenced_list = fenced_buf->list;
- pipe_mutex_lock(fenced_list->mutex);
assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
- if (fenced_buf->fence) {
- struct pb_fence_ops *ops = fenced_list->ops;
- if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
- struct list_head *curr, *prev;
- curr = &fenced_buf->head;
- prev = curr->prev;
- do {
- fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
- _fenced_buffer_remove(fenced_list, fenced_buf);
- curr = prev;
- prev = curr->prev;
- } while (curr != &fenced_list->delayed);
- }
- else {
- /* delay destruction */
- }
- }
- else {
- _fenced_buffer_destroy(fenced_buf);
- }
+ assert(!fenced_buf->fence);
+
+#ifdef DEBUG
+ pipe_mutex_lock(fenced_list->mutex);
+ assert(fenced_buf->head.prev);
+ assert(fenced_buf->head.next);
+ LIST_DEL(&fenced_buf->head);
+ assert(fenced_list->numUnfenced);
+ --fenced_list->numUnfenced;
pipe_mutex_unlock(fenced_list->mutex);
+#else
+ (void)fenced_list;
+#endif
+
+ pb_reference(&fenced_buf->buffer, NULL);
+
+ pipe_mutex_destroy(fenced_buf->mutex);
+ FREE(fenced_buf);
}
@@ -290,24 +314,23 @@ fenced_buffer_map(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_buffer_list *fenced_list = fenced_buf->list;
struct pb_fence_ops *ops = fenced_list->ops;
- void *map;
+ void *map = NULL;
+
+ pipe_mutex_lock(fenced_buf->mutex);
assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE));
/* Serialize writes */
if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) ||
((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) {
- if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) {
+ if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) &&
+ ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
/* Don't wait for the GPU to finish writing */
- if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0)
- _fenced_buffer_remove(fenced_list, fenced_buf);
- else
- return NULL;
- }
- else {
- /* Wait for the GPU to finish writing */
- _fenced_buffer_finish(fenced_buf);
+ goto finish;
}
+
+ /* Wait for the GPU to finish writing */
+ fenced_buffer_finish_locked(fenced_list, fenced_buf);
}
#if 0
@@ -324,6 +347,9 @@ fenced_buffer_map(struct pb_buffer *buf,
fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE;
}
+finish:
+ pipe_mutex_unlock(fenced_buf->mutex);
+
return map;
}
@@ -332,6 +358,9 @@ static void
fenced_buffer_unmap(struct pb_buffer *buf)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+
+ pipe_mutex_lock(fenced_buf->mutex);
+
assert(fenced_buf->mapcount);
if(fenced_buf->mapcount) {
pb_unmap(fenced_buf->buffer);
@@ -339,6 +368,8 @@ fenced_buffer_unmap(struct pb_buffer *buf)
if(!fenced_buf->mapcount)
fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE;
}
+
+ pipe_mutex_unlock(fenced_buf->mutex);
}
@@ -350,11 +381,14 @@ fenced_buffer_validate(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
enum pipe_error ret;
+ pipe_mutex_lock(fenced_buf->mutex);
+
if(!vl) {
/* invalidate */
fenced_buf->vl = NULL;
fenced_buf->validation_flags = 0;
- return PIPE_OK;
+ ret = PIPE_OK;
+ goto finish;
}
assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
@@ -362,14 +396,17 @@ fenced_buffer_validate(struct pb_buffer *buf,
flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
/* Buffer cannot be validated in two different lists */
- if(fenced_buf->vl && fenced_buf->vl != vl)
- return PIPE_ERROR_RETRY;
+ if(fenced_buf->vl && fenced_buf->vl != vl) {
+ ret = PIPE_ERROR_RETRY;
+ goto finish;
+ }
#if 0
/* Do not validate if buffer is still mapped */
if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) {
/* TODO: wait for the thread that mapped the buffer to unmap it */
- return PIPE_ERROR_RETRY;
+ ret = PIPE_ERROR_RETRY;
+ goto finish;
}
/* Final sanity checking */
assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
@@ -379,17 +416,21 @@ fenced_buffer_validate(struct pb_buffer *buf,
if(fenced_buf->vl == vl &&
(fenced_buf->validation_flags & flags) == flags) {
/* Nothing to do -- buffer already validated */
- return PIPE_OK;
+ ret = PIPE_OK;
+ goto finish;
}
ret = pb_validate(fenced_buf->buffer, vl, flags);
if (ret != PIPE_OK)
- return ret;
+ goto finish;
fenced_buf->vl = vl;
fenced_buf->validation_flags |= flags;
- return PIPE_OK;
+finish:
+ pipe_mutex_unlock(fenced_buf->mutex);
+
+ return ret;
}
@@ -404,29 +445,36 @@ fenced_buffer_fence(struct pb_buffer *buf,
fenced_buf = fenced_buffer(buf);
fenced_list = fenced_buf->list;
ops = fenced_list->ops;
-
- if(fence == fenced_buf->fence) {
- /* Nothing to do */
- return;
- }
- assert(fenced_buf->vl);
- assert(fenced_buf->validation_flags);
-
pipe_mutex_lock(fenced_list->mutex);
- if (fenced_buf->fence)
- _fenced_buffer_remove(fenced_list, fenced_buf);
- if (fence) {
- ops->fence_reference(ops, &fenced_buf->fence, fence);
- fenced_buf->flags |= fenced_buf->validation_flags;
- _fenced_buffer_add(fenced_buf);
- }
- pipe_mutex_unlock(fenced_list->mutex);
+ pipe_mutex_lock(fenced_buf->mutex);
+
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+
+ if(fence != fenced_buf->fence) {
+ assert(fenced_buf->vl);
+ assert(fenced_buf->validation_flags);
+
+ if (fenced_buf->fence) {
+ fenced_buffer_remove_locked(fenced_list, fenced_buf);
+ p_atomic_dec(&fenced_buf->base.base.reference.count);
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+ }
+ if (fence) {
+ ops->fence_reference(ops, &fenced_buf->fence, fence);
+ fenced_buf->flags |= fenced_buf->validation_flags;
+ p_atomic_inc(&fenced_buf->base.base.reference.count);
+ fenced_buffer_add_locked(fenced_list, fenced_buf);
+ }
+
+ pb_fence(fenced_buf->buffer, fence);
- pb_fence(fenced_buf->buffer, fence);
+ fenced_buf->vl = NULL;
+ fenced_buf->validation_flags = 0;
+ }
- fenced_buf->vl = NULL;
- fenced_buf->validation_flags = 0;
+ pipe_mutex_unlock(fenced_buf->mutex);
+ pipe_mutex_unlock(fenced_list->mutex);
}
@@ -436,6 +484,7 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
pb_size *offset)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ /* NOTE: accesses immutable members only -- mutex not necessary */
pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
}
@@ -475,6 +524,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list,
buf->buffer = buffer;
buf->list = fenced_list;
+ pipe_mutex_init(buf->mutex);
+
#ifdef DEBUG
pipe_mutex_lock(fenced_list->mutex);
LIST_ADDTAIL(&buf->head, &fenced_list->unfenced);
@@ -516,7 +567,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
int wait)
{
pipe_mutex_lock(fenced_list->mutex);
- _fenced_buffer_list_check_free(fenced_list, wait);
+ fenced_buffer_list_check_free_locked(fenced_list, wait);
pipe_mutex_unlock(fenced_list->mutex);
}
@@ -538,11 +589,13 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
next = curr->next;
while(curr != &fenced_list->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ pipe_mutex_lock(fenced_buf->mutex);
assert(!fenced_buf->fence);
debug_printf("%10p %7u %7u\n",
(void *) fenced_buf,
fenced_buf->base.base.size,
p_atomic_read(&fenced_buf->base.base.reference.count));
+ pipe_mutex_unlock(fenced_buf->mutex);
curr = next;
next = curr->next;
}
@@ -552,6 +605,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
while(curr != &fenced_list->delayed) {
int signaled;
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ pipe_mutex_lock(fenced_buf->mutex);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
debug_printf("%10p %7u %7u %10p %s\n",
(void *) fenced_buf,
@@ -559,6 +613,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
p_atomic_read(&fenced_buf->base.base.reference.count),
(void *) fenced_buf->fence,
signaled == 0 ? "y" : "n");
+ pipe_mutex_unlock(fenced_buf->mutex);
curr = next;
next = curr->next;
}
@@ -579,8 +634,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
sched_yield();
#endif
- _fenced_buffer_list_check_free(fenced_list, 1);
pipe_mutex_lock(fenced_list->mutex);
+ fenced_buffer_list_check_free_locked(fenced_list, 1);
}
#ifdef DEBUG
@@ -588,6 +643,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
#endif
pipe_mutex_unlock(fenced_list->mutex);
+ pipe_mutex_destroy(fenced_list->mutex);
fenced_list->ops->destroy(fenced_list->ops);
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index eb492076b7..080fd4c731 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -1129,3 +1129,35 @@ TGSI Instruction Specification
target Label of target instruction.
+
+3 Other tokens
+===============
+
+
+3.1 Declaration Semantic
+-------------------------
+
+
+ Follows Declaration token if Semantic bit is set.
+
+ Since its purpose is to link a shader with other stages of the pipeline,
+ it is valid to follow only those Declaration tokens that declare a register
+ either in INPUT or OUTPUT file.
+
+ SemanticName field contains the semantic name of the register being declared.
+ There is no default value.
+
+ SemanticIndex is an optional subscript that can be used to distinguish
+ different register declarations with the same semantic name. The default value
+ is 0.
+
+ The meanings of the individual semantic names are explained in the following
+ sections.
+
+
+3.1.1 FACE
+
+ Valid only in a fragment shader INPUT declaration.
+
+ FACE.x is negative when the primitive is back facing. FACE.x is positive
+ when the primitive is front facing.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 4fa10e2f7e..9791e58db3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -419,6 +419,7 @@ tgsi_default_instruction( void )
instruction.NrTokens = 1;
instruction.Opcode = TGSI_OPCODE_MOV;
instruction.Saturate = TGSI_SAT_NONE;
+ instruction.Predicate = 0;
instruction.NumDstRegs = 1;
instruction.NumSrcRegs = 1;
instruction.Padding = 0;
@@ -428,12 +429,12 @@ tgsi_default_instruction( void )
}
struct tgsi_instruction
-tgsi_build_instruction(
- unsigned opcode,
- unsigned saturate,
- unsigned num_dst_regs,
- unsigned num_src_regs,
- struct tgsi_header *header )
+tgsi_build_instruction(unsigned opcode,
+ unsigned saturate,
+ unsigned predicate,
+ unsigned num_dst_regs,
+ unsigned num_src_regs,
+ struct tgsi_header *header)
{
struct tgsi_instruction instruction;
@@ -445,6 +446,7 @@ tgsi_build_instruction(
instruction = tgsi_default_instruction();
instruction.Opcode = opcode;
instruction.Saturate = saturate;
+ instruction.Predicate = predicate;
instruction.NumDstRegs = num_dst_regs;
instruction.NumSrcRegs = num_src_regs;
@@ -472,9 +474,9 @@ tgsi_default_full_instruction( void )
unsigned i;
full_instruction.Instruction = tgsi_default_instruction();
+ full_instruction.InstructionPredicate = tgsi_default_instruction_predicate();
full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
- full_instruction.InstructionExtPredicate = tgsi_default_instruction_ext_predicate();
for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
}
@@ -504,14 +506,34 @@ tgsi_build_full_instruction(
instruction = (struct tgsi_instruction *) &tokens[size];
size++;
- *instruction = tgsi_build_instruction(
- full_inst->Instruction.Opcode,
- full_inst->Instruction.Saturate,
- full_inst->Instruction.NumDstRegs,
- full_inst->Instruction.NumSrcRegs,
- header );
+ *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode,
+ full_inst->Instruction.Saturate,
+ full_inst->Instruction.Predicate,
+ full_inst->Instruction.NumDstRegs,
+ full_inst->Instruction.NumSrcRegs,
+ header);
prev_token = (struct tgsi_token *) instruction;
+ if (full_inst->Instruction.Predicate) {
+ struct tgsi_instruction_predicate *instruction_predicate;
+
+ if (maxsize <= size) {
+ return 0;
+ }
+ instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size];
+ size++;
+
+ *instruction_predicate =
+ tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index,
+ full_inst->InstructionPredicate.Negate,
+ full_inst->InstructionPredicate.SwizzleX,
+ full_inst->InstructionPredicate.SwizzleY,
+ full_inst->InstructionPredicate.SwizzleZ,
+ full_inst->InstructionPredicate.SwizzleW,
+ instruction,
+ header);
+ }
+
if( tgsi_compare_instruction_ext_label(
full_inst->InstructionExtLabel,
tgsi_default_instruction_ext_label() ) ) {
@@ -550,29 +572,6 @@ tgsi_build_full_instruction(
prev_token = (struct tgsi_token *) instruction_ext_texture;
}
- if (tgsi_compare_instruction_ext_predicate(full_inst->InstructionExtPredicate,
- tgsi_default_instruction_ext_predicate())) {
- struct tgsi_instruction_ext_predicate *instruction_ext_predicate;
-
- if (maxsize <= size) {
- return 0;
- }
- instruction_ext_predicate = (struct tgsi_instruction_ext_predicate *)&tokens[size];
- size++;
-
- *instruction_ext_predicate =
- tgsi_build_instruction_ext_predicate(full_inst->InstructionExtPredicate.SrcIndex,
- full_inst->InstructionExtPredicate.Negate,
- full_inst->InstructionExtPredicate.SwizzleX,
- full_inst->InstructionExtPredicate.SwizzleY,
- full_inst->InstructionExtPredicate.SwizzleZ,
- full_inst->InstructionExtPredicate.SwizzleW,
- prev_token,
- instruction,
- header);
- prev_token = (struct tgsi_token *)instruction_ext_predicate;
- }
-
for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
struct tgsi_dst_register *dst_register;
@@ -746,6 +745,47 @@ tgsi_build_full_instruction(
return size;
}
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void)
+{
+ struct tgsi_instruction_predicate instruction_predicate;
+
+ instruction_predicate.SwizzleX = TGSI_SWIZZLE_X;
+ instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y;
+ instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
+ instruction_predicate.SwizzleW = TGSI_SWIZZLE_W;
+ instruction_predicate.Negate = 0;
+ instruction_predicate.Index = 0;
+ instruction_predicate.Padding = 0;
+
+ return instruction_predicate;
+}
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+ unsigned negate,
+ unsigned swizzleX,
+ unsigned swizzleY,
+ unsigned swizzleZ,
+ unsigned swizzleW,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header)
+{
+ struct tgsi_instruction_predicate instruction_predicate;
+
+ instruction_predicate = tgsi_default_instruction_predicate();
+ instruction_predicate.SwizzleX = swizzleX;
+ instruction_predicate.SwizzleY = swizzleY;
+ instruction_predicate.SwizzleZ = swizzleZ;
+ instruction_predicate.SwizzleW = swizzleW;
+ instruction_predicate.Negate = negate;
+ instruction_predicate.Index = index;
+
+ instruction_grow(instruction, header);
+
+ return instruction_predicate;
+}
+
/** test for inequality of 32-bit values pointed to by a and b */
static INLINE boolean
compare32(const void *a, const void *b)
@@ -835,60 +875,6 @@ tgsi_build_instruction_ext_texture(
return instruction_ext_texture;
}
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void)
-{
- struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
- instruction_ext_predicate.Type = TGSI_INSTRUCTION_EXT_TYPE_PREDICATE;
- instruction_ext_predicate.SwizzleX = TGSI_SWIZZLE_X;
- instruction_ext_predicate.SwizzleY = TGSI_SWIZZLE_Y;
- instruction_ext_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
- instruction_ext_predicate.SwizzleW = TGSI_SWIZZLE_W;
- instruction_ext_predicate.Negate = 0;
- instruction_ext_predicate.SrcIndex = 0;
- instruction_ext_predicate.Padding = 0;
- instruction_ext_predicate.Extended = 0;
-
- return instruction_ext_predicate;
-}
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
- struct tgsi_instruction_ext_predicate b)
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return compare32(&a, &b);
-}
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header)
-{
- struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
- instruction_ext_predicate = tgsi_default_instruction_ext_predicate();
- instruction_ext_predicate.SwizzleX = swizzleX;
- instruction_ext_predicate.SwizzleY = swizzleY;
- instruction_ext_predicate.SwizzleZ = swizzleZ;
- instruction_ext_predicate.SwizzleW = swizzleW;
- instruction_ext_predicate.Negate = negate;
- instruction_ext_predicate.SrcIndex = index;
-
- prev_token->Extended = 1;
- instruction_grow(instruction, header);
-
- return instruction_ext_predicate;
-}
-
struct tgsi_src_register
tgsi_default_src_register( void )
{
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 669712eb8f..0fe5f229d3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -143,6 +143,7 @@ struct tgsi_instruction
tgsi_build_instruction(
unsigned opcode,
unsigned saturate,
+ unsigned predicate,
unsigned num_dst_regs,
unsigned num_src_regs,
struct tgsi_header *header );
@@ -157,6 +158,19 @@ tgsi_build_full_instruction(
struct tgsi_header *header,
unsigned maxsize );
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void);
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+ unsigned negate,
+ unsigned swizzleX,
+ unsigned swizzleY,
+ unsigned swizzleZ,
+ unsigned swizzleW,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header);
+
struct tgsi_instruction_ext_label
tgsi_default_instruction_ext_label( void );
@@ -187,24 +201,6 @@ tgsi_build_instruction_ext_texture(
struct tgsi_instruction *instruction,
struct tgsi_header *header );
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void);
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
- struct tgsi_instruction_ext_predicate b);
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header);
-
struct tgsi_src_register
tgsi_default_src_register( void );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index ade5e7cbbd..2bf367ba50 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -62,9 +62,6 @@
#define FAST_MATH 1
-/** for tgsi_full_instruction::Flags */
-#define SOA_DEPENDENCY_FLAG 0x1
-
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
@@ -340,20 +337,6 @@ tgsi_exec_machine_bind_shader(
maxInstructions += 10;
}
- if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
- uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
- parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG;
- /* XXX we only handle SOA dependencies properly for MOV/SWZ
- * at this time!
- */
- if (opcode != TGSI_OPCODE_MOV) {
- debug_printf("Warning: SOA dependency in instruction"
- " is not handled:\n");
- tgsi_dump_instruction(&parse.FullToken.FullInstruction,
- numInstructions);
- }
- }
-
memcpy(instructions + numInstructions,
&parse.FullToken.FullInstruction,
sizeof(instructions[0]));
@@ -395,6 +378,7 @@ tgsi_exec_machine_create( void )
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
+ mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
/* Setup constants. */
for( i = 0; i < 4; i++ ) {
@@ -526,7 +510,7 @@ micro_ddy(
dst->f[0] =
dst->f[1] =
dst->f[2] =
- dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
+ dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
}
static void
@@ -604,6 +588,24 @@ micro_exp2(
dst->f[2] = util_fast_exp2( src->f[2] );
dst->f[3] = util_fast_exp2( src->f[3] );
#else
+
+#if DEBUG
+ /* Inf is okay for this instruction, so clamp it to silence assertions. */
+ uint i;
+ union tgsi_exec_channel clamped;
+
+ for (i = 0; i < 4; i++) {
+ if (src->f[i] > 127.99999f) {
+ clamped.f[i] = 127.99999f;
+ } else if (src->f[i] < -126.99999f) {
+ clamped.f[i] = -126.99999f;
+ } else {
+ clamped.f[i] = src->f[i];
+ }
+ }
+ src = &clamped;
+#endif
+
dst->f[0] = powf( 2.0f, src->f[0] );
dst->f[1] = powf( 2.0f, src->f[1] );
dst->f[2] = powf( 2.0f, src->f[2] );
@@ -1202,10 +1204,10 @@ fetch_src_file_channel(
assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
- chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0];
- chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1];
- chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2];
- chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3];
+ chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
break;
case TGSI_FILE_OUTPUT:
@@ -1496,10 +1498,17 @@ store_dest(
dst = &mach->Addrs[index].xyzw[chan_index];
break;
+ case TGSI_FILE_LOOP:
+ assert(reg->DstRegister.Index == 0);
+ assert(mach->LoopCounterStackTop > 0);
+ assert(chan_index == CHAN_X);
+ dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
+ break;
+
case TGSI_FILE_PREDICATE:
index = reg->DstRegister.Index;
assert(index < TGSI_EXEC_NUM_PREDS);
- dst = &mach->Addrs[index].xyzw[chan_index];
+ dst = &mach->Predicates[index].xyzw[chan_index];
break;
default:
@@ -1507,6 +1516,47 @@ store_dest(
return;
}
+ if (inst->Instruction.Predicate) {
+ uint swizzle;
+ union tgsi_exec_channel *pred;
+
+ switch (chan_index) {
+ case CHAN_X:
+ swizzle = inst->InstructionPredicate.SwizzleX;
+ break;
+ case CHAN_Y:
+ swizzle = inst->InstructionPredicate.SwizzleY;
+ break;
+ case CHAN_Z:
+ swizzle = inst->InstructionPredicate.SwizzleZ;
+ break;
+ case CHAN_W:
+ swizzle = inst->InstructionPredicate.SwizzleW;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ assert(inst->InstructionPredicate.Index == 0);
+
+ pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle];
+
+ if (inst->InstructionPredicate.Negate) {
+ for (i = 0; i < QUAD_SIZE; i++) {
+ if (pred->u[i]) {
+ execmask &= ~(1 << i);
+ }
+ }
+ } else {
+ for (i = 0; i < QUAD_SIZE; i++) {
+ if (!pred->u[i]) {
+ execmask &= ~(1 << i);
+ }
+ }
+ }
+ }
+
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
for (i = 0; i < QUAD_SIZE; i++)
@@ -1762,6 +1812,64 @@ exec_tex(struct tgsi_exec_machine *mach,
}
}
+static void
+exec_txd(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index;
+ union tgsi_exec_channel r[4];
+ uint chan_index;
+
+ /*
+ * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
+ */
+
+ switch (inst->InstructionExtTexture.Texture) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+
+ FETCH(&r[0], 0, CHAN_X);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], 0.0f, /* inputs */
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], 0.0f,
+ &r[0], &r[1], &r[2], &r[3]);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&r[chan_index], 0, chan_index);
+ }
+}
+
/**
* Evaluate a constant-valued coefficient at the position of the
@@ -1831,53 +1939,58 @@ typedef void (* eval_coef_func)(
unsigned chan );
static void
-exec_declaration(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_declaration *decl )
+exec_declaration(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_declaration *decl)
{
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- eval_coef_func eval;
+ if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+ if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ uint first, last, mask;
first = decl->DeclarationRange.First;
last = decl->DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- eval = eval_constant_coef;
- break;
+ if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+ assert(decl->Semantic.SemanticIndex == 0);
+ assert(first == last);
+ assert(mask = TGSI_WRITEMASK_XYZW);
- case TGSI_INTERPOLATE_LINEAR:
- eval = eval_linear_coef;
- break;
+ mach->Inputs[first] = mach->QuadPos;
+ } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+ uint i;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- eval = eval_perspective_coef;
- break;
+ assert(decl->Semantic.SemanticIndex == 0);
+ assert(first == last);
- default:
- assert( 0 );
- return;
- }
-
- if( mask == TGSI_WRITEMASK_XYZW ) {
- unsigned i, j;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- eval( mach, i, j );
- }
+ for (i = 0; i < QUAD_SIZE; i++) {
+ mach->Inputs[first].xyzw[0].f[i] = mach->Face;
+ }
+ } else {
+ eval_coef_func eval;
+ uint i, j;
+
+ switch (decl->Declaration.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ eval = eval_constant_coef;
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ eval = eval_linear_coef;
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ eval = eval_perspective_coef;
+ break;
+
+ default:
+ assert(0);
+ return;
}
- }
- else {
- unsigned i, j;
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- eval( mach, i, j );
+ for (j = 0; j < NUM_CHANNELS; j++) {
+ if (mask & (1 << j)) {
+ for (i = first; i <= last; i++) {
+ eval(mach, i, j);
}
}
}
@@ -1894,6 +2007,7 @@ exec_instruction(
{
uint chan_index;
union tgsi_exec_channel r[10];
+ union tgsi_exec_channel d[8];
(*pc)++;
@@ -1902,42 +2016,27 @@ exec_instruction(
case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_flr( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_flr(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_MOV:
- if (inst->Flags & SOA_DEPENDENCY_FLAG) {
- /* Do all fetches into temp regs, then do all stores to avoid
- * intermediate/accidental clobbering. This could be done all the
- * time for MOV but for other instructions we'll need more temps...
- */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[chan_index], 0, chan_index );
- }
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[chan_index], 0, chan_index );
- }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&d[chan_index], 0, chan_index);
}
- else {
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
- }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
FETCH( &r[0], 0, CHAN_X );
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Y );
+ micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
@@ -1948,11 +2047,19 @@ exec_instruction(
micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
micro_pow( &r[1], &r[1], &r[2] );
- micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Z );
+ micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
}
- }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ }
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
@@ -2020,14 +2127,13 @@ exec_instruction(
break;
case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
- {
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_mul(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2035,8 +2141,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_add(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2092,25 +2200,29 @@ exec_instruction(
break;
case TGSI_OPCODE_DST:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
FETCH( &r[0], 0, CHAN_Y );
FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, CHAN_Y );
+ micro_mul(&d[CHAN_Y], &r[0], &r[1]);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
+ FETCH(&d[CHAN_Z], 0, CHAN_Z);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
+ FETCH(&d[CHAN_W], 1, CHAN_W);
+ }
+
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&d[CHAN_W], 0, CHAN_W);
}
break;
@@ -2120,9 +2232,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
/* XXX use micro_min()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2132,9 +2245,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
/* XXX use micro_max()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
-
- STORE(&r[0], 0, chan_index );
+ micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2143,8 +2257,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2153,9 +2269,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
-
- micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2166,8 +2283,10 @@ exec_instruction(
FETCH( &r[1], 1, chan_index );
micro_mul( &r[0], &r[0], &r[1] );
FETCH( &r[1], 2, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_add(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2175,10 +2294,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
-
- micro_sub( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_sub(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2187,12 +2306,12 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
-
micro_sub( &r[1], &r[1], &r[2] );
micro_mul( &r[0], &r[0], &r[1] );
- micro_add( &r[0], &r[0], &r[2] );
-
- STORE(&r[0], 0, chan_index);
+ micro_add(&d[chan_index], &r[0], &r[2]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2201,8 +2320,10 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
- micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2227,8 +2348,10 @@ exec_instruction(
case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_frc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_frc(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2238,8 +2361,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
micro_max(&r[0], &r[0], &r[1]);
FETCH(&r[1], 2, chan_index);
- micro_min(&r[0], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
+ micro_min(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2247,19 +2372,17 @@ exec_instruction(
case TGSI_OPCODE_ARR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_rnd( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_rnd(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
-#if FAST_MATH
micro_exp2( &r[0], &r[0] );
-#else
- micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
-#endif
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -2295,11 +2418,7 @@ exec_instruction(
FETCH(&r[4], 1, CHAN_Y);
micro_mul( &r[5], &r[3], &r[4] );
- micro_sub( &r[2], &r[2], &r[5] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[2], 0, CHAN_X );
- }
+ micro_sub(&d[CHAN_X], &r[2], &r[5]);
FETCH(&r[2], 1, CHAN_X);
@@ -2308,20 +2427,21 @@ exec_instruction(
FETCH(&r[5], 0, CHAN_X);
micro_mul( &r[1], &r[1], &r[5] );
- micro_sub( &r[3], &r[3], &r[1] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- STORE( &r[3], 0, CHAN_Y );
- }
+ micro_sub(&d[CHAN_Y], &r[3], &r[1]);
micro_mul( &r[5], &r[5], &r[4] );
micro_mul( &r[0], &r[0], &r[2] );
- micro_sub( &r[5], &r[5], &r[0] );
+ micro_sub(&d[CHAN_Z], &r[5], &r[0]);
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[5], 0, CHAN_Z );
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&d[CHAN_X], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
@@ -2330,11 +2450,11 @@ exec_instruction(
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
-
- micro_abs( &r[0], &r[0] );
-
- STORE(&r[0], 0, chan_index);
+ micro_abs(&d[chan_index], &r[0]);
}
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
+ }
break;
case TGSI_OPCODE_RCC:
@@ -2386,16 +2506,20 @@ exec_instruction(
case TGSI_OPCODE_DDX:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ddx( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ddx(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_DDY:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ddy( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ddy(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2476,10 +2600,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
- &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2493,8 +2617,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2510,8 +2636,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2519,8 +2647,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
+ micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2550,7 +2680,7 @@ exec_instruction(
/* src[1] = d[strq]/dx */
/* src[2] = d[strq]/dy */
/* src[3] = sampler unit */
- assert (0);
+ exec_txd(mach, inst);
break;
case TGSI_OPCODE_TXL:
@@ -2594,13 +2724,8 @@ exec_instruction(
micro_mul(&r[3], &r[3], &r[1]);
micro_add(&r[2], &r[2], &r[3]);
FETCH(&r[3], 0, CHAN_X);
- micro_add(&r[2], &r[2], &r[3]);
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- STORE(&r[2], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- STORE(&r[2], 0, CHAN_Z);
- }
+ micro_add(&d[CHAN_X], &r[2], &r[3]);
+
}
if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
@@ -2610,13 +2735,20 @@ exec_instruction(
micro_mul(&r[3], &r[3], &r[1]);
micro_add(&r[2], &r[2], &r[3]);
FETCH(&r[3], 0, CHAN_Y);
- micro_add(&r[2], &r[2], &r[3]);
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- STORE(&r[2], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&r[2], 0, CHAN_W);
- }
+ micro_add(&d[CHAN_Y], &r[2], &r[3]);
+
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&d[CHAN_X], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_X], 0, CHAN_Z);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&d[CHAN_Y], 0, CHAN_W);
}
break;
@@ -2701,8 +2833,10 @@ exec_instruction(
/* TGSI_OPCODE_SGN */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_sgn( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_sgn(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2711,10 +2845,10 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
-
- micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
-
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2889,32 +3023,40 @@ exec_instruction(
case TGSI_OPCODE_CEIL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ceil( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ceil(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_I2F:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_i2f( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_i2f(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_NOT:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_not( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_not(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_TRUNC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_trunc(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2922,8 +3064,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_shl( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_shl(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2931,8 +3075,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_ishr( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_ishr(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2940,8 +3086,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_and( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_and(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2949,8 +3097,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_or( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_or(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2962,8 +3112,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_xor( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_xor(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2992,8 +3144,23 @@ exec_instruction(
for (chan_index = 0; chan_index < 3; chan_index++) {
FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
}
- STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
++mach->LoopCounterStackTop;
+ STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
+ /* update LoopMask */
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
+ mach->LoopMask &= ~0x1;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
+ mach->LoopMask &= ~0x2;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
+ mach->LoopMask &= ~0x4;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
+ mach->LoopMask &= ~0x8;
+ }
+ /* TODO: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
/* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
@@ -3007,28 +3174,28 @@ exec_instruction(
case TGSI_OPCODE_ENDFOR:
assert(mach->LoopCounterStackTop > 0);
- micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
/* update LoopMask */
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
mach->LoopMask &= ~0x1;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
mach->LoopMask &= ~0x2;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
mach->LoopMask &= ~0x4;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
mach->LoopMask &= ~0x8;
}
- micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
+ micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
assert(mach->LoopLabelStackTop > 0);
inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
- STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
@@ -3095,7 +3262,28 @@ exec_instruction(
break;
case TGSI_OPCODE_ENDSUB:
- /* no-op */
+ /*
+ * XXX: This really should be a no-op. We should never reach this opcode.
+ */
+
+ assert(mach->CallStackTop > 0);
+ mach->CallStackTop--;
+
+ mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
+ mach->CondMask = mach->CondStack[mach->CondStackTop];
+
+ mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
+ mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
+
+ mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
+ mach->ContMask = mach->ContStack[mach->ContStackTop];
+
+ assert(mach->FuncStackTop > 0);
+ mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+ *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
+
+ UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_NOP:
@@ -3106,6 +3294,8 @@ exec_instruction(
}
}
+#define DEBUG_EXECUTION 0
+
/**
* Run TGSI interpreter.
@@ -3149,10 +3339,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
exec_declaration( mach, mach->Declarations+i );
}
- /* execute instructions, until pc is set to -1 */
- while (pc != -1) {
- assert(pc < (int) mach->NumInstructions);
- exec_instruction( mach, mach->Instructions + pc, &pc );
+ {
+#if DEBUG_EXECUTION
+ struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
+ struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
+ uint inst = 1;
+
+ memcpy(temps, mach->Temps, sizeof(temps));
+ memcpy(outputs, mach->Outputs, sizeof(outputs));
+#endif
+
+ /* execute instructions, until pc is set to -1 */
+ while (pc != -1) {
+
+#if DEBUG_EXECUTION
+ uint i;
+
+ tgsi_dump_instruction(&mach->Instructions[pc], inst++);
+#endif
+
+ assert(pc < (int) mach->NumInstructions);
+ exec_instruction(mach, mach->Instructions + pc, &pc);
+
+#if DEBUG_EXECUTION
+ for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
+ if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
+ uint j;
+
+ memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
+ debug_printf("TEMP[%2u] = ", i);
+ for (j = 0; j < 4; j++) {
+ if (j > 0) {
+ debug_printf(" ");
+ }
+ debug_printf("(%6f, %6f, %6f, %6f)\n",
+ temps[i].xyzw[0].f[j],
+ temps[i].xyzw[1].f[j],
+ temps[i].xyzw[2].f[j],
+ temps[i].xyzw[3].f[j]);
+ }
+ }
+ }
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
+ uint j;
+
+ memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
+ debug_printf("OUT[%2u] = ", i);
+ for (j = 0; j < 4; j++) {
+ if (j > 0) {
+ debug_printf(" ");
+ }
+ debug_printf("{%6f, %6f, %6f, %6f}\n",
+ outputs[i].xyzw[0].f[j],
+ outputs[i].xyzw[1].f[j],
+ outputs[i].xyzw[2].f[j],
+ outputs[i].xyzw[3].f[j]);
+ }
+ }
+ }
+#endif
+ }
}
#if 0
@@ -3166,5 +3413,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
#endif
+ assert(mach->CondStackTop == 0);
+ assert(mach->LoopStackTop == 0);
+ assert(mach->ContStackTop == 0);
+ assert(mach->CallStackTop == 0);
+
return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 71fd300cbb..afaf5c39c4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -225,6 +225,7 @@ struct tgsi_exec_machine
struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
struct tgsi_exec_vector *Addrs;
+ struct tgsi_exec_vector *Predicates;
struct tgsi_sampler **Samplers;
@@ -241,6 +242,7 @@ struct tgsi_exec_machine
/* FRAGMENT processor only. */
const struct tgsi_interp_coef *InterpCoefs;
struct tgsi_exec_vector QuadPos;
+ float Face; /**< +1 if front facing, -1 if back facing */
/* Conditional execution masks */
uint CondMask; /**< For IF/ELSE/ENDIF */
@@ -261,7 +263,7 @@ struct tgsi_exec_machine
uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopLabelStackTop;
- /** Loop counter stack (x = count, y = current, z = step) */
+ /** Loop counter stack (x = index, y = counter, z = step) */
struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopCounterStackTop;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 83f9df1183..9ca2993452 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -175,6 +175,10 @@ tgsi_parse_token(
copy_token(&inst->Instruction, &token);
extended = inst->Instruction.Extended;
+ if (inst->Instruction.Predicate) {
+ next_token(ctx, &inst->InstructionPredicate);
+ }
+
while( extended ) {
struct tgsi_src_register_ext token;
@@ -189,10 +193,6 @@ tgsi_parse_token(
copy_token(&inst->InstructionExtTexture, &token);
break;
- case TGSI_INSTRUCTION_EXT_TYPE_PREDICATE:
- copy_token(&inst->InstructionExtPredicate, &token);
- break;
-
default:
assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 76f1676d85..cb4772ade8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -80,9 +80,9 @@ struct tgsi_full_immediate
struct tgsi_full_instruction
{
struct tgsi_instruction Instruction;
+ struct tgsi_instruction_predicate InstructionPredicate;
struct tgsi_instruction_ext_label InstructionExtLabel;
struct tgsi_instruction_ext_texture InstructionExtTexture;
- struct tgsi_instruction_ext_predicate InstructionExtPredicate;
struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
uint Flags; /**< user-defined usage */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 80e6ff2b79..557c885a8a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -47,9 +47,9 @@ union tgsi_any_token {
struct tgsi_immediate imm;
union tgsi_immediate_data imm_data;
struct tgsi_instruction insn;
+ struct tgsi_instruction_predicate insn_predicate;
struct tgsi_instruction_ext_label insn_ext_label;
struct tgsi_instruction_ext_texture insn_ext_texture;
- struct tgsi_instruction_ext_predicate insn_ext_predicate;
struct tgsi_src_register src;
struct tgsi_src_register_ext_mod src_ext_mod;
struct tgsi_dimension dim;
@@ -72,6 +72,7 @@ struct ureg_tokens {
#define UREG_MAX_IMMEDIATE 32
#define UREG_MAX_TEMP 256
#define UREG_MAX_ADDR 2
+#define UREG_MAX_LOOP 1
#define UREG_MAX_PRED 1
#define DOMAIN_DECL 0
@@ -123,6 +124,7 @@ struct ureg_program
unsigned nr_addrs;
unsigned nr_preds;
+ unsigned nr_loops;
unsigned nr_instructions;
struct ureg_tokens domain[2];
@@ -383,6 +385,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
i = ureg->nr_constant_ranges++;
ureg->constant_range[i].first = index;
ureg->constant_range[i].last = index;
+ goto out;
}
/* Collapse all ranges down to one:
@@ -450,6 +453,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
}
+/* Allocate a new loop register.
+ */
+struct ureg_dst
+ureg_DECL_loop(struct ureg_program *ureg)
+{
+ if (ureg->nr_loops < UREG_MAX_LOOP) {
+ return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
+ }
+
+ assert(0);
+ return ureg_dst_register(TGSI_FILE_LOOP, 0);
+}
+
/* Allocate a new predicate register.
*/
struct ureg_dst
@@ -694,35 +710,27 @@ ureg_emit_insn(struct ureg_program *ureg,
validate( opcode, num_dst, num_src );
out = get_tokens( ureg, DOMAIN_INSN, count );
- out[0].value = 0;
- out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
- out[0].insn.NrTokens = 0;
+ out[0].insn = tgsi_default_instruction();
out[0].insn.Opcode = opcode;
out[0].insn.Saturate = saturate;
out[0].insn.NumDstRegs = num_dst;
out[0].insn.NumSrcRegs = num_src;
- out[0].insn.Padding = 0;
result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
+ result.extended_token = result.insn_token;
if (predicate) {
- out[0].insn.Extended = 1;
- out[1].insn_ext_predicate = tgsi_default_instruction_ext_predicate();
- out[1].insn_ext_predicate.Negate = pred_negate;
- out[1].insn_ext_predicate.SwizzleX = pred_swizzle_x;
- out[1].insn_ext_predicate.SwizzleY = pred_swizzle_y;
- out[1].insn_ext_predicate.SwizzleZ = pred_swizzle_z;
- out[1].insn_ext_predicate.SwizzleW = pred_swizzle_w;
-
- result.extended_token = result.insn_token + 1;
- } else {
- out[0].insn.Extended = 0;
-
- result.extended_token = result.insn_token;
+ out[0].insn.Predicate = 1;
+ out[1].insn_predicate = tgsi_default_instruction_predicate();
+ out[1].insn_predicate.Negate = pred_negate;
+ out[1].insn_predicate.SwizzleX = pred_swizzle_x;
+ out[1].insn_predicate.SwizzleY = pred_swizzle_y;
+ out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
+ out[1].insn_predicate.SwizzleW = pred_swizzle_w;
}
ureg->nr_instructions++;
-
+
return result;
}
@@ -1065,6 +1073,13 @@ static void emit_decls( struct ureg_program *ureg )
0, ureg->nr_addrs );
}
+ if (ureg->nr_loops) {
+ emit_decl_range(ureg,
+ TGSI_FILE_LOOP,
+ 0,
+ ureg->nr_loops);
+ }
+
if (ureg->nr_preds) {
emit_decl_range(ureg,
TGSI_FILE_PREDICATE,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index e6fad1f0f4..b89f55d5a8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -163,6 +163,9 @@ struct ureg_dst
ureg_DECL_address( struct ureg_program * );
struct ureg_dst
+ureg_DECL_loop( struct ureg_program * );
+
+struct ureg_dst
ureg_DECL_predicate(struct ureg_program *);
/* Supply an index to the sampler declaration as this is the hook to
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 5038642599..5372df5735 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -354,9 +354,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
texTemp.target = PIPE_TEXTURE_2D;
texTemp.format = src->format;
texTemp.last_level = 0;
- texTemp.width[0] = srcW;
- texTemp.height[0] = srcH;
- texTemp.depth[0] = 1;
+ texTemp.width0 = srcW;
+ texTemp.height0 = srcH;
+ texTemp.depth0 = 1;
pf_get_block(src->format, &texTemp.block);
tex = screen->texture_create(screen, &texTemp);
@@ -389,10 +389,10 @@ util_blit_pixels_writemask(struct blit_state *ctx,
}
else {
pipe_texture_reference(&tex, src->texture);
- s0 = srcX0 / (float)tex->width[0];
- s1 = srcX1 / (float)tex->width[0];
- t0 = srcY0 / (float)tex->height[0];
- t1 = srcY1 / (float)tex->height[0];
+ s0 = srcX0 / (float)tex->width0;
+ s1 = srcX1 / (float)tex->width0;
+ t0 = srcY0 / (float)tex->height0;
+ t1 = srcY1 / (float)tex->height0;
}
@@ -518,13 +518,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
- assert(tex->width[0] != 0);
- assert(tex->height[0] != 0);
+ assert(tex->width0 != 0);
+ assert(tex->height0 != 0);
- s0 = srcX0 / (float)tex->width[0];
- s1 = srcX1 / (float)tex->width[0];
- t0 = srcY0 / (float)tex->height[0];
- t1 = srcY1 / (float)tex->height[0];
+ s0 = srcX0 / (float)tex->width0;
+ s1 = srcX1 / (float)tex->width0;
+ t0 = srcY0 / (float)tex->height0;
+ t1 = srcY1 / (float)tex->height0;
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index facbe69173..a08241971c 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -80,7 +80,7 @@ static int has_cpuid(void);
#if defined(PIPE_ARCH_X86)
/* The sigill handlers */
-#if defined(PIPE_OS_LINUX) //&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
+#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/
static void
sigill_handler_sse(int signal, struct sigcontext sc)
{
@@ -240,7 +240,7 @@ check_os_katmai_support(void)
__asm __volatile ("xorps %xmm0, %xmm0");
#elif defined(PIPE_CC_MSVC)
__asm {
- xorps xmm0, xmm0 // executing SSE instruction
+ xorps xmm0, xmm0 /* executing SSE instruction */
}
#else
#error Unsupported compiler
@@ -283,7 +283,7 @@ check_os_katmai_support(void)
* and therefore to be safe I'm going to leave this test in here.
*/
if (util_cpu_caps.has_sse) {
- // test_os_katmai_exception_support();
+ /* test_os_katmai_exception_support(); */
}
/* Restore the original signal handlers.
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index f1bf94f17d..b9cc2aa716 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -97,3 +97,13 @@ PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw,
PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb
PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb
PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb
+PIPE_FORMAT_YCBCR , yuv , 2, 1, x32 , , , , xyz1, yuv
+PIPE_FORMAT_YCBCR_REV , yuv , 2, 1, x32 , , , , xyz1, yuv
+PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb
+PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb
+PIPE_FORMAT_DXT3_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_DXT5_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_DXT1_SRGBA , dxt , 4, 4, x64 , , , , xyzw, srgb
+PIPE_FORMAT_DXT1_SRGB , dxt , 4, 4, x64 , , , , xyz1, srgb
+PIPE_FORMAT_DXT3_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb
+PIPE_FORMAT_DXT5_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index aa823aa218..f67f1e458d 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -45,6 +45,7 @@
#include "util/u_draw_quad.h"
#include "util/u_gen_mipmap.h"
#include "util/u_simple_shaders.h"
+#include "util/u_math.h"
#include "cso_cache/cso_context.h"
@@ -1125,12 +1126,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
- pt->width[srcLevel],
- pt->height[srcLevel]);
+ u_minify(pt->width0, srcLevel),
+ u_minify(pt->height0, srcLevel));
dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
- pt->width[dstLevel],
- pt->height[dstLevel]);
+ u_minify(pt->width0, dstLevel),
+ u_minify(pt->height0, dstLevel));
srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1168,12 +1169,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
- pt->width[srcLevel],
- pt->height[srcLevel]);
+ u_minify(pt->width0, srcLevel),
+ u_minify(pt->height0, srcLevel));
dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
- pt->width[dstLevel],
- pt->height[dstLevel]);
+ u_minify(pt->width0, dstLevel),
+ u_minify(pt->height0, dstLevel));
srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1213,12 +1214,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
- pt->width[srcLevel],
- pt->height[srcLevel]);
+ u_minify(pt->width0, srcLevel),
+ u_minify(pt->height0, srcLevel));
dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
- pt->width[dstLevel],
- pt->height[dstLevel]);
+ u_minify(pt->width0, dstLevel),
+ u_minify(pt->height0, dstLevel));
srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1575,8 +1576,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
* Setup framebuffer / dest surface
*/
fb.cbufs[0] = surf;
- fb.width = pt->width[dstLevel];
- fb.height = pt->height[dstLevel];
+ fb.width = u_minify(pt->width0, dstLevel);
+ fb.height = u_minify(pt->height0, dstLevel);
cso_set_framebuffer(ctx->cso, &fb);
/*
@@ -1597,8 +1598,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
offset = set_vertex_data(ctx,
pt->target,
face,
- (float) pt->width[dstLevel],
- (float) pt->height[dstLevel]);
+ (float) u_minify(pt->width0, dstLevel),
+ (float) u_minify(pt->height0, dstLevel));
util_draw_vertex_buffer(ctx->pipe,
ctx->vbuf,
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 75b075f160..7e75702701 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -491,6 +491,43 @@ util_next_power_of_two(unsigned x)
/**
+ * Return number of bits set in n.
+ */
+static INLINE unsigned
+util_bitcount(unsigned n)
+{
+#if defined(PIPE_CC_GCC)
+ return __builtin_popcount(n);
+#else
+ /* XXX there are more clever ways of doing this */
+ unsigned bits = 0;
+ while (n) {
+ bits += (n & 1);
+ n = n >> 1;
+ }
+ return bits;
+#endif
+}
+
+
+/**
+ * Reverse byte order of a 32 bit word.
+ */
+static INLINE uint32_t
+util_bswap32(uint32_t n)
+{
+#if defined(PIPE_CC_GCC)
+ return __builtin_bswap32(n);
+#else
+ return (n >> 24) |
+ ((n >> 8) & 0x0000ff00) |
+ ((n << 8) & 0x00ff0000) |
+ (n << 24);
+#endif
+}
+
+
+/**
* Clamp X to [MIN, MAX].
* This is a macro to allow float, int, uint, etc. types.
*/
@@ -499,6 +536,9 @@ util_next_power_of_two(unsigned x)
#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )
+#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C )
+#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
+
static INLINE int
align(int value, int alignment)
@@ -507,9 +547,9 @@ align(int value, int alignment)
}
static INLINE unsigned
-minify(unsigned value)
+u_minify(unsigned value, unsigned levels)
{
- return MAX2(1, value >> 1);
+ return MAX2(1, value >> levels);
}
#ifndef COPY_4V
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index eda883b3b9..9dacc6d83d 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -302,7 +302,10 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
static INLINE void
util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
{
- ubyte r, g, b, a;
+ ubyte r = 0;
+ ubyte g = 0;
+ ubyte b = 0;
+ ubyte a = 0;
if (pf_size_x(format) <= 8) {
/* format uses 8-bit components or less */
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 85e443204e..de8c266db8 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -79,9 +79,9 @@ util_create_rgba_surface(struct pipe_screen *screen,
templ.target = target;
templ.format = format;
templ.last_level = 0;
- templ.width[0] = width;
- templ.height[0] = height;
- templ.depth[0] = 1;
+ templ.width0 = width;
+ templ.height0 = height;
+ templ.depth0 = 1;
pf_get_block(format, &templ.block);
templ.tex_usage = usage;
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index cda6dc134a..592dd17421 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -455,8 +455,8 @@ void vl_compositor_render(struct vl_compositor *compositor,
assert(dst_area);
assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
- compositor->fb_state.width = dst_surface->width[0];
- compositor->fb_state.height = dst_surface->height[0];
+ compositor->fb_state.width = dst_surface->width0;
+ compositor->fb_state.height = dst_surface->height0;
compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
(
compositor->pipe->screen,
@@ -504,12 +504,12 @@ void vl_compositor_render(struct vl_compositor *compositor,
vs_consts->dst_trans.z = 0;
vs_consts->dst_trans.w = 0;
- vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0];
- vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0];
+ vs_consts->src_scale.x = src_area->w / (float)src_surface->width0;
+ vs_consts->src_scale.y = src_area->h / (float)src_surface->height0;
vs_consts->src_scale.z = 1;
vs_consts->src_scale.w = 1;
- vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0];
- vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0];
+ vs_consts->src_trans.x = src_area->x / (float)src_surface->width0;
+ vs_consts->src_trans.y = src_area->y / (float)src_surface->height0;
vs_consts->src_trans.z = 0;
vs_consts->src_trans.w = 0;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c4ba69817f..1934965995 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -681,7 +681,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
(
r->pipe->screen, r->textures.all[i],
0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
- r->textures.all[i]->width[0], r->textures.all[i]->height[0]
+ r->textures.all[i]->width0, r->textures.all[i]->height0
);
r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
@@ -835,26 +835,26 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
/* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
template.format = PIPE_FORMAT_R16_SNORM;
template.last_level = 0;
- template.width[0] = r->pot_buffers ?
+ template.width0 = r->pot_buffers ?
util_next_power_of_two(r->picture_width) : r->picture_width;
- template.height[0] = r->pot_buffers ?
+ template.height0 = r->pot_buffers ?
util_next_power_of_two(r->picture_height) : r->picture_height;
- template.depth[0] = 1;
+ template.depth0 = 1;
pf_get_block(template.format, &template.block);
template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
- template.width[0] = r->pot_buffers ?
+ template.width0 = r->pot_buffers ?
util_next_power_of_two(r->picture_width / 2) :
r->picture_width / 2;
- template.height[0] = r->pot_buffers ?
+ template.height0 = r->pot_buffers ?
util_next_power_of_two(r->picture_height / 2) :
r->picture_height / 2;
}
else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
- template.height[0] = r->pot_buffers ?
+ template.height0 = r->pot_buffers ?
util_next_power_of_two(r->picture_height / 2) :
r->picture_height / 2;
@@ -1283,8 +1283,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
);
- vs_consts->denorm.x = r->surface->width[0];
- vs_consts->denorm.y = r->surface->height[0];
+ vs_consts->denorm.x = r->surface->width0;
+ vs_consts->denorm.y = r->surface->height0;
pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
@@ -1633,8 +1633,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
renderer->past = past;
renderer->future = future;
renderer->fence = fence;
- renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
- renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
+ renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
+ renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
}
while (num_macroblocks) {