diff options
author | Brian Paul <brianp@vmware.com> | 2010-04-15 12:48:12 -0600 |
---|---|---|
committer | Brian Paul <brianp@vmware.com> | 2010-04-15 12:48:12 -0600 |
commit | 0f16b07a041148ce9d050ec58f42a4302a9cb2cf (patch) | |
tree | 5661a0431da6ed5e2a68dae1bae3fe83e47e208a /src/gallium/drivers/nvfx | |
parent | 563a7e3cc552fdcfcaf9ac0d4b1683c3ba2ae732 (diff) | |
parent | eee220d65d3d37030f33971b02823c614e3eb618 (diff) |
Merge branch 'master' into lp-surface-tiling
This brings in the gallium-resources branch changes. Things seem to be
working but there are probably bugs to be found.
Conflicts:
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_scene.c
src/gallium/drivers/llvmpipe/lp_texture.c
src/gallium/drivers/llvmpipe/lp_texture.h
Diffstat (limited to 'src/gallium/drivers/nvfx')
32 files changed, 1702 insertions, 1243 deletions
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index dfe97e6ed5..c1d57ca396 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -5,6 +5,7 @@ LIBNAME = nvfx C_SOURCES = \ nv04_surface_2d.c \ + nvfx_buffer.c \ nvfx_context.c \ nvfx_clear.c \ nvfx_draw.c \ @@ -14,6 +15,7 @@ C_SOURCES = \ nv40_fragtex.c \ nvfx_miptree.c \ nvfx_query.c \ + nvfx_resource.c \ nvfx_screen.c \ nvfx_state.c \ nvfx_state_blend.c \ @@ -29,4 +31,7 @@ C_SOURCES = \ nvfx_vbo.c \ nvfx_vertprog.c +LIBRARY_INCLUDES = \ + -I$(TOP)/src/gallium/drivers/nouveau/include + include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.c b/src/gallium/drivers/nvfx/nv04_surface_2d.c index 6784170c00..22cfa0ecef 100644 --- a/src/gallium/drivers/nvfx/nv04_surface_2d.c +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.c @@ -125,8 +125,8 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct nouveau_channel *chan = ctx->swzsurf->channel; struct nouveau_grobj *swzsurf = ctx->swzsurf; struct nouveau_grobj *sifm = ctx->sifm; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + struct nouveau_bo *src_bo = ctx->buf(src); + struct nouveau_bo *dst_bo = ctx->buf(dst); const unsigned src_pitch = ((struct nv04_surface *)src)->pitch; /* Max width & height may not be the same on all HW, but must be POT */ const unsigned max_w = 1024; @@ -148,7 +148,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); OUT_RELOCo(chan, dst_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); OUT_RING (chan, nv04_surface_format(dst->format) | @@ -171,7 +171,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); OUT_RELOCl(chan, dst_bo, 
dst->offset, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); @@ -205,8 +205,8 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, { struct nouveau_channel *chan = ctx->m2mf->channel; struct nouveau_grobj *m2mf = ctx->m2mf; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + struct nouveau_bo *src_bo = ctx->buf(src); + struct nouveau_bo *dst_bo = ctx->buf(dst); unsigned src_pitch = ((struct nv04_surface *)src)->pitch; unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; unsigned dst_offset = dst->offset + dy * dst_pitch + @@ -252,8 +252,8 @@ nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, struct nouveau_channel *chan = ctx->surf2d->channel; struct nouveau_grobj *surf2d = ctx->surf2d; struct nouveau_grobj *blit = ctx->blit; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + struct nouveau_bo *src_bo = ctx->buf(src); + struct nouveau_bo *dst_bo = ctx->buf(dst); unsigned src_pitch = ((struct nv04_surface *)src)->pitch; unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; int format; @@ -287,8 +287,8 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, { unsigned src_pitch = ((struct nv04_surface *)src)->pitch; unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - int src_linear = src->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; - int dst_linear = dst->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; + int src_linear = src->texture->flags & NVFX_RESOURCE_FLAG_LINEAR; + int dst_linear = dst->texture->flags & NVFX_RESOURCE_FLAG_LINEAR; assert(src->format == dst->format); @@ -298,16 +298,11 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, return; } - /* 
NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback - * to NV_MEMORY_TO_MEMORY_FORMAT in this case. - */ - if ((src->offset & 63) || (dst->offset & 63) || - (src_pitch & 63) || (dst_pitch & 63)) { - nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h); - return; - } - - nv04_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h); + /* Use M2MF instead of the blitter since it always works + * Any possible performance drop is likely to be not very significant + * and dwarfed anyway by the current buffer management problems + */ + nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h); } static void @@ -317,7 +312,7 @@ nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst, struct nouveau_channel *chan = ctx->surf2d->channel; struct nouveau_grobj *surf2d = ctx->surf2d; struct nouveau_grobj *rect = ctx->rect; - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + struct nouveau_bo *dst_bo = ctx->buf(dst); unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; int cs2d_format, gdirect_format; @@ -501,26 +496,19 @@ nv04_surface_2d_init(struct nouveau_screen *screen) } struct nv04_surface* -nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, + struct nv04_surface_2d* eng2d, struct nv04_surface* ns) { int temp_flags; - // printf("creating temp, flags is %i!\n", flags); - - if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) - { - temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; - ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; - } - else - { - temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; - ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; - } + temp_flags = (ns->base.usage | + PIPE_BIND_BLIT_SOURCE | + PIPE_BIND_BLIT_DESTINATION); - 
ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + ns->base.usage = (PIPE_BIND_BLIT_SOURCE | + PIPE_BIND_BLIT_DESTINATION); - struct pipe_texture templ; + struct pipe_resource templ; memset(&templ, 0, sizeof(templ)); templ.format = ns->base.texture->format; templ.target = PIPE_TEXTURE_2D; @@ -532,14 +520,16 @@ nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented templ.nr_samples = ns->base.texture->nr_samples; - templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; + templ.bind = ns->base.texture->bind | PIPE_BIND_RENDER_TARGET; - struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); + struct pipe_resource* temp_tex = pscreen->resource_create(pscreen, &templ); struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); temp_ns->backing = ns; - if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) - eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); + if(ns->base.usage & PIPE_BIND_BLIT_SOURCE) + eng2d->copy(eng2d, &temp_ns->backing->base, + 0, 0, &ns->base, + 0, 0, ns->base.width, ns->base.height); return temp_ns; } diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h index ce696a11a3..b2b237b9df 100644 --- a/src/gallium/drivers/nvfx/nv04_surface_2d.h +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.h @@ -16,7 +16,7 @@ struct nv04_surface_2d { struct nouveau_grobj *blit; struct nouveau_grobj *sifm; - struct pipe_buffer *(*buf)(struct pipe_surface *); + struct nouveau_bo *(*buf)(struct pipe_surface *); void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst, int dx, int dy, struct pipe_surface *src, int sx, int sy, @@ -34,4 +34,6 @@ nv04_surface_2d_takedown(struct nv04_surface_2d **); struct 
nv04_surface* nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); +#define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) + #endif diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index d3f3edb327..dec073ac90 100644 --- a/src/gallium/drivers/nvfx/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -3,6 +3,7 @@ #include "nvfx_context.h" #include "nouveau/nouveau_util.h" #include "nvfx_tex.h" +#include "nvfx_resource.h" void nv30_sampler_state_init(struct pipe_context *pipe, @@ -87,21 +88,21 @@ nv30_fragtex_format(uint pipe_format) } -struct nouveau_stateobj * -nv30_fragtex_build(struct nvfx_context *nvfx, int unit) +void +nv30_fragtex_set(struct nvfx_context *nvfx, int unit) { struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; struct nvfx_miptree *nv30mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture; - struct pipe_texture *pt = &nv30mt->base; - struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); + struct pipe_resource *pt = &nv30mt->base.base; + struct nouveau_bo *bo = nv30mt->base.bo; struct nv30_texture_format *tf; - struct nouveau_stateobj *so; + struct nouveau_channel* chan = nvfx->screen->base.channel; uint32_t txf, txs; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; tf = nv30_fragtex_format(pt->format); if (!tf) - return NULL; + return; txf = tf->format; txf |= ((pt->last_level>0) ? 
NV34TCL_TX_FORMAT_MIPMAP : 0); @@ -125,23 +126,24 @@ nv30_fragtex_build(struct nvfx_context *nvfx, int unit) break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); - return NULL; + return; } txs = tf->swizzle; - so = so_new(1, 8, 2); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); - so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, + MARK_RING(chan, 9, 2); + OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8)); + OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); - so_data (so, ps->wrap); - so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); - so_data (so, txs); - so_data (so, ps->filt | 0x2000 /*voodoo*/); - so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + OUT_RING(chan, ps->wrap); + OUT_RING(chan, NV34TCL_TX_ENABLE_ENABLE | ps->en); + OUT_RING(chan, txs); + OUT_RING(chan, ps->filt | 0x2000 /*voodoo*/); + OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0); - so_data (so, ps->bcol); + OUT_RING(chan, ps->bcol); - return so; + nvfx->hw_txf[unit] = txf; + nvfx->hw_samplers |= (1 << unit); } diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c index fe87cebbb6..0068b1ba54 100644 --- a/src/gallium/drivers/nvfx/nv40_fragtex.c +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -1,6 +1,7 @@ #include "util/u_format.h" #include "nvfx_context.h" #include "nvfx_tex.h" +#include "nvfx_resource.h" void nv40_sampler_state_init(struct pipe_context *pipe, @@ -105,15 +106,16 @@ nv40_fragtex_format(uint pipe_format) } -struct nouveau_stateobj * -nv40_fragtex_build(struct nvfx_context *nvfx, int unit) +void +nv40_fragtex_set(struct nvfx_context *nvfx, int unit) { + struct nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; struct nvfx_miptree *nv40mt = (struct 
nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture; - struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer); - struct pipe_texture *pt = &nv40mt->base; + struct nouveau_bo *bo = nv40mt->base.bo; + struct pipe_resource *pt = &nv40mt->base.base; struct nv40_texture_format *tf; - struct nouveau_stateobj *so; + uint32_t txf, txs, txp; unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; @@ -143,10 +145,10 @@ nv40_fragtex_build(struct nvfx_context *nvfx, int unit) break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); - return NULL; + return; } - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR)) { txp = 0; } else { txp = nv40mt->level[0].pitch; @@ -155,20 +157,20 @@ nv40_fragtex_build(struct nvfx_context *nvfx, int unit) txs = tf->swizzle; - so = so_new(2, 9, 2); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); - so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, - NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); - so_data (so, ps->wrap); - so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); - so_data (so, txs); - so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); - so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | - pt->height0); - so_data (so, ps->bcol); - so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1); - so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); - - return so; + MARK_RING(chan, 11 + 2 * !unit, 2); + OUT_RING(chan, RING_3D(NV34TCL_TX_OFFSET(unit), 8)); + OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, txf, tex_flags | NOUVEAU_BO_OR, + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); + OUT_RING(chan, ps->wrap); + OUT_RING(chan, NV40TCL_TEX_ENABLE_ENABLE | ps->en); + OUT_RING(chan, txs); + OUT_RING(chan, ps->filt | tf->sign | 0x2000 /*voodoo*/); + OUT_RING(chan, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0); + 
OUT_RING(chan, ps->bcol); + OUT_RING(chan, RING_3D(NV40TCL_TEX_SIZE1(unit), 1)); + OUT_RING(chan, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + + nvfx->hw_txf[unit] = txf; + nvfx->hw_samplers |= (1 << unit); } diff --git a/src/gallium/drivers/nvfx/nvfx_buffer.c b/src/gallium/drivers/nvfx/nvfx_buffer.c new file mode 100644 index 0000000000..24e0a0c7f6 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_buffer.c @@ -0,0 +1,153 @@ + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_winsys.h" +#include "nvfx_resource.h" + + +/* Currently using separate implementations for buffers and textures, + * even though gallium has a unified abstraction of these objects. + * Eventually these should be combined, and mechanisms like transfers + * be adapted to work for both buffer and texture uploads. + */ +static void nvfx_buffer_destroy(struct pipe_screen *pscreen, + struct pipe_resource *presource) +{ + struct nvfx_resource *buffer = nvfx_resource(presource); + + nouveau_screen_bo_release(pscreen, buffer->bo); + FREE(buffer); +} + + + + +/* Utility functions for transfer create/destroy are hooked in and + * just record the arguments to those functions. 
+ */ +static void * +nvfx_buffer_transfer_map( struct pipe_context *pipe, + struct pipe_transfer *transfer ) +{ + struct nvfx_resource *buffer = nvfx_resource(transfer->resource); + uint8_t *map; + + map = nouveau_screen_bo_map_range( pipe->screen, + buffer->bo, + transfer->box.x, + transfer->box.width, + nouveau_screen_transfer_flags(transfer->usage) ); + if (map == NULL) + return NULL; + + return map + transfer->box.x; +} + + + +static void nvfx_buffer_transfer_flush_region( struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct nvfx_resource *buffer = nvfx_resource(transfer->resource); + + nouveau_screen_bo_map_flush_range(pipe->screen, + buffer->bo, + transfer->box.x + box->x, + box->width); +} + +static void nvfx_buffer_transfer_unmap( struct pipe_context *pipe, + struct pipe_transfer *transfer ) +{ + struct nvfx_resource *buffer = nvfx_resource(transfer->resource); + + nouveau_screen_bo_unmap(pipe->screen, buffer->bo); +} + + + + +struct u_resource_vtbl nvfx_buffer_vtbl = +{ + u_default_resource_get_handle, /* get_handle */ + nvfx_buffer_destroy, /* resource_destroy */ + NULL, /* is_resource_referenced */ + u_default_get_transfer, /* get_transfer */ + u_default_transfer_destroy, /* transfer_destroy */ + nvfx_buffer_transfer_map, /* transfer_map */ + nvfx_buffer_transfer_flush_region, /* transfer_flush_region */ + nvfx_buffer_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + + + +struct pipe_resource * +nvfx_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *template) +{ + struct nvfx_resource *buffer; + + buffer = CALLOC_STRUCT(nvfx_resource); + if (!buffer) + return NULL; + + buffer->base = *template; + buffer->vtbl = &nvfx_buffer_vtbl; + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = pscreen; + + buffer->bo = nouveau_screen_bo_new(pscreen, + 16, + buffer->base._usage, + buffer->base.bind, + 
buffer->base.width0); + + if (buffer->bo == NULL) + goto fail; + + return &buffer->base; + +fail: + FREE(buffer); + return NULL; +} + + +struct pipe_resource * +nvfx_user_buffer_create(struct pipe_screen *pscreen, + void *ptr, + unsigned bytes, + unsigned usage) +{ + struct nvfx_resource *buffer; + + buffer = CALLOC_STRUCT(nvfx_resource); + if (!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->vtbl = &nvfx_buffer_vtbl; + buffer->base.screen = pscreen; + buffer->base.format = PIPE_FORMAT_R8_UNORM; + buffer->base._usage = PIPE_USAGE_IMMUTABLE; + buffer->base.bind = usage; + buffer->base.width0 = bytes; + buffer->base.height0 = 1; + buffer->base.depth0 = 1; + + buffer->bo = nouveau_screen_bo_user(pscreen, ptr, bytes); + if (!buffer->bo) + goto fail; + + return &buffer->base; + +fail: + FREE(buffer); + return NULL; +} + diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index fc3cbdb558..1faa0af31f 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -3,6 +3,7 @@ #include "nvfx_context.h" #include "nvfx_screen.h" +#include "nvfx_resource.h" static void nvfx_flush(struct pipe_context *pipe, unsigned flags, @@ -29,12 +30,6 @@ static void nvfx_destroy(struct pipe_context *pipe) { struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned i; - - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (nvfx->state.hw[i]) - so_ref(NULL, &nvfx->state.hw[i]); - } if (nvfx->draw) draw_destroy(nvfx->draw); @@ -65,18 +60,14 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nvfx_flush; - nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - screen->base.channel->user_private = nvfx; - screen->base.channel->flush_notify = nvfx_state_flush_notify; nvfx->is_nv4x = screen->is_nv4x; nvfx_init_query_functions(nvfx); 
nvfx_init_surface_functions(nvfx); nvfx_init_state_functions(nvfx); - nvfx_init_transfer_functions(nvfx); + nvfx_init_resource_functions(&nvfx->pipe); /* Create, configure, and install fallback swtnl path */ nvfx->draw = draw_create(); @@ -86,5 +77,8 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) draw_enable_point_sprites(nvfx->draw, FALSE); draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); + /* set these to that we init them on first validation */ + nvfx->state.scissor_enabled = ~0; + nvfx->state.stipple_enabled = ~0; return &nvfx->pipe; } diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index ab7225cf6c..e2c6d09fa1 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -16,8 +16,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" -#include "nouveau/nouveau_stateobj.h" #include "nvfx_state.h" @@ -26,45 +24,6 @@ #define NOUVEAU_MSG(fmt, args...) 
\ fprintf(stderr, "nouveau: "fmt, ##args); -enum nvfx_state_index { - NVFX_STATE_FB = 0, - NVFX_STATE_VIEWPORT = 1, - NVFX_STATE_BLEND = 2, - NVFX_STATE_RAST = 3, - NVFX_STATE_ZSA = 4, - NVFX_STATE_BCOL = 5, - NVFX_STATE_CLIP = 6, - NVFX_STATE_SCISSOR = 7, - NVFX_STATE_STIPPLE = 8, - NVFX_STATE_FRAGPROG = 9, - NVFX_STATE_VERTPROG = 10, - NVFX_STATE_FRAGTEX0 = 11, - NVFX_STATE_FRAGTEX1 = 12, - NVFX_STATE_FRAGTEX2 = 13, - NVFX_STATE_FRAGTEX3 = 14, - NVFX_STATE_FRAGTEX4 = 15, - NVFX_STATE_FRAGTEX5 = 16, - NVFX_STATE_FRAGTEX6 = 17, - NVFX_STATE_FRAGTEX7 = 18, - NVFX_STATE_FRAGTEX8 = 19, - NVFX_STATE_FRAGTEX9 = 20, - NVFX_STATE_FRAGTEX10 = 21, - NVFX_STATE_FRAGTEX11 = 22, - NVFX_STATE_FRAGTEX12 = 23, - NVFX_STATE_FRAGTEX13 = 24, - NVFX_STATE_FRAGTEX14 = 25, - NVFX_STATE_FRAGTEX15 = 26, - NVFX_STATE_VERTTEX0 = 27, - NVFX_STATE_VERTTEX1 = 28, - NVFX_STATE_VERTTEX2 = 29, - NVFX_STATE_VERTTEX3 = 30, - NVFX_STATE_VTXBUF = 31, - NVFX_STATE_VTXFMT = 32, - NVFX_STATE_VTXATTR = 33, - NVFX_STATE_SR = 34, - NVFX_STATE_MAX = 35 -}; - #include "nvfx_screen.h" #define NVFX_NEW_BLEND (1 << 0) @@ -81,20 +40,25 @@ enum nvfx_state_index { #define NVFX_NEW_ARRAYS (1 << 11) #define NVFX_NEW_UCP (1 << 12) #define NVFX_NEW_SR (1 << 13) +#define NVFX_NEW_VERTCONST (1 << 14) +#define NVFX_NEW_FRAGCONST (1 << 15) struct nvfx_rasterizer_state { struct pipe_rasterizer_state pipe; - struct nouveau_stateobj *so; + unsigned sb_len; + uint32_t sb[32]; }; struct nvfx_zsa_state { struct pipe_depth_stencil_alpha_state pipe; - struct nouveau_stateobj *so; + unsigned sb_len; + uint32_t sb[26]; }; struct nvfx_blend_state { struct pipe_blend_state pipe; - struct nouveau_stateobj *so; + unsigned sb_len; + uint32_t sb[13]; }; @@ -102,9 +66,6 @@ struct nvfx_state { unsigned scissor_enabled; unsigned stipple_enabled; unsigned fp_samplers; - - uint64_t dirty; - struct nouveau_stateobj *hw[NVFX_STATE_MAX]; }; struct nvfx_vtxelt_state { @@ -112,6 +73,12 @@ struct nvfx_vtxelt_state { unsigned num_elements; }; 
+struct nvfx_render_target { + struct nouveau_bo* bo; + unsigned offset; + unsigned pitch; +}; + struct nvfx_context { struct pipe_context pipe; @@ -137,7 +104,6 @@ struct nvfx_context { HW, SWTNL, SWRAST } render_mode; unsigned fallback_swtnl; - unsigned fallback_swrast; /* Context state */ unsigned dirty, draw_dirty; @@ -146,7 +112,7 @@ struct nvfx_context { struct pipe_clip_state clip; struct nvfx_vertex_program *vertprog; struct nvfx_fragment_program *fragprog; - struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; unsigned constbuf_nr[PIPE_SHADER_TYPES]; struct nvfx_rasterizer_state *rasterizer; struct nvfx_zsa_state *zsa; @@ -155,7 +121,7 @@ struct nvfx_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; - struct pipe_buffer *idxbuf; + struct pipe_resource *idxbuf; unsigned idxbuf_format; struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; @@ -165,6 +131,13 @@ struct nvfx_context { struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; struct nvfx_vtxelt_state *vtxelt; + + unsigned vbo_bo; + unsigned hw_vtxelt_nr; + uint8_t hw_samplers; + uint32_t hw_txf[8]; + struct nvfx_render_target hw_rt[4]; + struct nvfx_render_target hw_zeta; }; static INLINE struct nvfx_context * @@ -173,14 +146,6 @@ nvfx_context(struct pipe_context *pipe) return (struct nvfx_context *)pipe; } -struct nvfx_state_entry { - boolean (*validate)(struct nvfx_context *nvfx); - struct { - unsigned pipe; - unsigned hw; - } dirty; -}; - extern struct nvfx_state_entry nvfx_state_blend; extern struct nvfx_state_entry nvfx_state_blend_colour; extern struct nvfx_state_entry nvfx_state_fragprog; @@ -210,37 +175,55 @@ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, /* nvfx_draw.c */ extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx); extern 
void nvfx_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, + struct pipe_resource *idxbuf, unsigned ib_size, unsigned mode, unsigned start, unsigned count); +extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx); + +/* nvfx_fb.c */ +extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx); +void +nvfx_framebuffer_relocate(struct nvfx_context *nvfx); /* nvfx_fragprog.c */ extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); +extern void nvfx_fragprog_validate(struct nvfx_context *nvfx); +extern void +nvfx_fragprog_relocate(struct nvfx_context *nvfx); + +/* nvfx_fragtex.c */ +extern void nvfx_fragtex_validate(struct nvfx_context *nvfx); +extern void +nvfx_fragtex_relocate(struct nvfx_context *nvfx); /* nv30_fragtex.c */ extern void nv30_sampler_state_init(struct pipe_context *pipe, struct nvfx_sampler_state *ps, const struct pipe_sampler_state *cso); -extern void nv30_fragtex_bind(struct nvfx_context *); -extern struct nouveau_stateobj * -nv30_fragtex_build(struct nvfx_context *nvfx, int unit); +extern void nv30_fragtex_set(struct nvfx_context *nvfx, int unit); /* nv40_fragtex.c */ extern void nv40_sampler_state_init(struct pipe_context *pipe, struct nvfx_sampler_state *ps, const struct pipe_sampler_state *cso); -extern void nv40_fragtex_bind(struct nvfx_context *); -extern struct nouveau_stateobj * -nv40_fragtex_build(struct nvfx_context *nvfx, int unit); +extern void nv40_fragtex_set(struct nvfx_context *nvfx, int unit); /* nvfx_state.c */ extern void nvfx_init_state_functions(struct nvfx_context *nvfx); +extern void nvfx_state_scissor_validate(struct nvfx_context *nvfx); +extern void nvfx_state_stipple_validate(struct nvfx_context *nvfx); +extern void nvfx_state_blend_validate(struct nvfx_context *nvfx); +extern void nvfx_state_blend_colour_validate(struct nvfx_context *nvfx); +extern void nvfx_state_viewport_validate(struct nvfx_context *nvfx); +extern void 
nvfx_state_rasterizer_validate(struct nvfx_context *nvfx); +extern void nvfx_state_sr_validate(struct nvfx_context *nvfx); +extern void nvfx_state_zsa_validate(struct nvfx_context *nvfx); /* nvfx_state_emit.c */ -extern void nvfx_state_flush_notify(struct nouveau_channel *chan); +extern void nvfx_state_relocate(struct nvfx_context *nvfx); extern boolean nvfx_state_validate(struct nvfx_context *nvfx); extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx); extern void nvfx_state_emit(struct nvfx_context *nvfx); @@ -249,15 +232,18 @@ extern void nvfx_state_emit(struct nvfx_context *nvfx); extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); /* nvfx_vbo.c */ +extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx); +extern void nvfx_vbo_relocate(struct nvfx_context *nvfx); extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); extern void nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, + struct pipe_resource *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); /* nvfx_vertprog.c */ +extern boolean nvfx_vertprog_validate(struct nvfx_context *nvfx); extern void nvfx_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 68e50a3647..2003be1020 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -234,28 +234,31 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx) void nvfx_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, unsigned idxbuf_size, + struct pipe_resource *idxbuf, unsigned idxbuf_size, unsigned mode, unsigned start, unsigned count) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_screen *pscreen = pipe->screen; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; + struct pipe_transfer *ib_transfer = NULL; + struct pipe_transfer 
*cb_transfer = NULL; unsigned i; void *map; if (!nvfx_state_validate_swtnl(nvfx)) return; - nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF); nvfx_state_emit(nvfx); for (i = 0; i < nvfx->vtxbuf_nr; i++) { - map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe, nvfx->vtxbuf[i].buffer, + PIPE_TRANSFER_READ, + &vb_transfer[i]); draw_set_mapped_vertex_buffer(nvfx->draw, i, map); } if (idxbuf) { - map = pipe_buffer_map(pscreen, idxbuf, - PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe, idxbuf, + PIPE_TRANSFER_READ, + &ib_transfer); draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, map); } else { draw_set_mapped_element_buffer(nvfx->draw, 0, NULL); @@ -264,9 +267,10 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe, if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; - map = pipe_buffer_map(pscreen, + map = pipe_buffer_map(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX], - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_TRANSFER_READ, + &cb_transfer); draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, map, nr); } @@ -274,13 +278,14 @@ nvfx_draw_elements_swtnl(struct pipe_context *pipe, draw_arrays(nvfx->draw, mode, start, count); for (i = 0; i < nvfx->vtxbuf_nr; i++) - pipe_buffer_unmap(pscreen, nvfx->vtxbuf[i].buffer); + pipe_buffer_unmap(pipe, nvfx->vtxbuf[i].buffer, vb_transfer[i]); if (idxbuf) - pipe_buffer_unmap(pscreen, idxbuf); + pipe_buffer_unmap(pipe, idxbuf, ib_transfer); if (nvfx->constbuf[PIPE_SHADER_VERTEX]) - pipe_buffer_unmap(pscreen, nvfx->constbuf[PIPE_SHADER_VERTEX]); + pipe_buffer_unmap(pipe, nvfx->constbuf[PIPE_SHADER_VERTEX], + cb_transfer); draw_flush(nvfx->draw); pipe->flush(pipe, 0, NULL); @@ -298,8 +303,8 @@ emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, nvfx->swtnl.draw[a] = draw_out; } -static boolean -nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) +void +nvfx_vtxfmt_validate(struct 
nvfx_context *nvfx) { struct nvfx_fragment_program *fp = nvfx->fragprog; unsigned colour = 0, texcoords = 0, fog = 0, i; @@ -343,14 +348,4 @@ nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) } emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0); - - return FALSE; } - -struct nvfx_state_entry nvfx_state_vtxfmt = { - .validate = nvfx_state_vtxfmt_validate, - .dirty = { - .pipe = NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG, - .hw = 0 - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index b9c91cec8c..5fa825ad05 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -9,6 +9,7 @@ #include "nvfx_context.h" #include "nvfx_shader.h" +#include "nvfx_resource.h" #define MAX_CONSTS 128 #define MAX_IMM 32 @@ -822,130 +823,175 @@ out_err: FREE(fpc); } -static void -nvfx_fragprog_upload(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) +static inline void +nvfx_fp_memcpy(void* dst, const void* src, size_t len) { - struct pipe_screen *pscreen = nvfx->pipe.screen; - const uint32_t le = 1; - uint32_t *map; - int i; - - map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - fflush(stdout); fflush(stderr); - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - fflush(stdout); fflush(stderr); +#ifndef WORDS_BIGENDIAN + memcpy(dst, src, len); +#else + size_t i; + for(i = 0; i < len; i += 4) { + uint32_t v = (uint32_t*)((char*)src + i); + *(uint32_t*)((char*)dst + i) = (v >> 16) | (v << 16); } #endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - pipe_buffer_unmap(pscreen, fp->buffer); } -static boolean +void nvfx_fragprog_validate(struct nvfx_context *nvfx) { + struct 
nouveau_channel* chan = nvfx->screen->base.channel; struct nvfx_fragment_program *fp = nvfx->fragprog; - struct pipe_buffer *constbuf = - nvfx->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nouveau_stateobj *so; - boolean new_consts = FALSE; + int update = 0; int i; - if (fp->translated) - goto update_constants; + if (!fp->translated) + { + nvfx_fragprog_translate(nvfx, fp); + if (!fp->translated) { + static unsigned dummy[8] = {1, 0, 0, 0, 1, 0, 0, 0}; + static int warned = 0; + if(!warned) + { + fprintf(stderr, "nvfx: failed to translate fragment program!\n"); + warned = 1; + } - nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG; - nvfx_fragprog_translate(nvfx, fp); - if (!fp->translated) { - nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG; - return FALSE; - } + /* use dummy program: we cannot fail here */ + fp->translated = TRUE; + fp->insn = malloc(sizeof(dummy)); + memcpy(fp->insn, dummy, sizeof(dummy)); + fp->insn_len = sizeof(dummy) / sizeof(dummy[0]); + } + update = TRUE; - fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nvfx_fragprog_upload(nvfx, fp); + fp->prog_size = (fp->insn_len * 4 + 63) & ~63; - so = so_new(4, 4, 1); - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1); - so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1); - so_data (so, fp->fp_control); - if(!nvfx->is_nv4x) { - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1); - so_data (so, (1<<16)|0x4); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1); - so_data (so, fp->samplers); + int min_size = 4096; + if(fp->prog_size >= min_size) + fp->progs_per_bo = 1; + else + fp->progs_per_bo = min_size / fp->prog_size; + fp->bo_prog_idx = fp->progs_per_bo - 1; } - so_ref(so, &fp->so); - so_ref(NULL, 
&so); - -update_constants: - if (fp->nr_consts) { - float *map; - - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nvfx_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + if (nvfx->dirty & NVFX_NEW_FRAGCONST) + update = TRUE; - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - new_consts = TRUE; + if(update) { + ++fp->bo_prog_idx; + if(fp->bo_prog_idx >= fp->progs_per_bo) + { + if(fp->fpbo && !nouveau_bo_busy(fp->fpbo->next->bo, NOUVEAU_BO_WR)) + { + fp->fpbo = fp->fpbo->next; + } + else + { + struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16); + if(fp->fpbo) + { + fpbo->next = fp->fpbo->next; + fp->fpbo->next = fpbo; + } + else + fpbo->next = fpbo; + fp->fpbo = fpbo; + fpbo->bo = 0; + nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo); + nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC); + + char* map = fpbo->bo->map; + char* buf = fpbo->insn; + for(int i = 0; i < fp->progs_per_bo; ++i) + { + memcpy(buf, fp->insn, fp->insn_len * 4); + nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4); + map += fp->prog_size; + buf += fp->prog_size; + } + } + fp->bo_prog_idx = 0; } - pipe_buffer_unmap(pscreen, constbuf); - if (new_consts) - nvfx_fragprog_upload(nvfx, fp); + int offset = fp->bo_prog_idx * fp->prog_size; + + if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) { + struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT]; + // TODO: avoid using transfers, just directly the buffer + struct pipe_transfer* transfer; + // TODO: does this check make any sense, or should we do this unconditionally? 
+ uint32_t* map = pipe_buffer_map(&nvfx->pipe, constbuf, PIPE_TRANSFER_READ, &transfer); + uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); + uint32_t* buf = (uint32_t*)((char*)fp->fpbo->insn + offset); + for (i = 0; i < fp->nr_consts; ++i) { + unsigned off = fp->consts[i].offset; + unsigned idx = fp->consts[i].index * 4; + + /* TODO: is checking a good idea? */ + if(memcmp(&buf[off], &map[idx], 4 * sizeof(uint32_t))) { + memcpy(&buf[off], &map[idx], 4 * sizeof(uint32_t)); + nvfx_fp_memcpy(&fpmap[off], &map[idx], 4 * sizeof(uint32_t)); + } + } + pipe_buffer_unmap(&nvfx->pipe, constbuf, transfer); + } } - if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) { - so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]); - return TRUE; + if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) { + int offset = fp->bo_prog_idx * fp->prog_size; + MARK_RING(chan, 8, 1); + OUT_RING(chan, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1)); + OUT_RELOC(chan, fp->fpbo->bo, offset, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, + NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + OUT_RING(chan, RING_3D(NV34TCL_FP_CONTROL, 1)); + OUT_RING(chan, fp->fp_control); + if(!nvfx->is_nv4x) { + OUT_RING(chan, RING_3D(NV34TCL_FP_REG_CONTROL, 1)); + OUT_RING(chan, (1<<16)|0x4); + OUT_RING(chan, RING_3D(NV34TCL_TX_UNITS_ENABLE, 1)); + OUT_RING(chan, fp->samplers); + } } +} - return FALSE; +void +nvfx_fragprog_relocate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + struct nvfx_fragment_program *fp = nvfx->fragprog; + struct nouveau_bo* bo = fp->fpbo->bo; + int offset = fp->bo_prog_idx * fp->prog_size; + unsigned fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; // TODO: GART? 
+ fp_flags |= NOUVEAU_BO_DUMMY; + MARK_RING(chan, 2, 2); + OUT_RELOC(chan, bo, RING_3D(NV34TCL_FP_ACTIVE_PROGRAM, 1), fp_flags, 0, 0); + OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, + NV34TCL_FP_ACTIVE_PROGRAM_DMA1); } void nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program *fp) { - if (fp->buffer) - pipe_buffer_reference(&fp->buffer, NULL); - - if (fp->so) - so_ref(NULL, &fp->so); + struct nvfx_fragment_program_bo* fpbo = fp->fpbo; + if(fpbo) + { + do + { + struct nvfx_fragment_program_bo* next = fpbo->next; + nouveau_bo_unmap(fpbo->bo); + nouveau_bo_ref(0, &fpbo->bo); + free(fpbo); + fpbo = next; + } + while(fpbo != fp->fpbo); + } if (fp->insn_len) FREE(fp->insn); } -struct nvfx_state_entry nvfx_state_fragprog = { - .validate = nvfx_fragprog_validate, - .dirty = { - .pipe = NVFX_NEW_FRAGPROG, - .hw = NVFX_STATE_FRAGPROG - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c index 84e4eb1004..f5f6b0c0cb 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -1,49 +1,55 @@ #include "nvfx_context.h" +#include "nvfx_resource.h" -static boolean +void nvfx_fragtex_validate(struct nvfx_context *nvfx) { - struct nvfx_fragment_program *fp = nvfx->fragprog; - struct nvfx_state *state = &nvfx->state; - struct nouveau_stateobj *so; + struct nouveau_channel* chan = nvfx->screen->base.channel; unsigned samplers, unit; - samplers = state->fp_samplers & ~fp->samplers; + samplers = nvfx->dirty_samplers; + if(!samplers) + return; + while (samplers) { unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(1, 1, 0); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_ENABLE(unit), 1); - so_data (so, 0); - so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + if(nvfx->fragment_sampler_views[unit] && 
nvfx->tex_sampler[unit]) { + if(!nvfx->is_nv4x) + nv30_fragtex_set(nvfx, unit); + else + nv40_fragtex_set(nvfx, unit); + } else { + WAIT_RING(chan, 2); + /* this is OK for nv40 too */ + OUT_RING(chan, RING_3D(NV34TCL_TX_ENABLE(unit), 1)); + OUT_RING(chan, 0); + nvfx->hw_samplers &= ~(1 << unit); + } } + nvfx->dirty_samplers = 0; +} + +void +nvfx_fragtex_relocate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned samplers, unit; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - samplers = nvfx->dirty_samplers & fp->samplers; + samplers = nvfx->hw_samplers; while (samplers) { unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - if(!nvfx->is_nv4x) - so = nv30_fragtex_build(nvfx, unit); - else - so = nv40_fragtex_build(nvfx, unit); + struct nvfx_miptree* mt = (struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture; + struct nouveau_bo *bo = mt->base.bo; - so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + MARK_RING(chan, 3, 3); + OUT_RELOC(chan, bo, RING_3D(NV34TCL_TX_OFFSET(unit), 2), tex_flags | NOUVEAU_BO_DUMMY, 0, 0); + OUT_RELOC(chan, bo, 0, tex_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_DUMMY, 0, 0); + OUT_RELOC(chan, bo, nvfx->hw_txf[unit], tex_flags | NOUVEAU_BO_OR | NOUVEAU_BO_DUMMY, + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); } - - nvfx->state.fp_samplers = fp->samplers; - return FALSE; } - -struct nvfx_state_entry nvfx_state_fragtex = { - .validate = nvfx_fragtex_validate, - .dirty = { - .pipe = NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG, - .hw = 0 - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 9de25175e7..602a4768d4 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -5,22 +5,30 @@ #include "util/u_math.h" #include "nvfx_context.h" +#include "nvfx_resource.h" +#include 
"nvfx_transfer.h" #include "nv04_surface_2d.h" +#include "nouveau/nouveau_util.h" +/* Currently using separate implementations for buffers and textures, + * even though gallium has a unified abstraction of these objects. + * Eventually these should be combined, and mechanisms like transfers + * be adapted to work for both buffer and texture uploads. + */ static void nvfx_miptree_layout(struct nvfx_miptree *mt) { - struct pipe_texture *pt = &mt->base; + struct pipe_resource *pt = &mt->base.base; uint width = pt->width0; uint offset = 0; int nr_faces, l, f; - uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL | - PIPE_TEXTURE_USAGE_RENDER_TARGET | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_SCANOUT); + uint wide_pitch = pt->bind & (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT); if (pt->target == PIPE_TEXTURE_CUBE) { nr_faces = 6; @@ -32,7 +40,7 @@ nvfx_miptree_layout(struct nvfx_miptree *mt) } for (l = 0; l <= pt->last_level; l++) { - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) + if (wide_pitch && (pt->flags & NVFX_RESOURCE_FLAG_LINEAR)) mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); else mt->level[l].pitch = util_format_get_stride(pt->format, width); @@ -47,7 +55,7 @@ nvfx_miptree_layout(struct nvfx_miptree *mt) for (l = 0; l < pt->last_level; l++) { mt->level[l].image_offset[f] = offset; - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && + if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR) && u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64); else @@ -61,35 +69,86 @@ nvfx_miptree_layout(struct nvfx_miptree *mt) mt->total_size = offset; } -static struct pipe_texture * -nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +static boolean 
+nvfx_miptree_get_handle(struct pipe_screen *pscreen, + struct pipe_resource *ptexture, + struct winsys_handle *whandle) +{ + struct nvfx_miptree* mt = (struct nvfx_miptree*)ptexture; + + if (!mt || !mt->base.bo) + return FALSE; + + return nouveau_screen_bo_get_handle(pscreen, + mt->base.bo, + mt->level[0].pitch, + whandle); +} + + +static void +nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + int l; + + nouveau_screen_bo_release(screen, mt->base.bo); + + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + FREE(mt); +} + + + + +struct u_resource_vtbl nvfx_miptree_vtbl = +{ + nvfx_miptree_get_handle, /* get_handle */ + nvfx_miptree_destroy, /* resource_destroy */ + NULL, /* is_resource_referenced */ + nvfx_miptree_transfer_new, /* get_transfer */ + nvfx_miptree_transfer_del, /* transfer_destroy */ + nvfx_miptree_transfer_map, /* transfer_map */ + u_default_transfer_flush_region, /* transfer_flush_region */ + nvfx_miptree_transfer_unmap, /* transfer_unmap */ + u_default_transfer_inline_write /* transfer_inline_write */ +}; + + + +struct pipe_resource * +nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt) { struct nvfx_miptree *mt; - unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE; static int no_swizzle = -1; if(no_swizzle < 0) no_swizzle = debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE); - mt = MALLOC(sizeof(struct nvfx_miptree)); + mt = CALLOC_STRUCT(nvfx_miptree); if (!mt) return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; + + mt->base.base = *pt; + mt->base.vtbl = &nvfx_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; /* Swizzled textures must be POT */ if (pt->width0 & (pt->width0 - 1) || pt->height0 & (pt->height0 - 1)) - mt->base.tex_usage |= 
NOUVEAU_TEXTURE_USAGE_LINEAR; + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; else - if (pt->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + if (pt->bind & (PIPE_BIND_SCANOUT | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_DEPTH_STENCIL)) + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; else - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + if (pt->_usage == PIPE_USAGE_DYNAMIC) + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; else { switch (pt->format) { case PIPE_FORMAT_B5G6R5_UNORM: @@ -101,7 +160,7 @@ nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) are just preserving the pre-unification behavior. The whole 2D code is going to be rewritten anyway. */ if(nvfx_screen(pscreen)->is_nv4x) { - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; break; } /* TODO: Figure out which formats can be swizzled */ @@ -110,80 +169,83 @@ nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) case PIPE_FORMAT_R16_SNORM: { if (no_swizzle) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; break; } default: - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; } } - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; - /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. * This also happens for small mipmaps of large textures. 
*/ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + if (pt->bind & PIPE_BIND_RENDER_TARGET && + util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR; nvfx_miptree_layout(mt); - mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); - if (!mt->buffer) { + mt->base.bo = nouveau_screen_bo_new(pscreen, 256, + pt->_usage, pt->bind, mt->total_size); + if (!mt->base.bo) { FREE(mt); return NULL; } - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; + return &mt->base.base; } -static struct pipe_texture * -nvfx_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) + + + +struct pipe_resource * +nvfx_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *template, + struct winsys_handle *whandle) { struct nvfx_miptree *mt; + unsigned stride; /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) + if (template->target != PIPE_TEXTURE_2D || + template->last_level != 0 || + template->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nvfx_miptree); if (!mt) return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; + mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride); + if (mt->base.bo == NULL) { + FREE(mt); + return NULL; + } + + mt->base.base = *template; + mt->base.vtbl = &nvfx_miptree_vtbl; + pipe_reference_init(&mt->base.base.reference, 1); + mt->base.base.screen = pscreen; + mt->level[0].pitch = stride; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); /* Assume whoever created this buffer expects it to be linear for now */ - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + mt->base.base.flags 
|= NVFX_RESOURCE_FLAG_LINEAR; - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; + /* XXX: Need to adjust bo refcount?? + */ + /* nouveau_bo_ref(bo, &mt->base.bo); */ + return &mt->base.base; } -static void -nvfx_miptree_destroy(struct pipe_texture *pt) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - int l; - pipe_buffer_reference(&mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (mt->level[l].image_offset) - FREE(mt->level[l].image_offset); - } - FREE(mt); -} -static struct pipe_surface * -nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + +/* Surface helpers, not strictly required to implement the resource vtbl: + */ +struct pipe_surface * +nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { @@ -193,7 +255,7 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns = CALLOC_STRUCT(nv04_surface); if (!ns) return NULL; - pipe_texture_reference(&ns->base.texture, pt); + pipe_resource_reference(&ns->base.texture, pt); ns->base.format = pt->format; ns->base.width = u_minify(pt->width0, level); ns->base.height = u_minify(pt->height0, level); @@ -213,38 +275,38 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = mt->level[level].image_offset[0]; } - /* create a linear temporary that we can render into if necessary. - * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so - * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ - if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(pscreen, ((struct nvfx_screen*)pscreen)->eng2d, ns)->base; + /* create a linear temporary that we can render into if + * necessary. 
+ * + * Note that ns->pitch is always a multiple of 64 for linear + * surfaces and swizzled surfaces are POT, so ns->pitch & 63 + * is equivalent to (ns->pitch < 64 && swizzled) + */ + + if ((ns->pitch & 63) && + (ns->base.usage & PIPE_BIND_RENDER_TARGET)) + { + struct nv04_surface_2d* eng2d = + ((struct nvfx_screen*)pscreen)->eng2d; + + ns = nv04_surface_wrap_for_render(pscreen, eng2d, ns); + } return &ns->base; } -static void +void nvfx_miptree_surface_del(struct pipe_surface *ps) { struct nv04_surface* ns = (struct nv04_surface*)ps; if(ns->backing) { struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen; - if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + if(ns->backing->base.usage & PIPE_BIND_BLIT_DESTINATION) screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); nvfx_miptree_surface_del(&ns->backing->base); } - pipe_texture_reference(&ps->texture, NULL); + pipe_resource_reference(&ps->texture, NULL); FREE(ps); } - -void -nvfx_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nvfx_miptree_create; - pscreen->texture_destroy = nvfx_miptree_destroy; - pscreen->get_tex_surface = nvfx_miptree_surface_new; - pscreen->tex_surface_destroy = nvfx_miptree_surface_del; - - nouveau_screen(pscreen)->texture_blanket = nvfx_miptree_blanket; -} diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c index acbaf75a23..1b20b5245d 100644 --- a/src/gallium/drivers/nvfx/nvfx_query.c +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -3,6 +3,7 @@ #include "nvfx_context.h" struct nvfx_query { + struct list_head list; struct nouveau_resource *object; unsigned type; boolean ready; @@ -23,6 +24,8 @@ nvfx_query_create(struct pipe_context *pipe, unsigned query_type) q = CALLOC(1, sizeof(struct nvfx_query)); q->type = query_type; + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + return (struct pipe_query *)q; } @@ -32,7 +35,10 @@ 
nvfx_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) struct nvfx_query *q = nvfx_query(pq); if (q->object) + { nouveau_resource_free(&q->object); + LIST_DEL(&q->list); + } FREE(q); } @@ -44,20 +50,25 @@ nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; struct nouveau_grobj *eng3d = screen->eng3d; - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + uint64_t tmp; /* Happens when end_query() is called, then another begin_query() * without querying the result in-between. For now we'll wait for * the existing query to notify completion, but it could be better. */ - if (q->object) { - uint64_t tmp; + if (q->object) pipe->get_query_result(pipe, pq, 1, &tmp); + + while (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object)) + { + struct nvfx_query* oldestq; + assert(!LIST_IS_EMPTY(&nvfx->screen->query_list)); + oldestq = LIST_ENTRY(struct nvfx_query, nvfx->screen->query_list.next, list); + pipe->get_query_result(pipe, (struct pipe_query*)oldestq, 1, &tmp); } - if (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object)) - assert(0); + LIST_ADDTAIL(&q->list, &nvfx->screen->query_list); + nouveau_notifier_reset(nvfx->screen->query, q->object->start); BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1); @@ -90,8 +101,6 @@ nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq, struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_query *q = nvfx_query(pq); - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - if (!q->ready) { unsigned status; @@ -110,6 +119,7 @@ nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq, q->object->start); q->ready = TRUE; nouveau_resource_free(&q->object); + LIST_DEL(&q->list); } *result = q->result; diff --git a/src/gallium/drivers/nvfx/nvfx_resource.c b/src/gallium/drivers/nvfx/nvfx_resource.c new file mode 100644 index 0000000000..10cdeed2a3 --- 
/dev/null +++ b/src/gallium/drivers/nvfx/nvfx_resource.c @@ -0,0 +1,67 @@ + +#include "pipe/p_context.h" +#include "nvfx_resource.h" +#include "nouveau/nouveau_screen.h" + + +/* This doesn't look quite right - this query is supposed to ask + * whether the particular context has references to the resource in + * any unflushed rendering command buffer, and hence requires a + * pipe->flush() for serializing some modification to that resource. + * + * This seems to be answering the question of whether the resource is + * currently on hardware. + */ +static unsigned int +nvfx_resource_is_referenced(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, unsigned level) +{ + return nouveau_reference_flags(nvfx_resource(resource)->bo); +} + +static struct pipe_resource * +nvfx_resource_create(struct pipe_screen *screen, + const struct pipe_resource *template) +{ + if (template->target == PIPE_BUFFER) + return nvfx_buffer_create(screen, template); + else + return nvfx_miptree_create(screen, template); +} + +static struct pipe_resource * +nvfx_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *template, + struct winsys_handle *whandle) +{ + if (template->target == PIPE_BUFFER) + return NULL; + else + return nvfx_miptree_from_handle(screen, template, whandle); +} + +void +nvfx_init_resource_functions(struct pipe_context *pipe) +{ + pipe->get_transfer = u_get_transfer_vtbl; + pipe->transfer_map = u_transfer_map_vtbl; + pipe->transfer_flush_region = u_transfer_flush_region_vtbl; + pipe->transfer_unmap = u_transfer_unmap_vtbl; + pipe->transfer_destroy = u_transfer_destroy_vtbl; + pipe->transfer_inline_write = u_transfer_inline_write_vtbl; + pipe->is_resource_referenced = nvfx_resource_is_referenced; +} + +void +nvfx_screen_init_resource_functions(struct pipe_screen *pscreen) +{ + pscreen->resource_create = nvfx_resource_create; + pscreen->resource_from_handle = nvfx_resource_from_handle; + pscreen->resource_get_handle = 
u_resource_get_handle_vtbl; + pscreen->resource_destroy = u_resource_destroy_vtbl; + pscreen->user_buffer_create = nvfx_user_buffer_create; + + pscreen->get_tex_surface = nvfx_miptree_surface_new; + pscreen->tex_surface_destroy = nvfx_miptree_surface_del; +} diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h new file mode 100644 index 0000000000..a68c14cf3f --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_resource.h @@ -0,0 +1,91 @@ + +#ifndef NVFX_RESOURCE_H +#define NVFX_RESOURCE_H + +#include "util/u_transfer.h" + +struct pipe_resource; +struct nouveau_bo; + + +/* This gets further specialized into either buffer or texture + * structures. In the future we'll want to remove much of that + * distinction, but for now try to keep as close to the existing code + * as possible and use the vtbl struct to choose between the two + * underlying implementations. + */ +struct nvfx_resource { + struct pipe_resource base; + struct u_resource_vtbl *vtbl; + struct nouveau_bo *bo; +}; + +#define NVFX_MAX_TEXTURE_LEVELS 16 + +struct nvfx_miptree { + struct nvfx_resource base; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[NVFX_MAX_TEXTURE_LEVELS]; + + unsigned image_nr; +}; + +static INLINE +struct nvfx_resource *nvfx_resource(struct pipe_resource *resource) +{ + return (struct nvfx_resource *)resource; +} + +static INLINE struct nouveau_bo * +nvfx_surface_buffer(struct pipe_surface *surf) +{ + struct nvfx_resource *mt = nvfx_resource(surf->texture); + + return mt->bo; +} + + +void +nvfx_init_resource_functions(struct pipe_context *pipe); + +void +nvfx_screen_init_resource_functions(struct pipe_screen *pscreen); + + +/* Internal: + */ + +struct pipe_resource * +nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt); + +struct pipe_resource * +nvfx_miptree_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *template, + struct winsys_handle *whandle); + +struct 
pipe_resource * +nvfx_buffer_create(struct pipe_screen *pscreen, + const struct pipe_resource *template); + +struct pipe_resource * +nvfx_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes, + unsigned usage); + + + +void +nvfx_miptree_surface_del(struct pipe_surface *ps); + +struct pipe_surface * +nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags); + + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 1a103520a3..04b456d408 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -1,10 +1,12 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" +#include "util/u_simple_screen.h" #include "nouveau/nouveau_screen.h" #include "nvfx_context.h" #include "nvfx_screen.h" +#include "nvfx_resource.h" #define NV30TCL_CHIPSET_3X_MASK 0x00000003 #define NV34TCL_CHIPSET_3X_MASK 0x00000010 @@ -67,10 +69,6 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, int param) return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return !!screen->is_nv4x; - case NOUVEAU_CAP_HW_VTXBUF: - return 0; - case NOUVEAU_CAP_HW_IDXBUF: - return 0; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; case PIPE_CAP_INDEP_BLEND_ENABLE: @@ -122,16 +120,17 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, struct nvfx_screen *screen = nvfx_screen(pscreen); struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + if (tex_usage & PIPE_BIND_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_B5G6R5_UNORM: return TRUE; default: break; } } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + if (tex_usage & PIPE_BIND_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: @@ 
-147,6 +146,7 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, } else { switch (format) { case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM: case PIPE_FORMAT_B4G4R4A4_UNORM: case PIPE_FORMAT_B5G6R5_UNORM: @@ -172,24 +172,11 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, return FALSE; } -static struct pipe_buffer * -nvfx_surface_buffer(struct pipe_surface *surf) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; - - return mt->buffer; -} static void nvfx_screen_destroy(struct pipe_screen *pscreen) { struct nvfx_screen *screen = nvfx_screen(pscreen); - unsigned i; - - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (screen->state[i]) - so_ref(NULL, &screen->state[i]); - } nouveau_resource_destroy(&screen->vp_exec_heap); nouveau_resource_destroy(&screen->vp_data_heap); @@ -204,97 +191,100 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) FREE(pscreen); } -static void nv30_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +static void nv30_screen_init(struct nvfx_screen *screen) { + struct nouveau_channel *chan = screen->base.channel; int i; /* TODO: perhaps we should do some of this on nv40 too? 
*/ for (i=1; i<8; i++) { - so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); - so_data (so, 0); - so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); - so_data (so, 0); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1)); + OUT_RING(chan, 0); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_VERT(i), 1)); + OUT_RING(chan, 0); } - so_method(so, screen->eng3d, 0x220, 1); - so_data (so, 1); + OUT_RING(chan, RING_3D(0x220, 1)); + OUT_RING(chan, 1); - so_method(so, screen->eng3d, 0x03b0, 1); - so_data (so, 0x00100000); - so_method(so, screen->eng3d, 0x1454, 1); - so_data (so, 0); - so_method(so, screen->eng3d, 0x1d80, 1); - so_data (so, 3); - so_method(so, screen->eng3d, 0x1450, 1); - so_data (so, 0x00030004); + OUT_RING(chan, RING_3D(0x03b0, 1)); + OUT_RING(chan, 0x00100000); + OUT_RING(chan, RING_3D(0x1454, 1)); + OUT_RING(chan, 0); + OUT_RING(chan, RING_3D(0x1d80, 1)); + OUT_RING(chan, 3); + OUT_RING(chan, RING_3D(0x1450, 1)); + OUT_RING(chan, 0x00030004); /* NEW */ - so_method(so, screen->eng3d, 0x1e98, 1); - so_data (so, 0); - so_method(so, screen->eng3d, 0x17e0, 3); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_method(so, screen->eng3d, 0x1f80, 16); + OUT_RING(chan, RING_3D(0x1e98, 1)); + OUT_RING(chan, 0); + OUT_RING(chan, RING_3D(0x17e0, 3)); + OUT_RING(chan, fui(0.0)); + OUT_RING(chan, fui(0.0)); + OUT_RING(chan, fui(1.0)); + OUT_RING(chan, RING_3D(0x1f80, 16)); for (i=0; i<16; i++) { - so_data (so, (i==8) ? 0x0000ffff : 0); + OUT_RING(chan, (i==8) ? 
0x0000ffff : 0); } - so_method(so, screen->eng3d, 0x120, 3); - so_data (so, 0); - so_data (so, 1); - so_data (so, 2); + OUT_RING(chan, RING_3D(0x120, 3)); + OUT_RING(chan, 0); + OUT_RING(chan, 1); + OUT_RING(chan, 2); - so_method(so, screen->eng3d, 0x1d88, 1); - so_data (so, 0x00001200); + OUT_RING(chan, RING_3D(0x1d88, 1)); + OUT_RING(chan, 0x00001200); - so_method(so, screen->eng3d, NV34TCL_RC_ENABLE, 1); - so_data (so, 0); + OUT_RING(chan, RING_3D(NV34TCL_RC_ENABLE, 1)); + OUT_RING(chan, 0); - so_method(so, screen->eng3d, NV34TCL_DEPTH_RANGE_NEAR, 2); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); + OUT_RING(chan, RING_3D(NV34TCL_DEPTH_RANGE_NEAR, 2)); + OUT_RING(chan, fui(0.0)); + OUT_RING(chan, fui(1.0)); - so_method(so, screen->eng3d, NV34TCL_MULTISAMPLE_CONTROL, 1); - so_data (so, 0xffff0000); + OUT_RING(chan, RING_3D(NV34TCL_MULTISAMPLE_CONTROL, 1)); + OUT_RING(chan, 0xffff0000); /* enables use of vp rather than fixed-function somehow */ - so_method(so, screen->eng3d, 0x1e94, 1); - so_data (so, 0x13); + OUT_RING(chan, RING_3D(0x1e94, 1)); + OUT_RING(chan, 0x13); } -static void nv40_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +static void nv40_screen_init(struct nvfx_screen *screen) { - so_method(so, screen->eng3d, NV40TCL_DMA_COLOR2, 2); - so_data (so, screen->base.channel->vram->handle); - so_data (so, screen->base.channel->vram->handle); + struct nouveau_channel *chan = screen->base.channel; + + OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 2)); + OUT_RING(chan, screen->base.channel->vram->handle); + OUT_RING(chan, screen->base.channel->vram->handle); - so_method(so, screen->eng3d, 0x1ea4, 3); - so_data (so, 0x00000010); - so_data (so, 0x01000100); - so_data (so, 0xff800006); + OUT_RING(chan, RING_3D(0x1ea4, 3)); + OUT_RING(chan, 0x00000010); + OUT_RING(chan, 0x01000100); + OUT_RING(chan, 0xff800006); /* vtxprog output routing */ - so_method(so, screen->eng3d, 0x1fc4, 1); - so_data (so, 0x06144321); - so_method(so, 
screen->eng3d, 0x1fc8, 2); - so_data (so, 0xedcba987); - so_data (so, 0x00000021); - so_method(so, screen->eng3d, 0x1fd0, 1); - so_data (so, 0x00171615); - so_method(so, screen->eng3d, 0x1fd4, 1); - so_data (so, 0x001b1a19); - - so_method(so, screen->eng3d, 0x1ef8, 1); - so_data (so, 0x0020ffff); - so_method(so, screen->eng3d, 0x1d64, 1); - so_data (so, 0x00d30000); - so_method(so, screen->eng3d, 0x1e94, 1); - so_data (so, 0x00000001); + OUT_RING(chan, RING_3D(0x1fc4, 1)); + OUT_RING(chan, 0x06144321); + OUT_RING(chan, RING_3D(0x1fc8, 2)); + OUT_RING(chan, 0xedcba987); + OUT_RING(chan, 0x00000021); + OUT_RING(chan, RING_3D(0x1fd0, 1)); + OUT_RING(chan, 0x00171615); + OUT_RING(chan, RING_3D(0x1fd4, 1)); + OUT_RING(chan, 0x001b1a19); + + OUT_RING(chan, RING_3D(0x1ef8, 1)); + OUT_RING(chan, 0x0020ffff); + OUT_RING(chan, RING_3D(0x1d64, 1)); + OUT_RING(chan, 0x00d30000); + OUT_RING(chan, RING_3D(0x1e94, 1)); + OUT_RING(chan, 0x00000001); } -static void -nvfx_screen_init_buffer_functions(struct nvfx_screen* screen) +static unsigned +nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen) { int vram_hack_default = 0; int vram_hack; @@ -320,7 +310,7 @@ nvfx_screen_init_buffer_functions(struct nvfx_screen* screen) } #endif - screen->vertex_buffer_flags = vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART; + return vram_hack ? 
NOUVEAU_BO_VRAM : NOUVEAU_BO_GART; } struct pipe_screen * @@ -329,9 +319,8 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); struct nouveau_channel *chan; struct pipe_screen *pscreen; - struct nouveau_stateobj *so; unsigned eng3d_class = 0; - int ret; + int ret, i; if (!screen) return NULL; @@ -380,8 +369,18 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - nvfx_screen_init_buffer_functions(screen); - nvfx_screen_init_miptree_functions(pscreen); + screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE); + + screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen); + + /* surely both nv3x and nv44 support index buffers too: find out how and test that */ + if(eng3d_class == NV40TCL) + screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags; + + if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags) + screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags; + + nvfx_screen_init_resource_functions(pscreen); ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d); if (ret) { @@ -402,20 +401,29 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Query objects */ - ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); + unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32}; + for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i) + { + ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query); + if(!ret) + break; + } + if (ret) { NOUVEAU_ERR("Error initialising query objects: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } - ret = nouveau_resource_init(&screen->query_heap, 0, 32); + ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]); if (ret) { NOUVEAU_ERR("Error 
initialising query object heap: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } + LIST_INITHEAD(&screen->query_list); + /* Vtxprog resources */ if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) || nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { @@ -423,40 +431,36 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } + BIND_RING(chan, screen->eng3d, 7); + /* Static eng3d initialisation */ - /* make the so big and don't worry about exact values - since we it will be thrown away immediately after use */ - so = so_new(256, 256, 0); - so_method(so, screen->eng3d, NV34TCL_DMA_NOTIFY, 1); - so_data (so, screen->sync->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_TEXTURE0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_COLOR1, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_COLOR0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_VTXBUF0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - - so_method(so, screen->eng3d, NV34TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, screen->query->handle); - - so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY7, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); + /* note that we just started using the channel, so we must have space in the pushbuffer */ + OUT_RING(chan, RING_3D(NV34TCL_DMA_NOTIFY, 1)); + OUT_RING(chan, screen->sync->handle); + OUT_RING(chan, RING_3D(NV34TCL_DMA_TEXTURE0, 2)); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->gart->handle); + OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1)); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 2)); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, 
RING_3D(NV34TCL_DMA_VTXBUF0, 2)); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->gart->handle); + + OUT_RING(chan, RING_3D(NV34TCL_DMA_FENCE, 2)); + OUT_RING(chan, 0); + OUT_RING(chan, screen->query->handle); + + OUT_RING(chan, RING_3D(NV34TCL_DMA_IN_MEMORY7, 2)); + OUT_RING(chan, chan->vram->handle); + OUT_RING(chan, chan->vram->handle); if(!screen->is_nv4x) - nv30_screen_init(screen, so); + nv30_screen_init(screen); else - nv40_screen_init(screen, so); - - so_emit(chan, so); - so_ref(NULL, &so); - nouveau_pushbuf_flush(chan, 0); + nv40_screen_init(screen); return pscreen; } diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index baa848c47a..127d8919af 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -1,8 +1,10 @@ #ifndef __NVFX_SCREEN_H__ #define __NVFX_SCREEN_H__ +#include "util/u_double_list.h" #include "nouveau/nouveau_screen.h" #include "nv04_surface_2d.h" +#include "nvfx_context.h" struct nvfx_screen { struct nouveau_screen base; @@ -12,7 +14,9 @@ struct nvfx_screen { struct nvfx_context *cur_ctx; unsigned is_nv4x; /* either 0 or ~0 */ - int vertex_buffer_flags; + boolean force_swtnl; + unsigned vertex_buffer_reloc_flags; + unsigned index_buffer_reloc_flags; /* HW graphics objects */ struct nv04_surface_2d *eng2d; @@ -22,13 +26,11 @@ struct nvfx_screen { /* Query object resources */ struct nouveau_notifier *query; struct nouveau_resource *query_heap; + struct list_head query_list; /* Vtxprog resources */ struct nouveau_resource *vp_exec_heap; struct nouveau_resource *vp_data_heap; - - /* Current 3D state of channel */ - struct nouveau_stateobj *state[NVFX_STATE_MAX]; }; static INLINE struct nvfx_screen * diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h index 0b2f044f7f..50830b3916 100644 --- a/src/gallium/drivers/nvfx/nvfx_shader.h +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -209,7 +209,7 @@ #define 
NVFX_FP_OP_OPCODE_RFL_NV30 0x36 /* NV40 only fragment program opcodes */ -#define NVFX_FP_OP_OPCODE_TXL_NV40 0x31 +#define NVFX_FP_OP_OPCODE_TXL_NV40 0x2F /* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ #define NV40_FP_OP_BRA_OPCODE_BRK 0x0 #define NV40_FP_OP_BRA_OPCODE_CAL 0x1 diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index ecaa0dcb16..315de492da 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -15,32 +15,31 @@ nvfx_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(5, 8, 0); + struct nouveau_statebuf_builder sb = sb_init(bso->sb); if (cso->rt[0].blend_enable) { - so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + sb_method(sb, NV34TCL_BLEND_FUNC_ENABLE, 3); + sb_data(sb, 1); + sb_data(sb, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | nvgl_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | + sb_data(sb, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | nvgl_blend_func(cso->rt[0].rgb_dst_factor)); if(nvfx->screen->base.device->chipset < 0x40) { - so_method(so, eng3d, NV34TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + sb_method(sb, NV34TCL_BLEND_EQUATION, 1); + sb_data(sb, nvgl_blend_eqn(cso->rt[0].rgb_func)); } else { - so_method(so, eng3d, NV40TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | + sb_method(sb, NV40TCL_BLEND_EQUATION, 1); + sb_data(sb, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | nvgl_blend_eqn(cso->rt[0].rgb_func)); } } else { - so_method(so, eng3d, 
NV34TCL_BLEND_FUNC_ENABLE, 1); - so_data (so, 0); + sb_method(sb, NV34TCL_BLEND_FUNC_ENABLE, 1); + sb_data(sb, 0); } - so_method(so, eng3d, NV34TCL_COLOR_MASK, 1); - so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + sb_method(sb, NV34TCL_COLOR_MASK, 1); + sb_data(sb, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); @@ -48,19 +47,18 @@ nvfx_blend_state_create(struct pipe_context *pipe, /* TODO: add NV40 MRT color mask */ if (cso->logicop_enable) { - so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); + sb_method(sb, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + sb_data(sb, 1); + sb_data(sb, nvgl_logicop_func(cso->logicop_func)); } else { - so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); + sb_method(sb, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + sb_data(sb, 0); } - so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); + sb_method(sb, NV34TCL_DITHER_ENABLE, 1); + sb_data(sb, cso->dither ? 
1 : 0); - so_ref(so, &bso->so); - so_ref(NULL, &so); + bso->sb_len = sb_len(sb, bso->sb); bso->pipe = *cso; return (void *)bso; } @@ -79,7 +77,6 @@ nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) { struct nvfx_blend_state *bso = hwcso; - so_ref(NULL, &bso->so); FREE(bso); } @@ -163,7 +160,7 @@ nvfx_set_fragment_sampler_views(struct pipe_context *pipe, static struct pipe_sampler_view * nvfx_create_sampler_view(struct pipe_context *pipe, - struct pipe_texture *texture, + struct pipe_resource *texture, const struct pipe_sampler_view *templ) { struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); @@ -172,7 +169,7 @@ nvfx_create_sampler_view(struct pipe_context *pipe, *view = *templ; view->reference.count = 1; view->texture = NULL; - pipe_texture_reference(&view->texture, texture); + pipe_resource_reference(&view->texture, texture); view->context = pipe; } @@ -184,7 +181,7 @@ static void nvfx_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { - pipe_texture_reference(&view->texture, NULL); + pipe_resource_reference(&view->texture, NULL); FREE(view); } @@ -192,99 +189,99 @@ static void * nvfx_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(9, 19, 0); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nouveau_statebuf_builder sb = sb_init(rsso->sb); /*XXX: ignored: - * light_twoside * point_smooth -nohw * multisample */ - so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + sb_method(sb, NV34TCL_SHADE_MODEL, 1); + sb_data(sb, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : NV34TCL_SHADE_MODEL_SMOOTH); - so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 
1 : 0); - so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | + sb_method(sb, NV34TCL_VERTEX_TWO_SIDE_ENABLE, 1); + sb_data(sb, cso->light_twoside); + + sb_method(sb, NV34TCL_LINE_WIDTH, 2); + sb_data(sb, (unsigned char)(cso->line_width * 8.0) & 0xff); + sb_data(sb, cso->line_smooth ? 1 : 0); + sb_method(sb, NV34TCL_LINE_STIPPLE_ENABLE, 2); + sb_data(sb, cso->line_stipple_enable ? 1 : 0); + sb_data(sb, (cso->line_stipple_pattern << 16) | cso->line_stipple_factor); - so_method(so, eng3d, NV34TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); + sb_method(sb, NV34TCL_POINT_SIZE, 1); + sb_data(sb, fui(cso->point_size)); - so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6); + sb_method(sb, NV34TCL_POLYGON_MODE_FRONT, 6); if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); + sb_data(sb, nvgl_polygon_mode(cso->fill_ccw)); + sb_data(sb, nvgl_polygon_mode(cso->fill_cw)); switch (cso->cull_mode) { case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_FRONT); + sb_data(sb, NV34TCL_CULL_FACE_FRONT); break; case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_BACK); + sb_data(sb, NV34TCL_CULL_FACE_BACK); break; case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + sb_data(sb, NV34TCL_CULL_FACE_FRONT_AND_BACK); break; default: - so_data(so, NV34TCL_CULL_FACE_BACK); + sb_data(sb, NV34TCL_CULL_FACE_BACK); break; } - so_data(so, NV34TCL_FRONT_FACE_CCW); + sb_data(sb, NV34TCL_FRONT_FACE_CCW); } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + sb_data(sb, nvgl_polygon_mode(cso->fill_cw)); + sb_data(sb, nvgl_polygon_mode(cso->fill_ccw)); switch (cso->cull_mode) { case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_BACK); + sb_data(sb, NV34TCL_CULL_FACE_BACK); break; case PIPE_WINDING_CW: - so_data(so, 
NV34TCL_CULL_FACE_FRONT); + sb_data(sb, NV34TCL_CULL_FACE_FRONT); break; case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + sb_data(sb, NV34TCL_CULL_FACE_FRONT_AND_BACK); break; default: - so_data(so, NV34TCL_CULL_FACE_BACK); + sb_data(sb, NV34TCL_CULL_FACE_BACK); break; } - so_data(so, NV34TCL_FRONT_FACE_CW); + sb_data(sb, NV34TCL_FRONT_FACE_CW); } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + sb_data(sb, cso->poly_smooth ? 1 : 0); + sb_data(sb, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); + sb_method(sb, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + sb_data(sb, cso->poly_stipple_enable ? 1 : 0); - so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + sb_method(sb, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - so_data(so, 1); + sb_data(sb, 1); else - so_data(so, 0); + sb_data(sb, 0); if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - so_data(so, 1); + sb_data(sb, 1); else - so_data(so, 0); + sb_data(sb, 0); if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - so_data(so, 1); + sb_data(sb, 1); else - so_data(so, 0); + sb_data(sb, 0); if (cso->offset_cw || cso->offset_ccw) { - so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2); - so_data (so, fui(cso->offset_scale)); - so_data (so, fui(cso->offset_units * 2)); + sb_method(sb, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + sb_data(sb, fui(cso->offset_scale)); + sb_data(sb, fui(cso->offset_units * 2)); } - so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1); + sb_method(sb, NV34TCL_POINT_SPRITE, 1); if (cso->point_quad_rasterization) { unsigned psctl = (1 << 
0), i; @@ -293,14 +290,13 @@ nvfx_rasterizer_state_create(struct pipe_context *pipe, psctl |= (1 << (8 + i)); } - so_data(so, psctl); + sb_data(sb, psctl); } else { - so_data(so, 0); + sb_data(sb, 0); } - so_ref(so, &rsso->so); - so_ref(NULL, &so); rsso->pipe = *cso; + rsso->sb_len = sb_len(sb, rsso->sb); return (void *)rsso; } @@ -309,6 +305,23 @@ nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nvfx_context *nvfx = nvfx_context(pipe); + if(nvfx->rasterizer && hwcso) + { + if(!nvfx->rasterizer || ((struct nvfx_rasterizer_state*)hwcso)->pipe.scissor + != nvfx->rasterizer->pipe.scissor) + { + nvfx->dirty |= NVFX_NEW_SCISSOR; + nvfx->draw_dirty |= NVFX_NEW_SCISSOR; + } + + if(((struct nvfx_rasterizer_state*)hwcso)->pipe.poly_stipple_enable + != nvfx->rasterizer->pipe.poly_stipple_enable) + { + nvfx->dirty |= NVFX_NEW_STIPPLE; + nvfx->draw_dirty |= NVFX_NEW_STIPPLE; + } + } + nvfx->rasterizer = hwcso; nvfx->dirty |= NVFX_NEW_RAST; nvfx->draw_dirty |= NVFX_NEW_RAST; @@ -319,7 +332,6 @@ nvfx_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) { struct nvfx_rasterizer_state *rsso = hwcso; - so_ref(NULL, &rsso->so); FREE(rsso); } @@ -327,54 +339,51 @@ static void * nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *cso) { - struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(6, 20, 0); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nouveau_statebuf_builder sb = sb_init(zsaso->sb); - so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); + sb_method(sb, NV34TCL_DEPTH_FUNC, 3); + sb_data (sb, nvgl_comparison_op(cso->depth.func)); + sb_data (sb, cso->depth.writemask ? 1 : 0); + sb_data (sb, cso->depth.enabled ? 
1 : 0); - so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref_value)); + sb_method(sb, NV34TCL_ALPHA_FUNC_ENABLE, 3); + sb_data (sb, cso->alpha.enabled ? 1 : 0); + sb_data (sb, nvgl_comparison_op(cso->alpha.func)); + sb_data (sb, float_to_ubyte(cso->alpha.ref_value)); if (cso->stencil[0].enabled) { - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); - so_data (so, cso->stencil[0].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + sb_method(sb, NV34TCL_STENCIL_FRONT_ENABLE, 3); + sb_data (sb, cso->stencil[0].enabled ? 1 : 0); + sb_data (sb, cso->stencil[0].writemask); + sb_data (sb, nvgl_comparison_op(cso->stencil[0].func)); + sb_method(sb, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); + sb_data (sb, cso->stencil[0].valuemask); + sb_data (sb, nvgl_stencil_op(cso->stencil[0].fail_op)); + sb_data (sb, nvgl_stencil_op(cso->stencil[0].zfail_op)); + sb_data (sb, nvgl_stencil_op(cso->stencil[0].zpass_op)); } else { - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); + sb_method(sb, NV34TCL_STENCIL_FRONT_ENABLE, 1); + sb_data (sb, 0); } if (cso->stencil[1].enabled) { - so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3); - so_data (so, cso->stencil[1].enabled ? 
1 : 0); - so_data (so, cso->stencil[1].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); - so_data (so, cso->stencil[1].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + sb_method(sb, NV34TCL_STENCIL_BACK_ENABLE, 3); + sb_data (sb, cso->stencil[1].enabled ? 1 : 0); + sb_data (sb, cso->stencil[1].writemask); + sb_data (sb, nvgl_comparison_op(cso->stencil[1].func)); + sb_method(sb, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); + sb_data (sb, cso->stencil[1].valuemask); + sb_data (sb, nvgl_stencil_op(cso->stencil[1].fail_op)); + sb_data (sb, nvgl_stencil_op(cso->stencil[1].zfail_op)); + sb_data (sb, nvgl_stencil_op(cso->stencil[1].zpass_op)); } else { - so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); + sb_method(sb, NV34TCL_STENCIL_BACK_ENABLE, 1); + sb_data (sb, 0); } - so_ref(so, &zsaso->so); - so_ref(NULL, &so); zsaso->pipe = *cso; + zsaso->sb_len = sb_len(sb, zsaso->sb); return (void *)zsaso; } @@ -392,7 +401,6 @@ nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) { struct nvfx_zsa_state *zsaso = hwcso; - so_ref(NULL, &zsaso->so); FREE(zsaso); } @@ -499,18 +507,18 @@ nvfx_set_clip_state(struct pipe_context *pipe, static void nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf ) + struct pipe_resource *buf ) { struct nvfx_context *nvfx = nvfx_context(pipe); nvfx->constbuf[shader] = buf; - nvfx->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + nvfx->constbuf_nr[shader] = buf->width0 / (4 * sizeof(float)); if (shader == PIPE_SHADER_VERTEX) { - nvfx->dirty |= NVFX_NEW_VERTPROG; + nvfx->dirty |= NVFX_NEW_VERTCONST; } else if (shader == PIPE_SHADER_FRAGMENT) { - nvfx->dirty |= NVFX_NEW_FRAGPROG; + nvfx->dirty |= NVFX_NEW_FRAGCONST; } } diff --git 
a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index e585246879..9ceb2577ec 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" +#include "nouveau/nouveau_statebuf.h" struct nvfx_vertex_program_exec { uint32_t data[4]; @@ -38,7 +39,6 @@ struct nvfx_vertex_program { uint32_t ir; uint32_t or; uint32_t clip_ctrl; - struct nouveau_stateobj *so; }; struct nvfx_fragment_program_data { @@ -46,6 +46,12 @@ struct nvfx_fragment_program_data { unsigned index; }; +struct nvfx_fragment_program_bo { + struct nvfx_fragment_program_bo* next; + struct nouveau_bo* bo; + char insn[] __attribute__((aligned(16))); +}; + struct nvfx_fragment_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; @@ -59,25 +65,13 @@ struct nvfx_fragment_program { struct nvfx_fragment_program_data *consts; unsigned nr_consts; - struct pipe_buffer *buffer; - uint32_t fp_control; - struct nouveau_stateobj *so; -}; -#define NVFX_MAX_TEXTURE_LEVELS 16 - -struct nvfx_miptree { - struct pipe_texture base; - struct nouveau_bo *bo; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[NVFX_MAX_TEXTURE_LEVELS]; + unsigned bo_prog_idx; + unsigned prog_size; + unsigned progs_per_bo; + struct nvfx_fragment_program_bo* fpbo; }; + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state_blend.c b/src/gallium/drivers/nvfx/nvfx_state_blend.c index 03b6ef8117..fe34e98364 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_blend.c +++ b/src/gallium/drivers/nvfx/nvfx_state_blend.c @@ -1,41 +1,22 @@ #include "nvfx_context.h" -static boolean +void nvfx_state_blend_validate(struct nvfx_context *nvfx) { - so_ref(nvfx->blend->so, &nvfx->state.hw[NVFX_STATE_BLEND]); - return TRUE; + struct nouveau_channel* chan = nvfx->screen->base.channel; + sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); } -struct 
nvfx_state_entry nvfx_state_blend = { - .validate = nvfx_state_blend_validate, - .dirty = { - .pipe = NVFX_NEW_BLEND, - .hw = NVFX_STATE_BLEND - } -}; - -static boolean +void nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) { - struct nouveau_stateobj *so = so_new(1, 1, 0); + struct nouveau_channel* chan = nvfx->screen->base.channel; struct pipe_blend_color *bcol = &nvfx->blend_colour; - so_method(so, nvfx->screen->eng3d, NV34TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_BLEND_COLOR, 1)); + OUT_RING(chan, ((float_to_ubyte(bcol->color[3]) << 24) | (float_to_ubyte(bcol->color[0]) << 16) | (float_to_ubyte(bcol->color[1]) << 8) | (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_BCOL]); - so_ref(NULL, &so); - return TRUE; } - -struct nvfx_state_entry nvfx_state_blend_colour = { - .validate = nvfx_state_blend_colour_validate, - .dirty = { - .pipe = NVFX_NEW_BCOL, - .hw = NVFX_STATE_BCOL - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 72537388ea..4137849bf0 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -2,105 +2,113 @@ #include "nvfx_state.h" #include "draw/draw_context.h" -#define RENDER_STATES(name, vbo) \ -static struct nvfx_state_entry *name##render_states[] = { \ - &nvfx_state_framebuffer, \ - &nvfx_state_rasterizer, \ - &nvfx_state_scissor, \ - &nvfx_state_stipple, \ - &nvfx_state_fragprog, \ - &nvfx_state_fragtex, \ - &nvfx_state_vertprog, \ - &nvfx_state_blend, \ - &nvfx_state_blend_colour, \ - &nvfx_state_zsa, \ - &nvfx_state_sr, \ - &nvfx_state_viewport, \ - &nvfx_state_##vbo, \ - NULL \ -} - -RENDER_STATES(, vbo); -RENDER_STATES(swtnl_, vtxfmt); - -static void -nvfx_state_do_validate(struct nvfx_context *nvfx, - struct nvfx_state_entry **states) +static boolean +nvfx_state_validate_common(struct 
nvfx_context *nvfx) { - while (*states) { - struct nvfx_state_entry *e = *states; + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned dirty = nvfx->dirty; + + if(nvfx != nvfx->screen->cur_ctx) + dirty = ~0; + + if(nvfx->render_mode == HW) + { + if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST | NVFX_NEW_UCP)) + { + if(!nvfx_vertprog_validate(nvfx)) + return FALSE; + } - if (nvfx->dirty & e->dirty.pipe) { - if (e->validate(nvfx)) - nvfx->state.dirty |= (1ULL << e->dirty.hw); + if(dirty & (NVFX_NEW_ARRAYS)) + { + if(!nvfx_vbo_validate(nvfx)) + return FALSE; } + } + else + { + /* TODO: this looks a bit misdesigned */ + if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) + nvfx_vertprog_validate(nvfx); + + if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG)) + nvfx_vtxfmt_validate(nvfx); + } + + if(dirty & NVFX_NEW_FB) + nvfx_state_framebuffer_validate(nvfx); + + if(dirty & NVFX_NEW_RAST) + sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); + + if(dirty & NVFX_NEW_SCISSOR) + nvfx_state_scissor_validate(nvfx); + + if(dirty & NVFX_NEW_STIPPLE) + nvfx_state_stipple_validate(nvfx); + + if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST)) + nvfx_fragprog_validate(nvfx); + + if(dirty & NVFX_NEW_SAMPLER) + nvfx_fragtex_validate(nvfx); + + if(dirty & NVFX_NEW_BLEND) + sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); + + if(dirty & NVFX_NEW_BCOL) + nvfx_state_blend_colour_validate(nvfx); + + if(dirty & NVFX_NEW_ZSA) + sb_emit(chan, nvfx->zsa->sb, nvfx->zsa->sb_len); + + if(dirty & NVFX_NEW_SR) + nvfx_state_sr_validate(nvfx); + +/* Having this depend on FB looks wrong, but it seems + necessary to make this work on nv3x + TODO: find the right fix +*/ + if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB)) + nvfx_state_viewport_validate(nvfx); - states++; + /* TODO: could nv30 need this or something similar too? 
*/ + if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) { + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); + OUT_RING(chan, 1); } nvfx->dirty = 0; + return TRUE; } void nvfx_state_emit(struct nvfx_context *nvfx) { - struct nvfx_state *state = &nvfx->state; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; - uint64_t states; - - /* XXX: race conditions - */ - if (nvfx != screen->cur_ctx) { - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (state->hw[i] && screen->state[i] != state->hw[i]) - state->dirty |= (1ULL << i); - } - - screen->cur_ctx = nvfx; - } - - for (i = 0, states = state->dirty; states; i++) { - if (!(states & (1ULL << i))) - continue; - so_ref (state->hw[i], &nvfx->screen->state[i]); - if (state->hw[i]) - so_emit(chan, nvfx->screen->state[i]); - states &= ~(1ULL << i); - } - - /* TODO: could nv30 need this or something similar too? */ - if(nvfx->is_nv4x) { - if (state->dirty & ((1ULL << NVFX_STATE_FRAGPROG) | - (1ULL << NVFX_STATE_FRAGTEX0))) { - BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 1); - } - } - state->dirty = 0; + struct nouveau_channel* chan = nvfx->screen->base.channel; + /* we need to ensure there is enough space to output relocations in one go */ + unsigned max_relocs = 0 + + 16 /* vertex buffers, incl. 
dma flag */ + + 2 /* index buffer plus format+dma flag */ + + 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */ + + 2 * 16 /* fragment textures plus format+dma flag */ + + 2 * 4 /* vertex textures plus format+dma flag */ + + 1 /* fragprog incl dma flag */ + ; + MARK_RING(chan, max_relocs * 2, max_relocs * 2); + nvfx_state_relocate(nvfx); } void -nvfx_state_flush_notify(struct nouveau_channel *chan) +nvfx_state_relocate(struct nvfx_context *nvfx) { - struct nvfx_context *nvfx = chan->user_private; - struct nvfx_state *state = &nvfx->state; - unsigned i, samplers; - - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]); - for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { - if (!(samplers & (1 << i))) - continue; - so_emit_reloc_markers(chan, - state->hw[NVFX_STATE_FRAGTEX0+i]); - samplers &= ~(1ULL << i); - } - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]); - if (state->hw[NVFX_STATE_VTXBUF] && nvfx->render_mode == HW) - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]); + nvfx_framebuffer_relocate(nvfx); + nvfx_fragtex_relocate(nvfx); + nvfx_fragprog_relocate(nvfx); + if (nvfx->render_mode == HW) + nvfx_vbo_relocate(nvfx); } boolean @@ -117,16 +125,13 @@ nvfx_state_validate(struct nvfx_context *nvfx) return FALSE; /* Attempt to go to hwtnl again */ - nvfx->pipe.flush(&nvfx->pipe, 0, NULL); nvfx->dirty |= (NVFX_NEW_VIEWPORT | NVFX_NEW_VERTPROG | NVFX_NEW_ARRAYS); nvfx->render_mode = HW; } - nvfx_state_do_validate(nvfx, render_states); - - if (nvfx->fallback_swtnl || nvfx->fallback_swrast) + if(!nvfx_state_validate_common(nvfx)) return FALSE; if (was_sw) @@ -167,12 +172,7 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); } - nvfx_state_do_validate(nvfx, swtnl_render_states); - - if (nvfx->fallback_swrast) { - NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast); - return FALSE; - } + nvfx_state_validate_common(nvfx); nvfx->draw_dirty = 
0; return TRUE; diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 1923184163..8c215980e2 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -1,26 +1,17 @@ #include "nvfx_context.h" +#include "nvfx_resource.h" #include "nouveau/nouveau_util.h" -static struct pipe_buffer * -nvfx_do_surface_buffer(struct pipe_surface *surface) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)surface->texture; - return mt->buffer; -} -#define nvfx_surface_buffer(ps) nouveau_bo(nvfx_do_surface_buffer(ps)) -static boolean +void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) { struct pipe_framebuffer_state *fb = &nvfx->framebuffer; struct nouveau_channel *chan = nvfx->screen->base.channel; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nv04_surface *rt[4], *zeta = NULL; uint32_t rt_enable = 0, rt_format = 0; int i, colour_format = 0, zeta_format = 0; int depth_only = 0; - struct nouveau_stateobj *so = so_new(18, 24, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; @@ -32,14 +23,18 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) assert(fb->nr_cbufs <= 4); for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { + if (colour_format) assert(colour_format == fb->cbufs[i]->format); - } else { + else colour_format = fb->cbufs[i]->format; - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } + + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + nvfx->hw_rt[i].bo = nvfx_surface_buffer(fb->cbufs[i]); + nvfx->hw_rt[i].offset = fb->cbufs[i]->offset; + nvfx->hw_rt[i].pitch = ((struct nv04_surface *)fb->cbufs[i])->pitch; } + for(; i < 4; ++i) + nvfx->hw_rt[i].bo = 0; if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) @@ -47,20 +42,24 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) if 
(fb->zsbuf) { zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; + nvfx->hw_zeta.bo = nvfx_surface_buffer(fb->zsbuf); + nvfx->hw_zeta.offset = fb->zsbuf->offset; + nvfx->hw_zeta.pitch = ((struct nv04_surface *)fb->zsbuf)->pitch; } + else + nvfx->hw_zeta.bo = 0; if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { /* Render to at least a colour buffer */ - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)); rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + (log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); } else rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; @@ -68,17 +67,17 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) depth_only = 1; /* Render to depth buffer only */ - if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + if (!(fb->zsbuf->texture->_usage & NVFX_RESOURCE_FLAG_LINEAR)) { assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + (log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); } else rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; } else { - return FALSE; + return; } switch 
(colour_format) { @@ -114,121 +113,138 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { /* TODO: does this limitation really exist? TODO: can it be worked around somehow? */ - return FALSE; + assert(0); } if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || ((!nvfx->is_nv4x) && depth_only)) { - struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); + struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]); uint32_t pitch = rt0->pitch; if(!nvfx->is_nv4x) { - if (zeta) { - pitch |= (zeta->pitch << 16); + if (nvfx->hw_zeta.bo) { + pitch |= (nvfx->hw_zeta.pitch << 16); } else { pitch |= (pitch << 16); } } - so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, nvfx_surface_buffer(&rt0->base), 0, + OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1)); + OUT_RELOC(chan, rt0->bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2); - so_data (so, pitch); - so_reloc (so, nvfx_surface_buffer(&rt[0]->base), - rt0->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV34TCL_COLOR0_PITCH, 2)); + OUT_RING(chan, pitch); + OUT_RELOC(chan, rt0->bo, + rt0->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, nvfx_surface_buffer(&rt[1]->base), 0, + OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR1, 1)); + OUT_RELOC(chan, nvfx->hw_rt[1].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, nvfx_surface_buffer(&rt[1]->base), - rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV34TCL_COLOR1_OFFSET, 2)); + OUT_RELOC(chan, nvfx->hw_rt[1].bo, + nvfx->hw_rt[1].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch); + OUT_RING(chan, nvfx->hw_rt[1].pitch); } if(nvfx->is_nv4x) { if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { 
- so_method(so, eng3d, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, nvfx_surface_buffer(&rt[2]->base), 0, + OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR2, 1)); + OUT_RELOC(chan, nvfx->hw_rt[2].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, nvfx_surface_buffer(&rt[2]->base), - rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV40TCL_COLOR2_OFFSET, 1)); + OUT_RELOC(chan, nvfx->hw_rt[2].bo, + nvfx->hw_rt[2].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng3d, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch); + OUT_RING(chan, RING_3D(NV40TCL_COLOR2_PITCH, 1)); + OUT_RING(chan, nvfx->hw_rt[2].pitch); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, eng3d, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, nvfx_surface_buffer(&rt[3]->base), 0, + OUT_RING(chan, RING_3D(NV40TCL_DMA_COLOR3, 1)); + OUT_RELOC(chan, nvfx->hw_rt[3].bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, nvfx_surface_buffer(&rt[3]->base), - rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, + OUT_RING(chan, RING_3D(NV40TCL_COLOR3_OFFSET, 1)); + OUT_RELOC(chan, nvfx->hw_rt[3].bo, + nvfx->hw_rt[3].offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng3d, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch); + OUT_RING(chan, RING_3D(NV40TCL_COLOR3_PITCH, 1)); + OUT_RING(chan, nvfx->hw_rt[3].pitch); } } if (zeta_format) { - so_method(so, eng3d, NV34TCL_DMA_ZETA, 1); - so_reloc (so, nvfx_surface_buffer(&zeta->base), 0, + OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1)); + OUT_RELOC(chan, nvfx->hw_zeta.bo, 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1); + OUT_RING(chan, RING_3D(NV34TCL_ZETA_OFFSET, 1)); /* TODO: reverse engineer LMA */ - so_reloc (so, nvfx_surface_buffer(&zeta->base), - zeta->base.offset, rt_flags | 
NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, nvfx->hw_zeta.bo, + nvfx->hw_zeta.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); if(nvfx->is_nv4x) { - so_method(so, eng3d, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch); + OUT_RING(chan, RING_3D(NV40TCL_ZETA_PITCH, 1)); + OUT_RING(chan, nvfx->hw_zeta.pitch); } } - so_method(so, eng3d, NV34TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, eng3d, NV34TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, eng3d, 0x1d88, 1); - so_data (so, (1 << 12) | h); + OUT_RING(chan, RING_3D(NV34TCL_RT_ENABLE, 1)); + OUT_RING(chan, rt_enable); + OUT_RING(chan, RING_3D(NV34TCL_RT_HORIZ, 3)); + OUT_RING(chan, (w << 16) | 0); + OUT_RING(chan, (h << 16) | 0); + OUT_RING(chan, rt_format); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_HORIZ, 2)); + OUT_RING(chan, (w << 16) | 0); + OUT_RING(chan, (h << 16) | 0); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2)); + OUT_RING(chan, ((w - 1) << 16) | 0); + OUT_RING(chan, ((h - 1) << 16) | 0); + OUT_RING(chan, RING_3D(0x1d88, 1)); + OUT_RING(chan, (1 << 12) | h); if(!nvfx->is_nv4x) { /* Wonder why this is needed, context should all be set to zero on init */ /* TODO: we can most likely remove this, after putting it in context init */ - so_method(so, eng3d, NV34TCL_VIEWPORT_TX_ORIGIN, 1); - so_data (so, 0); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1)); + OUT_RING(chan, 0); } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]); - so_ref(NULL, &so); - return TRUE; } -struct nvfx_state_entry nvfx_state_framebuffer = { - .validate = nvfx_state_framebuffer_validate, - .dirty = { - .pipe = NVFX_NEW_FB, - .hw = NVFX_STATE_FB +void +nvfx_framebuffer_relocate(struct 
nvfx_context *nvfx) +{ + struct nouveau_channel *chan = nvfx->screen->base.channel; + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + rt_flags |= NOUVEAU_BO_DUMMY; + MARK_RING(chan, 20, 20); + +#define DO_(var, pfx, name) \ + if(var.bo) { \ + OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_DMA_##name, 1), rt_flags, 0, 0); \ + OUT_RELOC(chan, var.bo, 0, \ + rt_flags | NOUVEAU_BO_OR, \ + chan->vram->handle, chan->gart->handle); \ + OUT_RELOC(chan, var.bo, RING_3D(pfx##TCL_##name##_OFFSET, 1), rt_flags, 0, 0); \ + OUT_RELOC(chan, var.bo, \ + var.offset, rt_flags | NOUVEAU_BO_LOW, \ + 0, 0); \ } -}; + +#define DO(pfx, num) DO_(nvfx->hw_rt[num], pfx, COLOR##num) + DO(NV34, 0); + DO(NV34, 1); + DO(NV40, 2); + DO(NV40, 3); + + DO_(nvfx->hw_zeta, NV34, ZETA); +} diff --git a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c index 0d35ecbf20..7f14ae85d5 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c +++ b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c @@ -1,17 +1,9 @@ #include "nvfx_context.h" -static boolean +void nvfx_state_rasterizer_validate(struct nvfx_context *nvfx) { - so_ref(nvfx->rasterizer->so, - &nvfx->state.hw[NVFX_STATE_RAST]); - return TRUE; + struct nouveau_channel* chan = nvfx->screen->base.channel; + sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); } -struct nvfx_state_entry nvfx_state_rasterizer = { - .validate = nvfx_state_rasterizer_validate, - .dirty = { - .pipe = NVFX_NEW_RAST, - .hw = NVFX_STATE_RAST - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_scissor.c b/src/gallium/drivers/nvfx/nvfx_state_scissor.c index 940d8cb5c0..9077266120 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_scissor.c +++ b/src/gallium/drivers/nvfx/nvfx_state_scissor.c @@ -1,36 +1,23 @@ #include "nvfx_context.h" -static boolean +void nvfx_state_scissor_validate(struct nvfx_context *nvfx) { + struct nouveau_channel *chan = nvfx->screen->base.channel; struct pipe_rasterizer_state *rast = 
&nvfx->rasterizer->pipe; struct pipe_scissor_state *s = &nvfx->scissor; - struct nouveau_stateobj *so; - if (nvfx->state.hw[NVFX_STATE_SCISSOR] && - (rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) - return FALSE; + if ((rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) + return; nvfx->state.scissor_enabled = rast->scissor; - so = so_new(1, 2, 0); - so_method(so, nvfx->screen->eng3d, NV34TCL_SCISSOR_HORIZ, 2); + WAIT_RING(chan, 3); + OUT_RING(chan, RING_3D(NV34TCL_SCISSOR_HORIZ, 2)); if (nvfx->state.scissor_enabled) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + OUT_RING(chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING(chan, ((s->maxy - s->miny) << 16) | s->miny); } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); + OUT_RING(chan, 4096 << 16); + OUT_RING(chan, 4096 << 16); } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_SCISSOR]); - so_ref(NULL, &so); - return TRUE; } - -struct nvfx_state_entry nvfx_state_scissor = { - .validate = nvfx_state_scissor_validate, - .dirty = { - .pipe = NVFX_NEW_SCISSOR | NVFX_NEW_RAST, - .hw = NVFX_STATE_SCISSOR - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_stipple.c b/src/gallium/drivers/nvfx/nvfx_state_stipple.c index 57cd3c936a..4da968f093 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_stipple.c +++ b/src/gallium/drivers/nvfx/nvfx_state_stipple.c @@ -1,40 +1,26 @@ #include "nvfx_context.h" -static boolean +void nvfx_state_stipple_validate(struct nvfx_context *nvfx) { + struct nouveau_channel *chan = nvfx->screen->base.channel; struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nouveau_stateobj *so; - if (nvfx->state.hw[NVFX_STATE_STIPPLE] && - (rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) - return FALSE; + if ((rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) + return; if (rast->poly_stipple_enable) { 
unsigned i; - so = so_new(2, 33, 0); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + WAIT_RING(chan, 35); + OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1)); + OUT_RING(chan, 1); + OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32)); for (i = 0; i < 32; i++) - so_data(so, nvfx->stipple[i]); + OUT_RING(chan, nvfx->stipple[i]); } else { - so = so_new(1, 1, 0); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_POLYGON_STIPPLE_ENABLE, 1)); + OUT_RING(chan, 0); } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_STIPPLE]); - so_ref(NULL, &so); - return TRUE; } - -struct nvfx_state_entry nvfx_state_stipple = { - .validate = nvfx_state_stipple_validate, - .dirty = { - .pipe = NVFX_NEW_STIPPLE | NVFX_NEW_RAST, - .hw = NVFX_STATE_STIPPLE, - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_viewport.c b/src/gallium/drivers/nvfx/nvfx_state_viewport.c index ec730e3a9e..e983b16f32 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_viewport.c +++ b/src/gallium/drivers/nvfx/nvfx_state_viewport.c @@ -1,52 +1,35 @@ #include "nvfx_context.h" -/* Having this depend on FB and RAST looks wrong, but it seems - necessary to make this work on nv3x - TODO: find the right fix -*/ - -static boolean +void nvfx_state_viewport_validate(struct nvfx_context *nvfx) { + struct nouveau_channel *chan = nvfx->screen->base.channel; struct pipe_viewport_state *vpt = &nvfx->viewport; - struct nouveau_stateobj *so; - so = so_new(2, 9, 0); - so_method(so, nvfx->screen->eng3d, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); + WAIT_RING(chan, 11); if(nvfx->render_mode == HW) { - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, 
fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - so_method(so, nvfx->screen->eng3d, 0x1d78, 1); - so_data (so, 1); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8)); + OUT_RINGf(chan, vpt->translate[0]); + OUT_RINGf(chan, vpt->translate[1]); + OUT_RINGf(chan, vpt->translate[2]); + OUT_RINGf(chan, vpt->translate[3]); + OUT_RINGf(chan, vpt->scale[0]); + OUT_RINGf(chan, vpt->scale[1]); + OUT_RINGf(chan, vpt->scale[2]); + OUT_RINGf(chan, vpt->scale[3]); + OUT_RING(chan, RING_3D(0x1d78, 1)); + OUT_RING(chan, 1); } else { - so_data (so, fui(0.0f)); - so_data (so, fui(0.0f)); - so_data (so, fui(0.0f)); - so_data (so, fui(0.0f)); - so_data (so, fui(1.0f)); - so_data (so, fui(1.0f)); - so_data (so, fui(1.0f)); - so_data (so, fui(1.0f)); - so_method(so, nvfx->screen->eng3d, 0x1d78, 1); - so_data (so, nvfx->is_nv4x ? 0x110 : 1); + OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TRANSLATE_X, 8)); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 0.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RINGf(chan, 1.0f); + OUT_RING(chan, RING_3D(0x1d78, 1)); + OUT_RING(chan, nvfx->is_nv4x ? 
0x110 : 1); } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_VIEWPORT]); - so_ref(NULL, &so); - return TRUE; } - -struct nvfx_state_entry nvfx_state_viewport = { - .validate = nvfx_state_viewport_validate, - .dirty = { - .pipe = NVFX_NEW_VIEWPORT | NVFX_NEW_FB | NVFX_NEW_RAST, - .hw = NVFX_STATE_VIEWPORT - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_zsa.c b/src/gallium/drivers/nvfx/nvfx_state_zsa.c index c84fd041c1..608605d32b 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_zsa.c +++ b/src/gallium/drivers/nvfx/nvfx_state_zsa.c @@ -1,41 +1,21 @@ #include "nvfx_context.h" -static boolean +void nvfx_state_zsa_validate(struct nvfx_context *nvfx) { - so_ref(nvfx->zsa->so, - &nvfx->state.hw[NVFX_STATE_ZSA]); - return TRUE; + struct nouveau_channel* chan = nvfx->screen->base.channel; + sb_emit(chan, nvfx->zsa->sb, nvfx->zsa->sb_len); } -struct nvfx_state_entry nvfx_state_zsa = { - .validate = nvfx_state_zsa_validate, - .dirty = { - .pipe = NVFX_NEW_ZSA, - .hw = NVFX_STATE_ZSA - } -}; - -static boolean +void nvfx_state_sr_validate(struct nvfx_context *nvfx) { - struct nouveau_stateobj *so = so_new(2, 2, 0); + struct nouveau_channel* chan = nvfx->screen->base.channel; struct pipe_stencil_ref *sr = &nvfx->stencil_ref; - so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, sr->ref_value[0]); - so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, sr->ref_value[1]); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_SR]); - so_ref(NULL, &so); - return TRUE; + WAIT_RING(chan, 4); + OUT_RING(chan, RING_3D(NV34TCL_STENCIL_FRONT_FUNC_REF, 1)); + OUT_RING(chan, sr->ref_value[0]); + OUT_RING(chan, RING_3D(NV34TCL_STENCIL_BACK_FUNC_REF, 1)); + OUT_RING(chan, sr->ref_value[1]); } - -struct nvfx_state_entry nvfx_state_sr = { - .validate = nvfx_state_sr_validate, - .dirty = { - .pipe = NVFX_NEW_SR, - .hw = NVFX_STATE_SR - } -}; diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c 
index 1c250e9fe4..a776ab5831 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -8,6 +8,8 @@ #include "nvfx_context.h" #include "nvfx_screen.h" #include "nvfx_state.h" +#include "nvfx_resource.h" +#include "nvfx_transfer.h" struct nvfx_transfer { struct pipe_transfer base; @@ -16,10 +18,11 @@ struct nvfx_transfer { }; static void -nvfx_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, - struct pipe_texture *template) +nvfx_compatible_transfer_tex(struct pipe_resource *pt, unsigned width, unsigned height, + unsigned bind, + struct pipe_resource *template) { - memset(template, 0, sizeof(struct pipe_texture)); + memset(template, 0, sizeof(struct pipe_resource)); template->target = pt->target; template->format = pt->format; template->width0 = width; @@ -27,56 +30,77 @@ nvfx_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned h template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; + template->bind = bind; + template->_usage = PIPE_USAGE_DYNAMIC; + template->flags = NVFX_RESOURCE_FLAG_LINEAR; +} + + +static unsigned nvfx_transfer_bind_flags( unsigned transfer_usage ) +{ + unsigned bind = 0; + + if (transfer_usage & PIPE_TRANSFER_WRITE) + bind |= PIPE_BIND_BLIT_SOURCE; - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; + if (transfer_usage & PIPE_TRANSFER_READ) + bind |= PIPE_BIND_BLIT_DESTINATION; + + return bind; } -static struct pipe_transfer * -nvfx_transfer_new(struct pipe_context *pipe, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) +struct pipe_transfer * +nvfx_miptree_transfer_new(struct pipe_context *pipe, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) { struct pipe_screen *pscreen = pipe->screen; struct nvfx_miptree *mt = 
(struct nvfx_miptree *)pt; struct nvfx_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; + struct pipe_resource tx_tex_template, *tx_tex; static int no_transfer = -1; + unsigned bind = nvfx_transfer_bind_flags(usage); if(no_transfer < 0) - no_transfer = debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/); + no_transfer = debug_get_bool_option("NOUVEAU_NO_TRANSFER", FALSE); + tx = CALLOC_STRUCT(nvfx_transfer); if (!tx) return NULL; - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; + /* Don't handle 3D transfers yet. + */ + assert(box->depth == 1); + + pipe_resource_reference(&tx->base.resource, pt); + tx->base.sr = sr; tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; + tx->base.box = *box; + tx->base.stride = mt->level[sr.level].pitch; /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || no_transfer) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) + if ((pt->_usage == PIPE_USAGE_DYNAMIC || + no_transfer) && + pt->flags & NVFX_RESOURCE_FLAG_LINEAR) { tx->direct = true; + + /* XXX: just call the internal nvfx function. 
+ */ tx->surface = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); + sr.face, sr.level, + box->z, + bind); return &tx->base; } tx->direct = false; - nvfx_compatible_transfer_tex(pt, w, h, &tx_tex_template); + nvfx_compatible_transfer_tex(pt, box->width, box->height, bind, &tx_tex_template); - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); + tx_tex = pscreen->resource_create(pscreen, &tx_tex_template); if (!tx_tex) { FREE(tx); @@ -87,9 +111,9 @@ nvfx_transfer_new(struct pipe_context *pipe, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); + bind); - pipe_texture_reference(&tx_tex, NULL); + pipe_resource_reference(&tx_tex, NULL); if (!tx->surface) { @@ -103,15 +127,16 @@ nvfx_transfer_new(struct pipe_context *pipe, struct pipe_texture *pt, struct pipe_surface *src; src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); + sr.face, sr.level, box->z, + PIPE_BIND_BLIT_SOURCE); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, x, y, - w, h); + src, + box->x, box->y, + box->width, box->height); pipe_surface_reference(&src, NULL); } @@ -119,9 +144,9 @@ nvfx_transfer_new(struct pipe_context *pipe, struct pipe_texture *pt, return &tx->base; } -static void -nvfx_transfer_del(struct pipe_context *pipe, - struct pipe_transfer *ptx) +void +nvfx_miptree_transfer_del(struct pipe_context *pipe, + struct pipe_transfer *ptx) { struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; @@ -130,55 +155,51 @@ nvfx_transfer_del(struct pipe_context *pipe, struct nvfx_screen *nvscreen = nvfx_screen(pscreen); struct pipe_surface *dst; - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE | 
NOUVEAU_BUFFER_USAGE_NO_RENDER); + dst = pscreen->get_tex_surface(pscreen, + ptx->resource, + ptx->sr.face, + ptx->sr.level, + ptx->box.z, + PIPE_BIND_BLIT_DESTINATION); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, tx->base.x, tx->base.y, + dst, ptx->box.x, ptx->box.y, tx->surface, 0, 0, - tx->base.width, tx->base.height); + ptx->box.width, ptx->box.height); pipe_surface_reference(&dst, NULL); } pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); + pipe_resource_reference(&ptx->resource, NULL); FREE(ptx); } -static void * -nvfx_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx) +void * +nvfx_miptree_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx) { struct pipe_screen *pscreen = pipe->screen; struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); + uint8_t *map = nouveau_screen_bo_map(pscreen, mt->base.bo, + nouveau_screen_transfer_flags(ptx->usage)); if(!tx->direct) return map + ns->base.offset; else - return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + return (map + ns->base.offset + + ptx->box.y * ns->pitch + + ptx->box.x * util_format_get_blocksize(ptx->resource->format)); } -static void -nvfx_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx) +void +nvfx_miptree_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx) { struct pipe_screen *pscreen = pipe->screen; struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; - pipe_buffer_unmap(pscreen, mt->buffer); -} - -void -nvfx_init_transfer_functions(struct nvfx_context *nvfx) -{ - 
nvfx->pipe.get_tex_transfer = nvfx_transfer_new; - nvfx->pipe.tex_transfer_destroy = nvfx_transfer_del; - nvfx->pipe.transfer_map = nvfx_transfer_map; - nvfx->pipe.transfer_unmap = nvfx_transfer_unmap; + nouveau_screen_bo_unmap(pscreen, mt->base.bo); } diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.h b/src/gallium/drivers/nvfx/nvfx_transfer.h new file mode 100644 index 0000000000..3e3317b2c7 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_transfer.h @@ -0,0 +1,26 @@ + +#ifndef NVFX_TRANSFER_H +#define NVFX_TRANSFER_H + +#include "util/u_transfer.h" +#include "pipe/p_state.h" + + +struct pipe_transfer * +nvfx_miptree_transfer_new(struct pipe_context *pcontext, + struct pipe_resource *pt, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box); +void +nvfx_miptree_transfer_del(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void * +nvfx_miptree_transfer_map(struct pipe_context *pcontext, + struct pipe_transfer *ptx); +void +nvfx_miptree_transfer_unmap(struct pipe_context *pcontext, + struct pipe_transfer *ptx); + + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index c26536b0e7..bc87fe275c 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -5,20 +5,12 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_resource.h" #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" #include "nouveau/nouveau_util.h" -static boolean -nvfx_force_swtnl(struct nvfx_context *nvfx) -{ - static int force_swtnl = -1; - if(force_swtnl < 0) - force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", 0); - return force_swtnl; -} - static INLINE int nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { @@ -29,6 +21,12 @@ nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) case PIPE_FORMAT_R32G32B32A32_FLOAT: *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; break; + case PIPE_FORMAT_R16_FLOAT: + 
case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + *fmt = NV34TCL_VTXFMT_TYPE_HALF; + break; case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R8G8B8_UNORM: @@ -49,21 +47,25 @@ nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) switch (pipe) { case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_FLOAT: case PIPE_FORMAT_R16_SSCALED: *ncomp = 1; break; case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_SSCALED: *ncomp = 2; break; case PIPE_FORMAT_R8G8B8_UNORM: case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: case PIPE_FORMAT_R16G16B16_SSCALED: *ncomp = 3; break; case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: case PIPE_FORMAT_R16G16B16A16_SSCALED: *ncomp = 4; break; @@ -76,7 +78,7 @@ nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) } static boolean -nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, +nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_resource *ib, unsigned ib_size) { struct pipe_screen *pscreen = &nvfx->screen->base.base; @@ -88,7 +90,7 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, return FALSE; } - if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + if (!nvfx->screen->index_buffer_reloc_flags || ib_size == 1) return FALSE; switch (ib_size) { @@ -112,63 +114,47 @@ nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, return TRUE; } -static boolean -nvfx_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so, +// type must be floating point +static inline void +nvfx_vbo_static_attrib(struct nvfx_context *nvfx, int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) + struct pipe_vertex_buffer *vb, unsigned ncomp) { - 
struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - unsigned type, ncomp; + struct pipe_transfer *transfer; + struct nouveau_channel* chan = nvfx->screen->base.channel; void *map; - if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) - return FALSE; - - map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(&nvfx->pipe, vb->buffer, PIPE_TRANSFER_READ, &transfer); map += vb->buffer_offset + ve->src_offset; - switch (type) { - case NV34TCL_VTXFMT_TYPE_FLOAT: - { - float *v = map; - - switch (ncomp) { - case 4: - so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - so_data (so, fui(v[3])); - break; - case 3: - so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - break; - case 2: - so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - break; - case 1: - so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1); - so_data (so, fui(v[0])); - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - } + float *v = map; + + switch (ncomp) { + case 4: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_4F_X(attrib), 4)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + OUT_RING(chan, fui(v[3])); + break; + case 3: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_3F_X(attrib), 3)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + OUT_RING(chan, fui(v[2])); + break; + case 2: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_2F_X(attrib), 2)); + OUT_RING(chan, fui(v[0])); + OUT_RING(chan, fui(v[1])); + break; + case 1: + OUT_RING(chan, RING_3D(NV34TCL_VTX_ATTR_1F(attrib), 1)); + OUT_RING(chan, fui(v[0])); break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; } - 
pipe_buffer_unmap(pscreen, vb->buffer); - return TRUE; + pipe_buffer_unmap(&nvfx->pipe, vb->buffer, transfer); } void @@ -178,11 +164,10 @@ nvfx_draw_arrays(struct pipe_context *pipe, struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; unsigned restart = 0; nvfx_vbo_set_idxbuf(nvfx, NULL, 0); - if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { + if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { nvfx_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); return; @@ -193,19 +178,22 @@ nvfx_draw_arrays(struct pipe_context *pipe, nvfx_state_emit(nvfx); - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, + unsigned avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 256, mode, start, count, &restart); if (!vc) { FIRE_RING(chan); continue; } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING(chan, RING_3D(NV34TCL_VB_VERTEX_BATCH, 1)); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -216,14 +204,14 @@ nvfx_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push); + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_VERTEX_BATCH, push)); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, 0); count -= vc; @@ -239,7 +227,6 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, { struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - 
struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -247,7 +234,10 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, nvfx_state_emit(nvfx); - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, + unsigned avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 2, mode, start, count, &restart); if (vc == 0) { FIRE_RING(chan); @@ -255,11 +245,11 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, } count -= vc; - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -269,7 +259,7 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); for (i = 0; i < push; i+=2) OUT_RING(chan, (elts[i+1] << 16) | elts[i]); @@ -277,7 +267,7 @@ nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, elts += push; } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, 0); start = restart; @@ -290,7 +280,6 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, { struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -298,7 +287,10 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, nvfx_state_emit(nvfx); - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, + unsigned avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the 
BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 2, mode, start, count, &restart); if (vc == 0) { FIRE_RING(chan); @@ -306,11 +298,11 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, } count -= vc; - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING(chan, RING_3D(NV34TCL_VB_ELEMENT_U32, 1)); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -320,7 +312,7 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U16, push >> 1)); for (i = 0; i < push; i+=2) OUT_RING(chan, (elts[i+1] << 16) | elts[i]); @@ -328,7 +320,7 @@ nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, elts += push; } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, 0); start = restart; @@ -341,7 +333,6 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, { struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -349,7 +340,10 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, nvfx_state_emit(nvfx); - vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, + unsigned avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 5, 1, mode, start, count, &restart); if (vc == 0) { FIRE_RING(chan); @@ -357,20 +351,20 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, } count -= vc; - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); 
+ OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push); + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_ELEMENT_U32, push)); OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, 0); start = restart; @@ -379,14 +373,14 @@ nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, static void nvfx_draw_elements_inline(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, + struct pipe_resource *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_screen *pscreen = pipe->screen; + struct pipe_transfer *transfer; void *map; - map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe, ib, PIPE_TRANSFER_READ, &transfer); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); return; @@ -407,7 +401,7 @@ nvfx_draw_elements_inline(struct pipe_context *pipe, break; } - pipe_buffer_unmap(pscreen, ib); + pipe_buffer_unmap(pipe, ib, transfer); } static void @@ -417,7 +411,6 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe, struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; unsigned restart = 0; while (count) { @@ -425,19 +418,22 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe, nvfx_state_emit(nvfx); - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, + unsigned avail = AVAIL_RING(chan); + avail -= 16 + (avail >> 10); /* for the BEGIN_RING_NIs, conservatively assuming one every 1024, plus 16 for safety */ + + vc = nouveau_vbuf_split(avail, 6, 256, mode, start, count, &restart); if (!vc) { FIRE_RING(chan); continue; } - BEGIN_RING(chan, eng3d, 
NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING(chan, RING_3D(NV34TCL_VB_INDEX_BATCH, 1)); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -448,14 +444,14 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push); + OUT_RING(chan, RING_3D_NI(NV34TCL_VB_INDEX_BATCH, push)); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, 0); count -= vc; @@ -465,14 +461,14 @@ nvfx_draw_elements_vbo(struct pipe_context *pipe, void nvfx_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, + struct pipe_resource *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { struct nvfx_context *nvfx = nvfx_context(pipe); boolean idxbuf; idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { + if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx)) { nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize, mode, start, count); return; @@ -488,83 +484,138 @@ nvfx_draw_elements(struct pipe_context *pipe, pipe->flush(pipe, 0, NULL); } -static boolean +boolean nvfx_vbo_validate(struct nvfx_context *nvfx) { - struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct pipe_buffer *ib = nvfx->idxbuf; + struct nouveau_channel* chan = nvfx->screen->base.channel; + struct pipe_resource *ib = nvfx->idxbuf; unsigned ib_format = nvfx->idxbuf_format; - unsigned vb_flags = nvfx->screen->vertex_buffer_flags | NOUVEAU_BO_RD; - int hw; + int i; + int elements = MAX2(nvfx->vtxelt->num_elements, 
nvfx->hw_vtxelt_nr); + uint32_t vtxfmt[16]; + unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD; + + if (!elements) + return TRUE; - vtxbuf = so_new(3, 17, 18); - so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements); - vtxfmt = so_new(1, 16, 0); - so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements); + nvfx->vbo_bo = 0; - for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) { + MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2); + for (i = 0; i < nvfx->vtxelt->num_elements; i++) { struct pipe_vertex_element *ve; struct pipe_vertex_buffer *vb; unsigned type, ncomp; - ve = &nvfx->vtxelt->pipe[hw]; + ve = &nvfx->vtxelt->pipe[i]; vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - if (!vb->stride) { - if (!sattr) - sattr = so_new(16, 16 * 4, 0); - - if (nvfx_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); - continue; - } - } - if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + MARK_UNDO(chan); nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; - so_ref(NULL, &vtxbuf); - so_ref(NULL, &vtxfmt); return FALSE; } - so_reloc(vtxbuf, nouveau_bo(vb->buffer), + if (!vb->stride && type == NV34TCL_VTXFMT_TYPE_FLOAT) { + nvfx_vbo_static_attrib(nvfx, i, ve, vb, ncomp); + vtxfmt[i] = type; + } else { + vtxfmt[i] = ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type); + nvfx->vbo_bo |= (1 << i); + } + } + + for(; i < elements; ++i) + vtxfmt[i] = NV34TCL_VTXFMT_TYPE_FLOAT; + + OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); + OUT_RINGp(chan, vtxfmt, elements); + + if(nvfx->is_nv4x) { + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { + OUT_RING(chan, RING_3D(0x1718, 1)); + OUT_RING(chan, 0); + } + } + + OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); + for (i = 0; i < nvfx->vtxelt->num_elements; i++) { + struct pipe_vertex_element *ve; + struct 
pipe_vertex_buffer *vb; + + ve = &nvfx->vtxelt->pipe[i]; + vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + + if (!(nvfx->vbo_bo & (1 << i))) + OUT_RING(chan, 0); + else + { + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; + OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset, vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); + } } + for (; i < elements; i++) + OUT_RING(chan, 0); + + OUT_RING(chan, RING_3D(0x1710, 1)); + OUT_RING(chan, 0); + if (ib) { - struct nouveau_bo *bo = nouveau_bo(ib); + unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD; + struct nouveau_bo* bo = nvfx_resource(ib)->bo; + + assert(nvfx->screen->index_buffer_reloc_flags); - so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, + OUT_RING(chan, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2)); + OUT_RELOC(chan, bo, 0, ib_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR, 0, NV34TCL_IDXBUF_FORMAT_DMA1); } - so_method(vtxbuf, eng3d, 0x1710, 1); - so_data (vtxbuf, 0); - - so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]); - so_ref(NULL, &vtxbuf); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF); - so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]); - so_ref(NULL, &vtxfmt); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT); - so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]); - so_ref(NULL, &sattr); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR); - return FALSE; + nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements; + return TRUE; } -struct nvfx_state_entry nvfx_state_vbo = { - .validate = nvfx_vbo_validate, - .dirty = { - .pipe = NVFX_NEW_ARRAYS, - .hw = 0, +void +nvfx_vbo_relocate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + 
unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + int i; + + MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3); + for(i = 0; i < nvfx->vtxelt->num_elements; ++i) { + if(nvfx->vbo_bo & (1 << i)) { + struct pipe_vertex_element *ve = &nvfx->vtxelt->pipe[i]; + struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo* bo = nvfx_resource(vb->buffer)->bo; + OUT_RELOC(chan, bo, RING_3D(NV34TCL_VTXBUF_ADDRESS(i), 1), + vb_flags, 0, 0); + OUT_RELOC(chan, bo, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + } } -}; + + if(nvfx->idxbuf) + { + unsigned ib_flags = nvfx->screen->index_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY; + struct nouveau_bo* bo = nvfx_resource(nvfx->idxbuf)->bo; + + assert(nvfx->screen->index_buffer_reloc_flags); + + OUT_RELOC(chan, bo, RING_3D(NV34TCL_IDXBUF_ADDRESS, 2), + ib_flags, 0, 0); + OUT_RELOC(chan, bo, 0, + ib_flags | NOUVEAU_BO_LOW, 0, 0); + OUT_RELOC(chan, bo, nvfx->idxbuf_format, + ib_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + } +} diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 2d243be16a..b405fd9c82 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -479,6 +479,9 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, case TGSI_OPCODE_ARL: arith(vpc, VEC, ARL, dst, mask, src[0], none, none); break; + case TGSI_OPCODE_COS: + arith(vpc, SCA, COS, dst, mask, none, none, src[0]); + break; case TGSI_OPCODE_DP3: arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); break; @@ -512,6 +515,11 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, case TGSI_OPCODE_LOG: arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); break; + case TGSI_OPCODE_LRP: + tmp = temp(vpc); + arith(vpc, VEC, MAD, tmp, mask, neg(src[0]), 
src[2], src[2]); + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], tmp); + break; case TGSI_OPCODE_MAD: arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); break; @@ -544,15 +552,36 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, case TGSI_OPCODE_RSQ: arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); break; + case TGSI_OPCODE_SEQ: + arith(vpc, VEC, SEQ, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SFL: + arith(vpc, VEC, SFL, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SGE: arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SGT: arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SIN: + arith(vpc, SCA, SIN, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SLE: + arith(vpc, VEC, SLE, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SLT: arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SNE: + arith(vpc, VEC, SNE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SSG: + arith(vpc, VEC, SSG, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_STR: + arith(vpc, VEC, STR, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SUB: arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); break; @@ -830,15 +859,16 @@ out_err: FREE(vpc); } -static boolean +boolean nvfx_vertprog_validate(struct nvfx_context *nvfx) { - struct pipe_screen *pscreen = nvfx->pipe.screen; + struct pipe_context *pipe = &nvfx->pipe; struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; struct nouveau_grobj *eng3d = screen->eng3d; struct nvfx_vertex_program *vp; - struct pipe_buffer *constbuf; + struct pipe_resource *constbuf; + struct pipe_transfer *transfer = NULL; boolean upload_code = FALSE, upload_data = FALSE; int i; @@ -846,6 +876,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) vp = nvfx->vertprog; constbuf = 
nvfx->constbuf[PIPE_SHADER_VERTEX]; + // TODO: ouch! can't we just use constant slots for these?! if ((nvfx->dirty & NVFX_NEW_UCP) || memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { nvfx_vertprog_destroy(nvfx, vp); @@ -857,21 +888,19 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } /* Translate TGSI shader into hw bytecode */ - if (vp->translated) - goto check_gpu_resources; - - nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; + if (!vp->translated) + { + nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; nvfx_vertprog_translate(nvfx, vp); - if (!vp->translated) { - nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + if (!vp->translated) { + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; return FALSE; + } } -check_gpu_resources: /* Allocate hw vtxprog exec slots */ if (!vp->exec) { struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; - struct nouveau_stateobj *so; uint vplen = vp->nr_insns; if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { @@ -886,19 +915,6 @@ check_gpu_resources: assert(0); } - so = so_new(3, 4, 0); - so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - if(nvfx->is_nv4x) { - so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); - so_data (so, vp->ir); - so_data (so, vp->or); - } - so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); - so_data (so, vp->clip_ctrl); - so_ref(so, &vp->so); - so_ref(NULL, &so); - upload_code = TRUE; } @@ -962,8 +978,9 @@ check_gpu_resources: float *map = NULL; if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe, constbuf, + PIPE_TRANSFER_READ, + &transfer); } for (i = 0; i < vp->nr_consts; i++) { @@ -984,7 +1001,7 @@ check_gpu_resources: } if (constbuf) - pipe_buffer_unmap(pscreen, constbuf); + pipe_buffer_unmap(pipe, constbuf, transfer); } /* Upload vtxprog */ @@ -1005,12 +1022,21 @@ check_gpu_resources: } } - if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { - so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); - 
return TRUE; + if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) + { + WAIT_RING(chan, 7); + OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start); + if(nvfx->is_nv4x) { + OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 2)); + OUT_RING(chan, vp->ir); + OUT_RING(chan, vp->or); + } + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, vp->clip_ctrl); } - return FALSE; + return TRUE; } void @@ -1037,13 +1063,4 @@ nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) vp->data_start_min = 0; vp->ir = vp->or = vp->clip_ctrl = 0; - so_ref(NULL, &vp->so); } - -struct nvfx_state_entry nvfx_state_vertprog = { - .validate = nvfx_vertprog_validate, - .dirty = { - .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, - .hw = NVFX_STATE_VERTPROG, - } -}; |