diff options
26 files changed, 277 insertions, 298 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index c7d428ba48..d82206bae5 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -146,15 +146,12 @@ static void compile_clip_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache(&brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->clip.prog_data); + brw_upload_cache(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); } @@ -271,12 +268,11 @@ static void upload_clip_prog(struct brw_context *brw) } } - drm_intel_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, - &key, sizeof(key), - &brw->clip.prog_data); - if (brw->clip.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + &brw->clip.prog_offset, &brw->clip.prog_data)) { compile_clip_prog( brw, &key ); + } } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 6015c8cbe9..b9efbb74c8 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,11 +43,15 @@ brw_prepare_clip_unit(struct brw_context *brw) clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); - /* CACHE_NEW_CLIP_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + clip->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->clip.state_offset + + offsetof(struct brw_clip_unit_state, thread0), + 
brw->clip.prog_offset + + (clip->thread0.grf_reg_count << 1)) >> 6; clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip->thread1.single_program_flow = 1; @@ -110,14 +114,6 @@ brw_prepare_clip_unit(struct brw_context *brw) clip->viewport_ymin = -1; clip->viewport_ymax = 1; - /* Emit clip program relocation */ - assert(brw->clip.prog_bo); - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->clip.state_offset + - offsetof(struct brw_clip_unit_state, thread0)), - brw->clip.prog_bo, clip->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; } @@ -125,6 +121,7 @@ const struct brw_tracked_state brw_clip_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 621b6f8990..16b71f6b1c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -142,7 +142,8 @@ enum brw_state_id { BRW_STATE_NR_VS_SURFACES, BRW_STATE_INDEX_BUFFER, BRW_STATE_VS_CONSTBUF, - BRW_STATE_WM_CONSTBUF + BRW_STATE_WM_CONSTBUF, + BRW_STATE_PROGRAM_CACHE, }; #define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) @@ -172,6 +173,7 @@ enum brw_state_id { #define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF) +#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -365,7 +367,8 @@ struct brw_cache_item { GLuint key_size; /* for variable-sized keys */ const void *key; - drm_intel_bo *bo; + uint32_t offset; + uint32_t size; struct brw_cache_item *next; }; @@ -376,14 +379,11 @@ struct brw_cache { struct brw_context *brw; struct brw_cache_item **items; + drm_intel_bo *bo; GLuint size, 
n_items; - char *name[BRW_MAX_CACHE]; - - /* Record of the last BOs chosen for each cache_id. Used to set - * brw->state.dirty.cache when a new cache item is chosen. - */ - drm_intel_bo *last_bo[BRW_MAX_CACHE]; + uint32_t next_offset; + bool bo_used_by_gpu; }; @@ -634,8 +634,9 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ - drm_intel_bo *prog_bo; drm_intel_bo *const_bo; + /** Offset in the program cache to the VS program */ + uint32_t prog_offset; uint32_t state_offset; /** Binding table of pointers to surf_bo entries */ @@ -651,14 +652,16 @@ struct brw_context struct brw_gs_prog_data *prog_data; GLboolean prog_active; + /** Offset in the program cache to the GS program */ + uint32_t prog_offset; uint32_t state_offset; - drm_intel_bo *prog_bo; } gs; struct { struct brw_clip_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; /* Offset in the batch to the CLIP state on pre-gen6. */ uint32_t state_offset; @@ -673,7 +676,8 @@ struct brw_context struct { struct brw_sf_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the SF program pre-gen6 */ + uint32_t prog_offset; uint32_t state_offset; uint32_t vp_offset; } sf; @@ -700,12 +704,14 @@ struct brw_context GLuint sampler_count; uint32_t sampler_offset; + /** Offset in the program cache to the WM program */ + uint32_t prog_offset; + /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; uint32_t surf_offset[BRW_WM_MAX_SURF]; uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ - drm_intel_bo *prog_bo; drm_intel_bo *const_bo; /* pull constant buffer. */ /** * This is offset in the batch to the push constants on gen6. 
@@ -717,9 +723,6 @@ struct brw_context struct { - /* gen4 */ - drm_intel_bo *prog_bo; - uint32_t state_offset; uint32_t blend_state_offset; uint32_t depth_stencil_state_offset; @@ -874,6 +877,26 @@ brw_register_blocks(int reg_count) return ALIGN(reg_count, 16) / 16 - 1; } +static inline uint32_t +brw_program_reloc(struct brw_context *brw, uint32_t state_offset, + uint32_t prog_offset) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5) { + /* Using state base address. */ + return prog_offset; + } + + drm_intel_bo_emit_reloc(intel->batch.bo, + state_offset, + brw->cache.bo, + prog_offset, + I915_GEM_DOMAIN_INSTRUCTION, 0); + + return brw->cache.bo->offset + prog_offset; +} + GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); #endif diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7c73a8fbf0..8580c78fea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1697,14 +1697,12 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.program_string_id = bfp->id; - drm_intel_bo *old_prog_bo = brw->wm.prog_bo; + uint32_t old_prog_offset = brw->wm.prog_offset; struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; - brw->wm.prog_bo = NULL; bool success = do_wm_prog(brw, prog, bfp, &key); - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = old_prog_bo; + brw->wm.prog_offset = old_prog_offset; brw->wm.prog_data = old_prog_data; return success; diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 001cd62f8c..3171e97d7a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -121,14 +121,11 @@ static void compile_gs_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - program, program_size, - 
&c.prog_data, sizeof(c.prog_data), - &brw->gs.prog_data); + brw_upload_cache(&brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->gs.prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); } @@ -189,15 +186,12 @@ static void prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } - drm_intel_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = NULL; - if (brw->gs.prog_active) { - brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, - &key, sizeof(key), - &brw->gs.prog_data); - if (brw->gs.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + &brw->gs.prog_offset, &brw->gs.prog_data)) { compile_gs_prog( brw, &key ); + } } } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 542874b770..bbfefcd816 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -45,12 +45,17 @@ brw_prepare_gs_unit(struct brw_context *brw) memset(gs, 0, sizeof(*gs)); - /* CACHE_NEW_GS_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + + gs->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->gs.state_offset + + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_offset + + (gs->thread0.grf_reg_count << 1)) >> 6; gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs->thread1.single_program_flow = 1; @@ -69,13 +74,6 @@ brw_prepare_gs_unit(struct brw_context *brw) gs->thread4.max_threads = 1; else gs->thread4.max_threads = 0; - - /* Emit GS program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->gs.state_offset + - offsetof(struct brw_gs_unit_state, thread0)), - brw->gs.prog_bo, gs->thread0.grf_reg_count << 1, - 
I915_GEM_DOMAIN_INSTRUCTION, 0); } if (intel->gen == 5) @@ -91,6 +89,7 @@ const struct brw_tracked_state brw_gs_unit = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1f3b64fd53..b0f95dd66b 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -706,7 +706,9 @@ static void upload_state_base_address( struct brw_context *brw ) I915_GEM_DOMAIN_INSTRUCTION), 0, 1); OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ - OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */ + OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); /* Instruction base address: shader kernels (incl. SIP) */ + OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Dynamic state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ @@ -719,7 +721,8 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* Instruction base address */ + OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); /* Instruction base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ OUT_BATCH(1); /* Instruction access upper bound */ @@ -740,7 +743,8 @@ static void upload_state_base_address( struct brw_context *brw ) const struct brw_tracked_state brw_state_base_address = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE), .cache = 0, }, .emit = upload_state_base_address diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index c2227777cf..fca30a74aa 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ 
b/src/mesa/drivers/dri/i965/brw_sf.c @@ -120,14 +120,11 @@ static void compile_sf_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache(&brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->sf.prog_data); + brw_upload_cache(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); } @@ -191,12 +188,11 @@ static void upload_sf_prog(struct brw_context *brw) key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); } - drm_intel_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, - &key, sizeof(key), - &brw->sf.prog_data); - if (brw->sf.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + &brw->sf.prog_offset, &brw->sf.prog_data)) { compile_sf_prog( brw, &key ); + } } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 78b22c4df3..eb3d103099 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -133,9 +133,14 @@ static void upload_sf_unit( struct brw_context *brw ) memset(sf, 0, sizeof(*sf)); - /* CACHE_NEW_SF_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_SF_PROG */ sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; - sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ + sf->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->sf.state_offset + + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_offset + + (sf->thread0.grf_reg_count << 1)) >> 6; sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -282,11 +287,6 @@ static void upload_sf_unit( struct brw_context *brw ) /* STATE_PREFETCH command 
description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. */ - /* Emit SF program relocation */ - drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + - offsetof(struct brw_sf_unit_state, thread0)), - brw->sf.prog_bo, sf->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); /* Emit SF viewport relocation */ drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + @@ -308,6 +308,7 @@ const struct brw_tracked_state brw_sf_unit = { _NEW_SCISSOR | _NEW_BUFFERS), .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_URB_FENCE), .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 544ef7d47e..b384651d8d 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -145,21 +145,21 @@ void brw_clear_validated_bos(struct brw_context *brw); * brw_state_cache.c */ -drm_intel_bo *brw_upload_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - GLuint aux_sz, - void *aux_return); - -drm_intel_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - void *aux_return); +void brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + const void *data, + GLuint data_sz, + const void *aux, + GLuint aux_sz, + uint32_t *out_offset, void *out_aux); + +bool brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + uint32_t *inout_offset, void *out_aux); void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index f13a41fa7c..d13711b19b 100644 --- 
a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -45,6 +45,7 @@ */ #include "main/imports.h" +#include "intel_batchbuffer.h" #include "brw_state.h" #define FILE_DEBUG_FLAG DEBUG_STATE @@ -67,23 +68,6 @@ hash_key(struct brw_cache_item *item) return hash; } - -/** - * Marks a new buffer as being chosen for the given cache id. - */ -static void -update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, - drm_intel_bo *bo) -{ - if (bo == cache->last_bo[cache_id]) - return; /* no change */ - - drm_intel_bo_unreference(cache->last_bo[cache_id]); - cache->last_bo[cache_id] = bo; - drm_intel_bo_reference(cache->last_bo[cache_id]); - cache->brw->state.dirty.cache |= 1 << cache_id; -} - static int brw_cache_item_equals(const struct brw_cache_item *a, const struct brw_cache_item *b) @@ -145,12 +129,13 @@ rehash(struct brw_cache *cache) /** - * Returns the buffer object matching cache_id and key, or NULL. + * Returns true and fills in *inout_offset and *out_aux when an item matches cache_id and key; returns false on a miss. */ -drm_intel_bo * +bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - void *aux_return) + uint32_t *inout_offset, void *out_aux) { + struct brw_context *brw = cache->brw; struct brw_cache_item *item; struct brw_cache_item lookup; GLuint hash; @@ -164,19 +149,45 @@ brw_search_cache(struct brw_cache *cache, item = search_cache(cache, hash, &lookup); if (item == NULL) - return NULL; + return false; - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); + *(void **)out_aux = ((char *)item->key + item->key_size); - update_cache_last(cache, cache_id, item->bo); + if (item->offset != *inout_offset) { + brw->state.dirty.cache |= (1 << cache_id); + *inout_offset = item->offset; + } - drm_intel_bo_reference(item->bo); - return item->bo; + return true; } +static void +brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) +{ + struct brw_context *brw = cache->brw; + struct intel_context *intel = &brw->intel; + drm_intel_bo 
*new_bo; + + new_bo = drm_intel_bo_alloc(intel->bufmgr, "program cache", new_size, 64); -drm_intel_bo * + /* Copy any existing data that needs to be saved. */ + if (cache->next_offset != 0) { + drm_intel_bo_map(cache->bo, false); + drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual); + drm_intel_bo_unmap(cache->bo); + } + + drm_intel_bo_unreference(cache->bo); + cache->bo = new_bo; + cache->bo_used_by_gpu = false; + + /* Since we have a new BO in place, we need to signal the units + * that depend on it (state base address on gen5+, or unit state before). + */ + brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE; +} + +void brw_upload_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, @@ -185,12 +196,12 @@ brw_upload_cache(struct brw_cache *cache, GLuint data_size, const void *aux, GLuint aux_size, - void *aux_return) + uint32_t *out_offset, + void *out_aux) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash; void *tmp; - drm_intel_bo *bo; item->cache_id = cache_id; item->key = key; @@ -198,10 +209,28 @@ brw_upload_cache(struct brw_cache *cache, hash = hash_key(item); item->hash = hash; - /* Create the buffer object to contain the data */ - bo = drm_intel_bo_alloc(cache->brw->intel.bufmgr, - cache->name[cache_id], data_size, 1 << 6); + /* Allocate space in the cache BO for our new program. */ + if (cache->next_offset + data_size > cache->bo->size) { + uint32_t new_size = cache->bo->size * 2; + + while (cache->next_offset + data_size > new_size) + new_size *= 2; + + brw_cache_new_bo(cache, new_size); + } + + /* If we would block on writing to an in-use program BO, just + * recreate it. 
+ */ + if (cache->bo_used_by_gpu) { + brw_cache_new_bo(cache, cache->bo->size); + } + + item->offset = cache->next_offset; + item->size = data_size; + /* Programs are always 64-byte aligned, so set up the next one now */ + cache->next_offset = ALIGN(item->offset + data_size, 64); /* Set up the memory containing the key and aux_data */ tmp = malloc(key_size + aux_size); @@ -211,9 +240,6 @@ brw_upload_cache(struct brw_cache *cache, item->key = tmp; - item->bo = bo; - drm_intel_bo_reference(bo); - if (cache->n_items > cache->size * 1.5) rehash(cache); @@ -222,34 +248,18 @@ brw_upload_cache(struct brw_cache *cache, cache->items[hash] = item; cache->n_items++; - if (aux_return) { - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - } - - DBG("upload %s: %d bytes to cache id %d\n", - cache->name[cache_id], - data_size, cache_id); - /* Copy data to the buffer */ - drm_intel_bo_subdata(bo, 0, data_size, data); - - update_cache_last(cache, cache_id, bo); + drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); - return bo; -} - -static void -brw_init_cache_id(struct brw_cache *cache, - const char *name, - enum brw_cache_id id) -{ - cache->name[id] = strdup(name); + *out_offset = item->offset; + *(void **)out_aux = (void *)((char *)item->key + item->key_size); + cache->brw->state.dirty.cache |= 1 << cache_id; } - void brw_init_caches(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct brw_cache *cache = &brw->cache; cache->brw = brw; @@ -259,36 +269,15 @@ brw_init_caches(struct brw_context *brw) cache->items = (struct brw_cache_item **) calloc(1, cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); - brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); - brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); - brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); - brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); - brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); - brw_init_cache_id(cache, 
"SF_VP", BRW_SF_VP); - - brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT); - - brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT); - - brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG); - - brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT); - - brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG); - brw_init_cache_id(cache, "CLIP_VP", BRW_CLIP_VP); - - brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); - - brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); - brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE); - brw_init_cache_id(cache, "COLOR_CALC_STATE", BRW_COLOR_CALC_STATE); - brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE); + cache->bo = drm_intel_bo_alloc(intel->bufmgr, + "program cache", + 4096, 64); } static void brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { + struct intel_context *intel = &brw->intel; struct brw_cache_item *c, *next; GLuint i; @@ -297,7 +286,6 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { next = c->next; - drm_intel_bo_unreference(c->bo); free((void *)c->key); free(c); } @@ -306,9 +294,18 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; + /* Start putting programs into the start of the BO again, since + * we'll never find the old results. + */ + cache->next_offset = 0; + + /* We need to make sure that the programs get regenerated, since + * any offsets leftover in brw_context will no longer be valid. 
+ */ brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; + intel_batchbuffer_flush(intel); } void @@ -325,15 +322,13 @@ brw_state_cache_check_size(struct brw_context *brw) static void brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { - GLuint i; DBG("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); - for (i = 0; i < BRW_MAX_CACHE; i++) { - drm_intel_bo_unreference(cache->last_bo[i]); - free(cache->name[i]); - } + /* Drop the cache's reference on the program cache BO so it is not leaked. */ + drm_intel_bo_unreference(cache->bo); + cache->bo = NULL; free(cache->items); cache->items = NULL; cache->size = 0; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index ff06cb3a91..7a3a88f04f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -459,21 +459,19 @@ static void dump_blend_state(struct brw_context *brw) } -static void brw_debug_prog(const char *name, drm_intel_bo *prog) +static void brw_debug_prog(struct brw_context *brw, + const char *name, uint32_t prog_offset) { unsigned int i; uint32_t *data; - if (prog == NULL) - return; - - drm_intel_bo_map(prog, GL_FALSE); + drm_intel_bo_map(brw->cache.bo, false); - data = prog->virtual; + data = brw->cache.bo->virtual + prog_offset; - for (i = 0; i < prog->size / 4 / 4; i++) { + for (i = 0; i < (brw->cache.bo->size - prog_offset) / 4 / 4; i++) { fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)prog->offset + i * 4 * 4, + name, (unsigned int)brw->cache.bo->offset + prog_offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); /* Stop at the end of the program. It'd be nice to keep track of the actual * intended program size instead of guessing like this. 
@@ -485,7 +483,7 @@ static void brw_debug_prog(const char *name, drm_intel_bo *prog) break; } - drm_intel_bo_unmap(prog); + drm_intel_bo_unmap(brw->cache.bo); } @@ -518,17 +516,19 @@ void brw_debug_batch(struct intel_context *intel) if (intel->gen < 6) state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, sizeof(struct brw_vs_unit_state)); - brw_debug_prog("VS prog", brw->vs.prog_bo); + brw_debug_prog(brw, "VS prog", brw->vs.prog_offset); if (intel->gen < 6) state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, sizeof(struct brw_gs_unit_state)); - brw_debug_prog("GS prog", brw->gs.prog_bo); + if (brw->gs.prog_active) { + brw_debug_prog(brw, "GS prog", brw->gs.prog_offset); + } if (intel->gen < 6) { state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, sizeof(struct brw_sf_unit_state)); - brw_debug_prog("SF prog", brw->sf.prog_bo); + brw_debug_prog(brw, "SF prog", brw->sf.prog_offset); } if (intel->gen >= 7) dump_sf_clip_viewport_state(brw); @@ -540,7 +540,7 @@ void brw_debug_batch(struct intel_context *intel) if (intel->gen < 6) state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, sizeof(struct brw_wm_unit_state)); - brw_debug_prog("WM prog", brw->wm.prog_bo); + brw_debug_prog(brw, "WM prog", brw->wm.prog_offset); if (intel->gen >= 6) { dump_cc_viewport_state(brw); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 6a4c112dcf..50ab490f33 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -47,11 +47,11 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_check_fallback, &brw_wm_input_sizes, - &brw_vs_prog, - &brw_gs_prog, - &brw_clip_prog, - &brw_sf_prog, - &brw_wm_prog, + &brw_vs_prog, /* must do before GS prog, state base address. 
*/ + &brw_gs_prog, /* must do before state base address */ + &brw_clip_prog, /* must do before state base address */ + &brw_sf_prog, /* must do before state base address */ + &brw_wm_prog, /* must do before state base address */ /* Once all the programs are done, we know how large urb entry * sizes need to be and can decide if we need to change the urb @@ -110,9 +110,9 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_check_fallback, &brw_wm_input_sizes, - &brw_vs_prog, - &brw_gs_prog, - &brw_wm_prog, + &brw_vs_prog, /* must do before state base address */ + &brw_gs_prog, /* must do before state base address */ + &brw_wm_prog, /* must do before state base address */ &gen6_clip_vp, &gen6_sf_vp, @@ -365,6 +365,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PRIMITIVE), DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_PROGRAM_CACHE), DEFINE_BIT(BRW_NEW_PSP), DEFINE_BIT(BRW_NEW_WM_SURFACES), DEFINE_BIT(BRW_NEW_INDICES), diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 80d5e78ed0..a9ad5311fe 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -105,12 +105,11 @@ static void do_vs_prog( struct brw_context *brw, /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; - drm_intel_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache(&brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, aux_size, - &brw->vs.prog_data); + brw_upload_cache(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, aux_size, + &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); } @@ -153,14 +152,11 @@ static void brw_upload_vs_prog(struct brw_context *brw) } } - /* Make an early check for the key. 
- */ - drm_intel_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, - &key, sizeof(key), - &brw->vs.prog_data); - if (brw->vs.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + &brw->vs.prog_offset, &brw->vs.prog_data)) { do_vs_prog(brw, vp, &key); + } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 1eee5b7e5d..185020c816 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -58,8 +58,14 @@ brw_prepare_vs_unit(struct brw_context *brw) vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); memset(vs, 0, sizeof(*vs)); - /* CACHE_NEW_VS_PROG */ - vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; + /* grf_reg_count is folded into the relocation delta below, so it must already be set here. */ + vs->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->vs.state_offset + + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_offset + + (vs->thread0.grf_reg_count << 1)) >> 6; vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, @@ -152,13 +158,6 @@ brw_prepare_vs_unit(struct brw_context *brw) */ vs->vs6.vs_enable = 1; - /* Emit VS program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset + - offsetof(struct brw_vs_unit_state, - thread0)), - brw->vs.prog_bo, vs->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->state.dirty.cache |= CACHE_NEW_VS_UNIT; } @@ -166,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), diff --git 
a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 236c4d297d..0f73148262 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -69,14 +69,8 @@ static void brw_destroy_context( struct intel_context *intel ) ralloc_free(brw->wm.compile_data); dri_bo_release(&brw->curbe.curbe_bo); - dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.const_bo); - dri_bo_release(&brw->gs.prog_bo); - dri_bo_release(&brw->clip.prog_bo); - dri_bo_release(&brw->sf.prog_bo); - dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.const_bo); - dri_bo_release(&brw->cc.prog_bo); free(brw->curbe.last_buf); free(brw->curbe.next_buf); @@ -125,6 +119,12 @@ static void brw_new_batch( struct intel_context *intel ) brw->state.dirty.brw |= BRW_NEW_CONTEXT | BRW_NEW_BATCH; brw->vb.nr_current_buffers = 0; + + /* Mark that the current program cache BO has been used by the GPU. + * It will be reallocated if we need to put new programs in for the + * next batch. 
+ */ + brw->cache.bo_used_by_gpu = true; } static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 1aebd12df4..f1c9985290 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -273,12 +273,11 @@ bool do_wm_prog(struct brw_context *brw, */ program = brw_get_program(&c->func, &program_size); - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache(&brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - program, program_size, - &c->prog_data, sizeof(c->prog_data), - &brw->wm.prog_data); + brw_upload_cache(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + program, program_size, + &c->prog_data, sizeof(c->prog_data), + &brw->wm.prog_offset, &brw->wm.prog_data); return true; } @@ -477,13 +476,9 @@ static void brw_prepare_wm_prog(struct brw_context *brw) brw_wm_populate_key(brw, &key); - /* Make an early check for the key. 
- */ - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, - &key, sizeof(key), - &brw->wm.prog_data); - if (brw->wm.prog_bo == NULL) { + if (!brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + &brw->wm.prog_offset, &brw->wm.prog_data)) { bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp, &key); assert(success); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index ef98f8126d..506e2bdff5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -90,13 +90,25 @@ brw_prepare_wm_unit(struct brw_context *brw) brw->wm.prog_data->first_curbe_grf_16); } - /* CACHE_NEW_WM_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */ wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks; wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16; - wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ - /* reloc */ - wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset + - brw->wm.prog_data->prog_offset_16) >> 6; + + wm->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_offset + + (wm->thread0.grf_reg_count << 1)) >> 6; + + wm->wm9.kernel_start_pointer_2 = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, wm9), + brw->wm.prog_offset + + brw->wm.prog_data->prog_offset_16 + + (wm->wm9.grf_reg_count_2 << 1)) >> 6; + wm->thread1.depth_coef_urb_read_offset = 1; wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -214,23 +226,6 @@ brw_prepare_wm_unit(struct brw_context *brw) if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm) wm->wm4.stats_enable = 1; - /* Emit WM program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, - brw->wm.state_offset + - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo, 
wm->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - - if (brw->wm.prog_data->prog_offset_16) { - drm_intel_bo_emit_reloc(intel->batch.bo, - brw->wm.state_offset + - offsetof(struct brw_wm_unit_state, wm9), - brw->wm.prog_bo, - ((wm->wm9.grf_reg_count_2 << 1) + - brw->wm.prog_data->prog_offset_16), - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - /* Emit scratch space relocation */ if (brw->wm.prog_data->total_scratch != 0) { drm_intel_bo_emit_reloc(intel->batch.bo, @@ -265,6 +260,7 @@ const struct brw_tracked_state brw_wm_unit = { _NEW_BUFFERS), .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_WM_SURFACES), diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index c1d0a73939..e73e7824af 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -45,7 +45,7 @@ upload_gs_state(struct brw_context *brw) ADVANCE_BATCH(); // GS should never be used on Gen6. Disable it. 
- assert(brw->gs.prog_bo == NULL); + assert(!brw->gs.prog_active); BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index 62645a6a30..b4105111c8 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -64,7 +64,7 @@ upload_urb(struct brw_context *brw) assert(brw->urb.nr_vs_entries % 4 == 0); assert(brw->urb.nr_gs_entries % 4 == 0); /* GS requirement */ - assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); + assert(!brw->gs.prog_active || brw->urb.vs_size < 5); BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index b46368e36e..7838a91d8d 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -147,7 +147,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->vs.prog_offset); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 43e651db3e..024a1d879e 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -183,7 +183,7 @@ upload_wm_state(struct brw_context *brw) BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); if (brw->wm.prog_data->total_scratch) { OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, @@ -195,12 +195,8 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(dw5); OUT_BATCH(dw6); OUT_BATCH(0); /* kernel 1 pointer */ - if 
(brw->wm.prog_data->prog_offset_16) { - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.prog_data->prog_offset_16); - } else { - OUT_BATCH(0); /* kernel 2 pointer */ - } + /* kernel 2 pointer */ + OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c index 4e9461739d..a44d31596b 100644 --- a/src/mesa/drivers/dri/i965/gen7_disable.c +++ b/src/mesa/drivers/dri/i965/gen7_disable.c @@ -31,7 +31,7 @@ disable_stages(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - assert(brw->gs.prog_bo == NULL); + assert(!brw->gs.prog_active); /* Disable the Geometry Shader (GS) Unit */ BEGIN_BATCH(7); diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 3a614693df..b36d780ed4 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -78,7 +78,7 @@ upload_urb(struct brw_context *brw) assert(brw->urb.nr_vs_entries % 8 == 0); assert(brw->urb.nr_gs_entries % 8 == 0); /* GS requirement */ - assert(!brw->gs.prog_bo); + assert(!brw->gs.prog_active); BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index ae7a1d6c35..0fad3d2fb6 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -67,7 +67,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->vs.prog_offset); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 6a64eb8a2d..ac6ba2fed1 100644 --- 
a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -227,18 +227,13 @@ upload_ps_state(struct brw_context *brw) BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ - if (brw->wm.prog_data->prog_offset_16) { - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.prog_data->prog_offset_16); - } else { - OUT_BATCH(0); /* kernel 2 pointer */ - } + OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); } |