diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-13 02:36:00 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-13 02:36:00 +0100 |
commit | fedcb3219e8f9a587c693bbb2178ec3e83bf0320 (patch) | |
tree | b37f142039934c27eb13d9ff2344776d7f92bff6 /i965/brw_wm_state.c | |
parent | 6e23622cb869c14d82f8c901c4bbea80ded6220e (diff) |
Import i915 and i965 dri drivers from mesa 7.1.0. [7.1.0]
Diffstat (limited to 'i965/brw_wm_state.c')
-rw-r--r-- | i965/brw_wm_state.c | 283 |
1 files changed, 192 insertions, 91 deletions
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index 5b4f2ab..f4da0f2 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -34,109 +34,136 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" +#include "brw_wm.h" /*********************************************************************** * WM unit - fragment programs and rasterization */ -static void invalidate_scratch_cb( struct intel_context *intel, - void *unused ) -{ - /* nothing */ -} +struct brw_wm_unit_key { + unsigned int total_grf, total_scratch; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int dispatch_grf_start_reg; + + unsigned int curbe_offset; + unsigned int urb_size; + + unsigned int max_threads; + unsigned int nr_surfaces, sampler_count; + GLboolean uses_depth, computes_depth, uses_kill, is_glsl; + GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; + GLfloat offset_units, offset_factor; +}; -static void upload_wm_unit(struct brw_context *brw ) +static void +wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { + const struct gl_fragment_program *fp = brw->fragment_program; struct intel_context *intel = &brw->intel; - struct brw_wm_unit_state wm; - GLuint max_threads; + + memset(key, 0, sizeof(*key)); if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - max_threads = 0; + key->max_threads = 1; else - max_threads = 31; - - - memset(&wm, 0, sizeof(wm)); + key->max_threads = 32; /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16; - wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; - wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; - wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; - - wm.wm5.max_threads = max_threads; - - if (brw->wm.prog_data->total_scratch) { - GLuint 
per_thread = (brw->wm.prog_data->total_scratch + 1023) / 1024; - GLuint total = per_thread * (max_threads + 1); - - /* Scratch space -- just have to make sure there is sufficient - * allocated for the active program and current number of threads. - */ - - if (!brw->wm.scratch_buffer) { - bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12); - bmBufferSetInvalidateCB(intel, - brw->wm.scratch_buffer, - invalidate_scratch_cb, - NULL, - GL_FALSE); - } + key->total_grf = brw->wm.prog_data->total_grf; + key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; + key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; + key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); - if (total > brw->wm.scratch_buffer_size) { - brw->wm.scratch_buffer_size = total; - bmBufferData(intel, - brw->wm.scratch_buffer, - brw->wm.scratch_buffer_size, - NULL, - 0); - } - - assert(per_thread <= 12 * 1024); - wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1; + /* BRW_NEW_URB_FENCE */ + key->urb_size = brw->urb.vsize; - /* XXX: could make this dynamic as this is so rarely active: - */ - /* BRW_NEW_LOCK */ - wm.thread2.scratch_space_base_pointer = - bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10; - } + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.wm_start; /* CACHE_NEW_SURFACE */ - wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + key->nr_surfaces = brw->wm.nr_surfaces; - /* BRW_NEW_CURBE_OFFSETS */ - wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + /* CACHE_NEW_SAMPLER */ + key->sampler_count = brw->wm.sampler_count; - wm.thread3.urb_entry_read_offset = 0; + /* _NEW_POLYGONSTIPPLE */ + key->polygon_stipple = brw->attribs.Polygon->StippleFlag; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; + + /* as far as we can tell */ + key->computes_depth = + 
(fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + + /* _NEW_COLOR */ + key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled; + key->is_glsl = brw_wm_is_glsl(fp); + + /* XXX: This needs a flag to indicate when it changes. */ + key->stats_wm = intel->stats_wm; + + /* _NEW_LINE */ + key->line_stipple = brw->attribs.Line->StippleFlag; + + /* _NEW_POLYGON */ + key->offset_enable = brw->attribs.Polygon->OffsetFill; + key->offset_units = brw->attribs.Polygon->OffsetUnits; + key->offset_factor = brw->attribs.Polygon->OffsetFactor; +} + +static dri_bo * +wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, + dri_bo **reloc_bufs) +{ + struct brw_wm_unit_state wm; + dri_bo *bo; + + memset(&wm, 0, sizeof(wm)); + + wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (key->total_scratch != 0) { + wm.thread2.scratch_space_base_pointer = + brw->wm.scratch_buffer->offset >> 10; /* reloc */ + wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + } else { + wm.thread2.scratch_space_base_pointer = 0; + wm.thread2.per_thread_scratch_space = 0; + } - /* CACHE_NEW_SAMPLER */ - wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; - wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; + wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + wm.thread3.urb_entry_read_offset = 0; - /* BRW_NEW_FRAGMENT_PROGRAM */ - { - const struct gl_fragment_program *fp = brw->fragment_program; - - if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) - 
wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - - if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) - wm.wm5.program_computes_depth = 1; - - /* _NEW_COLOR */ - if (fp->UsesKill || - brw->attribs.Color->AlphaEnabled) - wm.wm5.program_uses_killpixel = 1; + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (brw->wm.sampler_bo != NULL) { + /* reloc */ + wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + } else { + wm.wm4.sampler_state_pointer = 0; } - wm.wm5.enable_16_pix = 1; + wm.wm5.program_uses_depth = key->uses_depth; + wm.wm5.program_computes_depth = key->computes_depth; + wm.wm5.program_uses_killpixel = key->uses_kill; + + if (key->is_glsl) + wm.wm5.enable_8_pix = 1; + else + wm.wm5.enable_16_pix = 1; + + wm.wm5.max_threads = key->max_threads - 1; wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ wm.wm5.legacy_line_rast = 0; wm.wm5.legacy_global_depth_bias = 0; @@ -144,34 +171,108 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.line_aa_region_width = 0; wm.wm5.line_endcap_aa_region_width = 1; - /* _NEW_POLYGONSTIPPLE */ - if (brw->attribs.Polygon->StippleFlag) - wm.wm5.polygon_stipple = 1; + wm.wm5.polygon_stipple = key->polygon_stipple; - /* _NEW_POLYGON */ - if (brw->attribs.Polygon->OffsetFill) { + if (key->offset_enable) { wm.wm5.depth_offset = 1; /* Something wierd going on with legacy_global_depth_bias, * offset_constant, scaling and MRD. This value passes glean * but gives some odd results elsewere (eg. the * quad-offset-units test). 
*/ - wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + wm.global_depth_offset_constant = key->offset_units * 2; /* This is the only value that passes glean: */ - wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; + wm.global_depth_offset_scale = key->offset_factor; } - /* _NEW_LINE */ - if (brw->attribs.Line->StippleFlag) { - wm.wm5.line_stipple = 1; - } + wm.wm5.line_stipple = key->line_stipple; - if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm) + if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) wm.wm4.stats_enable = 1; - brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc_bufs, 3, + &wm, sizeof(wm), + NULL, NULL); + + /* Emit WM program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key->total_scratch != 0) { + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_buffer); + } + + /* Emit sampler state relocation */ + if (key->sampler_count != 0) { + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + return bo; +} + + +static int upload_wm_unit( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + struct brw_wm_unit_key key; + dri_bo *reloc_bufs[3]; + int ret = 0, i; + wm_unit_populate_key(brw, &key); + + /* Allocate the necessary scratch space if we haven't already. Don't + * bother reducing the allocation later, since we use scratch so + * rarely. 
+ */ + assert(key.total_scratch <= 12 * 1024); + if (key.total_scratch) { + GLuint total = key.total_scratch * key.max_threads; + + if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { + dri_bo_unreference(brw->wm.scratch_buffer); + brw->wm.scratch_buffer = NULL; + } + if (brw->wm.scratch_buffer == NULL) { + brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096, DRM_BO_FLAG_MEM_TT); + } + } + + reloc_bufs[0] = brw->wm.prog_bo; + reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[2] = brw->wm.sampler_bo; + + dri_bo_unreference(brw->wm.state_bo); + brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, + &key, sizeof(key), + reloc_bufs, 3, + NULL); + if (brw->wm.state_bo == NULL) { + brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); + } + + for (i = 0; i < 3; i++) + if (reloc_bufs[i]) + ret |= dri_bufmgr_check_aperture_space(reloc_bufs[i]); + ret |= dri_bufmgr_check_aperture_space(brw->wm.state_bo); + return ret; } const struct brw_tracked_state brw_wm_unit = { @@ -189,6 +290,6 @@ const struct brw_tracked_state brw_wm_unit = { CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) }, - .update = upload_wm_unit + .prepare = upload_wm_unit, }; |