summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTopi Pohjolainen <topi.pohjolainen@intel.com>2015-06-25 14:00:41 +0300
committerTopi Pohjolainen <topi.pohjolainen@intel.com>2015-08-13 13:37:49 +0300
commit1bba29ed403e735ba0bf04ed8aa2e571884fcaaf (patch)
tree578d9a1434bf1525c81b943cd98920850af41e14
parent12a66d91f6b0beff123fb6fd8a4f3c3796379532 (diff)
i965: Stop aux data compare preventing program binary re-use
Items in the program cache consist of three things: key, the data representing the instructions and auxiliary data representing uniform storage. The data consisting of instructions is stored into a drm buffer object while the key and the auxiliary data reside in malloced section. Now the cache uploading is equipped with a check that iterates over existing items and seeks to find a another item using identical instruction data than the one being just uploaded. If such is found there is no need to add another section into the drm buffer object holding identical copy of the existing one. The item just being uploaded should instead simply point to the same offset in the underlying drm buffer object. Unfortunately the check for the matching instruction data is coupled with a check for matching auxiliary data also. This effectively prevents the cache from ever containing two items that could share a section in the drm buffer object. The constraint for the instruction data and auxiliary data to match is, fortunately, unnecessary strong. When items are stored into the cache they will anyway contain their own copy of the auxiliary data (even if they matched - which they in real world never will). The only thing the items would be sharing is the instruction data and hence we should only check for that to match and nothing else. No piglit regression in jenkins. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c52
1 files changed, 20 insertions, 32 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 61439a89f8..9e00c83740 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -200,36 +200,23 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
}
/**
- * Attempts to find an item in the cache with identical data and aux
- * data to use
+ * Attempts to find an item in the cache with identical data.
*/
-static bool
-brw_try_upload_using_copy(struct brw_cache *cache,
- struct brw_cache_item *result_item,
- const void *data,
- const void *aux)
+static const struct brw_cache_item *
+brw_lookup_prog(const struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data, unsigned data_size)
{
- struct brw_context *brw = cache->brw;
+ const struct brw_context *brw = cache->brw;
int i;
- struct brw_cache_item *item;
+ const struct brw_cache_item *item;
for (i = 0; i < cache->size; i++) {
for (item = cache->items[i]; item; item = item->next) {
- const void *item_aux = item->key + item->key_size;
int ret;
- if (item->cache_id != result_item->cache_id ||
- item->size != result_item->size ||
- item->aux_size != result_item->aux_size) {
- continue;
- }
-
- if (cache->aux_compare[result_item->cache_id]) {
- if (!cache->aux_compare[result_item->cache_id](item_aux, aux))
- continue;
- } else if (memcmp(item_aux, aux, item->aux_size) != 0) {
+ if (item->cache_id != cache_id || item->size != data_size)
continue;
- }
if (!brw->has_llc)
drm_intel_bo_map(cache->bo, false);
@@ -239,13 +226,11 @@ brw_try_upload_using_copy(struct brw_cache *cache,
if (ret)
continue;
- result_item->offset = item->offset;
-
- return true;
+ return item;
}
}
- return false;
+ return NULL;
}
static uint32_t
@@ -294,6 +279,8 @@ brw_upload_cache(struct brw_cache *cache,
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+ const struct brw_cache_item *matching_data =
+ brw_lookup_prog(cache, cache_id, data, data_size);
GLuint hash;
void *tmp;
@@ -305,14 +292,15 @@ brw_upload_cache(struct brw_cache *cache,
hash = hash_key(item);
item->hash = hash;
- /* If we can find a matching prog/prog_data combo in the cache
- * already, then reuse the existing stuff. This will mean not
- * flagging CACHE_NEW_* when transitioning between the two
- * equivalent hash keys. This is notably useful for programs
- * generating shaders at runtime, where multiple shaders may
- * compile to the thing in our backend.
+ /* If we can find a matching prog in the cache already, then reuse the
+ * existing stuff without creating new copy into the underlying buffer
+ * object. This is notably useful for programs generating shaders at
+ * runtime, where multiple shaders may compile to the same thing in our
+ * backend.
*/
- if (!brw_try_upload_using_copy(cache, item, data, aux)) {
+ if (matching_data) {
+ item->offset = matching_data->offset;
+ } else {
item->offset = brw_alloc_item_data(cache, data_size);
/* Copy data to the buffer */