-rw-r--r-- | intel/intel_bufmgr_gem.c | 260
1 file changed, 97 insertions(+), 163 deletions(-)
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 007a6d86..686e2851 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -83,12 +83,10 @@
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
-typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
+/* Maximum size of an object we'll hang onto in the buffer cache. */
+#define CACHE_MAX_SIZE (64 * 1024 * 1024)
 
-struct drm_intel_gem_bo_bucket {
-	drmMMListHead head;
-	unsigned long size;
-};
+typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
 
 typedef struct _drm_intel_bufmgr_gem {
 	drm_intel_bufmgr bufmgr;
@@ -105,9 +103,7 @@ typedef struct _drm_intel_bufmgr_gem {
 	int exec_size;
 	int exec_count;
 
-	/** Array of lists of cached gem objects of power-of-two sizes */
-	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
-	int num_buckets;
+	drmMMListHead cache;
 	time_t time;
 
 	drmMMListHead named;
@@ -347,23 +343,6 @@ drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
 	return i;
 }
 
-static struct drm_intel_gem_bo_bucket *
-drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
-				 unsigned long size)
-{
-	int i;
-
-	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
-		struct drm_intel_gem_bo_bucket *bucket =
-		    &bufmgr_gem->cache_bucket[i];
-		if (bucket->size >= size) {
-			return bucket;
-		}
-	}
-
-	return NULL;
-}
-
 static void
 drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
 {
@@ -618,14 +597,13 @@ drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
 
 /* drop the oldest entries that have been purged by the kernel */
 static void
-drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
-				    struct drm_intel_gem_bo_bucket *bucket)
+drm_intel_gem_bo_cache_purge(drm_intel_bufmgr_gem *bufmgr_gem)
 {
-	while (!DRMLISTEMPTY(&bucket->head)) {
+	while (!DRMLISTEMPTY(&bufmgr_gem->cache)) {
 		drm_intel_bo_gem *bo_gem;
 
 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
-				      bucket->head.next, head);
+				      bufmgr_gem->cache.next, head);
 		if (drm_intel_gem_bo_madvise_internal
 		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
 			break;
@@ -635,6 +613,59 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
 	}
 }
 
+static bool
+size_is_close_enough(drm_intel_bo_gem *bo_gem, unsigned long size)
+{
+	if (bo_gem->bo.size < size)
+		return false;
+	if (bo_gem->bo.size > size + size / 4)
+		return false;
+
+	return true;
+}
+
+static drm_intel_bo_gem *
+find_cached_bo(drm_intel_bufmgr_gem *bufmgr_gem,
+	       unsigned long size,
+	       unsigned long flags)
+{
+	drm_intel_bo_gem *bo_gem;
+	drmMMListHead *entry, *temp;
+
+	if (size > CACHE_MAX_SIZE)
+		return NULL;
+
+	if (DRMLISTEMPTY(&bufmgr_gem->cache))
+		return NULL;
+
+	if (!(flags & BO_ALLOC_FOR_RENDER)) {
+		/* For non-render-target BOs (where we're probably
+		 * going to map it first thing in order to fill it
+		 * with data), check if the last BO in the cache is
+		 * unbusy, and only reuse in that case. Otherwise,
+		 * allocating a new buffer is probably faster than
+		 * waiting for the GPU to finish.
+		 */
+		DRMLISTFOREACH(entry, &bufmgr_gem->cache) {
+			bo_gem = DRMLISTENTRY(drm_intel_bo_gem, entry, head);
+
+			if (drm_intel_gem_bo_busy(&bo_gem->bo))
+				return NULL;
+
+			if (size_is_close_enough(bo_gem, size))
+				return bo_gem;
+		}
+	} else {
+		DRMLISTFOREACHSAFEREVERSE(entry, temp, &bufmgr_gem->cache) {
+			bo_gem = DRMLISTENTRY(drm_intel_bo_gem, entry, head);
+			if (size_is_close_enough(bo_gem, size))
+				return bo_gem;
+		}
+	}
+
+	return NULL;
+}
+
 static drm_intel_bo *
 drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
 				const char *name,
@@ -647,88 +678,44 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
 	drm_intel_bo_gem *bo_gem;
 	unsigned int page_size = getpagesize();
 	int ret;
-	struct drm_intel_gem_bo_bucket *bucket;
-	bool alloc_from_cache;
-	unsigned long bo_size;
-	bool for_render = false;
-
-	if (flags & BO_ALLOC_FOR_RENDER)
-		for_render = true;
-
-	/* Round the allocated size up to a power of two number of pages. */
-	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
 
-	/* If we don't have caching at this size, don't actually round the
-	 * allocation up.
-	 */
-	if (bucket == NULL) {
-		bo_size = size;
-		if (bo_size < page_size)
-			bo_size = page_size;
-	} else {
-		bo_size = bucket->size;
-	}
+	size = ALIGN(size, page_size);
 
 	pthread_mutex_lock(&bufmgr_gem->lock);
 	/* Get a buffer out of the cache if available */
 retry:
-	alloc_from_cache = false;
-	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
-		if (for_render) {
-			/* Allocate new render-target BOs from the tail (MRU)
-			 * of the list, as it will likely be hot in the GPU
-			 * cache and in the aperture for us.
-			 */
-			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
-					      bucket->head.prev, head);
-			DRMLISTDEL(&bo_gem->head);
-			alloc_from_cache = true;
-		} else {
-			/* For non-render-target BOs (where we're probably
-			 * going to map it first thing in order to fill it
-			 * with data), check if the last BO in the cache is
-			 * unbusy, and only reuse in that case. Otherwise,
-			 * allocating a new buffer is probably faster than
-			 * waiting for the GPU to finish.
-			 */
-			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
-					      bucket->head.next, head);
-			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
-				alloc_from_cache = true;
-				DRMLISTDEL(&bo_gem->head);
-			}
-		}
+	bo_gem = find_cached_bo(bufmgr_gem, size, flags);
 
-		if (alloc_from_cache) {
-			if (!drm_intel_gem_bo_madvise_internal
-			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
-				drm_intel_gem_bo_free(&bo_gem->bo);
-				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
-								    bucket);
-				goto retry;
-			}
+	if (bo_gem) {
+		DRMLISTDEL(&bo_gem->head);
 
-			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
-								 tiling_mode,
-								 stride)) {
-				drm_intel_gem_bo_free(&bo_gem->bo);
-				goto retry;
-			}
+		if (!drm_intel_gem_bo_madvise_internal (bufmgr_gem, bo_gem,
+							I915_MADV_WILLNEED)) {
+			drm_intel_gem_bo_free(&bo_gem->bo);
+			drm_intel_gem_bo_cache_purge(bufmgr_gem);
+			goto retry;
+		}
+
+		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
							 tiling_mode,
							 stride)) {
+			drm_intel_gem_bo_free(&bo_gem->bo);
+			goto retry;
 		}
 	}
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
-	if (!alloc_from_cache) {
+	if (!bo_gem) {
 		struct drm_i915_gem_create create;
 
 		bo_gem = calloc(1, sizeof(*bo_gem));
 		if (!bo_gem)
 			return NULL;
 
-		bo_gem->bo.size = bo_size;
+		bo_gem->bo.size = size;
 
 		VG_CLEAR(create);
-		create.size = bo_size;
+		create.size = size;
 
 		ret = drmIoctl(bufmgr_gem->fd,
 			       DRM_IOCTL_I915_GEM_CREATE,
@@ -992,27 +979,20 @@ drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
 static void
 drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
 {
-	int i;
-
 	if (bufmgr_gem->time == time)
 		return;
 
-	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
-		struct drm_intel_gem_bo_bucket *bucket =
-		    &bufmgr_gem->cache_bucket[i];
-
-		while (!DRMLISTEMPTY(&bucket->head)) {
-			drm_intel_bo_gem *bo_gem;
+	while (!DRMLISTEMPTY(&bufmgr_gem->cache)) {
+		drm_intel_bo_gem *bo_gem;
 
-			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
-					      bucket->head.next, head);
-			if (time - bo_gem->free_time <= 1)
-				break;
+		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bufmgr_gem->cache.next, head);
+		if (time - bo_gem->free_time <= 1)
+			break;
 
-			DRMLISTDEL(&bo_gem->head);
+		DRMLISTDEL(&bo_gem->head);
 
-			drm_intel_gem_bo_free(&bo_gem->bo);
-		}
+		drm_intel_gem_bo_free(&bo_gem->bo);
 	}
 
 	bufmgr_gem->time = time;
@@ -1084,7 +1064,6 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
-	struct drm_intel_gem_bo_bucket *bucket;
 	int i;
 
 	/* Unreference all the target buffers */
@@ -1121,9 +1100,9 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
 
 	DRMLISTDEL(&bo_gem->name_list);
 
-	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
 	/* Put the buffer into our internal cache for reuse if we can. */
-	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
+	if (bufmgr_gem->bo_reuse && bo_gem->reusable &&
+	    bo->size < CACHE_MAX_SIZE &&
 	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
 					      I915_MADV_DONTNEED)) {
 		bo_gem->free_time = time;
@@ -1131,7 +1110,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
 		bo_gem->name = NULL;
 		bo_gem->validate_index = -1;
 
-		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
+		DRMLISTADDTAIL(&bo_gem->head, &bufmgr_gem->cache);
 	} else {
 		drm_intel_gem_bo_free(bo);
 	}
@@ -1612,7 +1591,7 @@ static void
 drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
-	int i;
+	drm_intel_bo_gem *bo_gem;
 
 	free(bufmgr_gem->exec2_objects);
 	free(bufmgr_gem->exec_objects);
@@ -1622,18 +1601,12 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
 	pthread_mutex_destroy(&bufmgr_gem->lock);
 
 	/* Free any cached buffer objects we were going to reuse */
-	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
-		struct drm_intel_gem_bo_bucket *bucket =
-		    &bufmgr_gem->cache_bucket[i];
-		drm_intel_bo_gem *bo_gem;
-
-		while (!DRMLISTEMPTY(&bucket->head)) {
-			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
-					      bucket->head.next, head);
-			DRMLISTDEL(&bo_gem->head);
+	while (!DRMLISTEMPTY(&bufmgr_gem->cache)) {
+		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+				      bufmgr_gem->cache.next, head);
+		DRMLISTDEL(&bo_gem->head);
 
-			drm_intel_gem_bo_free(&bo_gem->bo);
-		}
+		drm_intel_gem_bo_free(&bo_gem->bo);
 	}
 
 	free(bufmgr);
@@ -2867,45 +2840,6 @@ drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
 	return 0;
 }
 
-static void
-add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
-{
-	unsigned int i = bufmgr_gem->num_buckets;
-
-	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
-
-	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
-	bufmgr_gem->cache_bucket[i].size = size;
-	bufmgr_gem->num_buckets++;
-}
-
-static void
-init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
-{
-	unsigned long size, cache_max_size = 64 * 1024 * 1024;
-
-	/* OK, so power of two buckets was too wasteful of memory.
-	 * Give 3 other sizes between each power of two, to hopefully
-	 * cover things accurately enough.  (The alternative is
-	 * probably to just go for exact matching of sizes, and assume
-	 * that for things like composited window resize the tiled
-	 * width/height alignment and rounding of sizes to pages will
-	 * get us useful cache hit rates anyway)
-	 */
-	add_bucket(bufmgr_gem, 4096);
-	add_bucket(bufmgr_gem, 4096 * 2);
-	add_bucket(bufmgr_gem, 4096 * 3);
-
-	/* Initialize the linked lists for BO reuse cache. */
-	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
-		add_bucket(bufmgr_gem, size);
-
-		add_bucket(bufmgr_gem, size + size * 1 / 4);
-		add_bucket(bufmgr_gem, size + size * 2 / 4);
-		add_bucket(bufmgr_gem, size + size * 3 / 4);
-	}
-}
-
 void
 drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
 {
@@ -3363,7 +3297,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
 
 	DRMINITLISTHEAD(&bufmgr_gem->named);
-	init_cache_buckets(bufmgr_gem);
+	DRMINITLISTHEAD(&bufmgr_gem->cache);
 
 	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
 	bufmgr_gem->vma_max = -1;	/* unlimited by default */
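
The reuse policy introduced by this patch comes down to two numbers: requests are rounded up to the page size, and a cached BO qualifies for reuse only if it is at least as large as the request but no more than 25% larger (size_is_close_enough()), with nothing above the 64 MiB CACHE_MAX_SIZE kept in the cache at all. The standalone sketch below only exercises that arithmetic outside libdrm; the local ALIGN macro, the main() harness, and the sample sizes are illustrative stand-ins and are not part of the patch.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE      4096UL
#define CACHE_MAX_SIZE (64UL * 1024 * 1024)
/* Local stand-in for libdrm's ALIGN(); assumes a power-of-two alignment. */
#define ALIGN(v, a)    (((v) + (a) - 1) & ~((a) - 1))

/* Mirrors size_is_close_enough() from the patch: a cached BO matches if it
 * is at least as large as the request but no more than 25% larger. */
static bool size_is_close_enough(unsigned long bo_size, unsigned long size)
{
	if (bo_size < size)
		return false;
	if (bo_size > size + size / 4)
		return false;
	return true;
}

int main(void)
{
	/* Hypothetical request and cached BO sizes, for illustration only. */
	unsigned long request = ALIGN(1000000UL, PAGE_SIZE);	/* 1003520 */
	unsigned long cached[] = { 1003520UL, 1200128UL, 1300480UL, 4096UL };
	unsigned int i;

	for (i = 0; i < sizeof(cached) / sizeof(cached[0]); i++) {
		bool ok = request <= CACHE_MAX_SIZE &&
			  size_is_close_enough(cached[i], request);
		printf("request %lu, cached %lu: %s\n",
		       request, cached[i], ok ? "reuse" : "skip");
	}
	return 0;
}

Note that, unlike the bucket scheme it replaces, a cache miss no longer rounds the new allocation up to a bucket size; the BO is created at its exact page-aligned size, and the 25% slack only applies when matching against already-cached buffers.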