-rw-r--r--   intel/intel_bufmgr_gem.c   260
1 file changed, 97 insertions(+), 163 deletions(-)
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 007a6d86..686e2851 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -83,12 +83,10 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
+/* Maximum size of an object we'll hang onto in the buffer cache. */
+#define CACHE_MAX_SIZE (64 * 1024 * 1024)
-struct drm_intel_gem_bo_bucket {
- drmMMListHead head;
- unsigned long size;
-};
+typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
typedef struct _drm_intel_bufmgr_gem {
drm_intel_bufmgr bufmgr;
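Note: the hunk above replaces the per-size bucket array with a single drmMMListHead; cached BOs are linked through the head member already embedded in drm_intel_bo_gem, and DRMLISTENTRY recovers the owning BO from a list node. A minimal standalone sketch of that container-of pattern follows; the list type, the macro definition, and example_bo are illustrative assumptions, not copied from libdrm.

/* Minimal sketch of the intrusive-list lookup the cache relies on.
 * The list type and DRMLISTENTRY definition below are assumptions that
 * follow the usual container-of pattern; example_bo is a hypothetical
 * stand-in for drm_intel_bo_gem. */
#include <assert.h>
#include <stddef.h>

typedef struct list_node {
	struct list_node *prev, *next;
} list_node;

#define DRMLISTENTRY(type, item, field) \
	((type *)((char *)(item) - offsetof(type, field)))

struct example_bo {
	unsigned long size;
	list_node head;		/* links the BO into the bufmgr-wide cache list */
};

int main(void)
{
	struct example_bo bo = { .size = 4096 };

	/* Given only the embedded node (as cache.next would hand us),
	 * recover the BO that contains it. */
	assert(DRMLISTENTRY(struct example_bo, &bo.head, head) == &bo);
	return 0;
}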
@@ -105,9 +103,7 @@ typedef struct _drm_intel_bufmgr_gem {
int exec_size;
int exec_count;
- /** Array of lists of cached gem objects of power-of-two sizes */
- struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
- int num_buckets;
+ drmMMListHead cache;
time_t time;
drmMMListHead named;
@@ -347,23 +343,6 @@ drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
return i;
}
-static struct drm_intel_gem_bo_bucket *
-drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
- unsigned long size)
-{
- int i;
-
- for (i = 0; i < bufmgr_gem->num_buckets; i++) {
- struct drm_intel_gem_bo_bucket *bucket =
- &bufmgr_gem->cache_bucket[i];
- if (bucket->size >= size) {
- return bucket;
- }
- }
-
- return NULL;
-}
-
static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
@@ -618,14 +597,13 @@ drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
/* drop the oldest entries that have been purged by the kernel */
static void
-drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
- struct drm_intel_gem_bo_bucket *bucket)
+drm_intel_gem_bo_cache_purge(drm_intel_bufmgr_gem *bufmgr_gem)
{
- while (!DRMLISTEMPTY(&bucket->head)) {
+ while (!DRMLISTEMPTY(&bufmgr_gem->cache)) {
drm_intel_bo_gem *bo_gem;
bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
+ bufmgr_gem->cache.next, head);
if (drm_intel_gem_bo_madvise_internal
(bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
break;
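Note on the madvise calls used in this purge loop: cached BOs are marked I915_MADV_DONTNEED so the kernel may reclaim their pages under memory pressure, and a later madvise call reports whether the pages survived. A hedged sketch of what drm_intel_gem_bo_madvise_internal() boils down to, using the i915 madvise ioctl from i915_drm.h with error handling simplified:

#include <stdbool.h>
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Returns true if the BO's backing pages are still resident. The purge
 * loop above frees leading cache entries for which this comes back false,
 * stopping at the first BO whose pages were retained. */
static bool bo_madvise(int fd, uint32_t handle, uint32_t madv)
{
	struct drm_i915_gem_madvise arg = {
		.handle = handle,
		.madv = madv,	/* I915_MADV_WILLNEED or I915_MADV_DONTNEED */
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg) != 0)
		return false;

	return arg.retained != 0;
}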
@@ -635,6 +613,59 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
}
}
+static bool
+size_is_close_enough(drm_intel_bo_gem *bo_gem, unsigned long size)
+{
+ if (bo_gem->bo.size < size)
+ return false;
+ if (bo_gem->bo.size > size + size / 4)
+ return false;
+
+ return true;
+}
+
+static drm_intel_bo_gem *
+find_cached_bo(drm_intel_bufmgr_gem *bufmgr_gem,
+ unsigned long size,
+ unsigned long flags)
+{
+ drm_intel_bo_gem *bo_gem;
+ drmMMListHead *entry, *temp;
+
+ if (size > CACHE_MAX_SIZE)
+ return NULL;
+
+ if (DRMLISTEMPTY(&bufmgr_gem->cache))
+ return NULL;
+
+ if (!(flags & BO_ALLOC_FOR_RENDER)) {
+ /* For non-render-target BOs (where we're probably
+ * going to map it first thing in order to fill it
+ * with data), check if the last BO in the cache is
+ * unbusy, and only reuse in that case. Otherwise,
+ * allocating a new buffer is probably faster than
+ * waiting for the GPU to finish.
+ */
+ DRMLISTFOREACH(entry, &bufmgr_gem->cache) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem, entry, head);
+
+ if (drm_intel_gem_bo_busy(&bo_gem->bo))
+ return NULL;
+
+ if (size_is_close_enough(bo_gem, size))
+ return bo_gem;
+ }
+ } else {
+ DRMLISTFOREACHSAFEREVERSE(entry, temp, &bufmgr_gem->cache) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem, entry, head);
+ if (size_is_close_enough(bo_gem, size))
+ return bo_gem;
+ }
+ }
+
+ return NULL;
+}
+
static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
const char *name,
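The two helpers added above implement the new reuse policy: size_is_close_enough() accepts a cached BO only if it is at least as large as the request and at most 25% larger, bounding per-allocation waste to roughly what the old bucket spacing allowed, and find_cached_bo() applies that test while scanning oldest-first (bailing on busy BOs) for CPU-fill allocations or newest-first for render targets. A standalone restatement of the slack rule with a few illustrative checks:

#include <assert.h>
#include <stdbool.h>

/* Same predicate as size_is_close_enough(), written against plain sizes. */
static bool close_enough(unsigned long bo_size, unsigned long request)
{
	return bo_size >= request && bo_size <= request + request / 4;
}

int main(void)
{
	assert(close_enough(4096, 4096));	/* exact fit is reused */
	assert(close_enough(5120, 4096));	/* +25% is the largest acceptable BO */
	assert(!close_enough(6144, 4096));	/* +50% would waste too much memory */
	assert(!close_enough(2048, 4096));	/* smaller BOs can never satisfy the request */
	return 0;
}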
@@ -647,88 +678,44 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
drm_intel_bo_gem *bo_gem;
unsigned int page_size = getpagesize();
int ret;
- struct drm_intel_gem_bo_bucket *bucket;
- bool alloc_from_cache;
- unsigned long bo_size;
- bool for_render = false;
-
- if (flags & BO_ALLOC_FOR_RENDER)
- for_render = true;
-
- /* Round the allocated size up to a power of two number of pages. */
- bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
- /* If we don't have caching at this size, don't actually round the
- * allocation up.
- */
- if (bucket == NULL) {
- bo_size = size;
- if (bo_size < page_size)
- bo_size = page_size;
- } else {
- bo_size = bucket->size;
- }
+ size = ALIGN(size, page_size);
pthread_mutex_lock(&bufmgr_gem->lock);
/* Get a buffer out of the cache if available */
retry:
- alloc_from_cache = false;
- if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
- if (for_render) {
- /* Allocate new render-target BOs from the tail (MRU)
- * of the list, as it will likely be hot in the GPU
- * cache and in the aperture for us.
- */
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.prev, head);
- DRMLISTDEL(&bo_gem->head);
- alloc_from_cache = true;
- } else {
- /* For non-render-target BOs (where we're probably
- * going to map it first thing in order to fill it
- * with data), check if the last BO in the cache is
- * unbusy, and only reuse in that case. Otherwise,
- * allocating a new buffer is probably faster than
- * waiting for the GPU to finish.
- */
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
- if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
- alloc_from_cache = true;
- DRMLISTDEL(&bo_gem->head);
- }
- }
+ bo_gem = find_cached_bo(bufmgr_gem, size, flags);
- if (alloc_from_cache) {
- if (!drm_intel_gem_bo_madvise_internal
- (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
- drm_intel_gem_bo_free(&bo_gem->bo);
- drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
- bucket);
- goto retry;
- }
+ if (bo_gem) {
+ DRMLISTDEL(&bo_gem->head);
- if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
- tiling_mode,
- stride)) {
- drm_intel_gem_bo_free(&bo_gem->bo);
- goto retry;
- }
+ if (!drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
+ I915_MADV_WILLNEED)) {
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ drm_intel_gem_bo_cache_purge(bufmgr_gem);
+ goto retry;
+ }
+
+ if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
+ tiling_mode,
+ stride)) {
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ goto retry;
}
}
pthread_mutex_unlock(&bufmgr_gem->lock);
- if (!alloc_from_cache) {
+ if (!bo_gem) {
struct drm_i915_gem_create create;
bo_gem = calloc(1, sizeof(*bo_gem));
if (!bo_gem)
return NULL;
- bo_gem->bo.size = bo_size;
+ bo_gem->bo.size = size;
VG_CLEAR(create);
- create.size = bo_size;
+ create.size = size;
ret = drmIoctl(bufmgr_gem->fd,
DRM_IOCTL_I915_GEM_CREATE,
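With the buckets gone, the hunk above rounds the request only up to the page size before either reusing a cached BO or issuing DRM_IOCTL_I915_GEM_CREATE. A small sketch of that rounding step, assuming the usual power-of-two round-up ALIGN() macro (the macro actually defined in intel_bufmgr_gem.c may differ in detail):

#include <stdio.h>
#include <unistd.h>

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

int main(void)
{
	unsigned long page_size = (unsigned long)getpagesize();
	unsigned long requests[] = { 1, 4096, 4097, 10000 };
	unsigned int i;

	/* Sizes are no longer rounded up to a bucket size, only to whole pages. */
	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
		printf("request %lu -> allocate %lu\n",
		       requests[i], ALIGN(requests[i], page_size));
	return 0;
}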
@@ -992,27 +979,20 @@ drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
- int i;
-
if (bufmgr_gem->time == time)
return;
- for (i = 0; i < bufmgr_gem->num_buckets; i++) {
- struct drm_intel_gem_bo_bucket *bucket =
- &bufmgr_gem->cache_bucket[i];
-
- while (!DRMLISTEMPTY(&bucket->head)) {
- drm_intel_bo_gem *bo_gem;
+ while (!DRMLISTEMPTY(&bufmgr_gem->cache)) {
+ drm_intel_bo_gem *bo_gem;
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
- if (time - bo_gem->free_time <= 1)
- break;
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bufmgr_gem->cache.next, head);
+ if (time - bo_gem->free_time <= 1)
+ break;
- DRMLISTDEL(&bo_gem->head);
+ DRMLISTDEL(&bo_gem->head);
- drm_intel_gem_bo_free(&bo_gem->bo);
- }
+ drm_intel_gem_bo_free(&bo_gem->bo);
}
bufmgr_gem->time = time;
@@ -1084,7 +1064,6 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
- struct drm_intel_gem_bo_bucket *bucket;
int i;
/* Unreference all the target buffers */
@@ -1121,9 +1100,9 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
DRMLISTDEL(&bo_gem->name_list);
- bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
/* Put the buffer into our internal cache for reuse if we can. */
- if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
+ if (bufmgr_gem->bo_reuse && bo_gem->reusable &&
+ bo->size < CACHE_MAX_SIZE &&
drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
I915_MADV_DONTNEED)) {
bo_gem->free_time = time;
@@ -1131,7 +1110,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
bo_gem->name = NULL;
bo_gem->validate_index = -1;
- DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
+ DRMLISTADDTAIL(&bo_gem->head, &bufmgr_gem->cache);
} else {
drm_intel_gem_bo_free(bo);
}
@@ -1612,7 +1591,7 @@ static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
- int i;
+ drm_intel_bo_gem *bo_gem;
free(bufmgr_gem->exec2_objects);
free(bufmgr_gem->exec_objects);
@@ -1622,18 +1601,12 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
pthread_mutex_destroy(&bufmgr_gem->lock);
/* Free any cached buffer objects we were going to reuse */
- for (i = 0; i < bufmgr_gem->num_buckets; i++) {
- struct drm_intel_gem_bo_bucket *bucket =
- &bufmgr_gem->cache_bucket[i];
- drm_intel_bo_gem *bo_gem;
-
- while (!DRMLISTEMPTY(&bucket->head)) {
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
- bucket->head.next, head);
- DRMLISTDEL(&bo_gem->head);
+ while (!DRMLISTEMPTY(&bufmgr_gem->cache)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bufmgr_gem->cache.next, head);
+ DRMLISTDEL(&bo_gem->head);
- drm_intel_gem_bo_free(&bo_gem->bo);
- }
+ drm_intel_gem_bo_free(&bo_gem->bo);
}
free(bufmgr);
@@ -2867,45 +2840,6 @@ drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
return 0;
}
-static void
-add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
-{
- unsigned int i = bufmgr_gem->num_buckets;
-
- assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
-
- DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
- bufmgr_gem->cache_bucket[i].size = size;
- bufmgr_gem->num_buckets++;
-}
-
-static void
-init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
-{
- unsigned long size, cache_max_size = 64 * 1024 * 1024;
-
- /* OK, so power of two buckets was too wasteful of memory.
- * Give 3 other sizes between each power of two, to hopefully
- * cover things accurately enough. (The alternative is
- * probably to just go for exact matching of sizes, and assume
- * that for things like composited window resize the tiled
- * width/height alignment and rounding of sizes to pages will
- * get us useful cache hit rates anyway)
- */
- add_bucket(bufmgr_gem, 4096);
- add_bucket(bufmgr_gem, 4096 * 2);
- add_bucket(bufmgr_gem, 4096 * 3);
-
- /* Initialize the linked lists for BO reuse cache. */
- for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
- add_bucket(bufmgr_gem, size);
-
- add_bucket(bufmgr_gem, size + size * 1 / 4);
- add_bucket(bufmgr_gem, size + size * 2 / 4);
- add_bucket(bufmgr_gem, size + size * 3 / 4);
- }
-}
-
void
drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
{
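The removed init_cache_buckets() above also documents why 25% is the chosen slack: the old scheme kept a bucket at every power of two from four pages up to 64 MiB plus three evenly spaced intermediate sizes, so adjacent buckets were at most 25% apart. For comparison, a short sketch that regenerates that old size series:

#include <stdio.h>

int main(void)
{
	unsigned long size;

	/* The three fixed small buckets, then the power-of-two series with
	 * three intermediate sizes each, as the removed code built it. */
	printf("%lu\n%lu\n%lu\n", 4096UL, 4096UL * 2, 4096UL * 3);
	for (size = 4 * 4096; size <= 64 * 1024 * 1024; size *= 2) {
		printf("%lu\n", size);
		printf("%lu\n", size + size * 1 / 4);
		printf("%lu\n", size + size * 2 / 4);
		printf("%lu\n", size + size * 3 / 4);
	}
	return 0;
}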
@@ -3363,7 +3297,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
DRMINITLISTHEAD(&bufmgr_gem->named);
- init_cache_buckets(bufmgr_gem);
+ DRMINITLISTHEAD(&bufmgr_gem->cache);
DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
bufmgr_gem->vma_max = -1; /* unlimited by default */