summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-09-12 10:52:35 +0200
committer Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-09-13 11:37:32 +0200
commit 986aaf8f47a6242c5cbee43271390acab99eca50 (patch)
tree 15d8f48d652471981bb4635325acc732e32bb6a0
parent 5bc6bfbfb3615ec33b1ab1ed0577d76cc30f475f (diff)
winsys/radeon: add slab buffer list
Introducing radeon_bo::hash will reduce collisions between "real" buffers and buffers from slabs.
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_bo.c 3
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_bo.h 1
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_cs.c 98
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_cs.h 16
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.h 1
5 files changed, 107 insertions, 12 deletions
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index df6e53c634..1725080a13 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -587,6 +587,7 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
bo->handle = args.handle;
bo->va = 0;
bo->initial_domain = initial_domains;
+ bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
pipe_mutex_init(bo->u.real.map_mutex);
pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
pb_cache_bucket);
@@ -864,6 +865,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
bo->user_ptr = pointer;
bo->va = 0;
bo->initial_domain = RADEON_DOMAIN_GTT;
+ bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
pipe_mutex_init(bo->u.real.map_mutex);
util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
@@ -997,6 +999,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
bo->base.vtbl = &radeon_bo_vtbl;
bo->rws = ws;
bo->va = 0;
+ bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
pipe_mutex_init(bo->u.real.map_mutex);
if (bo->flink_name)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index b9a4a05b7c..8e35a385ed 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -59,6 +59,7 @@ struct radeon_bo {
uint32_t handle; /* 0 for slab entries */
uint32_t flink_name;
uint64_t va;
+ uint32_t hash;
enum radeon_bo_domain initial_domain;
/* how many command streams is this bo referenced in? */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 20f90cf7ca..9fbd378369 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -129,9 +129,14 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
}
+ for (i = 0; i < csc->num_slab_buffers; ++i) {
+ p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references);
+ radeon_bo_reference(&csc->slab_buffers[i].bo, NULL);
+ }
csc->num_relocs = 0;
csc->num_validated_relocs = 0;
+ csc->num_slab_buffers = 0;
csc->chunks[0].length_dw = 0;
csc->chunks[1].length_dw = 0;
@@ -143,6 +148,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{ /* Tear down a CS context: reset it, then free its three buffer arrays. */
radeon_cs_context_cleanup(csc); /* drops the BO references held in relocs_bo and slab_buffers */
+ FREE(csc->slab_buffers); /* array grown on demand by radeon_lookup_or_add_slab_buffer() */
FREE(csc->relocs_bo);
FREE(csc->relocs);
}
@@ -191,16 +197,26 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
- unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+ unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+ struct radeon_bo_item *buffers;
+ unsigned num_buffers;
int i = csc->reloc_indices_hashlist[hash];
+ if (bo->handle) {
+ buffers = csc->relocs_bo;
+ num_buffers = csc->num_relocs;
+ } else {
+ buffers = csc->slab_buffers;
+ num_buffers = csc->num_slab_buffers;
+ }
+
/* not found or found */
- if (i == -1 || csc->relocs_bo[i].bo == bo)
+ if (i == -1 || (i < num_buffers && buffers[i].bo == bo))
return i;
/* Hash collision, look for the BO in the list of relocs linearly. */
- for (i = csc->num_relocs - 1; i >= 0; i--) {
- if (csc->relocs_bo[i].bo == bo) {
+ for (i = num_buffers - 1; i >= 0; i--) {
+ if (buffers[i].bo == bo) {
/* Put this reloc in the hash list.
* This will prevent additional hash collisions if there are
* several consecutive lookup_buffer calls for the same buffer.
@@ -217,12 +233,12 @@ int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
return -1;
}
-static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
- struct radeon_bo *bo)
+static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs,
+ struct radeon_bo *bo)
{
struct radeon_cs_context *csc = cs->csc;
struct drm_radeon_cs_reloc *reloc;
- unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+ unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
int i = -1;
i = radeon_lookup_buffer(csc, bo);
@@ -259,7 +275,7 @@ static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
/* Initialize the new relocation. */
csc->relocs_bo[csc->num_relocs].bo = NULL;
- csc->relocs_bo[csc->num_relocs].priority_usage = 0;
+ csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
p_atomic_inc(&bo->num_cs_references);
reloc = &csc->relocs[csc->num_relocs];
@@ -275,6 +291,53 @@ static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
return csc->num_relocs++;
}
+static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs,
+ struct radeon_bo *bo)
+{ /* Return the index of bo in csc->slab_buffers, appending a new entry when
+   * bo is not in that list yet. bo is expected to be a slab entry
+   * (handle == 0), since bo->u.slab.real is read below. Adding an entry also
+   * makes sure the backing real buffer is present in the reloc list and
+   * records its index in the new entry. Returns -1 if the slab_buffers
+   * array cannot be grown. */
+ struct radeon_cs_context *csc = cs->csc;
+ unsigned hash;
+ struct radeon_bo_item *item;
+ int idx;
+ int real_idx;
+
+ idx = radeon_lookup_buffer(csc, bo);
+ if (idx >= 0)
+ return idx; /* already tracked by this CS context */
+
+ real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);
+
+ /* Check if the backing array is large enough; if not, grow it to 1.3x
+  * its size or by 16 entries, whichever is larger. Note that the real
+  * buffer was already added above and stays in the reloc list even if
+  * this allocation fails. */
+ if (csc->num_slab_buffers >= csc->max_slab_buffers) {
+ unsigned new_max = MAX2(csc->max_slab_buffers + 16,
+ (unsigned)(csc->max_slab_buffers * 1.3));
+ struct radeon_bo_item *new_buffers =
+ REALLOC(csc->slab_buffers,
+ csc->max_slab_buffers * sizeof(*new_buffers),
+ new_max * sizeof(*new_buffers));
+ if (!new_buffers) {
+ fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
+ return -1;
+ }
+
+ csc->max_slab_buffers = new_max;
+ csc->slab_buffers = new_buffers;
+ }
+
+ /* Initialize the new slab buffer entry (not a relocation: only the
+  * backing real buffer has an entry in csc->relocs). */
+ idx = csc->num_slab_buffers++;
+ item = &csc->slab_buffers[idx];
+
+ item->bo = NULL; /* clear the slot first; presumably radeon_bo_reference() releases whatever the slot held before */
+ item->u.slab.real_idx = real_idx;
+ radeon_bo_reference(&item->bo, bo);
+ p_atomic_inc(&bo->num_cs_references);
+
+ hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+ csc->reloc_indices_hashlist[hash] = idx; /* lets the next radeon_lookup_buffer() for bo find idx without the linear scan */
+
+ return idx;
+}
+
static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
struct pb_buffer *buf,
enum radeon_bo_usage usage,
@@ -287,14 +350,24 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
struct drm_radeon_cs_reloc *reloc;
- unsigned index = radeon_lookup_or_add_buffer(cs, bo);
+ int index;
+
+ if (!bo->handle) {
+ index = radeon_lookup_or_add_slab_buffer(cs, bo);
+ if (index < 0)
+ return 0;
+
+ index = cs->csc->slab_buffers[index].u.slab.real_idx;
+ } else {
+ index = radeon_lookup_or_add_real_buffer(cs, bo);
+ }
reloc = &cs->csc->relocs[index];
added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
reloc->read_domains |= rd;
reloc->write_domain |= wd;
reloc->flags = MAX2(reloc->flags, priority);
- cs->csc->relocs_bo[index].priority_usage |= 1llu << priority;
+ cs->csc->relocs_bo[index].u.real.priority_usage |= 1llu << priority;
if (added_domains & RADEON_DOMAIN_VRAM)
cs->base.used_vram += bo->base.size;
@@ -366,7 +439,7 @@ static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
for (i = 0; i < cs->csc->num_relocs; i++) {
list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
- list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
+ list[i].priority_usage = cs->csc->relocs_bo[i].u.real.priority_usage;
}
}
return cs->csc->num_relocs;
@@ -584,6 +657,9 @@ static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
if (index == -1)
return false;
+ if (!bo->handle)
+ index = cs->csc->slab_buffers[index].u.slab.real_idx;
+
if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
return true;
if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index bd55548e93..f9b26af28f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -31,7 +31,14 @@
struct radeon_bo_item { /* One entry of a CS context's buffer lists (relocs_bo or slab_buffers). */
struct radeon_bo *bo;
- uint64_t priority_usage;
+ union {
+ struct {
+ uint64_t priority_usage; /* bitmask: bit (1 << priority) is set for each priority the buffer was added with */
+ } real; /* used by entries of relocs_bo */
+ struct {
+ unsigned real_idx; /* index into relocs / relocs_bo of the backing real buffer */
+ } slab; /* used by entries of slab_buffers */
+ } u;
};
struct radeon_cs_context {
@@ -50,6 +57,10 @@ struct radeon_cs_context {
struct radeon_bo_item *relocs_bo;
struct drm_radeon_cs_reloc *relocs;
+ unsigned num_slab_buffers;
+ unsigned max_slab_buffers;
+ struct radeon_bo_item *slab_buffers;
+
int reloc_indices_hashlist[4096];
};
@@ -108,6 +119,9 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
if (index == -1)
return false;
+ if (!bo->handle)
+ index = cs->csc->slab_buffers[index].u.slab.real_idx;
+
return cs->csc->relocs[index].write_domain != 0;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index 27fbe906f6..55149806ae 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -75,6 +75,7 @@ struct radeon_drm_winsys {
uint64_t mapped_gtt;
uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
uint64_t num_cs_flushes;
+ uint32_t next_bo_hash;
enum radeon_generation gen;
struct radeon_info info;