summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c4
-rw-r--r--src/gallium/drivers/r600/r600_asm.c4
-rw-r--r--src/gallium/drivers/r600/r600_shader.c4
-rw-r--r--src/gallium/drivers/r600/radeon_uvd.c8
-rw-r--r--src/gallium/drivers/r600/radeon_vce.c4
-rw-r--r--src/gallium/drivers/r600/radeon_video.c6
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.c10
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd_enc.c6
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.c4
-rw-r--r--src/gallium/drivers/radeon/radeon_vcn_dec.c18
-rw-r--r--src/gallium/drivers/radeon/radeon_vcn_enc.c4
-rw-r--r--src/gallium/drivers/radeon/radeon_video.c6
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h14
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c3
-rw-r--r--src/gallium/include/pipe/p_defines.h8
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_bo.c96
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_bo.h3
17 files changed, 140 insertions, 62 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index a77f58242e3..9085be4e2f3 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -438,7 +438,9 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
/* Upload code + ROdata */
shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen,
shader->bc.ndw * 4);
- p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
+ p = r600_buffer_map_sync_with_rings(
+ &rctx->b, shader->code_bo,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
//TODO: use util_memcpy_cpu_to_le32 ?
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
rctx->b.ws->buffer_unmap(shader->code_bo->buf);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 7029be24f4b..4ba77c535f9 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2772,7 +2772,9 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
return NULL;
}
- bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+ bytecode = r600_buffer_map_sync_with_rings
+ (&rctx->b, shader->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
bytecode += shader->offset / 4;
if (R600_BIG_ENDIAN) {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 408939d1105..fc826470d69 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -141,7 +141,9 @@ static int store_shader(struct pipe_context *ctx,
if (shader->bo == NULL) {
return -ENOMEM;
}
- ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
+ ptr = r600_buffer_map_sync_with_rings(
+ &rctx->b, shader->bo,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (R600_BIG_ENDIAN) {
for (i = 0; i < shader->shader.bc.ndw; ++i) {
ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c
index 495a93dc55a..5568f2138e4 100644
--- a/src/gallium/drivers/r600/radeon_uvd.c
+++ b/src/gallium/drivers/r600/radeon_uvd.c
@@ -152,7 +152,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* and map it for CPU access */
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
@@ -1068,7 +1069,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->buf,
- dec->cs, PIPE_TRANSFER_WRITE);
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
}
/**
@@ -1121,7 +1122,8 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
}
dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE);
+ PIPE_TRANSFER_WRITE |
+ RADEON_TRANSFER_TEMPORARY);
if (!dec->bs_ptr)
return;
diff --git a/src/gallium/drivers/r600/radeon_vce.c b/src/gallium/drivers/r600/radeon_vce.c
index 60ba12a593a..e38b927b1d4 100644
--- a/src/gallium/drivers/r600/radeon_vce.c
+++ b/src/gallium/drivers/r600/radeon_vce.c
@@ -353,7 +353,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
struct rvid_buffer *fb = feedback;
if (size) {
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ uint32_t *ptr = enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (ptr[1]) {
*size = ptr[4] - ptr[9];
diff --git a/src/gallium/drivers/r600/radeon_video.c b/src/gallium/drivers/r600/radeon_video.c
index 02fcf77d4ff..8e0af448be5 100644
--- a/src/gallium/drivers/r600/radeon_video.c
+++ b/src/gallium/drivers/r600/radeon_video.c
@@ -97,11 +97,13 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
- src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
+ src = ws->buffer_map(old_buf.res->buf, cs,
+ PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);
if (!src)
goto error;
- dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+ dst = ws->buffer_map(new_buf->res->buf, cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dst)
goto error;
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 62af1a311c2..ca066e89823 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -148,7 +148,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* and map it for CPU access */
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
@@ -1015,7 +1016,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->buf,
- dec->cs, PIPE_TRANSFER_WRITE);
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
}
/**
@@ -1060,8 +1061,9 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
return;
}
- dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE);
+ dec->bs_ptr = dec->ws->buffer_map(
+ buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dec->bs_ptr)
return;
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
index 4384e5e1646..3164dbb2c20 100644
--- a/src/gallium/drivers/radeon/radeon_uvd_enc.c
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -263,9 +263,9 @@ radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder,
if (NULL != size) {
radeon_uvd_enc_feedback_t *fb_data =
- (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,
- enc->cs,
- PIPE_TRANSFER_READ_WRITE);
+ (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!fb_data->status)
*size = fb_data->bitstream_size;
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 310d1654b05..94df06e88c6 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -352,7 +352,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
struct rvid_buffer *fb = feedback;
if (size) {
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ uint32_t *ptr = enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (ptr[1]) {
*size = ptr[4] - ptr[9];
diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index 1ee85ae3d3f..e402af21a64 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -941,7 +941,9 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
si_vid_clear_buffer(dec->base.context, &dec->ctx);
/* ctx needs probs table */
- ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(
+ dec->ctx.res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
fill_probs_table(ptr);
dec->ws->buffer_unmap(dec->ctx.res->buf);
}
@@ -1034,7 +1036,8 @@ static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec)
buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
/* and map it for CPU access */
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
/* calc buffer offsets */
dec->msg = ptr;
@@ -1312,7 +1315,7 @@ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->buf,
- dec->cs, PIPE_TRANSFER_WRITE);
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
}
/**
@@ -1357,8 +1360,9 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
return;
}
- dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE);
+ dec->bs_ptr = dec->ws->buffer_map(
+ buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dec->bs_ptr)
return;
@@ -1543,7 +1547,9 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
void *ptr;
buf = &dec->msg_fb_it_probs_buffers[i];
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(
+ buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
fill_probs_table(ptr);
dec->ws->buffer_unmap(buf->res->buf);
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index e8676f6c721..7d64a28a405 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -244,7 +244,9 @@ static void radeon_enc_get_feedback(struct pipe_video_codec *encoder,
struct rvid_buffer *fb = feedback;
if (size) {
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ uint32_t *ptr = enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (ptr[1])
*size = ptr[6];
else
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index a39ce4cc73e..bb1173e8005 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -88,11 +88,13 @@ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs,
if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
- src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
+ src = ws->buffer_map(old_buf.res->buf, cs,
+ PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);
if (!src)
goto error;
- dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+ dst = ws->buffer_map(new_buf->res->buf, cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dst)
goto error;
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 49f8bb279e5..a56ff75ad24 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -76,6 +76,15 @@ enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_SYNCHRONIZED = 8
};
+enum radeon_transfer_flags {
+ /* Indicates that the caller will unmap the buffer.
+ *
+ * Not unmapping buffers is an important performance optimization for
+ * OpenGL (avoids kernel overhead for frequently mapped buffers).
+ */
+ RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0),
+};
+
#define RADEON_SPARSE_PAGE_SIZE (64 * 1024)
enum ring_type {
@@ -294,9 +303,12 @@ struct radeon_winsys {
* Map the entire data store of a buffer object into the client's address
* space.
*
+ * Callers are expected to unmap buffers again if and only if the
+ * RADEON_TRANSFER_TEMPORARY flag is set in \p usage.
+ *
* \param buf A winsys buffer object to map.
* \param cs A command stream to flush if the buffer is referenced by it.
- * \param usage A bitmask of the PIPE_TRANSFER_* flags.
+ * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags.
* \return The pointer at the beginning of the buffer.
*/
void *(*buffer_map)(struct pb_buffer *buf,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 19522cc97b1..d455fb5db6a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5293,7 +5293,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
/* Upload. */
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ RADEON_TRANSFER_TEMPORARY);
/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
* endian-independent. */
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 693f041b1da..e99895d30d8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -341,7 +341,13 @@ enum pipe_transfer_usage
* PIPE_RESOURCE_FLAG_MAP_COHERENT must be set when creating
* the resource.
*/
- PIPE_TRANSFER_COHERENT = (1 << 14)
+ PIPE_TRANSFER_COHERENT = (1 << 14),
+
+ /**
+ * This and higher bits are reserved for private use by drivers. Drivers
+ * should use this as (PIPE_TRANSFER_DRV_PRV << i).
+ */
+ PIPE_TRANSFER_DRV_PRV = (1 << 24)
};
/**
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 4cbaa8056ad..73336dd3e01 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -56,6 +56,7 @@ amdgpu_bo_create(struct radeon_winsys *rws,
unsigned alignment,
enum radeon_bo_domain domain,
enum radeon_bo_flag flags);
+static void amdgpu_bo_unmap(struct pb_buffer *buf);
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
enum radeon_bo_usage usage)
@@ -173,6 +174,12 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
assert(bo->bo && "must not be called for slab entries");
+ if (!bo->is_user_ptr && bo->cpu_ptr) {
+ bo->cpu_ptr = NULL;
+ amdgpu_bo_unmap(&bo->base);
+ }
+ assert(bo->is_user_ptr || bo->u.real.map_count == 0);
+
if (ws->debug_all_bos) {
simple_mtx_lock(&ws->global_bo_list_lock);
LIST_DEL(&bo->u.real.global_list_item);
@@ -195,14 +202,6 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);
- if (bo->u.real.map_count >= 1) {
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- ws->mapped_vram -= bo->base.size;
- else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- ws->mapped_gtt -= bo->base.size;
- ws->num_mapped_buffers--;
- }
-
simple_mtx_destroy(&bo->lock);
FREE(bo);
}
@@ -219,6 +218,29 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
amdgpu_bo_destroy(_buf);
}
+static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu)
+{
+ assert(!bo->sparse && bo->bo && !bo->is_user_ptr);
+ int r = amdgpu_bo_cpu_map(bo->bo, cpu);
+ if (r) {
+ /* Clear the cache and try again. */
+ pb_cache_release_all_buffers(&bo->ws->bo_cache);
+ r = amdgpu_bo_cpu_map(bo->bo, cpu);
+ if (r)
+ return false;
+ }
+
+ if (p_atomic_inc_return(&bo->u.real.map_count) == 1) {
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->ws->mapped_vram += bo->base.size;
+ else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+ bo->ws->mapped_gtt += bo->base.size;
+ bo->ws->num_mapped_buffers++;
+ }
+
+ return true;
+}
+
static void *amdgpu_bo_map(struct pb_buffer *buf,
struct radeon_cmdbuf *rcs,
enum pipe_transfer_usage usage)
@@ -226,9 +248,6 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
struct amdgpu_winsys_bo *real;
struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
- int r;
- void *cpu = NULL;
- uint64_t offset = 0;
assert(!bo->sparse);
@@ -313,9 +332,9 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
}
}
- /* If the buffer is created from user memory, return the user pointer. */
- if (bo->user_ptr)
- return bo->user_ptr;
+ /* Buffer synchronization has been checked, now actually map the buffer. */
+ void *cpu = NULL;
+ uint64_t offset = 0;
if (bo->bo) {
real = bo;
@@ -324,22 +343,31 @@ static void *amdgpu_bo_map(struct pb_buffer *buf,
offset = bo->va - real->va;
}
- r = amdgpu_bo_cpu_map(real->bo, &cpu);
- if (r) {
- /* Clear the cache and try again. */
- pb_cache_release_all_buffers(&real->ws->bo_cache);
- r = amdgpu_bo_cpu_map(real->bo, &cpu);
- if (r)
- return NULL;
+ if (usage & RADEON_TRANSFER_TEMPORARY) {
+ if (real->is_user_ptr) {
+ cpu = real->cpu_ptr;
+ } else {
+ if (!amdgpu_bo_do_map(real, &cpu))
+ return NULL;
+ }
+ } else {
+ cpu = p_atomic_read(&real->cpu_ptr);
+ if (!cpu) {
+ simple_mtx_lock(&real->lock);
+ /* Must re-check due to the possibility of a race. Re-check need not
+ * be atomic thanks to the lock. */
+ cpu = real->cpu_ptr;
+ if (!cpu) {
+ if (!amdgpu_bo_do_map(real, &cpu)) {
+ simple_mtx_unlock(&real->lock);
+ return NULL;
+ }
+ p_atomic_set(&real->cpu_ptr, cpu);
+ }
+ simple_mtx_unlock(&real->lock);
+ }
}
- if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
- if (real->initial_domain & RADEON_DOMAIN_VRAM)
- real->ws->mapped_vram += real->base.size;
- else if (real->initial_domain & RADEON_DOMAIN_GTT)
- real->ws->mapped_gtt += real->base.size;
- real->ws->num_mapped_buffers++;
- }
return (uint8_t*)cpu + offset;
}
@@ -350,12 +378,15 @@ static void amdgpu_bo_unmap(struct pb_buffer *buf)
assert(!bo->sparse);
- if (bo->user_ptr)
+ if (bo->is_user_ptr)
return;
real = bo->bo ? bo : bo->u.slab.real;
-
+ assert(real->u.real.map_count != 0 && "too many unmaps");
if (p_atomic_dec_zero(&real->u.real.map_count)) {
+ assert(!real->cpu_ptr &&
+ "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag");
+
if (real->initial_domain & RADEON_DOMAIN_VRAM)
real->ws->mapped_vram -= real->base.size;
else if (real->initial_domain & RADEON_DOMAIN_GTT)
@@ -1459,6 +1490,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
goto error_va_map;
/* Initialize it. */
+ bo->is_user_ptr = true;
pipe_reference_init(&bo->base.reference, 1);
simple_mtx_init(&bo->lock, mtx_plain);
bo->bo = buf_handle;
@@ -1466,7 +1498,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
bo->ws = ws;
- bo->user_ptr = pointer;
+ bo->cpu_ptr = pointer;
bo->va = va;
bo->u.real.va_handle = va_handle;
bo->initial_domain = RADEON_DOMAIN_GTT;
@@ -1493,7 +1525,7 @@ error:
static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
- return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
+ return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr;
}
static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 58e6eed733d..88f4241327d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -88,10 +88,11 @@ struct amdgpu_winsys_bo {
} u;
struct amdgpu_winsys *ws;
- void *user_ptr; /* from buffer_from_ptr */
+ void *cpu_ptr; /* for user_ptr and permanent maps */
amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */
bool sparse;
+ bool is_user_ptr;
bool is_local;
uint32_t unique_id;
uint64_t va;