diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-11-06 10:46:59 +0100 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-11-06 11:09:22 +0100 |
commit | 5e7f015980fdc78f4add2f0e63c79a71e19e8a86 (patch) | |
tree | b2f283140f928ffaae39f63acf3106f41c2e5d45 | |
parent | a92d76e9e09bf6cef477255abc197b147f4359b5 (diff) |
winsys/amdgpu: handle cs_add_fence_dependency for deferred/unsubmitted fences
The idea is to fix the following interleaving of operations
that can arise from deferred fences:
Thread 1 / Context 1 Thread 2 / Context 2
-------------------- --------------------
f = deferred flush
<------- application-side synchronization ------->
fence_server_sync(f)
...
flush()
flush()
We will now stall in fence_server_sync until the flush of context 1
has completed.
This scenario was unlikely to occur previously, because applications
seem to be doing
Thread 1 / Context 1 Thread 2 / Context 2
-------------------- --------------------
f = glFenceSync()
glFlush()
<------- application-side synchronization ------->
glWaitSync(f)
... and indeed they probably *have* to use this ordering to avoid
deadlocks in the GLX model, where all GL operations conceptually
go through a single connection to the X server. However, it's less
clear whether applications have to do this with other WSI (e.g. EGL).
Besides, even this sequence of GL commands can be translated into
the Gallium-level sequence outlined above when Gallium threading
and asynchronous flushes are used. So it makes sense to be more
robust.
As a side effect, we no longer busy-wait on submission_in_progress.
We won't enable asynchronous flushes on radeon, but add a
cs_add_fence_dependency stub anyway to document the potential
issue.
-rw-r--r-- | src/gallium/drivers/radeon/radeon_winsys.h | 4 | ||||
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 21 | ||||
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 9 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 19 |
4 files changed, 41 insertions, 12 deletions
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 2d3f646dc6..e8c486cb7f 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -543,7 +543,9 @@ struct radeon_winsys { /** * Create a fence before the CS is flushed. * The user must flush manually to complete the initializaton of the fence. - * The fence must not be used before the flush. + * + * The fence must not be used for anything except \ref cs_add_fence_dependency + * before the flush. */ struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_winsys_cs *cs); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 0450ccc359..0628e54735 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -50,7 +50,8 @@ amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type, fence->fence.ip_type = ip_type; fence->fence.ip_instance = ip_instance; fence->fence.ring = ring; - fence->submission_in_progress = true; + util_queue_fence_init(&fence->submitted); + util_queue_fence_reset(&fence->submitted); p_atomic_inc(&ctx->refcount); return (struct pipe_fence_handle *)fence; } @@ -81,6 +82,9 @@ amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd) FREE(fence); return NULL; } + + util_queue_fence_init(&fence->submitted); + return (struct pipe_fence_handle*)fence; } @@ -98,7 +102,7 @@ static int amdgpu_fence_export_sync_file(struct radeon_winsys *rws, return r ? -1 : fd; } - os_wait_until_zero(&fence->submission_in_progress, PIPE_TIMEOUT_INFINITE); + util_queue_fence_wait(&fence->submitted); /* Convert the amdgpu fence into a fence FD. 
*/ int fd; @@ -118,7 +122,7 @@ static void amdgpu_fence_submitted(struct pipe_fence_handle *fence, rfence->fence.fence = seq_no; rfence->user_fence_cpu_address = user_fence_cpu_address; - rfence->submission_in_progress = false; + util_queue_fence_signal(&rfence->submitted); } static void amdgpu_fence_signalled(struct pipe_fence_handle *fence) @@ -126,7 +130,7 @@ static void amdgpu_fence_signalled(struct pipe_fence_handle *fence) struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence; rfence->signalled = true; - rfence->submission_in_progress = false; + util_queue_fence_signal(&rfence->submitted); } bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout, @@ -164,8 +168,7 @@ bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout, /* The fence might not have a number assigned if its IB is being * submitted in the other thread right now. Wait until the submission * is done. */ - if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress, - abs_timeout)) + if (!util_queue_fence_wait_timeout(&rfence->submitted, abs_timeout)) return false; user_fence_cpu = rfence->user_fence_cpu_address; @@ -1029,6 +1032,8 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_winsys_cs *rws, struct amdgpu_cs_context *cs = acs->csc; struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence; + util_queue_fence_wait(&fence->submitted); + if (is_noop_fence_dependency(acs, fence)) return; @@ -1304,7 +1309,7 @@ bo_list_error: continue; } - assert(!fence->submission_in_progress); + assert(util_queue_fence_is_signalled(&fence->submitted)); amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[num++]); } @@ -1327,7 +1332,7 @@ bo_list_error: if (!amdgpu_fence_is_syncobj(fence)) continue; - assert(!fence->submission_in_progress); + assert(util_queue_fence_is_signalled(&fence->submitted)); sem_chunk[num++].handle = fence->syncobj; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 
b7bc9a20bb..fbf44b3661 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -144,9 +144,11 @@ struct amdgpu_fence { struct amdgpu_cs_fence fence; uint64_t *user_fence_cpu_address; - /* If the fence is unknown due to an IB still being submitted - * in the other thread. */ - volatile int submission_in_progress; /* bool (int for atomicity) */ + /* If the fence has been submitted. This is unsignalled for deferred fences + * (cs->next_fence) and while an IB is still being submitted in the submit + * thread. */ + struct util_queue_fence submitted; + volatile int signalled; /* bool (int for atomicity) */ }; @@ -178,6 +180,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst, else amdgpu_ctx_unref(fence->ctx); + util_queue_fence_destroy(&fence->submitted); FREE(fence); } *rdst = rsrc; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 7220f3a024..add88f80aa 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -786,6 +786,24 @@ radeon_drm_cs_get_next_fence(struct radeon_winsys_cs *rcs) return fence; } +static void +radeon_drm_cs_add_fence_dependency(struct radeon_winsys_cs *cs, + struct pipe_fence_handle *fence) +{ + /* TODO: Handle the following unlikely multi-threaded scenario: + * + * Thread 1 / Context 1 Thread 2 / Context 2 + * -------------------- -------------------- + * f = cs_get_next_fence() + * cs_add_fence_dependency(f) + * cs_flush() + * cs_flush() + * + * We currently assume that this does not happen because we don't support + * asynchronous flushes on Radeon. 
+ */ +} + void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) { ws->base.ctx_create = radeon_drm_ctx_create; @@ -801,6 +819,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; + ws->base.cs_add_fence_dependency = radeon_drm_cs_add_fence_dependency; ws->base.fence_wait = radeon_fence_wait; ws->base.fence_reference = radeon_fence_reference; } |