| author | Fei Yang <fei.yang@intel.com> | 2024-11-07 17:10:41 -0800 |
|---|---|---|
| committer | Ashutosh Dixit <ashutosh.dixit@intel.com> | 2024-11-12 11:48:11 -0800 |
| commit | 5da2928391abd9625dd87b4a3fb6c35a54a0d948 | |
| tree | 4a80c866b1ba16345512e736c71d8485b9b0a572 | |
| parent | 3544d6d2620f72d30e0ba55df10ca319fc261256 | |
tests/intel/xe_exec_threads: separate exec_sync and batch buffer
In INVALIDATE cases the test purposely remaps the data buffer to a
different physical location in the middle of execution to exercise the
page-fault handling flow. After the remapping we lose access to the old
physical location, which breaks the comparison of the ufence value at
the end of execution. To fix this, separate the exec_sync data from the
batch buffer that holds the instructions, and never remap the exec_sync
data during execution.
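[Editor's note: for readers unfamiliar with the INVALIDATE trick, the test forces the remap with a MAP_FIXED, MAP_ANONYMOUS mmap over the live userptr range (see the diff below). The following is a minimal standalone sketch of why writes landing in the old physical pages become unreachable; the buffer size and fill values here are illustrative, not taken from the test.]

```c
#include <assert.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t sz = 4096;

	/* Original anonymous mapping; imagine the GPU's ufence write
	 * landing in the physical pages backing this range. */
	char *buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);
	memset(buf, 0xc0, sz);

	/* MAP_FIXED over the same virtual range swaps in fresh zeroed
	 * pages: the old physical location, and anything written into
	 * it, can no longer be read through this address. */
	char *remap = mmap(buf, sz, PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
	assert(remap == buf);
	assert(buf[0] == 0);	/* the 0xc0 contents are gone */

	munmap(buf, sz);
	return 0;
}
```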
v2: Separate only exec_sync; keep the data field together with the
batch buffer (Matt Roper)
Signed-off-by: Fei Yang <fei.yang@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
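[Editor's note: in outline, the separation the v2 note describes looks like the following, paraphrased from the diff below. The struct is anonymous in the test; the names used here are hypothetical.]

```c
#include <stdint.h>

/* Per-exec bookkeeping, bound at 'addr'. The data field that the
 * batch writes (0xc0ffee) stays with the batch buffer, per v2. */
struct exec_data {			/* hypothetical name */
	uint32_t batch[16];		/* GPU instructions */
	uint64_t pad;
	uint64_t vm_sync;		/* bind/unbind user fence */
	uint32_t data;			/* written by the batch */
};

/* The per-exec user fences move to their own mapping, bound at
 * sync_addr = addr + 0x10000000, so an INVALIDATE remap of the
 * data buffer never invalidates the fences being waited on. */
extern uint64_t *exec_sync;		/* one entry per exec */
```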
-rw-r--r-- | tests/intel/xe_exec_threads.c | 55
1 file changed, 36 insertions(+), 19 deletions(-)
```diff
diff --git a/tests/intel/xe_exec_threads.c b/tests/intel/xe_exec_threads.c
index 433f2620a..661117bed 100644
--- a/tests/intel/xe_exec_threads.c
+++ b/tests/intel/xe_exec_threads.c
@@ -241,6 +241,7 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 		  struct drm_xe_engine_class_instance *eci,
 		  int n_exec_queues, int n_execs, unsigned int flags)
 {
+	uint64_t sync_addr = addr + 0x10000000;
 #define USER_FENCE_VALUE 0xdeadbeefdeadbeefull
 	struct drm_xe_sync sync[1] = {
 		{ .type = DRM_XE_SYNC_TYPE_USER_FENCE, .flags = DRM_XE_SYNC_FLAG_SIGNAL,
@@ -253,15 +254,15 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 	};
 	int64_t fence_timeout;
 	uint32_t exec_queues[MAX_N_EXEC_QUEUES];
-	size_t bo_size;
+	size_t bo_size, sync_size;
 	uint32_t bo = 0;
 	struct {
 		uint32_t batch[16];
 		uint64_t pad;
 		uint64_t vm_sync;
-		uint64_t exec_sync;
 		uint32_t data;
 	} *data;
+	uint64_t *exec_sync;
 	int i, j, b;
 	int map_fd = -1;
 	bool owns_vm = false, owns_fd = false;
@@ -280,6 +281,8 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 
 	bo_size = sizeof(*data) * n_execs;
 	bo_size = xe_bb_size(fd, bo_size);
+	sync_size = sizeof(*exec_sync) * n_execs;
+	sync_size = xe_bb_size(fd, sync_size);
 
 	if (flags & USERPTR) {
 		if (flags & INVALIDATE) {
@@ -301,6 +304,12 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 	}
 	memset(data, 0, bo_size);
 
+	exec_sync = mmap(from_user_pointer(userptr + 0x10000000),
+			 sync_size, PROT_READ | PROT_WRITE,
+			 MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
+	igt_assert(exec_sync != MAP_FAILED);
+	memset(exec_sync, 0, sync_size);
+
 	for (i = 0; i < n_exec_queues; i++)
 		exec_queues[i] = xe_exec_queue_create(fd, vm, eci, 0);
 
@@ -312,9 +321,12 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 	else
 		xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(data),
 					 addr, bo_size, sync, 1);
-
 	fence_timeout = (igt_run_in_simulation() ? 30 : 3) * NSEC_PER_SEC;
+	xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
+	data[0].vm_sync = 0;
 
+	xe_vm_bind_userptr_async(fd, vm, 0, to_user_pointer(exec_sync),
+				 sync_addr, sync_size, sync, 1);
 	xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
 	data[0].vm_sync = 0;
 
@@ -333,7 +345,7 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 		data[i].batch[b++] = MI_BATCH_BUFFER_END;
 		igt_assert(b <= ARRAY_SIZE(data[i].batch));
 
-		sync[0].addr = addr + (char *)&data[i].exec_sync - (char *)data;
+		sync[0].addr = sync_addr + (char *)&exec_sync[i] - (char *)exec_sync;
 
 		exec.exec_queue_id = exec_queues[e];
 		exec.address = batch_addr;
@@ -341,7 +353,7 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 
 		if (flags & REBIND && i && !(i & 0x1f)) {
 			for (j = i == 0x20 ? 0 : i - 0x1f; j <= i; ++j)
-				xe_wait_ufence(fd, &data[j].exec_sync,
+				xe_wait_ufence(fd, &exec_sync[j],
 					       USER_FENCE_VALUE,
 					       exec_queues[e], fence_timeout);
 			xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size,
@@ -371,7 +383,7 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 			 * an invalidate.
 			 */
 			for (j = i == 0x20 ? 0 : i - 0x1f; j <= i; ++j)
-				xe_wait_ufence(fd, &data[j].exec_sync,
+				xe_wait_ufence(fd, &exec_sync[j],
 					       USER_FENCE_VALUE,
 					       exec_queues[e], fence_timeout);
 
@@ -409,16 +421,9 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 		/*
 		 * For !RACE cases xe_wait_ufence has been called in above for-loop
 		 * except the last batch of submissions. For RACE cases we will need
-		 * to wait for the second half of the submissions to complete. There
-		 * is a potential race here because the first half submissions might
-		 * have updated the fence in the old physical location while the test
-		 * is remapping the buffer from a different physical location, but the
-		 * wait_ufence only checks the fence from the new location which would
-		 * never be updated. We have to assume the first half of the submissions
-		 * complete before the second half. Will have a follow up patch to fix
-		 * this completely.
+		 * to wait for all submissions to complete.
 		 */
-		j = (flags & RACE) ? (n_execs / 2 + 1) : (((n_execs - 1) & ~0x1f) + 1);
+		j = (flags & RACE) ? 0 : (((n_execs - 1) & ~0x1f) + 1);
 	else if (flags & REBIND)
 		/*
 		 * For REBIND cases xe_wait_ufence has been called in above for-loop
@@ -427,19 +432,31 @@ test_compute_mode(int fd, uint32_t vm, uint64_t addr, uint64_t userptr,
 		j = ((n_execs - 1) & ~0x1f) + 1;
 
 	for (i = j; i < n_execs; i++)
-		xe_wait_ufence(fd, &data[i].exec_sync, USER_FENCE_VALUE,
+		xe_wait_ufence(fd, &exec_sync[i], USER_FENCE_VALUE,
 			       exec_queues[i % n_exec_queues], fence_timeout);
 
+	/*
+	 * For INVALIDATE && RACE cases, due the the remmap in the
+	 * middle of the execution, we lose access to some of the
+	 * 0xc0ffee written to the old location, so check only for
+	 * the second half of the submissions.
+	 */
+	if (flags & INVALIDATE && flags & RACE)
+		j = n_execs / 2 + 1;
+	for (i = j; i < n_execs; i++)
+		igt_assert_eq(data[i].data, 0xc0ffee);
+
 	sync[0].addr = to_user_pointer(&data[0].vm_sync);
+	xe_vm_unbind_async(fd, vm, 0, 0, sync_addr, sync_size, sync, 1);
+	xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
+	data[0].vm_sync = 0;
 	xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size, sync, 1);
 	xe_wait_ufence(fd, &data[0].vm_sync, USER_FENCE_VALUE, 0, fence_timeout);
 
-	for (i = j; i < n_execs; i++)
-		igt_assert_eq(data[i].data, 0xc0ffee);
-
 	for (i = 0; i < n_exec_queues; i++)
 		xe_exec_queue_destroy(fd, exec_queues[i]);
 
+	munmap(exec_sync, sync_size);
 	if (bo) {
 		munmap(data, bo_size);
 		gem_close(fd, bo);
```
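[Editor's note: one observation on the new `sync[0].addr` line in the hunks above. Since `exec_sync` is now a flat `uint64_t` array rather than a struct member, the pointer difference is just the array offset; a hedged restatement, using the same identifiers as the diff:]

```c
/* (char *)&exec_sync[i] - (char *)exec_sync == i * sizeof(uint64_t),
 * so the GPU-visible address of the i-th user fence is simply the
 * separate sync binding's base plus the array offset: */
sync[0].addr = sync_addr + i * sizeof(*exec_sync);
```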