diff options
author | Danylo Piliaiev <dpiliaiev@igalia.com> | 2024-07-11 18:21:52 +0200 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-07-12 11:48:36 +0000 |
commit | 7231eef6304eb356611f947fe3a0429160a67097 (patch) | |
tree | ac0c34d7cbd4e13098a59d09e4ff0fbaa41f2d7c /src/freedreno | |
parent | 5bb9c1cca94f4aa47e669c769c08af4aabf31e71 (diff) |
tu: Have single Flush/Invalidate memory entrypoints
Make all flush/invalidation logic kernel-independent. The only
downside is that aarch32 would have cached non-coherent memory
disabled, but there are probably no users of it.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11468
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30131>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- | src/freedreno/vulkan/tu_device.cc | 43 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_knl.cc | 97 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_knl.h | 14 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_knl_drm.cc | 96 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_knl_drm.h | 12 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_knl_drm_msm.cc | 2 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_knl_drm_virtio.cc | 2 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_knl_kgsl.cc | 61 |
8 files changed, 134 insertions, 193 deletions
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 6187886476a..78dfa778fea 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1230,6 +1230,10 @@ tu_physical_device_init(struct tu_physical_device *device, goto fail_free_name; } + device->level1_dcache_size = tu_get_l1_dcache_size(); + device->has_cached_non_coherent_memory = + device->level1_dcache_size > 0 && !DETECT_ARCH_ARM; + device->memory.type_count = 1; device->memory.types[0] = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | @@ -2959,6 +2963,45 @@ tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInf return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT); } +static VkResult +sync_cache(VkDevice _device, + enum tu_mem_sync_op op, + uint32_t count, + const VkMappedMemoryRange *ranges) +{ + VK_FROM_HANDLE(tu_device, device, _device); + + if (!device->physical_device->has_cached_non_coherent_memory) { + tu_finishme( + "data cache clean and invalidation are unsupported on this arch!"); + return VK_SUCCESS; + } + + for (uint32_t i = 0; i < count; i++) { + VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory); + tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op); + } + + return VK_SUCCESS; +} + +VkResult +tu_FlushMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount, + pMemoryRanges); +} + +VkResult +tu_InvalidateMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount, + pMemoryRanges); +} VKAPI_ATTR void VKAPI_CALL tu_GetDeviceMemoryCommitment(VkDevice device, diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc index a9ff89b8f02..76e92e481c1 100644 --- 
a/src/freedreno/vulkan/tu_knl.cc +++ b/src/freedreno/vulkan/tu_knl.cc @@ -92,6 +92,81 @@ tu_bo_unmap(struct tu_device *dev, struct tu_bo *bo, bool reserve) return VK_SUCCESS; } +static inline void +tu_sync_cacheline_to_gpu(void const *p __attribute__((unused))) +{ +#if DETECT_ARCH_AARCH64 + /* Clean data cache. */ + __asm volatile("dc cvac, %0" : : "r" (p) : "memory"); +#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) + __builtin_ia32_clflush(p); +#elif DETECT_ARCH_ARM + /* DCCMVAC - same as DC CVAC on aarch64. + * Seems to be illegal to call from userspace. + */ + //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory"); + unreachable("Cache line clean is unsupported on ARMv7"); +#endif +} + +static inline void +tu_sync_cacheline_from_gpu(void const *p __attribute__((unused))) +{ +#if DETECT_ARCH_AARCH64 + /* Clean and Invalidate data cache, there is no separate Invalidate. */ + __asm volatile("dc civac, %0" : : "r" (p) : "memory"); +#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) + __builtin_ia32_clflush(p); +#elif DETECT_ARCH_ARM + /* DCCIMVAC - same as DC CIVAC on aarch64. + * Seems to be illegal to call from userspace. + */ + //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory"); + unreachable("Cache line invalidate is unsupported on ARMv7"); +#endif +} + +void +tu_bo_sync_cache(struct tu_device *dev, + struct tu_bo *bo, + VkDeviceSize offset, + VkDeviceSize size, + enum tu_mem_sync_op op) +{ + uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size; + char *start = (char *) bo->map + offset; + char *end = start + (size == VK_WHOLE_SIZE ? 
(bo->size - offset) : size); + + start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1)); + + for (; start < end; start += level1_dcache_size) { + if (op == TU_MEM_SYNC_CACHE_TO_GPU) { + tu_sync_cacheline_to_gpu(start); + } else { + tu_sync_cacheline_from_gpu(start); + } + } +} + +uint32_t +tu_get_l1_dcache_size() +{ +if (!(DETECT_ARCH_AARCH64 || DETECT_ARCH_X86 || DETECT_ARCH_X86_64)) + return 0; + +#if DETECT_ARCH_AARCH64 && \ + (!defined(_SC_LEVEL1_DCACHE_LINESIZE) || DETECT_OS_ANDROID) + /* Bionic does not implement _SC_LEVEL1_DCACHE_LINESIZE properly: */ + uint64_t ctr_el0; + asm("mrs\t%x0, ctr_el0" : "=r"(ctr_el0)); + return 4 << ((ctr_el0 >> 16) & 0xf); +#elif defined(_SC_LEVEL1_DCACHE_LINESIZE) + return sysconf(_SC_LEVEL1_DCACHE_LINESIZE); +#else + return 0; +#endif +} + void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo) { dev->instance->knl->bo_allow_dump(dev, bo); @@ -212,25 +287,6 @@ tu_enumerate_devices(struct vk_instance *vk_instance) #endif } -static long -l1_dcache_size() -{ - if (!(DETECT_ARCH_AARCH64 || DETECT_ARCH_X86 || DETECT_ARCH_X86_64)) - return 0; - -#if DETECT_ARCH_AARCH64 && \ - (!defined(_SC_LEVEL1_DCACHE_LINESIZE) || DETECT_OS_ANDROID) - /* Bionic does not implement _SC_LEVEL1_DCACHE_LINESIZE properly: */ - uint64_t ctr_el0; - asm("mrs\t%x0, ctr_el0" : "=r"(ctr_el0)); - return 4 << ((ctr_el0 >> 16) & 0xf); -#elif defined(_SC_LEVEL1_DCACHE_LINESIZE) - return sysconf(_SC_LEVEL1_DCACHE_LINESIZE); -#else - return 0; -#endif -} - /** * Enumeration entrypoint for drm devices */ @@ -290,9 +346,6 @@ tu_physical_device_try_create(struct vk_instance *vk_instance, assert(device); - device->level1_dcache_size = l1_dcache_size(); - device->has_cached_non_coherent_memory = device->level1_dcache_size > 0; - if (instance->vk.enabled_extensions.KHR_display) { master_fd = open(primary_path, O_RDWR | O_CLOEXEC); } diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h index 4306c0cbed8..e33fedc8d46 100644 --- 
a/src/freedreno/vulkan/tu_knl.h +++ b/src/freedreno/vulkan/tu_knl.h @@ -41,6 +41,11 @@ enum tu_timeline_sync_state { TU_TIMELINE_SYNC_STATE_SIGNALED, }; +enum tu_mem_sync_op { + TU_MEM_SYNC_CACHE_TO_GPU, + TU_MEM_SYNC_CACHE_FROM_GPU, +}; + struct tu_bo { uint32_t gem_handle; #ifdef TU_HAS_VIRTIO @@ -155,6 +160,15 @@ tu_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr); VkResult tu_bo_unmap(struct tu_device *dev, struct tu_bo *bo, bool reserve); +void +tu_bo_sync_cache(struct tu_device *dev, + struct tu_bo *bo, + VkDeviceSize offset, + VkDeviceSize size, + enum tu_mem_sync_op op); + +uint32_t tu_get_l1_dcache_size(); + void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo); void tu_bo_set_metadata(struct tu_device *dev, struct tu_bo *bo, diff --git a/src/freedreno/vulkan/tu_knl_drm.cc b/src/freedreno/vulkan/tu_knl_drm.cc index c0970d26a0e..c3eabed2ffc 100644 --- a/src/freedreno/vulkan/tu_knl_drm.cc +++ b/src/freedreno/vulkan/tu_knl_drm.cc @@ -12,102 +12,6 @@ #include "tu_device.h" #include "tu_rmv.h" -static inline void -tu_sync_cacheline_to_gpu(void const *p __attribute__((unused))) -{ -#if DETECT_ARCH_AARCH64 - /* Clean data cache. */ - __asm volatile("dc cvac, %0" : : "r" (p) : "memory"); -#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) - __builtin_ia32_clflush(p); -#elif DETECT_ARCH_ARM - /* DCCMVAC - same as DC CVAC on aarch64. - * Seems to be illegal to call from userspace. - */ - //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory"); - unreachable("Cache line clean is unsupported on ARMv7"); -#endif -} - -static inline void -tu_sync_cacheline_from_gpu(void const *p __attribute__((unused))) -{ -#if DETECT_ARCH_AARCH64 - /* Clean and Invalidate data cache, there is no separate Invalidate. */ - __asm volatile("dc civac, %0" : : "r" (p) : "memory"); -#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) - __builtin_ia32_clflush(p); -#elif DETECT_ARCH_ARM - /* DCCIMVAC - same as DC CIVAC on aarch64. 
- * Seems to be illegal to call from userspace. - */ - //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory"); - unreachable("Cache line invalidate is unsupported on ARMv7"); -#endif -} - -void -tu_sync_cache_bo(struct tu_device *dev, - struct tu_bo *bo, - VkDeviceSize offset, - VkDeviceSize size, - enum tu_mem_sync_op op) -{ - uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size; - char *start = (char *) bo->map + offset; - char *end = start + (size == VK_WHOLE_SIZE ? (bo->size - offset) : size); - - start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1)); - - for (; start < end; start += level1_dcache_size) { - if (op == TU_MEM_SYNC_CACHE_TO_GPU) { - tu_sync_cacheline_to_gpu(start); - } else { - tu_sync_cacheline_from_gpu(start); - } - } -} - -static VkResult -sync_cache(VkDevice _device, - enum tu_mem_sync_op op, - uint32_t count, - const VkMappedMemoryRange *ranges) -{ - VK_FROM_HANDLE(tu_device, device, _device); - - if (!device->physical_device->has_cached_non_coherent_memory) { - tu_finishme( - "data cache clean and invalidation are unsupported on this arch!"); - return VK_SUCCESS; - } - - for (uint32_t i = 0; i < count; i++) { - VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory); - tu_sync_cache_bo(device, mem->bo, ranges[i].offset, ranges[i].size, op); - } - - return VK_SUCCESS; -} - -VkResult -tu_FlushMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount, - pMemoryRanges); -} - -VkResult -tu_InvalidateMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount, - pMemoryRanges); -} - VkResult tu_allocate_userspace_iova(struct tu_device *dev, uint64_t size, diff --git a/src/freedreno/vulkan/tu_knl_drm.h b/src/freedreno/vulkan/tu_knl_drm.h index 
52fb4ef0e4b..f19969ceb8a 100644 --- a/src/freedreno/vulkan/tu_knl_drm.h +++ b/src/freedreno/vulkan/tu_knl_drm.h @@ -14,18 +14,6 @@ #include "util/timespec.h" -enum tu_mem_sync_op { - TU_MEM_SYNC_CACHE_TO_GPU, - TU_MEM_SYNC_CACHE_FROM_GPU, -}; - -void -tu_sync_cache_bo(struct tu_device *dev, - struct tu_bo *bo, - VkDeviceSize offset, - VkDeviceSize size, - enum tu_mem_sync_op op); - VkResult tu_allocate_userspace_iova(struct tu_device *dev, uint64_t size, uint64_t client_iova, diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index 58878669e6d..2b5346c8e17 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -615,7 +615,7 @@ msm_bo_init(struct tu_device *dev, * * MSM already does this automatically for uncached (MSM_BO_WC) memory. */ - tu_sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU); + tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU); } return result; diff --git a/src/freedreno/vulkan/tu_knl_drm_virtio.cc b/src/freedreno/vulkan/tu_knl_drm_virtio.cc index eb45e5cb62f..9680f2cd6a3 100644 --- a/src/freedreno/vulkan/tu_knl_drm_virtio.cc +++ b/src/freedreno/vulkan/tu_knl_drm_virtio.cc @@ -672,7 +672,7 @@ virtio_bo_init(struct tu_device *dev, * * MSM already does this automatically for uncached (MSM_BO_WC) memory. 
*/ - tu_sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU); + tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU); } return VK_SUCCESS; diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc index 6c7dd102605..da7ad4982c1 100644 --- a/src/freedreno/vulkan/tu_knl_kgsl.cc +++ b/src/freedreno/vulkan/tu_knl_kgsl.cc @@ -390,66 +390,6 @@ kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo) } static VkResult -kgsl_sync_cache(VkDevice _device, - uint32_t op, - uint32_t count, - const VkMappedMemoryRange *ranges) -{ - VK_FROM_HANDLE(tu_device, device, _device); - - struct kgsl_gpuobj_sync_obj *sync_list = - (struct kgsl_gpuobj_sync_obj *) vk_zalloc( - &device->vk.alloc, sizeof(*sync_list)*count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - - struct kgsl_gpuobj_sync gpuobj_sync = { - .objs = (uintptr_t) sync_list, - .obj_len = sizeof(*sync_list), - .count = count, - }; - - for (uint32_t i = 0; i < count; i++) { - VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory); - - sync_list[i].op = op; - sync_list[i].id = mem->bo->gem_handle; - sync_list[i].offset = ranges[i].offset; - sync_list[i].length = ranges[i].size == VK_WHOLE_SIZE - ? (mem->bo->size - ranges[i].offset) - : ranges[i].size; - } - - /* There are two other KGSL ioctls for flushing/invalidation: - * - IOCTL_KGSL_GPUMEM_SYNC_CACHE - processes one memory range at a time; - * - IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK - processes several buffers but - * not way to specify ranges. - * - * While IOCTL_KGSL_GPUOBJ_SYNC exactly maps to VK function. 
- */ - safe_ioctl(device->fd, IOCTL_KGSL_GPUOBJ_SYNC, &gpuobj_sync); - - vk_free(&device->vk.alloc, sync_list); - - return VK_SUCCESS; -} - -VkResult -tu_FlushMappedMemoryRanges(VkDevice device, - uint32_t count, - const VkMappedMemoryRange *ranges) -{ - return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_TO_GPU, count, ranges); -} - -VkResult -tu_InvalidateMappedMemoryRanges(VkDevice device, - uint32_t count, - const VkMappedMemoryRange *ranges) -{ - return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_FROM_GPU, count, ranges); -} - -static VkResult get_kgsl_prop(int fd, unsigned int type, void *value, size_t size) { struct kgsl_device_getproperty getprop = { @@ -1648,7 +1588,6 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd) device->has_cached_coherent_memory = kgsl_is_memory_type_supported( fd, KGSL_MEMFLAGS_IOCOHERENT | (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT)); - device->has_cached_non_coherent_memory = true; instance->knl = &kgsl_knl_funcs; |