summaryrefslogtreecommitdiff
path: root/src/freedreno
diff options
context:
space:
mode:
author: Danylo Piliaiev <dpiliaiev@igalia.com>	2024-07-11 18:21:52 +0200
committer: Marge Bot <emma+marge@anholt.net>	2024-07-12 11:48:36 +0000
commit: 7231eef6304eb356611f947fe3a0429160a67097 (patch)
tree: ac0c34d7cbd4e13098a59d09e4ff0fbaa41f2d7c /src/freedreno
parent: 5bb9c1cca94f4aa47e669c769c08af4aabf31e71 (diff)
tu: Have single Flush/Invalidate memory entrypoints
Make all flush/invalidation logic kernel independent. The only downside is
that aarch32 would have cached non-coherent memory disabled, but there are
probably no users of it.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11468
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30131>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/vulkan/tu_device.cc43
-rw-r--r--src/freedreno/vulkan/tu_knl.cc97
-rw-r--r--src/freedreno/vulkan/tu_knl.h14
-rw-r--r--src/freedreno/vulkan/tu_knl_drm.cc96
-rw-r--r--src/freedreno/vulkan/tu_knl_drm.h12
-rw-r--r--src/freedreno/vulkan/tu_knl_drm_msm.cc2
-rw-r--r--src/freedreno/vulkan/tu_knl_drm_virtio.cc2
-rw-r--r--src/freedreno/vulkan/tu_knl_kgsl.cc61
8 files changed, 134 insertions, 193 deletions
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index 6187886476a..78dfa778fea 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -1230,6 +1230,10 @@ tu_physical_device_init(struct tu_physical_device *device,
goto fail_free_name;
}
+ device->level1_dcache_size = tu_get_l1_dcache_size();
+ device->has_cached_non_coherent_memory =
+ device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
+
device->memory.type_count = 1;
device->memory.types[0] =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
@@ -2959,6 +2963,45 @@ tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInf
return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
}
+static VkResult
+sync_cache(VkDevice _device,
+ enum tu_mem_sync_op op,
+ uint32_t count,
+ const VkMappedMemoryRange *ranges)
+{
+ VK_FROM_HANDLE(tu_device, device, _device);
+
+ if (!device->physical_device->has_cached_non_coherent_memory) {
+ tu_finishme(
+ "data cache clean and invalidation are unsupported on this arch!");
+ return VK_SUCCESS;
+ }
+
+ for (uint32_t i = 0; i < count; i++) {
+ VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
+ tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op);
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult
+tu_FlushMappedMemoryRanges(VkDevice _device,
+ uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
+{
+ return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
+ pMemoryRanges);
+}
+
+VkResult
+tu_InvalidateMappedMemoryRanges(VkDevice _device,
+ uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
+{
+ return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
+ pMemoryRanges);
+}
VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceMemoryCommitment(VkDevice device,
diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc
index a9ff89b8f02..76e92e481c1 100644
--- a/src/freedreno/vulkan/tu_knl.cc
+++ b/src/freedreno/vulkan/tu_knl.cc
@@ -92,6 +92,81 @@ tu_bo_unmap(struct tu_device *dev, struct tu_bo *bo, bool reserve)
return VK_SUCCESS;
}
+static inline void
+tu_sync_cacheline_to_gpu(void const *p __attribute__((unused)))
+{
+#if DETECT_ARCH_AARCH64
+ /* Clean data cache. */
+ __asm volatile("dc cvac, %0" : : "r" (p) : "memory");
+#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
+ __builtin_ia32_clflush(p);
+#elif DETECT_ARCH_ARM
+ /* DCCMVAC - same as DC CVAC on aarch64.
+ * Seems to be illegal to call from userspace.
+ */
+ //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory");
+ unreachable("Cache line clean is unsupported on ARMv7");
+#endif
+}
+
+static inline void
+tu_sync_cacheline_from_gpu(void const *p __attribute__((unused)))
+{
+#if DETECT_ARCH_AARCH64
+ /* Clean and Invalidate data cache, there is no separate Invalidate. */
+ __asm volatile("dc civac, %0" : : "r" (p) : "memory");
+#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
+ __builtin_ia32_clflush(p);
+#elif DETECT_ARCH_ARM
+ /* DCCIMVAC - same as DC CIVAC on aarch64.
+ * Seems to be illegal to call from userspace.
+ */
+ //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory");
+ unreachable("Cache line invalidate is unsupported on ARMv7");
+#endif
+}
+
+void
+tu_bo_sync_cache(struct tu_device *dev,
+ struct tu_bo *bo,
+ VkDeviceSize offset,
+ VkDeviceSize size,
+ enum tu_mem_sync_op op)
+{
+ uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size;
+ char *start = (char *) bo->map + offset;
+ char *end = start + (size == VK_WHOLE_SIZE ? (bo->size - offset) : size);
+
+ start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1));
+
+ for (; start < end; start += level1_dcache_size) {
+ if (op == TU_MEM_SYNC_CACHE_TO_GPU) {
+ tu_sync_cacheline_to_gpu(start);
+ } else {
+ tu_sync_cacheline_from_gpu(start);
+ }
+ }
+}
+
+uint32_t
+tu_get_l1_dcache_size()
+{
+if (!(DETECT_ARCH_AARCH64 || DETECT_ARCH_X86 || DETECT_ARCH_X86_64))
+ return 0;
+
+#if DETECT_ARCH_AARCH64 && \
+ (!defined(_SC_LEVEL1_DCACHE_LINESIZE) || DETECT_OS_ANDROID)
+ /* Bionic does not implement _SC_LEVEL1_DCACHE_LINESIZE properly: */
+ uint64_t ctr_el0;
+ asm("mrs\t%x0, ctr_el0" : "=r"(ctr_el0));
+ return 4 << ((ctr_el0 >> 16) & 0xf);
+#elif defined(_SC_LEVEL1_DCACHE_LINESIZE)
+ return sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+#else
+ return 0;
+#endif
+}
+
void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
{
dev->instance->knl->bo_allow_dump(dev, bo);
@@ -212,25 +287,6 @@ tu_enumerate_devices(struct vk_instance *vk_instance)
#endif
}
-static long
-l1_dcache_size()
-{
- if (!(DETECT_ARCH_AARCH64 || DETECT_ARCH_X86 || DETECT_ARCH_X86_64))
- return 0;
-
-#if DETECT_ARCH_AARCH64 && \
- (!defined(_SC_LEVEL1_DCACHE_LINESIZE) || DETECT_OS_ANDROID)
- /* Bionic does not implement _SC_LEVEL1_DCACHE_LINESIZE properly: */
- uint64_t ctr_el0;
- asm("mrs\t%x0, ctr_el0" : "=r"(ctr_el0));
- return 4 << ((ctr_el0 >> 16) & 0xf);
-#elif defined(_SC_LEVEL1_DCACHE_LINESIZE)
- return sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
-#else
- return 0;
-#endif
-}
-
/**
* Enumeration entrypoint for drm devices
*/
@@ -290,9 +346,6 @@ tu_physical_device_try_create(struct vk_instance *vk_instance,
assert(device);
- device->level1_dcache_size = l1_dcache_size();
- device->has_cached_non_coherent_memory = device->level1_dcache_size > 0;
-
if (instance->vk.enabled_extensions.KHR_display) {
master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
}
diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h
index 4306c0cbed8..e33fedc8d46 100644
--- a/src/freedreno/vulkan/tu_knl.h
+++ b/src/freedreno/vulkan/tu_knl.h
@@ -41,6 +41,11 @@ enum tu_timeline_sync_state {
TU_TIMELINE_SYNC_STATE_SIGNALED,
};
+enum tu_mem_sync_op {
+ TU_MEM_SYNC_CACHE_TO_GPU,
+ TU_MEM_SYNC_CACHE_FROM_GPU,
+};
+
struct tu_bo {
uint32_t gem_handle;
#ifdef TU_HAS_VIRTIO
@@ -155,6 +160,15 @@ tu_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr);
VkResult
tu_bo_unmap(struct tu_device *dev, struct tu_bo *bo, bool reserve);
+void
+tu_bo_sync_cache(struct tu_device *dev,
+ struct tu_bo *bo,
+ VkDeviceSize offset,
+ VkDeviceSize size,
+ enum tu_mem_sync_op op);
+
+uint32_t tu_get_l1_dcache_size();
+
void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo);
void tu_bo_set_metadata(struct tu_device *dev, struct tu_bo *bo,
diff --git a/src/freedreno/vulkan/tu_knl_drm.cc b/src/freedreno/vulkan/tu_knl_drm.cc
index c0970d26a0e..c3eabed2ffc 100644
--- a/src/freedreno/vulkan/tu_knl_drm.cc
+++ b/src/freedreno/vulkan/tu_knl_drm.cc
@@ -12,102 +12,6 @@
#include "tu_device.h"
#include "tu_rmv.h"
-static inline void
-tu_sync_cacheline_to_gpu(void const *p __attribute__((unused)))
-{
-#if DETECT_ARCH_AARCH64
- /* Clean data cache. */
- __asm volatile("dc cvac, %0" : : "r" (p) : "memory");
-#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
- __builtin_ia32_clflush(p);
-#elif DETECT_ARCH_ARM
- /* DCCMVAC - same as DC CVAC on aarch64.
- * Seems to be illegal to call from userspace.
- */
- //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory");
- unreachable("Cache line clean is unsupported on ARMv7");
-#endif
-}
-
-static inline void
-tu_sync_cacheline_from_gpu(void const *p __attribute__((unused)))
-{
-#if DETECT_ARCH_AARCH64
- /* Clean and Invalidate data cache, there is no separate Invalidate. */
- __asm volatile("dc civac, %0" : : "r" (p) : "memory");
-#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64)
- __builtin_ia32_clflush(p);
-#elif DETECT_ARCH_ARM
- /* DCCIMVAC - same as DC CIVAC on aarch64.
- * Seems to be illegal to call from userspace.
- */
- //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory");
- unreachable("Cache line invalidate is unsupported on ARMv7");
-#endif
-}
-
-void
-tu_sync_cache_bo(struct tu_device *dev,
- struct tu_bo *bo,
- VkDeviceSize offset,
- VkDeviceSize size,
- enum tu_mem_sync_op op)
-{
- uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size;
- char *start = (char *) bo->map + offset;
- char *end = start + (size == VK_WHOLE_SIZE ? (bo->size - offset) : size);
-
- start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1));
-
- for (; start < end; start += level1_dcache_size) {
- if (op == TU_MEM_SYNC_CACHE_TO_GPU) {
- tu_sync_cacheline_to_gpu(start);
- } else {
- tu_sync_cacheline_from_gpu(start);
- }
- }
-}
-
-static VkResult
-sync_cache(VkDevice _device,
- enum tu_mem_sync_op op,
- uint32_t count,
- const VkMappedMemoryRange *ranges)
-{
- VK_FROM_HANDLE(tu_device, device, _device);
-
- if (!device->physical_device->has_cached_non_coherent_memory) {
- tu_finishme(
- "data cache clean and invalidation are unsupported on this arch!");
- return VK_SUCCESS;
- }
-
- for (uint32_t i = 0; i < count; i++) {
- VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
- tu_sync_cache_bo(device, mem->bo, ranges[i].offset, ranges[i].size, op);
- }
-
- return VK_SUCCESS;
-}
-
-VkResult
-tu_FlushMappedMemoryRanges(VkDevice _device,
- uint32_t memoryRangeCount,
- const VkMappedMemoryRange *pMemoryRanges)
-{
- return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
- pMemoryRanges);
-}
-
-VkResult
-tu_InvalidateMappedMemoryRanges(VkDevice _device,
- uint32_t memoryRangeCount,
- const VkMappedMemoryRange *pMemoryRanges)
-{
- return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
- pMemoryRanges);
-}
-
VkResult
tu_allocate_userspace_iova(struct tu_device *dev,
uint64_t size,
diff --git a/src/freedreno/vulkan/tu_knl_drm.h b/src/freedreno/vulkan/tu_knl_drm.h
index 52fb4ef0e4b..f19969ceb8a 100644
--- a/src/freedreno/vulkan/tu_knl_drm.h
+++ b/src/freedreno/vulkan/tu_knl_drm.h
@@ -14,18 +14,6 @@
#include "util/timespec.h"
-enum tu_mem_sync_op {
- TU_MEM_SYNC_CACHE_TO_GPU,
- TU_MEM_SYNC_CACHE_FROM_GPU,
-};
-
-void
-tu_sync_cache_bo(struct tu_device *dev,
- struct tu_bo *bo,
- VkDeviceSize offset,
- VkDeviceSize size,
- enum tu_mem_sync_op op);
-
VkResult tu_allocate_userspace_iova(struct tu_device *dev,
uint64_t size,
uint64_t client_iova,
diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc
index 58878669e6d..2b5346c8e17 100644
--- a/src/freedreno/vulkan/tu_knl_drm_msm.cc
+++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc
@@ -615,7 +615,7 @@ msm_bo_init(struct tu_device *dev,
*
* MSM already does this automatically for uncached (MSM_BO_WC) memory.
*/
- tu_sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
+ tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
}
return result;
diff --git a/src/freedreno/vulkan/tu_knl_drm_virtio.cc b/src/freedreno/vulkan/tu_knl_drm_virtio.cc
index eb45e5cb62f..9680f2cd6a3 100644
--- a/src/freedreno/vulkan/tu_knl_drm_virtio.cc
+++ b/src/freedreno/vulkan/tu_knl_drm_virtio.cc
@@ -672,7 +672,7 @@ virtio_bo_init(struct tu_device *dev,
*
* MSM already does this automatically for uncached (MSM_BO_WC) memory.
*/
- tu_sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
+ tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
}
return VK_SUCCESS;
diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc
index 6c7dd102605..da7ad4982c1 100644
--- a/src/freedreno/vulkan/tu_knl_kgsl.cc
+++ b/src/freedreno/vulkan/tu_knl_kgsl.cc
@@ -390,66 +390,6 @@ kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
}
static VkResult
-kgsl_sync_cache(VkDevice _device,
- uint32_t op,
- uint32_t count,
- const VkMappedMemoryRange *ranges)
-{
- VK_FROM_HANDLE(tu_device, device, _device);
-
- struct kgsl_gpuobj_sync_obj *sync_list =
- (struct kgsl_gpuobj_sync_obj *) vk_zalloc(
- &device->vk.alloc, sizeof(*sync_list)*count, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
-
- struct kgsl_gpuobj_sync gpuobj_sync = {
- .objs = (uintptr_t) sync_list,
- .obj_len = sizeof(*sync_list),
- .count = count,
- };
-
- for (uint32_t i = 0; i < count; i++) {
- VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
-
- sync_list[i].op = op;
- sync_list[i].id = mem->bo->gem_handle;
- sync_list[i].offset = ranges[i].offset;
- sync_list[i].length = ranges[i].size == VK_WHOLE_SIZE
- ? (mem->bo->size - ranges[i].offset)
- : ranges[i].size;
- }
-
- /* There are two other KGSL ioctls for flushing/invalidation:
- * - IOCTL_KGSL_GPUMEM_SYNC_CACHE - processes one memory range at a time;
- * - IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK - processes several buffers but
- * not way to specify ranges.
- *
- * While IOCTL_KGSL_GPUOBJ_SYNC exactly maps to VK function.
- */
- safe_ioctl(device->fd, IOCTL_KGSL_GPUOBJ_SYNC, &gpuobj_sync);
-
- vk_free(&device->vk.alloc, sync_list);
-
- return VK_SUCCESS;
-}
-
-VkResult
-tu_FlushMappedMemoryRanges(VkDevice device,
- uint32_t count,
- const VkMappedMemoryRange *ranges)
-{
- return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_TO_GPU, count, ranges);
-}
-
-VkResult
-tu_InvalidateMappedMemoryRanges(VkDevice device,
- uint32_t count,
- const VkMappedMemoryRange *ranges)
-{
- return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_FROM_GPU, count, ranges);
-}
-
-static VkResult
get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
{
struct kgsl_device_getproperty getprop = {
@@ -1648,7 +1588,6 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd)
device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
fd, KGSL_MEMFLAGS_IOCOHERENT |
(KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
- device->has_cached_non_coherent_memory = true;
instance->knl = &kgsl_knl_funcs;