author    Alexandre Courbot <acourbot@nvidia.com>  2015-09-04 19:52:11 +0900
committer Ben Skeggs <bskeggs@redhat.com>          2015-11-03 14:57:28 +1000
commit    d72b4e0442c71266bcd75ff33609a53adaac3642 (patch)
tree      86a8ebc89f540744a9ad19e0bdcd3231fa604a87 /lib
parent    eb43004087507e71e7294fb786b715ae81242c34 (diff)
instmem/gk20a: use direct CPU access
The Great Nouveau Refactoring Take II brought us a lot of goodness, including acquire/release methods that are called before and after an instobj is modified. These functions can be used as synchronization points to manage CPU/GPU coherency if we modify an instobj using the CPU.

This patch replaces the legacy and slow PRAMIN access for gk20a instmem with CPU mappings and writes. An LRU list is used to unmap unused mappings once a certain threshold (currently 1MB) of mapped instobjs is reached. This allows mappings to be reused most of the time.

Accessing instobjs with the CPU requires maintaining the GPU L2 cache, which we do in the acquire/release functions. This triggers a lot of L2 flushes/invalidates, but most of them are performed on an empty cache (and thus return immediately), and overall context-setup performance benefits greatly: from 250ms to 160ms on Jetson TK1 for a simple libdrm program. Making L2 management more explicit should allow us to gain some more performance in the future.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
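To make the mechanism above concrete, here is a minimal, self-contained userspace sketch of the same idea: acquire/release bracket every CPU access, maintain the GPU L2 cache, and keep released mappings on an LRU that is trimmed once a threshold is exceeded. This is not the code the patch adds; every name in it (instobj_acquire, instobj_release, lru_trim, MAP_THRESHOLD, ...) is invented for illustration, malloc/free stand in for vmap()/vunmap(), and the L2 helpers are empty stubs.

/*
 * Minimal sketch of the acquire/release + LRU scheme described above.
 * Not the actual nouveau/gk20a code; all names are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAP_THRESHOLD (1 << 20)            /* 1MB of idle CPU mappings */

struct instobj {
	void *vaddr;                       /* CPU mapping, NULL if unmapped */
	size_t size;
	struct instobj *lru_next;          /* idle-mapping LRU, oldest first */
};

static struct instobj *lru_head;           /* idle but still-mapped objects */
static size_t lru_bytes;                   /* total size of idle mappings */

static void l2_flush(void)      { /* write back dirty GPU L2 lines */ }
static void l2_invalidate(void) { /* drop stale GPU L2 lines */ }

/* Unlink obj from the idle-mapping LRU if it is currently on it. */
static void lru_remove(struct instobj *obj)
{
	struct instobj **pp = &lru_head;
	while (*pp && *pp != obj)
		pp = &(*pp)->lru_next;
	if (*pp) {
		*pp = obj->lru_next;
		lru_bytes -= obj->size;
	}
}

/* Unmap the oldest idle mappings until we are back under the threshold. */
static void lru_trim(void)
{
	while (lru_head && lru_bytes > MAP_THRESHOLD) {
		struct instobj *victim = lru_head;
		lru_head = victim->lru_next;
		lru_bytes -= victim->size;
		free(victim->vaddr);       /* stands in for vunmap() */
		victim->vaddr = NULL;
	}
}

/* Called before the CPU reads or writes the object. */
static void *instobj_acquire(struct instobj *obj)
{
	l2_flush();                        /* CPU must see the GPU's writes */
	if (obj->vaddr)
		lru_remove(obj);           /* reuse the existing mapping */
	else
		obj->vaddr = malloc(obj->size);  /* stands in for vmap() */
	return obj->vaddr;
}

/* Called once the CPU is done with the object. */
static void instobj_release(struct instobj *obj)
{
	obj->lru_next = lru_head;          /* keep the mapping around */
	lru_head = obj;
	lru_bytes += obj->size;
	lru_trim();
	l2_invalidate();                   /* GPU must refetch CPU's writes */
}

int main(void)
{
	struct instobj obj = { .size = 4096 };
	char *map = instobj_acquire(&obj);
	map[0] = 0x42;                     /* direct CPU write, no PRAMIN */
	instobj_release(&obj);
	printf("%zu bytes of idle mappings retained\n", lru_bytes);
	free(obj.vaddr);
	return 0;
}

Keeping released mappings on the LRU instead of unmapping them immediately is what lets most acquires reuse an existing mapping, as the commit message describes.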
Diffstat (limited to 'lib')
-rw-r--r--  lib/include/nvif/os.h | 72
1 file changed, 53 insertions(+), 19 deletions(-)
diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h
index 48bcf150..2df30489 100644
--- a/lib/include/nvif/os.h
+++ b/lib/include/nvif/os.h
@@ -55,6 +55,7 @@ typedef int16_t __s16;
typedef int8_t __s8;
#endif
+typedef u64 phys_addr_t;
typedef u64 dma_addr_t;
typedef dma_addr_t resource_size_t;
@@ -480,6 +481,30 @@ get_num_physpages(void)
return 0;
}
+typedef struct {
+ unsigned long pgprot;
+} pgprot_t;
+
+#define __pgprot(x) ((pgprot_t) { (x) } )
+#define pgprot_noncached(prot) (prot)
+#define pgprot_writecombine pgprot_noncached
+
+#define PAGE_KERNEL __pgprot(0)
+
+#define VM_MAP 4
+
+static inline void *
+vmap(struct page **pages, unsigned int count,
+ unsigned long flags, pgprot_t prot)
+{
+ return NULL;
+}
+
+static inline void
+vunmap(const void *addr)
+{
+}
+
/******************************************************************************
* assertions
*****************************************************************************/
@@ -897,6 +922,34 @@ iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
}
/******************************************************************************
+ * DMA
+ *****************************************************************************/
+static inline dma_addr_t
+dma_map_page(struct device *pdev, struct page *page, int offset,
+ int length, unsigned flags)
+{
+ return 0;
+}
+
+
+static inline bool
+dma_mapping_error(struct device *pdev, dma_addr_t addr)
+{
+ return true;
+}
+
+static inline void
+dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags)
+{
+}
+
+static inline phys_addr_t
+dma_to_phys(struct device *dev, dma_addr_t addr)
+{
+ return 0;
+}
+
+/******************************************************************************
* PCI
*****************************************************************************/
#include <pciaccess.h>
@@ -975,25 +1028,6 @@ pci_resource_len(struct pci_dev *pdev, int bar)
return pdev->pdev->regions[bar].size;
}
-static inline dma_addr_t
-dma_map_page(struct device *pdev, struct page *page, int offset,
- int length, unsigned flags)
-{
- return 0;
-}
-
-
-static inline bool
-dma_mapping_error(struct device *pdev, dma_addr_t addr)
-{
- return true;
-}
-
-static inline void
-dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags)
-{
-}
-
static inline int
pci_enable_rom(struct pci_dev *pdev)
{