diff options
author | Alexandre Courbot <acourbot@nvidia.com> | 2015-09-04 19:52:11 +0900 |
---|---|---|
committer | Ben Skeggs <bskeggs@redhat.com> | 2015-11-03 14:57:28 +1000 |
commit | d72b4e0442c71266bcd75ff33609a53adaac3642 (patch) | |
tree | 86a8ebc89f540744a9ad19e0bdcd3231fa604a87 /lib | |
parent | eb43004087507e71e7294fb786b715ae81242c34 (diff) |
instmem/gk20a: use direct CPU access
The Great Nouveau Refactoring Take II brought us a lot of goodness,
including acquire/release methods that are called before and after an
instobj is modified. These functions can be used as synchronization
points to manage CPU/GPU coherency if we modify an instobj using the
CPU.
This patch replaces the legacy and slow PRAMIN access for gk20a instmem
with CPU mappings and writes. An LRU list is used to unmap unused
mappings after a certain threshold (currently 1MB) of mapped instobjs is
reached. This allows mappings to be reused most of the time.
Accessing instobjs using the CPU requires maintaining the GPU L2 cache,
which we do in the acquire/release functions. This triggers a lot of L2
flushes/invalidates, but most of them are performed on an empty cache
(and thus return immediately), and overall context setup performance
greatly benefits from this (from 250ms to 160ms on Jetson TK1 for a
simple libdrm program).
Making L2 management more explicit should allow us to grab some more
performance in the future.
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/include/nvif/os.h | 72 |
1 files changed, 53 insertions, 19 deletions
diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h index 48bcf150..2df30489 100644 --- a/lib/include/nvif/os.h +++ b/lib/include/nvif/os.h @@ -55,6 +55,7 @@ typedef int16_t __s16; typedef int8_t __s8; #endif +typedef u64 phys_addr_t; typedef u64 dma_addr_t; typedef dma_addr_t resource_size_t; @@ -480,6 +481,30 @@ get_num_physpages(void) return 0; } +typedef struct { + unsigned long pgprot; +} pgprot_t; + +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define pgprot_noncached(prot) (prot) +#define pgprot_writecombine pgprot_noncached + +#define PAGE_KERNEL __pgprot(0) + +#define VM_MAP 4 + +static inline void * +vmap(struct page **pages, unsigned int count, + unsigned long flags, pgprot_t prot) +{ + return NULL; +} + +static inline void +vunmap(const void *addr) +{ +} + /****************************************************************************** * assertions *****************************************************************************/ @@ -897,6 +922,34 @@ iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) } /****************************************************************************** + * DMA + *****************************************************************************/ +static inline dma_addr_t +dma_map_page(struct device *pdev, struct page *page, int offset, + int length, unsigned flags) +{ + return 0; +} + + +static inline bool +dma_mapping_error(struct device *pdev, dma_addr_t addr) +{ + return true; +} + +static inline void +dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags) +{ +} + +static inline phys_addr_t +dma_to_phys(struct device *dev, dma_addr_t addr) +{ + return 0; +} + +/****************************************************************************** * PCI *****************************************************************************/ #include <pciaccess.h> @@ -975,25 +1028,6 @@ pci_resource_len(struct pci_dev *pdev, int bar) return pdev->pdev->regions[bar].size; } -static inline dma_addr_t 
-dma_map_page(struct device *pdev, struct page *page, int offset, - int length, unsigned flags) -{ - return 0; -} - - -static inline bool -dma_mapping_error(struct device *pdev, dma_addr_t addr) -{ - return true; -} - -static inline void -dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags) -{ -} - static inline int pci_enable_rom(struct pci_dev *pdev) { |