diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 20:42:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 20:42:10 -0800 |
commit | e60e1ee60630cafef5e430c2ae364877e061d980 (patch) | |
tree | 816aeef8fe8d4a2c6a1ebbc7a350839bac8dd4c2 /drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c | |
parent | 5d352e69c60e54b5f04d6e337a1d2bf0dbf3d94a (diff) | |
parent | f150891fd9878ef0d9197c4e8451ce67c3bdd014 (diff) |
Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main drm pull request for v4.15.
Core:
- Atomic object lifetime fixes
- Atomic iterator improvements
- Sparse/smatch fixes
- Legacy kms ioctls to be interruptible
- EDID override improvements
- fb/gem helper cleanups
- Simple outreachy patches
- Documentation improvements
- Fix dma-buf rcu races
- DRM mode object leasing for improving VR use cases.
- vgaarb improvements for non-x86 platforms.
New driver:
- tve200: Faraday Technology TVE200 block.
This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in
the StorLink SL3516 (later Cortina Systems CS3516) as well as the
Grain Media GM8180.
New bridges:
- SiI9234 support
New panels:
- S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba
LT089AC19000, Innolux AT043TN24
i915:
- Remove Coffeelake from alpha support
- Cannonlake workarounds
- Infoframe refactoring for DisplayPort
- VBT updates
- DisplayPort vswing/emph/buffer translation refactoring
- CCS fixes
- Restore GPU clock boost on missed vblanks
- Scatter list updates for userptr allocations
- Gen9+ transition watermarks
- Display IPC (Isochronous Priority Control)
- Private PAT management
- GVT: improved error handling and pci config sanitizing
- Execlist refactoring
- Transparent Huge Page support
- User defined priorities support
- HuC/GuC firmware refactoring
- DP MST fixes
- eDP power sequencing fixes
- Use RCU instead of stop_machine
- PSR state tracking support
- Eviction fixes
- BDW DP aux channel timeout fixes
- LSPCON fixes
- Cannonlake PLL fixes
amdgpu:
- Per VM BO support
- Powerplay cleanups
- CI powerplay support
- PASID mgr for kfd
- SR-IOV fixes
- initial GPU reset for vega10
- Prime mmap support
- TTM updates
- Clock query interface for Raven
- Fence to handle ioctl
- UVD encode ring support on Polaris
- Transparent huge page DMA support
- Compute LRU pipe tweaks
- BO flag to allow buffers to opt out of implicit sync
- CTX priority setting API
- VRAM lost infrastructure plumbing
qxl:
- fix flicker since atomic rework
amdkfd:
- Further improvements from internal AMD tree
- Usermode events
- Drop radeon support
nouveau:
- Pascal temperature sensor support
- Improved BAR2 handling
- MMU rework to support Pascal MMU
exynos:
- Improved HDMI/mixer support
- HDMI audio interface support
tegra:
- Prep work for tegra186
- Cleanup/fixes
msm:
- Preemption support for a5xx
- Display fixes for 8x96 (snapdragon 820)
- Async cursor plane fixes
- FW loading rework
- GPU debugging improvements
vc4:
- Prep for DSI panels
- fix T-format tiling scanout
- New madvise ioctl
Rockchip:
- LVDS support
omapdrm:
- omap4 HDMI CEC support
etnaviv:
- GPU performance counters groundwork
sun4i:
- refactor driver load + TCON backend
- HDMI improvements
- A31 support
- Misc fixes
udl:
- Probe/EDID read fixes.
tilcdc:
- Misc fixes.
pl111:
- Support more variants
adv7511:
- Improve EDID handling.
- HDMI CEC support
sii8620:
- Add remote control support"
* tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits)
drm/rockchip: analogix_dp: Use mutex rather than spinlock
drm/mode_object: fix documentation for object lookups.
drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU
drm/i915: Move init_clock_gating() back to where it was
drm/i915: Prune the reservation shared fence array
drm/i915: Idle the GPU before shinking everything
drm/i915: Lock llist_del_first() vs llist_del_all()
drm/i915: Calculate ironlake intermediate watermarks correctly, v2.
drm/i915: Disable lazy PPGTT page table optimization for vGPU
drm/i915/execlists: Remove the priority "optimisation"
drm/i915: Filter out spurious execlists context-switch interrupts
drm/amdgpu: use irq-safe lock for kiq->ring_lock
drm/amdgpu: bypass lru touch for KIQ ring submission
drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories()
drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs()
drm/amd/powerplay: initialize a variable before using it
drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels
drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition
drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug
drm/rockchip: add CONFIG_OF dependency for lvds
...
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c')
-rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c | 329 |
1 files changed, 233 insertions, 96 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index 6d512c062ae3..1ba7289684aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -31,147 +31,293 @@ struct nv50_instmem { struct nvkm_instmem base; - unsigned long lock_flags; - spinlock_t lock; u64 addr; + + /* Mappings that can be evicted when BAR2 space has been exhausted. */ + struct list_head lru; }; /****************************************************************************** * instmem object implementation *****************************************************************************/ -#define nv50_instobj(p) container_of((p), struct nv50_instobj, memory) +#define nv50_instobj(p) container_of((p), struct nv50_instobj, base.memory) struct nv50_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nv50_instmem *imem; - struct nvkm_mem *mem; - struct nvkm_vma bar; + struct nvkm_memory *ram; + struct nvkm_vma *bar; + refcount_t maps; void *map; + struct list_head lru; }; -static enum nvkm_memory_target -nv50_instobj_target(struct nvkm_memory *memory) +static void +nv50_instobj_wr32_slow(struct nvkm_memory *memory, u64 offset, u32 data) { - return NVKM_MEM_TARGET_VRAM; + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_device *device = imem->base.subdev.device; + u64 base = (nvkm_memory_addr(iobj->ram) + offset) & 0xffffff00000ULL; + u64 addr = (nvkm_memory_addr(iobj->ram) + offset) & 0x000000fffffULL; + unsigned long flags; + + spin_lock_irqsave(&imem->base.lock, flags); + if (unlikely(imem->addr != base)) { + nvkm_wr32(device, 0x001700, base >> 16); + imem->addr = base; + } + nvkm_wr32(device, 0x700000 + addr, data); + spin_unlock_irqrestore(&imem->base.lock, flags); } -static u64 -nv50_instobj_addr(struct nvkm_memory *memory) +static u32 +nv50_instobj_rd32_slow(struct nvkm_memory *memory, u64 offset) { - return nv50_instobj(memory)->mem->offset; + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_device *device = imem->base.subdev.device; + u64 base = (nvkm_memory_addr(iobj->ram) + offset) & 0xffffff00000ULL; + u64 addr = (nvkm_memory_addr(iobj->ram) + offset) & 0x000000fffffULL; + u32 data; + unsigned long flags; + + spin_lock_irqsave(&imem->base.lock, flags); + if (unlikely(imem->addr != base)) { + nvkm_wr32(device, 0x001700, base >> 16); + imem->addr = base; + } + data = nvkm_rd32(device, 0x700000 + addr); + spin_unlock_irqrestore(&imem->base.lock, flags); + return data; } -static u64 -nv50_instobj_size(struct nvkm_memory *memory) +static const struct nvkm_memory_ptrs +nv50_instobj_slow = { + .rd32 = nv50_instobj_rd32_slow, + .wr32 = nv50_instobj_wr32_slow, +}; + +static void +nv50_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) { - return (u64)nv50_instobj(memory)->mem->size << NVKM_RAM_MM_SHIFT; + iowrite32_native(data, nv50_instobj(memory)->map + offset); } +static u32 +nv50_instobj_rd32(struct nvkm_memory *memory, u64 offset) +{ + return ioread32_native(nv50_instobj(memory)->map + offset); +} + +static const struct nvkm_memory_ptrs +nv50_instobj_fast = { + .rd32 = nv50_instobj_rd32, + .wr32 = nv50_instobj_wr32, +}; + static void -nv50_instobj_boot(struct nvkm_memory *memory, struct nvkm_vm *vm) +nv50_instobj_kmap(struct nv50_instobj *iobj, struct nvkm_vmm *vmm) { - struct nv50_instobj *iobj = nv50_instobj(memory); - struct nvkm_subdev *subdev = &iobj->imem->base.subdev; + struct nv50_instmem *imem = iobj->imem; + struct nv50_instobj *eobj; + struct nvkm_memory *memory = &iobj->base.memory; + struct nvkm_subdev *subdev = &imem->base.subdev; struct nvkm_device *device = subdev->device; + struct nvkm_vma *bar = NULL, *ebar; u64 size = nvkm_memory_size(memory); - void __iomem *map; + void *emap; int ret; - iobj->map = ERR_PTR(-ENOMEM); - - ret = nvkm_vm_get(vm, size, 12, NV_MEM_ACCESS_RW, &iobj->bar); - if (ret == 0) { - map = ioremap(device->func->resource_addr(device, 3) + - (u32)iobj->bar.offset, size); - if (map) { - nvkm_memory_map(memory, &iobj->bar, 0); - iobj->map = map; - } else { - nvkm_warn(subdev, "PRAMIN ioremap failed\n"); - nvkm_vm_put(&iobj->bar); + /* Attempt to allocate BAR2 address-space and map the object + * into it. The lock has to be dropped while doing this due + * to the possibility of recursion for page table allocation. + */ + mutex_unlock(&subdev->mutex); + while ((ret = nvkm_vmm_get(vmm, 12, size, &bar))) { + /* Evict unused mappings, and keep retrying until we either + * succeed,or there's no more objects left on the LRU. + */ + mutex_lock(&subdev->mutex); + eobj = list_first_entry_or_null(&imem->lru, typeof(*eobj), lru); + if (eobj) { + nvkm_debug(subdev, "evict %016llx %016llx @ %016llx\n", + nvkm_memory_addr(&eobj->base.memory), + nvkm_memory_size(&eobj->base.memory), + eobj->bar->addr); + list_del_init(&eobj->lru); + ebar = eobj->bar; + eobj->bar = NULL; + emap = eobj->map; + eobj->map = NULL; } - } else { - nvkm_warn(subdev, "PRAMIN exhausted\n"); + mutex_unlock(&subdev->mutex); + if (!eobj) + break; + iounmap(emap); + nvkm_vmm_put(vmm, &ebar); } + + if (ret == 0) + ret = nvkm_memory_map(memory, 0, vmm, bar, NULL, 0); + mutex_lock(&subdev->mutex); + if (ret || iobj->bar) { + /* We either failed, or another thread beat us. */ + mutex_unlock(&subdev->mutex); + nvkm_vmm_put(vmm, &bar); + mutex_lock(&subdev->mutex); + return; + } + + /* Make the mapping visible to the host. */ + iobj->bar = bar; + iobj->map = ioremap_wc(device->func->resource_addr(device, 3) + + (u32)iobj->bar->addr, size); + if (!iobj->map) { + nvkm_warn(subdev, "PRAMIN ioremap failed\n"); + nvkm_vmm_put(vmm, &iobj->bar); + } +} + +static int +nv50_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, + struct nvkm_vma *vma, void *argv, u32 argc) +{ + memory = nv50_instobj(memory)->ram; + return nvkm_memory_map(memory, offset, vmm, vma, argv, argc); } static void nv50_instobj_release(struct nvkm_memory *memory) { - struct nv50_instmem *imem = nv50_instobj(memory)->imem; - spin_unlock_irqrestore(&imem->lock, imem->lock_flags); + struct nv50_instobj *iobj = nv50_instobj(memory); + struct nv50_instmem *imem = iobj->imem; + struct nvkm_subdev *subdev = &imem->base.subdev; + + wmb(); + nvkm_bar_flush(subdev->device->bar); + + if (refcount_dec_and_mutex_lock(&iobj->maps, &subdev->mutex)) { + /* Add the now-unused mapping to the LRU instead of directly + * unmapping it here, in case we need to map it again later. + */ + if (likely(iobj->lru.next) && iobj->map) { + BUG_ON(!list_empty(&iobj->lru)); + list_add_tail(&iobj->lru, &imem->lru); + } + + /* Switch back to NULL accessors when last map is gone. */ + iobj->base.memory.ptrs = NULL; + mutex_unlock(&subdev->mutex); + } } static void __iomem * nv50_instobj_acquire(struct nvkm_memory *memory) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_bar *bar = imem->base.subdev.device->bar; - struct nvkm_vm *vm; - unsigned long flags; + struct nvkm_instmem *imem = &iobj->imem->base; + struct nvkm_vmm *vmm; + void __iomem *map = NULL; - if (!iobj->map && (vm = nvkm_bar_kmap(bar))) - nvkm_memory_boot(memory, vm); - if (!IS_ERR_OR_NULL(iobj->map)) + /* Already mapped? */ + if (refcount_inc_not_zero(&iobj->maps)) return iobj->map; - spin_lock_irqsave(&imem->lock, flags); - imem->lock_flags = flags; - return NULL; -} + /* Take the lock, and re-check that another thread hasn't + * already mapped the object in the meantime. + */ + mutex_lock(&imem->subdev.mutex); + if (refcount_inc_not_zero(&iobj->maps)) { + mutex_unlock(&imem->subdev.mutex); + return iobj->map; + } -static u32 -nv50_instobj_rd32(struct nvkm_memory *memory, u64 offset) -{ - struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_device *device = imem->base.subdev.device; - u64 base = (iobj->mem->offset + offset) & 0xffffff00000ULL; - u64 addr = (iobj->mem->offset + offset) & 0x000000fffffULL; - u32 data; + /* Attempt to get a direct CPU mapping of the object. */ + if ((vmm = nvkm_bar_bar2_vmm(imem->subdev.device))) { + if (!iobj->map) + nv50_instobj_kmap(iobj, vmm); + map = iobj->map; + } - if (unlikely(imem->addr != base)) { - nvkm_wr32(device, 0x001700, base >> 16); - imem->addr = base; + if (!refcount_inc_not_zero(&iobj->maps)) { + /* Exclude object from eviction while it's being accessed. */ + if (likely(iobj->lru.next)) + list_del_init(&iobj->lru); + + if (map) + iobj->base.memory.ptrs = &nv50_instobj_fast; + else + iobj->base.memory.ptrs = &nv50_instobj_slow; + refcount_inc(&iobj->maps); } - data = nvkm_rd32(device, 0x700000 + addr); - return data; + + mutex_unlock(&imem->subdev.mutex); + return map; } static void -nv50_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data) +nv50_instobj_boot(struct nvkm_memory *memory, struct nvkm_vmm *vmm) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nv50_instmem *imem = iobj->imem; - struct nvkm_device *device = imem->base.subdev.device; - u64 base = (iobj->mem->offset + offset) & 0xffffff00000ULL; - u64 addr = (iobj->mem->offset + offset) & 0x000000fffffULL; - - if (unlikely(imem->addr != base)) { - nvkm_wr32(device, 0x001700, base >> 16); - imem->addr = base; + struct nvkm_instmem *imem = &iobj->imem->base; + + /* Exclude bootstrapped objects (ie. the page tables for the + * instmem BAR itself) from eviction. + */ + mutex_lock(&imem->subdev.mutex); + if (likely(iobj->lru.next)) { + list_del_init(&iobj->lru); + iobj->lru.next = NULL; } - nvkm_wr32(device, 0x700000 + addr, data); + + nv50_instobj_kmap(iobj, vmm); + nvkm_instmem_boot(imem); + mutex_unlock(&imem->subdev.mutex); } -static void -nv50_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) +static u64 +nv50_instobj_size(struct nvkm_memory *memory) { - struct nv50_instobj *iobj = nv50_instobj(memory); - nvkm_vm_map_at(vma, offset, iobj->mem); + return nvkm_memory_size(nv50_instobj(memory)->ram); +} + +static u64 +nv50_instobj_addr(struct nvkm_memory *memory) +{ + return nvkm_memory_addr(nv50_instobj(memory)->ram); +} + +static enum nvkm_memory_target +nv50_instobj_target(struct nvkm_memory *memory) +{ + return nvkm_memory_target(nv50_instobj(memory)->ram); } static void * nv50_instobj_dtor(struct nvkm_memory *memory) { struct nv50_instobj *iobj = nv50_instobj(memory); - struct nvkm_ram *ram = iobj->imem->base.subdev.device->fb->ram; - if (!IS_ERR_OR_NULL(iobj->map)) { - nvkm_vm_put(&iobj->bar); - iounmap(iobj->map); + struct nvkm_instmem *imem = &iobj->imem->base; + struct nvkm_vma *bar; + void *map = map; + + mutex_lock(&imem->subdev.mutex); + if (likely(iobj->lru.next)) + list_del(&iobj->lru); + map = iobj->map; + bar = iobj->bar; + mutex_unlock(&imem->subdev.mutex); + + if (map) { + struct nvkm_vmm *vmm = nvkm_bar_bar2_vmm(imem->subdev.device); + iounmap(map); + if (likely(vmm)) /* Can be NULL during BAR destructor. */ + nvkm_vmm_put(vmm, &bar); } - ram->func->put(ram, &iobj->mem); + + nvkm_memory_unref(&iobj->ram); + nvkm_instobj_dtor(imem, &iobj->base); return iobj; } @@ -184,8 +330,6 @@ nv50_instobj_func = { .boot = nv50_instobj_boot, .acquire = nv50_instobj_acquire, .release = nv50_instobj_release, - .rd32 = nv50_instobj_rd32, - .wr32 = nv50_instobj_wr32, .map = nv50_instobj_map, }; @@ -195,25 +339,19 @@ nv50_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, { struct nv50_instmem *imem = nv50_instmem(base); struct nv50_instobj *iobj; - struct nvkm_ram *ram = imem->base.subdev.device->fb->ram; - int ret; + struct nvkm_device *device = imem->base.subdev.device; + u8 page = max(order_base_2(align), 12); if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL))) return -ENOMEM; - *pmemory = &iobj->memory; + *pmemory = &iobj->base.memory; - nvkm_memory_ctor(&nv50_instobj_func, &iobj->memory); + nvkm_instobj_ctor(&nv50_instobj_func, &imem->base, &iobj->base); iobj->imem = imem; + refcount_set(&iobj->maps, 0); + INIT_LIST_HEAD(&iobj->lru); - size = max((size + 4095) & ~4095, (u32)4096); - align = max((align + 4095) & ~4095, (u32)4096); - - ret = ram->func->get(ram, size, align, 0, 0x800, &iobj->mem); - if (ret) - return ret; - - iobj->mem->page_shift = 12; - return 0; + return nvkm_ram_get(device, 0, 1, page, size, true, true, &iobj->ram); } /****************************************************************************** @@ -230,7 +368,6 @@ static const struct nvkm_instmem_func nv50_instmem = { .fini = nv50_instmem_fini, .memory_new = nv50_instobj_new, - .persistent = false, .zero = false, }; @@ -243,7 +380,7 @@ nv50_instmem_new(struct nvkm_device *device, int index, if (!(imem = kzalloc(sizeof(*imem), GFP_KERNEL))) return -ENOMEM; nvkm_instmem_ctor(&nv50_instmem, device, index, &imem->base); - spin_lock_init(&imem->lock); + INIT_LIST_HEAD(&imem->lru); *pimem = &imem->base; return 0; } |