diff options
Diffstat (limited to 'lib/test_hmm.c')
-rw-r--r-- | lib/test_hmm.c | 347 |
1 files changed, 261 insertions, 86 deletions
diff --git a/lib/test_hmm.c b/lib/test_hmm.c index cfe632047839..e3965cafd27c 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -32,11 +32,32 @@ #include "test_hmm_uapi.h" -#define DMIRROR_NDEVICES 2 +#define DMIRROR_NDEVICES 4 #define DMIRROR_RANGE_FAULT_TIMEOUT 1000 #define DEVMEM_CHUNK_SIZE (256 * 1024 * 1024U) #define DEVMEM_CHUNKS_RESERVE 16 +/* + * For device_private pages, dpage is just a dummy struct page + * representing a piece of device memory. dmirror_devmem_alloc_page + * allocates a real system memory page as backing storage to fake a + * real device. zone_device_data points to that backing page. But + * for device_coherent memory, the struct page represents real + * physical CPU-accessible memory that we can use directly. + */ +#define BACKING_PAGE(page) (is_device_private_page((page)) ? \ + (page)->zone_device_data : (page)) + +static unsigned long spm_addr_dev0; +module_param(spm_addr_dev0, long, 0644); +MODULE_PARM_DESC(spm_addr_dev0, + "Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE."); + +static unsigned long spm_addr_dev1; +module_param(spm_addr_dev1, long, 0644); +MODULE_PARM_DESC(spm_addr_dev1, + "Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE."); + static const struct dev_pagemap_ops dmirror_devmem_ops; static const struct mmu_interval_notifier_ops dmirror_min_ops; static dev_t dmirror_dev; @@ -87,6 +108,7 @@ struct dmirror_chunk { struct dmirror_device { struct cdev cdevice; struct hmm_devmem *devmem; + unsigned int zone_device_type; unsigned int devmem_capacity; unsigned int devmem_count; @@ -114,6 +136,21 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce, return 0; } +static bool dmirror_is_private_zone(struct dmirror_device *mdevice) +{ + return (mdevice->zone_device_type == + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false; +} + +static enum migrate_vma_direction +dmirror_select_device(struct dmirror *dmirror) +{ + return (dmirror->mdevice->zone_device_type == + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? + MIGRATE_VMA_SELECT_DEVICE_PRIVATE : + MIGRATE_VMA_SELECT_DEVICE_COHERENT; +} + static void dmirror_bounce_fini(struct dmirror_bounce *bounce) { vfree(bounce->ptr); @@ -454,28 +491,44 @@ fini: return ret; } -static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, +static int dmirror_allocate_chunk(struct dmirror_device *mdevice, struct page **ppage) { struct dmirror_chunk *devmem; - struct resource *res; + struct resource *res = NULL; unsigned long pfn; unsigned long pfn_first; unsigned long pfn_last; void *ptr; + int ret = -ENOMEM; devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); if (!devmem) - return false; + return ret; - res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, - "hmm_dmirror"); - if (IS_ERR(res)) + switch (mdevice->zone_device_type) { + case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE: + res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, + "hmm_dmirror"); + if (IS_ERR_OR_NULL(res)) + goto err_devmem; + devmem->pagemap.range.start = res->start; + devmem->pagemap.range.end = res->end; + devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; + break; + case HMM_DMIRROR_MEMORY_DEVICE_COHERENT: + devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ? + spm_addr_dev0 : + spm_addr_dev1; + devmem->pagemap.range.end = devmem->pagemap.range.start + + DEVMEM_CHUNK_SIZE - 1; + devmem->pagemap.type = MEMORY_DEVICE_COHERENT; + break; + default: + ret = -EINVAL; goto err_devmem; + } - devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - devmem->pagemap.range.start = res->start; - devmem->pagemap.range.end = res->end; devmem->pagemap.nr_range = 1; devmem->pagemap.ops = &dmirror_devmem_ops; devmem->pagemap.owner = mdevice; @@ -496,10 +549,14 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, mdevice->devmem_capacity = new_capacity; mdevice->devmem_chunks = new_chunks; } - ptr = memremap_pages(&devmem->pagemap, numa_node_id()); - if (IS_ERR(ptr)) + if (IS_ERR_OR_NULL(ptr)) { + if (ptr) + ret = PTR_ERR(ptr); + else + ret = -EFAULT; goto err_release; + } devmem->mdevice = mdevice; pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT; @@ -528,30 +585,35 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, } spin_unlock(&mdevice->lock); - return true; + return 0; err_release: mutex_unlock(&mdevice->devmem_lock); - release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range)); + if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE) + release_mem_region(devmem->pagemap.range.start, + range_len(&devmem->pagemap.range)); err_devmem: kfree(devmem); - return false; + return ret; } static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) { struct page *dpage = NULL; - struct page *rpage; + struct page *rpage = NULL; /* - * This is a fake device so we alloc real system memory to store - * our device memory. + * For ZONE_DEVICE private type, this is a fake device so we allocate + * real system memory to store our device memory. + * For ZONE_DEVICE coherent type we use the actual dpage to store the + * data and ignore rpage. */ - rpage = alloc_page(GFP_HIGHUSER); - if (!rpage) - return NULL; - + if (dmirror_is_private_zone(mdevice)) { + rpage = alloc_page(GFP_HIGHUSER); + if (!rpage) + return NULL; + } spin_lock(&mdevice->lock); if (mdevice->free_pages) { @@ -561,7 +623,7 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) spin_unlock(&mdevice->lock); } else { spin_unlock(&mdevice->lock); - if (!dmirror_allocate_chunk(mdevice, &dpage)) + if (dmirror_allocate_chunk(mdevice, &dpage)) goto error; } @@ -570,7 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) return dpage; error: - __free_page(rpage); + if (rpage) + __free_page(rpage); return NULL; } @@ -596,12 +659,16 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, * unallocated pte_none() or read-only zero page. */ spage = migrate_pfn_to_page(*src); + if (WARN(spage && is_zone_device_page(spage), + "page already in device spage pfn: 0x%lx\n", + page_to_pfn(spage))) + continue; dpage = dmirror_devmem_alloc_page(mdevice); if (!dpage) continue; - rpage = dpage->zone_device_data; + rpage = BACKING_PAGE(dpage); if (spage) copy_highpage(rpage, spage); else @@ -615,6 +682,8 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, */ rpage->zone_device_data = dmirror; + pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n", + page_to_pfn(spage), page_to_pfn(dpage)); *dst = migrate_pfn(page_to_pfn(dpage)); if ((*src & MIGRATE_PFN_WRITE) || (!spage && args->vma->vm_flags & VM_WRITE)) @@ -692,11 +761,7 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, if (!dpage) continue; - /* - * Store the page that holds the data so the page table - * doesn't have to deal with ZONE_DEVICE private pages. - */ - entry = dpage->zone_device_data; + entry = BACKING_PAGE(dpage); if (*dst & MIGRATE_PFN_WRITE) entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE); entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); @@ -732,7 +797,7 @@ static int dmirror_exclusive(struct dmirror *dmirror, mmap_read_lock(mm); for (addr = start; addr < end; addr = next) { - unsigned long mapped; + unsigned long mapped = 0; int i; if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)) @@ -741,7 +806,13 @@ static int dmirror_exclusive(struct dmirror *dmirror, next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT); ret = make_device_exclusive_range(mm, addr, next, pages, NULL); - mapped = dmirror_atomic_map(addr, next, pages, dmirror); + /* + * Do dmirror_atomic_map() iff all pages are marked for + * exclusive access to avoid accessing uninitialized + * fields of pages. + */ + if (ret == (next - addr) >> PAGE_SHIFT) + mapped = dmirror_atomic_map(addr, next, pages, dmirror); for (i = 0; i < ret; i++) { if (pages[i]) { unlock_page(pages[i]); @@ -776,15 +847,126 @@ static int dmirror_exclusive(struct dmirror *dmirror, return ret; } -static int dmirror_migrate(struct dmirror *dmirror, - struct hmm_dmirror_cmd *cmd) +static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, + struct dmirror *dmirror) +{ + const unsigned long *src = args->src; + unsigned long *dst = args->dst; + unsigned long start = args->start; + unsigned long end = args->end; + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE, + src++, dst++) { + struct page *dpage, *spage; + + spage = migrate_pfn_to_page(*src); + if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) + continue; + + if (WARN_ON(!is_device_private_page(spage) && + !is_device_coherent_page(spage))) + continue; + spage = BACKING_PAGE(spage); + dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); + if (!dpage) + continue; + pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n", + page_to_pfn(spage), page_to_pfn(dpage)); + + lock_page(dpage); + xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); + copy_highpage(dpage, spage); + *dst = migrate_pfn(page_to_pfn(dpage)); + if (*src & MIGRATE_PFN_WRITE) + *dst |= MIGRATE_PFN_WRITE; + } + return 0; +} + +static unsigned long +dmirror_successful_migrated_pages(struct migrate_vma *migrate) +{ + unsigned long cpages = 0; + unsigned long i; + + for (i = 0; i < migrate->npages; i++) { + if (migrate->src[i] & MIGRATE_PFN_VALID && + migrate->src[i] & MIGRATE_PFN_MIGRATE) + cpages++; + } + return cpages; +} + +static int dmirror_migrate_to_system(struct dmirror *dmirror, + struct hmm_dmirror_cmd *cmd) { unsigned long start, end, addr; unsigned long size = cmd->npages << PAGE_SHIFT; struct mm_struct *mm = dmirror->notifier.mm; struct vm_area_struct *vma; - unsigned long src_pfns[64]; - unsigned long dst_pfns[64]; + unsigned long src_pfns[64] = { 0 }; + unsigned long dst_pfns[64] = { 0 }; + struct migrate_vma args; + unsigned long next; + int ret; + + start = cmd->addr; + end = start + size; + if (end < start) + return -EINVAL; + + /* Since the mm is for the mirrored process, get a reference first. */ + if (!mmget_not_zero(mm)) + return -EINVAL; + + cmd->cpages = 0; + mmap_read_lock(mm); + for (addr = start; addr < end; addr = next) { + vma = vma_lookup(mm, addr); + if (!vma || !(vma->vm_flags & VM_READ)) { + ret = -EINVAL; + goto out; + } + next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT)); + if (next > vma->vm_end) + next = vma->vm_end; + + args.vma = vma; + args.src = src_pfns; + args.dst = dst_pfns; + args.start = addr; + args.end = next; + args.pgmap_owner = dmirror->mdevice; + args.flags = dmirror_select_device(dmirror); + + ret = migrate_vma_setup(&args); + if (ret) + goto out; + + pr_debug("Migrating from device mem to sys mem\n"); + dmirror_devmem_fault_alloc_and_copy(&args, dmirror); + + migrate_vma_pages(&args); + cmd->cpages += dmirror_successful_migrated_pages(&args); + migrate_vma_finalize(&args); + } +out: + mmap_read_unlock(mm); + mmput(mm); + + return ret; +} + +static int dmirror_migrate_to_device(struct dmirror *dmirror, + struct hmm_dmirror_cmd *cmd) +{ + unsigned long start, end, addr; + unsigned long size = cmd->npages << PAGE_SHIFT; + struct mm_struct *mm = dmirror->notifier.mm; + struct vm_area_struct *vma; + unsigned long src_pfns[64] = { 0 }; + unsigned long dst_pfns[64] = { 0 }; struct dmirror_bounce bounce; struct migrate_vma args; unsigned long next; @@ -821,6 +1003,7 @@ static int dmirror_migrate(struct dmirror *dmirror, if (ret) goto out; + pr_debug("Migrating from sys mem to device mem\n"); dmirror_migrate_alloc_and_copy(&args, dmirror); migrate_vma_pages(&args); dmirror_migrate_finalize_and_map(&args, dmirror); @@ -829,7 +1012,10 @@ static int dmirror_migrate(struct dmirror *dmirror, mmap_read_unlock(mm); mmput(mm); - /* Return the migrated data for verification. */ + /* + * Return the migrated data for verification. + * Only for pages in device zone + */ ret = dmirror_bounce_init(&bounce, start, size); if (ret) return ret; @@ -872,6 +1058,12 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range, *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL; else *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE; + } else if (is_device_coherent_page(page)) { + /* Is the page migrated to this device or some other? */ + if (dmirror->mdevice == dmirror_page_to_device(page)) + *perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL; + else + *perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE; } else if (is_zero_pfn(page_to_pfn(page))) *perm = HMM_DMIRROR_PROT_ZERO; else @@ -1059,8 +1251,12 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp, ret = dmirror_write(dmirror, &cmd); break; - case HMM_DMIRROR_MIGRATE: - ret = dmirror_migrate(dmirror, &cmd); + case HMM_DMIRROR_MIGRATE_TO_DEV: + ret = dmirror_migrate_to_device(dmirror, &cmd); + break; + + case HMM_DMIRROR_MIGRATE_TO_SYS: + ret = dmirror_migrate_to_system(dmirror, &cmd); break; case HMM_DMIRROR_EXCLUSIVE: @@ -1122,14 +1318,13 @@ static const struct file_operations dmirror_fops = { static void dmirror_devmem_free(struct page *page) { - struct page *rpage = page->zone_device_data; + struct page *rpage = BACKING_PAGE(page); struct dmirror_device *mdevice; - if (rpage) + if (rpage != page) __free_page(rpage); mdevice = dmirror_page_to_device(page); - spin_lock(&mdevice->lock); mdevice->cfree++; page->zone_device_data = mdevice->free_pages; @@ -1137,43 +1332,11 @@ static void dmirror_devmem_free(struct page *page) spin_unlock(&mdevice->lock); } -static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, - struct dmirror *dmirror) -{ - const unsigned long *src = args->src; - unsigned long *dst = args->dst; - unsigned long start = args->start; - unsigned long end = args->end; - unsigned long addr; - - for (addr = start; addr < end; addr += PAGE_SIZE, - src++, dst++) { - struct page *dpage, *spage; - - spage = migrate_pfn_to_page(*src); - if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) - continue; - spage = spage->zone_device_data; - - dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); - if (!dpage) - continue; - - lock_page(dpage); - xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); - copy_highpage(dpage, spage); - *dst = migrate_pfn(page_to_pfn(dpage)); - if (*src & MIGRATE_PFN_WRITE) - *dst |= MIGRATE_PFN_WRITE; - } - return 0; -} - static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) { struct migrate_vma args; - unsigned long src_pfns; - unsigned long dst_pfns; + unsigned long src_pfns = 0; + unsigned long dst_pfns = 0; struct page *rpage; struct dmirror *dmirror; vm_fault_t ret; @@ -1193,7 +1356,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) args.src = &src_pfns; args.dst = &dst_pfns; args.pgmap_owner = dmirror->mdevice; - args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + args.flags = dmirror_select_device(dmirror); if (migrate_vma_setup(&args)) return VM_FAULT_SIGBUS; @@ -1231,10 +1394,8 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id) if (ret) return ret; - /* Build a list of free ZONE_DEVICE private struct pages */ - dmirror_allocate_chunk(mdevice, NULL); - - return 0; + /* Build a list of free ZONE_DEVICE struct pages */ + return dmirror_allocate_chunk(mdevice, NULL); } static void dmirror_device_remove(struct dmirror_device *mdevice) @@ -1247,8 +1408,9 @@ static void dmirror_device_remove(struct dmirror_device *mdevice) mdevice->devmem_chunks[i]; memunmap_pages(&devmem->pagemap); - release_mem_region(devmem->pagemap.range.start, - range_len(&devmem->pagemap.range)); + if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE) + release_mem_region(devmem->pagemap.range.start, + range_len(&devmem->pagemap.range)); kfree(devmem); } kfree(mdevice->devmem_chunks); @@ -1260,14 +1422,26 @@ static void dmirror_device_remove(struct dmirror_device *mdevice) static int __init hmm_dmirror_init(void) { int ret; - int id; + int id = 0; + int ndevices = 0; ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES, "HMM_DMIRROR"); if (ret) goto err_unreg; - for (id = 0; id < DMIRROR_NDEVICES; id++) { + memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0])); + dmirror_devices[ndevices++].zone_device_type = + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE; + dmirror_devices[ndevices++].zone_device_type = + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE; + if (spm_addr_dev0 && spm_addr_dev1) { + dmirror_devices[ndevices++].zone_device_type = + HMM_DMIRROR_MEMORY_DEVICE_COHERENT; + dmirror_devices[ndevices++].zone_device_type = + HMM_DMIRROR_MEMORY_DEVICE_COHERENT; + } + for (id = 0; id < ndevices; id++) { ret = dmirror_device_init(dmirror_devices + id, id); if (ret) goto err_chrdev; @@ -1289,7 +1463,8 @@ static void __exit hmm_dmirror_exit(void) int id; for (id = 0; id < DMIRROR_NDEVICES; id++) - dmirror_device_remove(dmirror_devices + id); + if (dmirror_devices[id].zone_device_type) + dmirror_device_remove(dmirror_devices + id); unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES); } |