Diffstat (limited to 'drivers/char/hmm_dummy.c')
-rw-r--r--  drivers/char/hmm_dummy.c  |  395
1 file changed, 376 insertions, 19 deletions
diff --git a/drivers/char/hmm_dummy.c b/drivers/char/hmm_dummy.c
index 52843cbd67e3..a4af5b1a4c51 100644
--- a/drivers/char/hmm_dummy.c
+++ b/drivers/char/hmm_dummy.c
@@ -43,6 +43,9 @@
#define HMM_DUMMY_MAX_DEVICES 4
#define HMM_DUMMY_MAX_MIRRORS 4
+#define HMM_DUMMY_RMEM_SIZE (32UL << 20UL)
+#define HMM_DUMMY_RMEM_NBITS (HMM_DUMMY_RMEM_SIZE >> PAGE_SHIFT)
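+/* e.g. with 4 KiB pages (PAGE_SHIFT == 12) this is 8192 fake device pages. */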
+
struct dummy_device;
struct dummy_mirror {
@@ -70,6 +73,8 @@ struct dummy_device {
/* device file mapping tracking (keep track of all vma) */
struct dummy_mirror *dmirrors[HMM_DUMMY_MAX_MIRRORS];
struct address_space *fmapping[HMM_DUMMY_MAX_MIRRORS];
+ struct page **rmem_pages;
+ unsigned long *rmem_bitmap;
};
struct dummy_event {
@@ -77,11 +82,30 @@ struct dummy_event {
struct list_head list;
uint64_t nsys_pages;
uint64_t nfaulted_sys_pages;
+ uint64_t ndev_pages;
+ uint64_t nfaulted_dev_pages;
+ unsigned *dpfn;
+ unsigned npages;
bool backoff;
};
static struct dummy_device ddevices[HMM_DUMMY_MAX_DEVICES];
+/** dummy_device_pfn_to_page() - Return struct page of fake device memory.
+ *
+ * @ddevice: The dummy device.
+ * @pfn: The fake device page frame number.
+ * Return: The pointer to the struct page of the fake device memory.
+ *
+ * For the dummy device remote memory we simply allocate regular pages and
+ * pretend they are not directly accessible by the CPU.
+ */
+struct page *dummy_device_pfn_to_page(struct dummy_device *ddevice,
+ unsigned pfn)
+{
+ return ddevice->rmem_pages[pfn];
+}
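For orientation, the fake device pfn round-trips through the hmm_pte dev-addr helpers used throughout this patch. A minimal sketch, assuming hmm_pte_from_dev_addr() and hmm_pte_dev_addr() encode and decode a page-aligned device address, which is how the code below uses them:

	/* Encode fake pfn 42 as a device page table entry... */
	dma_addr_t dpte = hmm_pte_from_dev_addr((dma_addr_t)42 << PAGE_SHIFT);
	/* ...and recover the backing struct page from that entry. */
	struct page *page = dummy_device_pfn_to_page(ddevice,
				hmm_pte_dev_addr(dpte) >> PAGE_SHIFT);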
+
static void dummy_mirror_release(struct hmm_mirror *mirror)
{
@@ -233,9 +257,11 @@ static int dummy_mirror_pt_invalidate(struct hmm_mirror *mirror,
unsigned long addr = event->start;
struct hmm_pt_iter miter, diter;
struct dummy_mirror *dmirror;
+ struct dummy_device *ddevice;
int ret = 0;
dmirror = container_of(mirror, struct dummy_mirror, mirror);
+ ddevice = dmirror->ddevice;
hmm_pt_iter_init(&diter, &dmirror->pt);
hmm_pt_iter_init(&miter, &mirror->pt);
@@ -259,6 +285,24 @@ static int dummy_mirror_pt_invalidate(struct hmm_mirror *mirror,
*/
hmm_pt_iter_directory_lock(&diter);
+ /* Handle the fake device memory page table entry case. */
+ if (hmm_pte_test_valid_dev(dpte)) {
+ unsigned dpfn = hmm_pte_dev_addr(*dpte) >> PAGE_SHIFT;
+
+ *dpte &= event->pte_mask;
+ if (!hmm_pte_test_valid_dev(dpte)) {
+ /*
+ * Just directly free the fake device memory.
+ */
+ clear_bit(dpfn, ddevice->rmem_bitmap);
+ hmm_pt_iter_directory_unref(&diter);
+ }
+ hmm_pt_iter_directory_unlock(&diter);
+
+ addr += PAGE_SIZE;
+ continue;
+ }
+
/*
* Just skip this entry if it is not valid inside the dummy
* mirror page table.
@@ -341,10 +385,178 @@ static int dummy_mirror_update(struct hmm_mirror *mirror,
}
}
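+/*
+ * dummy_copy_from_device() - Copy fake device memory back to system pages.
+ *
+ * HMM hands us dst, an array of page table entries pointing at the system
+ * pages it selected as migration targets for [start, end); each fake device
+ * page is copied into its target page and its fake pfn is returned to the
+ * allocation bitmap.
+ */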
+static int dummy_copy_from_device(struct hmm_mirror *mirror,
+ const struct hmm_event *event,
+ dma_addr_t *dst,
+ unsigned long start,
+ unsigned long end)
+{
+ struct hmm_pt_iter miter, diter;
+ struct dummy_device *ddevice;
+ struct dummy_mirror *dmirror;
+ struct dummy_event *devent;
+ unsigned long addr = start;
+ int ret = 0, i = 0;
+
+ dmirror = container_of(mirror, struct dummy_mirror, mirror);
+ devent = container_of(event, struct dummy_event, hevent);
+ ddevice = dmirror->ddevice;
+
+ hmm_pt_iter_init(&diter, &dmirror->pt);
+ hmm_pt_iter_init(&miter, &mirror->pt);
+
+ do {
+ struct page *spage, *dpage;
+ unsigned long dpfn, next = end;
+ dma_addr_t *mpte, *dpte;
+
+ mpte = hmm_pt_iter_lookup(&miter, addr, &next);
+ if (!mpte || !hmm_pte_test_valid_dev(mpte) ||
+ !hmm_pte_test_select(&dst[i])) {
+ i++;
+ continue;
+ }
+
+ dpte = hmm_pt_iter_lookup(&diter, addr, &next);
+ /*
+ * Sanity check that the device driver page table entry is a
+ * valid entry pointing to device memory.
+ */
+ if (!dpte || !hmm_pte_test_valid_dev(dpte) ||
+ !hmm_pte_test_select(&dst[i])) {
+ ret = -EINVAL;
+ break;
+ }
+
+ dpfn = hmm_pte_dev_addr(*mpte) >> PAGE_SHIFT;
+ spage = dummy_device_pfn_to_page(ddevice, dpfn);
+ dpage = pfn_to_page(hmm_pte_pfn(dst[i]));
+ copy_highpage(dpage, spage);
+
+ /* Directly free the fake device memory. */
+ clear_bit(dpfn, ddevice->rmem_bitmap);
+
+ if (hmm_pte_test_and_clear_dirty(dpte))
+ hmm_pte_set_dirty(&dst[i]);
+
+ /*
+ * It is a bit inefficient to lock the directory per entry
+ * instead of locking it once and walking all its entries. But
+ * this is a dummy driver and we do not care about efficiency.
+ */
+ hmm_pt_iter_directory_lock(&diter);
+ *dpte = dst[i];
+ hmm_pte_clear_dirty(dpte);
+ hmm_pt_iter_directory_unlock(&diter);
+
+ i++;
+ } while (addr += PAGE_SIZE, addr < end);
+
+ hmm_pt_iter_fini(&miter);
+ hmm_pt_iter_fini(&diter);
+
+ return ret;
+}
+
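+/*
+ * dummy_copy_to_device() - Copy system pages into fake device memory.
+ *
+ * The fake device pfns were pre-allocated (see dummy_lmem_to_rmem() below);
+ * each system page is copied into its fake device page and the dummy mirror
+ * page table entry is replaced by a device memory entry.
+ */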
+static int dummy_copy_to_device(struct hmm_mirror *mirror,
+ const struct hmm_event *event,
+ struct vm_area_struct *vma,
+ dma_addr_t *dst,
+ unsigned long start,
+ unsigned long end)
+{
+ struct hmm_pt_iter miter, diter;
+ struct dummy_device *ddevice;
+ struct dummy_mirror *dmirror;
+ struct dummy_event *devent;
+ unsigned long addr = start;
+ int ret = 0, i = 0;
+
+ dmirror = container_of(mirror, struct dummy_mirror, mirror);
+ devent = container_of(event, struct dummy_event, hevent);
+ ddevice = dmirror->ddevice;
+
+ hmm_pt_iter_init(&diter, &dmirror->pt);
+ hmm_pt_iter_init(&miter, &mirror->pt);
+
+ do {
+ struct page *spage, *dpage;
+ dma_addr_t *mpte, *dpte;
+ unsigned long next = end;
+
+ mpte = hmm_pt_iter_lookup(&miter, addr, &next);
+ /*
+ * Sanity check. This is only important for debugging HMM; a
+ * device driver can skip these tests and assume the condition
+ * below is false (i.e. mpte is not NULL and is a valid pfn
+ * entry with the select bit set).
+ */
+ if (!mpte || !hmm_pte_test_valid_pfn(mpte) ||
+ !hmm_pte_test_select(mpte)) {
+ pr_debug("(%s:%4d) (HMM FATAL) empty pt at 0x%lX\n",
+ __FILE__, __LINE__, addr);
+ ret = -EINVAL;
+ break;
+ }
+
+ dpte = hmm_pt_iter_populate(&diter, addr, &next);
+ if (!dpte) {
+ ret = -ENOMEM;
+ break;
+ }
+ /*
+ * Sanity check. This is only important for debugging HMM; a
+ * device driver can skip these tests and assume the condition
+ * below is false (i.e. dpte is not a valid device entry).
+ */
+ if (hmm_pte_test_valid_dev(dpte)) {
+ pr_debug("(%s:%4d) (DUMMY FATAL) existing device entry %pad at 0x%lX\n",
+ __FILE__, __LINE__, dpte, addr);
+ ret = -EINVAL;
+ break;
+ }
+
+ spage = pfn_to_page(hmm_pte_pfn(*mpte));
+ dpage = dummy_device_pfn_to_page(ddevice, devent->dpfn[i]);
+ dst[i] = hmm_pte_from_dev_addr(devent->dpfn[i] << PAGE_SHIFT);
+ copy_highpage(dpage, spage);
+ devent->dpfn[i] = -1;
+ devent->nfaulted_dev_pages++;
+
+ /*
+ * It is a bit inefficient to lock the directory per entry
+ * instead of locking it once and walking all its entries. But
+ * this is a dummy driver and we do not care about efficiency.
+ */
+ hmm_pt_iter_directory_lock(&diter);
+ if (hmm_pte_test_and_clear_dirty(dpte))
+ hmm_pte_set_dirty(&dst[i]);
+ if (vma->vm_flags & VM_WRITE)
+ hmm_pte_set_write(&dst[i]);
+ /*
+ * Increment the ref count of the dummy page table directory if
+ * the previous entry was not valid. Note that the previous
+ * entry cannot be a valid device memory entry.
+ */
+ if (!hmm_pte_test_valid_pfn(dpte))
+ hmm_pt_iter_directory_ref(&diter);
+ *dpte = dst[i];
+ hmm_pt_iter_directory_unlock(&diter);
+
+ } while (i++, addr += PAGE_SIZE, addr < end);
+
+ hmm_pt_iter_fini(&miter);
+ hmm_pt_iter_fini(&diter);
+
+ return ret;
+}
+
static const struct hmm_device_ops hmm_dummy_ops = {
.release = &dummy_mirror_release,
.free = &dummy_mirror_free,
.update = &dummy_mirror_update,
+ .copy_from_device = &dummy_copy_from_device,
+ .copy_to_device = &dummy_copy_to_device,
};
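For context, these callbacks reach HMM through the hmm_device this driver registers (see the "Register the hmm device" hunk in dummy_device_init() below). A minimal wiring sketch; the ops field and hmm_device_register() are assumptions based on how this series appears to be used here, not confirmed by this hunk:

	static struct hmm_device hdevice = {
		.ops = &hmm_dummy_ops,	/* assumed: ops table hangs off hmm_device */
	};

	ret = hmm_device_register(&hdevice);	/* assumed registration entry point */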
@@ -443,6 +655,7 @@ static int dummy_read(struct dummy_mirror *dmirror,
char __user *buf,
size_t size)
{
+ struct dummy_device *ddevice = dmirror->ddevice;
struct hmm_event *event = &devent->hevent;
long r = 0;
@@ -483,14 +696,21 @@ static int dummy_read(struct dummy_mirror *dmirror,
* coherent value for each page table entry.
*/
dpte = ACCESS_ONCE(*dptep);
- if (!hmm_pte_test_valid_pfn(&dpte)) {
+
+ if (hmm_pte_test_valid_dev(&dpte)) {
+ dma_addr_t dpfn;
+
+ dpfn = hmm_pte_dev_addr(dpte) >> PAGE_SHIFT;
+ page = dummy_device_pfn_to_page(ddevice, dpfn);
+ devent->ndev_pages++;
+ } else if (hmm_pte_test_valid_pfn(&dpte)) {
+ page = pfn_to_page(hmm_pte_pfn(dpte));
+ devent->nsys_pages++;
+ } else {
dummy_mirror_access_stop(dmirror, devent);
break;
}
- devent->nsys_pages++;
-
- page = pfn_to_page(hmm_pte_pfn(dpte));
ptr = kmap(page);
r = copy_to_user(buf, ptr + offset, count);
@@ -515,6 +735,7 @@ static int dummy_write(struct dummy_mirror *dmirror,
char __user *buf,
size_t size)
{
+ struct dummy_device *ddevice = dmirror->ddevice;
struct hmm_event *event = &devent->hevent;
long r = 0;
@@ -555,15 +776,25 @@ static int dummy_write(struct dummy_mirror *dmirror,
* coherent value for each page table entry.
*/
dpte = ACCESS_ONCE(*dptep);
- if (!hmm_pte_test_valid_pfn(&dpte) ||
- !hmm_pte_test_write(&dpte)) {
+ if (!hmm_pte_test_write(&dpte)) {
+ dummy_mirror_access_stop(dmirror, devent);
+ break;
+ }
+
+ if (hmm_pte_test_valid_dev(&dpte)) {
+ dma_addr_t dpfn;
+
+ dpfn = hmm_pte_dev_addr(dpte) >> PAGE_SHIFT;
+ page = dummy_device_pfn_to_page(ddevice, dpfn);
+ devent->ndev_pages++;
+ } else if (hmm_pte_test_valid_pfn(&dpte)) {
+ page = pfn_to_page(hmm_pte_pfn(dpte));
+ devent->nsys_pages++;
+ } else {
dummy_mirror_access_stop(dmirror, devent);
break;
}
- devent->nsys_pages++;
-
- page = pfn_to_page(hmm_pte_pfn(dpte));
ptr = kmap(page);
r = copy_from_user(ptr + offset, buf, count);
@@ -583,6 +814,58 @@ static int dummy_write(struct dummy_mirror *dmirror,
return r;
}
+static int dummy_lmem_to_rmem(struct dummy_mirror *dmirror,
+ struct dummy_event *devent)
+{
+ struct dummy_device *ddevice = dmirror->ddevice;
+ struct hmm_mirror *mirror = &dmirror->mirror;
+ int i, ret;
+
+ devent->hevent.start = PAGE_MASK & devent->hevent.start;
+ devent->hevent.end = PAGE_ALIGN(devent->hevent.end);
+ devent->hevent.etype = HMM_COPY_TO_DEVICE;
+
+ /* Array of fake device pfns, allocated below out of the rmem bitmap. */
+ devent->dpfn = kcalloc(devent->npages, sizeof(*devent->dpfn), GFP_KERNEL);
+ if (!devent->dpfn)
+ return -ENOMEM;
+
+ /*
+ * Pre-allocate device memory. Device driver is free to pre-allocate
+ * memory or to allocate it inside the copy callback.
+ */
+ mutex_lock(&ddevice->mutex);
+ for (i = 0; i < devent->npages; ++i) {
+ int idx;
+
+ idx = find_first_zero_bit(ddevice->rmem_bitmap,
+ HMM_DUMMY_RMEM_NBITS);
+ if (idx >= HMM_DUMMY_RMEM_NBITS) {
+ /* Out of fake device memory, release what we already took. */
+ while (--i >= 0)
+ clear_bit(devent->dpfn[i], ddevice->rmem_bitmap);
+ mutex_unlock(&ddevice->mutex);
+ kfree(devent->dpfn);
+ return -ENOMEM;
+ }
+ devent->dpfn[i] = idx;
+ set_bit(idx, ddevice->rmem_bitmap);
+ }
+ mutex_unlock(&ddevice->mutex);
+
+ ret = hmm_mirror_fault(mirror, &devent->hevent);
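+ /*
+ * dummy_copy_to_device() stamps every fake pfn it consumed with
+ * -1U, so only pfns that were never migrated are returned to
+ * the bitmap below.
+ */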
+ for (i = 0; i < devent->npages; ++i) {
+ if (devent->dpfn[i] == -1U)
+ continue;
+ clear_bit(devent->dpfn[i], ddevice->rmem_bitmap);
+ }
+ kfree(devent->dpfn);
+
+ return ret;
+}
+
/*
* Below are the vm operations for the dummy device file. Sadly we can not allow
@@ -695,11 +978,26 @@ static int dummy_fops_release(struct inode *inode, struct file *filp)
return 0;
}
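+/*
+ * Common prefix layout shared by the hmm_dummy_read, hmm_dummy_write and
+ * hmm_dummy_migrate ioctl structures, which all start with an address and
+ * a size field; the ioctl handler below relies on this when casting them
+ * for dummy_event_init().
+ */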
+struct dummy_ioctlp {
+ uint64_t address;
+ uint64_t size;
+};
+
+static void dummy_event_init(struct dummy_event *devent,
+ const struct dummy_ioctlp *ioctlp)
+{
+ memset(devent, 0, sizeof(*devent));
+ devent->hevent.start = ioctlp->address;
+ devent->hevent.end = ioctlp->address + ioctlp->size;
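+ /* e.g. with 4 KiB pages, a 5000 byte request yields npages == 2. */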
+ devent->npages = PAGE_ALIGN(ioctlp->size) >> PAGE_SHIFT;
+}
+
static long dummy_fops_unlocked_ioctl(struct file *filp,
unsigned int command,
unsigned long arg)
{
void __user *uarg = (void __user *)arg;
+ struct hmm_dummy_migrate dmigrate;
struct dummy_device *ddevice;
struct dummy_mirror *dmirror;
struct hmm_dummy_write dwrite;
@@ -765,15 +1063,15 @@ static long dummy_fops_unlocked_ioctl(struct file *filp,
return -EFAULT;
}
- memset(&devent, 0, sizeof(devent));
- devent.hevent.start = dread.address;
- devent.hevent.end = dread.address + dread.size;
+ dummy_event_init(&devent, (struct dummy_ioctlp *)&dread);
ret = dummy_read(dmirror, &devent,
(void __user *)dread.ptr,
dread.size);
dread.nsys_pages = devent.nsys_pages;
dread.nfaulted_sys_pages = devent.nfaulted_sys_pages;
+ dread.ndev_pages = devent.ndev_pages;
+ dread.nfaulted_dev_pages = devent.nfaulted_dev_pages;
if (copy_to_user(uarg, &dread, sizeof(dread))) {
dummy_mirror_worker_thread_stop(dmirror);
return -EFAULT;
@@ -787,15 +1085,15 @@ static long dummy_fops_unlocked_ioctl(struct file *filp,
return -EFAULT;
}
- memset(&devent, 0, sizeof(devent));
- devent.hevent.start = dwrite.address;
- devent.hevent.end = dwrite.address + dwrite.size;
+ dummy_event_init(&devent, (struct dummy_ioctlp *)&dwrite);
ret = dummy_write(dmirror, &devent,
(void __user *)dwrite.ptr,
dwrite.size);
dwrite.nsys_pages = devent.nsys_pages;
dwrite.nfaulted_sys_pages = devent.nfaulted_sys_pages;
+ dwrite.ndev_pages = devent.ndev_pages;
+ dwrite.nfaulted_dev_pages = devent.nfaulted_dev_pages;
if (copy_to_user(uarg, &dwrite, sizeof(dwrite))) {
dummy_mirror_worker_thread_stop(dmirror);
return -EFAULT;
@@ -803,6 +1101,23 @@ static long dummy_fops_unlocked_ioctl(struct file *filp,
dummy_mirror_worker_thread_stop(dmirror);
return ret;
+ case HMM_DUMMY_MIGRATE_TO:
+ if (copy_from_user(&dmigrate, uarg, sizeof(dmigrate))) {
+ dummy_mirror_worker_thread_stop(dmirror);
+ return -EFAULT;
+ }
+
+ dummy_event_init(&devent, (struct dummy_ioctlp *)&dmigrate);
+ ret = dummy_lmem_to_rmem(dmirror, &devent);
+
+ dmigrate.nfaulted_dev_pages = devent.nfaulted_dev_pages;
+ if (copy_to_user(uarg, &dmigrate, sizeof(dmigrate))) {
+ dummy_mirror_worker_thread_stop(dmirror);
+ return -EFAULT;
+ }
+
+ dummy_mirror_worker_thread_stop(dmirror);
+ return ret;
default:
return -EINVAL;
}
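As a usage illustration, a userspace test could exercise the new migration ioctl roughly as below. This is a hedged sketch: HMM_DUMMY_MIGRATE_TO and the hmm_dummy_migrate fields address, size and nfaulted_dev_pages appear in this patch, but the uapi header name and the device node path are assumptions:

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include "hmm_dummy.h"		/* assumed uapi header for this driver */

	static char buffer[64 << 10];	/* range to migrate to fake device memory */

	struct hmm_dummy_migrate dmigrate = {
		.address = (uintptr_t)buffer,
		.size = sizeof(buffer),
	};
	int fd = open("/dev/hmm_dummy_device0", O_RDWR);	/* node name assumed */

	if (fd >= 0 && ioctl(fd, HMM_DUMMY_MIGRATE_TO, &dmigrate) == 0)
		printf("%llu pages migrated to fake device memory\n",
		       (unsigned long long)dmigrate.nfaulted_dev_pages);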
@@ -826,20 +1141,44 @@ static const struct file_operations hmm_dummy_fops = {
*/
static int dummy_device_init(struct dummy_device *ddevice)
{
- int ret, i;
+ struct page **pages;
+ unsigned long *bitmap;
+ int ret, i, npages;
+
+ npages = HMM_DUMMY_RMEM_SIZE >> PAGE_SHIFT;
+ bitmap = kzalloc(BITS_TO_LONGS(npages) * sizeof(long), GFP_KERNEL);
+ if (!bitmap)
+ return -ENOMEM;
+ pages = kzalloc(npages * sizeof(void *), GFP_KERNEL);
+ if (!pages) {
+ kfree(bitmap);
+ return -ENOMEM;
+ }
+ for (i = 0; i < npages; ++i) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i]) {
+ while (--i >= 0)
+ __free_page(pages[i]);
+ kfree(bitmap);
+ kfree(pages);
+ return -ENOMEM;
+ }
+ }
ret = alloc_chrdev_region(&ddevice->dev, 0,
HMM_DUMMY_MAX_DEVICES,
ddevice->name);
if (ret < 0)
- return ret;
+ goto error;
ddevice->major = MAJOR(ddevice->dev);
cdev_init(&ddevice->cdevice, &hmm_dummy_fops);
ret = cdev_add(&ddevice->cdevice, ddevice->dev, HMM_DUMMY_MAX_MIRRORS);
if (ret) {
unregister_chrdev_region(ddevice->dev, HMM_DUMMY_MAX_MIRRORS);
- return ret;
+ goto error;
}
/* Register the hmm device. */
@@ -853,14 +1192,25 @@ static int dummy_device_init(struct dummy_device *ddevice)
if (ret) {
cdev_del(&ddevice->cdevice);
unregister_chrdev_region(ddevice->dev, HMM_DUMMY_MAX_MIRRORS);
+ goto error;
}
+ ddevice->rmem_bitmap = bitmap;
+ ddevice->rmem_pages = pages;
+ return 0;
+
+error:
+ for (i = 0; i < npages; ++i)
+ __free_page(pages[i]);
+ kfree(bitmap);
+ kfree(pages);
return ret;
}
static void dummy_device_fini(struct dummy_device *ddevice)
{
struct dummy_mirror *dmirror;
- unsigned i;
+ unsigned i, npages;
/* First unregister all mirror. */
do {
@@ -880,6 +1230,13 @@ static void dummy_device_fini(struct dummy_device *ddevice)
cdev_del(&ddevice->cdevice);
unregister_chrdev_region(ddevice->dev, HMM_DUMMY_MAX_MIRRORS);
+
+ npages = HMM_DUMMY_RMEM_SIZE >> PAGE_SHIFT;
+ for (i = 0; i < npages; ++i)
+ __free_page(ddevice->rmem_pages[i]);
+ kfree(ddevice->rmem_bitmap);
+ kfree(ddevice->rmem_pages);
}
static int __init hmm_dummy_init(void)