summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--include/linux/hmm_pt.h11
-rw-r--r--mm/hmm.c202
2 files changed, 174 insertions, 39 deletions
diff --git a/include/linux/hmm_pt.h b/include/linux/hmm_pt.h
index b9cb7b542bfd..9abb8ab24e2b 100644
--- a/include/linux/hmm_pt.h
+++ b/include/linux/hmm_pt.h
@@ -176,6 +176,17 @@ static inline dma_addr_t hmm_pte_from_pfn(dma_addr_t pfn)
return (pfn << PAGE_SHIFT) | (1 << HMM_PTE_VALID_PFN_BIT);
}
+static inline dma_addr_t hmm_pte_from_dma_addr(dma_addr_t dma_addr)
+{
+ return (dma_addr & HMM_PTE_DMA_MASK) | (1 << HMM_PTE_VALID_DMA_BIT);
+}
+
+static inline dma_addr_t hmm_pte_dma_addr(dma_addr_t pte)
+{
+ /* FIXME Use max dma addr instead of 0 ? */
+ return hmm_pte_test_valid_dma(&pte) ? (pte & HMM_PTE_DMA_MASK) : 0;
+}
+
static inline unsigned long hmm_pte_pfn(dma_addr_t pte)
{
return hmm_pte_test_valid_pfn(&pte) ? pte >> PAGE_SHIFT : 0;
diff --git a/mm/hmm.c b/mm/hmm.c
index 8b3beed772b6..ea1c5f135a42 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -41,6 +41,7 @@
#include <linux/mman.h>
#include <linux/delay.h>
#include <linux/workqueue.h>
+#include <linux/dma-mapping.h>
#include "internal.h"
@@ -584,6 +585,46 @@ static inline int hmm_mirror_update(struct hmm_mirror *mirror,
return ret;
}
+static void hmm_mirror_update_pte(struct hmm_mirror *mirror,
+ struct hmm_event *event,
+ struct hmm_pt_iter *iter,
+ struct mm_pt_iter *mm_iter,
+ struct page *page,
+ dma_addr_t *hmm_pte,
+ unsigned long addr)
+{
+ bool dirty = hmm_pte_test_and_clear_dirty(hmm_pte);
+
+ if (hmm_pte_test_valid_pfn(hmm_pte)) {
+ *hmm_pte &= event->pte_mask;
+ if (!hmm_pte_test_valid_pfn(hmm_pte))
+ hmm_pt_iter_directory_unref(iter);
+ goto out;
+ }
+
+ if (!hmm_pte_test_valid_dma(hmm_pte))
+ return;
+
+ if (!hmm_pte_test_valid_dma(&event->pte_mask)) {
+ struct device *dev = mirror->device->dev;
+ dma_addr_t dma_addr;
+
+ dma_addr = hmm_pte_dma_addr(*hmm_pte);
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ }
+
+ *hmm_pte &= event->pte_mask;
+ if (!hmm_pte_test_valid_dma(hmm_pte))
+ hmm_pt_iter_directory_unref(iter);
+
+out:
+ if (dirty) {
+ page = page ? : mm_pt_iter_page(mm_iter, addr);
+ if (page)
+ set_page_dirty(page);
+ }
+}
+
static void hmm_mirror_update_pt(struct hmm_mirror *mirror,
struct hmm_event *event,
struct page *page)
@@ -610,19 +651,9 @@ static void hmm_mirror_update_pt(struct hmm_mirror *mirror,
*/
hmm_pt_iter_directory_lock(&iter);
do {
- if (!hmm_pte_test_valid_pfn(hmm_pte))
- continue;
- if (hmm_pte_test_and_clear_dirty(hmm_pte) &&
- hmm_pte_test_write(hmm_pte)) {
- page = page ? : mm_pt_iter_page(&mm_iter, addr);
- if (page)
- set_page_dirty(page);
- page = NULL;
- }
- *hmm_pte &= event->pte_mask;
- if (hmm_pte_test_valid_pfn(hmm_pte))
- continue;
- hmm_pt_iter_directory_unref(&iter);
+ hmm_mirror_update_pte(mirror, event, &iter, &mm_iter,
+ page, hmm_pte, addr);
+ page = NULL;
} while (addr += PAGE_SIZE, hmm_pte++, addr != next);
hmm_pt_iter_directory_unlock(&iter);
}
@@ -694,6 +725,9 @@ static int hmm_mirror_fault_hpmd(struct hmm_mirror *mirror,
*/
hmm_pt_iter_directory_lock(iter);
do {
+ if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+ continue;
+
if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) {
hmm_pte[i] = hmm_pte_from_pfn(pfn);
hmm_pt_iter_directory_ref(iter);
@@ -767,6 +801,9 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp,
break;
}
+ if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+ continue;
+
if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) {
hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(*ptep));
hmm_pt_iter_directory_ref(iter);
@@ -783,6 +820,80 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp,
return ret;
}
+static int hmm_mirror_dma_map(struct hmm_mirror *mirror,
+ struct hmm_pt_iter *iter,
+ unsigned long start,
+ unsigned long end)
+{
+ struct device *dev = mirror->device->dev;
+ unsigned long addr;
+ int ret;
+
+ for (ret = 0, addr = start; !ret && addr < end;) {
+ unsigned long i = 0, next = end;
+ dma_addr_t *hmm_pte;
+
+ hmm_pte = hmm_pt_iter_populate(iter, addr, &next);
+ if (!hmm_pte)
+ return -ENOENT;
+
+ do {
+ dma_addr_t dma_addr, pte;
+ struct page *page;
+
+again:
+ pte = ACCESS_ONCE(hmm_pte[i]);
+ if (!hmm_pte_test_valid_pfn(&pte)) {
+ if (!hmm_pte_test_valid_dma(&pte)) {
+ ret = -ENOENT;
+ break;
+ }
+ continue;
+ }
+
+ page = pfn_to_page(hmm_pte_pfn(pte));
+ VM_BUG_ON(!page);
+ dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev, dma_addr)) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ hmm_pt_iter_directory_lock(iter);
+ /*
+ * Make sure we transfer the dirty bit. Note that there
+ * might still be a window for another thread to set
+ * the dirty bit before we check for pte equality. This
+ * will just lead to a useless retry so it is not the
+ * end of the world here.
+ */
+ if (hmm_pte_test_dirty(&hmm_pte[i]))
+ hmm_pte_set_dirty(&pte);
+ if (ACCESS_ONCE(hmm_pte[i]) != pte) {
+ hmm_pt_iter_directory_unlock(iter);
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (hmm_pte_test_valid_pfn(&pte))
+ goto again;
+ if (!hmm_pte_test_valid_dma(&pte)) {
+ ret = -ENOENT;
+ break;
+ }
+ } else {
+ hmm_pte[i] = hmm_pte_from_dma_addr(dma_addr);
+ if (hmm_pte_test_write(&pte))
+ hmm_pte_set_write(&hmm_pte[i]);
+ if (hmm_pte_test_dirty(&pte))
+ hmm_pte_set_dirty(&hmm_pte[i]);
+ hmm_pt_iter_directory_unlock(iter);
+ }
+ } while (addr += PAGE_SIZE, i++, addr != next && !ret);
+ }
+
+ return ret;
+}
+
static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
struct hmm_event *event,
struct vm_area_struct *vma,
@@ -791,7 +902,7 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
struct hmm_mirror_fault mirror_fault;
unsigned long addr = event->start;
struct mm_walk walk = {0};
- int ret = 0;
+ int ret;
if ((event->etype == HMM_DEVICE_WRITE_FAULT) && !(vma->vm_flags & VM_WRITE))
return -EACCES;
@@ -800,33 +911,45 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror,
if (ret)
return ret;
-again:
- if (event->backoff) {
- ret = -EAGAIN;
- goto out;
- }
- if (addr >= event->end)
- goto out;
+ do {
+ if (event->backoff) {
+ ret = -EAGAIN;
+ break;
+ }
+ if (addr >= event->end)
+ break;
+
+ mirror_fault.event = event;
+ mirror_fault.mirror = mirror;
+ mirror_fault.vma = vma;
+ mirror_fault.addr = addr;
+ mirror_fault.iter = iter;
+ walk.mm = mirror->hmm->mm;
+ walk.private = &mirror_fault;
+ walk.pmd_entry = hmm_mirror_fault_pmd;
+ walk.pte_hole = hmm_pte_hole;
+ ret = walk_page_range(addr, event->end, &walk);
+ if (ret)
+ break;
+
+ if (event->backoff) {
+ ret = -EAGAIN;
+ break;
+ }
- mirror_fault.event = event;
- mirror_fault.mirror = mirror;
- mirror_fault.vma = vma;
- mirror_fault.addr = addr;
- mirror_fault.iter = iter;
- walk.mm = mirror->hmm->mm;
- walk.private = &mirror_fault;
- walk.pmd_entry = hmm_mirror_fault_pmd;
- walk.pte_hole = hmm_pte_hole;
- ret = walk_page_range(addr, event->end, &walk);
- if (!ret) {
- ret = mirror->device->ops->update(mirror, event);
- if (!ret) {
- addr = mirror_fault.addr;
- goto again;
+ if (mirror->device->dev) {
+ ret = hmm_mirror_dma_map(mirror, iter,
+ addr, event->end);
+ if (ret)
+ break;
}
- }
-out:
+ ret = mirror->device->ops->update(mirror, event);
+ if (ret)
+ break;
+ addr = mirror_fault.addr;
+ } while (1);
+
hmm_device_fault_end(mirror->hmm, event);
if (ret == -ENOENT) {
ret = hmm_mm_fault(mirror->hmm, event, vma, addr);
@@ -980,7 +1103,8 @@ void hmm_mirror_range_dirty(struct hmm_mirror *mirror,
hmm_pte = hmm_pt_iter_walk(&iter, &addr, &next);
for (; hmm_pte && addr != next; hmm_pte++, addr += PAGE_SIZE) {
- if (!hmm_pte_test_valid_pfn(hmm_pte) ||
+ if ((!hmm_pte_test_valid_pfn(hmm_pte) &&
+ !hmm_pte_test_valid_dma(hmm_pte)) ||
!hmm_pte_test_write(hmm_pte))
continue;
hmm_pte_set_dirty(hmm_pte);