diff options
-rw-r--r-- | include/linux/hmm_pt.h | 11 | ||||
-rw-r--r-- | mm/hmm.c | 202 |
2 files changed, 174 insertions, 39 deletions
diff --git a/include/linux/hmm_pt.h b/include/linux/hmm_pt.h index b9cb7b542bfd..9abb8ab24e2b 100644 --- a/include/linux/hmm_pt.h +++ b/include/linux/hmm_pt.h @@ -176,6 +176,17 @@ static inline dma_addr_t hmm_pte_from_pfn(dma_addr_t pfn) return (pfn << PAGE_SHIFT) | (1 << HMM_PTE_VALID_PFN_BIT); } +/* Build an HMM page table entry from a DMA address: keeps the bits selected by HMM_PTE_DMA_MASK and sets the HMM_PTE_VALID_DMA_BIT flag. */ static inline dma_addr_t hmm_pte_from_dma_addr(dma_addr_t dma_addr) +{ + return (dma_addr & HMM_PTE_DMA_MASK) | (1 << HMM_PTE_VALID_DMA_BIT); +} + +/* Return the DMA address held by pte (pte masked with HMM_PTE_DMA_MASK), or 0 when hmm_pte_test_valid_dma() says the entry is not a valid DMA entry. */ static inline dma_addr_t hmm_pte_dma_addr(dma_addr_t pte) +{ + /* FIXME Use max dma addr instead of 0 ? */ + return hmm_pte_test_valid_dma(&pte) ? (pte & HMM_PTE_DMA_MASK) : 0; +} + static inline unsigned long hmm_pte_pfn(dma_addr_t pte) { return hmm_pte_test_valid_pfn(&pte) ? pte >> PAGE_SHIFT : 0; @@ -41,6 +41,7 @@ #include <linux/mman.h> #include <linux/delay.h> #include <linux/workqueue.h> +#include <linux/dma-mapping.h> #include "internal.h" @@ -584,6 +585,46 @@ static inline int hmm_mirror_update(struct hmm_mirror *mirror, return ret; } +/* Apply event->pte_mask to a single mirror entry. The dirty bit is tested and cleared first. A valid-pfn entry is masked and its directory reference dropped if it is no longer a valid pfn. An entry that is neither valid pfn nor valid DMA is left alone (early return). A valid-DMA entry is dma_unmap_page()'d when the mask lacks the valid-DMA bit, then masked, and its directory reference dropped if it is no longer valid DMA. On both valid paths a dirty bit found on entry is forwarded with set_page_dirty() on the caller's page or, when page is NULL, on the page returned by mm_pt_iter_page(). */ static void hmm_mirror_update_pte(struct hmm_mirror *mirror, + struct hmm_event *event, + struct hmm_pt_iter *iter, + struct mm_pt_iter *mm_iter, + struct page *page, + dma_addr_t *hmm_pte, + unsigned long addr) +{ + bool dirty = hmm_pte_test_and_clear_dirty(hmm_pte); + + if (hmm_pte_test_valid_pfn(hmm_pte)) { + *hmm_pte &= event->pte_mask; + if (!hmm_pte_test_valid_pfn(hmm_pte)) + hmm_pt_iter_directory_unref(iter); + goto out; + } + + if (!hmm_pte_test_valid_dma(hmm_pte)) + return; + + if (!hmm_pte_test_valid_dma(&event->pte_mask)) { + struct device *dev = mirror->device->dev; + dma_addr_t dma_addr; + + dma_addr = hmm_pte_dma_addr(*hmm_pte); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + *hmm_pte &= event->pte_mask; + if (!hmm_pte_test_valid_dma(hmm_pte)) + hmm_pt_iter_directory_unref(iter); + +out: + if (dirty) { + page = page ? 
: mm_pt_iter_page(mm_iter, addr); + if (page) + set_page_dirty(page); + } +} + static void hmm_mirror_update_pt(struct hmm_mirror *mirror, struct hmm_event *event, struct page *page) @@ -610,19 +651,9 @@ static void hmm_mirror_update_pt(struct hmm_mirror *mirror, */ hmm_pt_iter_directory_lock(&iter); do { - if (!hmm_pte_test_valid_pfn(hmm_pte)) - continue; - if (hmm_pte_test_and_clear_dirty(hmm_pte) && - hmm_pte_test_write(hmm_pte)) { - page = page ? : mm_pt_iter_page(&mm_iter, addr); - if (page) - set_page_dirty(page); - page = NULL; - } - *hmm_pte &= event->pte_mask; - if (hmm_pte_test_valid_pfn(hmm_pte)) - continue; - hmm_pt_iter_directory_unref(&iter); + hmm_mirror_update_pte(mirror, event, &iter, &mm_iter, + page, hmm_pte, addr); + page = NULL; } while (addr += PAGE_SIZE, hmm_pte++, addr != next); hmm_pt_iter_directory_unlock(&iter); } @@ -694,6 +725,9 @@ static int hmm_mirror_fault_hpmd(struct hmm_mirror *mirror, */ hmm_pt_iter_directory_lock(iter); do { + if (hmm_pte_test_valid_dma(&hmm_pte[i])) + continue; + if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) { hmm_pte[i] = hmm_pte_from_pfn(pfn); hmm_pt_iter_directory_ref(iter); @@ -767,6 +801,9 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp, break; } + if (hmm_pte_test_valid_dma(&hmm_pte[i])) + continue; + if (!hmm_pte_test_valid_pfn(&hmm_pte[i])) { hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(*ptep)); hmm_pt_iter_directory_ref(iter); @@ -783,6 +820,80 @@ static int hmm_mirror_fault_pmd(pmd_t *pmdp, return ret; } +/* Walk the mirror page table over [start, end) and, for each entry that holds a valid pfn, dma_map_page() the page and replace the entry with the DMA address while carrying over the write and dirty bits; entries that are already valid DMA entries are skipped. The swap is done under the directory lock and only if the entry still equals the snapshot read before mapping; otherwise the fresh mapping is undone and the entry is re-read. Returns 0 on success, -ENOENT when hmm_pt_iter_populate() returns NULL or an entry is neither a valid pfn nor a valid DMA entry, and -ENOMEM when dma_mapping_error() reports a failure. */ static int hmm_mirror_dma_map(struct hmm_mirror *mirror, + struct hmm_pt_iter *iter, + unsigned long start, + unsigned long end) +{ + struct device *dev = mirror->device->dev; + unsigned long addr; + int ret; + + for (ret = 0, addr = start; !ret && addr < end;) { + unsigned long i = 0, next = end; + dma_addr_t *hmm_pte; + + hmm_pte = hmm_pt_iter_populate(iter, addr, &next); + if (!hmm_pte) + return -ENOENT; + + do { + dma_addr_t dma_addr, pte; + struct page *page; + +again: + pte = ACCESS_ONCE(hmm_pte[i]); + if 
(!hmm_pte_test_valid_pfn(&pte)) { + if (!hmm_pte_test_valid_dma(&pte)) { + ret = -ENOENT; + break; + } + continue; + } + + page = pfn_to_page(hmm_pte_pfn(pte)); + VM_BUG_ON(!page); + dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) { + ret = -ENOMEM; + break; + } + + hmm_pt_iter_directory_lock(iter); + /* + * Make sure we transfer the dirty bit. Note that there + * might still be a window for another thread to set + * the dirty bit before we check for pte equality. This + * will just lead to a useless retry so it is not the + * end of the world here. + */ + if (hmm_pte_test_dirty(&hmm_pte[i])) + hmm_pte_set_dirty(&pte); + if (ACCESS_ONCE(hmm_pte[i]) != pte) { + hmm_pt_iter_directory_unlock(iter); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + /* NOTE(review): pte is the local snapshot, which passed the valid-pfn test above, so this appears to always retry and the -ENOENT check below looks unreachable; the "again" path re-reads and re-validates the entry - confirm intent. */ if (hmm_pte_test_valid_pfn(&pte)) + goto again; + if (!hmm_pte_test_valid_dma(&pte)) { + ret = -ENOENT; + break; + } + } else { + hmm_pte[i] = hmm_pte_from_dma_addr(dma_addr); + if (hmm_pte_test_write(&pte)) + hmm_pte_set_write(&hmm_pte[i]); + if (hmm_pte_test_dirty(&pte)) + hmm_pte_set_dirty(&hmm_pte[i]); + hmm_pt_iter_directory_unlock(iter); + } + } while (addr += PAGE_SIZE, i++, addr != next && !ret); + } + + return ret; +} + static int hmm_mirror_handle_fault(struct hmm_mirror *mirror, struct hmm_event *event, struct vm_area_struct *vma, @@ -791,7 +902,7 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror, struct hmm_mirror_fault mirror_fault; unsigned long addr = event->start; struct mm_walk walk = {0}; - int ret = 0; + int ret; if ((event->etype == HMM_DEVICE_WRITE_FAULT) && !(vma->vm_flags & VM_WRITE)) return -EACCES; @@ -800,33 +911,45 @@ static int hmm_mirror_handle_fault(struct hmm_mirror *mirror, if (ret) return ret; -again: - if (event->backoff) { - ret = -EAGAIN; - goto out; - } - if (addr >= event->end) - goto out; + do { + if (event->backoff) { + ret = -EAGAIN; + break; + } + if (addr >= event->end) + break; + + 
mirror_fault.event = event; + mirror_fault.mirror = mirror; + mirror_fault.vma = vma; + mirror_fault.addr = addr; + mirror_fault.iter = iter; + walk.mm = mirror->hmm->mm; + walk.private = &mirror_fault; + walk.pmd_entry = hmm_mirror_fault_pmd; + walk.pte_hole = hmm_pte_hole; + ret = walk_page_range(addr, event->end, &walk); + if (ret) + break; + + if (event->backoff) { + ret = -EAGAIN; + break; + } - mirror_fault.event = event; - mirror_fault.mirror = mirror; - mirror_fault.vma = vma; - mirror_fault.addr = addr; - mirror_fault.iter = iter; - walk.mm = mirror->hmm->mm; - walk.private = &mirror_fault; - walk.pmd_entry = hmm_mirror_fault_pmd; - walk.pte_hole = hmm_pte_hole; - ret = walk_page_range(addr, event->end, &walk); - if (!ret) { - ret = mirror->device->ops->update(mirror, event); - if (!ret) { - addr = mirror_fault.addr; - goto again; + /* DMA-map the faulted range before the device update callback, but only when the device has a struct device set. */ if (mirror->device->dev) { + ret = hmm_mirror_dma_map(mirror, iter, + addr, event->end); + if (ret) + break; } - } -out: + ret = mirror->device->ops->update(mirror, event); + if (ret) + break; + addr = mirror_fault.addr; + } while (1); + hmm_device_fault_end(mirror->hmm, event); if (ret == -ENOENT) { ret = hmm_mm_fault(mirror->hmm, event, vma, addr); @@ -980,7 +1103,8 @@ void hmm_mirror_range_dirty(struct hmm_mirror *mirror, hmm_pte = hmm_pt_iter_walk(&iter, &addr, &next); for (; hmm_pte && addr != next; hmm_pte++, addr += PAGE_SIZE) { - if (!hmm_pte_test_valid_pfn(hmm_pte) || + if ((!hmm_pte_test_valid_pfn(hmm_pte) && + !hmm_pte_test_valid_dma(hmm_pte)) || !hmm_pte_test_write(hmm_pte)) continue; hmm_pte_set_dirty(hmm_pte); |