author     Jérôme Glisse <jglisse@redhat.com>   2017-07-13 16:00:58 -0400
committer  Jérôme Glisse <jglisse@redhat.com>   2017-07-13 16:45:13 -0400
commit     554e68bef0d25abd0ed72094ea143b111d5b4bc4 (patch)
tree       944927e19fd19eaf7bbf5800ccba7c6d6c406902
parent     84204c5be742186236b371ea2f7ad39bf1770fe6 (diff)
mm/hmm: fix handling of pmd inside HMM CPU page table snapshot helper (hmm-cdm-next)
First, hmm_pfns_bad() had outdated logic to reach the range structure, which would lead to a kernel segfault if it were called. Second, the whole handling of huge and transparent pmds was wrong. This patch fixes both. Note that unlike GUP we do not need to take the pmd lock, as we also track changes to the pmd through the mmu_notifier callbacks.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
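The locking argument above is the usual snapshot-and-recheck idiom: take a single atomic read of the pmd, work only on that local copy, and restart the walk if the copy no longer looks like a huge or device mapping (invalidation of the snapshot is then reported through the mmu_notifier callbacks). Below is a minimal user-space sketch of that idiom, not kernel code; fake_pmd_t, FAKE_PMD_HUGE and snapshot_huge_entry() are invented for illustration.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t fake_pmd_t;                 /* toy stand-in for pmd_t */
#define FAKE_PMD_HUGE (1ull << 0)            /* pretend "huge mapping" bit */

/* A page-table entry that another thread could rewrite concurrently. */
static _Atomic fake_pmd_t table_entry = FAKE_PMD_HUGE | (0x1000ull << 1);

static int is_huge(fake_pmd_t pmd)
{
	return pmd & FAKE_PMD_HUGE;
}

static int snapshot_huge_entry(uint64_t *out_pfn)
{
	fake_pmd_t pmd;

again:
	/* One atomic read gives a self-consistent local copy of the entry. */
	pmd = atomic_load(&table_entry);

	/*
	 * A concurrent splitter may have changed the entry's kind between
	 * the classification and the read, so re-check the local copy and
	 * restart if it is no longer a huge mapping (the real walker's
	 * "goto again" re-runs the whole pmd classification).
	 */
	if (!is_huge(pmd))
		goto again;

	/* From here on, only the snapshot is used; no lock is held. */
	*out_pfn = pmd >> 1;
	return 0;
}

int main(void)
{
	uint64_t pfn;

	if (!snapshot_huge_entry(&pfn))
		printf("huge entry maps base pfn 0x%llx\n",
		       (unsigned long long)pfn);
	return 0;
}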
-rw-r--r--  mm/hmm.c | 58
1 file changed, 32 insertions(+), 26 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index 28e54e3b4e1d..228e9f6f6c47 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -293,7 +293,8 @@ static int hmm_pfns_bad(unsigned long addr,
 			unsigned long end,
 			struct mm_walk *walk)
 {
-	struct hmm_range *range = walk->private;
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
 	hmm_pfn_t *pfns = range->pfns;
 	unsigned long i;
 
@@ -380,41 +381,46 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0;
 	write_fault = hmm_vma_walk->fault & hmm_vma_walk->write;
 
-	if (pmd_none(*pmdp) || pmd_trans_unstable(pmdp)) {
+again:
+	if (pmd_none(*pmdp))
+		return hmm_vma_walk_hole(start, end, walk);
+
+	if (pmd_huge(*pmdp) && vma->vm_flags & VM_HUGETLB)
+		return hmm_pfns_bad(start, end, walk);
+
+	if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) {
+		unsigned long pfn;
 		pmd_t pmd;
 
+		/*
+		 * No need to take pmd_lock here: even if some other thread
+		 * is splitting the huge pmd, we will get that event through
+		 * the mmu_notifier callback.
+		 *
+		 * So just read the pmd value and check again that it is a
+		 * transparent huge or device mapping one, then compute the
+		 * corresponding pfn values.
+		 */
 		pmd = pmd_read_atomic(pmdp);
 		barrier();
 
-		if (pmd_none(pmd))
-			return hmm_vma_walk_hole(start, end, walk);
-
-		if (pmd_bad(pmd))
-			return hmm_pfns_bad(start, end, walk);
+		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
+			goto again;
 
 		if (pmd_protnone(pmd))
 			return hmm_vma_walk_clear(start, end, walk);
-
-		if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
-			unsigned long pfn = pmd_pfn(pmd) + pte_index(addr);
-
-			if (write_fault && !pmd_write(pmd))
-				return hmm_vma_walk_clear(start, end, walk);
-
-			flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
-			for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
-				pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
-			return 0;
-		} else {
-			/*
-			 * Something unusual is going on. Better to have the
-			 * driver assume there is nothing for this range and
-			 * let the fault code path sort out proper pages for the
-			 * range.
-			 */
+		if (write_fault && !pmd_write(pmd))
 			return hmm_vma_walk_clear(start, end, walk);
-		}
+
+		pfn = pmd_pfn(pmd) + pte_index(addr);
+		flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
+		for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
+			pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
+		return 0;
 	}
+
+	if (pmd_bad(*pmdp))
+		return hmm_pfns_bad(start, end, walk);
+
 	ptep = pte_offset_map(pmdp, addr);
 	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
 		pte_t pte = *ptep;
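For reference, the pfn computation in the new huge-pmd branch boils down to: take the base pfn of the huge mapping, add the 4 KiB-slot index of addr within that mapping (pte_index()), then hand out one pfn per PAGE_SIZE step. A toy, self-contained model of that loop follows; the page/pmd constants, the hmm_pfn_from_pfn() encoding and the flag bits are assumptions for illustration (the real HMM_PFN_* layout and pte_index() come from kernel headers), and the array index here starts at 0 rather than at the range offset the walker uses.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT   12
#define PAGE_SIZE    (1ul << PAGE_SHIFT)
#define PMD_ENTRIES  512ul                    /* 2 MiB huge pmd / 4 KiB pages */

#define HMM_PFN_VALID (1ul << 0)              /* toy flag layout, not the kernel's */
#define HMM_PFN_WRITE (1ul << 1)

/* Toy encoding of a pfn plus flag bits into one word. */
static uint64_t hmm_pfn_from_pfn(uint64_t pfn)
{
	return (pfn << 2) | HMM_PFN_VALID;
}

int main(void)
{
	uint64_t base_pfn = 0x40000;              /* pfn of the 2 MiB huge page */
	uint64_t addr = 0x7f0000001000ull;        /* somewhere inside the pmd range */
	uint64_t end = addr + 4 * PAGE_SIZE;      /* snapshot only 4 pages of it */
	uint64_t flag = HMM_PFN_WRITE;            /* as if pmd_write() were true */
	uint64_t pfns[4];
	unsigned int i;

	/* pte_index(addr): which 4 KiB slot of the 2 MiB mapping addr falls in. */
	uint64_t pfn = base_pfn + ((addr >> PAGE_SHIFT) & (PMD_ENTRIES - 1));

	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
		pfns[i] = hmm_pfn_from_pfn(pfn) | flag;

	for (i = 0; i < 4; i++)
		printf("pfns[%u] = 0x%llx\n", i, (unsigned long long)pfns[i]);
	return 0;
}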