summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- mm/huge_memory.c 13
-rw-r--r-- mm/memory.c 19
2 files changed, 13 insertions, 19 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 626e93db28ba..2f12e9fcf1a2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1291,17 +1291,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
flags |= TNF_FAULT_LOCAL;
}
- /*
- * Avoid grouping on DSO/COW pages in specific and RO pages
- * in general, RO pages shouldn't hurt as much anyway since
- * they can be in shared cache state.
- *
- * FIXME! This checks "pmd_dirty()" as an approximation of
- * "is this a read-only page", since checking "pmd_write()"
- * is even more broken. We haven't actually turned this into
- * a writable page, so pmd_write() will always be false.
- */
- if (!pmd_dirty(pmd))
+ /* See similar comment in do_numa_page for explanation */
+ if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*
diff --git a/mm/memory.c b/mm/memory.c
index 411144f977b1..20beb6647dba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3069,16 +3069,19 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
- * Avoid grouping on DSO/COW pages in specific and RO pages
- * in general, RO pages shouldn't hurt as much anyway since
- * they can be in shared cache state.
+ * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
+ * much anyway since they can be in shared cache state. This misses
+ * the case where a mapping is writable but the process never writes
+ * to it. However, pte_write gets cleared during protection updates and
+ * pte_dirty has unpredictable behaviour between PTE scan updates,
+ * background writeback, dirty balancing and application behaviour.
*
- * FIXME! This checks "pmd_dirty()" as an approximation of
- * "is this a read-only page", since checking "pmd_write()"
- * is even more broken. We haven't actually turned this into
- * a writable page, so pmd_write() will always be false.
+ * TODO: Note that the ideal here would be to avoid a situation where a
+ * NUMA fault is taken immediately followed by a write fault in
+ * some cases, which would have lower overhead overall but would be
+ * invasive as the fault paths would need to be unified.
*/
- if (!pte_dirty(pte))
+ if (!(vma->vm_flags & VM_WRITE))
flags |= TNF_NO_GROUP;
/*