authorJérôme Glisse <jglisse@redhat.com>2016-07-15 22:35:55 -0400
committerJérôme Glisse <jglisse@redhat.com>2016-11-17 12:42:08 -0500
commit4fd5e67d91b33bf61e20ea00ffdb737465a4b7c7 (patch)
tree0ee5ba689e71c86f67b46b2a215a2bbe9e9eceeb /include
parent24ed99e4dc460c4a3c0f8f5cc591fe09d8fc4afa (diff)
mm/hmm: heterogeneous memory management (HMM for short)
HMM provides three separate functionalities:
- Mirroring: synchronize CPU page table and device page table
- Device memory: allocate struct page for device memory
- Migration: migrate regular memory to device memory

This patch introduces some common helpers and definitions shared by all three functionalities.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Jatin Kumar <jakumar@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Signed-off-by: Sherry Cheung <SCheung@nvidia.com>
Signed-off-by: Subhash Gutti <sgutti@nvidia.com>
Diffstat (limited to 'include')
-rw-r--r--include/linux/hmm.h139
-rw-r--r--include/linux/mm_types.h5
2 files changed, 144 insertions, 0 deletions
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
new file mode 100644
index 000000000000..54dd529b693d
--- /dev/null
+++ b/include/linux/hmm.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
+ */
+/*
+ * HMM provides three separate functionalities:
+ * - Mirroring: synchronize CPU page table and device page table
+ * - Device memory: allocate struct page for device memory
+ * - Migration: migrate regular memory to device memory
+ *
+ * Each can be used independently of the others.
+ *
+ *
+ * Mirroring:
+ *
+ * HMM provides helpers to mirror a process address space on a device. To do
+ * so it provides several helpers to order device page table updates with
+ * respect to CPU page table updates. The requirement is that, for any given
+ * virtual address, the CPU and device page tables cannot point to different
+ * physical pages. HMM uses the mmu_notifier API and introduces a virtual
+ * address range lock which blocks CPU page table updates for a range while
+ * the device page table is being updated. The usage pattern is:
+ *
+ * hmm_vma_range_lock(vma, start, end);
+ * // snapshot CPU page table
+ * // update device page table from snapshot
+ * hmm_vma_range_unlock(vma, start, end);
+ *
+ * Any CPU page table update that conflicts with a range lock will wait until
+ * the range is unlocked. This guarantees proper serialization of CPU and
+ * device page table updates.
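+ *
+ * For example, a driver servicing a device page fault on a range might do
+ * the following (snapshot_cpu_ptes() and dev_pt_update() stand for
+ * hypothetical driver code; only the lock/unlock helpers come from HMM):
+ *
+ *   hmm_vma_range_lock(vma, start, end);
+ *   // the CPU page table cannot change under us for [start, end)
+ *   snapshot_cpu_ptes(vma, start, end, snapshot);
+ *   dev_pt_update(dev, start, end, snapshot);
+ *   hmm_vma_range_unlock(vma, start, end);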
+ *
+ *
+ * Device memory:
+ *
+ * HMM provides helpers to leverage device memory, whether it is addressable
+ * by the CPU like regular memory or not addressable at all. In both cases the
+ * device memory is associated with dedicated struct pages (which are
+ * allocated as for hotplugged memory). Device memory management is the
+ * responsibility of the device driver; HMM only allocates and initializes the
+ * struct pages associated with the device memory.
+ *
+ * Allocating struct pages for device memory allows it to be used almost like
+ * regular memory. Unlike regular memory, however, it cannot be added to the
+ * LRU, nor can any memory allocation use device memory directly. Device
+ * memory will only end up being used by a process if the device driver
+ * migrates some of the process memory from regular memory to device memory.
+ *
+ *
+ * Migration:
+ *
+ * The existing memory migration mechanism (mm/migrate.c) does not allow using
+ * anything other than the CPU to copy from source to destination memory.
+ * Moreover, the existing code is not tailored to drive migration from a
+ * process virtual address range rather than from a list of pages. Finally,
+ * the migration flow does not allow for graceful failure at the different
+ * steps of the migration process.
+ *
+ * HMM solves all of the above through a simple API:
+ *
+ * hmm_vma_migrate(vma, start, end, ops);
+ *
+ * The ops struct provides two callbacks. The first, alloc_and_copy(),
+ * allocates the destination memory and initializes it from the source
+ * memory. Migration can still fail after this step, so the second callback,
+ * finalize_and_map(), lets the device driver know which pages were
+ * successfully migrated and which were not.
+ *
+ * This can easily be used outside of HMM's intended use case.
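+ *
+ * A minimal caller sketch (the struct and field names below are guesses for
+ * illustration; only hmm_vma_migrate(), alloc_and_copy() and
+ * finalize_and_map() are described above):
+ *
+ *   static const struct hmm_migrate_ops my_ops = {
+ *       .alloc_and_copy   = my_alloc_and_copy,
+ *       .finalize_and_map = my_finalize_and_map,
+ *   };
+ *
+ *   hmm_vma_migrate(vma, start, end, &my_ops);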
+ *
+ *
+ * This header file contains all the APIs related to these three
+ * functionalities, and each function and struct is more thoroughly
+ * documented in the comments below.
+ */
+#ifndef LINUX_HMM_H
+#define LINUX_HMM_H
+
+#include <linux/kconfig.h>
+
+#if IS_ENABLED(CONFIG_HMM)
+
+
+/*
+ * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
+ *
+ * Flags:
+ * HMM_PFN_VALID: pfn is valid
+ * HMM_PFN_WRITE: CPU page table has the write permission set
+ */
+typedef unsigned long hmm_pfn_t;
+
+#define HMM_PFN_VALID (1 << 0)
+#define HMM_PFN_WRITE (1 << 1)
+#define HMM_PFN_SHIFT 2
+
+static inline struct page *hmm_pfn_to_page(hmm_pfn_t pfn)
+{
+ if (!(pfn & HMM_PFN_VALID))
+ return NULL;
+ return pfn_to_page(pfn >> HMM_PFN_SHIFT);
+}
+
+static inline unsigned long hmm_pfn_to_pfn(hmm_pfn_t pfn)
+{
+ if (!(pfn & HMM_PFN_VALID))
+ return -1UL;
+ return (pfn >> HMM_PFN_SHIFT);
+}
+
+static inline hmm_pfn_t hmm_pfn_from_page(struct page *page)
+{
+ return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+}
+
+static inline hmm_pfn_t hmm_pfn_from_pfn(unsigned long pfn)
+{
+ return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+}
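+
+/*
+ * Example (illustrative only): the flags live in the low HMM_PFN_SHIFT bits
+ * and the pfn in the bits above them, so encoding a page and decoding it
+ * again yields the original page:
+ *
+ *   hmm_pfn_t hpfn = hmm_pfn_from_page(page);
+ *   struct page *same = hmm_pfn_to_page(hpfn);  // same == page
+ *   unsigned long raw = hmm_pfn_to_pfn(hpfn);   // raw == page_to_pfn(page)
+ */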
+
+
+/* Below is for HMM internal use only! Not to be used by device drivers! */
+void hmm_mm_destroy(struct mm_struct *mm);
+
+#else /* IS_ENABLED(CONFIG_HMM) */
+
+/* Below is for HMM internal use only! Not to be used by device drivers! */
+static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+
+#endif /* IS_ENABLED(CONFIG_HMM) */
+#endif /* LINUX_HMM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4a8acedf4b7d..4effdbf364cf 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -23,6 +23,7 @@
struct address_space;
struct mem_cgroup;
+struct hmm;
#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \
@@ -516,6 +517,10 @@ struct mm_struct {
atomic_long_t hugetlb_usage;
#endif
struct work_struct async_put_work;
+#if IS_ENABLED(CONFIG_HMM)
+ /* HMM needs to track a few things per mm */
+ struct hmm *hmm;
+#endif
};
static inline void mm_init_cpumask(struct mm_struct *mm)