summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-11 18:23:28 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-11 18:23:28 -0800
commit59d53737a8640482995fea13c6e2c0fd016115d6 (patch)
tree3423eb92315865d76cb8d488513bfef6ab9251d0 /include
parentd3f180ea1a44aecba1b0dab2a253428e77f906bf (diff)
parent8138a67a5557ffea3a21dfd6f037842d4e748513 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge second set of updates from Andrew Morton: "More of MM" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (83 commits) mm/nommu.c: fix arithmetic overflow in __vm_enough_memory() mm/mmap.c: fix arithmetic overflow in __vm_enough_memory() vmstat: Reduce time interval to stat update on idle cpu mm/page_owner.c: remove unnecessary stack_trace field Documentation/filesystems/proc.txt: describe /proc/<pid>/map_files mm: incorporate read-only pages into transparent huge pages vmstat: do not use deferrable delayed work for vmstat_update mm: more aggressive page stealing for UNMOVABLE allocations mm: always steal split buddies in fallback allocations mm: when stealing freepages, also take pages created by splitting buddy page mincore: apply page table walker on do_mincore() mm: /proc/pid/clear_refs: avoid split_huge_page() mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP) mempolicy: apply page table walker on queue_pages_range() arch/powerpc/mm/subpage-prot.c: use walk->vma and walk_page_vma() memcg: cleanup preparation for page table walk numa_maps: remove numa_maps->vma numa_maps: fix typo in gather_hugetbl_stats pagemap: use walk->vma instead of calling find_vma() clear_refs: remove clear_refs_private->vma and introduce clear_refs_test_walk() ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/4level-fixup.h1
-rw-r--r--include/linux/compaction.h86
-rw-r--r--include/linux/gfp.h12
-rw-r--r--include/linux/huge_mm.h12
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--include/linux/kvm_host.h11
-rw-r--r--include/linux/memcontrol.h50
-rw-r--r--include/linux/mm.h69
-rw-r--r--include/linux/mm_types.h11
-rw-r--r--include/linux/mmzone.h15
-rw-r--r--include/linux/oom.h18
-rw-r--r--include/linux/page_counter.h3
-rw-r--r--include/linux/page_ext.h2
-rw-r--r--include/linux/swap.h15
-rw-r--r--include/linux/swapops.h4
-rw-r--r--include/trace/events/compaction.h209
-rw-r--r--include/trace/events/kmem.h7
-rw-r--r--include/uapi/linux/kernel-page-flags.h1
18 files changed, 364 insertions, 170 deletions
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index 77ff547730af..5bdab6bffd23 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -4,6 +4,7 @@
#define __ARCH_HAS_4LEVEL_HACK
#define __PAGETABLE_PUD_FOLDED
+#define PUD_SHIFT PGDIR_SHIFT
#define PUD_SIZE PGDIR_SIZE
#define PUD_MASK PGDIR_MASK
#define PTRS_PER_PUD 1
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 3238ffa33f68..a014559e4a49 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -12,6 +12,10 @@
#define COMPACT_PARTIAL 3
/* The full zone was compacted */
#define COMPACT_COMPLETE 4
+/* For more detailed tracepoint output */
+#define COMPACT_NO_SUITABLE_PAGE 5
+#define COMPACT_NOT_SUITABLE_ZONE 6
+/* When adding new state, please change compaction_status_string, too */
/* Used to signal whether compaction detected need_sched() or lock contention */
/* No contention detected */
@@ -21,6 +25,8 @@
/* Zone lock or lru_lock was contended in async compaction */
#define COMPACT_CONTENDED_LOCK 2
+struct alloc_context; /* in mm/internal.h */
+
#ifdef CONFIG_COMPACTION
extern int sysctl_compact_memory;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -30,81 +36,25 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos);
extern int fragmentation_index(struct zone *zone, unsigned int order);
-extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
- int order, gfp_t gfp_mask, nodemask_t *mask,
- enum migrate_mode mode, int *contended,
- int alloc_flags, int classzone_idx);
+extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
+ int alloc_flags, const struct alloc_context *ac,
+ enum migrate_mode mode, int *contended);
extern void compact_pgdat(pg_data_t *pgdat, int order);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern unsigned long compaction_suitable(struct zone *zone, int order,
int alloc_flags, int classzone_idx);
-/* Do not skip compaction more than 64 times */
-#define COMPACT_MAX_DEFER_SHIFT 6
-
-/*
- * Compaction is deferred when compaction fails to result in a page
- * allocation success. 1 << compact_defer_limit compactions are skipped up
- * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT
- */
-static inline void defer_compaction(struct zone *zone, int order)
-{
- zone->compact_considered = 0;
- zone->compact_defer_shift++;
-
- if (order < zone->compact_order_failed)
- zone->compact_order_failed = order;
-
- if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
- zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
-}
-
-/* Returns true if compaction should be skipped this time */
-static inline bool compaction_deferred(struct zone *zone, int order)
-{
- unsigned long defer_limit = 1UL << zone->compact_defer_shift;
-
- if (order < zone->compact_order_failed)
- return false;
-
- /* Avoid possible overflow */
- if (++zone->compact_considered > defer_limit)
- zone->compact_considered = defer_limit;
-
- return zone->compact_considered < defer_limit;
-}
-
-/*
- * Update defer tracking counters after successful compaction of given order,
- * which means an allocation either succeeded (alloc_success == true) or is
- * expected to succeed.
- */
-static inline void compaction_defer_reset(struct zone *zone, int order,
- bool alloc_success)
-{
- if (alloc_success) {
- zone->compact_considered = 0;
- zone->compact_defer_shift = 0;
- }
- if (order >= zone->compact_order_failed)
- zone->compact_order_failed = order + 1;
-}
-
-/* Returns true if restarting compaction after many failures */
-static inline bool compaction_restarting(struct zone *zone, int order)
-{
- if (order < zone->compact_order_failed)
- return false;
-
- return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
- zone->compact_considered >= 1UL << zone->compact_defer_shift;
-}
+extern void defer_compaction(struct zone *zone, int order);
+extern bool compaction_deferred(struct zone *zone, int order);
+extern void compaction_defer_reset(struct zone *zone, int order,
+ bool alloc_success);
+extern bool compaction_restarting(struct zone *zone, int order);
#else
-static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
- int order, gfp_t gfp_mask, nodemask_t *nodemask,
- enum migrate_mode mode, int *contended,
- int alloc_flags, int classzone_idx)
+static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
+ unsigned int order, int alloc_flags,
+ const struct alloc_context *ac,
+ enum migrate_mode mode, int *contended)
{
return COMPACT_CONTINUE;
}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b840e3b2770d..51bd1e72a917 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -334,18 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
}
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr,
- int node);
+ int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+ alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
#else
#define alloc_pages(gfp_mask, order) \
alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+ alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+ alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad9051bab267..f10b20f05159 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -157,6 +157,13 @@ static inline int hpage_nr_pages(struct page *page)
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+extern struct page *huge_zero_page;
+
+static inline bool is_huge_zero_page(struct page *page)
+{
+ return ACCESS_ONCE(huge_zero_page) == page;
+}
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -206,6 +213,11 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str
return 0;
}
+static inline bool is_huge_zero_page(struct page *page)
+{
+ return false;
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7d7856359920..7b5785032049 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -99,9 +99,9 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write);
+ pmd_t *pmd, int flags);
struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int write);
+ pud_t *pud, int flags);
int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pmd);
unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
@@ -133,8 +133,8 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
static inline void hugetlb_show_meminfo(void)
{
}
-#define follow_huge_pmd(mm, addr, pmd, write) NULL
-#define follow_huge_pud(mm, addr, pud, write) NULL
+#define follow_huge_pmd(mm, addr, pmd, flags) NULL
+#define follow_huge_pud(mm, addr, pud, flags) NULL
#define prepare_hugepage_range(file, addr, len) (-EINVAL)
#define pmd_huge(x) 0
#define pud_huge(x) 0
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 26f106022c88..d189ee098aa2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -200,17 +200,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
-/*
- * Carry out a gup that requires IO. Allow the mm to relinquish the mmap
- * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL
- * controls whether we retry the gup one more time to completion in that case.
- * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp
- * handler.
- */
-int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long addr, bool write_fault,
- struct page **pagep);
-
enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fb212e1d700d..6cfd934c7c9b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -52,7 +52,27 @@ struct mem_cgroup_reclaim_cookie {
unsigned int generation;
};
+enum mem_cgroup_events_index {
+ MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */
+ MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */
+ MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */
+ MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */
+ MEM_CGROUP_EVENTS_NSTATS,
+ /* default hierarchy events */
+ MEMCG_LOW = MEM_CGROUP_EVENTS_NSTATS,
+ MEMCG_HIGH,
+ MEMCG_MAX,
+ MEMCG_OOM,
+ MEMCG_NR_EVENTS,
+};
+
#ifdef CONFIG_MEMCG
+void mem_cgroup_events(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_index idx,
+ unsigned int nr);
+
+bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
+
int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, struct mem_cgroup **memcgp);
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
@@ -102,6 +122,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
* For memory reclaim.
*/
int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
+bool mem_cgroup_lruvec_online(struct lruvec *lruvec);
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
@@ -138,12 +159,10 @@ static inline bool mem_cgroup_disabled(void)
return false;
}
-struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked,
- unsigned long *flags);
-void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked,
- unsigned long *flags);
+struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page);
void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx, int val);
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg);
static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
@@ -176,6 +195,18 @@ void mem_cgroup_split_huge_fixup(struct page *head);
#else /* CONFIG_MEMCG */
struct mem_cgroup;
+static inline void mem_cgroup_events(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_index idx,
+ unsigned int nr)
+{
+}
+
+static inline bool mem_cgroup_low(struct mem_cgroup *root,
+ struct mem_cgroup *memcg)
+{
+ return false;
+}
+
static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask,
struct mem_cgroup **memcgp)
@@ -268,6 +299,11 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
return 1;
}
+static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
+{
+ return true;
+}
+
static inline unsigned long
mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
@@ -285,14 +321,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}
-static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
- bool *locked, unsigned long *flags)
+static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page)
{
return NULL;
}
-static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,
- bool *locked, unsigned long *flags)
+static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
{
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 65db4aee738a..a4d24f3c5430 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -484,7 +484,8 @@ static inline void page_mapcount_reset(struct page *page)
static inline int page_mapcount(struct page *page)
{
- return atomic_read(&(page)->_mapcount) + 1;
+ VM_BUG_ON_PAGE(PageSlab(page), page);
+ return atomic_read(&page->_mapcount) + 1;
}
static inline int page_count(struct page *page)
@@ -627,29 +628,28 @@ int split_free_page(struct page *page);
* prototype for that function and accessor functions.
* These are _only_ valid on the head of a PG_compound page.
*/
-typedef void compound_page_dtor(struct page *);
static inline void set_compound_page_dtor(struct page *page,
compound_page_dtor *dtor)
{
- page[1].lru.next = (void *)dtor;
+ page[1].compound_dtor = dtor;
}
static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
{
- return (compound_page_dtor *)page[1].lru.next;
+ return page[1].compound_dtor;
}
static inline int compound_order(struct page *page)
{
if (!PageHead(page))
return 0;
- return (unsigned long)page[1].lru.prev;
+ return page[1].compound_order;
}
static inline void set_compound_order(struct page *page, unsigned long order)
{
- page[1].lru.prev = (void *)order;
+ page[1].compound_order = order;
}
#ifdef CONFIG_MMU
@@ -1164,8 +1164,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
/**
* mm_walk - callbacks for walk_page_range
- * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
@@ -1173,16 +1171,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
* @pte_entry: if set, called for each non-empty PTE (4th-level) entry
* @pte_hole: if set, called for each hole at all levels
* @hugetlb_entry: if set, called for each hugetlb entry
- * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
- * is used.
+ * @test_walk: caller specific callback function to determine whether
+ * we walk over the current vma or not. A positive returned
+ * value means "do page table walk over the current vma,"
+ * and a negative one means "abort current page table walk
+ * right now." 0 means "skip the current vma."
+ * @mm: mm_struct representing the target process of page table walk
+ * @vma: vma currently walked (NULL if walking outside vmas)
+ * @private: private data for callbacks' usage
*
- * (see walk_page_range for more details)
+ * (see the comment on walk_page_range() for more details)
*/
struct mm_walk {
- int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pud_entry)(pud_t *pud, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1192,12 +1192,16 @@ struct mm_walk {
int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long next,
struct mm_walk *walk);
+ int (*test_walk)(unsigned long addr, unsigned long next,
+ struct mm_walk *walk);
struct mm_struct *mm;
+ struct vm_area_struct *vma;
void *private;
};
int walk_page_range(unsigned long addr, unsigned long end,
struct mm_walk *walk);
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
@@ -1261,6 +1265,17 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
int write, int force, struct page **pages,
struct vm_area_struct **vmas);
+long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ int write, int force, struct page **pages,
+ int *locked);
+long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ int write, int force, struct page **pages,
+ unsigned int gup_flags);
+long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ int write, int force, struct page **pages);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
struct kvec;
@@ -1438,8 +1453,32 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
{
return 0;
}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
+static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
+
#else
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+ return atomic_long_read(&mm->nr_pmds);
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm)
+{
+ atomic_long_inc(&mm->nr_pmds);
+}
+
+static inline void mm_dec_nr_pmds(struct mm_struct *mm)
+{
+ atomic_long_dec(&mm->nr_pmds);
+}
#endif
int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 07c8bd3f7b48..199a03aab8dc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -28,6 +28,8 @@ struct mem_cgroup;
IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8)
+typedef void compound_page_dtor(struct page *);
+
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
@@ -142,6 +144,12 @@ struct page {
struct rcu_head rcu_head; /* Used by SLAB
* when destroying via RCU
*/
+ /* First tail page of compound page */
+ struct {
+ compound_page_dtor *compound_dtor;
+ unsigned long compound_order;
+ };
+
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
pgtable_t pmd_huge_pte; /* protected by page->ptl */
#endif
@@ -355,7 +363,8 @@ struct mm_struct {
pgd_t * pgd;
atomic_t mm_users; /* How many users with user space? */
atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
- atomic_long_t nr_ptes; /* Page table pages */
+ atomic_long_t nr_ptes; /* PTE page table pages */
+ atomic_long_t nr_pmds; /* PMD page table pages */
int map_count; /* number of VMAs */
spinlock_t page_table_lock; /* Protects page tables and some counters */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2f0856d14b21..f279d9c158cd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -426,7 +426,7 @@ struct zone {
const char *name;
/*
- * Number of MIGRATE_RESEVE page block. To maintain for just
+ * Number of MIGRATE_RESERVE page block. To maintain for just
* optimization. Protected by zone->lock.
*/
int nr_migrate_reserve_block;
@@ -970,7 +970,6 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
* @z - The cursor used as a starting point for the search
* @highest_zoneidx - The zone index of the highest zone to return
* @nodes - An optional nodemask to filter the zonelist with
- * @zone - The first suitable zone found is returned via this parameter
*
* This function returns the next zone at or below a given zone index that is
* within the allowed nodemask using a cursor as the starting point for the
@@ -980,8 +979,7 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
*/
struct zoneref *next_zones_zonelist(struct zoneref *z,
enum zone_type highest_zoneidx,
- nodemask_t *nodes,
- struct zone **zone);
+ nodemask_t *nodes);
/**
* first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
@@ -1000,8 +998,10 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
nodemask_t *nodes,
struct zone **zone)
{
- return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes,
- zone);
+ struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs,
+ highest_zoneidx, nodes);
+ *zone = zonelist_zone(z);
+ return z;
}
/**
@@ -1018,7 +1018,8 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \
zone; \
- z = next_zones_zonelist(++z, highidx, nodemask, &zone)) \
+ z = next_zones_zonelist(++z, highidx, nodemask), \
+ zone = zonelist_zone(z)) \
/**
* for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 76200984d1e2..d5771bed59c9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -47,6 +47,10 @@ static inline bool oom_task_origin(const struct task_struct *p)
return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN);
}
+extern void mark_tsk_oom_victim(struct task_struct *tsk);
+
+extern void unmark_oom_victim(void);
+
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
@@ -68,22 +72,14 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
unsigned long totalpages, const nodemask_t *nodemask,
bool force_kill);
-extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
int order, nodemask_t *mask, bool force_kill);
extern int register_oom_notifier(struct notifier_block *nb);
extern int unregister_oom_notifier(struct notifier_block *nb);
extern bool oom_killer_disabled;
-
-static inline void oom_killer_disable(void)
-{
- oom_killer_disabled = true;
-}
-
-static inline void oom_killer_enable(void)
-{
- oom_killer_disabled = false;
-}
+extern bool oom_killer_disable(void);
+extern void oom_killer_enable(void);
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 955421575d16..17fa4f8de3a6 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -41,7 +41,8 @@ int page_counter_try_charge(struct page_counter *counter,
struct page_counter **fail);
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
int page_counter_limit(struct page_counter *counter, unsigned long limit);
-int page_counter_memparse(const char *buf, unsigned long *nr_pages);
+int page_counter_memparse(const char *buf, const char *max,
+ unsigned long *nr_pages);
static inline void page_counter_reset_watermark(struct page_counter *counter)
{
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index d2a2c84c72d0..c42981cd99aa 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -40,7 +40,7 @@ struct page_ext {
#ifdef CONFIG_PAGE_OWNER
unsigned int order;
gfp_t gfp_mask;
- struct stack_trace trace;
+ unsigned int nr_entries;
unsigned long trace_entries[8];
#endif
};
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 34e8b60ab973..7067eca501e2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -437,16 +437,6 @@ extern int reuse_swap_page(struct page *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
-#ifdef CONFIG_MEMCG
-extern void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
-#else
-static inline void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
-}
-#endif
-
#else /* CONFIG_SWAP */
#define swap_address_space(entry) (NULL)
@@ -547,11 +537,6 @@ static inline swp_entry_t get_swap_page(void)
return entry;
}
-static inline void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
-{
-}
-
#endif /* CONFIG_SWAP */
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 50cbc876be56..831a3168ab35 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -135,6 +135,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry)
*entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry));
}
+extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
+ spinlock_t *ptl);
extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address);
extern void migration_entry_wait_huge(struct vm_area_struct *vma,
@@ -148,6 +150,8 @@ static inline int is_migration_entry(swp_entry_t swp)
}
#define migration_entry_to_page(swp) NULL
static inline void make_migration_entry_read(swp_entry_t *entryp) { }
+static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
+ spinlock_t *ptl) { }
static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address) { }
static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index c6814b917bdf..9a6a3fe0fb51 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -11,39 +11,55 @@
DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
- TP_PROTO(unsigned long nr_scanned,
+ TP_PROTO(
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long nr_scanned,
unsigned long nr_taken),
- TP_ARGS(nr_scanned, nr_taken),
+ TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken),
TP_STRUCT__entry(
+ __field(unsigned long, start_pfn)
+ __field(unsigned long, end_pfn)
__field(unsigned long, nr_scanned)
__field(unsigned long, nr_taken)
),
TP_fast_assign(
+ __entry->start_pfn = start_pfn;
+ __entry->end_pfn = end_pfn;
__entry->nr_scanned = nr_scanned;
__entry->nr_taken = nr_taken;
),
- TP_printk("nr_scanned=%lu nr_taken=%lu",
+ TP_printk("range=(0x%lx ~ 0x%lx) nr_scanned=%lu nr_taken=%lu",
+ __entry->start_pfn,
+ __entry->end_pfn,
__entry->nr_scanned,
__entry->nr_taken)
);
DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_migratepages,
- TP_PROTO(unsigned long nr_scanned,
+ TP_PROTO(
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long nr_scanned,
unsigned long nr_taken),
- TP_ARGS(nr_scanned, nr_taken)
+ TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken)
);
DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages,
- TP_PROTO(unsigned long nr_scanned,
+
+ TP_PROTO(
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long nr_scanned,
unsigned long nr_taken),
- TP_ARGS(nr_scanned, nr_taken)
+ TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken)
);
TRACE_EVENT(mm_compaction_migratepages,
@@ -85,47 +101,198 @@ TRACE_EVENT(mm_compaction_migratepages,
);
TRACE_EVENT(mm_compaction_begin,
- TP_PROTO(unsigned long zone_start, unsigned long migrate_start,
- unsigned long free_start, unsigned long zone_end),
+ TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
+ unsigned long free_pfn, unsigned long zone_end, bool sync),
- TP_ARGS(zone_start, migrate_start, free_start, zone_end),
+ TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync),
TP_STRUCT__entry(
__field(unsigned long, zone_start)
- __field(unsigned long, migrate_start)
- __field(unsigned long, free_start)
+ __field(unsigned long, migrate_pfn)
+ __field(unsigned long, free_pfn)
__field(unsigned long, zone_end)
+ __field(bool, sync)
),
TP_fast_assign(
__entry->zone_start = zone_start;
- __entry->migrate_start = migrate_start;
- __entry->free_start = free_start;
+ __entry->migrate_pfn = migrate_pfn;
+ __entry->free_pfn = free_pfn;
__entry->zone_end = zone_end;
+ __entry->sync = sync;
),
- TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu",
+ TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s",
__entry->zone_start,
- __entry->migrate_start,
- __entry->free_start,
- __entry->zone_end)
+ __entry->migrate_pfn,
+ __entry->free_pfn,
+ __entry->zone_end,
+ __entry->sync ? "sync" : "async")
);
TRACE_EVENT(mm_compaction_end,
- TP_PROTO(int status),
+ TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
+ unsigned long free_pfn, unsigned long zone_end, bool sync,
+ int status),
- TP_ARGS(status),
+ TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync, status),
TP_STRUCT__entry(
+ __field(unsigned long, zone_start)
+ __field(unsigned long, migrate_pfn)
+ __field(unsigned long, free_pfn)
+ __field(unsigned long, zone_end)
+ __field(bool, sync)
__field(int, status)
),
TP_fast_assign(
+ __entry->zone_start = zone_start;
+ __entry->migrate_pfn = migrate_pfn;
+ __entry->free_pfn = free_pfn;
+ __entry->zone_end = zone_end;
+ __entry->sync = sync;
__entry->status = status;
),
- TP_printk("status=%d", __entry->status)
+ TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s status=%s",
+ __entry->zone_start,
+ __entry->migrate_pfn,
+ __entry->free_pfn,
+ __entry->zone_end,
+ __entry->sync ? "sync" : "async",
+ compaction_status_string[__entry->status])
+);
+
+TRACE_EVENT(mm_compaction_try_to_compact_pages,
+
+ TP_PROTO(
+ int order,
+ gfp_t gfp_mask,
+ enum migrate_mode mode),
+
+ TP_ARGS(order, gfp_mask, mode),
+
+ TP_STRUCT__entry(
+ __field(int, order)
+ __field(gfp_t, gfp_mask)
+ __field(enum migrate_mode, mode)
+ ),
+
+ TP_fast_assign(
+ __entry->order = order;
+ __entry->gfp_mask = gfp_mask;
+ __entry->mode = mode;
+ ),
+
+ TP_printk("order=%d gfp_mask=0x%x mode=%d",
+ __entry->order,
+ __entry->gfp_mask,
+ (int)__entry->mode)
+);
+
+DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
+
+ TP_PROTO(struct zone *zone,
+ int order,
+ int ret),
+
+ TP_ARGS(zone, order, ret),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(char *, name)
+ __field(int, order)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = zone_to_nid(zone);
+ __entry->name = (char *)zone->name;
+ __entry->order = order;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d ret=%s",
+ __entry->nid,
+ __entry->name,
+ __entry->order,
+ compaction_status_string[__entry->ret])
+);
+
+DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_finished,
+
+ TP_PROTO(struct zone *zone,
+ int order,
+ int ret),
+
+ TP_ARGS(zone, order, ret)
+);
+
+DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable,
+
+ TP_PROTO(struct zone *zone,
+ int order,
+ int ret),
+
+ TP_ARGS(zone, order, ret)
+);
+
+#ifdef CONFIG_COMPACTION
+DECLARE_EVENT_CLASS(mm_compaction_defer_template,
+
+ TP_PROTO(struct zone *zone, int order),
+
+ TP_ARGS(zone, order),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(char *, name)
+ __field(int, order)
+ __field(unsigned int, considered)
+ __field(unsigned int, defer_shift)
+ __field(int, order_failed)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = zone_to_nid(zone);
+ __entry->name = (char *)zone->name;
+ __entry->order = order;
+ __entry->considered = zone->compact_considered;
+ __entry->defer_shift = zone->compact_defer_shift;
+ __entry->order_failed = zone->compact_order_failed;
+ ),
+
+ TP_printk("node=%d zone=%-8s order=%d order_failed=%d consider=%u limit=%lu",
+ __entry->nid,
+ __entry->name,
+ __entry->order,
+ __entry->order_failed,
+ __entry->considered,
+ 1UL << __entry->defer_shift)
+);
+
+DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_deferred,
+
+ TP_PROTO(struct zone *zone, int order),
+
+ TP_ARGS(zone, order)
+);
+
+DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_compaction,
+
+ TP_PROTO(struct zone *zone, int order),
+
+ TP_ARGS(zone, order)
+);
+
+DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset,
+
+ TP_PROTO(struct zone *zone, int order),
+
+ TP_ARGS(zone, order)
);
+#endif
#endif /* _TRACE_COMPACTION_H */
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index aece1346ceb7..4ad10baecd4d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -268,11 +268,11 @@ TRACE_EVENT(mm_page_alloc_extfrag,
TP_PROTO(struct page *page,
int alloc_order, int fallback_order,
- int alloc_migratetype, int fallback_migratetype, int new_migratetype),
+ int alloc_migratetype, int fallback_migratetype),
TP_ARGS(page,
alloc_order, fallback_order,
- alloc_migratetype, fallback_migratetype, new_migratetype),
+ alloc_migratetype, fallback_migratetype),
TP_STRUCT__entry(
__field( struct page *, page )
@@ -289,7 +289,8 @@ TRACE_EVENT(mm_page_alloc_extfrag,
__entry->fallback_order = fallback_order;
__entry->alloc_migratetype = alloc_migratetype;
__entry->fallback_migratetype = fallback_migratetype;
- __entry->change_ownership = (new_migratetype == alloc_migratetype);
+ __entry->change_ownership = (alloc_migratetype ==
+ get_pageblock_migratetype(page));
),
TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index 2f96d233c980..a6c4962e5d46 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -32,6 +32,7 @@
#define KPF_KSM 21
#define KPF_THP 22
#define KPF_BALLOON 23
+#define KPF_ZERO_PAGE 24
#endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */