summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 16:23:15 +0900
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 16:23:15 +0900
commit9e2d8656f5e8aa214e66b462680cf86b210b74a8 (patch)
treef67d62e896cedf75599ea45f9ecf9999c6ad24cd /include
parent1ea4f4f8405cc1ceec23f2d261bc3775785e6712 (diff)
parent9e695d2ecc8451cc2c1603d60b5c8e7f5581923a (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge patches from Andrew Morton: "A few misc things and very nearly all of the MM tree. A tremendous amount of stuff (again), including a significant rbtree library rework." * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (160 commits) sparc64: Support transparent huge pages. mm: thp: Use more portable PMD clearing sequenece in zap_huge_pmd(). mm: Add and use update_mmu_cache_pmd() in transparent huge page code. sparc64: Document PGD and PMD layout. sparc64: Eliminate PTE table memory wastage. sparc64: Halve the size of PTE tables sparc64: Only support 4MB huge pages and 8KB base pages. memory-hotplug: suppress "Trying to free nonexistent resource <XXXXXXXXXXXXXXXX-YYYYYYYYYYYYYYYY>" warning mm: memcg: clean up mm_match_cgroup() signature mm: document PageHuge somewhat mm: use %pK for /proc/vmallocinfo mm, thp: fix mlock statistics mm, thp: fix mapped pages avoiding unevictable list on mlock memory-hotplug: update memory block's state and notify userspace memory-hotplug: preparation to notify memory block's state at memory hot remove mm: avoid section mismatch warning for memblock_type_name make GFP_NOTRACK definition unconditional cma: decrease cc.nr_migratepages after reclaiming pagelist CMA: migrate mlocked pages kpageflags: fix wrong KPF_THP on non-huge compound pages ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgtable.h72
-rw-r--r--include/linux/atomic.h25
-rw-r--r--include/linux/compaction.h19
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/gfp.h9
-rw-r--r--include/linux/huge_mm.h3
-rw-r--r--include/linux/interval_tree.h27
-rw-r--r--include/linux/interval_tree_generic.h191
-rw-r--r--include/linux/memblock.h3
-rw-r--r--include/linux/memcontrol.h14
-rw-r--r--include/linux/memory_hotplug.h3
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--include/linux/mm.h140
-rw-r--r--include/linux/mm_types.h16
-rw-r--r--include/linux/mman.h1
-rw-r--r--include/linux/mmu_notifier.h60
-rw-r--r--include/linux/mmzone.h10
-rw-r--r--include/linux/oom.h11
-rw-r--r--include/linux/page-isolation.h7
-rw-r--r--include/linux/pageblock-flags.h19
-rw-r--r--include/linux/prio_tree.h120
-rw-r--r--include/linux/rbtree.h119
-rw-r--r--include/linux/rbtree_augmented.h223
-rw-r--r--include/linux/rmap.h36
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/swap.h2
-rw-r--r--include/linux/timerqueue.h2
-rw-r--r--include/linux/vm_event_item.h1
-rw-r--r--include/linux/vmstat.h12
-rw-r--r--include/trace/events/gfpflags.h1
30 files changed, 730 insertions, 429 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ff4947b7a97..b36ce40bd1c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -87,7 +87,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
- pmd_clear(mm, address, pmdp);
+ pmd_clear(pmdp);
return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -162,6 +162,19 @@ extern void pmdp_splitting_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
#endif
+#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+#endif
+
+#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+#endif
+
+#ifndef __HAVE_ARCH_PMDP_INVALIDATE
+extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp);
+#endif
+
#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
@@ -381,48 +394,59 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
#ifndef __HAVE_PFNMAP_TRACKING
/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
+ * Interfaces that can be used by architecture code to keep track of
+ * memory type of pfn mappings specified by the remap_pfn_range,
+ * vm_insert_pfn.
+ */
+
+/*
+ * track_pfn_remap is called when a _new_ pfn mapping is being established
+ * by remap_pfn_range() for physical range indicated by pfn and size.
*/
-static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long size)
+static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn, unsigned long addr,
+ unsigned long size)
{
return 0;
}
/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_insert is called when a _new_ single pfn is established
+ * by vm_insert_pfn().
+ */
+static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn)
+{
+ return 0;
+}
+
+/*
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
*/
-static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
+static inline int track_pfn_copy(struct vm_area_struct *vma)
{
return 0;
}
/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
* untrack_pfn_vma is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
*/
-static inline void untrack_pfn_vma(struct vm_area_struct *vma,
- unsigned long pfn, unsigned long size)
+static inline void untrack_pfn(struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size)
{
}
#else
-extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long size);
-extern int track_pfn_vma_copy(struct vm_area_struct *vma);
-extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size);
+extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn, unsigned long addr,
+ unsigned long size);
+extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn);
+extern int track_pfn_copy(struct vm_area_struct *vma);
+extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ unsigned long size);
#endif
#ifdef CONFIG_MMU
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 70cfcb2d63c..5b08a8540ec 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -86,6 +86,31 @@ static inline int atomic_dec_unless_positive(atomic_t *p)
}
#endif
+/*
+ * atomic_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+#ifndef atomic_dec_if_positive
+static inline int atomic_dec_if_positive(atomic_t *v)
+{
+ int c, old, dec;
+ c = atomic_read(v);
+ for (;;) {
+ dec = c - 1;
+ if (unlikely(dec < 0))
+ break;
+ old = atomic_cmpxchg((v), c, dec);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return dec;
+}
+#endif
+
#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
static inline void atomic_or(int i, atomic_t *v)
{
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index ef658147e4e..6ecb6dc2f30 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,8 +22,9 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
extern int fragmentation_index(struct zone *zone, unsigned int order);
extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *mask,
- bool sync, bool *contended);
+ bool sync, bool *contended, struct page **page);
extern int compact_pgdat(pg_data_t *pgdat, int order);
+extern void reset_isolation_suitable(pg_data_t *pgdat);
extern unsigned long compaction_suitable(struct zone *zone, int order);
/* Do not skip compaction more than 64 times */
@@ -61,10 +62,20 @@ static inline bool compaction_deferred(struct zone *zone, int order)
return zone->compact_considered < defer_limit;
}
+/* Returns true if restarting compaction after many failures */
+static inline bool compaction_restarting(struct zone *zone, int order)
+{
+ if (order < zone->compact_order_failed)
+ return false;
+
+ return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
+ zone->compact_considered >= 1UL << zone->compact_defer_shift;
+}
+
#else
static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync, bool *contended)
+ bool sync, bool *contended, struct page **page)
{
return COMPACT_CONTINUE;
}
@@ -74,6 +85,10 @@ static inline int compact_pgdat(pg_data_t *pgdat, int order)
return COMPACT_CONTINUE;
}
+static inline void reset_isolation_suitable(pg_data_t *pgdat)
+{
+}
+
static inline unsigned long compaction_suitable(struct zone *zone, int order)
{
return COMPACT_SKIPPED;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca6d8c806f4..c617ed024df 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -401,7 +401,7 @@ struct inodes_stat_t {
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
-#include <linux/prio_tree.h>
+#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/pid.h>
#include <linux/bug.h>
@@ -669,7 +669,7 @@ struct address_space {
struct radix_tree_root page_tree; /* radix tree of all pages */
spinlock_t tree_lock; /* and lock protecting it */
unsigned int i_mmap_writable;/* count VM_SHARED mappings */
- struct prio_tree_root i_mmap; /* tree of private and shared mappings */
+ struct rb_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
struct mutex i_mmap_mutex; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
@@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
*/
static inline int mapping_mapped(struct address_space *mapping)
{
- return !prio_tree_empty(&mapping->i_mmap) ||
+ return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
!list_empty(&mapping->i_mmap_nonlinear);
}
@@ -2552,6 +2552,8 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
+extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
+ unsigned long size, pgoff_t pgoff);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4883f393f50..02c1c9710be 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,12 +30,7 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#ifdef CONFIG_KMEMCHECK
#define ___GFP_NOTRACK 0x200000u
-#else
-#define ___GFP_NOTRACK 0
-#endif
-#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
#define ___GFP_WRITE 0x1000000u
@@ -90,7 +85,6 @@ struct vm_area_struct;
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
-#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
@@ -120,8 +114,7 @@ struct vm_area_struct;
__GFP_MOVABLE)
#define GFP_IOFS (__GFP_IO | __GFP_FS)
#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
- __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
- __GFP_NO_KSWAPD)
+ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
#ifdef CONFIG_NUMA
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4c59b113118..b31cb7da034 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,8 +11,7 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
pmd_t orig_pmd);
-extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm);
-extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
+extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
unsigned int flags);
diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h
new file mode 100644
index 00000000000..724556aa3c9
--- /dev/null
+++ b/include/linux/interval_tree.h
@@ -0,0 +1,27 @@
+#ifndef _LINUX_INTERVAL_TREE_H
+#define _LINUX_INTERVAL_TREE_H
+
+#include <linux/rbtree.h>
+
+struct interval_tree_node {
+ struct rb_node rb;
+ unsigned long start; /* Start of interval */
+ unsigned long last; /* Last location _in_ interval */
+ unsigned long __subtree_last;
+};
+
+extern void
+interval_tree_insert(struct interval_tree_node *node, struct rb_root *root);
+
+extern void
+interval_tree_remove(struct interval_tree_node *node, struct rb_root *root);
+
+extern struct interval_tree_node *
+interval_tree_iter_first(struct rb_root *root,
+ unsigned long start, unsigned long last);
+
+extern struct interval_tree_node *
+interval_tree_iter_next(struct interval_tree_node *node,
+ unsigned long start, unsigned long last);
+
+#endif /* _LINUX_INTERVAL_TREE_H */
diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h
new file mode 100644
index 00000000000..58370e1862a
--- /dev/null
+++ b/include/linux/interval_tree_generic.h
@@ -0,0 +1,191 @@
+/*
+ Interval Trees
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ include/linux/interval_tree_generic.h
+*/
+
+#include <linux/rbtree_augmented.h>
+
+/*
+ * Template for implementing interval trees
+ *
+ * ITSTRUCT: struct type of the interval tree nodes
+ * ITRB: name of struct rb_node field within ITSTRUCT
+ * ITTYPE: type of the interval endpoints
+ * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree
+ * ITSTART(n): start endpoint of ITSTRUCT node n
+ * ITLAST(n): last endpoint of ITSTRUCT node n
+ * ITSTATIC: 'static' or empty
+ * ITPREFIX: prefix to use for the inline tree definitions
+ *
+ * Note - before using this, please consider if non-generic version
+ * (interval_tree.h) would work for you...
+ */
+
+#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \
+ ITSTART, ITLAST, ITSTATIC, ITPREFIX) \
+ \
+/* Callbacks for augmented rbtree insert and remove */ \
+ \
+static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node) \
+{ \
+ ITTYPE max = ITLAST(node), subtree_last; \
+ if (node->ITRB.rb_left) { \
+ subtree_last = rb_entry(node->ITRB.rb_left, \
+ ITSTRUCT, ITRB)->ITSUBTREE; \
+ if (max < subtree_last) \
+ max = subtree_last; \
+ } \
+ if (node->ITRB.rb_right) { \
+ subtree_last = rb_entry(node->ITRB.rb_right, \
+ ITSTRUCT, ITRB)->ITSUBTREE; \
+ if (max < subtree_last) \
+ max = subtree_last; \
+ } \
+ return max; \
+} \
+ \
+RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \
+ ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last) \
+ \
+/* Insert / remove interval nodes from the tree */ \
+ \
+ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \
+{ \
+ struct rb_node **link = &root->rb_node, *rb_parent = NULL; \
+ ITTYPE start = ITSTART(node), last = ITLAST(node); \
+ ITSTRUCT *parent; \
+ \
+ while (*link) { \
+ rb_parent = *link; \
+ parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \
+ if (parent->ITSUBTREE < last) \
+ parent->ITSUBTREE = last; \
+ if (start < ITSTART(parent)) \
+ link = &parent->ITRB.rb_left; \
+ else \
+ link = &parent->ITRB.rb_right; \
+ } \
+ \
+ node->ITSUBTREE = last; \
+ rb_link_node(&node->ITRB, rb_parent, link); \
+ rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \
+} \
+ \
+ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \
+{ \
+ rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \
+} \
+ \
+/* \
+ * Iterate over intervals intersecting [start;last] \
+ * \
+ * Note that a node's interval intersects [start;last] iff: \
+ * Cond1: ITSTART(node) <= last \
+ * and \
+ * Cond2: start <= ITLAST(node) \
+ */ \
+ \
+static ITSTRUCT * \
+ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
+{ \
+ while (true) { \
+ /* \
+ * Loop invariant: start <= node->ITSUBTREE \
+ * (Cond2 is satisfied by one of the subtree nodes) \
+ */ \
+ if (node->ITRB.rb_left) { \
+ ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \
+ ITSTRUCT, ITRB); \
+ if (start <= left->ITSUBTREE) { \
+ /* \
+ * Some nodes in left subtree satisfy Cond2. \
+ * Iterate to find the leftmost such node N. \
+ * If it also satisfies Cond1, that's the \
+ * match we are looking for. Otherwise, there \
+ * is no matching interval as nodes to the \
+ * right of N can't satisfy Cond1 either. \
+ */ \
+ node = left; \
+ continue; \
+ } \
+ } \
+ if (ITSTART(node) <= last) { /* Cond1 */ \
+ if (start <= ITLAST(node)) /* Cond2 */ \
+ return node; /* node is leftmost match */ \
+ if (node->ITRB.rb_right) { \
+ node = rb_entry(node->ITRB.rb_right, \
+ ITSTRUCT, ITRB); \
+ if (start <= node->ITSUBTREE) \
+ continue; \
+ } \
+ } \
+ return NULL; /* No match */ \
+ } \
+} \
+ \
+ITSTATIC ITSTRUCT * \
+ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \
+{ \
+ ITSTRUCT *node; \
+ \
+ if (!root->rb_node) \
+ return NULL; \
+ node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \
+ if (node->ITSUBTREE < start) \
+ return NULL; \
+ return ITPREFIX ## _subtree_search(node, start, last); \
+} \
+ \
+ITSTATIC ITSTRUCT * \
+ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
+{ \
+ struct rb_node *rb = node->ITRB.rb_right, *prev; \
+ \
+ while (true) { \
+ /* \
+ * Loop invariants: \
+ * Cond1: ITSTART(node) <= last \
+ * rb == node->ITRB.rb_right \
+ * \
+ * First, search right subtree if suitable \
+ */ \
+ if (rb) { \
+ ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \
+ if (start <= right->ITSUBTREE) \
+ return ITPREFIX ## _subtree_search(right, \
+ start, last); \
+ } \
+ \
+ /* Move up the tree until we come from a node's left child */ \
+ do { \
+ rb = rb_parent(&node->ITRB); \
+ if (!rb) \
+ return NULL; \
+ prev = &node->ITRB; \
+ node = rb_entry(rb, ITSTRUCT, ITRB); \
+ rb = node->ITRB.rb_right; \
+ } while (prev == rb); \
+ \
+ /* Check if the node intersects [start;last] */ \
+ if (last < ITSTART(node)) /* !Cond1 */ \
+ return NULL; \
+ else if (start <= ITLAST(node)) /* Cond2 */ \
+ return node; \
+ } \
+}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 19dc455b4f3..569d67d4243 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -70,8 +70,7 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
* @p_end: ptr to ulong for end pfn of the range, can be %NULL
* @p_nid: ptr to int for nid of the range, can be %NULL
*
- * Walks over configured memory ranges. Available after early_node_map is
- * populated.
+ * Walks over configured memory ranges.
*/
#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \
for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8d9489fdab2..fd0e6d53836 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -84,14 +84,14 @@ extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
static inline
-int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
+bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
{
- struct mem_cgroup *memcg;
- int match;
+ struct mem_cgroup *task_memcg;
+ bool match;
rcu_read_lock();
- memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner));
- match = __mem_cgroup_same_or_subtree(cgroup, memcg);
+ task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ match = __mem_cgroup_same_or_subtree(memcg, task_memcg);
rcu_read_unlock();
return match;
}
@@ -258,10 +258,10 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm
return NULL;
}
-static inline int mm_match_cgroup(struct mm_struct *mm,
+static inline bool mm_match_cgroup(struct mm_struct *mm,
struct mem_cgroup *memcg)
{
- return 1;
+ return true;
}
static inline int task_in_mem_cgroup(struct task_struct *task,
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 910550f3b70..95573ec4ee6 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -10,6 +10,7 @@ struct page;
struct zone;
struct pglist_data;
struct mem_section;
+struct memory_block;
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -233,6 +234,8 @@ static inline int is_mem_section_removable(unsigned long pfn,
extern int mem_online_node(int nid);
extern int add_memory(int nid, u64 start, u64 size);
extern int arch_add_memory(int nid, u64 start, u64 size);
+extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
+extern int offline_memory_block(struct memory_block *mem);
extern int remove_memory(u64 start, u64 size);
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
int nr_pages);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 95b738c7abf..cec56932560 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -188,7 +188,7 @@ struct sp_node {
struct shared_policy {
struct rb_root root;
- spinlock_t lock;
+ struct mutex mutex;
};
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
@@ -239,7 +239,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
/* Check if a vma is migratable */
static inline int vma_migratable(struct vm_area_struct *vma)
{
- if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
+ if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP))
return 0;
/*
* Migration allocates pages in the highest zone. If we cannot
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 311be906b57..fa068040273 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,7 +10,6 @@
#include <linux/list.h>
#include <linux/mmzone.h>
#include <linux/rbtree.h>
-#include <linux/prio_tree.h>
#include <linux/atomic.h>
#include <linux/debug_locks.h>
#include <linux/mm_types.h>
@@ -21,6 +20,7 @@
struct mempolicy;
struct anon_vma;
+struct anon_vma_chain;
struct file_ra_state;
struct user_struct;
struct writeback_control;
@@ -70,6 +70,8 @@ extern unsigned int kobjsize(const void *objp);
/*
* vm_flags in vm_area_struct, see mm_types.h.
*/
+#define VM_NONE 0x00000000
+
#define VM_READ 0x00000001 /* currently active flags */
#define VM_WRITE 0x00000002
#define VM_EXEC 0x00000004
@@ -82,16 +84,9 @@ extern unsigned int kobjsize(const void *objp);
#define VM_MAYSHARE 0x00000080
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
-#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
-#define VM_GROWSUP 0x00000200
-#else
-#define VM_GROWSUP 0x00000000
-#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
-#endif
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
-#define VM_EXECUTABLE 0x00001000
#define VM_LOCKED 0x00002000
#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
@@ -101,25 +96,34 @@ extern unsigned int kobjsize(const void *objp);
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
-#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
-#ifndef CONFIG_TRANSPARENT_HUGEPAGE
-#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
-#else
-#define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */
-#endif
-#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
-#define VM_NODUMP 0x04000000 /* Do not include in the core dump */
+#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
+#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
-#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
-#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
-#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */
+#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */
+#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
+#if defined(CONFIG_X86)
+# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
+#elif defined(CONFIG_PPC)
+# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
+#elif defined(CONFIG_PARISC)
+# define VM_GROWSUP VM_ARCH_1
+#elif defined(CONFIG_IA64)
+# define VM_GROWSUP VM_ARCH_1
+#elif !defined(CONFIG_MMU)
+# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */
+#endif
+
+#ifndef VM_GROWSUP
+# define VM_GROWSUP VM_NONE
+#endif
+
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ)
@@ -143,7 +147,7 @@ extern unsigned int kobjsize(const void *objp);
* Special vmas that are non-mergable, non-mlock()able.
* Note: mm/huge_memory.c VM_NO_THP depends on this definition.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP)
/*
* mapping from the currently active vm_flags protection bits (the
@@ -157,24 +161,7 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
-
-/*
- * This interface is used by x86 PAT code to identify a pfn mapping that is
- * linear over entire vma. This is to optimize PAT code that deals with
- * marking the physical region with a particular prot. This is not for generic
- * mm use. Note also that this check will not work if the pfn mapping is
- * linear for a vma starting at physical address 0. In which case PAT code
- * falls back to slow path of reserving physical range page by page.
- */
-static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
-{
- return !!(vma->vm_flags & VM_PFN_AT_MMAP);
-}
-
-static inline int is_pfn_mapping(struct vm_area_struct *vma)
-{
- return !!(vma->vm_flags & VM_PFNMAP);
-}
+#define FAULT_FLAG_TRIED 0x40 /* second try */
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
@@ -182,8 +169,7 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma)
* of VM_FAULT_xxx flags that give details about how the fault was handled.
*
* pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear
- * mapping support.
+ * is used, one may implement ->remap_pages to get nonlinear mapping support.
*/
struct vm_fault {
unsigned int flags; /* FAULT_FLAG_xxx flags */
@@ -241,6 +227,9 @@ struct vm_operations_struct {
int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,
const nodemask_t *to, unsigned long flags);
#endif
+ /* called by sys_remap_file_pages() to populate non-linear mapping */
+ int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long size, pgoff_t pgoff);
};
struct mmu_gather;
@@ -249,6 +238,18 @@ struct inode;
#define page_private(page) ((page)->private)
#define set_page_private(page, v) ((page)->private = (v))
+/* It's valid only if the page is free path or free_list */
+static inline void set_freepage_migratetype(struct page *page, int migratetype)
+{
+ page->index = migratetype;
+}
+
+/* It's valid only if the page is free path or free_list */
+static inline int get_freepage_migratetype(struct page *page)
+{
+ return page->index;
+}
+
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
@@ -454,6 +455,7 @@ void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
int split_free_page(struct page *page);
+int capture_free_page(struct page *page, int alloc_order, int migratetype);
/*
* Compound pages have a destructor function. Provide a
@@ -1071,7 +1073,8 @@ vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
- unsigned long new_addr, unsigned long len);
+ unsigned long new_addr, unsigned long len,
+ bool need_rmap_locks);
extern unsigned long do_mremap(unsigned long addr,
unsigned long old_len, unsigned long new_len,
unsigned long flags, unsigned long new_addr);
@@ -1366,24 +1369,45 @@ extern void zone_pcp_reset(struct zone *zone);
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
-/* prio_tree.c */
-void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
-void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
-void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
-struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
- struct prio_tree_iter *iter);
-
-#define vma_prio_tree_foreach(vma, iter, root, begin, end) \
- for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \
- (vma = vma_prio_tree_next(vma, iter)); )
+/* interval_tree.c */
+void vma_interval_tree_insert(struct vm_area_struct *node,
+ struct rb_root *root);
+void vma_interval_tree_insert_after(struct vm_area_struct *node,
+ struct vm_area_struct *prev,
+ struct rb_root *root);
+void vma_interval_tree_remove(struct vm_area_struct *node,
+ struct rb_root *root);
+struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
+ unsigned long start, unsigned long last);
+struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
+ unsigned long start, unsigned long last);
+
+#define vma_interval_tree_foreach(vma, root, start, last) \
+ for (vma = vma_interval_tree_iter_first(root, start, last); \
+ vma; vma = vma_interval_tree_iter_next(vma, start, last))
static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
struct list_head *list)
{
- vma->shared.vm_set.parent = NULL;
- list_add_tail(&vma->shared.vm_set.list, list);
+ list_add_tail(&vma->shared.nonlinear, list);
}
+void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
+ struct rb_root *root);
+void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
+ struct rb_root *root);
+struct anon_vma_chain *anon_vma_interval_tree_iter_first(
+ struct rb_root *root, unsigned long start, unsigned long last);
+struct anon_vma_chain *anon_vma_interval_tree_iter_next(
+ struct anon_vma_chain *node, unsigned long start, unsigned long last);
+#ifdef CONFIG_DEBUG_VM_RB
+void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
+#endif
+
+#define anon_vma_interval_tree_foreach(avc, root, start, last) \
+ for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
+ avc; avc = anon_vma_interval_tree_iter_next(avc, start, last))
+
/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
@@ -1400,15 +1424,13 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
struct rb_node **, struct rb_node *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
- unsigned long addr, unsigned long len, pgoff_t pgoff);
+ unsigned long addr, unsigned long len, pgoff_t pgoff,
+ bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
extern int mm_take_all_locks(struct mm_struct *mm);
extern void mm_drop_all_locks(struct mm_struct *mm);
-/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
-extern void added_exe_file_vma(struct mm_struct *mm);
-extern void removed_exe_file_vma(struct mm_struct *mm);
extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
extern struct file *get_mm_exe_file(struct mm_struct *mm);
@@ -1662,5 +1684,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool page_is_guard(struct page *page) { return false; }
#endif /* CONFIG_DEBUG_PAGEALLOC */
+extern void reset_zone_present_pages(void);
+extern void fixup_zone_present_pages(int nid, unsigned long start_pfn,
+ unsigned long end_pfn);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bf7867200b9..31f8a3af7d9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -6,7 +6,6 @@
#include <linux/threads.h>
#include <linux/list.h>
#include <linux/spinlock.h>
-#include <linux/prio_tree.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
@@ -240,18 +239,15 @@ struct vm_area_struct {
/*
* For areas with an address space and backing store,
- * linkage into the address_space->i_mmap prio tree, or
- * linkage to the list of like vmas hanging off its node, or
+ * linkage into the address_space->i_mmap interval tree, or
* linkage of vma in the address_space->i_mmap_nonlinear list.
*/
union {
struct {
- struct list_head list;
- void *parent; /* aligns with prio_tree_node parent */
- struct vm_area_struct *head;
- } vm_set;
-
- struct raw_prio_tree_node prio_tree_node;
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
+ } linear;
+ struct list_head nonlinear;
} shared;
/*
@@ -349,7 +345,6 @@ struct mm_struct {
unsigned long shared_vm; /* Shared pages (files) */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */
unsigned long stack_vm; /* VM_GROWSUP/DOWN */
- unsigned long reserved_vm; /* VM_RESERVED|VM_IO pages */
unsigned long def_flags;
unsigned long nr_ptes; /* Page table pages */
unsigned long start_code, end_code, start_data, end_data;
@@ -394,7 +389,6 @@ struct mm_struct {
/* store ref to file /proc/<pid>/exe symlink points to */
struct file *exe_file;
- unsigned long num_exe_file_vmas;
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier_mm *mmu_notifier_mm;
#endif
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 8b74e9b1d0a..77cec2f45cb 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -86,7 +86,6 @@ calc_vm_flag_bits(unsigned long flags)
{
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
- _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED );
}
#endif /* __KERNEL__ */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1d1b1e13f79..bc823c4c028 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
+#include <linux/srcu.h>
struct mmu_notifier;
struct mmu_notifier_ops;
@@ -245,50 +246,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__mmu_notifier_mm_destroy(mm);
}
-/*
- * These two macros will sometime replace ptep_clear_flush.
- * ptep_clear_flush is implemented as macro itself, so this also is
- * implemented as a macro until ptep_clear_flush will converted to an
- * inline function, to diminish the risk of compilation failure. The
- * invalidate_page method over time can be moved outside the PT lock
- * and these two macros can be later removed.
- */
-#define ptep_clear_flush_notify(__vma, __address, __ptep) \
-({ \
- pte_t __pte; \
- struct vm_area_struct *___vma = __vma; \
- unsigned long ___address = __address; \
- __pte = ptep_clear_flush(___vma, ___address, __ptep); \
- mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \
- __pte; \
-})
-
-#define pmdp_clear_flush_notify(__vma, __address, __pmdp) \
-({ \
- pmd_t __pmd; \
- struct vm_area_struct *___vma = __vma; \
- unsigned long ___address = __address; \
- VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
- mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE);\
- __pmd = pmdp_clear_flush(___vma, ___address, __pmdp); \
- mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE); \
- __pmd; \
-})
-
-#define pmdp_splitting_flush_notify(__vma, __address, __pmdp) \
-({ \
- struct vm_area_struct *___vma = __vma; \
- unsigned long ___address = __address; \
- VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
- mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE);\
- pmdp_splitting_flush(___vma, ___address, __pmdp); \
- mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE); \
-})
-
#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
({ \
int __young; \
@@ -311,14 +268,24 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__young; \
})
+/*
+ * set_pte_at_notify() sets the pte _after_ running the notifier.
+ * This is safe to start by updating the secondary MMUs, because the primary MMU
+ * pte invalidate must have already happened with a ptep_clear_flush() before
+ * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is
+ * required when we change both the protection of the mapping from read-only to
+ * read-write and the pfn (like during copy on write page faults). Otherwise the
+ * old page would remain mapped readonly in the secondary MMUs after the new
+ * page is already writable by some CPU through the primary MMU.
+ */
#define set_pte_at_notify(__mm, __address, __ptep, __pte) \
({ \
struct mm_struct *___mm = __mm; \
unsigned long ___address = __address; \
pte_t ___pte = __pte; \
\
- set_pte_at(___mm, ___address, __ptep, ___pte); \
mmu_notifier_change_pte(___mm, ___address, ___pte); \
+ set_pte_at(___mm, ___address, __ptep, ___pte); \
})
#else /* CONFIG_MMU_NOTIFIER */
@@ -369,9 +336,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
-#define ptep_clear_flush_notify ptep_clear_flush
-#define pmdp_clear_flush_notify pmdp_clear_flush
-#define pmdp_splitting_flush_notify pmdp_splitting_flush
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2daa54f55db..50aaca81f63 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -142,6 +142,7 @@ enum zone_stat_item {
NUMA_OTHER, /* allocation from other node */
#endif
NR_ANON_TRANSPARENT_HUGEPAGES,
+ NR_FREE_CMA_PAGES,
NR_VM_ZONE_STAT_ITEMS };
/*
@@ -217,6 +218,8 @@ struct lruvec {
#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
/* Isolate for asynchronous migration */
#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
+/* Isolate unevictable pages */
+#define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
/* LRU Isolation modes. */
typedef unsigned __bitwise__ isolate_mode_t;
@@ -369,8 +372,12 @@ struct zone {
spinlock_t lock;
int all_unreclaimable; /* All pages pinned */
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
- /* pfn where the last incremental compaction isolated free pages */
+ /* Set to true when the PG_migrate_skip bits should be cleared */
+ bool compact_blockskip_flush;
+
+ /* pfns where compaction scanners should start */
unsigned long compact_cached_free_pfn;
+ unsigned long compact_cached_migrate_pfn;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
@@ -704,6 +711,7 @@ typedef struct pglist_data {
unsigned long node_spanned_pages; /* total size of physical page
range, including holes */
int node_id;
+ nodemask_t reclaim_nodes; /* Nodes allowed to reclaim from */
wait_queue_head_t kswapd_wait;
wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 49a3031fda5..d36a8221f58 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -2,17 +2,6 @@
#define __INCLUDE_LINUX_OOM_H
/*
- * /proc/<pid>/oom_adj is deprecated, see
- * Documentation/feature-removal-schedule.txt.
- *
- * /proc/<pid>/oom_adj set to -17 protects from the oom-killer
- */
-#define OOM_DISABLE (-17)
-/* inclusive */
-#define OOM_ADJUST_MIN (-16)
-#define OOM_ADJUST_MAX 15
-
-/*
* /proc/<pid>/oom_score_adj set to OOM_SCORE_ADJ_MIN disables oom killing for
* pid.
*/
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 105077aa768..76a9539cfd3 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -6,6 +6,10 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count);
void set_pageblock_migratetype(struct page *page, int migratetype);
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype);
+int move_freepages(struct zone *zone,
+ struct page *start_page, struct page *end_page,
+ int migratetype);
+
/*
* Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
* If specified range includes migrate types other than MOVABLE or CMA,
@@ -37,6 +41,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
*/
int set_migratetype_isolate(struct page *page);
void unset_migratetype_isolate(struct page *page, unsigned migratetype);
-
+struct page *alloc_migrate_target(struct page *page, unsigned long private,
+ int **resultp);
#endif
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 19ef95d293a..eed27f4f4c3 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -30,6 +30,9 @@ enum pageblock_bits {
PB_migrate,
PB_migrate_end = PB_migrate + 3 - 1,
/* 3 bits required for migrate types */
+#ifdef CONFIG_COMPACTION
+ PB_migrate_skip,/* If set the block is skipped by compaction */
+#endif /* CONFIG_COMPACTION */
NR_PAGEBLOCK_BITS
};
@@ -65,10 +68,22 @@ unsigned long get_pageblock_flags_group(struct page *page,
void set_pageblock_flags_group(struct page *page, unsigned long flags,
int start_bitidx, int end_bitidx);
+#ifdef CONFIG_COMPACTION
+#define get_pageblock_skip(page) \
+ get_pageblock_flags_group(page, PB_migrate_skip, \
+ PB_migrate_skip + 1)
+#define clear_pageblock_skip(page) \
+ set_pageblock_flags_group(page, 0, PB_migrate_skip, \
+ PB_migrate_skip + 1)
+#define set_pageblock_skip(page) \
+ set_pageblock_flags_group(page, 1, PB_migrate_skip, \
+ PB_migrate_skip + 1)
+#endif /* CONFIG_COMPACTION */
+
#define get_pageblock_flags(page) \
- get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1)
+ get_pageblock_flags_group(page, 0, PB_migrate_end)
#define set_pageblock_flags(page, flags) \
set_pageblock_flags_group(page, flags, \
- 0, NR_PAGEBLOCK_BITS-1)
+ 0, PB_migrate_end)
#endif /* PAGEBLOCK_FLAGS_H */
diff --git a/include/linux/prio_tree.h b/include/linux/prio_tree.h
deleted file mode 100644
index db04abb557e..00000000000
--- a/include/linux/prio_tree.h
+++ /dev/null
@@ -1,120 +0,0 @@
-#ifndef _LINUX_PRIO_TREE_H
-#define _LINUX_PRIO_TREE_H
-
-/*
- * K&R 2nd ed. A8.3 somewhat obliquely hints that initial sequences of struct
- * fields with identical types should end up at the same location. We'll use
- * this until we can scrap struct raw_prio_tree_node.
- *
- * Note: all this could be done more elegantly by using unnamed union/struct
- * fields. However, gcc 2.95.3 and apparently also gcc 3.0.4 don't support this
- * language extension.
- */
-
-struct raw_prio_tree_node {
- struct prio_tree_node *left;
- struct prio_tree_node *right;
- struct prio_tree_node *parent;
-};
-
-struct prio_tree_node {
- struct prio_tree_node *left;
- struct prio_tree_node *right;
- struct prio_tree_node *parent;
- unsigned long start;
- unsigned long last; /* last location _in_ interval */
-};
-
-struct prio_tree_root {
- struct prio_tree_node *prio_tree_node;
- unsigned short index_bits;
- unsigned short raw;
- /*
- * 0: nodes are of type struct prio_tree_node
- * 1: nodes are of type raw_prio_tree_node
- */
-};
-
-struct prio_tree_iter {
- struct prio_tree_node *cur;
- unsigned long mask;
- unsigned long value;
- int size_level;
-
- struct prio_tree_root *root;
- pgoff_t r_index;
- pgoff_t h_index;
-};
-
-static inline void prio_tree_iter_init(struct prio_tree_iter *iter,
- struct prio_tree_root *root, pgoff_t r_index, pgoff_t h_index)
-{
- iter->root = root;
- iter->r_index = r_index;
- iter->h_index = h_index;
- iter->cur = NULL;
-}
-
-#define __INIT_PRIO_TREE_ROOT(ptr, _raw) \
-do { \
- (ptr)->prio_tree_node = NULL; \
- (ptr)->index_bits = 1; \
- (ptr)->raw = (_raw); \
-} while (0)
-
-#define INIT_PRIO_TREE_ROOT(ptr) __INIT_PRIO_TREE_ROOT(ptr, 0)
-#define INIT_RAW_PRIO_TREE_ROOT(ptr) __INIT_PRIO_TREE_ROOT(ptr, 1)
-
-#define INIT_PRIO_TREE_NODE(ptr) \
-do { \
- (ptr)->left = (ptr)->right = (ptr)->parent = (ptr); \
-} while (0)
-
-#define INIT_PRIO_TREE_ITER(ptr) \
-do { \
- (ptr)->cur = NULL; \
- (ptr)->mask = 0UL; \
- (ptr)->value = 0UL; \
- (ptr)->size_level = 0; \
-} while (0)
-
-#define prio_tree_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-static inline int prio_tree_empty(const struct prio_tree_root *root)
-{
- return root->prio_tree_node == NULL;
-}
-
-static inline int prio_tree_root(const struct prio_tree_node *node)
-{
- return node->parent == node;
-}
-
-static inline int prio_tree_left_empty(const struct prio_tree_node *node)
-{
- return node->left == node;
-}
-
-static inline int prio_tree_right_empty(const struct prio_tree_node *node)
-{
- return node->right == node;
-}
-
-
-struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
- struct prio_tree_node *old, struct prio_tree_node *node);
-struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
- struct prio_tree_node *node);
-void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node);
-struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter);
-
-#define raw_prio_tree_replace(root, old, node) \
- prio_tree_replace(root, (struct prio_tree_node *) (old), \
- (struct prio_tree_node *) (node))
-#define raw_prio_tree_insert(root, node) \
- prio_tree_insert(root, (struct prio_tree_node *) (node))
-#define raw_prio_tree_remove(root, node) \
- prio_tree_remove(root, (struct prio_tree_node *) (node))
-
-#endif /* _LINUX_PRIO_TREE_H */
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 033b507b33b..0022c1bb1e2 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -23,72 +23,7 @@
I know it's not the cleaner way, but in C (not in C++) to get
performances and genericity...
- Some example of insert and search follows here. The search is a plain
- normal search over an ordered tree. The insert instead must be implemented
- in two steps: First, the code must insert the element in order as a red leaf
- in the tree, and then the support library function rb_insert_color() must
- be called. Such function will do the not trivial work to rebalance the
- rbtree, if necessary.
-
------------------------------------------------------------------------
-static inline struct page * rb_search_page_cache(struct inode * inode,
- unsigned long offset)
-{
- struct rb_node * n = inode->i_rb_page_cache.rb_node;
- struct page * page;
-
- while (n)
- {
- page = rb_entry(n, struct page, rb_page_cache);
-
- if (offset < page->offset)
- n = n->rb_left;
- else if (offset > page->offset)
- n = n->rb_right;
- else
- return page;
- }
- return NULL;
-}
-
-static inline struct page * __rb_insert_page_cache(struct inode * inode,
- unsigned long offset,
- struct rb_node * node)
-{
- struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
- struct rb_node * parent = NULL;
- struct page * page;
-
- while (*p)
- {
- parent = *p;
- page = rb_entry(parent, struct page, rb_page_cache);
-
- if (offset < page->offset)
- p = &(*p)->rb_left;
- else if (offset > page->offset)
- p = &(*p)->rb_right;
- else
- return page;
- }
-
- rb_link_node(node, parent, p);
-
- return NULL;
-}
-
-static inline struct page * rb_insert_page_cache(struct inode * inode,
- unsigned long offset,
- struct rb_node * node)
-{
- struct page * ret;
- if ((ret = __rb_insert_page_cache(inode, offset, node)))
- goto out;
- rb_insert_color(node, &inode->i_rb_page_cache);
- out:
- return ret;
-}
------------------------------------------------------------------------
+ See Documentation/rbtree.txt for documentation and samples.
*/
#ifndef _LINUX_RBTREE_H
@@ -97,63 +32,35 @@ static inline struct page * rb_insert_page_cache(struct inode * inode,
#include <linux/kernel.h>
#include <linux/stddef.h>
-struct rb_node
-{
- unsigned long rb_parent_color;
-#define RB_RED 0
-#define RB_BLACK 1
+struct rb_node {
+ unsigned long __rb_parent_color;
struct rb_node *rb_right;
struct rb_node *rb_left;
} __attribute__((aligned(sizeof(long))));
/* The alignment might seem pointless, but allegedly CRIS needs it */
-struct rb_root
-{
+struct rb_root {
struct rb_node *rb_node;
};
-#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3))
-#define rb_color(r) ((r)->rb_parent_color & 1)
-#define rb_is_red(r) (!rb_color(r))
-#define rb_is_black(r) rb_color(r)
-#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0)
-#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0)
-
-static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
-{
- rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p;
-}
-static inline void rb_set_color(struct rb_node *rb, int color)
-{
- rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
-}
+#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
#define RB_ROOT (struct rb_root) { NULL, }
#define rb_entry(ptr, type, member) container_of(ptr, type, member)
-#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
-#define RB_EMPTY_NODE(node) (rb_parent(node) == node)
-#define RB_CLEAR_NODE(node) (rb_set_parent(node, node))
+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbree */
+#define RB_EMPTY_NODE(node) \
+ ((node)->__rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node) \
+ ((node)->__rb_parent_color = (unsigned long)(node))
-static inline void rb_init_node(struct rb_node *rb)
-{
- rb->rb_parent_color = 0;
- rb->rb_right = NULL;
- rb->rb_left = NULL;
- RB_CLEAR_NODE(rb);
-}
extern void rb_insert_color(struct rb_node *, struct rb_root *);
extern void rb_erase(struct rb_node *, struct rb_root *);
-typedef void (*rb_augment_f)(struct rb_node *node, void *data);
-
-extern void rb_augment_insert(struct rb_node *node,
- rb_augment_f func, void *data);
-extern struct rb_node *rb_augment_erase_begin(struct rb_node *node);
-extern void rb_augment_erase_end(struct rb_node *node,
- rb_augment_f func, void *data);
/* Find logical next and previous nodes in a tree */
extern struct rb_node *rb_next(const struct rb_node *);
@@ -168,7 +75,7 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
struct rb_node ** rb_link)
{
- node->rb_parent_color = (unsigned long )parent;
+ node->__rb_parent_color = (unsigned long)parent;
node->rb_left = node->rb_right = NULL;
*rb_link = node;
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
new file mode 100644
index 00000000000..214caa33433
--- /dev/null
+++ b/include/linux/rbtree_augmented.h
@@ -0,0 +1,223 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _LINUX_RBTREE_AUGMENTED_H
+#define _LINUX_RBTREE_AUGMENTED_H
+
+#include <linux/rbtree.h>
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks {
+ void (*propagate)(struct rb_node *node, struct rb_node *stop);
+ void (*copy)(struct rb_node *old, struct rb_node *new);
+ void (*rotate)(struct rb_node *old, struct rb_node *new);
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ __rb_insert_augmented(node, root, augment->rotate);
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
+ rbtype, rbaugmented, rbcompute) \
+static inline void \
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \
+{ \
+ while (rb != stop) { \
+ rbstruct *node = rb_entry(rb, rbstruct, rbfield); \
+ rbtype augmented = rbcompute(node); \
+ if (node->rbaugmented == augmented) \
+ break; \
+ node->rbaugmented = augmented; \
+ rb = rb_parent(&node->rbfield); \
+ } \
+} \
+static inline void \
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+} \
+static void \
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+ old->rbaugmented = rbcompute(old); \
+} \
+rbstatic const struct rb_augment_callbacks rbname = { \
+ rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
+};
+
+
+#define RB_RED 0
+#define RB_BLACK 1
+
+#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc) ((pc) & 1)
+#define __rb_is_black(pc) __rb_color(pc)
+#define __rb_is_red(pc) (!__rb_color(pc))
+#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+ rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+ struct rb_node *p, int color)
+{
+ rb->__rb_parent_color = (unsigned long)p | color;
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+ struct rb_node *parent, struct rb_root *root)
+{
+ if (parent) {
+ if (parent->rb_left == old)
+ parent->rb_left = new;
+ else
+ parent->rb_right = new;
+ } else
+ root->rb_node = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *parent, *rebalance;
+ unsigned long pc;
+
+ if (!tmp) {
+ /*
+ * Case 1: node to erase has no more than 1 child (easy!)
+ *
+ * Note that if there is one child it must be red due to 5)
+ * and node must be black due to 4). We adjust colors locally
+ * so as to bypass __rb_erase_color() later on.
+ */
+ pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, child, parent, root);
+ if (child) {
+ child->__rb_parent_color = pc;
+ rebalance = NULL;
+ } else
+ rebalance = __rb_is_black(pc) ? parent : NULL;
+ tmp = parent;
+ } else if (!child) {
+ /* Still case 1, but this time the child is node->rb_left */
+ tmp->__rb_parent_color = pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, tmp, parent, root);
+ rebalance = NULL;
+ tmp = parent;
+ } else {
+ struct rb_node *successor = child, *child2;
+ tmp = child->rb_left;
+ if (!tmp) {
+ /*
+ * Case 2: node's successor is its right child
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (s) -> (x) (c)
+ * \
+ * (c)
+ */
+ parent = successor;
+ child2 = successor->rb_right;
+ augment->copy(node, successor);
+ } else {
+ /*
+ * Case 3: node's successor is leftmost under
+ * node's right child subtree
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (y) -> (x) (y)
+ * / /
+ * (p) (p)
+ * / /
+ * (s) (c)
+ * \
+ * (c)
+ */
+ do {
+ parent = successor;
+ successor = tmp;
+ tmp = tmp->rb_left;
+ } while (tmp);
+ parent->rb_left = child2 = successor->rb_right;
+ successor->rb_right = child;
+ rb_set_parent(child, successor);
+ augment->copy(node, successor);
+ augment->propagate(parent, successor);
+ }
+
+ successor->rb_left = tmp = node->rb_left;
+ rb_set_parent(tmp, successor);
+
+ pc = node->__rb_parent_color;
+ tmp = __rb_parent(pc);
+ __rb_change_child(node, successor, tmp, root);
+ if (child2) {
+ successor->__rb_parent_color = pc;
+ rb_set_parent_color(child2, parent, RB_BLACK);
+ rebalance = NULL;
+ } else {
+ unsigned long pc2 = successor->__rb_parent_color;
+ successor->__rb_parent_color = pc;
+ rebalance = __rb_is_black(pc2) ? parent : NULL;
+ }
+ tmp = successor;
+ }
+
+ augment->propagate(tmp, NULL);
+ if (rebalance)
+ __rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif /* _LINUX_RBTREE_AUGMENTED_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 3fce545df39..bfe1f478064 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,14 +37,14 @@ struct anon_vma {
atomic_t refcount;
/*
- * NOTE: the LSB of the head.next is set by
+ * NOTE: the LSB of the rb_root.rb_node is set by
* mm_take_all_locks() _after_ taking the above lock. So the
- * head must only be read/written after taking the above lock
+ * rb_root must only be read/written after taking the above lock
* to be sure to see a valid next pointer. The LSB bit itself
* is serialized by a system wide lock only visible to
* mm_take_all_locks() (mm_all_locks_mutex).
*/
- struct list_head head; /* Chain of private "related" vmas */
+ struct rb_root rb_root; /* Interval tree of private "related" vmas */
};
/*
@@ -57,14 +57,29 @@ struct anon_vma {
* with a VMA, or the VMAs associated with an anon_vma.
* The "same_vma" list contains the anon_vma_chains linking
* all the anon_vmas associated with this VMA.
- * The "same_anon_vma" list contains the anon_vma_chains
+ * The "rb" field indexes on an interval tree the anon_vma_chains
* which link all the VMAs associated with this anon_vma.
*/
struct anon_vma_chain {
struct vm_area_struct *vma;
struct anon_vma *anon_vma;
struct list_head same_vma; /* locked by mmap_sem & page_table_lock */
- struct list_head same_anon_vma; /* locked by anon_vma->mutex */
+ struct rb_node rb; /* locked by anon_vma->mutex */
+ unsigned long rb_subtree_last;
+#ifdef CONFIG_DEBUG_VM_RB
+ unsigned long cached_vma_start, cached_vma_last;
+#endif
+};
+
+enum ttu_flags {
+ TTU_UNMAP = 0, /* unmap mode */
+ TTU_MIGRATION = 1, /* migration mode */
+ TTU_MUNLOCK = 2, /* munlock mode */
+ TTU_ACTION_MASK = 0xff,
+
+ TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
+ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
+ TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
};
#ifdef CONFIG_MMU
@@ -120,7 +135,6 @@ void anon_vma_init(void); /* create anon_vma_cachep */
int anon_vma_prepare(struct vm_area_struct *);
void unlink_anon_vmas(struct vm_area_struct *);
int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
-void anon_vma_moveto_tail(struct vm_area_struct *);
int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
static inline void anon_vma_merge(struct vm_area_struct *vma,
@@ -161,16 +175,6 @@ int page_referenced(struct page *, int is_locked,
int page_referenced_one(struct page *, struct vm_area_struct *,
unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
-enum ttu_flags {
- TTU_UNMAP = 0, /* unmap mode */
- TTU_MIGRATION = 1, /* migration mode */
- TTU_MUNLOCK = 2, /* munlock mode */
- TTU_ACTION_MASK = 0xff,
-
- TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
- TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
- TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
-};
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
int try_to_unmap(struct page *, enum ttu_flags flags);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c5612f0374..c2070e92a9d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -671,7 +671,6 @@ struct signal_struct {
struct rw_semaphore group_rwsem;
#endif
- int oom_adj; /* OOM kill score adjustment (bit shift) */
int oom_score_adj; /* OOM kill score adjustment */
int oom_score_adj_min; /* OOM kill score adjustment minimum value.
* Only settable by CAP_SYS_RESOURCE. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 388e7060141..68df9c17fbb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -281,7 +281,7 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
}
#endif
-extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+extern int page_evictable(struct page *page);
extern void check_move_unevictable_pages(struct page **, int nr_pages);
extern unsigned long scan_unevictable_pages;
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index 5088727478f..a520fd70a59 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -39,7 +39,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
static inline void timerqueue_init(struct timerqueue_node *node)
{
- rb_init_node(&node->node);
+ RB_CLEAR_NODE(&node->node);
}
static inline void timerqueue_init_head(struct timerqueue_head *head)
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 57f7b109151..3d311459437 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -52,7 +52,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
UNEVICTABLE_PGMUNLOCKED,
UNEVICTABLE_PGCLEARED, /* on COW, page truncate */
UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */
- UNEVICTABLE_MLOCKFREED,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
THP_FAULT_ALLOC,
THP_FAULT_FALLBACK,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index ad2cfd53dad..92a86b2cce3 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -198,6 +198,8 @@ extern void __dec_zone_state(struct zone *, enum zone_stat_item);
void refresh_cpu_vm_stats(int);
void refresh_zone_stat_thresholds(void);
+void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
+
int calculate_pressure_threshold(struct zone *zone);
int calculate_normal_threshold(struct zone *zone);
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
@@ -251,8 +253,18 @@ static inline void __dec_zone_page_state(struct page *page,
static inline void refresh_cpu_vm_stats(int cpu) { }
static inline void refresh_zone_stat_thresholds(void) { }
+static inline void drain_zonestat(struct zone *zone,
+ struct per_cpu_pageset *pset) { }
#endif /* CONFIG_SMP */
+static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
+ int migratetype)
+{
+ __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
+ if (is_migrate_cma(migratetype))
+ __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
+}
+
extern const char * const vmstat_text[];
#endif /* _LINUX_VMSTAT_H */
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b..9391706e925 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -36,7 +36,6 @@
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
- {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
{(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \
) : "GFP_NOWAIT"