summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-17 12:58:52 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-17 12:58:52 -0800
commit0cbeafb245ca568bc0765645aa64f0451b716657 (patch)
tree663c09ff5a62a1b2b66a17c4dfe0413603530a36 /include
parent58cf279acac3080ce03eeea5ca268210b3165fe1 (diff)
parent06b031de22d28ae76b2e5bfaf22c56a265a1e106 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge second patch-bomb from Andrew Morton: - more MM stuff: - Kirill's page-flags rework - Kirill's now-allegedly-fixed THP rework - MADV_FREE implementation - DAX feature work (msync/fsync). This isn't quite complete but DAX is new and it's good enough and the guys have a handle on what needs to be done - I expect this to be wrapped in the next week or two. - some vsprintf maintenance work - various other misc bits * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (145 commits) printk: change recursion_bug type to bool lib/vsprintf: factor out %pN[F] handler as netdev_bits() lib/vsprintf: refactor duplicate code to special_hex_number() printk-formats.txt: remove unimplemented %pT printk: help pr_debug and pr_devel to optimize out arguments lib/test_printf.c: test dentry printing lib/test_printf.c: add test for large bitmaps lib/test_printf.c: account for kvasprintf tests lib/test_printf.c: add a few number() tests lib/test_printf.c: test precision quirks lib/test_printf.c: check for out-of-bound writes lib/test_printf.c: don't BUG lib/kasprintf.c: add sanity check to kvasprintf lib/vsprintf.c: warn about too large precisions and field widths lib/vsprintf.c: help gcc make number() smaller lib/vsprintf.c: expand field_width to 24 bits lib/vsprintf.c: eliminate potential race in string() lib/vsprintf.c: move string() below widen_string() lib/vsprintf.c: pull out padding code from dentry_name() printk: do cond_resched() between lines while outputting to consoles ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgtable.h15
-rw-r--r--include/asm-generic/sections.h65
-rw-r--r--include/linux/blkdev.h20
-rw-r--r--include/linux/console.h1
-rw-r--r--include/linux/err.h2
-rw-r--r--include/linux/huge_mm.h79
-rw-r--r--include/linux/hugetlb.h1
-rw-r--r--include/linux/io.h15
-rw-r--r--include/linux/kdev_t.h5
-rw-r--r--include/linux/kernel.h36
-rw-r--r--include/linux/kvm_host.h37
-rw-r--r--include/linux/kvm_types.h2
-rw-r--r--include/linux/list.h11
-rw-r--r--include/linux/memblock.h18
-rw-r--r--include/linux/memcontrol.h16
-rw-r--r--include/linux/memory_hotplug.h3
-rw-r--r--include/linux/memremap.h114
-rw-r--r--include/linux/mm.h199
-rw-r--r--include/linux/mm_types.h25
-rw-r--r--include/linux/mmdebug.h6
-rw-r--r--include/linux/page-flags.h286
-rw-r--r--include/linux/pagemap.h38
-rw-r--r--include/linux/pfn.h9
-rw-r--r--include/linux/pfn_t.h102
-rw-r--r--include/linux/poison.h6
-rw-r--r--include/linux/printk.h12
-rw-r--r--include/linux/rmap.h37
-rw-r--r--include/linux/swap.h4
-rw-r--r--include/linux/vm_event_item.h5
-rw-r--r--include/trace/events/huge_memory.h1
-rw-r--r--include/uapi/asm-generic/mman-common.h1
31 files changed, 798 insertions, 373 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 3a6803cb0ec9..0b3c0d39ef75 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1,6 +1,8 @@
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H
+#include <linux/pfn.h>
+
#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU
@@ -207,11 +209,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
-#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern void pmdp_splitting_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
-#endif
-
#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
@@ -554,7 +551,7 @@ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
* by vm_insert_pfn().
*/
static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn)
+ pfn_t pfn)
{
return 0;
}
@@ -589,7 +586,7 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr,
unsigned long size);
extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn);
+ pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size);
@@ -627,10 +624,6 @@ static inline int pmd_trans_huge(pmd_t pmd)
{
return 0;
}
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
- return 0;
-}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index b58fd667f87b..af0254c09424 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -4,6 +4,7 @@
/* References to section boundaries */
#include <linux/compiler.h>
+#include <linux/types.h>
/*
* Usage guidelines:
@@ -63,4 +64,68 @@ static inline int arch_is_kernel_data(unsigned long addr)
}
#endif
+/**
+ * memory_contains - checks if an object is contained within a memory region
+ * @begin: virtual address of the beginning of the memory region
+ * @end: virtual address of the end of the memory region
+ * @virt: virtual address of the memory object
+ * @size: size of the memory object
+ *
+ * Returns: true if the object specified by @virt and @size is entirely
+ * contained within the memory region defined by @begin and @end, false
+ * otherwise.
+ */
+static inline bool memory_contains(void *begin, void *end, void *virt,
+ size_t size)
+{
+ return virt >= begin && virt + size <= end;
+}
+
+/**
+ * memory_intersects - checks if the region occupied by an object intersects
+ * with another memory region
+ * @begin: virtual address of the beginning of the memory regien
+ * @end: virtual address of the end of the memory region
+ * @virt: virtual address of the memory object
+ * @size: size of the memory object
+ *
+ * Returns: true if an object's memory region, specified by @virt and @size,
+ * intersects with the region specified by @begin and @end, false otherwise.
+ */
+static inline bool memory_intersects(void *begin, void *end, void *virt,
+ size_t size)
+{
+ void *vend = virt + size;
+
+ return (virt >= begin && virt < end) || (vend >= begin && vend < end);
+}
+
+/**
+ * init_section_contains - checks if an object is contained within the init
+ * section
+ * @virt: virtual address of the memory object
+ * @size: size of the memory object
+ *
+ * Returns: true if the object specified by @virt and @size is entirely
+ * contained within the init section, false otherwise.
+ */
+static inline bool init_section_contains(void *virt, size_t size)
+{
+ return memory_contains(__init_begin, __init_end, virt, size);
+}
+
+/**
+ * init_section_intersects - checks if the region occupied by an object
+ * intersects with the init section
+ * @virt: virtual address of the memory object
+ * @size: size of the memory object
+ *
+ * Returns: true if an object's memory region, specified by @virt and @size,
+ * intersects with the init section, false otherwise.
+ */
+static inline bool init_section_intersects(void *virt, size_t size)
+{
+ return memory_intersects(__init_begin, __init_end, virt, size);
+}
+
#endif /* _ASM_GENERIC_SECTIONS_H_ */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c70e3588a48c..bfb64d672e19 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -15,6 +15,7 @@
#include <linux/backing-dev-defs.h>
#include <linux/wait.h>
#include <linux/mempool.h>
+#include <linux/pfn.h>
#include <linux/bio.h>
#include <linux/stringify.h>
#include <linux/gfp.h>
@@ -1617,6 +1618,20 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+/**
+ * struct blk_dax_ctl - control and output parameters for ->direct_access
+ * @sector: (input) offset relative to a block_device
+ * @addr: (output) kernel virtual address for @sector populated by driver
+ * @pfn: (output) page frame number for @addr populated by driver
+ * @size: (input) number of bytes requested
+ */
+struct blk_dax_ctl {
+ sector_t sector;
+ void __pmem *addr;
+ long size;
+ pfn_t pfn;
+};
+
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
@@ -1624,7 +1639,7 @@ struct block_device_operations {
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
long (*direct_access)(struct block_device *, sector_t, void __pmem **,
- unsigned long *pfn);
+ pfn_t *);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
/* ->media_changed() is DEPRECATED, use ->check_events() instead */
@@ -1643,8 +1658,7 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
struct writeback_control *);
-extern long bdev_direct_access(struct block_device *, sector_t,
- void __pmem **addr, unsigned long *pfn, long size);
+extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *);
#else /* CONFIG_BLOCK */
struct block_device;
diff --git a/include/linux/console.h b/include/linux/console.h
index bd194343c346..ea731af2451e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -150,6 +150,7 @@ extern int console_trylock(void);
extern void console_unlock(void);
extern void console_conditional_schedule(void);
extern void console_unblank(void);
+extern void console_flush_on_panic(void);
extern struct tty_driver *console_device(int *);
extern void console_stop(struct console *);
extern void console_start(struct console *);
diff --git a/include/linux/err.h b/include/linux/err.h
index a729120644d5..56762ab41713 100644
--- a/include/linux/err.h
+++ b/include/linux/err.h
@@ -37,7 +37,7 @@ static inline bool __must_check IS_ERR(__force const void *ptr)
static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
{
- return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+ return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
}
/**
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ecb080d6ff42..cfe81e10bd54 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,13 +19,16 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
unsigned int flags);
+extern int madvise_free_huge_pmd(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr, unsigned long next);
extern int zap_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr);
extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
-extern int move_huge_pmd(struct vm_area_struct *vma,
+extern bool move_huge_pmd(struct vm_area_struct *vma,
struct vm_area_struct *new_vma,
unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
@@ -34,8 +37,7 @@ extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
- unsigned long pfn, bool write);
-
+ pfn_t pfn, bool write);
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
@@ -48,21 +50,13 @@ enum transparent_hugepage_flag {
#endif
};
-enum page_check_address_pmd_flag {
- PAGE_CHECK_ADDRESS_PMD_FLAG,
- PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG,
- PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG,
-};
-extern pmd_t *page_check_address_pmd(struct page *page,
- struct mm_struct *mm,
- unsigned long address,
- enum page_check_address_pmd_flag flag,
- spinlock_t **ptl);
-
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, int flags);
+
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
@@ -95,30 +89,28 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
#endif /* CONFIG_DEBUG_VM */
extern unsigned long transparent_hugepage_flags;
-extern int split_huge_page_to_list(struct page *page, struct list_head *list);
+
+extern void prep_transhuge_page(struct page *page);
+extern void free_transhuge_page(struct page *page);
+
+int split_huge_page_to_list(struct page *page, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
return split_huge_page_to_list(page, NULL);
}
-extern void __split_huge_page_pmd(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmd);
-#define split_huge_page_pmd(__vma, __address, __pmd) \
+void deferred_split_huge_page(struct page *page);
+
+void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long address);
+
+#define split_huge_pmd(__vma, __pmd, __address) \
do { \
pmd_t *____pmd = (__pmd); \
- if (unlikely(pmd_trans_huge(*____pmd))) \
- __split_huge_page_pmd(__vma, __address, \
- ____pmd); \
+ if (pmd_trans_huge(*____pmd) \
+ || pmd_devmap(*____pmd)) \
+ __split_huge_pmd(__vma, __pmd, __address); \
} while (0)
-#define wait_split_huge_page(__anon_vma, __pmd) \
- do { \
- pmd_t *____pmd = (__pmd); \
- anon_vma_lock_write(__anon_vma); \
- anon_vma_unlock_write(__anon_vma); \
- BUG_ON(pmd_trans_splitting(*____pmd) || \
- pmd_trans_huge(*____pmd)); \
- } while (0)
-extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd);
+
#if HPAGE_PMD_ORDER >= MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
@@ -128,17 +120,17 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
long adjust_next);
-extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+extern bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl);
/* mmap_sem must be held on entry */
-static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl)
{
VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
- if (pmd_trans_huge(*pmd))
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
return __pmd_trans_huge_lock(pmd, vma, ptl);
else
- return 0;
+ return false;
}
static inline int hpage_nr_pages(struct page *page)
{
@@ -183,11 +175,8 @@ static inline int split_huge_page(struct page *page)
{
return 0;
}
-#define split_huge_page_pmd(__vma, __address, __pmd) \
- do { } while (0)
-#define wait_split_huge_page(__anon_vma, __pmd) \
- do { } while (0)
-#define split_huge_page_pmd_mm(__mm, __address, __pmd) \
+static inline void deferred_split_huge_page(struct page *page) {}
+#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)
static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
@@ -201,10 +190,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
long adjust_next)
{
}
-static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
+static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl)
{
- return 0;
+ return false;
}
static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -218,6 +207,12 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+
+static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd, int flags)
+{
+ return NULL;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e76574d8f9b5..7d953c2542a8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -8,6 +8,7 @@
#include <linux/cgroup.h>
#include <linux/list.h>
#include <linux/kref.h>
+#include <asm/pgtable.h>
struct ctl_table;
struct user_struct;
diff --git a/include/linux/io.h b/include/linux/io.h
index de64c1e53612..fffd88d7f426 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -89,21 +89,6 @@ void devm_memunmap(struct device *dev, void *addr);
void *__devm_memremap_pages(struct device *dev, struct resource *res);
-#ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res);
-#else
-static inline void *devm_memremap_pages(struct device *dev, struct resource *res)
-{
- /*
- * Fail attempts to call devm_memremap_pages() without
- * ZONE_DEVICE support enabled, this requires callers to fall
- * back to plain devm_memremap() based on config
- */
- WARN_ON_ONCE(1);
- return ERR_PTR(-ENXIO);
-}
-#endif
-
/*
* Some systems do not have legacy ISA devices.
* /dev/port is not a valid interface on these systems.
diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h
index 052c7b32cc91..8e9e288b08c1 100644
--- a/include/linux/kdev_t.h
+++ b/include/linux/kdev_t.h
@@ -35,11 +35,6 @@ static inline dev_t old_decode_dev(u16 val)
return MKDEV((val >> 8) & 255, val & 255);
}
-static inline bool new_valid_dev(dev_t dev)
-{
- return 1;
-}
-
static inline u32 new_encode_dev(dev_t dev)
{
unsigned major = MAJOR(dev);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7311c3294e25..f31638c6e873 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -202,26 +202,26 @@ extern int _cond_resched(void);
/**
* abs - return absolute value of an argument
- * @x: the value. If it is unsigned type, it is converted to signed type first
- * (s64, long or int depending on its size).
+ * @x: the value. If it is unsigned type, it is converted to signed type first.
+ * char is treated as if it was signed (regardless of whether it really is)
+ * but the macro's return type is preserved as char.
*
- * Return: an absolute value of x. If x is 64-bit, macro's return type is s64,
- * otherwise it is signed long.
+ * Return: an absolute value of x.
*/
-#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({ \
- s64 __x = (x); \
- (__x < 0) ? -__x : __x; \
- }), ({ \
- long ret; \
- if (sizeof(x) == sizeof(long)) { \
- long __x = (x); \
- ret = (__x < 0) ? -__x : __x; \
- } else { \
- int __x = (x); \
- ret = (__x < 0) ? -__x : __x; \
- } \
- ret; \
- }))
+#define abs(x) __abs_choose_expr(x, long long, \
+ __abs_choose_expr(x, long, \
+ __abs_choose_expr(x, int, \
+ __abs_choose_expr(x, short, \
+ __abs_choose_expr(x, char, \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(x), char), \
+ (char)({ signed char __x = (x); __x<0?-__x:__x; }), \
+ ((void)0)))))))
+
+#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(x), signed type) || \
+ __builtin_types_compatible_p(typeof(x), unsigned type), \
+ ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other)
/**
* reciprocal_scale - "scale" a value into range [0, ep_ro)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f707f74055c3..861f690aa791 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -66,7 +66,7 @@
* error pfns indicate that the gfn is in slot but faild to
* translate it to pfn on host.
*/
-static inline bool is_error_pfn(pfn_t pfn)
+static inline bool is_error_pfn(kvm_pfn_t pfn)
{
return !!(pfn & KVM_PFN_ERR_MASK);
}
@@ -76,13 +76,13 @@ static inline bool is_error_pfn(pfn_t pfn)
* translated to pfn - it is not in slot or failed to
* translate it to pfn.
*/
-static inline bool is_error_noslot_pfn(pfn_t pfn)
+static inline bool is_error_noslot_pfn(kvm_pfn_t pfn)
{
return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
}
/* noslot pfn indicates that the gfn is not in slot. */
-static inline bool is_noslot_pfn(pfn_t pfn)
+static inline bool is_noslot_pfn(kvm_pfn_t pfn)
{
return pfn == KVM_PFN_NOSLOT;
}
@@ -591,19 +591,20 @@ void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
-pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
+kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable);
-pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
-pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
-pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
- bool *async, bool write_fault, bool *writable);
+kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
+ bool atomic, bool *async, bool write_fault,
+ bool *writable);
-void kvm_release_pfn_clean(pfn_t pfn);
-void kvm_set_pfn_dirty(pfn_t pfn);
-void kvm_set_pfn_accessed(pfn_t pfn);
-void kvm_get_pfn(pfn_t pfn);
+void kvm_release_pfn_clean(kvm_pfn_t pfn);
+void kvm_set_pfn_dirty(kvm_pfn_t pfn);
+void kvm_set_pfn_accessed(kvm_pfn_t pfn);
+void kvm_get_pfn(kvm_pfn_t pfn);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
@@ -629,8 +630,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
-pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
-pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
+kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
@@ -811,7 +812,7 @@ void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-bool kvm_is_reserved_pfn(pfn_t pfn);
+bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
@@ -965,7 +966,7 @@ static inline gfn_t gpa_to_gfn(gpa_t gpa)
return (gfn_t)(gpa >> PAGE_SHIFT);
}
-static inline hpa_t pfn_to_hpa(pfn_t pfn)
+static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
{
return (hpa_t)pfn << PAGE_SHIFT;
}
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 1b47a185c2f0..8bf259dae9f6 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -53,7 +53,7 @@ typedef unsigned long hva_t;
typedef u64 hpa_t;
typedef u64 hfn_t;
-typedef hfn_t pfn_t;
+typedef hfn_t kvm_pfn_t;
struct gfn_to_hva_cache {
u64 generation;
diff --git a/include/linux/list.h b/include/linux/list.h
index 5356f4d661a7..30cf4200ab40 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -113,6 +113,17 @@ extern void __list_del_entry(struct list_head *entry);
extern void list_del(struct list_head *entry);
#endif
+#ifdef CONFIG_DEBUG_LIST
+/*
+ * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one
+ * of the pages it allocates is ever passed to list_add()
+ */
+extern void list_force_poison(struct list_head *entry);
+#else
+/* fallback to the less strict LIST_POISON* definitions */
+#define list_force_poison list_del
+#endif
+
/**
* list_replace - replace old entry by new one
* @old : the element to be replaced
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 173fb44e22f1..3106ac1c895e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -61,6 +61,14 @@ extern int memblock_debug;
extern bool movable_node_enabled;
#endif /* CONFIG_MOVABLE_NODE */
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+#define __init_memblock __meminit
+#define __initdata_memblock __meminitdata
+#else
+#define __init_memblock
+#define __initdata_memblock
+#endif
+
#define memblock_dbg(fmt, ...) \
if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
@@ -166,7 +174,7 @@ static inline bool memblock_is_hotpluggable(struct memblock_region *m)
return m->flags & MEMBLOCK_HOTPLUG;
}
-static inline bool movable_node_is_enabled(void)
+static inline bool __init_memblock movable_node_is_enabled(void)
{
return movable_node_enabled;
}
@@ -405,14 +413,6 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
for (idx = 0; idx < memblock_type->cnt; \
idx++,rgn = &memblock_type->regions[idx])
-#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-#define __init_memblock __meminit
-#define __initdata_memblock __meminitdata
-#else
-#define __init_memblock
-#define __initdata_memblock
-#endif
-
#ifdef CONFIG_MEMTEST
extern void early_memtest(phys_addr_t start, phys_addr_t end);
#else
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2292468f2a30..189f04d4d2ec 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -280,10 +280,12 @@ static inline void mem_cgroup_events(struct mem_cgroup *memcg,
bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, struct mem_cgroup **memcgp);
+ gfp_t gfp_mask, struct mem_cgroup **memcgp,
+ bool compound);
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
- bool lrucare);
-void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
+ bool lrucare, bool compound);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
+ bool compound);
void mem_cgroup_uncharge(struct page *page);
void mem_cgroup_uncharge_list(struct list_head *page_list);
@@ -515,7 +517,8 @@ static inline bool mem_cgroup_low(struct mem_cgroup *root,
static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask,
- struct mem_cgroup **memcgp)
+ struct mem_cgroup **memcgp,
+ bool compound)
{
*memcgp = NULL;
return 0;
@@ -523,12 +526,13 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
static inline void mem_cgroup_commit_charge(struct page *page,
struct mem_cgroup *memcg,
- bool lrucare)
+ bool lrucare, bool compound)
{
}
static inline void mem_cgroup_cancel_charge(struct page *page,
- struct mem_cgroup *memcg)
+ struct mem_cgroup *memcg,
+ bool compound)
{
}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 2ea574ff9714..43405992d027 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -275,7 +275,8 @@ extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
-extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
+extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
+ unsigned long map_offset);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
new file mode 100644
index 000000000000..bcaa634139a9
--- /dev/null
+++ b/include/linux/memremap.h
@@ -0,0 +1,114 @@
+#ifndef _LINUX_MEMREMAP_H_
+#define _LINUX_MEMREMAP_H_
+#include <linux/mm.h>
+#include <linux/ioport.h>
+#include <linux/percpu-refcount.h>
+
+struct resource;
+struct device;
+
+/**
+ * struct vmem_altmap - pre-allocated storage for vmemmap_populate
+ * @base_pfn: base of the entire dev_pagemap mapping
+ * @reserve: pages mapped, but reserved for driver use (relative to @base)
+ * @free: free pages set aside in the mapping for memmap storage
+ * @align: pages reserved to meet allocation alignments
+ * @alloc: track pages consumed, private to vmemmap_populate()
+ */
+struct vmem_altmap {
+ const unsigned long base_pfn;
+ const unsigned long reserve;
+ unsigned long free;
+ unsigned long align;
+ unsigned long alloc;
+};
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
+
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
+struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
+#else
+static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+{
+ return NULL;
+}
+#endif
+
+/**
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @altmap: pre-allocated/reserved memory for vmemmap allocations
+ * @res: physical address range covered by @ref
+ * @ref: reference count that pins the devm_memremap_pages() mapping
+ * @dev: host device of the mapping for debug
+ */
+struct dev_pagemap {
+ struct vmem_altmap *altmap;
+ const struct resource *res;
+ struct percpu_ref *ref;
+ struct device *dev;
+};
+
+#ifdef CONFIG_ZONE_DEVICE
+void *devm_memremap_pages(struct device *dev, struct resource *res,
+ struct percpu_ref *ref, struct vmem_altmap *altmap);
+struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+#else
+static inline void *devm_memremap_pages(struct device *dev,
+ struct resource *res, struct percpu_ref *ref,
+ struct vmem_altmap *altmap)
+{
+ /*
+ * Fail attempts to call devm_memremap_pages() without
+ * ZONE_DEVICE support enabled, this requires callers to fall
+ * back to plain devm_memremap() based on config
+ */
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-ENXIO);
+}
+
+static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+{
+ return NULL;
+}
+#endif
+
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
+ * same mapping.
+ */
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ struct dev_pagemap *pgmap)
+{
+ const struct resource *res = pgmap ? pgmap->res : NULL;
+ resource_size_t phys = PFN_PHYS(pfn);
+
+ /*
+ * In the cached case we're already holding a live reference so
+ * we can simply do a blind increment
+ */
+ if (res && phys >= res->start && phys <= res->end) {
+ percpu_ref_get(pgmap->ref);
+ return pgmap;
+ }
+
+ /* fall back to slow path lookup */
+ rcu_read_lock();
+ pgmap = find_dev_pagemap(phys);
+ if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+ pgmap = NULL;
+ rcu_read_unlock();
+
+ return pgmap;
+}
+
+static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
+{
+ if (pgmap)
+ percpu_ref_put(pgmap->ref);
+}
+#endif /* _LINUX_MEMREMAP_H_ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 839d9e9a1c38..f1cd22f2df1a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -16,6 +16,7 @@
#include <linux/mm_types.h>
#include <linux/range.h>
#include <linux/pfn.h>
+#include <linux/percpu-refcount.h>
#include <linux/bit_spinlock.h>
#include <linux/shrinker.h>
#include <linux/resource.h>
@@ -329,6 +330,13 @@ struct inode;
#define page_private(page) ((page)->private)
#define set_page_private(page, v) ((page)->private = (v))
+#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static inline int pmd_devmap(pmd_t pmd)
+{
+ return 0;
+}
+#endif
+
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
@@ -410,39 +418,17 @@ static inline int is_vmalloc_or_module_addr(const void *x)
extern void kvfree(const void *addr);
-static inline void compound_lock(struct page *page)
+static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- VM_BUG_ON_PAGE(PageSlab(page), page);
- bit_spin_lock(PG_compound_lock, &page->flags);
-#endif
+ return &page[1].compound_mapcount;
}
-static inline void compound_unlock(struct page *page)
+static inline int compound_mapcount(struct page *page)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- VM_BUG_ON_PAGE(PageSlab(page), page);
- bit_spin_unlock(PG_compound_lock, &page->flags);
-#endif
-}
-
-static inline unsigned long compound_lock_irqsave(struct page *page)
-{
- unsigned long uninitialized_var(flags);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- local_irq_save(flags);
- compound_lock(page);
-#endif
- return flags;
-}
-
-static inline void compound_unlock_irqrestore(struct page *page,
- unsigned long flags)
-{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- compound_unlock(page);
- local_irq_restore(flags);
-#endif
+ if (!PageCompound(page))
+ return 0;
+ page = compound_head(page);
+ return atomic_read(compound_mapcount_ptr(page)) + 1;
}
/*
@@ -455,61 +441,29 @@ static inline void page_mapcount_reset(struct page *page)
atomic_set(&(page)->_mapcount, -1);
}
+int __page_mapcount(struct page *page);
+
static inline int page_mapcount(struct page *page)
{
VM_BUG_ON_PAGE(PageSlab(page), page);
- return atomic_read(&page->_mapcount) + 1;
-}
-static inline int page_count(struct page *page)
-{
- return atomic_read(&compound_head(page)->_count);
-}
-
-static inline bool __compound_tail_refcounted(struct page *page)
-{
- return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page);
-}
-
-/*
- * This takes a head page as parameter and tells if the
- * tail page reference counting can be skipped.
- *
- * For this to be safe, PageSlab and PageHeadHuge must remain true on
- * any given page where they return true here, until all tail pins
- * have been released.
- */
-static inline bool compound_tail_refcounted(struct page *page)
-{
- VM_BUG_ON_PAGE(!PageHead(page), page);
- return __compound_tail_refcounted(page);
+ if (unlikely(PageCompound(page)))
+ return __page_mapcount(page);
+ return atomic_read(&page->_mapcount) + 1;
}
-static inline void get_huge_page_tail(struct page *page)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int total_mapcount(struct page *page);
+#else
+static inline int total_mapcount(struct page *page)
{
- /*
- * __split_huge_page_refcount() cannot run from under us.
- */
- VM_BUG_ON_PAGE(!PageTail(page), page);
- VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
- VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
- if (compound_tail_refcounted(compound_head(page)))
- atomic_inc(&page->_mapcount);
+ return page_mapcount(page);
}
+#endif
-extern bool __get_page_tail(struct page *page);
-
-static inline void get_page(struct page *page)
+static inline int page_count(struct page *page)
{
- if (unlikely(PageTail(page)))
- if (likely(__get_page_tail(page)))
- return;
- /*
- * Getting a normal page or the head of a compound page
- * requires to already have an elevated page->_count.
- */
- VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
- atomic_inc(&page->_count);
+ return atomic_read(&compound_head(page)->_count);
}
static inline struct page *virt_to_head_page(const void *x)
@@ -528,7 +482,8 @@ static inline void init_page_count(struct page *page)
atomic_set(&page->_count, 1);
}
-void put_page(struct page *page);
+void __put_page(struct page *page);
+
void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
@@ -548,6 +503,9 @@ enum compound_dtor_id {
#ifdef CONFIG_HUGETLB_PAGE
HUGETLB_PAGE_DTOR,
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ TRANSHUGE_PAGE_DTOR,
+#endif
NR_COMPOUND_DTORS,
};
extern compound_page_dtor * const compound_page_dtors[];
@@ -577,6 +535,8 @@ static inline void set_compound_order(struct page *page, unsigned int order)
page[1].compound_order = order;
}
+void free_compound_page(struct page *page);
+
#ifdef CONFIG_MMU
/*
* Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when
@@ -704,6 +664,51 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
+#ifdef CONFIG_ZONE_DEVICE
+void get_zone_device_page(struct page *page);
+void put_zone_device_page(struct page *page);
+static inline bool is_zone_device_page(const struct page *page)
+{
+ return page_zonenum(page) == ZONE_DEVICE;
+}
+#else
+static inline void get_zone_device_page(struct page *page)
+{
+}
+static inline void put_zone_device_page(struct page *page)
+{
+}
+static inline bool is_zone_device_page(const struct page *page)
+{
+ return false;
+}
+#endif
+
+static inline void get_page(struct page *page)
+{
+ page = compound_head(page);
+ /*
+ * Getting a normal page or the head of a compound page
+ * requires to already have an elevated page->_count.
+ */
+ VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
+ atomic_inc(&page->_count);
+
+ if (unlikely(is_zone_device_page(page)))
+ get_zone_device_page(page);
+}
+
+static inline void put_page(struct page *page)
+{
+ page = compound_head(page);
+
+ if (put_page_testzero(page))
+ __put_page(page);
+
+ if (unlikely(is_zone_device_page(page)))
+ put_zone_device_page(page);
+}
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
@@ -993,10 +998,21 @@ static inline pgoff_t page_file_index(struct page *page)
/*
* Return true if this page is mapped into pagetables.
+ * For compound page it returns true if any subpage of compound page is mapped.
*/
-static inline int page_mapped(struct page *page)
-{
- return atomic_read(&(page)->_mapcount) >= 0;
+static inline bool page_mapped(struct page *page)
+{
+ int i;
+ if (likely(!PageCompound(page)))
+ return atomic_read(&page->_mapcount) >= 0;
+ page = compound_head(page);
+ if (atomic_read(compound_mapcount_ptr(page)) >= 0)
+ return true;
+ for (i = 0; i < hpage_nr_pages(page); i++) {
+ if (atomic_read(&page[i]._mapcount) >= 0)
+ return true;
+ }
+ return false;
}
/*
@@ -1084,7 +1100,7 @@ static inline bool shmem_mapping(struct address_space *mapping)
}
#endif
-extern int can_do_mlock(void);
+extern bool can_do_mlock(void);
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);
@@ -1178,7 +1194,8 @@ int invalidate_inode_page(struct page *page);
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long address, unsigned int fault_flags);
+ unsigned long address, unsigned int fault_flags,
+ bool *unlocked);
#else
static inline int handle_mm_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
@@ -1190,7 +1207,7 @@ static inline int handle_mm_fault(struct mm_struct *mm,
}
static inline int fixup_user_fault(struct task_struct *tsk,
struct mm_struct *mm, unsigned long address,
- unsigned int fault_flags)
+ unsigned int fault_flags, bool *unlocked)
{
/* should never happen if there's no MMU */
BUG();
@@ -1444,6 +1461,13 @@ static inline void sync_mm_rss(struct mm_struct *mm)
}
#endif
+#ifndef __HAVE_ARCH_PTE_DEVMAP
+static inline int pte_devmap(pte_t pte)
+{
+ return 0;
+}
+#endif
+
int vma_wants_writenotify(struct vm_area_struct *vma);
extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -2114,7 +2138,7 @@ int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn);
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn);
+ pfn_t pfn);
int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
@@ -2224,7 +2248,14 @@ pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
-void *vmemmap_alloc_block_buf(unsigned long size, int node);
+struct vmem_altmap;
+void *__vmemmap_alloc_block_buf(unsigned long size, int node,
+ struct vmem_altmap *altmap);
+static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
+{
+ return __vmemmap_alloc_block_buf(size, node, NULL);
+}
+
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node);
@@ -2246,7 +2277,7 @@ extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
extern int unpoison_memory(unsigned long pfn);
extern int get_hwpoison_page(struct page *page);
-extern void put_hwpoison_page(struct page *page);
+#define put_hwpoison_page(page) put_page(page)
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p, int access);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6bc9a0ce2253..d3ebb9d21a53 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -54,6 +54,8 @@ struct page {
* see PAGE_MAPPING_ANON below.
*/
void *s_mem; /* slab first object */
+ atomic_t compound_mapcount; /* first tail page */
+ /* page_deferred_list().next -- second tail page */
};
/* Second double word */
@@ -61,6 +63,7 @@ struct page {
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* sl[aou]b first free object */
+ /* page_deferred_list().prev -- second tail page */
};
union {
@@ -81,20 +84,9 @@ struct page {
union {
/*
- * Count of ptes mapped in
- * mms, to show when page is
- * mapped & limit reverse map
- * searches.
- *
- * Used also for tail pages
- * refcounting instead of
- * _count. Tail pages cannot
- * be mapped and keeping the
- * tail page _count zero at
- * all times guarantees
- * get_page_unless_zero() will
- * never succeed on tail
- * pages.
+ * Count of ptes mapped in mms, to show
+ * when page is mapped & limit reverse
+ * map searches.
*/
atomic_t _mapcount;
@@ -124,6 +116,11 @@ struct page {
* Can be used as a generic list
* by the page owner.
*/
+ struct dev_pagemap *pgmap; /* ZONE_DEVICE pages are never on an
+ * lru or handled by a slab
+ * allocator, this points to the
+ * hosting device page map.
+ */
struct { /* slub per cpu partial pages */
struct page *next; /* Next partial slab */
#ifdef CONFIG_64BIT
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 772362adf471..053824b0a412 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -56,4 +56,10 @@ void dump_mm(const struct mm_struct *mm);
#define VIRTUAL_BUG_ON(cond) do { } while (0)
#endif
+#ifdef CONFIG_DEBUG_VM_PGFLAGS
+#define VM_BUG_ON_PGFLAGS(cond, page) VM_BUG_ON_PAGE(cond, page)
+#else
+#define VM_BUG_ON_PGFLAGS(cond, page) BUILD_BUG_ON_INVALID(cond)
+#endif
+
#endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index bb53c7b86315..19724e6ebd26 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -101,9 +101,6 @@ enum pageflags {
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- PG_compound_lock,
-#endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
PG_young,
PG_idle,
@@ -129,53 +126,104 @@ enum pageflags {
/* SLOB */
PG_slob_free = PG_private,
+
+ /* Compound pages. Stored in first tail page's flags */
+ PG_double_map = PG_private_2,
};
#ifndef __GENERATING_BOUNDS_H
+struct page; /* forward declaration */
+
+static inline struct page *compound_head(struct page *page)
+{
+ unsigned long head = READ_ONCE(page->compound_head);
+
+ if (unlikely(head & 1))
+ return (struct page *) (head - 1);
+ return page;
+}
+
+static inline int PageTail(struct page *page)
+{
+ return READ_ONCE(page->compound_head) & 1;
+}
+
+static inline int PageCompound(struct page *page)
+{
+ return test_bit(PG_head, &page->flags) || PageTail(page);
+}
+
+/*
+ * Page flags policies wrt compound pages
+ *
+ * PF_ANY:
+ * the page flag is relevant for small, head and tail pages.
+ *
+ * PF_HEAD:
+ * for compound page all operations related to the page flag applied to
+ * head page.
+ *
+ * PF_NO_TAIL:
+ * modifications of the page flag must be done on small or head pages,
+ * checks can be done on tail pages too.
+ *
+ * PF_NO_COMPOUND:
+ * the page flag is not relevant for compound pages.
+ */
+#define PF_ANY(page, enforce) page
+#define PF_HEAD(page, enforce) compound_head(page)
+#define PF_NO_TAIL(page, enforce) ({ \
+ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
+ compound_head(page);})
+#define PF_NO_COMPOUND(page, enforce) ({ \
+ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \
+ page;})
+
/*
* Macros to create function definitions for page flags
*/
-#define TESTPAGEFLAG(uname, lname) \
-static inline int Page##uname(const struct page *page) \
- { return test_bit(PG_##lname, &page->flags); }
+#define TESTPAGEFLAG(uname, lname, policy) \
+static inline int Page##uname(struct page *page) \
+ { return test_bit(PG_##lname, &policy(page, 0)->flags); }
-#define SETPAGEFLAG(uname, lname) \
+#define SETPAGEFLAG(uname, lname, policy) \
static inline void SetPage##uname(struct page *page) \
- { set_bit(PG_##lname, &page->flags); }
+ { set_bit(PG_##lname, &policy(page, 1)->flags); }
-#define CLEARPAGEFLAG(uname, lname) \
+#define CLEARPAGEFLAG(uname, lname, policy) \
static inline void ClearPage##uname(struct page *page) \
- { clear_bit(PG_##lname, &page->flags); }
+ { clear_bit(PG_##lname, &policy(page, 1)->flags); }
-#define __SETPAGEFLAG(uname, lname) \
+#define __SETPAGEFLAG(uname, lname, policy) \
static inline void __SetPage##uname(struct page *page) \
- { __set_bit(PG_##lname, &page->flags); }
+ { __set_bit(PG_##lname, &policy(page, 1)->flags); }
-#define __CLEARPAGEFLAG(uname, lname) \
+#define __CLEARPAGEFLAG(uname, lname, policy) \
static inline void __ClearPage##uname(struct page *page) \
- { __clear_bit(PG_##lname, &page->flags); }
+ { __clear_bit(PG_##lname, &policy(page, 1)->flags); }
-#define TESTSETFLAG(uname, lname) \
+#define TESTSETFLAG(uname, lname, policy) \
static inline int TestSetPage##uname(struct page *page) \
- { return test_and_set_bit(PG_##lname, &page->flags); }
+ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
-#define TESTCLEARFLAG(uname, lname) \
+#define TESTCLEARFLAG(uname, lname, policy) \
static inline int TestClearPage##uname(struct page *page) \
- { return test_and_clear_bit(PG_##lname, &page->flags); }
-
-#define __TESTCLEARFLAG(uname, lname) \
-static inline int __TestClearPage##uname(struct page *page) \
- { return __test_and_clear_bit(PG_##lname, &page->flags); }
+ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
-#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
- SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname)
+#define PAGEFLAG(uname, lname, policy) \
+ TESTPAGEFLAG(uname, lname, policy) \
+ SETPAGEFLAG(uname, lname, policy) \
+ CLEARPAGEFLAG(uname, lname, policy)
-#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
- __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
+#define __PAGEFLAG(uname, lname, policy) \
+ TESTPAGEFLAG(uname, lname, policy) \
+ __SETPAGEFLAG(uname, lname, policy) \
+ __CLEARPAGEFLAG(uname, lname, policy)
-#define TESTSCFLAG(uname, lname) \
- TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
+#define TESTSCFLAG(uname, lname, policy) \
+ TESTSETFLAG(uname, lname, policy) \
+ TESTCLEARFLAG(uname, lname, policy)
#define TESTPAGEFLAG_FALSE(uname) \
static inline int Page##uname(const struct page *page) { return 0; }
@@ -195,56 +243,62 @@ static inline int TestSetPage##uname(struct page *page) { return 0; }
#define TESTCLEARFLAG_FALSE(uname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
-#define __TESTCLEARFLAG_FALSE(uname) \
-static inline int __TestClearPage##uname(struct page *page) { return 0; }
-
#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
#define TESTSCFLAG_FALSE(uname) \
TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
-struct page; /* forward declaration */
-
-TESTPAGEFLAG(Locked, locked)
-PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
-PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
- __SETPAGEFLAG(Referenced, referenced)
-PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
-PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
-PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
- TESTCLEARFLAG(Active, active)
-__PAGEFLAG(Slab, slab)
-PAGEFLAG(Checked, checked) /* Used by some filesystems */
-PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
-PAGEFLAG(SavePinned, savepinned); /* Xen */
-PAGEFLAG(Foreign, foreign); /* Xen */
-PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
-PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
- __SETPAGEFLAG(SwapBacked, swapbacked)
-
-__PAGEFLAG(SlobFree, slob_free)
+__PAGEFLAG(Locked, locked, PF_NO_TAIL)
+PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
+PAGEFLAG(Referenced, referenced, PF_HEAD)
+ TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
+ __SETPAGEFLAG(Referenced, referenced, PF_HEAD)
+PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
+ __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
+PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
+PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
+ TESTCLEARFLAG(Active, active, PF_HEAD)
+__PAGEFLAG(Slab, slab, PF_NO_TAIL)
+__PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
+PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */
+
+/* Xen */
+PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND)
+ TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND)
+PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND);
+PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND);
+
+PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+ __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
+ __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
+ __SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
/*
* Private page markings that may be used by the filesystem that owns the page
* for its own purposes.
* - PG_private and PG_private_2 cause releasepage() and co to be invoked
*/
-PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private)
- __CLEARPAGEFLAG(Private, private)
-PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2)
-PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
+PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY)
+ __CLEARPAGEFLAG(Private, private, PF_ANY)
+PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY)
+PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
+ TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
/*
* Only test-and-set exist for PG_writeback. The unconditional operators are
* risky: they bypass page accounting.
*/
-TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
-PAGEFLAG(MappedToDisk, mappedtodisk)
+TESTPAGEFLAG(Writeback, writeback, PF_NO_COMPOUND)
+ TESTSCFLAG(Writeback, writeback, PF_NO_COMPOUND)
+PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_COMPOUND)
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
-PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
-PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
+PAGEFLAG(Reclaim, reclaim, PF_NO_COMPOUND)
+ TESTCLEARFLAG(Reclaim, reclaim, PF_NO_COMPOUND)
+PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
+ TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND)
#ifdef CONFIG_HIGHMEM
/*
@@ -257,31 +311,33 @@ PAGEFLAG_FALSE(HighMem)
#endif
#ifdef CONFIG_SWAP
-PAGEFLAG(SwapCache, swapcache)
+PAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
#else
PAGEFLAG_FALSE(SwapCache)
#endif
-PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
- TESTCLEARFLAG(Unevictable, unevictable)
+PAGEFLAG(Unevictable, unevictable, PF_HEAD)
+ __CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD)
+ TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD)
#ifdef CONFIG_MMU
-PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
- TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
+PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
+ __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
+ TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL)
#else
PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
- TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+ TESTSCFLAG_FALSE(Mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
-PAGEFLAG(Uncached, uncached)
+PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND)
#else
PAGEFLAG_FALSE(Uncached)
#endif
#ifdef CONFIG_MEMORY_FAILURE
-PAGEFLAG(HWPoison, hwpoison)
-TESTSCFLAG(HWPoison, hwpoison)
+PAGEFLAG(HWPoison, hwpoison, PF_ANY)
+TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
#else
PAGEFLAG_FALSE(HWPoison)
@@ -289,10 +345,10 @@ PAGEFLAG_FALSE(HWPoison)
#endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
-TESTPAGEFLAG(Young, young)
-SETPAGEFLAG(Young, young)
-TESTCLEARFLAG(Young, young)
-PAGEFLAG(Idle, idle)
+TESTPAGEFLAG(Young, young, PF_ANY)
+SETPAGEFLAG(Young, young, PF_ANY)
+TESTCLEARFLAG(Young, young, PF_ANY)
+PAGEFLAG(Idle, idle, PF_ANY)
#endif
/*
@@ -317,6 +373,7 @@ PAGEFLAG(Idle, idle)
static inline int PageAnon(struct page *page)
{
+ page = compound_head(page);
return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
}
@@ -329,6 +386,7 @@ static inline int PageAnon(struct page *page)
*/
static inline int PageKsm(struct page *page)
{
+ page = compound_head(page);
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
}
@@ -340,8 +398,9 @@ u64 stable_page_flags(struct page *page);
static inline int PageUptodate(struct page *page)
{
- int ret = test_bit(PG_uptodate, &(page)->flags);
-
+ int ret;
+ page = compound_head(page);
+ ret = test_bit(PG_uptodate, &(page)->flags);
/*
* Must ensure that the data we read out of the page is loaded
* _after_ we've loaded page->flags to check for PageUptodate.
@@ -358,22 +417,24 @@ static inline int PageUptodate(struct page *page)
static inline void __SetPageUptodate(struct page *page)
{
+ VM_BUG_ON_PAGE(PageTail(page), page);
smp_wmb();
- __set_bit(PG_uptodate, &(page)->flags);
+ __set_bit(PG_uptodate, &page->flags);
}
static inline void SetPageUptodate(struct page *page)
{
+ VM_BUG_ON_PAGE(PageTail(page), page);
/*
* Memory barrier must be issued before setting the PG_uptodate bit,
* so that all previous stores issued in order to bring the page
* uptodate are actually visible before PageUptodate becomes true.
*/
smp_wmb();
- set_bit(PG_uptodate, &(page)->flags);
+ set_bit(PG_uptodate, &page->flags);
}
-CLEARPAGEFLAG(Uptodate, uptodate)
+CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
int test_clear_page_writeback(struct page *page);
int __test_set_page_writeback(struct page *page, bool keep_write);
@@ -393,12 +454,7 @@ static inline void set_page_writeback_keepwrite(struct page *page)
test_set_page_writeback_keepwrite(page);
}
-__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
-
-static inline int PageTail(struct page *page)
-{
- return READ_ONCE(page->compound_head) & 1;
-}
+__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
static inline void set_compound_head(struct page *page, struct page *head)
{
@@ -410,20 +466,6 @@ static inline void clear_compound_head(struct page *page)
WRITE_ONCE(page->compound_head, 0);
}
-static inline struct page *compound_head(struct page *page)
-{
- unsigned long head = READ_ONCE(page->compound_head);
-
- if (unlikely(head & 1))
- return (struct page *) (head - 1);
- return page;
-}
-
-static inline int PageCompound(struct page *page)
-{
- return PageHead(page) || PageTail(page);
-
-}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void ClearPageCompound(struct page *page)
{
@@ -484,22 +526,43 @@ static inline int PageTransTail(struct page *page)
return PageTail(page);
}
-#else
-
-static inline int PageTransHuge(struct page *page)
+/*
+ * PageDoubleMap indicates that the compound page is mapped with PTEs as well
+ * as PMDs.
+ *
+ * This is required for optimization of rmap operations for THP: we can postpone
+ * per small page mapcount accounting (and its overhead from atomic operations)
+ * until the first PMD split.
+ *
+ * For the page PageDoubleMap means ->_mapcount in all sub-pages is offset up
+ * by one. This reference will go away with last compound_mapcount.
+ *
+ * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap().
+ */
+static inline int PageDoubleMap(struct page *page)
{
- return 0;
+ return PageHead(page) && test_bit(PG_double_map, &page[1].flags);
}
-static inline int PageTransCompound(struct page *page)
+static inline int TestSetPageDoubleMap(struct page *page)
{
- return 0;
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+ return test_and_set_bit(PG_double_map, &page[1].flags);
}
-static inline int PageTransTail(struct page *page)
+static inline int TestClearPageDoubleMap(struct page *page)
{
- return 0;
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+ return test_and_clear_bit(PG_double_map, &page[1].flags);
}
+
+#else
+TESTPAGEFLAG_FALSE(TransHuge)
+TESTPAGEFLAG_FALSE(TransCompound)
+TESTPAGEFLAG_FALSE(TransTail)
+TESTPAGEFLAG_FALSE(DoubleMap)
+ TESTSETFLAG_FALSE(DoubleMap)
+ TESTCLEARFLAG_FALSE(DoubleMap)
#endif
/*
@@ -583,12 +646,6 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
#define __PG_MLOCKED 0
#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define __PG_COMPOUND_LOCK (1 << PG_compound_lock)
-#else
-#define __PG_COMPOUND_LOCK 0
-#endif
-
/*
* Flags checked when a page is freed. Pages being freed should not have
* these flags set. It they are, there is a problem.
@@ -598,8 +655,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
1 << PG_private | 1 << PG_private_2 | \
1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- 1 << PG_unevictable | __PG_MLOCKED | \
- __PG_COMPOUND_LOCK)
+ 1 << PG_unevictable | __PG_MLOCKED)
/*
* Flags checked when a page is prepped for return by the page allocator.
@@ -626,6 +682,10 @@ static inline int page_has_private(struct page *page)
return !!(page->flags & PAGE_FLAGS_PRIVATE);
}
+#undef PF_ANY
+#undef PF_HEAD
+#undef PF_NO_TAIL
+#undef PF_NO_COMPOUND
#endif /* !__GENERATING_BOUNDS_H */
#endif /* PAGE_FLAGS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 26eabf5ec718..4d08b6c33557 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -394,10 +394,21 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
*/
static inline pgoff_t page_to_pgoff(struct page *page)
{
+ pgoff_t pgoff;
+
if (unlikely(PageHeadHuge(page)))
return page->index << compound_order(page);
- else
+
+ if (likely(!PageTransTail(page)))
return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+ /*
+ * We don't initialize ->index for tail pages: calculate based on
+ * head page
+ */
+ pgoff = compound_head(page)->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff += page - compound_head(page);
+ return pgoff;
}
/*
@@ -433,18 +444,9 @@ extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags);
extern void unlock_page(struct page *page);
-static inline void __set_page_locked(struct page *page)
-{
- __set_bit(PG_locked, &page->flags);
-}
-
-static inline void __clear_page_locked(struct page *page)
-{
- __clear_bit(PG_locked, &page->flags);
-}
-
static inline int trylock_page(struct page *page)
{
+ page = compound_head(page);
return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
}
@@ -497,9 +499,9 @@ extern int wait_on_page_bit_killable_timeout(struct page *page,
static inline int wait_on_page_locked_killable(struct page *page)
{
- if (PageLocked(page))
- return wait_on_page_bit_killable(page, PG_locked);
- return 0;
+ if (!PageLocked(page))
+ return 0;
+ return wait_on_page_bit_killable(compound_head(page), PG_locked);
}
extern wait_queue_head_t *page_waitqueue(struct page *page);
@@ -518,7 +520,7 @@ static inline void wake_up_page(struct page *page, int bit)
static inline void wait_on_page_locked(struct page *page)
{
if (PageLocked(page))
- wait_on_page_bit(page, PG_locked);
+ wait_on_page_bit(compound_head(page), PG_locked);
}
/*
@@ -664,17 +666,17 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
/*
* Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run __set_page_locked() against it.
+ * the page is new, so we can just run __SetPageLocked() against it.
*/
static inline int add_to_page_cache(struct page *page,
struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
{
int error;
- __set_page_locked(page);
+ __SetPageLocked(page);
error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
if (unlikely(error))
- __clear_page_locked(page);
+ __ClearPageLocked(page);
return error;
}
diff --git a/include/linux/pfn.h b/include/linux/pfn.h
index 97f3e88aead4..2d8e49711b63 100644
--- a/include/linux/pfn.h
+++ b/include/linux/pfn.h
@@ -3,6 +3,15 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+
+/*
+ * pfn_t: encapsulates a page-frame number that is optionally backed
+ * by memmap (struct page). Whether a pfn_t has a 'struct page'
+ * backing is indicated by flags in the high bits of the value.
+ */
+typedef struct {
+ unsigned long val;
+} pfn_t;
#endif
#define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
new file mode 100644
index 000000000000..0703b5360d31
--- /dev/null
+++ b/include/linux/pfn_t.h
@@ -0,0 +1,102 @@
+#ifndef _LINUX_PFN_T_H_
+#define _LINUX_PFN_T_H_
+#include <linux/mm.h>
+
+/*
+ * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
+ * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
+ * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
+ * PFN_DEV - pfn is not covered by system memmap by default
+ * PFN_MAP - pfn has a dynamic page mapping established by a device driver
+ */
+#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \
+ << (BITS_PER_LONG - PAGE_SHIFT))
+#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1))
+#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2))
+#define PFN_DEV (1UL << (BITS_PER_LONG - 3))
+#define PFN_MAP (1UL << (BITS_PER_LONG - 4))
+
+static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags)
+{
+ pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
+
+ return pfn_t;
+}
+
+/* a default pfn to pfn_t conversion assumes that @pfn is pfn_valid() */
+static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
+{
+ return __pfn_to_pfn_t(pfn, 0);
+}
+
+extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+
+static inline bool pfn_t_has_page(pfn_t pfn)
+{
+ return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
+}
+
+static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
+{
+ return pfn.val & ~PFN_FLAGS_MASK;
+}
+
+static inline struct page *pfn_t_to_page(pfn_t pfn)
+{
+ if (pfn_t_has_page(pfn))
+ return pfn_to_page(pfn_t_to_pfn(pfn));
+ return NULL;
+}
+
+static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+{
+ return PFN_PHYS(pfn_t_to_pfn(pfn));
+}
+
+static inline void *pfn_t_to_virt(pfn_t pfn)
+{
+ if (pfn_t_has_page(pfn))
+ return __va(pfn_t_to_phys(pfn));
+ return NULL;
+}
+
+static inline pfn_t page_to_pfn_t(struct page *page)
+{
+ return pfn_to_pfn_t(page_to_pfn(page));
+}
+
+static inline int pfn_t_valid(pfn_t pfn)
+{
+ return pfn_valid(pfn_t_to_pfn(pfn));
+}
+
+#ifdef CONFIG_MMU
+static inline pte_t pfn_t_pte(pfn_t pfn, pgprot_t pgprot)
+{
+ return pfn_pte(pfn_t_to_pfn(pfn), pgprot);
+}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
+{
+ return pfn_pmd(pfn_t_to_pfn(pfn), pgprot);
+}
+#endif
+
+#ifdef __HAVE_ARCH_PTE_DEVMAP
+static inline bool pfn_t_devmap(pfn_t pfn)
+{
+ const unsigned long flags = PFN_DEV|PFN_MAP;
+
+ return (pfn.val & flags) == flags;
+}
+#else
+static inline bool pfn_t_devmap(pfn_t pfn)
+{
+ return false;
+}
+pte_t pte_mkdevmap(pte_t pte);
+pmd_t pmd_mkdevmap(pmd_t pmd);
+#endif
+#endif /* _LINUX_PFN_T_H_ */
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 317e16de09e5..4a27153574e2 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -27,11 +27,15 @@
* Magic number "tsta" to indicate a static timer initializer
* for the object debugging code.
*/
-#define TIMER_ENTRY_STATIC ((void *) 0x74737461)
+#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
/********** mm/debug-pagealloc.c **********/
#define PAGE_POISON 0xaa
+/********** mm/page_alloc.c ************/
+
+#define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA)
+
/********** mm/slab.c **********/
/*
* Magic nums for obj red zoning.
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9729565c25ff..9ccbdf2c1453 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -106,13 +106,13 @@ struct va_format {
/*
* Dummy printk for disabled debugging statements to use whilst maintaining
- * gcc's format and side-effect checking.
+ * gcc's format checking.
*/
-static inline __printf(1, 2)
-int no_printk(const char *fmt, ...)
-{
- return 0;
-}
+#define no_printk(fmt, ...) \
+do { \
+ if (0) \
+ printk(fmt, ##__VA_ARGS__); \
+} while (0)
#ifdef CONFIG_EARLY_PRINTK
extern asmlinkage __printf(1, 2)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 29446aeef36e..bdf597c4f0be 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -85,6 +85,7 @@ enum ttu_flags {
TTU_UNMAP = 1, /* unmap mode */
TTU_MIGRATION = 2, /* migration mode */
TTU_MUNLOCK = 4, /* munlock mode */
+ TTU_LZFREE = 8, /* lazy free mode */
TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
@@ -161,25 +162,31 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,
struct anon_vma *page_get_anon_vma(struct page *page);
+/* bitflags for do_page_add_anon_rmap() */
+#define RMAP_EXCLUSIVE 0x01
+#define RMAP_COMPOUND 0x02
+
/*
* rmap interfaces called when adding or removing pte of page
*/
void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
-void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_anon_rmap(struct page *, struct vm_area_struct *,
+ unsigned long, bool);
void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long, int);
-void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+ unsigned long, bool);
void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *);
+void page_remove_rmap(struct page *, bool);
void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long);
void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long);
-static inline void page_dup_rmap(struct page *page)
+static inline void page_dup_rmap(struct page *page, bool compound)
{
- atomic_inc(&page->_mapcount);
+ atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount);
}
/*
@@ -210,6 +217,25 @@ static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
}
/*
+ * Used by idle page tracking to check if a page was referenced via page
+ * tables.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+ unsigned long address, pmd_t **pmdp,
+ pte_t **ptep, spinlock_t **ptlp);
+#else
+static inline bool page_check_address_transhuge(struct page *page,
+ struct mm_struct *mm, unsigned long address,
+ pmd_t **pmdp, pte_t **ptep, spinlock_t **ptlp)
+{
+ *ptep = page_check_address(page, mm, address, ptlp, 0);
+ *pmdp = NULL;
+ return !!*ptep;
+}
+#endif
+
+/*
* Used by swapoff to help locate where page is expected in vma.
*/
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
@@ -286,5 +312,6 @@ static inline int page_mkclean(struct page *page)
#define SWAP_AGAIN 1
#define SWAP_FAIL 2
#define SWAP_MLOCK 3
+#define SWAP_LZFREE 4
#endif /* _LINUX_RMAP_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 066bd21765ad..414e101cd061 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -307,6 +307,7 @@ extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);
+extern void deactivate_page(struct page *page);
extern void swap_setup(void);
extern void add_page_to_unevictable_list(struct page *page);
@@ -538,7 +539,8 @@ static inline int swp_swapcount(swp_entry_t entry)
return 0;
}
-#define reuse_swap_page(page) (page_mapcount(page) == 1)
+#define reuse_swap_page(page) \
+ (!PageTransCompound(page) && page_mapcount(page) == 1)
static inline int try_to_free_swap(struct page *page)
{
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index e623d392db0c..67c1dbd19c6d 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -25,6 +25,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
PGFREE, PGACTIVATE, PGDEACTIVATE,
PGFAULT, PGMAJFAULT,
+ PGLAZYFREED,
FOR_ALL_ZONES(PGREFILL),
FOR_ALL_ZONES(PGSTEAL_KSWAPD),
FOR_ALL_ZONES(PGSTEAL_DIRECT),
@@ -68,7 +69,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
THP_FAULT_FALLBACK,
THP_COLLAPSE_ALLOC,
THP_COLLAPSE_ALLOC_FAILED,
- THP_SPLIT,
+ THP_SPLIT_PAGE,
+ THP_SPLIT_PAGE_FAILED,
+ THP_SPLIT_PMD,
THP_ZERO_PAGE_ALLOC,
THP_ZERO_PAGE_ALLOC_FAILED,
#endif
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 97d635cabac8..0f803d2783e3 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -22,6 +22,7 @@
EM( SCAN_PAGE_LRU, "page_not_in_lru") \
EM( SCAN_PAGE_LOCK, "page_locked") \
EM( SCAN_PAGE_ANON, "page_not_anon") \
+ EM( SCAN_PAGE_COMPOUND, "page_compound") \
EM( SCAN_ANY_PROCESS, "no_process_for_page") \
EM( SCAN_VMA_NULL, "vma_null") \
EM( SCAN_VMA_CHECK, "vma_check_failed") \
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index a74dd84bbb6d..58274382a616 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -41,6 +41,7 @@
#define MADV_DONTNEED 4 /* don't need these pages */
/* common parameters: try to keep these consistent across architectures */
+#define MADV_FREE 8 /* free pages only if memory pressure */
#define MADV_REMOVE 9 /* remove these pages & resources */
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */