Diffstat (limited to 'include')
-rw-r--r--  include/linux/backing-dev.h     |   3
-rw-r--r--  include/linux/binfmts.h         |   2
-rw-r--r--  include/linux/cmdline-parser.h  |  43
-rw-r--r--  include/linux/compat.h          |   1
-rw-r--r--  include/linux/crash_dump.h      |   9
-rw-r--r--  include/linux/genalloc.h        |   4
-rw-r--r--  include/linux/hugetlb.h         |  25
-rw-r--r--  include/linux/init.h            |   1
-rw-r--r--  include/linux/ipc_namespace.h   |   2
-rw-r--r--  include/linux/kprobes.h         |  34
-rw-r--r--  include/linux/lz4.h             |   8
-rw-r--r--  include/linux/memblock.h        |   2
-rw-r--r--  include/linux/mempolicy.h       |  11
-rw-r--r--  include/linux/migrate.h         |   5
-rw-r--r--  include/linux/mm.h              |  32
-rw-r--r--  include/linux/mm_inline.h       |   1
-rw-r--r--  include/linux/mmzone.h          |   2
-rw-r--r--  include/linux/radix-tree.h      |   1
-rw-r--r--  include/linux/ramfs.h           |   2
-rw-r--r--  include/linux/rbtree.h          |  22
-rw-r--r--  include/linux/sched.h           |   8
-rw-r--r--  include/linux/smp.h             |  79
-rw-r--r--  include/linux/swap.h            |  52
-rw-r--r--  include/linux/syscalls.h        |   1
-rw-r--r--  include/linux/vm_event_item.h   |   6
-rw-r--r--  include/linux/vmstat.h          |   4
-rw-r--r--  include/linux/writeback.h       |   2
-rw-r--r--  include/trace/events/kmem.h     |  10
28 files changed, 252 insertions, 120 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c3881553f7d1..5f66d519a726 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -243,6 +243,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
* BDI_CAP_EXEC_MAP: Can be mapped for execution
*
* BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed.
+ *
+ * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold.
*/
#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
#define BDI_CAP_NO_WRITEBACK 0x00000002
@@ -254,6 +256,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
#define BDI_CAP_NO_ACCT_WB 0x00000080
#define BDI_CAP_SWAP_BACKED 0x00000100
#define BDI_CAP_STABLE_WRITES 0x00000200
+#define BDI_CAP_STRICTLIMIT 0x00000400
#define BDI_CAP_VMFLAGS \
(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
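A backing device that wants the strict behaviour opts in when it sets up its backing_dev_info; a minimal hedged sketch (the bdi pointer and the registration context are assumed, they are not part of this header):

	bdi->capabilities |= BDI_CAP_STRICTLIMIT;	/* keep this bdi's dirty pages below its own threshold */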
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 70cf138690e9..e8112ae50531 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -31,7 +31,7 @@ struct linux_binprm {
#ifdef __alpha__
unsigned int taso:1;
#endif
- unsigned int recursion_depth;
+ unsigned int recursion_depth; /* only for search_binary_handler() */
struct file * file;
struct cred *cred; /* new credentials */
int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */
diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h
new file mode 100644
index 000000000000..98e892ef6d5a
--- /dev/null
+++ b/include/linux/cmdline-parser.h
@@ -0,0 +1,43 @@
+/*
+ * Parsing command line, get the partitions information.
+ *
+ * Written by Cai Zhiyong <caizhiyong@huawei.com>
+ *
+ */
+#ifndef CMDLINEPARSEH
+#define CMDLINEPARSEH
+
+#include <linux/blkdev.h>
+
+/* partition flags */
+#define PF_RDONLY 0x01 /* Device is read only */
+#define PF_POWERUP_LOCK 0x02 /* Always locked after reset */
+
+struct cmdline_subpart {
+ char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */
+ sector_t from;
+ sector_t size;
+ int flags;
+ struct cmdline_subpart *next_subpart;
+};
+
+struct cmdline_parts {
+ char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */
+ unsigned int nr_subparts;
+ struct cmdline_subpart *subpart;
+ struct cmdline_parts *next_parts;
+};
+
+void cmdline_parts_free(struct cmdline_parts **parts);
+
+int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline);
+
+struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
+ const char *bdev);
+
+void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+ int slot,
+ int (*add_part)(int, struct cmdline_subpart *, void *),
+ void *param);
+
+#endif /* CMDLINEPARSEH */
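A hedged sketch of how a block driver might walk this API end to end; the device name "mmcblk0", the my_add_part() callback and the starting slot are illustrative assumptions, only the four cmdline_parts_* calls come from this header:

static int my_add_part(int slot, struct cmdline_subpart *subpart, void *param)
{
	pr_info("slot %d: %s @%llu +%llu flags %#x\n", slot, subpart->name,
		(unsigned long long)subpart->from,
		(unsigned long long)subpart->size, subpart->flags);
	return 0;	/* assumption: a non-zero return stops the walk */
}

static void my_apply_cmdline_parts(const char *cmdline, sector_t disk_size)
{
	struct cmdline_parts *parts_list, *parts;

	if (cmdline_parts_parse(&parts_list, cmdline))
		return;

	parts = cmdline_parts_find(parts_list, "mmcblk0");
	if (parts)
		cmdline_parts_set(parts, disk_size, 1, my_add_part, NULL);

	cmdline_parts_free(&parts_list);
}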
diff --git a/include/linux/compat.h b/include/linux/compat.h
index ec1aee4aec9c..345da00a86e0 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -43,6 +43,7 @@
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+ asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
{ \
return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 37e4f8da7cdf..fe68a5a98583 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -12,6 +12,15 @@
extern unsigned long long elfcorehdr_addr;
extern unsigned long long elfcorehdr_size;
+extern int __weak elfcorehdr_alloc(unsigned long long *addr,
+ unsigned long long *size);
+extern void __weak elfcorehdr_free(unsigned long long addr);
+extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos);
+extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
+extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot);
+
extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
unsigned long, int);
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 661d374aeb2d..f8d41cb1cbe0 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -66,8 +66,8 @@ struct gen_pool_chunk {
struct list_head next_chunk; /* next chunk in pool */
atomic_t avail;
phys_addr_t phys_addr; /* physical starting address of memory chunk */
- unsigned long start_addr; /* starting address of memory chunk */
- unsigned long end_addr; /* ending address of memory chunk */
+ unsigned long start_addr; /* start address of memory chunk */
+ unsigned long end_addr; /* end address of memory chunk (inclusive) */
unsigned long bits[0]; /* bitmap for allocating memory chunk */
};
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c2b1801a160b..0393270466c3 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -66,6 +66,9 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
vm_flags_t vm_flags);
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
int dequeue_hwpoisoned_huge_page(struct page *page);
+bool isolate_huge_page(struct page *page, struct list_head *list);
+void putback_active_hugepage(struct page *page);
+bool is_hugepage_active(struct page *page);
void copy_huge_page(struct page *dst, struct page *src);
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
@@ -134,6 +137,9 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
return 0;
}
+#define isolate_huge_page(p, l) false
+#define putback_active_hugepage(p) do {} while (0)
+#define is_hugepage_active(x) false
static inline void copy_huge_page(struct page *dst, struct page *src)
{
}
@@ -261,6 +267,8 @@ struct huge_bootmem_page {
};
struct page *alloc_huge_page_node(struct hstate *h, int nid);
+struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
+ unsigned long addr, int avoid_reserve);
/* arch callback */
int __init alloc_bootmem_huge_page(struct hstate *h);
@@ -371,9 +379,23 @@ static inline pgoff_t basepage_index(struct page *page)
return __basepage_index(page);
}
+extern void dissolve_free_huge_pages(unsigned long start_pfn,
+ unsigned long end_pfn);
+int pmd_huge_support(void);
+/*
+ * Currently hugepage migration is enabled only for pmd-based hugepage.
+ * This function will be updated when hugepage migration is more widely
+ * supported.
+ */
+static inline int hugepage_migration_support(struct hstate *h)
+{
+ return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT);
+}
+
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
#define alloc_huge_page_node(h, nid) NULL
+#define alloc_huge_page_noerr(v, a, r) NULL
#define alloc_bootmem_huge_page(h) NULL
#define hstate_file(f) NULL
#define hstate_sizelog(s) NULL
@@ -396,6 +418,9 @@ static inline pgoff_t basepage_index(struct page *page)
{
return page->index;
}
+#define dissolve_free_huge_pages(s, e) do {} while (0)
+#define pmd_huge_support() 0
+#define hugepage_migration_support(h) 0
#endif /* CONFIG_HUGETLB_PAGE */
#endif /* _LINUX_HUGETLB_H */
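The new isolate/putback helpers let hugepage migration callers treat huge pages much like ordinary LRU pages: isolate onto a private list, hand the list to migrate_pages(), and re-activate on failure. A hedged sketch under those assumptions (the allocation callback and the migrate reason are illustrative; real callers in mm/ differ in detail):

static int migrate_one_hugepage_sketch(struct page *hpage, new_page_t get_new_hpage)
{
	LIST_HEAD(pagelist);
	int ret;

	if (!isolate_huge_page(hpage, &pagelist))
		return -EBUSY;

	ret = migrate_pages(&pagelist, get_new_hpage, 0,
			    MIGRATE_SYNC, MR_MEMORY_FAILURE);
	if (ret)
		putback_active_hugepage(hpage);	/* migration failed: back onto the active list */
	return ret;
}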
diff --git a/include/linux/init.h b/include/linux/init.h
index e73f2b708525..f1c27a71d03c 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -153,6 +153,7 @@ extern unsigned int reset_devices;
void setup_arch(char **);
void prepare_namespace(void);
void __init load_default_modules(void);
+int __init init_rootfs(void);
extern void (*late_time_init)(void);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index c4d870b0d5e6..19c19a5eee29 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -22,7 +22,7 @@ struct ipc_ids {
int in_use;
unsigned short seq;
unsigned short seq_max;
- struct rw_semaphore rw_mutex;
+ struct rw_semaphore rwsem;
struct idr ipcs_idr;
int next_id;
};
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index ca1d27a0d6a6..925eaf28fca9 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -264,10 +264,36 @@ extern void arch_arm_kprobe(struct kprobe *p);
extern void arch_disarm_kprobe(struct kprobe *p);
extern int arch_init_kprobes(void);
extern void show_registers(struct pt_regs *regs);
-extern kprobe_opcode_t *get_insn_slot(void);
-extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
extern void kprobes_inc_nmissed_count(struct kprobe *p);
+struct kprobe_insn_cache {
+ struct mutex mutex;
+ void *(*alloc)(void); /* allocate insn page */
+ void (*free)(void *); /* free insn page */
+ struct list_head pages; /* list of kprobe_insn_page */
+ size_t insn_size; /* size of instruction slot */
+ int nr_garbage;
+};
+
+extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c);
+extern void __free_insn_slot(struct kprobe_insn_cache *c,
+ kprobe_opcode_t *slot, int dirty);
+
+#define DEFINE_INSN_CACHE_OPS(__name) \
+extern struct kprobe_insn_cache kprobe_##__name##_slots; \
+ \
+static inline kprobe_opcode_t *get_##__name##_slot(void) \
+{ \
+ return __get_insn_slot(&kprobe_##__name##_slots); \
+} \
+ \
+static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\
+{ \
+ __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \
+} \
+
+DEFINE_INSN_CACHE_OPS(insn);
+
#ifdef CONFIG_OPTPROBES
/*
* Internal structure for direct jump optimized probe
@@ -287,13 +313,13 @@ extern void arch_optimize_kprobes(struct list_head *oplist);
extern void arch_unoptimize_kprobes(struct list_head *oplist,
struct list_head *done_list);
extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
-extern kprobe_opcode_t *get_optinsn_slot(void);
-extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
unsigned long addr);
extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
+DEFINE_INSN_CACHE_OPS(optinsn);
+
#ifdef CONFIG_SYSCTL
extern int sysctl_kprobes_optimization;
extern int proc_kprobes_optimization_handler(struct ctl_table *table,
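The two removed prototypes are not lost: DEFINE_INSN_CACHE_OPS() regenerates them as inline wrappers around a shared struct kprobe_insn_cache, so existing callers keep the same names. For instance, DEFINE_INSN_CACHE_OPS(optinsn) above expands, modulo whitespace, to:

extern struct kprobe_insn_cache kprobe_optinsn_slots;

static inline kprobe_opcode_t *get_optinsn_slot(void)
{
	return __get_insn_slot(&kprobe_optinsn_slots);
}

static inline void free_optinsn_slot(kprobe_opcode_t *slot, int dirty)
{
	__free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
}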
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index d21c13f10a64..4356686b0a39 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len,
* note : Destination buffer must be already allocated.
* slightly faster than lz4_decompress_unknownoutputsize()
*/
-int lz4_decompress(const char *src, size_t *src_len, char *dest,
- size_t actual_dest_len);
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+ unsigned char *dest, size_t actual_dest_len);
/*
* lz4_decompress_unknownoutputsize()
@@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest,
* Error if return (< 0)
* note : Destination buffer must be already allocated.
*/
-int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
- char *dest, size_t *dest_len);
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
+ unsigned char *dest, size_t *dest_len);
#endif
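A hedged usage sketch for the retyped lz4_decompress(): the caller must already know the decompressed size, a negative return means error per the comment above, and *src_len reporting how many compressed bytes were consumed is an assumption about the implementation:

static int my_lz4_unpack(const unsigned char *src, size_t src_avail,
			 unsigned char *dst, size_t orig_size)
{
	size_t src_len = src_avail;

	if (lz4_decompress(src, &src_len, dst, orig_size) < 0)
		return -EINVAL;

	/* src_len is assumed to now hold the number of src bytes consumed */
	return 0;
}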
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f388203db7e8..31e95acddb4d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -60,6 +60,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
void memblock_trim_memory(phys_addr_t align);
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
+ unsigned long *end_pfn);
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
unsigned long *out_end_pfn, int *out_nid);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0d7df39a5885..da6716b9e3fe 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -91,7 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
}
#define vma_policy(vma) ((vma)->vm_policy)
-#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))
static inline void mpol_get(struct mempolicy *pol)
{
@@ -126,6 +125,7 @@ struct shared_policy {
spinlock_t lock;
};
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst);
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
int mpol_set_shared_policy(struct shared_policy *info,
struct vm_area_struct *vma,
@@ -173,7 +173,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
/* Check if a vma is migratable */
static inline int vma_migratable(struct vm_area_struct *vma)
{
- if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP))
+ if (vma->vm_flags & (VM_IO | VM_PFNMAP))
return 0;
/*
* Migration allocates pages in the highest zone. If we cannot
@@ -240,7 +240,12 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
}
#define vma_policy(vma) NULL
-#define vma_set_policy(vma, pol) do {} while(0)
+
+static inline int
+vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+ return 0;
+}
static inline void numa_policy_init(void)
{
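vma_dup_policy() folds the old mpol_dup() + vma_set_policy() pairing (vma_set_policy is removed earlier in this hunk) into one call that also reports allocation failure; a hedged sketch of the caller-side conversion, where the vma names and the error label are illustrative:

	/*
	 * before (illustrative):
	 *	pol = mpol_dup(vma_policy(old_vma));
	 *	if (IS_ERR(pol))
	 *		goto fail;
	 *	vma_set_policy(new_vma, pol);
	 * after:
	 */
	if (vma_dup_policy(old_vma, new_vma))
		goto fail;	/* assumption: returns -ENOMEM (or similar) on failure */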
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a405d3dc0f61..6fe521420631 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -41,8 +41,6 @@ extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
extern int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, enum migrate_mode mode, int reason);
-extern int migrate_huge_page(struct page *, new_page_t x,
- unsigned long private, enum migrate_mode mode);
extern int fail_migrate_page(struct address_space *,
struct page *, struct page *);
@@ -62,9 +60,6 @@ static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, enum migrate_mode mode, int reason)
{ return -ENOSYS; }
-static inline int migrate_huge_page(struct page *page, new_page_t x,
- unsigned long private, enum migrate_mode mode)
- { return -ENOSYS; }
static inline int migrate_prep(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d2d59b4149d0..caf543c7eaa7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -115,6 +115,12 @@ extern unsigned int kobjsize(const void *objp);
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+# define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */
+#else
+# define VM_SOFTDIRTY 0
+#endif
+
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */
#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
@@ -489,20 +495,6 @@ static inline int compound_order(struct page *page)
return (unsigned long)page[1].lru.prev;
}
-static inline int compound_trans_order(struct page *page)
-{
- int order;
- unsigned long flags;
-
- if (!PageHead(page))
- return 0;
-
- flags = compound_lock_irqsave(page);
- order = compound_order(page);
- compound_unlock_irqrestore(page, flags);
- return order;
-}
-
static inline void set_compound_order(struct page *page, unsigned long order)
{
page[1].lru.prev = (void *)order;
@@ -637,12 +629,12 @@ static inline enum zone_type page_zonenum(const struct page *page)
#endif
/*
- * The identification function is only used by the buddy allocator for
- * determining if two pages could be buddies. We are not really
- * identifying a zone since we could be using a the section number
- * id if we have not node id available in page flags.
- * We guarantee only that it will return the same value for two
- * combinable pages in a zone.
+ * The identification function is mainly used by the buddy allocator for
+ * determining if two pages could be buddies. We are not really identifying
+ * the zone since we could be using the section number id if we do not have
+ * node id available in page flags.
+ * We only guarantee that it will return the same value for two combinable
+ * pages in a zone.
*/
static inline int page_zone_id(struct page *page)
{
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 1397ccf81e91..cf55945c83fb 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -2,6 +2,7 @@
#define LINUX_MM_INLINE_H
#include <linux/huge_mm.h>
+#include <linux/swap.h>
/**
* page_is_file_cache - should the page be on a file LRU or anon LRU?
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index af4a3b77a8de..bd791e452ad7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -105,6 +105,7 @@ struct zone_padding {
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
+ NR_ALLOC_BATCH,
NR_LRU_BASE,
NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
NR_ACTIVE_ANON, /* " " " " " */
@@ -352,7 +353,6 @@ struct zone {
* free areas of different sizes
*/
spinlock_t lock;
- int all_unreclaimable; /* All pages pinned */
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/* Set to true when the PG_migrate_skip bits should be cleared */
bool compact_blockskip_flush;
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index ffc444c38b0a..403940787be1 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -231,6 +231,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
unsigned long index, unsigned long max_scan);
int radix_tree_preload(gfp_t gfp_mask);
+int radix_tree_maybe_preload(gfp_t gfp_mask);
void radix_tree_init(void);
void *radix_tree_tag_set(struct radix_tree_root *root,
unsigned long index, unsigned int tag);
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 69e37c2d1ea5..753207c8ce20 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -25,7 +25,7 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
extern const struct file_operations ramfs_file_operations;
extern const struct vm_operations_struct generic_file_vm_ops;
-extern int __init init_rootfs(void);
+extern int __init init_ramfs_fs(void);
int ramfs_fill_super(struct super_block *sb, void *data, int silent);
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 0022c1bb1e26..aa870a4ddf54 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -68,6 +68,10 @@ extern struct rb_node *rb_prev(const struct rb_node *);
extern struct rb_node *rb_first(const struct rb_root *);
extern struct rb_node *rb_last(const struct rb_root *);
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
/* Fast replacement of a single node without remove/rebalance/add/rebalance */
extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
struct rb_root *root);
@@ -81,4 +85,22 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
*rb_link = node;
}
+/**
+ * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of
+ * given type safe against removal of rb_node entry
+ *
+ * @pos: the 'type *' to use as a loop cursor.
+ * @n: another 'type *' to use as temporary storage
+ * @root: 'rb_root *' of the rbtree.
+ * @field: the name of the rb_node field within 'type'.
+ */
+#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
+ for (pos = rb_entry(rb_first_postorder(root), typeof(*pos), field),\
+ n = rb_entry(rb_next_postorder(&pos->field), \
+ typeof(*pos), field); \
+ &pos->field; \
+ pos = n, \
+ n = rb_entry(rb_next_postorder(&pos->field), \
+ typeof(*pos), field))
+
#endif /* _LINUX_RBTREE_H */
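The typical use for the postorder iterator is tearing down an entire tree without paying for per-node rb_erase() rebalancing, since every node is visited only after both of its children; a hedged sketch (struct my_node is hypothetical and kfree() assumes the nodes were kmalloc'd):

struct my_node {
	struct rb_node rb;
	int key;
};

static void my_tree_destroy(struct rb_root *root)
{
	struct my_node *pos, *n;

	rbtree_postorder_for_each_entry_safe(pos, n, root, rb)
		kfree(pos);	/* children were already freed when the parent is visited */

	*root = RB_ROOT;
}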
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ce1e1c0aaa33..45f254dddafc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2169,15 +2169,15 @@ static inline bool thread_group_leader(struct task_struct *p)
* all we care about is that we have a task with the appropriate
* pid, we don't actually care if we have the right task.
*/
-static inline int has_group_leader_pid(struct task_struct *p)
+static inline bool has_group_leader_pid(struct task_struct *p)
{
- return p->pid == p->tgid;
+ return task_pid(p) == p->signal->leader_pid;
}
static inline
-int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
{
- return p1->tgid == p2->tgid;
+ return p1->signal == p2->signal;
}
static inline struct task_struct *next_thread(const struct task_struct *p)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index c181399f2c20..cfb7ca094b38 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -28,6 +28,27 @@ extern unsigned int total_cpus;
int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
int wait);
+/*
+ * Call a function on all processors
+ */
+int on_each_cpu(smp_call_func_t func, void *info, int wait);
+
+/*
+ * Call a function on processors specified by mask, which might include
+ * the local one.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
+ void *info, bool wait);
+
+/*
+ * Call a function on each processor for which the supplied function
+ * cond_func returns a positive value. This may include the local
+ * processor.
+ */
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+ smp_call_func_t func, void *info, bool wait,
+ gfp_t gfp_flags);
+
#ifdef CONFIG_SMP
#include <linux/preempt.h>
@@ -95,27 +116,6 @@ static inline void call_function_init(void) { }
#endif
/*
- * Call a function on all processors
- */
-int on_each_cpu(smp_call_func_t func, void *info, int wait);
-
-/*
- * Call a function on processors specified by mask, which might include
- * the local one.
- */
-void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
- void *info, bool wait);
-
-/*
- * Call a function on each processor for which the supplied function
- * cond_func returns a positive value. This may include the local
- * processor.
- */
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
- smp_call_func_t func, void *info, bool wait,
- gfp_t gfp_flags);
-
-/*
* Mark the boot cpu "online" so that it can call console drivers in
* printk() and can access its per-cpu storage.
*/
@@ -139,43 +139,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
}
#define smp_call_function(func, info, wait) \
(up_smp_call_function(func, info))
-#define on_each_cpu(func, info, wait) \
- ({ \
- unsigned long __flags; \
- local_irq_save(__flags); \
- func(info); \
- local_irq_restore(__flags); \
- 0; \
- })
-/*
- * Note we still need to test the mask even for UP
- * because we actually can get an empty mask from
- * code that on SMP might call us without the local
- * CPU in the mask.
- */
-#define on_each_cpu_mask(mask, func, info, wait) \
- do { \
- if (cpumask_test_cpu(0, (mask))) { \
- local_irq_disable(); \
- (func)(info); \
- local_irq_enable(); \
- } \
- } while (0)
-/*
- * Preemption is disabled here to make sure the cond_func is called under the
- * same condtions in UP and SMP.
- */
-#define on_each_cpu_cond(cond_func, func, info, wait, gfp_flags)\
- do { \
- void *__info = (info); \
- preempt_disable(); \
- if ((cond_func)(0, __info)) { \
- local_irq_disable(); \
- (func)(__info); \
- local_irq_enable(); \
- } \
- preempt_enable(); \
- } while (0)
static inline void smp_send_reschedule(int cpu) { }
#define smp_prepare_boot_cpu() do {} while (0)
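With the UP macro versions gone, on_each_cpu() and friends are expected to be ordinary functions shared by both configurations, so callers look the same either way; a hedged sketch of the common pattern, where the callback must not sleep because it can run from IPI context (my_drain_local() is hypothetical):

static void my_drain_local(void *info)
{
	/* runs on every online CPU with local interrupts disabled; must not sleep */
}

static void my_drain_everywhere(void)
{
	/* wait=1: return only after the callback has completed on all CPUs */
	on_each_cpu(my_drain_local, NULL, 1);
}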
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d95cde5e257d..c03c139219c9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -182,6 +182,33 @@ enum {
#define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */
/*
+ * We use this to track usage of a cluster. A cluster is a block of swap disk
+ * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
+ * free clusters are organized into a list. We fetch an entry from the list to
+ * get a free cluster.
+ *
+ * The data field stores next cluster if the cluster is free or cluster usage
+ * counter otherwise. The flags field determines if a cluster is free. This is
+ * protected by swap_info_struct.lock.
+ */
+struct swap_cluster_info {
+ unsigned int data:24;
+ unsigned int flags:8;
+};
+#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
+#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */
+
+/*
+ * We assign a cluster to each CPU, so each CPU can allocate swap entry from
+ * its own cluster and swapout sequentially. The purpose is to optimize swapout
+ * throughput.
+ */
+struct percpu_cluster {
+ struct swap_cluster_info index; /* Current cluster index */
+ unsigned int next; /* Likely next allocation offset */
+};
+
+/*
* The in-memory structure used to track swap areas.
*/
struct swap_info_struct {
@@ -191,14 +218,16 @@ struct swap_info_struct {
signed char next; /* next type on the swap list */
unsigned int max; /* extent of the swap_map */
unsigned char *swap_map; /* vmalloc'ed array of usage counts */
+ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
+ struct swap_cluster_info free_cluster_head; /* free cluster list head */
+ struct swap_cluster_info free_cluster_tail; /* free cluster list tail */
unsigned int lowest_bit; /* index of first free in swap_map */
unsigned int highest_bit; /* index of last free in swap_map */
unsigned int pages; /* total of usable pages of swap */
unsigned int inuse_pages; /* number of those currently in use */
unsigned int cluster_next; /* likely index for next allocation */
unsigned int cluster_nr; /* countdown to next cluster search */
- unsigned int lowest_alloc; /* while preparing discard cluster */
- unsigned int highest_alloc; /* while preparing discard cluster */
+ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
struct swap_extent *curr_swap_extent;
struct swap_extent first_swap_extent;
struct block_device *bdev; /* swap device or bdev of swap file */
@@ -212,14 +241,18 @@ struct swap_info_struct {
* protect map scan related fields like
* swap_map, lowest_bit, highest_bit,
* inuse_pages, cluster_next,
- * cluster_nr, lowest_alloc and
- * highest_alloc. other fields are only
- * changed at swapon/swapoff, so are
- * protected by swap_lock. changing
- * flags need hold this lock and
- * swap_lock. If both locks need hold,
- * hold swap_lock first.
+ * cluster_nr, lowest_alloc,
+ * highest_alloc, free/discard cluster
+ * list. other fields are only changed
+ * at swapon/swapoff, so are protected
+ * by swap_lock. changing flags need
+ * hold this lock and swap_lock. If
+ * both locks need hold, hold swap_lock
+ * first.
*/
+ struct work_struct discard_work; /* discard worker */
+ struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */
+ struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
};
struct swap_list_t {
@@ -414,6 +447,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
#else /* CONFIG_SWAP */
+#define swap_address_space(entry) (NULL)
#define get_nr_swap_pages() 0L
#define total_swap_pages 0L
#define total_swapcache_pages() 0UL
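A hedged sketch of how the 24-bit data / 8-bit flags split in swap_cluster_info is meant to be consumed; the helper names are illustrative (similar accessors presumably live in mm/swapfile.c), and per the comment above callers are assumed to hold swap_info_struct.lock:

static inline bool my_cluster_is_free(struct swap_cluster_info *info)
{
	return info->flags & CLUSTER_FLAG_FREE;
}

static inline unsigned int my_cluster_count(struct swap_cluster_info *info)
{
	return info->data;		/* usage count of an allocated cluster */
}

static inline void my_cluster_set_next_flag(struct swap_cluster_info *info,
					    unsigned int next, unsigned int flag)
{
	info->flags = flag;		/* e.g. CLUSTER_FLAG_FREE */
	info->data = next;		/* index of the next free cluster */
}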
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 84662ecc7b51..7fac04e7ff6e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -186,6 +186,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
#define __SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index bd6cf61142be..1855f0a22add 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -70,6 +70,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
THP_ZERO_PAGE_ALLOC,
THP_ZERO_PAGE_ALLOC_FAILED,
#endif
+#ifdef CONFIG_SMP
+ NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */
+ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
+#endif
+ NR_TLB_LOCAL_FLUSH_ALL,
+ NR_TLB_LOCAL_FLUSH_ONE,
NR_VM_EVENT_ITEMS
};
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index c586679b6fef..e4b948080d20 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -143,7 +143,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
}
extern unsigned long global_reclaimable_pages(void);
-extern unsigned long zone_reclaimable_pages(struct zone *zone);
#ifdef CONFIG_NUMA
/*
@@ -198,7 +197,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
-void refresh_cpu_vm_stats(int);
+void cpu_vm_stats_fold(int cpu);
void refresh_zone_stat_thresholds(void);
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
@@ -255,6 +254,7 @@ static inline void __dec_zone_page_state(struct page *page,
static inline void refresh_cpu_vm_stats(int cpu) { }
static inline void refresh_zone_stat_thresholds(void) { }
+static inline void cpu_vm_stats_fold(int cpu) { }
static inline void drain_zonestat(struct zone *zone,
struct per_cpu_pageset *pset) { }
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 4e198ca1f685..021b8a319b9e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -98,8 +98,6 @@ int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
enum wb_reason reason);
void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
- enum wb_reason reason);
void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
void inode_wait_for_writeback(struct inode *inode);
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 6bc943ecb841..d0c613476620 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -268,11 +268,13 @@ TRACE_EVENT(mm_page_alloc_extfrag,
TP_PROTO(struct page *page,
int alloc_order, int fallback_order,
- int alloc_migratetype, int fallback_migratetype),
+ int alloc_migratetype, int fallback_migratetype,
+ int change_ownership),
TP_ARGS(page,
alloc_order, fallback_order,
- alloc_migratetype, fallback_migratetype),
+ alloc_migratetype, fallback_migratetype,
+ change_ownership),
TP_STRUCT__entry(
__field( struct page *, page )
@@ -280,6 +282,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
__field( int, fallback_order )
__field( int, alloc_migratetype )
__field( int, fallback_migratetype )
+ __field( int, change_ownership )
),
TP_fast_assign(
@@ -288,6 +291,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
__entry->fallback_order = fallback_order;
__entry->alloc_migratetype = alloc_migratetype;
__entry->fallback_migratetype = fallback_migratetype;
+ __entry->change_ownership = change_ownership;
),
TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
@@ -299,7 +303,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
__entry->alloc_migratetype,
__entry->fallback_migratetype,
__entry->fallback_order < pageblock_order,
- __entry->alloc_migratetype == __entry->fallback_migratetype)
+ __entry->change_ownership)
);
#endif /* _TRACE_KMEM_H */