summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-13 13:11:15 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-13 13:11:15 -0800
commitf6e858a00af788bab0fd4c0b7f5cd788000edc18 (patch)
treef9403ca3671be9821dbf83e726e61dbe75fbca6b /include
parent193c0d682525987db59ac3a24531a77e4947aa95 (diff)
parent98870901cce098bbe94d90d2c41d8d1fa8d94392 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc VM changes from Andrew Morton: "The rest of most-of-MM. The other MM bits await a slab merge. This patch includes the addition of a huge zero_page. Not a performance boost but it an save large amounts of physical memory in some situations. Also a bunch of Fujitsu engineers are working on memory hotplug. Which, as it turns out, was badly broken. About half of their patches are included here; the remainder are 3.8 material." However, this merge disables CONFIG_MOVABLE_NODE, which was totally broken. We don't add new features with "default y", nor do we add Kconfig questions that are incomprehensible to most people without any help text. Does the feature even make sense without compaction or memory hotplug? * akpm: (54 commits) mm/bootmem.c: remove unused wrapper function reserve_bootmem_generic() mm/memory.c: remove unused code from do_wp_page() asm-generic, mm: pgtable: consolidate zero page helpers mm/hugetlb.c: fix warning on freeing hwpoisoned hugepage hwpoison, hugetlbfs: fix RSS-counter warning hwpoison, hugetlbfs: fix "bad pmd" warning in unmapping hwpoisoned hugepage mm: protect against concurrent vma expansion memcg: do not check for mm in __mem_cgroup_count_vm_event tmpfs: support SEEK_DATA and SEEK_HOLE (reprise) mm: provide more accurate estimation of pages occupied by memmap fs/buffer.c: remove redundant initialization in alloc_page_buffers() fs/buffer.c: do not inline exported function writeback: fix a typo in comment mm: introduce new field "managed_pages" to struct zone mm, oom: remove statically defined arch functions of same name mm, oom: remove redundant sleep in pagefault oom handler mm, oom: cleanup pagefault oom handler memory_hotplug: allow online/offline memory to result movable node numa: add CONFIG_MOVABLE_NODE for movable-dedicated node mm, memcg: avoid unnecessary function call when memcg is disabled ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgtable.h26
-rw-r--r--include/linux/bootmem.h3
-rw-r--r--include/linux/cpuset.h2
-rw-r--r--include/linux/gfp.h1
-rw-r--r--include/linux/huge_mm.h18
-rw-r--r--include/linux/memcontrol.h9
-rw-r--r--include/linux/memory.h1
-rw-r--r--include/linux/mmzone.h41
-rw-r--r--include/linux/nodemask.h5
-rw-r--r--include/linux/res_counter.h5
-rw-r--r--include/linux/vm_event_item.h2
11 files changed, 92 insertions, 21 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b36ce40bd1c6..284e80831d2c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -449,6 +449,32 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size);
#endif
+#ifdef __HAVE_COLOR_ZERO_PAGE
+static inline int is_zero_pfn(unsigned long pfn)
+{
+ extern unsigned long zero_pfn;
+ unsigned long offset_from_zero_pfn = pfn - zero_pfn;
+ return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
+}
+
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+ return page_to_pfn(ZERO_PAGE(addr));
+}
+#else
+static inline int is_zero_pfn(unsigned long pfn)
+{
+ extern unsigned long zero_pfn;
+ return pfn == zero_pfn;
+}
+
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+ extern unsigned long zero_pfn;
+ return zero_pfn;
+}
+#endif
+
#ifdef CONFIG_MMU
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7b74452c5317..3f778c27f825 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -137,9 +137,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
#define alloc_bootmem_low_pages_node(pgdat, x) \
__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
-extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
- int flags);
-
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
extern void *alloc_remap(int nid, unsigned long size);
#else
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 838320fc3d1d..8c8a60d29407 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -144,7 +144,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
return node_possible_map;
}
-#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
+#define cpuset_current_mems_allowed (node_states[N_MEMORY])
static inline void cpuset_init_current_mems_allowed(void) {}
static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 31e8041274f6..f74856e17e48 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -34,6 +34,7 @@ struct vm_area_struct;
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
#define ___GFP_WRITE 0x1000000u
+/* If the above are modified, __GFP_BITS_SHIFT may need updating */
/*
* GFP bitmasks..
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1af477552459..092dc5305a32 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,6 +39,7 @@ enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
+ TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
#ifdef CONFIG_DEBUG_VM
TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
#endif
@@ -78,6 +79,9 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \
(__vma)->vm_flags & VM_HUGEPAGE))
+#define transparent_hugepage_use_zero_page() \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
#ifdef CONFIG_DEBUG_VM
#define transparent_hugepage_debug_cow() \
(transparent_hugepage_flags & \
@@ -95,12 +99,14 @@ extern int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
pte_t *pte, pmd_t *pmd, unsigned int flags);
extern int split_huge_page(struct page *page);
-extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
-#define split_huge_page_pmd(__mm, __pmd) \
+extern void __split_huge_page_pmd(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd);
+#define split_huge_page_pmd(__vma, __address, __pmd) \
do { \
pmd_t *____pmd = (__pmd); \
if (unlikely(pmd_trans_huge(*____pmd))) \
- __split_huge_page_pmd(__mm, ____pmd); \
+ __split_huge_page_pmd(__vma, __address, \
+ ____pmd); \
} while (0)
#define wait_split_huge_page(__anon_vma, __pmd) \
do { \
@@ -110,6 +116,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
BUG_ON(pmd_trans_splitting(*____pmd) || \
pmd_trans_huge(*____pmd)); \
} while (0)
+extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,
+ pmd_t *pmd);
#if HPAGE_PMD_ORDER > MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
@@ -177,10 +185,12 @@ static inline int split_huge_page(struct page *page)
{
return 0;
}
-#define split_huge_page_pmd(__mm, __pmd) \
+#define split_huge_page_pmd(__vma, __address, __pmd) \
do { } while (0)
#define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0)
+#define split_huge_page_pmd_mm(__mm, __address, __pmd) \
+ do { } while (0)
#define compound_trans_head(page) compound_head(page)
static inline int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 11ddc7ffeba8..e98a74c0c9c0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -181,7 +181,14 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
-void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
+void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
+static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
+ enum vm_event_item idx)
+{
+ if (mem_cgroup_disabled())
+ return;
+ __mem_cgroup_count_vm_event(mm, idx);
+}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void mem_cgroup_split_huge_fixup(struct page *head);
#endif
diff --git a/include/linux/memory.h b/include/linux/memory.h
index a09216d0dcc7..45e93b468878 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -54,6 +54,7 @@ struct memory_notify {
unsigned long start_pfn;
unsigned long nr_pages;
int status_change_nid_normal;
+ int status_change_nid_high;
int status_change_nid;
};
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0c0b1d608a69..cd55dad56aac 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -460,17 +460,44 @@ struct zone {
unsigned long zone_start_pfn;
/*
- * zone_start_pfn, spanned_pages and present_pages are all
- * protected by span_seqlock. It is a seqlock because it has
- * to be read outside of zone->lock, and it is done in the main
- * allocator path. But, it is written quite infrequently.
+ * spanned_pages is the total pages spanned by the zone, including
+ * holes, which is calculated as:
+ * spanned_pages = zone_end_pfn - zone_start_pfn;
*
- * The lock is declared along with zone->lock because it is
+ * present_pages is physical pages existing within the zone, which
+ * is calculated as:
+ * present_pages = spanned_pages - absent_pages(pags in holes);
+ *
+ * managed_pages is present pages managed by the buddy system, which
+ * is calculated as (reserved_pages includes pages allocated by the
+ * bootmem allocator):
+ * managed_pages = present_pages - reserved_pages;
+ *
+ * So present_pages may be used by memory hotplug or memory power
+ * management logic to figure out unmanaged pages by checking
+ * (present_pages - managed_pages). And managed_pages should be used
+ * by page allocator and vm scanner to calculate all kinds of watermarks
+ * and thresholds.
+ *
+ * Locking rules:
+ *
+ * zone_start_pfn and spanned_pages are protected by span_seqlock.
+ * It is a seqlock because it has to be read outside of zone->lock,
+ * and it is done in the main allocator path. But, it is written
+ * quite infrequently.
+ *
+ * The span_seq lock is declared along with zone->lock because it is
* frequently read in proximity to zone->lock. It's good to
* give them a chance of being in the same cacheline.
+ *
+ * Write access to present_pages and managed_pages at runtime should
+ * be protected by lock_memory_hotplug()/unlock_memory_hotplug().
+ * Any reader who can't tolerant drift of present_pages and
+ * managed_pages should hold memory hotplug lock to get a stable value.
*/
- unsigned long spanned_pages; /* total size, including holes */
- unsigned long present_pages; /* amount of memory (excluding holes) */
+ unsigned long spanned_pages;
+ unsigned long present_pages;
+ unsigned long managed_pages;
/*
* rarely used fields:
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 7afc36334d52..4e2cbfa640b7 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -380,6 +380,11 @@ enum node_states {
#else
N_HIGH_MEMORY = N_NORMAL_MEMORY,
#endif
+#ifdef CONFIG_MOVABLE_NODE
+ N_MEMORY, /* The node has memory(regular, high, movable) */
+#else
+ N_MEMORY = N_HIGH_MEMORY,
+#endif
N_CPU, /* The node has one or more cpus */
NR_NODE_STATES
};
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 7d7fbe2ef782..6f54e40fa218 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -74,14 +74,9 @@ ssize_t res_counter_read(struct res_counter *counter, int member,
const char __user *buf, size_t nbytes, loff_t *pos,
int (*read_strategy)(unsigned long long val, char *s));
-typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
-
int res_counter_memparse_write_strategy(const char *buf,
unsigned long long *res);
-int res_counter_write(struct res_counter *counter, int member,
- const char *buffer, write_strategy_fn write_strategy);
-
/*
* the field descriptors. one for each member of res_counter
*/
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 3d3114594370..fe786f07d2bd 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -58,6 +58,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
THP_COLLAPSE_ALLOC,
THP_COLLAPSE_ALLOC_FAILED,
THP_SPLIT,
+ THP_ZERO_PAGE_ALLOC,
+ THP_ZERO_PAGE_ALLOC_FAILED,
#endif
NR_VM_EVENT_ITEMS
};