diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/compaction.c | 11 | ||||
-rw-r--r-- | mm/huge_memory.c | 5 | ||||
-rw-r--r-- | mm/memblock.c | 8 | ||||
-rw-r--r-- | mm/memcontrol.c | 190 | ||||
-rw-r--r-- | mm/truncate.c | 11 | ||||
-rw-r--r-- | mm/vmscan.c | 1 |
6 files changed, 134 insertions, 92 deletions
diff --git a/mm/compaction.c b/mm/compaction.c index 6d592a021072..8be430b812de 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -406,6 +406,10 @@ static int compact_finished(struct zone *zone, if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) return COMPACT_CONTINUE; + /* + * order == -1 is expected when compacting via + * /proc/sys/vm/compact_memory + */ if (cc->order == -1) return COMPACT_CONTINUE; @@ -454,6 +458,13 @@ unsigned long compaction_suitable(struct zone *zone, int order) return COMPACT_SKIPPED; /* + * order == -1 is expected when compacting via + * /proc/sys/vm/compact_memory + */ + if (order == -1) + return COMPACT_CONTINUE; + + /* * fragmentation index determines if allocation failures are due to * low memory or external fragmentation * diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 004c9c2aac78..e187454d82f6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1203,6 +1203,8 @@ static void __split_huge_page_refcount(struct page *page) BUG_ON(!PageDirty(page_tail)); BUG_ON(!PageSwapBacked(page_tail)); + mem_cgroup_split_huge_fixup(page, page_tail); + lru_add_page_tail(zone, page, page_tail); } @@ -1837,9 +1839,9 @@ static void collapse_huge_page(struct mm_struct *mm, spin_lock(ptl); isolated = __collapse_huge_page_isolate(vma, address, pte); spin_unlock(ptl); - pte_unmap(pte); if (unlikely(!isolated)) { + pte_unmap(pte); spin_lock(&mm->page_table_lock); BUG_ON(!pmd_none(*pmd)); set_pmd_at(mm, address, pmd, _pmd); @@ -1856,6 +1858,7 @@ static void collapse_huge_page(struct mm_struct *mm, anon_vma_unlock(vma->anon_vma); __collapse_huge_page_copy(pte, new_page, vma, address, ptl); + pte_unmap(pte); __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); VM_BUG_ON(page_count(pgtable) != 1); diff --git a/mm/memblock.c b/mm/memblock.c index 400dc62697d7..bdba245d8afd 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -683,13 +683,13 @@ int __init_memblock memblock_is_memory(phys_addr_t addr) int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) { - int idx = memblock_search(&memblock.reserved, base); + int idx = memblock_search(&memblock.memory, base); if (idx == -1) return 0; - return memblock.reserved.regions[idx].base <= base && - (memblock.reserved.regions[idx].base + - memblock.reserved.regions[idx].size) >= (base + size); + return memblock.memory.regions[idx].base <= base && + (memblock.memory.regions[idx].base + + memblock.memory.regions[idx].size) >= (base + size); } int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8ab841031436..db76ef726293 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -600,23 +600,22 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, } static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, - struct page_cgroup *pc, - bool charge) + bool file, int nr_pages) { - int val = (charge) ? 1 : -1; - preempt_disable(); - if (PageCgroupCache(pc)) - __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val); + if (file) + __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages); else - __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val); + __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages); - if (charge) + /* pagein of a big page is an event. So, ignore page size */ + if (nr_pages > 0) __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]); else __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]); - __this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]); + + __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages); preempt_enable(); } @@ -815,7 +814,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) * removed from global LRU. */ mz = page_cgroup_zoneinfo(pc); - MEM_CGROUP_ZSTAT(mz, lru) -= 1; + /* huge page split is done under lru_lock. so, we have no races. */ + MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); if (mem_cgroup_is_root(pc->mem_cgroup)) return; VM_BUG_ON(list_empty(&pc->lru)); @@ -836,13 +836,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) return; pc = lookup_page_cgroup(page); - /* - * Used bit is set without atomic ops but after smp_wmb(). - * For making pc->mem_cgroup visible, insert smp_rmb() here. - */ - smp_rmb(); /* unused or root page is not rotated. */ - if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup)) + if (!PageCgroupUsed(pc)) + return; + /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ + smp_rmb(); + if (mem_cgroup_is_root(pc->mem_cgroup)) return; mz = page_cgroup_zoneinfo(pc); list_move(&pc->lru, &mz->lists[lru]); @@ -857,16 +856,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) return; pc = lookup_page_cgroup(page); VM_BUG_ON(PageCgroupAcctLRU(pc)); - /* - * Used bit is set without atomic ops but after smp_wmb(). - * For making pc->mem_cgroup visible, insert smp_rmb() here. - */ - smp_rmb(); if (!PageCgroupUsed(pc)) return; - + /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ + smp_rmb(); mz = page_cgroup_zoneinfo(pc); - MEM_CGROUP_ZSTAT(mz, lru) += 1; + /* huge page split is done under lru_lock. so, we have no races. */ + MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page); SetPageCgroupAcctLRU(pc); if (mem_cgroup_is_root(pc->mem_cgroup)) return; @@ -1030,14 +1026,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) return NULL; pc = lookup_page_cgroup(page); - /* - * Used bit is set without atomic ops but after smp_wmb(). - * For making pc->mem_cgroup visible, insert smp_rmb() here. - */ - smp_rmb(); if (!PageCgroupUsed(pc)) return NULL; - + /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ + smp_rmb(); mz = page_cgroup_zoneinfo(pc); if (!mz) return NULL; @@ -1615,7 +1607,7 @@ void mem_cgroup_update_page_stat(struct page *page, if (unlikely(!mem || !PageCgroupUsed(pc))) goto out; /* pc->mem_cgroup is unstable ? */ - if (unlikely(mem_cgroup_stealed(mem))) { + if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) { /* take a lock against to access pc->mem_cgroup */ move_lock_page_cgroup(pc, &flags); need_unlock = true; @@ -2084,14 +2076,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return mem; } -/* - * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be - * USED state. If already USED, uncharge and return. - */ -static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem, - struct page_cgroup *pc, - enum charge_type ctype) +static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, + struct page_cgroup *pc, + enum charge_type ctype, + int page_size) { + int nr_pages = page_size >> PAGE_SHIFT; + + /* try_charge() can return NULL to *memcg, taking care of it. */ + if (!mem) + return; + + lock_page_cgroup(pc); + if (unlikely(PageCgroupUsed(pc))) { + unlock_page_cgroup(pc); + mem_cgroup_cancel_charge(mem, page_size); + return; + } + /* + * we don't need page_cgroup_lock about tail pages, becase they are not + * accessed by any other context at this point. + */ pc->mem_cgroup = mem; /* * We access a page_cgroup asynchronously without lock_page_cgroup(). @@ -2115,35 +2120,7 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem, break; } - mem_cgroup_charge_statistics(mem, pc, true); -} - -static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, - struct page_cgroup *pc, - enum charge_type ctype, - int page_size) -{ - int i; - int count = page_size >> PAGE_SHIFT; - - /* try_charge() can return NULL to *memcg, taking care of it. */ - if (!mem) - return; - - lock_page_cgroup(pc); - if (unlikely(PageCgroupUsed(pc))) { - unlock_page_cgroup(pc); - mem_cgroup_cancel_charge(mem, page_size); - return; - } - - /* - * we don't need page_cgroup_lock about tail pages, becase they are not - * accessed by any other context at this point. - */ - for (i = 0; i < count; i++) - ____mem_cgroup_commit_charge(mem, pc + i, ctype); - + mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages); unlock_page_cgroup(pc); /* * "charge_statistics" updated event counter. Then, check it. @@ -2153,6 +2130,46 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, memcg_check_events(mem, pc->page); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ + (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION)) +/* + * Because tail pages are not marked as "used", set it. We're under + * zone->lru_lock, 'splitting on pmd' and compund_lock. + */ +void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) +{ + struct page_cgroup *head_pc = lookup_page_cgroup(head); + struct page_cgroup *tail_pc = lookup_page_cgroup(tail); + unsigned long flags; + + /* + * We have no races with charge/uncharge but will have races with + * page state accounting. + */ + move_lock_page_cgroup(head_pc, &flags); + + tail_pc->mem_cgroup = head_pc->mem_cgroup; + smp_wmb(); /* see __commit_charge() */ + if (PageCgroupAcctLRU(head_pc)) { + enum lru_list lru; + struct mem_cgroup_per_zone *mz; + + /* + * LRU flags cannot be copied because we need to add tail + *.page to LRU by generic call and our hook will be called. + * We hold lru_lock, then, reduce counter directly. + */ + lru = page_lru(head); + mz = page_cgroup_zoneinfo(head_pc); + MEM_CGROUP_ZSTAT(mz, lru) -= 1; + } + tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; + move_unlock_page_cgroup(head_pc, &flags); +} +#endif + /** * __mem_cgroup_move_account - move account of the page * @pc: page_cgroup of the page. @@ -2171,8 +2188,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, */ static void __mem_cgroup_move_account(struct page_cgroup *pc, - struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) + struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge, + int charge_size) { + int nr_pages = charge_size >> PAGE_SHIFT; + VM_BUG_ON(from == to); VM_BUG_ON(PageLRU(pc->page)); VM_BUG_ON(!page_is_cgroup_locked(pc)); @@ -2186,14 +2206,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); preempt_enable(); } - mem_cgroup_charge_statistics(from, pc, false); + mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages); if (uncharge) /* This is not "cancel", but cancel_charge does all we need. */ - mem_cgroup_cancel_charge(from, PAGE_SIZE); + mem_cgroup_cancel_charge(from, charge_size); /* caller should have done css_get */ pc->mem_cgroup = to; - mem_cgroup_charge_statistics(to, pc, true); + mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages); /* * We charges against "to" which may not have any tasks. Then, "to" * can be under rmdir(). But in current implementation, caller of @@ -2208,15 +2228,19 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, * __mem_cgroup_move_account() */ static int mem_cgroup_move_account(struct page_cgroup *pc, - struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) + struct mem_cgroup *from, struct mem_cgroup *to, + bool uncharge, int charge_size) { int ret = -EINVAL; unsigned long flags; + if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page)) + return -EBUSY; + lock_page_cgroup(pc); if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { move_lock_page_cgroup(pc, &flags); - __mem_cgroup_move_account(pc, from, to, uncharge); + __mem_cgroup_move_account(pc, from, to, uncharge, charge_size); move_unlock_page_cgroup(pc, &flags); ret = 0; } @@ -2241,6 +2265,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, struct cgroup *cg = child->css.cgroup; struct cgroup *pcg = cg->parent; struct mem_cgroup *parent; + int charge = PAGE_SIZE; + unsigned long flags; int ret; /* Is ROOT ? */ @@ -2252,17 +2278,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, goto out; if (isolate_lru_page(page)) goto put; + /* The page is isolated from LRU and we have no race with splitting */ + charge = PAGE_SIZE << compound_order(page); parent = mem_cgroup_from_cont(pcg); - ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, - PAGE_SIZE); + ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge); if (ret || !parent) goto put_back; - ret = mem_cgroup_move_account(pc, child, parent, true); + if (charge > PAGE_SIZE) + flags = compound_lock_irqsave(page); + + ret = mem_cgroup_move_account(pc, child, parent, true, charge); if (ret) - mem_cgroup_cancel_charge(parent, PAGE_SIZE); + mem_cgroup_cancel_charge(parent, charge); put_back: + if (charge > PAGE_SIZE) + compound_unlock_irqrestore(page, flags); putback_lru_page(page); put: put_page(page); @@ -2546,7 +2578,6 @@ direct_uncharge: static struct mem_cgroup * __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) { - int i; int count; struct page_cgroup *pc; struct mem_cgroup *mem = NULL; @@ -2596,8 +2627,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) break; } - for (i = 0; i < count; i++) - mem_cgroup_charge_statistics(mem, pc + i, false); + mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count); ClearPageCgroupUsed(pc); /* @@ -4844,7 +4874,7 @@ retry: goto put; pc = lookup_page_cgroup(page); if (!mem_cgroup_move_account(pc, - mc.from, mc.to, false)) { + mc.from, mc.to, false, PAGE_SIZE)) { mc.precharge--; /* we uncharge from mc.from later. */ mc.moved_charge++; diff --git a/mm/truncate.c b/mm/truncate.c index 3c2d5ddfa0d4..49feb46e77b8 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -549,13 +549,12 @@ EXPORT_SYMBOL(truncate_pagecache); * @inode: inode * @newsize: new file size * - * truncate_setsize updastes i_size update and performs pagecache - * truncation (if necessary) for a file size updates. It will be - * typically be called from the filesystem's setattr function when - * ATTR_SIZE is passed in. + * truncate_setsize updates i_size and performs pagecache truncation (if + * necessary) to @newsize. It will be typically be called from the filesystem's + * setattr function when ATTR_SIZE is passed in. * - * Must be called with inode_mutex held and after all filesystem - * specific block truncation has been performed. + * Must be called with inode_mutex held and before all filesystem specific + * block truncation has been performed. */ void truncate_setsize(struct inode *inode, loff_t newsize) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 47a50962ce81..f5d90dedebba 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -41,7 +41,6 @@ #include <linux/memcontrol.h> #include <linux/delayacct.h> #include <linux/sysctl.h> -#include <linux/compaction.h> #include <asm/tlbflush.h> #include <asm/div64.h> |