author     Linus Torvalds <torvalds@linux-foundation.org>    2021-06-29 17:29:11 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>    2021-06-29 17:29:11 -0700
commit     65090f30ab791810a3dc840317e57df05018559c (patch)
tree       f417526656da37109777e89613e140ffc59228bc /include
parent     349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 (diff)
parent     0ed950d1f28142ccd9a9453c60df87853530d778 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"191 patches.
Subsystems affected by this patch series: kthread, ia64, scripts,
ntfs, squashfs, ocfs2, kernel/watchdog, and mm (gup, pagealloc, slab,
slub, kmemleak, dax, debug, pagecache, gup, swap, memcg, pagemap,
mprotect, bootmem, dma, tracing, vmalloc, kasan, initialization,
pagealloc, and memory-failure)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (191 commits)
mm,hwpoison: make get_hwpoison_page() call get_any_page()
mm,hwpoison: send SIGBUS with error virtual address
mm/page_alloc: split pcp->high across all online CPUs for cpuless nodes
mm/page_alloc: allow high-order pages to be stored on the per-cpu lists
mm: replace CONFIG_FLAT_NODE_MEM_MAP with CONFIG_FLATMEM
mm: replace CONFIG_NEED_MULTIPLE_NODES with CONFIG_NUMA
docs: remove description of DISCONTIGMEM
arch, mm: remove stale mentions of DISCONTIGMEM
mm: remove CONFIG_DISCONTIGMEM
m68k: remove support for DISCONTIGMEM
arc: remove support for DISCONTIGMEM
arc: update comment about HIGHMEM implementation
alpha: remove DISCONTIGMEM and NUMA
mm/page_alloc: move free_the_page
mm/page_alloc: fix counting of managed_pages
mm/page_alloc: improve memmap_pages dbg msg
mm: drop SECTION_SHIFT in code comments
mm/page_alloc: introduce vm.percpu_pagelist_high_fraction
mm/page_alloc: limit the number of pages on PCP lists when reclaim is active
mm/page_alloc: scale the number of pages that are batch freed
...
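One small but widely useful addition in the include/linux/mm.h hunk of the diff below is the new vma_lookup() helper, which returns a VMA only if the given address actually falls inside it, unlike find_vma(), which can return the next VMA above the address. The following is a minimal sketch of how a caller might use it; the helper name addr_is_mapped() is hypothetical, and mmap_lock is assumed to be held by the caller, as both find_vma() and vma_lookup() require.

#include <linux/mm.h>

/*
 * Illustrative sketch only: the helper name is hypothetical, and the caller
 * is assumed to hold mmap_lock, as both find_vma() and vma_lookup() require.
 */
static bool addr_is_mapped(struct mm_struct *mm, unsigned long addr)
{
	/*
	 * Old open-coded pattern: find_vma() returns the first VMA that ends
	 * above addr, so callers also had to check vma->vm_start:
	 *
	 *	vma = find_vma(mm, addr);
	 *	if (!vma || addr < vma->vm_start)
	 *		return false;
	 *
	 * With vma_lookup() from this merge, NULL is returned unless addr
	 * actually lies inside a VMA, so the check collapses to:
	 */
	return vma_lookup(mm, addr) != NULL;
}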
Diffstat (limited to 'include')
39 files changed, 342 insertions, 215 deletions
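The include/linux/gfp.h hunk in the diff that follows also adds alloc_pages_bulk_array_node(), a node-aware wrapper around __alloc_pages_bulk() that falls back to the local node when passed NUMA_NO_NODE. Below is a minimal sketch of how a caller might fill an array of pages on a preferred node; the function name, sizing, and error handling are illustrative and not taken from the series.

#include <linux/gfp.h>
#include <linux/printk.h>

/*
 * Illustrative sketch only: fill_page_array() is hypothetical. The bulk
 * allocator populates empty (NULL) slots of @pages with up to @nr order-0
 * pages allocated on @nid (or the local node if @nid is NUMA_NO_NODE) and
 * returns the number of populated entries, which may be fewer than requested.
 */
static unsigned long fill_page_array(int nid, struct page **pages,
				     unsigned long nr)
{
	unsigned long got;

	got = alloc_pages_bulk_array_node(GFP_KERNEL, nid, nr, pages);
	if (got < nr)
		pr_debug("bulk alloc: got %lu of %lu pages\n", got, nr);

	return got;
}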
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 7637fb46ba4f..a2c8ed60233a 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -6,47 +6,18 @@ #ifndef __ASSEMBLY__ +/* + * supports 3 memory models. + */ #if defined(CONFIG_FLATMEM) #ifndef ARCH_PFN_OFFSET #define ARCH_PFN_OFFSET (0UL) #endif -#elif defined(CONFIG_DISCONTIGMEM) - -#ifndef arch_pfn_to_nid -#define arch_pfn_to_nid(pfn) pfn_to_nid(pfn) -#endif - -#ifndef arch_local_page_offset -#define arch_local_page_offset(pfn, nid) \ - ((pfn) - NODE_DATA(nid)->node_start_pfn) -#endif - -#endif /* CONFIG_DISCONTIGMEM */ - -/* - * supports 3 memory models. - */ -#if defined(CONFIG_FLATMEM) - #define __pfn_to_page(pfn) (mem_map + ((pfn) - ARCH_PFN_OFFSET)) #define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \ ARCH_PFN_OFFSET) -#elif defined(CONFIG_DISCONTIGMEM) - -#define __pfn_to_page(pfn) \ -({ unsigned long __pfn = (pfn); \ - unsigned long __nid = arch_pfn_to_nid(__pfn); \ - NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\ -}) - -#define __page_to_pfn(pg) \ -({ const struct page *__pg = (pg); \ - struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ - (unsigned long)(__pg - __pgdat->node_mem_map) + \ - __pgdat->node_start_pfn; \ -}) #elif defined(CONFIG_SPARSEMEM_VMEMMAP) @@ -70,7 +41,7 @@ struct mem_section *__sec = __pfn_to_section(__pfn); \ __section_mem_map_addr(__sec) + __pfn; \ }) -#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */ +#endif /* CONFIG_FLATMEM/SPARSEMEM */ /* * Convert a physical address to a Page Frame Number and back diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index ce2cbb3c380f..2f6b1befb129 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h @@ -9,7 +9,6 @@ typedef struct { pgd_t pgd; } p4d_t; #define P4D_SHIFT PGDIR_SHIFT -#define MAX_PTRS_PER_P4D 1 #define PTRS_PER_P4D 1 #define P4D_SIZE (1UL << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE-1)) diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 5aa8705df87e..4dbe715be65b 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -45,7 +45,7 @@ #endif #ifndef cpumask_of_node - #ifdef CONFIG_NEED_MULTIPLE_NODES + #ifdef CONFIG_NUMA #define cpumask_of_node(node) ((node) == 0 ? 
cpu_online_mask : cpu_none_mask) #else #define cpumask_of_node(node) ((void)(node), cpu_online_mask) diff --git a/include/kunit/test.h b/include/kunit/test.h index 49601c4b98b8..524d4789af22 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -515,8 +515,9 @@ kunit_find_resource(struct kunit *test, void *match_data) { struct kunit_resource *res, *found = NULL; + unsigned long flags; - spin_lock(&test->lock); + spin_lock_irqsave(&test->lock, flags); list_for_each_entry_reverse(res, &test->resources, node) { if (match(test, res, (void *)match_data)) { @@ -526,7 +527,7 @@ kunit_find_resource(struct kunit *test, } } - spin_unlock(&test->lock); + spin_unlock_irqrestore(&test->lock, flags); return found; } diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index fff9367a6348..1d7edad9914f 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -154,6 +154,8 @@ struct bdi_writeback { struct cgroup_subsys_state *blkcg_css; /* and blkcg */ struct list_head memcg_node; /* anchored at memcg->cgwb_list */ struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ + struct list_head b_attached; /* attached inodes, protected by list_lock */ + struct list_head offline_node; /* anchored at offline_cgwbs */ union { struct work_struct release_work; @@ -239,8 +241,9 @@ static inline void wb_get(struct bdi_writeback *wb) /** * wb_put - decrement a wb's refcount * @wb: bdi_writeback to put + * @nr: number of references to put */ -static inline void wb_put(struct bdi_writeback *wb) +static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr) { if (WARN_ON_ONCE(!wb->bdi)) { /* @@ -251,7 +254,16 @@ static inline void wb_put(struct bdi_writeback *wb) } if (wb != &wb->bdi->wb) - percpu_ref_put(&wb->refcnt); + percpu_ref_put_many(&wb->refcnt, nr); +} + +/** + * wb_put - decrement a wb's refcount + * @wb: bdi_writeback to put + */ +static inline void wb_put(struct bdi_writeback *wb) +{ + wb_put_many(wb, 1); } /** @@ -280,6 +292,10 @@ static inline void wb_put(struct bdi_writeback *wb) { } +static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr) +{ +} + static inline bool wb_dying(struct bdi_writeback *wb) { return false; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4a62b3980642..47e13582d9fc 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -54,7 +54,7 @@ enum cpuhp_state { CPUHP_MM_MEMCQ_DEAD, CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, - CPUHP_PAGE_ALLOC_DEAD, + CPUHP_PAGE_ALLOC, CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_IOVA_DEAD, diff --git a/include/linux/fs.h b/include/linux/fs.h index c3c88fdb9b2a..fad6663cd1b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3417,18 +3417,14 @@ extern int simple_rename(struct user_namespace *, struct inode *, extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); -extern int noop_set_page_dirty(struct page *page); extern void noop_invalidatepage(struct page *page, unsigned int offset, unsigned int length); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); -extern int simple_readpage(struct file *file, struct page *page); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); -extern int simple_write_end(struct file *file, struct 
address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); +extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); extern int simple_nosetlease(struct file *, long, struct file_lock **, void **); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e6102dfa4faa..55b2ec1f965a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -506,8 +506,8 @@ static inline int gfp_zonelist(gfp_t flags) * There are two zonelists per node, one for all zones with memory and * one containing just zones from the node the zonelist belongs to. * - * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets - * optimized to &contig_page_data at compile-time. + * For the case of non-NUMA systems the NODE_DATA() gets optimized to + * &contig_page_data at compile-time. */ static inline struct zonelist *node_zonelist(int nid, gfp_t flags) { @@ -548,6 +548,15 @@ alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_arr return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array); } +static inline unsigned long +alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) +{ + if (nid == NUMA_NO_NODE) + nid = numa_mem_id(); + + return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). diff --git a/include/linux/iomap.h b/include/linux/iomap.h index c87d0cb0de6d..479c1da3e221 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -159,7 +159,6 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); -int iomap_set_page_dirty(struct page *page); int iomap_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count); int iomap_releasepage(struct page *page, gfp_t gfp_mask); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index a1c7ce5f3e4f..5310e217bd74 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,7 +18,6 @@ struct task_struct; /* kasan_data struct is used in KUnit tests for KASAN expected failures */ struct kunit_kasan_expectation { - bool report_expected; bool report_found; }; @@ -42,9 +41,9 @@ struct kunit_kasan_expectation { #endif extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; -extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]; -extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; -extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; +extern pte_t kasan_early_shadow_pte[MAX_PTRS_PER_PTE + PTE_HWTABLE_PTRS]; +extern pmd_t kasan_early_shadow_pmd[MAX_PTRS_PER_PMD]; +extern pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 15d8bad3d2f2..bf950621febf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,6 +357,8 @@ int sscanf(const char *, const char *, ...); extern __scanf(2, 0) int vsscanf(const char *, const char *, va_list); +extern int no_hash_pointers_enable(char *str); + extern int get_option(char **str, int 
*pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index d9133d6db308..346b0f269161 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -18,7 +18,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), * @threadfn: the function to run in the thread * @data: data pointer for @threadfn() * @namefmt: printf-style format string for the thread name - * @arg...: arguments for @namefmt. + * @arg: arguments for @namefmt. * * This macro will create a kthread on the current node, leaving it in * the stopped state. This is just a helper for kthread_create_on_node(); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 5984fff3f175..552309342c38 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,7 +50,7 @@ struct memblock_region { phys_addr_t base; phys_addr_t size; enum memblock_flags flags; -#ifdef CONFIG_NEED_MULTIPLE_NODES +#ifdef CONFIG_NUMA int nid; #endif }; @@ -347,7 +347,7 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); int memblock_set_node(phys_addr_t base, phys_addr_t size, struct memblock_type *type, int nid); -#ifdef CONFIG_NEED_MULTIPLE_NODES +#ifdef CONFIG_NUMA static inline void memblock_set_region_node(struct memblock_region *r, int nid) { r->nid = nid; @@ -366,7 +366,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r) { return 0; } -#endif /* CONFIG_NEED_MULTIPLE_NODES */ +#endif /* CONFIG_NUMA */ /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c193be760709..6d66037be646 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -192,7 +192,7 @@ enum memcg_kmem_state { struct memcg_padding { char x[0]; } ____cacheline_internodealigned_in_smp; -#define MEMCG_PADDING(name) struct memcg_padding name; +#define MEMCG_PADDING(name) struct memcg_padding name #else #define MEMCG_PADDING(name) #endif @@ -349,8 +349,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif - struct mem_cgroup_per_node *nodeinfo[0]; - /* WARNING: nodeinfo must be the last member here */ + struct mem_cgroup_per_node *nodeinfo[]; }; /* @@ -743,35 +742,18 @@ out: /** * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page * @page: the page - * @pgdat: pgdat of the page * * This function relies on page->mem_cgroup being stable. */ -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, - struct pglist_data *pgdat) +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) { + pg_data_t *pgdat = page_pgdat(page); struct mem_cgroup *memcg = page_memcg(page); VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); return mem_cgroup_lruvec(memcg, pgdat); } -static inline bool lruvec_holds_page_lru_lock(struct page *page, - struct lruvec *lruvec) -{ - pg_data_t *pgdat = page_pgdat(page); - const struct mem_cgroup *memcg; - struct mem_cgroup_per_node *mz; - - if (mem_cgroup_disabled()) - return lruvec == &pgdat->__lruvec; - - mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - memcg = page_memcg(page) ? 
: root_mem_cgroup; - - return lruvec->pgdat == pgdat && mz->memcg == memcg; -} - struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); @@ -1221,18 +1203,11 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, return &pgdat->__lruvec; } -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, - struct pglist_data *pgdat) -{ - return &pgdat->__lruvec; -} - -static inline bool lruvec_holds_page_lru_lock(struct page *page, - struct lruvec *lruvec) +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) { pg_data_t *pgdat = page_pgdat(page); - return lruvec == &pgdat->__lruvec; + return &pgdat->__lruvec; } static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) @@ -1255,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) return NULL; } +static inline +struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) +{ + return NULL; +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } @@ -1516,12 +1497,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, spin_unlock_irqrestore(&lruvec->lru_lock, flags); } +/* Test requires a stable page->memcg binding, see page_memcg() */ +static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) +{ + return lruvec_pgdat(lruvec) == page_pgdat(page) && + lruvec_memcg(lruvec) == page_memcg(page); +} + /* Don't lock again iff page's lruvec locked */ static inline struct lruvec *relock_page_lruvec_irq(struct page *page, struct lruvec *locked_lruvec) { if (locked_lruvec) { - if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irq(locked_lruvec); @@ -1535,7 +1523,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, struct lruvec *locked_lruvec, unsigned long *flags) { if (locked_lruvec) { - if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irqrestore(locked_lruvec, *flags); diff --git a/include/linux/mm.h b/include/linux/mm.h index 01ecf9e3603c..6d0f827ca4eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -46,7 +46,7 @@ extern int sysctl_page_lock_unfairness; void init_mm_internals(void); -#ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ +#ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; static inline void set_max_mapnr(unsigned long limit) @@ -234,7 +234,11 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp, void **shadowp); +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) +#else +#define nth_page(page,n) ((page) + (n)) +#endif /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) @@ -1341,7 +1345,7 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, if (!is_cow_mapping(vma->vm_flags)) return false; - if (!atomic_read(&vma->vm_mm->has_pinned)) + if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) return false; return page_maybe_dma_pinned(page); @@ -1850,12 +1854,8 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void 
do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); -void __set_page_dirty(struct page *, struct address_space *, int warn); -int __set_page_dirty_nobuffers(struct page *page); -int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -void account_page_dirtied(struct page *page, struct address_space *mapping); void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb); int set_page_dirty(struct page *page); @@ -2420,7 +2420,7 @@ static inline unsigned long free_initmem_default(int poison) extern char __init_begin[], __init_end[]; return free_reserved_area(&__init_begin, &__init_end, - poison, "unused kernel"); + poison, "unused kernel image (initmem)"); } static inline unsigned long get_num_physpages(void) @@ -2460,7 +2460,7 @@ extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); extern unsigned long find_min_pfn_with_active_regions(void); -#ifndef CONFIG_NEED_MULTIPLE_NODES +#ifndef CONFIG_NUMA static inline int early_pfn_to_nid(unsigned long pfn) { return 0; @@ -2474,7 +2474,6 @@ extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int migratetype); -extern void memmap_init_zone(struct zone *zone); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); @@ -2681,17 +2680,45 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, struct vm_area_struct **pprev); -/* Look up the first VMA which intersects the interval start_addr..end_addr-1, - NULL if none. Assume start_addr < end_addr. */ -static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) +/** + * find_vma_intersection() - Look up the first VMA which intersects the interval + * @mm: The process address space. + * @start_addr: The inclusive start user address. + * @end_addr: The exclusive end user address. + * + * Returns: The first VMA within the provided range, %NULL otherwise. Assumes + * start_addr < end_addr. + */ +static inline +struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, + unsigned long start_addr, + unsigned long end_addr) { - struct vm_area_struct * vma = find_vma(mm,start_addr); + struct vm_area_struct *vma = find_vma(mm, start_addr); if (vma && end_addr <= vma->vm_start) vma = NULL; return vma; } +/** + * vma_lookup() - Find a VMA at a specific address + * @mm: The process address space. + * @addr: The user address. + * + * Return: The vm_area_struct at the given address, %NULL otherwise. + */ +static inline +struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + + if (vma && addr < vma->vm_start) + vma = NULL; + + return vma; +} + static inline unsigned long vm_start_gap(struct vm_area_struct *vma) { unsigned long vm_start = vma->vm_start; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8f0fb62e8975..b66d0225414e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -435,16 +435,6 @@ struct mm_struct { */ atomic_t mm_count; - /** - * @has_pinned: Whether this mm has pinned any pages. 
This can - * be either replaced in the future by @pinned_vm when it - * becomes stable, or grow into a counter on its own. We're - * aggresive on this bit now - even if the pinned pages were - * unpinned later on, we'll still keep this bit set for the - * lifecycle of this mm just for simplicity. - */ - atomic_t has_pinned; - #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif diff --git a/include/linux/mman.h b/include/linux/mman.h index 629cefc4ecba..ebb09a964272 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -31,6 +31,8 @@ /* * The historical set of flags that all mmap implementations implicitly * support when a ->mmap_validate() op is not provided in file_operations. + * + * MAP_EXECUTABLE is completely ignored throughout the kernel. */ #define LEGACY_MAP_MASK (MAP_SHARED \ | MAP_PRIVATE \ diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 5d0767cb424a..1935d4c72d10 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -9,8 +9,7 @@ struct page; struct vm_area_struct; struct mm_struct; -extern void dump_page(struct page *page, const char *reason); -extern void __dump_page(struct page *page, const char *reason); +void dump_page(struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0d53eba1c383..265a32e1ff74 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -20,6 +20,7 @@ #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/page-flags.h> +#include <linux/local_lock.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ @@ -134,10 +135,10 @@ enum numa_stat_item { NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ - NR_VM_NUMA_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS }; #else -#define NR_VM_NUMA_STAT_ITEMS 0 +#define NR_VM_NUMA_EVENT_ITEMS 0 #endif enum zone_stat_item { @@ -332,29 +333,55 @@ enum zone_watermarks { NR_WMARK }; +/* + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional + * for pageblock size for THP if configured. + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define NR_PCP_THP 1 +#else +#define NR_PCP_THP 0 +#endif +#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP)) + +/* + * Shift to encode migratetype and order in the same integer, with order + * in the least significant bits. 
+ */ +#define NR_PCP_ORDER_WIDTH 8 +#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1) + #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) +/* Fields and list protected by pagesets local_lock in page_alloc.c */ struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ int batch; /* chunk size for buddy add/remove */ + short free_factor; /* batch scaling factor during free */ +#ifdef CONFIG_NUMA + short expire; /* When 0, remote pagesets are drained */ +#endif /* Lists of pages, one per migrate type stored on the pcp-lists */ - struct list_head lists[MIGRATE_PCPTYPES]; + struct list_head lists[NR_PCP_LISTS]; }; -struct per_cpu_pageset { - struct per_cpu_pages pcp; -#ifdef CONFIG_NUMA - s8 expire; - u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; -#endif +struct per_cpu_zonestat { #ifdef CONFIG_SMP - s8 stat_threshold; s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; + s8 stat_threshold; +#endif +#ifdef CONFIG_NUMA + /* + * Low priority inaccurate counters that are only folded + * on demand. Use a large type to avoid the overhead of + * folding during refresh_cpu_vm_stats. + */ + unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #endif }; @@ -484,7 +511,8 @@ struct zone { int node; #endif struct pglist_data *zone_pgdat; - struct per_cpu_pageset __percpu *pageset; + struct per_cpu_pages __percpu *per_cpu_pageset; + struct per_cpu_zonestat __percpu *per_cpu_zonestats; /* * the high and batch values are copied to individual pagesets for * faster access @@ -619,7 +647,7 @@ struct zone { ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; - atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; + atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { @@ -637,6 +665,7 @@ enum zone_flags { ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks. * Cleared when kswapd is woken. */ + ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */ }; static inline unsigned long zone_managed_pages(struct zone *zone) @@ -738,10 +767,12 @@ struct zonelist { struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; }; -#ifndef CONFIG_DISCONTIGMEM -/* The array of struct pages - for discontigmem use pgdat->lmem_map */ +/* + * The array of struct pages for flatmem. + * It must be declared for SPARSEMEM as well because there are configurations + * that rely on that. 
+ */ extern struct page *mem_map; -#endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split { @@ -775,7 +806,7 @@ typedef struct pglist_data { struct zonelist node_zonelists[MAX_ZONELISTS]; int nr_zones; /* number of populated zones in this node */ -#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ +#ifdef CONFIG_FLATMEM /* means !SPARSEMEM */ struct page *node_mem_map; #ifdef CONFIG_PAGE_EXTENSION struct page_ext *node_page_ext; @@ -865,7 +896,7 @@ typedef struct pglist_data { #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) -#ifdef CONFIG_FLAT_NODE_MEM_MAP +#ifdef CONFIG_FLATMEM #define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) #else #define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) @@ -982,22 +1013,11 @@ static inline void zone_set_nid(struct zone *zone, int nid) {} extern int movable_zone; -#ifdef CONFIG_HIGHMEM -static inline int zone_movable_is_highmem(void) -{ -#ifdef CONFIG_NEED_MULTIPLE_NODES - return movable_zone == ZONE_HIGHMEM; -#else - return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; -#endif -} -#endif - static inline int is_highmem_idx(enum zone_type idx) { #ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM || - (idx == ZONE_MOVABLE && zone_movable_is_highmem())); + (idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM)); #else return 0; #endif @@ -1029,7 +1049,7 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *, extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, +int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); @@ -1037,21 +1057,21 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int numa_zonelist_order_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -extern int percpu_pagelist_fraction; +extern int percpu_pagelist_high_fraction; extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 -#ifndef CONFIG_NEED_MULTIPLE_NODES +#ifndef CONFIG_NUMA extern struct pglist_data contig_page_data; #define NODE_DATA(nid) (&contig_page_data) #define NODE_MEM_MAP(nid) mem_map -#else /* CONFIG_NEED_MULTIPLE_NODES */ +#else /* CONFIG_NUMA */ #include <asm/mmzone.h> -#endif /* !CONFIG_NEED_MULTIPLE_NODES */ +#endif /* !CONFIG_NUMA */ extern struct pglist_data *first_online_pgdat(void); extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); @@ -1200,8 +1220,6 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #ifdef CONFIG_SPARSEMEM /* - * SECTION_SHIFT #bits space required to store a section # - * * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 40e2c5000585..458696550028 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -180,17 +180,17 @@ enum pageflags { #ifndef __GENERATING_BOUNDS_H -struct page; /* forward declaration */ - -static inline struct page *compound_head(struct page *page) +static inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head 
& 1)) - return (struct page *) (head - 1); - return page; + return head - 1; + return (unsigned long)page; } +#define compound_head(page) ((typeof(page))_compound_head(page)) + static __always_inline int PageTail(struct page *page) { return READ_ONCE(page->compound_head) & 1; diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 3468794f83d2..719bfe5108c5 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page, extern void __split_page_owner(struct page *page, unsigned int nr); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); -extern void __dump_page_owner(struct page *page); +extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone); @@ -46,7 +46,7 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason) if (static_branch_unlikely(&page_owner_inited)) __set_page_owner_migrate_reason(page, reason); } -static inline void dump_page_owner(struct page *page) +static inline void dump_page_owner(const struct page *page) { if (static_branch_unlikely(&page_owner_inited)) __dump_page_owner(page); @@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) static inline void set_page_owner_migrate_reason(struct page *page, int reason) { } -static inline void dump_page_owner(struct page *page) +static inline void dump_page_owner(const struct page *page) { } #endif /* CONFIG_PAGE_OWNER */ diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index f3318f34fc54..7ad46f45df39 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -62,12 +62,12 @@ static inline void __page_ref_unfreeze(struct page *page, int v) #endif -static inline int page_ref_count(struct page *page) +static inline int page_ref_count(const struct page *page) { return atomic_read(&page->_refcount); } -static inline int page_count(struct page *page) +static inline int page_count(const struct page *page) { return atomic_read(&compound_head(page)->_refcount); } diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h index 3b99e0ec24f2..fe648dfa3a7c 100644 --- a/include/linux/page_reporting.h +++ b/include/linux/page_reporting.h @@ -18,6 +18,9 @@ struct page_reporting_dev_info { /* Current state of page reporting */ atomic_t state; + + /* Minimal order of page reporting */ + unsigned int order; }; /* Tear-down and bring-up for page reporting devices */ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index fff52ad370c1..973fd731a520 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -54,7 +54,7 @@ extern unsigned int pageblock_order; /* Forward declaration */ struct page; -unsigned long get_pfnblock_flags_mask(struct page *page, +unsigned long get_pfnblock_flags_mask(const struct page *page, unsigned long pfn, unsigned long mask); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0f1b34dbf3a2..ed02aa522263 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -702,6 +702,10 @@ int wait_on_page_writeback_killable(struct page *page); extern void end_page_writeback(struct page *page); void wait_for_stable_page(struct page *page); +void __set_page_dirty(struct page *, struct address_space *, int warn); +int 
__set_page_dirty_nobuffers(struct page *page); +int __set_page_dirty_no_writeback(struct page *page); + void page_endio(struct page *page, bool is_write, int err); /** diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a43047b1030d..c32600c9e1ad 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1592,4 +1592,26 @@ typedef unsigned int pgtbl_mod_mask; #define pte_leaf_size(x) PAGE_SIZE #endif +/* + * Some architectures have MMUs that are configurable or selectable at boot + * time. These lead to variable PTRS_PER_x. For statically allocated arrays it + * helps to have a static maximum value. + */ + +#ifndef MAX_PTRS_PER_PTE +#define MAX_PTRS_PER_PTE PTRS_PER_PTE +#endif + +#ifndef MAX_PTRS_PER_PMD +#define MAX_PTRS_PER_PMD PTRS_PER_PMD +#endif + +#ifndef MAX_PTRS_PER_PUD +#define MAX_PTRS_PER_PUD PTRS_PER_PUD +#endif + +#ifndef MAX_PTRS_PER_P4D +#define MAX_PTRS_PER_P4D PTRS_PER_P4D +#endif + #endif /* _LINUX_PGTABLE_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 1790a5521fd9..d796183f26c9 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -206,6 +206,7 @@ void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); +extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; extern void printk_safe_flush(void); extern void printk_safe_flush_on_panic(void); @@ -269,6 +270,10 @@ static inline void show_regs_print_info(const char *log_lvl) { } +static inline void dump_stack_lvl(const char *log_lvl) +{ +} + static inline void dump_stack(void) { } diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index dfd82eab2902..4d9e3a656875 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -73,6 +73,14 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 27 /* mm is shared between processes */ +/* + * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either + * replaced in the future by mm.pinned_vm when it becomes stable, or grow into + * a counter on its own. We're aggresive on this bit for now: even if the + * pinned pages were unpinned later on, we'll still keep this bit set for the + * lifecycle of this mm, just for simplicity. + */ +#define MMF_HAS_PINNED 28 /* FOLL_PIN has run, never cleared */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/include/linux/slab.h b/include/linux/slab.h index 0c97d788762c..083f3ce550bc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -305,9 +305,21 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, /* * Whenever changing this, take care of that kmalloc_type() and * create_kmalloc_caches() still work as intended. + * + * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP + * is for accounted but unreclaimable and non-dma objects. All the other + * kmem caches can have both accounted and unaccounted objects. 
*/ enum kmalloc_cache_type { KMALLOC_NORMAL = 0, +#ifndef CONFIG_ZONE_DMA + KMALLOC_DMA = KMALLOC_NORMAL, +#endif +#ifndef CONFIG_MEMCG_KMEM + KMALLOC_CGROUP = KMALLOC_NORMAL, +#else + KMALLOC_CGROUP, +#endif KMALLOC_RECLAIM, #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, @@ -319,24 +331,36 @@ enum kmalloc_cache_type { extern struct kmem_cache * kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; +/* + * Define gfp bits that should not be set for KMALLOC_NORMAL. + */ +#define KMALLOC_NOT_NORMAL_BITS \ + (__GFP_RECLAIMABLE | \ + (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ + (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0)) + static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) { -#ifdef CONFIG_ZONE_DMA /* * The most common case is KMALLOC_NORMAL, so test for it - * with a single branch for both flags. + * with a single branch for all the relevant flags. */ - if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0)) + if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) return KMALLOC_NORMAL; /* - * At least one of the flags has to be set. If both are, __GFP_DMA - * is more important. + * At least one of the flags has to be set. Their priorities in + * decreasing order are: + * 1) __GFP_DMA + * 2) __GFP_RECLAIMABLE + * 3) __GFP_ACCOUNT */ - return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM; -#else - return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL; -#endif + if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA)) + return KMALLOC_DMA; + if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE)) + return KMALLOC_RECLAIM; + else + return KMALLOC_CGROUP; } /* @@ -346,8 +370,14 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) * 1 = 65 .. 96 bytes * 2 = 129 .. 192 bytes * n = 2^(n-1)+1 .. 2^n + * + * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized; + * typical usage is via kmalloc_index() and therefore evaluated at compile-time. + * Callers where !size_is_constant should only be test modules, where runtime + * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab(). */ -static __always_inline unsigned int kmalloc_index(size_t size) +static __always_inline unsigned int __kmalloc_index(size_t size, + bool size_is_constant) { if (!size) return 0; @@ -382,12 +412,17 @@ static __always_inline unsigned int kmalloc_index(size_t size) if (size <= 8 * 1024 * 1024) return 23; if (size <= 16 * 1024 * 1024) return 24; if (size <= 32 * 1024 * 1024) return 25; - if (size <= 64 * 1024 * 1024) return 26; - BUG(); + + if ((IS_ENABLED(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 110000) + && !IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) + BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); + else + BUG(); /* Will never be reached. Needed because the compiler may complain */ return -1; } +#define kmalloc_index(s) __kmalloc_index(s, true) #endif /* !CONFIG_SLOB */ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; diff --git a/include/linux/swap.h b/include/linux/swap.h index 144727041e78..49b1dd2c100b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -177,7 +177,6 @@ enum { SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ - SWP_VALID = (1 << 13), /* swap is valid to be operated on? */ /* add others here before... 
*/ SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; @@ -240,6 +239,7 @@ struct swap_cluster_list { * The in-memory structure used to track swap areas. */ struct swap_info_struct { + struct percpu_ref users; /* indicate and keep swap device valid. */ unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ @@ -260,6 +260,7 @@ struct swap_info_struct { struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ + struct completion comp; /* seldom referenced */ #ifdef CONFIG_FRONTSWAP unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ atomic_t frontswap_pages; /* frontswap pages in-use counter */ @@ -445,6 +446,7 @@ extern void __delete_from_swap_cache(struct page *page, extern void delete_from_swap_cache(struct page *); extern void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +extern void free_swap_cache(struct page *); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, @@ -511,7 +513,7 @@ sector_t swap_page_sector(struct page *page); static inline void put_swap_device(struct swap_info_struct *si) { - rcu_read_unlock(); + percpu_ref_put(&si->users); } #else /* CONFIG_SWAP */ @@ -526,6 +528,15 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) return NULL; } +static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) +{ + return NULL; +} + +static inline void put_swap_device(struct swap_info_struct *si) +{ +} + #define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L @@ -541,6 +552,10 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); +static inline void free_swap_cache(struct page *page) +{ +} + static inline void show_swap_cache_info(void) { } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6430a94c6981..5907205c712c 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -330,6 +330,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } +static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 3299cd69e4ca..d6a6cf53b127 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -138,34 +138,27 @@ static inline void vm_events_fold_cpu(int cpu) * Zone and node-based page accounting with per cpu differentials. 
*/ extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; -extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; +extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #ifdef CONFIG_NUMA -static inline void zone_numa_state_add(long x, struct zone *zone, - enum numa_stat_item item) +static inline void zone_numa_event_add(long x, struct zone *zone, + enum numa_stat_item item) { - atomic_long_add(x, &zone->vm_numa_stat[item]); - atomic_long_add(x, &vm_numa_stat[item]); + atomic_long_add(x, &zone->vm_numa_event[item]); + atomic_long_add(x, &vm_numa_event[item]); } -static inline unsigned long global_numa_state(enum numa_stat_item item) +static inline unsigned long zone_numa_event_state(struct zone *zone, + enum numa_stat_item item) { - long x = atomic_long_read(&vm_numa_stat[item]); - - return x; + return atomic_long_read(&zone->vm_numa_event[item]); } -static inline unsigned long zone_numa_state_snapshot(struct zone *zone, - enum numa_stat_item item) +static inline unsigned long +global_numa_event_state(enum numa_stat_item item) { - long x = atomic_long_read(&zone->vm_numa_stat[item]); - int cpu; - - for_each_online_cpu(cpu) - x += per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]; - - return x; + return atomic_long_read(&vm_numa_event[item]); } #endif /* CONFIG_NUMA */ @@ -236,7 +229,7 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, #ifdef CONFIG_SMP int cpu; for_each_online_cpu(cpu) - x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item]; + x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_stat_diff[item]; if (x < 0) x = 0; @@ -245,18 +238,38 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, } #ifdef CONFIG_NUMA -extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); +/* See __count_vm_event comment on why raw_cpu_inc is used. 
*/ +static inline void +__count_numa_event(struct zone *zone, enum numa_stat_item item) +{ + struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; + + raw_cpu_inc(pzstats->vm_numa_event[item]); +} + +static inline void +__count_numa_events(struct zone *zone, enum numa_stat_item item, long delta) +{ + struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; + + raw_cpu_add(pzstats->vm_numa_event[item], delta); +} + extern unsigned long sum_zone_node_page_state(int node, enum zone_stat_item item); -extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item); +extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); extern unsigned long node_page_state_pages(struct pglist_data *pgdat, enum node_stat_item item); +extern void fold_vm_numa_events(void); #else #define sum_zone_node_page_state(node, item) global_zone_page_state(item) #define node_page_state(node, item) global_node_page_state(item) #define node_page_state_pages(node, item) global_node_page_state_pages(item) +static inline void fold_vm_numa_events(void) +{ +} #endif /* CONFIG_NUMA */ #ifdef CONFIG_SMP @@ -291,7 +304,7 @@ struct ctl_table; int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, loff_t *ppos); -void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); +void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); @@ -399,7 +412,7 @@ static inline void cpu_vm_stats_fold(int cpu) { } static inline void quiet_vmstat(void) { } static inline void drain_zonestat(struct zone *zone, - struct per_cpu_pageset *pset) { } + struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, @@ -428,7 +441,7 @@ static inline const char *numa_stat_name(enum numa_stat_item item) static inline const char *node_stat_name(enum node_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_STAT_ITEMS + + NR_VM_NUMA_EVENT_ITEMS + item]; } @@ -440,7 +453,7 @@ static inline const char *lru_list_name(enum lru_list lru) static inline const char *writeback_stat_name(enum writeback_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_STAT_ITEMS + + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + item]; } @@ -449,7 +462,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_STAT_ITEMS + + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + NR_VM_WRITEBACK_STAT_ITEMS + item]; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 8e5c5bb16e2d..95de51c10248 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -221,6 +221,7 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages, enum wb_reason reason, struct wb_completion *done); void cgroup_writeback_umount(void); +bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** * inode_attach_wb - associate an inode with its wb diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h index c3d354702cb0..3d708dae1542 100644 --- a/include/trace/events/cma.h +++ b/include/trace/events/cma.h @@ -31,7 +31,7 
@@ DECLARE_EVENT_CLASS(cma_alloc_class, __entry->align = align; ), - TP_printk("name=%s pfn=%lx page=%p count=%lu align=%u", + TP_printk("name=%s pfn=0x%lx page=%p count=%lu align=%u", __get_str(name), __entry->pfn, __entry->page, @@ -60,7 +60,7 @@ TRACE_EVENT(cma_release, __entry->count = count; ), - TP_printk("name=%s pfn=%lx page=%p count=%lu", + TP_printk("name=%s pfn=0x%lx page=%p count=%lu", __get_str(name), __entry->pfn, __entry->page, diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index 796053e162d2..c47b63db124e 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -36,7 +36,7 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, __entry->s_dev = page->mapping->host->i_rdev; ), - TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu", + TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, pfn_to_page(__entry->pfn), diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 829a75692cc0..ddc8c944f417 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -173,7 +173,7 @@ TRACE_EVENT(mm_page_free, __entry->order = order; ), - TP_printk("page=%p pfn=%lu order=%d", + TP_printk("page=%p pfn=0x%lx order=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order) @@ -193,7 +193,7 @@ TRACE_EVENT(mm_page_free_batched, __entry->pfn = page_to_pfn(page); ), - TP_printk("page=%p pfn=%lu order=0", + TP_printk("page=%p pfn=0x%lx order=0", pfn_to_page(__entry->pfn), __entry->pfn) ); @@ -219,7 +219,7 @@ TRACE_EVENT(mm_page_alloc, __entry->migratetype = migratetype; ), - TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", + TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? __entry->pfn : 0, __entry->order, @@ -245,7 +245,7 @@ DECLARE_EVENT_CLASS(mm_page, __entry->migratetype = migratetype; ), - TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", + TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? 
__entry->pfn : 0, __entry->order, @@ -278,7 +278,7 @@ TRACE_EVENT(mm_page_pcpu_drain, __entry->migratetype = migratetype; ), - TP_printk("page=%p pfn=%lu order=%d migratetype=%d", + TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) ); @@ -312,7 +312,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, get_pageblock_migratetype(page)); ), - TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", + TP_printk("page=%p pfn=0x%lx alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->alloc_order, diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index ad0aa7f31675..ca534501158b 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -60,7 +60,7 @@ TRACE_EVENT(page_pool_state_release, __entry->pfn = page_to_pfn(page); ), - TP_printk("page_pool=%p page=%p pfn=%lu release=%u", + TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u", __entry->pool, __entry->page, __entry->pfn, __entry->release) ); @@ -85,7 +85,7 @@ TRACE_EVENT(page_pool_state_hold, __entry->pfn = page_to_pfn(page); ), - TP_printk("page_pool=%p page=%p pfn=%lu hold=%u", + TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u", __entry->pool, __entry->page, __entry->pfn, __entry->hold) ); diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index e1735fe7c76a..1d28431e85bd 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -46,7 +46,7 @@ TRACE_EVENT(mm_lru_insertion, ), /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=%lu lru=%d flags=%s%s%s%s%s%s", + TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", __entry->page, __entry->pfn, __entry->lru, @@ -75,7 +75,7 @@ TRACE_EVENT(mm_lru_activate, ), /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=%lu", __entry->page, __entry->pfn) + TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn) ); diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 2070df64958e..00d1180527d8 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -330,7 +330,7 @@ TRACE_EVENT(mm_vmscan_writepage, page_is_file_lru(page)); ), - TP_printk("page=%p pfn=%lu flags=%s", + TP_printk("page=%p pfn=0x%lx flags=%s", pfn_to_page(__entry->pfn), __entry->pfn, show_reclaim_flags(__entry->reclaim_flags)) |