diff options
Diffstat (limited to 'include')
36 files changed, 455 insertions, 344 deletions
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 9dbb739cafa0..c6d667187608 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -107,6 +107,12 @@ struct mmu_gather { struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; unsigned int batch_count; + /* + * __tlb_adjust_range will track the new addr here, + * that that we can adjust the range after the flush + */ + unsigned long addr; + int page_size; }; #define HAVE_GENERIC_MMU_GATHER @@ -115,23 +121,20 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long void tlb_flush_mmu(struct mmu_gather *tlb); void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); -int __tlb_remove_page(struct mmu_gather *tlb, struct page *page); - -/* tlb_remove_page - * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when - * required. - */ -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - if (!__tlb_remove_page(tlb, page)) - tlb_flush_mmu(tlb); -} +extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, + int page_size); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address) { tlb->start = min(tlb->start, address); tlb->end = max(tlb->end, address + PAGE_SIZE); + /* + * Track the last address with which we adjusted the range. This + * will be used later to adjust again after a mmu_flush due to + * failed __tlb_remove_page + */ + tlb->addr = address; } static inline void __tlb_reset_range(struct mmu_gather *tlb) @@ -144,6 +147,40 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) } } +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + if (__tlb_remove_page_size(tlb, page, page_size)) { + tlb_flush_mmu(tlb); + tlb->page_size = page_size; + __tlb_adjust_range(tlb, tlb->addr); + __tlb_remove_page_size(tlb, page, page_size); + } +} + +static bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + return __tlb_remove_page_size(tlb, page, PAGE_SIZE); +} + +/* tlb_remove_page + * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when + * required. + */ +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + return tlb_remove_page_size(tlb, page, PAGE_SIZE); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *page) +{ + /* active->nr should be zero when we call this */ + VM_BUG_ON_PAGE(tlb->active->nr, page); + tlb->page_size = PAGE_SIZE; + __tlb_adjust_range(tlb, tlb->addr); + return __tlb_remove_page(tlb, page); +} + /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. When we're doing a munmap, diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 9b0a15d06a4f..79542b2698ec 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -48,6 +48,7 @@ #include <linux/migrate.h> #include <linux/gfp.h> #include <linux/err.h> +#include <linux/fs.h> /* * Balloon device information descriptor. @@ -62,6 +63,7 @@ struct balloon_dev_info { struct list_head pages; /* Pages enqueued & handled to Host */ int (*migratepage)(struct balloon_dev_info *, struct page *newpage, struct page *page, enum migrate_mode mode); + struct inode *inode; }; extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); @@ -73,45 +75,19 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) spin_lock_init(&balloon->pages_lock); INIT_LIST_HEAD(&balloon->pages); balloon->migratepage = NULL; + balloon->inode = NULL; } #ifdef CONFIG_BALLOON_COMPACTION -extern bool balloon_page_isolate(struct page *page); +extern const struct address_space_operations balloon_aops; +extern bool balloon_page_isolate(struct page *page, + isolate_mode_t mode); extern void balloon_page_putback(struct page *page); -extern int balloon_page_migrate(struct page *newpage, +extern int balloon_page_migrate(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode); /* - * __is_movable_balloon_page - helper to perform @page PageBalloon tests - */ -static inline bool __is_movable_balloon_page(struct page *page) -{ - return PageBalloon(page); -} - -/* - * balloon_page_movable - test PageBalloon to identify balloon pages - * and PagePrivate to check that the page is not - * isolated and can be moved by compaction/migration. - * - * As we might return false positives in the case of a balloon page being just - * released under us, this need to be re-tested later, under the page lock. - */ -static inline bool balloon_page_movable(struct page *page) -{ - return PageBalloon(page) && PagePrivate(page); -} - -/* - * isolated_balloon_page - identify an isolated balloon page on private - * compaction/migration page lists. - */ -static inline bool isolated_balloon_page(struct page *page) -{ - return PageBalloon(page); -} - -/* * balloon_page_insert - insert a page into the balloon's page list and make * the page->private assignment accordingly. * @balloon : pointer to balloon device @@ -124,7 +100,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageBalloon(page); - SetPagePrivate(page); + __SetPageMovable(page, balloon->inode->i_mapping); set_page_private(page, (unsigned long)balloon); list_add(&page->lru, &balloon->pages); } @@ -140,11 +116,14 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, static inline void balloon_page_delete(struct page *page) { __ClearPageBalloon(page); + __ClearPageMovable(page); set_page_private(page, 0); - if (PagePrivate(page)) { - ClearPagePrivate(page); + /* + * No touch page.lru field once @page has been isolated + * because VM is using the field. + */ + if (!PageIsolated(page)) list_del(&page->lru); - } } /* diff --git a/include/linux/compaction.h b/include/linux/compaction.h index a58c852a268f..1a02dab16646 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -212,6 +212,7 @@ static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_i #endif /* CONFIG_COMPACTION */ #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) +struct node; extern int compaction_register_node(struct node *node); extern void compaction_unregister_node(struct node *node); diff --git a/include/linux/dax.h b/include/linux/dax.h index 43d5f0b799c7..9c6dc7704043 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -14,7 +14,6 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); -int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, bool wake_all); @@ -46,19 +45,15 @@ static inline int __dax_zero_page_range(struct block_device *bdev, #if defined(CONFIG_TRANSPARENT_HUGEPAGE) int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, unsigned int flags, get_block_t); -int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, - unsigned int flags, get_block_t); #else static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags, get_block_t gb) { return VM_FAULT_FALLBACK; } -#define __dax_pmd_fault dax_pmd_fault #endif int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); #define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) -#define __dax_mkwrite(vma, vmf, gb) __dax_fault(vma, vmf, gb) static inline bool vma_is_dax(struct vm_area_struct *vma) { diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 46056cb161fc..d82bf1994485 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -38,7 +38,7 @@ struct debug_obj { * @name: name of the object typee * @debug_hint: function returning address, which have associated * kernel symbol, to allow identify the object - * @is_static_object return true if the obj is static, otherwise return false + * @is_static_object: return true if the obj is static, otherwise return false * @fixup_init: fixup function, which is called when the init check * fails. All fixup functions must return true if fixup * was successful, otherwise return false diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index e65ef959546c..c46d2aa16d81 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -4,6 +4,7 @@ #include <linux/swap.h> #include <linux/mm.h> #include <linux/bitops.h> +#include <linux/jump_label.h> struct frontswap_ops { void (*init)(unsigned); /* this swap type was just swapon'ed */ @@ -14,7 +15,6 @@ struct frontswap_ops { struct frontswap_ops *next; /* private pointer to next ops */ }; -extern bool frontswap_enabled; extern void frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); @@ -30,7 +30,12 @@ extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); #ifdef CONFIG_FRONTSWAP -#define frontswap_enabled (1) +extern struct static_key_false frontswap_enabled_key; + +static inline bool frontswap_enabled(void) +{ + return static_branch_unlikely(&frontswap_enabled_key); +} static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) { @@ -50,7 +55,10 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) #else /* all inline routines become no-ops and all externs are ignored */ -#define frontswap_enabled (0) +static inline bool frontswap_enabled(void) +{ + return false; +} static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) { @@ -70,37 +78,35 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) static inline int frontswap_store(struct page *page) { - int ret = -1; + if (frontswap_enabled()) + return __frontswap_store(page); - if (frontswap_enabled) - ret = __frontswap_store(page); - return ret; + return -1; } static inline int frontswap_load(struct page *page) { - int ret = -1; + if (frontswap_enabled()) + return __frontswap_load(page); - if (frontswap_enabled) - ret = __frontswap_load(page); - return ret; + return -1; } static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) { - if (frontswap_enabled) + if (frontswap_enabled()) __frontswap_invalidate_page(type, offset); } static inline void frontswap_invalidate_area(unsigned type) { - if (frontswap_enabled) + if (frontswap_enabled()) __frontswap_invalidate_area(type); } static inline void frontswap_init(unsigned type, unsigned long *map) { - if (frontswap_enabled) + if (frontswap_enabled()) __frontswap_init(type, map); } diff --git a/include/linux/fs.h b/include/linux/fs.h index dc488662ce0b..f2a69f20926f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -398,6 +398,8 @@ struct address_space_operations { */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); + bool (*isolate_page)(struct page *, isolate_mode_t); + void (*putback_page)(struct page *); int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); @@ -661,6 +663,7 @@ struct inode { #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; + struct list_head i_wb_list; /* backing dev writeback list */ union { struct hlist_head i_dentry; struct rcu_head i_rcu; @@ -1444,6 +1447,9 @@ struct super_block { /* s_inode_list_lock protects s_inodes */ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; /* all inodes */ + + spinlock_t s_inode_wblist_lock; + struct list_head s_inodes_wb; /* writeback inodes */ }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 570383a41853..c29e9d347bc6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -78,8 +78,7 @@ struct vm_area_struct; * __GFP_THISNODE forces the allocation to be satisified from the requested * node with no fallbacks or placement policy enforcements. * - * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant - * to kmem allocations). + * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) @@ -486,10 +485,6 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) -extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order); -extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, - unsigned int order); - extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); @@ -513,9 +508,6 @@ extern void *__alloc_page_frag(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask); extern void __free_page_frag(void *addr); -extern void __free_kmem_pages(struct page *page, unsigned int order); -extern void free_kmem_pages(unsigned long addr, unsigned int order); - #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f0a7a0320300..92ce91c03cd0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -1,20 +1,12 @@ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H -extern int do_huge_pmd_anonymous_page(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, - unsigned int flags); +extern int do_huge_pmd_anonymous_page(struct fault_env *fe); extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma); -extern void huge_pmd_set_accessed(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, - pmd_t orig_pmd, int dirty); -extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, - pmd_t orig_pmd); +extern void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd); +extern int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd); extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, @@ -49,6 +41,18 @@ enum transparent_hugepage_flag { #endif }; +struct kobject; +struct kobj_attribute; + +extern ssize_t single_hugepage_flag_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count, + enum transparent_hugepage_flag flag); +extern ssize_t single_hugepage_flag_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf, + enum transparent_hugepage_flag flag); +extern struct kobj_attribute shmem_enabled_attr; + #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) @@ -134,8 +138,7 @@ static inline int hpage_nr_pages(struct page *page) return 1; } -extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pmd_t pmd, pmd_t *pmdp); +extern int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd); extern struct page *huge_zero_page; @@ -152,6 +155,8 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) struct page *get_huge_zero_page(void); void put_huge_zero_page(void); +#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -161,6 +166,8 @@ void put_huge_zero_page(void); #define transparent_hugepage_enabled(__vma) 0 +static inline void prep_transhuge_page(struct page *page) {} + #define transparent_hugepage_flags 0UL static inline int split_huge_page_to_list(struct page *page, struct list_head *list) @@ -196,8 +203,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, return NULL; } -static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pmd_t pmd, pmd_t *pmdp) +static inline int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd) { return 0; } diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index eeb307985715..1e032a1ddb3e 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -4,6 +4,11 @@ #include <linux/sched.h> /* MMF_VM_HUGEPAGE */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern struct attribute_group khugepaged_attr_group; + +extern int khugepaged_init(void); +extern void khugepaged_destroy(void); +extern int start_stop_khugepaged(void); extern int __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma, diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 7ae216a39c9e..481c8c4627ca 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -43,8 +43,7 @@ static inline struct stable_node *page_stable_node(struct page *page) static inline void set_page_stable_node(struct page *page, struct stable_node *stable_node) { - page->mapping = (void *)stable_node + - (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); + page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); } /* diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3106ac1c895e..6c14b6179727 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -73,8 +73,8 @@ extern bool movable_node_enabled; if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, - phys_addr_t start, phys_addr_t end, - int nid, ulong flags); + phys_addr_t start, phys_addr_t end, + int nid, ulong flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); @@ -110,7 +110,7 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, phys_addr_t *out_end, int *out_nid); void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, - phys_addr_t *out_end); + phys_addr_t *out_end); /** * for_each_mem_range - iterate through memblock areas from type_a and not @@ -148,7 +148,7 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, p_start, p_end, p_nid) \ for (i = (u64)ULLONG_MAX, \ __next_mem_range_rev(&i, nid, flags, type_a, type_b,\ - p_start, p_end, p_nid); \ + p_start, p_end, p_nid); \ i != (u64)ULLONG_MAX; \ __next_mem_range_rev(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid)) @@ -163,8 +163,7 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, * is initialized. */ #define for_each_reserved_mem_region(i, p_start, p_end) \ - for (i = 0UL, \ - __next_reserved_mem_region(&i, p_start, p_end); \ + for (i = 0UL, __next_reserved_mem_region(&i, p_start, p_end); \ i != (u64)ULLONG_MAX; \ __next_reserved_mem_region(&i, p_start, p_end)) @@ -403,15 +402,14 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo } #define for_each_memblock(memblock_type, region) \ - for (region = memblock.memblock_type.regions; \ + for (region = memblock.memblock_type.regions; \ region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ region++) #define for_each_memblock_type(memblock_type, rgn) \ - idx = 0; \ - rgn = &memblock_type->regions[idx]; \ - for (idx = 0; idx < memblock_type->cnt; \ - idx++,rgn = &memblock_type->regions[idx]) + for (idx = 0, rgn = &memblock_type->regions[0]; \ + idx < memblock_type->cnt; \ + idx++, rgn = &memblock_type->regions[idx]) #ifdef CONFIG_MEMTEST extern void early_memtest(phys_addr_t start, phys_addr_t end); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 56e6069d2452..71aff733a497 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -749,6 +749,13 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) } #endif +struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep); +void memcg_kmem_put_cache(struct kmem_cache *cachep); +int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, + struct mem_cgroup *memcg); +int memcg_kmem_charge(struct page *page, gfp_t gfp, int order); +void memcg_kmem_uncharge(struct page *page, int order); + #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) extern struct static_key_false memcg_kmem_enabled_key; @@ -770,22 +777,6 @@ static inline bool memcg_kmem_enabled(void) } /* - * In general, we'll do everything in our power to not incur in any overhead - * for non-memcg users for the kmem functions. Not even a function call, if we - * can avoid it. - * - * Therefore, we'll inline all those functions so that in the best case, we'll - * see that kmemcg is off for everybody and proceed quickly. If it is on, - * we'll still do most of the flag checking inline. We check a lot of - * conditions, but because they are pretty simple, they are expected to be - * fast. - */ -int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, - struct mem_cgroup *memcg); -int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); -void __memcg_kmem_uncharge(struct page *page, int order); - -/* * helper for accessing a memcg's index. It will be used as an index in the * child cache array in kmem_cache, and also to derive its name. This function * will return -1 when this is not a kmem-limited memcg. @@ -795,67 +786,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); -void __memcg_kmem_put_cache(struct kmem_cache *cachep); - -static inline bool __memcg_kmem_bypass(void) -{ - if (!memcg_kmem_enabled()) - return true; - if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) - return true; - return false; -} - -/** - * memcg_kmem_charge: charge a kmem page - * @page: page to charge - * @gfp: reclaim mode - * @order: allocation order - * - * Returns 0 on success, an error code on failure. - */ -static __always_inline int memcg_kmem_charge(struct page *page, - gfp_t gfp, int order) -{ - if (__memcg_kmem_bypass()) - return 0; - if (!(gfp & __GFP_ACCOUNT)) - return 0; - return __memcg_kmem_charge(page, gfp, order); -} - -/** - * memcg_kmem_uncharge: uncharge a kmem page - * @page: page to uncharge - * @order: allocation order - */ -static __always_inline void memcg_kmem_uncharge(struct page *page, int order) -{ - if (memcg_kmem_enabled()) - __memcg_kmem_uncharge(page, order); -} - -/** - * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation - * @cachep: the original global kmem cache - * - * All memory allocated from a per-memcg cache is charged to the owner memcg. - */ -static __always_inline struct kmem_cache * -memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) -{ - if (__memcg_kmem_bypass()) - return cachep; - return __memcg_kmem_get_cache(cachep, gfp); -} - -static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) -{ - if (memcg_kmem_enabled()) - __memcg_kmem_put_cache(cachep); -} - /** * memcg_kmem_update_page_stat - update kmem page state statistics * @page: the page @@ -878,15 +808,6 @@ static inline bool memcg_kmem_enabled(void) return false; } -static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) -{ - return 0; -} - -static inline void memcg_kmem_uncharge(struct page *page, int order) -{ -} - static inline int memcg_cache_id(struct mem_cgroup *memcg) { return -1; @@ -900,16 +821,6 @@ static inline void memcg_put_cache_ids(void) { } -static inline struct kmem_cache * -memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) -{ - return cachep; -} - -static inline void memcg_kmem_put_cache(struct kmem_cache *cachep) -{ -} - static inline void memcg_kmem_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 5145620ba48a..01033fadea47 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -284,5 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); +extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9b50325e4ddf..ae8d475a9385 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -37,6 +37,8 @@ extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); +extern bool isolate_movable_page(struct page *page, isolate_mode_t mode); +extern void putback_movable_page(struct page *page); extern int migrate_prep(void); extern int migrate_prep_local(void); @@ -69,6 +71,21 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ +#ifdef CONFIG_COMPACTION +extern int PageMovable(struct page *page); +extern void __SetPageMovable(struct page *page, struct address_space *mapping); +extern void __ClearPageMovable(struct page *page); +#else +static inline int PageMovable(struct page *page) { return 0; }; +static inline void __SetPageMovable(struct page *page, + struct address_space *mapping) +{ +} +static inline void __ClearPageMovable(struct page *page) +{ +} +#endif + #ifdef CONFIG_NUMA_BALANCING extern bool pmd_trans_migrating(pmd_t pmd); extern int migrate_misplaced_page(struct page *page, diff --git a/include/linux/mm.h b/include/linux/mm.h index ece042dfe23c..192c1bbe5fcd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -309,10 +309,34 @@ struct vm_fault { * VM_FAULT_DAX_LOCKED and fill in * entry here. */ - /* for ->map_pages() only */ - pgoff_t max_pgoff; /* map pages for offset from pgoff till - * max_pgoff inclusive */ - pte_t *pte; /* pte entry associated with ->pgoff */ +}; + +/* + * Page fault context: passes though page fault handler instead of endless list + * of function arguments. + */ +struct fault_env { + struct vm_area_struct *vma; /* Target VMA */ + unsigned long address; /* Faulting virtual address */ + unsigned int flags; /* FAULT_FLAG_xxx flags */ + pmd_t *pmd; /* Pointer to pmd entry matching + * the 'address' + */ + pte_t *pte; /* Pointer to pte entry matching + * the 'address'. NULL if the page + * table hasn't been allocated. + */ + spinlock_t *ptl; /* Page table lock. + * Protects pte page table if 'pte' + * is not NULL, otherwise pmd. + */ + pgtable_t prealloc_pte; /* Pre-allocated pte page table. + * vm_ops->map_pages() calls + * alloc_set_pte() from atomic context. + * do_fault_around() pre-allocates + * page table to avoid allocation from + * atomic context. + */ }; /* @@ -327,7 +351,8 @@ struct vm_operations_struct { int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); int (*pmd_fault)(struct vm_area_struct *, unsigned long address, pmd_t *, unsigned int flags); - void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf); + void (*map_pages)(struct fault_env *fe, + pgoff_t start_pgoff, pgoff_t end_pgoff); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -537,7 +562,6 @@ void __put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); -int split_free_page(struct page *page); /* * Compound pages have a destructor function. Provide a @@ -601,8 +625,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) return pte; } -void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon); +int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, + struct page *page); #endif /* @@ -1035,6 +1059,7 @@ static inline pgoff_t page_file_index(struct page *page) } bool page_mapped(struct page *page); +struct address_space *page_mapping(struct page *page); /* * Return true only if the page has been allocated with @@ -1215,15 +1240,14 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page); int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU -extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, unsigned int flags); +extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, + unsigned int flags); extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); #else -static inline int handle_mm_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - unsigned int flags) +static inline int handle_mm_fault(struct vm_area_struct *vma, + unsigned long address, unsigned int flags) { /* should never happen if there's no MMU */ BUG(); @@ -2063,7 +2087,8 @@ extern void truncate_inode_pages_final(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); -extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf); +extern void filemap_map_pages(struct fault_env *fe, + pgoff_t start_pgoff, pgoff_t end_pgoff); extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); /* mm/page-writeback.c */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 917f2b6a0cde..79472b22d23f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -60,51 +60,52 @@ struct page { }; /* Second double word */ - struct { - union { - pgoff_t index; /* Our offset within mapping. */ - void *freelist; /* sl[aou]b first free object */ - /* page_deferred_list().prev -- second tail page */ - }; + union { + pgoff_t index; /* Our offset within mapping. */ + void *freelist; /* sl[aou]b first free object */ + /* page_deferred_list().prev -- second tail page */ + }; - union { + union { #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) - /* Used for cmpxchg_double in slub */ - unsigned long counters; + /* Used for cmpxchg_double in slub */ + unsigned long counters; #else - /* - * Keep _refcount separate from slub cmpxchg_double - * data. As the rest of the double word is protected by - * slab_lock but _refcount is not. - */ - unsigned counters; + /* + * Keep _refcount separate from slub cmpxchg_double data. + * As the rest of the double word is protected by slab_lock + * but _refcount is not. + */ + unsigned counters; #endif + struct { - struct { - - union { - /* - * Count of ptes mapped in mms, to show - * when page is mapped & limit reverse - * map searches. - */ - atomic_t _mapcount; - - struct { /* SLUB */ - unsigned inuse:16; - unsigned objects:15; - unsigned frozen:1; - }; - int units; /* SLOB */ - }; + union { /* - * Usage count, *USE WRAPPER FUNCTION* - * when manual accounting. See page_ref.h + * Count of ptes mapped in mms, to show when + * page is mapped & limit reverse map searches. + * + * Extra information about page type may be + * stored here for pages that are never mapped, + * in which case the value MUST BE <= -2. + * See page-flags.h for more details. */ - atomic_t _refcount; + atomic_t _mapcount; + + unsigned int active; /* SLAB */ + struct { /* SLUB */ + unsigned inuse:16; + unsigned objects:15; + unsigned frozen:1; + }; + int units; /* SLOB */ }; - unsigned int active; /* SLAB */ + /* + * Usage count, *USE WRAPPER FUNCTION* when manual + * accounting. See page_ref.h + */ + atomic_t _refcount; }; }; diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index de7be78c6f0e..451a811f48f2 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -39,6 +39,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_WARN_ON(cond) WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) #define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) +#define VM_WARN(cond, format...) WARN(cond, format) #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) @@ -47,6 +48,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif #ifdef CONFIG_DEBUG_VIRTUAL diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 02069c23486d..19425e988bdc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -140,6 +140,9 @@ enum zone_stat_item { NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ NR_PAGES_SCANNED, /* pages scanned since last reclaim */ +#if IS_ENABLED(CONFIG_ZSMALLOC) + NR_ZSPAGES, /* allocated in zsmalloc */ +#endif #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ @@ -151,7 +154,9 @@ enum zone_stat_item { WORKINGSET_REFAULT, WORKINGSET_ACTIVATE, WORKINGSET_NODERECLAIM, - NR_ANON_TRANSPARENT_HUGEPAGES, + NR_ANON_THPS, + NR_SHMEM_THPS, + NR_SHMEM_PMDMAPPED, NR_FREE_CMA_PAGES, NR_VM_ZONE_STAT_ITEMS }; @@ -524,7 +529,6 @@ struct zone { enum zone_flags { ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ - ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */ ZONE_CONGESTED, /* zone has many dirty pages backed by * a congested BDI */ diff --git a/include/linux/oom.h b/include/linux/oom.h index 83469522690a..606137b3b778 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -23,6 +23,9 @@ struct oom_control { /* Used to determine mempolicy */ nodemask_t *nodemask; + /* Memory cgroup in which oom is invoked, or NULL for global oom */ + struct mem_cgroup *memcg; + /* Used to determine cpuset and node locality requirement */ const gfp_t gfp_mask; @@ -83,14 +86,13 @@ extern unsigned long oom_badness(struct task_struct *p, extern void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int points, unsigned long totalpages, - struct mem_cgroup *memcg, const char *message); + const char *message); extern void check_panic_on_oom(struct oom_control *oc, - enum oom_constraint constraint, - struct mem_cgroup *memcg); + enum oom_constraint constraint); extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, - struct task_struct *task, unsigned long totalpages); + struct task_struct *task); extern bool out_of_memory(struct oom_control *oc); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e5a32445f930..74e4dda91238 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -129,6 +129,9 @@ enum pageflags { /* Compound pages. Stored in first tail page's flags */ PG_double_map = PG_private_2, + + /* non-lru isolated movable page */ + PG_isolated = PG_reclaim, }; #ifndef __GENERATING_BOUNDS_H @@ -292,11 +295,11 @@ PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) */ TESTPAGEFLAG(Writeback, writeback, PF_NO_COMPOUND) TESTSCFLAG(Writeback, writeback, PF_NO_COMPOUND) -PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_COMPOUND) +PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ -PAGEFLAG(Reclaim, reclaim, PF_NO_COMPOUND) - TESTCLEARFLAG(Reclaim, reclaim, PF_NO_COMPOUND) +PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) + TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND) @@ -357,29 +360,37 @@ PAGEFLAG(Idle, idle, PF_ANY) * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. * * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, - * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit; - * and then page->mapping points, not to an anon_vma, but to a private + * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON + * bit; and then page->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged page. See ksm.h. * - * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used. + * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable + * page and then page->mapping points a struct address_space. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" * refers to user virtual address space into which the page is mapped. */ -#define PAGE_MAPPING_ANON 1 -#define PAGE_MAPPING_KSM 2 -#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) +#define PAGE_MAPPING_ANON 0x1 +#define PAGE_MAPPING_MOVABLE 0x2 +#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) +#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) -static __always_inline int PageAnonHead(struct page *page) +static __always_inline int PageMappingFlags(struct page *page) { - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; + return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } static __always_inline int PageAnon(struct page *page) { page = compound_head(page); - return PageAnonHead(page); + return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; +} + +static __always_inline int __PageMovable(struct page *page) +{ + return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == + PAGE_MAPPING_MOVABLE; } #ifdef CONFIG_KSM @@ -393,7 +404,7 @@ static __always_inline int PageKsm(struct page *page) { page = compound_head(page); return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == - (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); + PAGE_MAPPING_KSM; } #else TESTPAGEFLAG_FALSE(Ksm) @@ -570,6 +581,17 @@ static inline int PageDoubleMap(struct page *page) return PageHead(page) && test_bit(PG_double_map, &page[1].flags); } +static inline void SetPageDoubleMap(struct page *page) +{ + VM_BUG_ON_PAGE(!PageHead(page), page); + set_bit(PG_double_map, &page[1].flags); +} + +static inline void ClearPageDoubleMap(struct page *page) +{ + VM_BUG_ON_PAGE(!PageHead(page), page); + clear_bit(PG_double_map, &page[1].flags); +} static inline int TestSetPageDoubleMap(struct page *page) { VM_BUG_ON_PAGE(!PageHead(page), page); @@ -587,59 +609,59 @@ TESTPAGEFLAG_FALSE(TransHuge) TESTPAGEFLAG_FALSE(TransCompound) TESTPAGEFLAG_FALSE(TransCompoundMap) TESTPAGEFLAG_FALSE(TransTail) -TESTPAGEFLAG_FALSE(DoubleMap) +PAGEFLAG_FALSE(DoubleMap) TESTSETFLAG_FALSE(DoubleMap) TESTCLEARFLAG_FALSE(DoubleMap) #endif /* + * For pages that are never mapped to userspace, page->mapcount may be + * used for storing extra information about page type. Any value used + * for this purpose must be <= -2, but it's better start not too close + * to -2 so that an underflow of the page_mapcount() won't be mistaken + * for a special page. + */ +#define PAGE_MAPCOUNT_OPS(uname, lname) \ +static __always_inline int Page##uname(struct page *page) \ +{ \ + return atomic_read(&page->_mapcount) == \ + PAGE_##lname##_MAPCOUNT_VALUE; \ +} \ +static __always_inline void __SetPage##uname(struct page *page) \ +{ \ + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); \ + atomic_set(&page->_mapcount, PAGE_##lname##_MAPCOUNT_VALUE); \ +} \ +static __always_inline void __ClearPage##uname(struct page *page) \ +{ \ + VM_BUG_ON_PAGE(!Page##uname(page), page); \ + atomic_set(&page->_mapcount, -1); \ +} + +/* * PageBuddy() indicate that the page is free and in the buddy system * (see mm/page_alloc.c). - * - * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to - * -2 so that an underflow of the page_mapcount() won't be mistaken - * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very - * efficiently by most CPU architectures. */ -#define PAGE_BUDDY_MAPCOUNT_VALUE (-128) - -static inline int PageBuddy(struct page *page) -{ - return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE; -} +#define PAGE_BUDDY_MAPCOUNT_VALUE (-128) +PAGE_MAPCOUNT_OPS(Buddy, BUDDY) -static inline void __SetPageBuddy(struct page *page) -{ - VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); - atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); -} +/* + * PageBalloon() is set on pages that are on the balloon page list + * (see mm/balloon_compaction.c). + */ +#define PAGE_BALLOON_MAPCOUNT_VALUE (-256) +PAGE_MAPCOUNT_OPS(Balloon, BALLOON) -static inline void __ClearPageBuddy(struct page *page) -{ - VM_BUG_ON_PAGE(!PageBuddy(page), page); - atomic_set(&page->_mapcount, -1); -} +/* + * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on + * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. + */ +#define PAGE_KMEMCG_MAPCOUNT_VALUE (-512) +PAGE_MAPCOUNT_OPS(Kmemcg, KMEMCG) extern bool is_free_buddy_page(struct page *page); -#define PAGE_BALLOON_MAPCOUNT_VALUE (-256) - -static inline int PageBalloon(struct page *page) -{ - return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE; -} - -static inline void __SetPageBalloon(struct page *page) -{ - VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); - atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE); -} - -static inline void __ClearPageBalloon(struct page *page) -{ - VM_BUG_ON_PAGE(!PageBalloon(page), page); - atomic_set(&page->_mapcount, -1); -} +__PAGEFLAG(Isolated, isolated, PF_ANY); /* * If network-based swap is enabled, sl*b must keep track of whether pages diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index e1fe7cf5bddf..03f2a3e7d76d 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <linux/stacktrace.h> +#include <linux/stackdepot.h> struct pglist_data; struct page_ext_operations { @@ -44,9 +45,8 @@ struct page_ext { #ifdef CONFIG_PAGE_OWNER unsigned int order; gfp_t gfp_mask; - unsigned int nr_entries; int last_migrate_reason; - unsigned long trace_entries[8]; + depot_stack_handle_t handle; #endif }; diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 46f1b939948c..30583ab0ffb1 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -10,7 +10,7 @@ extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); -extern gfp_t __get_page_owner_gfp(struct page *page); +extern void __split_page_owner(struct page *page, unsigned int order); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(struct page *page); @@ -28,12 +28,10 @@ static inline void set_page_owner(struct page *page, __set_page_owner(page, order, gfp_mask); } -static inline gfp_t get_page_owner_gfp(struct page *page) +static inline void split_page_owner(struct page *page, unsigned int order) { if (static_branch_unlikely(&page_owner_inited)) - return __get_page_owner_gfp(page); - else - return 0; + __split_page_owner(page, order); } static inline void copy_page_owner(struct page *oldpage, struct page *newpage) { @@ -58,9 +56,9 @@ static inline void set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { } -static inline gfp_t get_page_owner_gfp(struct page *page) +static inline void split_page_owner(struct page *page, + unsigned int order) { - return 0; } static inline void copy_page_owner(struct page *oldpage, struct page *newpage) { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 97354102794d..81363b834900 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -209,10 +209,10 @@ static inline struct page *page_cache_alloc_cold(struct address_space *x) return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); } -static inline struct page *page_cache_alloc_readahead(struct address_space *x) +static inline gfp_t readahead_gfp_mask(struct address_space *x) { - return __page_cache_alloc(mapping_gfp_mask(x) | - __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN); + return mapping_gfp_mask(x) | + __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN; } typedef int filler_t(void *, struct page *); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index eca6f626c16e..cbfee507c839 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -291,6 +291,7 @@ unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); +int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 2b0fad83683f..b46bb5620a76 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -165,7 +165,7 @@ void do_page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long, int); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long, bool); -void page_add_file_rmap(struct page *); +void page_add_file_rmap(struct page *, bool); void page_remove_rmap(struct page *, bool); void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 4d4780c00d34..ff078e7043b6 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -16,8 +16,9 @@ struct shmem_inode_info { unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ - struct shared_policy policy; /* NUMA memory alloc policy */ + struct list_head shrinklist; /* shrinkable hpage inodes */ struct list_head swaplist; /* chain of maybes on swap */ + struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ struct inode vfs_inode; }; @@ -28,10 +29,14 @@ struct shmem_sb_info { unsigned long max_inodes; /* How many inodes are allowed */ unsigned long free_inodes; /* How many are left for allocation */ spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ + umode_t mode; /* Mount mode for root directory */ + unsigned char huge; /* Whether to try for hugepages */ kuid_t uid; /* Mount uid for root directory */ kgid_t gid; /* Mount gid for root directory */ - umode_t mode; /* Mount mode for root directory */ struct mempolicy *mpol; /* default memory policy for mappings */ + spinlock_t shrinklist_lock; /* Protects shrinklist */ + struct list_head shrinklist; /* List of shinkable inodes */ + unsigned long shrinklist_len; /* Length of shrinklist */ }; static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) @@ -49,6 +54,8 @@ extern struct file *shmem_file_setup(const char *name, extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); +extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); extern bool shmem_mapping(struct address_space *mapping); extern void shmem_unlock_mapping(struct address_space *mapping); @@ -61,6 +68,19 @@ extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); +/* Flag allocation requirements to shmem_getpage */ +enum sgp_type { + SGP_READ, /* don't exceed i_size, don't allocate page */ + SGP_CACHE, /* don't exceed i_size, may allocate page */ + SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */ + SGP_HUGE, /* like SGP_CACHE, huge pages preferred */ + SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */ + SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ +}; + +extern int shmem_getpage(struct inode *inode, pgoff_t index, + struct page **pagep, enum sgp_type sgp); + static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) { @@ -68,6 +88,18 @@ static inline struct page *shmem_read_mapping_page( mapping_gfp_mask(mapping)); } +static inline bool shmem_file(struct file *file) +{ + if (!IS_ENABLED(CONFIG_SHMEM)) + return false; + if (!file || !file->f_mapping) + return false; + return shmem_mapping(file->f_mapping); +} + +extern bool shmem_charge(struct inode *inode, long pages); +extern void shmem_uncharge(struct inode *inode, long pages); + #ifdef CONFIG_TMPFS extern int shmem_add_seals(struct file *file, unsigned int seals); @@ -83,4 +115,13 @@ static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a) #endif +#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE +extern bool shmem_huge_enabled(struct vm_area_struct *vma); +#else +static inline bool shmem_huge_enabled(struct vm_area_struct *vma) +{ + return false; +} +#endif + #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index aeb3e6d00a66..1a4ea551aae5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -565,6 +565,8 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) { if (size != 0 && n > SIZE_MAX / size) return NULL; + if (__builtin_constant_p(n) && __builtin_constant_p(size)) + return kmalloc(n * size, flags); return __kmalloc(n * size, flags); } diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8694f7a5d92b..339ba027ade9 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -81,7 +81,7 @@ struct kmem_cache { #endif #ifdef CONFIG_SLAB_FREELIST_RANDOM - void *random_seq; + unsigned int *random_seq; #endif struct kmem_cache_node *node[MAX_NUMNODES]; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d1faa019c02a..5624c1f3eb0a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -99,6 +99,11 @@ struct kmem_cache { */ int remote_node_defrag_ratio; #endif + +#ifdef CONFIG_SLAB_FREELIST_RANDOM + unsigned int *random_seq; +#endif + struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 587480ad41b7..dd66a952e8cd 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -27,8 +27,7 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) -extern int handle_userfault(struct vm_area_struct *vma, unsigned long address, - unsigned int flags, unsigned long reason); +extern int handle_userfault(struct fault_env *fe, unsigned long reason); extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len); @@ -56,10 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) #else /* CONFIG_USERFAULTFD */ /* mm helpers */ -static inline int handle_userfault(struct vm_area_struct *vma, - unsigned long address, - unsigned int flags, - unsigned long reason) +static inline int handle_userfault(struct fault_env *fe, unsigned long reason) { return VM_FAULT_SIGBUS; } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index ec084321fe09..42604173f122 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -70,6 +70,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_FAULT_FALLBACK, THP_COLLAPSE_ALLOC, THP_COLLAPSE_ALLOC_FAILED, + THP_FILE_ALLOC, + THP_FILE_MAPPED, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, @@ -100,4 +102,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_VM_EVENT_ITEMS }; +#ifndef CONFIG_TRANSPARENT_HUGEPAGE +#define THP_FILE_ALLOC ({ BUILD_BUG(); 0; }) +#define THP_FILE_MAPPED ({ BUILD_BUG(); 0; }) +#endif + #endif /* VM_EVENT_ITEM_H_INCLUDED */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d0b5ca5d4e08..717e6149e753 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -384,4 +384,7 @@ void tag_pages_for_writeback(struct address_space *mapping, void account_page_redirty(struct page *page); +void sb_mark_inode_writeback(struct inode *inode); +void sb_clear_inode_writeback(struct inode *inode); + #endif /* WRITEBACK_H */ diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 551ba4acde4d..04f58acda8e8 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -13,7 +13,7 @@ EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ EM( SCAN_PAGE_RO, "no_writable_page") \ - EM( SCAN_NO_REFERENCED_PAGE, "no_referenced_page") \ + EM( SCAN_LACK_REFERENCED_PAGE, "lack_referenced_page") \ EM( SCAN_PAGE_NULL, "page_null") \ EM( SCAN_SCAN_ABORT, "scan_aborted") \ EM( SCAN_PAGE_COUNT, "not_suitable_page_count") \ @@ -28,7 +28,9 @@ EM( SCAN_SWAP_CACHE_PAGE, "page_swap_cache") \ EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\ EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \ - EMe( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") + EM( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") \ + EM( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte") \ + EMe(SCAN_TRUNCATED, "truncated") \ #undef EM #undef EMe @@ -45,17 +47,18 @@ SCAN_STATUS TRACE_EVENT(mm_khugepaged_scan_pmd, TP_PROTO(struct mm_struct *mm, struct page *page, bool writable, - bool referenced, int none_or_zero, int status), + int referenced, int none_or_zero, int status, int unmapped), - TP_ARGS(mm, page, writable, referenced, none_or_zero, status), + TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, pfn) __field(bool, writable) - __field(bool, referenced) + __field(int, referenced) __field(int, none_or_zero) __field(int, status) + __field(int, unmapped) ), TP_fast_assign( @@ -65,15 +68,17 @@ TRACE_EVENT(mm_khugepaged_scan_pmd, __entry->referenced = referenced; __entry->none_or_zero = none_or_zero; __entry->status = status; + __entry->unmapped = unmapped; ), - TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s", + TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s, unmapped=%d", __entry->mm, __entry->pfn, __entry->writable, __entry->referenced, __entry->none_or_zero, - __print_symbolic(__entry->status, SCAN_STATUS)) + __print_symbolic(__entry->status, SCAN_STATUS), + __entry->unmapped) ); TRACE_EVENT(mm_collapse_huge_page, @@ -103,14 +108,14 @@ TRACE_EVENT(mm_collapse_huge_page, TRACE_EVENT(mm_collapse_huge_page_isolate, TP_PROTO(struct page *page, int none_or_zero, - bool referenced, bool writable, int status), + int referenced, bool writable, int status), TP_ARGS(page, none_or_zero, referenced, writable, status), TP_STRUCT__entry( __field(unsigned long, pfn) __field(int, none_or_zero) - __field(bool, referenced) + __field(int, referenced) __field(bool, writable) __field(int, status) ), @@ -131,5 +136,32 @@ TRACE_EVENT(mm_collapse_huge_page_isolate, __print_symbolic(__entry->status, SCAN_STATUS)) ); +TRACE_EVENT(mm_collapse_huge_page_swapin, + + TP_PROTO(struct mm_struct *mm, int swapped_in, int referenced, int ret), + + TP_ARGS(mm, swapped_in, referenced, ret), + + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __field(int, swapped_in) + __field(int, referenced) + __field(int, ret) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->swapped_in = swapped_in; + __entry->referenced = referenced; + __entry->ret = ret; + ), + + TP_printk("mm=%p, swapped_in=%d, referenced=%d, ret=%d", + __entry->mm, + __entry->swapped_in, + __entry->referenced, + __entry->ret) +); + #endif /* __HUGE_MEMORY_H */ #include <trace/define_trace.h> diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 73614ce1d204..531f5811ff6b 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -696,7 +696,7 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_ARGS(inode, wbc, nr_to_write) ); -DECLARE_EVENT_CLASS(writeback_lazytime_template, +DECLARE_EVENT_CLASS(writeback_inode_template, TP_PROTO(struct inode *inode), TP_ARGS(inode), @@ -723,25 +723,39 @@ DECLARE_EVENT_CLASS(writeback_lazytime_template, show_inode_state(__entry->state), __entry->mode) ); -DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime, +DEFINE_EVENT(writeback_inode_template, writeback_lazytime, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); -DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput, +DEFINE_EVENT(writeback_inode_template, writeback_lazytime_iput, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); -DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue, +DEFINE_EVENT(writeback_inode_template, writeback_dirty_inode_enqueue, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); +/* + * Inode writeback list tracking. + */ + +DEFINE_EVENT(writeback_inode_template, sb_mark_inode_writeback, + TP_PROTO(struct inode *inode), + TP_ARGS(inode) +); + +DEFINE_EVENT(writeback_inode_template, sb_clear_inode_writeback, + TP_PROTO(struct inode *inode), + TP_ARGS(inode) +); + #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 546b38886e11..e398beac67b8 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -80,5 +80,7 @@ #define BPF_FS_MAGIC 0xcafe4a11 /* Since UDF 2.01 is ISO 13346 based... */ #define UDF_SUPER_MAGIC 0x15013346 +#define BALLOON_KVM_MAGIC 0x13661366 +#define ZSMALLOC_MAGIC 0x58295829 #endif /* __LINUX_MAGIC_H__ */ |