Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 3
-rw-r--r-- | mm/Makefile | 1
-rw-r--r-- | mm/early_ioremap.c | 1
-rw-r--r-- | mm/frame_vector.c | 230
-rw-r--r-- | mm/kasan/kasan.c | 3
-rw-r--r-- | mm/migrate.c | 2
-rw-r--r-- | mm/mmap.c | 11
-rw-r--r-- | mm/vmscan.c | 2
8 files changed, 240 insertions, 13 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 6413d027c0b2..0d9fdcd01e47 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -677,3 +677,6 @@ config ZONE_DEVICE
 	  mapping in an O_DIRECT operation, among other things.
 
 	  If FS_DAX is enabled, then say Y.
+
+config FRAME_VECTOR
+	bool
diff --git a/mm/Makefile b/mm/Makefile
index 56f8eed73f1a..2ed43191fc3b 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -80,3 +80,4 @@ obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
 obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
 obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
+obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index 23f744d77ce0..17ae14b5aefa 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -15,6 +15,7 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <asm/fixmap.h>
+#include <asm/early_ioremap.h>
 
 #ifdef CONFIG_MMU
 static int early_ioremap_debug __initdata;
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
new file mode 100644
index 000000000000..cdabcb93c6a6
--- /dev/null
+++ b/mm/frame_vector.c
@@ -0,0 +1,230 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+
+/*
+ * get_vaddr_frames() - map virtual addresses to pfns
+ * @start:	starting user address
+ * @nr_frames:	number of pages / pfns from start to map
+ * @write:	whether pages will be written to by the caller
+ * @force:	whether to force write access even if user mapping is
+ *		readonly. See description of the same argument of
+ *		get_user_pages().
+ * @vec:	structure which receives pages / pfns of the addresses mapped.
+ *		It should have space for at least nr_frames entries.
+ *
+ * This function maps virtual addresses from @start and fills @vec structure
+ * with page frame numbers or page pointers to corresponding pages (choice
+ * depends on the type of the vma underlying the virtual address). If @start
+ * belongs to a normal vma, the function grabs reference to each of the pages
+ * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't
+ * touch page structures and the caller must make sure pfns aren't reused for
+ * anything else while he is using them.
+ *
+ * The function returns number of pages mapped which may be less than
+ * @nr_frames. In particular we stop mapping if there are more vmas of
+ * different type underlying the specified range of virtual addresses.
+ * When the function isn't able to map a single page, it returns error.
+ *
+ * This function takes care of grabbing mmap_sem as necessary.
+ */
+int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
+		     bool write, bool force, struct frame_vector *vec)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int ret = 0;
+	int err;
+	int locked;
+
+	if (nr_frames == 0)
+		return 0;
+
+	if (WARN_ON_ONCE(nr_frames > vec->nr_allocated))
+		nr_frames = vec->nr_allocated;
+
+	down_read(&mm->mmap_sem);
+	locked = 1;
+	vma = find_vma_intersection(mm, start, start + 1);
+	if (!vma) {
+		ret = -EFAULT;
+		goto out;
+	}
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
+		vec->got_ref = true;
+		vec->is_pfns = false;
+		ret = get_user_pages_locked(current, mm, start, nr_frames,
+			write, force, (struct page **)(vec->ptrs), &locked);
+		goto out;
+	}
+
+	vec->got_ref = false;
+	vec->is_pfns = true;
+	do {
+		unsigned long *nums = frame_vector_pfns(vec);
+
+		while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
+			err = follow_pfn(vma, start, &nums[ret]);
+			if (err) {
+				if (ret == 0)
+					ret = err;
+				goto out;
+			}
+			start += PAGE_SIZE;
+			ret++;
+		}
+		/*
+		 * We stop if we have enough pages or if VMA doesn't completely
+		 * cover the tail page.
+		 */
+		if (ret >= nr_frames || start < vma->vm_end)
+			break;
+		vma = find_vma_intersection(mm, start, start + 1);
+	} while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP));
+out:
+	if (locked)
+		up_read(&mm->mmap_sem);
+	if (!ret)
+		ret = -EFAULT;
+	if (ret > 0)
+		vec->nr_frames = ret;
+	return ret;
+}
+EXPORT_SYMBOL(get_vaddr_frames);
+
+/**
+ * put_vaddr_frames() - drop references to pages if get_vaddr_frames() acquired
+ *			them
+ * @vec:	frame vector to put
+ *
+ * Drop references to pages if get_vaddr_frames() acquired them. We also
+ * invalidate the frame vector so that it is prepared for the next call into
+ * get_vaddr_frames().
+ */
+void put_vaddr_frames(struct frame_vector *vec)
+{
+	int i;
+	struct page **pages;
+
+	if (!vec->got_ref)
+		goto out;
+	pages = frame_vector_pages(vec);
+	/*
+	 * frame_vector_pages() might needed to do a conversion when
+	 * get_vaddr_frames() got pages but vec was later converted to pfns.
+	 * But it shouldn't really fail to convert pfns back...
+	 */
+	if (WARN_ON(IS_ERR(pages)))
+		goto out;
+	for (i = 0; i < vec->nr_frames; i++)
+		put_page(pages[i]);
+	vec->got_ref = false;
+out:
+	vec->nr_frames = 0;
+}
+EXPORT_SYMBOL(put_vaddr_frames);
+
+/**
+ * frame_vector_to_pages - convert frame vector to contain page pointers
+ * @vec:	frame vector to convert
+ *
+ * Convert @vec to contain array of page pointers.  If the conversion is
+ * successful, return 0. Otherwise return an error. Note that we do not grab
+ * page references for the page structures.
+ */
+int frame_vector_to_pages(struct frame_vector *vec)
+{
+	int i;
+	unsigned long *nums;
+	struct page **pages;
+
+	if (!vec->is_pfns)
+		return 0;
+	nums = frame_vector_pfns(vec);
+	for (i = 0; i < vec->nr_frames; i++)
+		if (!pfn_valid(nums[i]))
+			return -EINVAL;
+	pages = (struct page **)nums;
+	for (i = 0; i < vec->nr_frames; i++)
+		pages[i] = pfn_to_page(nums[i]);
+	vec->is_pfns = false;
+	return 0;
+}
+EXPORT_SYMBOL(frame_vector_to_pages);
+
+/**
+ * frame_vector_to_pfns - convert frame vector to contain pfns
+ * @vec:	frame vector to convert
+ *
+ * Convert @vec to contain array of pfns.
+ */
+void frame_vector_to_pfns(struct frame_vector *vec)
+{
+	int i;
+	unsigned long *nums;
+	struct page **pages;
+
+	if (vec->is_pfns)
+		return;
+	pages = (struct page **)(vec->ptrs);
+	nums = (unsigned long *)pages;
+	for (i = 0; i < vec->nr_frames; i++)
+		nums[i] = page_to_pfn(pages[i]);
+	vec->is_pfns = true;
+}
+EXPORT_SYMBOL(frame_vector_to_pfns);
+
+/**
+ * frame_vector_create() - allocate & initialize structure for pinned pfns
+ * @nr_frames:	number of pfns slots we should reserve
+ *
+ * Allocate and initialize struct pinned_pfns to be able to hold @nr_pfns
+ * pfns.
+ */
+struct frame_vector *frame_vector_create(unsigned int nr_frames)
+{
+	struct frame_vector *vec;
+	int size = sizeof(struct frame_vector) + sizeof(void *) * nr_frames;
+
+	if (WARN_ON_ONCE(nr_frames == 0))
+		return NULL;
+	/*
+	 * This is absurdly high. It's here just to avoid strange effects when
+	 * arithmetics overflows.
+	 */
+	if (WARN_ON_ONCE(nr_frames > INT_MAX / sizeof(void *) / 2))
+		return NULL;
+	/*
+	 * Avoid higher order allocations, use vmalloc instead. It should
+	 * be rare anyway.
+	 */
+	if (size <= PAGE_SIZE)
+		vec = kmalloc(size, GFP_KERNEL);
+	else
+		vec = vmalloc(size);
+	if (!vec)
+		return NULL;
+	vec->nr_allocated = nr_frames;
+	vec->nr_frames = 0;
+	return vec;
+}
+EXPORT_SYMBOL(frame_vector_create);
+
+/**
+ * frame_vector_destroy() - free memory allocated to carry frame vector
+ * @vec:	Frame vector to free
+ *
+ * Free structure allocated by frame_vector_create() to carry frames.
+ */
+void frame_vector_destroy(struct frame_vector *vec)
+{
+	/* Make sure put_vaddr_frames() got called properly... */
+	VM_BUG_ON(vec->nr_frames > 0);
+	kvfree(vec);
+}
+EXPORT_SYMBOL(frame_vector_destroy);
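For context, the frame_vector API added above is meant to be driven as create -> map -> (optionally convert) -> release. The following is a minimal usage sketch of a hypothetical caller -- it is not part of this commit -- assuming the declarations exported here (frame_vector_create(), get_vaddr_frames(), frame_vector_to_pages(), frame_vector_pages(), put_vaddr_frames(), frame_vector_destroy()) are visible through <linux/mm.h>, with error handling abbreviated:

/* Hypothetical caller of the frame_vector API (illustrative sketch only). */
#include <linux/err.h>
#include <linux/mm.h>

static int pin_user_range(unsigned long start, unsigned int nr_pages)
{
	struct frame_vector *vec;
	int ret;

	vec = frame_vector_create(nr_pages);	/* room for nr_pages frames */
	if (!vec)
		return -ENOMEM;

	/* Pin pages, or record raw pfns for VM_IO / VM_PFNMAP mappings. */
	ret = get_vaddr_frames(start, nr_pages, true, false, vec);
	if (ret < 0)
		goto out_destroy;
	if (ret < nr_pages) {		/* stopped early: mixed vma types */
		ret = -EFAULT;
		goto out_put;
	}

	if (frame_vector_to_pages(vec) < 0) {
		ret = -EINVAL;		/* pfns lack struct page backing */
		goto out_put;
	}
	/* ... use frame_vector_pages(vec)[0 .. nr_pages - 1] here ... */
	ret = 0;
out_put:
	put_vaddr_frames(vec);	/* drops refs iff get_vaddr_frames() took them */
out_destroy:
	frame_vector_destroy(vec);
	return ret;
}

Note that put_vaddr_frames() is safe to call unconditionally: it only drops references when get_vaddr_frames() actually acquired them, and it resets vec->nr_frames so the vector can be reused or destroyed.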
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 7b28e9cdf1c7..8da211411b57 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr)
 
 	if (unlikely(*shadow_addr)) {
 		u16 shadow_first_bytes = *(u16 *)shadow_addr;
-		s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK;
 
 		if (unlikely(shadow_first_bytes))
 			return true;
 
-		if (likely(!last_byte))
+		if (likely(IS_ALIGNED(addr, 8)))
 			return false;
 
 		return memory_is_poisoned_1(addr + 15);
diff --git a/mm/migrate.c b/mm/migrate.c
index c3cb566af3e2..7452a00bbb50 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1075,7 +1075,7 @@ out:
 	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
 		put_new_page(new_hpage, private);
 	else
-		put_page(new_hpage);
+		putback_active_hugepage(new_hpage);
 
 	if (result) {
 		if (rc)
diff --git a/mm/mmap.c b/mm/mmap.c
index 971dd2cb77d2..79bcc9f92e48 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct rb_node **rb_link, struct rb_node *rb_parent)
 {
-	WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops");
-
 	/* Update tracking information for the gap following the new vma. */
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
@@ -1492,13 +1490,14 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 int vma_wants_writenotify(struct vm_area_struct *vma)
 {
 	vm_flags_t vm_flags = vma->vm_flags;
+	const struct vm_operations_struct *vm_ops = vma->vm_ops;
 
 	/* If it was private or non-writable, the write bit is already clear */
 	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
 		return 0;
 
 	/* The backer wishes to know when pages are first written to? */
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
 		return 1;
 
 	/* The open routine did something to the protections that pgprot_modify
@@ -1638,12 +1637,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 */
 		WARN_ON_ONCE(addr != vma->vm_start);
 
-		/* All file mapping must have ->vm_ops set */
-		if (!vma->vm_ops) {
-			static const struct vm_operations_struct dummy_ops = {};
-			vma->vm_ops = &dummy_ops;
-		}
-
 		addr = vma->vm_start;
 		vm_flags = vma->vm_flags;
 	} else if (vm_flags & VM_SHARED) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2d978b28a410..7f63a9381f71 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -175,7 +175,7 @@ static bool sane_reclaim(struct scan_control *sc)
 	if (!memcg)
 		return true;
 #ifdef CONFIG_CGROUP_WRITEBACK
-	if (memcg->css.cgroup)
+	if (cgroup_on_dfl(memcg->css.cgroup))
 		return true;
 #endif
 	return false;
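On the mm/kasan/kasan.c hunk above: one shadow byte covers 8 bytes of memory, so for a 16-byte access the two shadow bytes checked first cover the access completely exactly when addr is 8-byte aligned -- that is the only case where the early "return false" is safe. The old test, !((addr + 15) & KASAN_SHADOW_MASK) with KASAN_SHADOW_MASK == 7, is instead true only when addr % 8 == 1. A small standalone comparison of the two predicates (illustrative userspace code, not from the patch):

/* Compare the pre- and post-patch fast-path predicates for addr % 8 = 0..7. */
#include <stdio.h>

#define KASAN_SHADOW_MASK 7UL	/* one shadow byte per 8 bytes of memory */

int main(void)
{
	for (unsigned long addr = 0; addr < 8; addr++) {
		int old = !((addr + 15) & KASAN_SHADOW_MASK);	/* pre-patch */
		int new = !(addr & 7);	/* IS_ALIGNED(addr, 8), post-patch */
		printf("addr%%8=%lu old=%d new=%d\n", addr, old, new);
	}
	/* Prints old=1 only at addr%8==1, new=1 only at addr%8==0:
	 * the old test fired on a misalignment, not on alignment. */
	return 0;
}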