Diffstat (limited to 'arch/powerpc/platforms/pseries/cmm.c')
-rw-r--r-- | arch/powerpc/platforms/pseries/cmm.c | 431
1 file changed, 191 insertions(+), 240 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index b33251d75927..91571841df8a 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -19,6 +19,10 @@
 #include <linux/stringify.h>
 #include <linux/swap.h>
 #include <linux/device.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/magic.h>
+#include <linux/balloon_compaction.h>
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/mmu.h>
@@ -38,12 +42,8 @@
 #define CMM_MIN_MEM_MB		256
 #define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
 #define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))
-/*
- * The priority level tries to ensure that this notifier is called as
- * late as possible to reduce thrashing in the shared memory pool.
- */
+
 #define CMM_MEM_HOTPLUG_PRI	1
-#define CMM_MEM_ISOLATE_PRI	15
 
 static unsigned int delay = CMM_DEFAULT_DELAY;
 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
@@ -51,6 +51,8 @@ static unsigned int oom_kb = CMM_OOM_KB;
 static unsigned int cmm_debug = CMM_DEBUG;
 static unsigned int cmm_disabled = CMM_DISABLE;
 static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static bool __read_mostly simulate;
+static unsigned long simulate_loan_target_kb;
 static struct device cmm_dev;
 
 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
@@ -74,35 +76,31 @@ MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
 module_param_named(debug, cmm_debug, uint, 0644);
 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
 		 "[Default=" __stringify(CMM_DEBUG) "]");
-
-#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+module_param_named(simulate, simulate, bool, 0444);
+MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
 
 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
 
-struct cmm_page_array {
-	struct cmm_page_array *next;
-	unsigned long index;
-	unsigned long page[CMM_NR_PAGES];
-};
-
-static unsigned long loaned_pages;
+static atomic_long_t loaned_pages;
 static unsigned long loaned_pages_target;
 static unsigned long oom_freed_pages;
 
-static struct cmm_page_array *cmm_page_list;
-static DEFINE_SPINLOCK(cmm_lock);
-
 static DEFINE_MUTEX(hotplug_mutex);
 static int hotplug_occurred;	/* protected by the hotplug mutex */
 
 static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
 
-static long plpar_page_set_loaned(unsigned long vpa)
+static long plpar_page_set_loaned(struct page *page)
 {
+	const unsigned long vpa = page_to_phys(page);
 	unsigned long cmo_page_sz = cmo_get_page_size();
 	long rc = 0;
 	int i;
 
+	if (unlikely(simulate))
+		return 0;
+
 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
 					vpa + i, 0);
@@ -113,12 +111,16 @@ static long plpar_page_set_loaned(unsigned long vpa)
 	return rc;
 }
 
-static long plpar_page_set_active(unsigned long vpa)
+static long plpar_page_set_active(struct page *page)
 {
+	const unsigned long vpa = page_to_phys(page);
 	unsigned long cmo_page_sz = cmo_get_page_size();
 	long rc = 0;
 	int i;
 
+	if (unlikely(simulate))
+		return 0;
+
 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
 					vpa + i, 0);
@@ -138,8 +140,7 @@ static long plpar_page_set_active(unsigned long vpa)
  **/
 static long cmm_alloc_pages(long nr)
 {
-	struct cmm_page_array *pa, *npa;
-	unsigned long addr;
+	struct page *page;
 	long rc;
 
 	cmm_dbg("Begin request for %ld pages\n", nr);
@@ -156,46 +157,19 @@ static long cmm_alloc_pages(long nr)
 			break;
 		}
 
-		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
-				       __GFP_NORETRY | __GFP_NOMEMALLOC);
-		if (!addr)
+		page = balloon_page_alloc();
+		if (!page)
 			break;
 
-		spin_lock(&cmm_lock);
-		pa = cmm_page_list;
-		if (!pa || pa->index >= CMM_NR_PAGES) {
-			/* Need a new page for the page list. */
-			spin_unlock(&cmm_lock);
-			npa = (struct cmm_page_array *)__get_free_page(
-					GFP_NOIO | __GFP_NOWARN |
-					__GFP_NORETRY | __GFP_NOMEMALLOC);
-			if (!npa) {
-				pr_info("%s: Can not allocate new page list\n", __func__);
-				free_page(addr);
-				break;
-			}
-			spin_lock(&cmm_lock);
-			pa = cmm_page_list;
-
-			if (!pa || pa->index >= CMM_NR_PAGES) {
-				npa->next = pa;
-				npa->index = 0;
-				pa = npa;
-				cmm_page_list = pa;
-			} else
-				free_page((unsigned long) npa);
-		}
-
-		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
+		rc = plpar_page_set_loaned(page);
+		if (rc) {
 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
-			spin_unlock(&cmm_lock);
-			free_page(addr);
+			__free_page(page);
 			break;
 		}
 
-		pa->page[pa->index++] = addr;
-		loaned_pages++;
-		totalram_pages_dec();
-		spin_unlock(&cmm_lock);
+		balloon_page_enqueue(&b_dev_info, page);
+		atomic_long_inc(&loaned_pages);
+		adjust_managed_page_count(page, -1);
 		nr--;
 	}
 
@@ -212,30 +186,19 @@ static long cmm_alloc_pages(long nr)
  **/
 static long cmm_free_pages(long nr)
 {
-	struct cmm_page_array *pa;
-	unsigned long addr;
+	struct page *page;
 
 	cmm_dbg("Begin free of %ld pages.\n", nr);
-	spin_lock(&cmm_lock);
-	pa = cmm_page_list;
 	while (nr) {
-		if (!pa || pa->index <= 0)
+		page = balloon_page_dequeue(&b_dev_info);
+		if (!page)
 			break;
-		addr = pa->page[--pa->index];
-
-		if (pa->index == 0) {
-			pa = pa->next;
-			free_page((unsigned long) cmm_page_list);
-			cmm_page_list = pa;
-		}
-
-		plpar_page_set_active(__pa(addr));
-		free_page(addr);
-		loaned_pages--;
+		plpar_page_set_active(page);
+		adjust_managed_page_count(page, 1);
+		__free_page(page);
+		atomic_long_dec(&loaned_pages);
 		nr--;
-		totalram_pages_inc();
 	}
-	spin_unlock(&cmm_lock);
 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
 	return nr;
 }
@@ -257,7 +220,7 @@ static int cmm_oom_notify(struct notifier_block *self,
 
 	cmm_dbg("OOM processing started\n");
 	nr = cmm_free_pages(nr);
-	loaned_pages_target = loaned_pages;
+	loaned_pages_target = atomic_long_read(&loaned_pages);
 	*freed += KB2PAGES(oom_kb) - nr;
 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
 	cmm_dbg("OOM processing complete\n");
@@ -274,19 +237,24 @@ static int cmm_oom_notify(struct notifier_block *self,
  **/
 static void cmm_get_mpp(void)
 {
+	const long __loaned_pages = atomic_long_read(&loaned_pages);
+	const long total_pages = totalram_pages() + __loaned_pages;
 	int rc;
 	struct hvcall_mpp_data mpp_data;
 	signed long active_pages_target, page_loan_request, target;
-	signed long total_pages = totalram_pages() + loaned_pages;
 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
 
-	rc = h_get_mpp(&mpp_data);
-
-	if (rc != H_SUCCESS)
-		return;
-
-	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
-	target = page_loan_request + (signed long)loaned_pages;
+	if (likely(!simulate)) {
+		rc = h_get_mpp(&mpp_data);
+		if (rc != H_SUCCESS)
+			return;
+		page_loan_request = div_s64((s64)mpp_data.loan_request,
+					    PAGE_SIZE);
+		target = page_loan_request + __loaned_pages;
+	} else {
+		target = KB2PAGES(simulate_loan_target_kb);
+		page_loan_request = target - __loaned_pages;
+	}
 
 	if (target < 0 || total_pages < min_mem_pages)
 		target = 0;
@@ -307,7 +275,7 @@ static void cmm_get_mpp(void)
 	loaned_pages_target = target;
 
 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
-		page_loan_request, loaned_pages, loaned_pages_target,
+		page_loan_request, __loaned_pages, loaned_pages_target,
 		oom_freed_pages, totalram_pages());
 }
 
@@ -325,6 +293,7 @@ static struct notifier_block cmm_oom_nb = {
 static int cmm_thread(void *dummy)
 {
 	unsigned long timeleft;
+	long __loaned_pages;
 
 	while (1) {
 		timeleft = msleep_interruptible(delay * 1000);
@@ -355,11 +324,12 @@ static int cmm_thread(void *dummy)
 
 		cmm_get_mpp();
 
-		if (loaned_pages_target > loaned_pages) {
-			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
-				loaned_pages_target = loaned_pages;
-		} else if (loaned_pages_target < loaned_pages)
-			cmm_free_pages(loaned_pages - loaned_pages_target);
+		__loaned_pages = atomic_long_read(&loaned_pages);
+		if (loaned_pages_target > __loaned_pages) {
+			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+				loaned_pages_target = __loaned_pages;
+		} else if (loaned_pages_target < __loaned_pages)
+			cmm_free_pages(__loaned_pages - loaned_pages_target);
 	}
 	return 0;
 }
@@ -373,7 +343,7 @@ static int cmm_thread(void *dummy)
 	}							\
 	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
-CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
 
 static ssize_t show_oom_pages(struct device *dev,
@@ -406,11 +376,18 @@ static struct device_attribute *cmm_attrs[] = {
 	&dev_attr_oom_freed_kb,
 };
 
+static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
+			 simulate_loan_target_kb);
+
 static struct bus_type cmm_subsys = {
 	.name = "cmm",
 	.dev_name = "cmm",
 };
 
+static void cmm_release_device(struct device *dev)
+{
+}
+
 /**
  * cmm_sysfs_register - Register with sysfs
  *
@@ -426,6 +403,7 @@ static int cmm_sysfs_register(struct device *dev)
 
 	dev->id = 0;
 	dev->bus = &cmm_subsys;
+	dev->release = cmm_release_device;
 
 	if ((rc = device_register(dev)))
 		goto subsys_unregister;
@@ -435,6 +413,11 @@ static int cmm_sysfs_register(struct device *dev)
 			goto fail;
 	}
 
+	if (!simulate)
+		return 0;
+	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
+	if (rc)
+		goto fail;
 	return 0;
 
 fail:
@@ -471,7 +454,7 @@ static int cmm_reboot_notifier(struct notifier_block *nb,
 		if (cmm_thread_ptr)
 			kthread_stop(cmm_thread_ptr);
 		cmm_thread_ptr = NULL;
-		cmm_free_pages(loaned_pages);
+		cmm_free_pages(atomic_long_read(&loaned_pages));
 	}
 	return NOTIFY_DONE;
 }
@@ -481,142 +464,6 @@ static struct notifier_block cmm_reboot_nb = {
 };
 
 /**
- * cmm_count_pages - Count the number of pages loaned in a particular range.
- *
- * @arg: memory_isolate_notify structure with address range and count
- *
- * Return value:
- *	0 on success
- **/
-static unsigned long cmm_count_pages(void *arg)
-{
-	struct memory_isolate_notify *marg = arg;
-	struct cmm_page_array *pa;
-	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
-	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
-	unsigned long idx;
-
-	spin_lock(&cmm_lock);
-	pa = cmm_page_list;
-	while (pa) {
-		if ((unsigned long)pa >= start && (unsigned long)pa < end)
-			marg->pages_found++;
-		for (idx = 0; idx < pa->index; idx++)
-			if (pa->page[idx] >= start && pa->page[idx] < end)
-				marg->pages_found++;
-		pa = pa->next;
-	}
-	spin_unlock(&cmm_lock);
-	return 0;
-}
-
-/**
- * cmm_memory_isolate_cb - Handle memory isolation notifier calls
- * @self: notifier block struct
- * @action: action to take
- * @arg: struct memory_isolate_notify data for handler
- *
- * Return value:
- *	NOTIFY_OK or notifier error based on subfunction return value
- **/
-static int cmm_memory_isolate_cb(struct notifier_block *self,
-				 unsigned long action, void *arg)
-{
-	int ret = 0;
-
-	if (action == MEM_ISOLATE_COUNT)
-		ret = cmm_count_pages(arg);
-
-	return notifier_from_errno(ret);
-}
-
-static struct notifier_block cmm_mem_isolate_nb = {
-	.notifier_call = cmm_memory_isolate_cb,
-	.priority = CMM_MEM_ISOLATE_PRI
-};
-
-/**
- * cmm_mem_going_offline - Unloan pages where memory is to be removed
- * @arg: memory_notify structure with page range to be offlined
- *
- * Return value:
- *	0 on success
- **/
-static int cmm_mem_going_offline(void *arg)
-{
-	struct memory_notify *marg = arg;
-	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
-	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
-	struct cmm_page_array *pa_curr, *pa_last, *npa;
-	unsigned long idx;
-	unsigned long freed = 0;
-
-	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
-			start_page, marg->nr_pages);
-	spin_lock(&cmm_lock);
-
-	/* Search the page list for pages in the range to be offlined */
-	pa_last = pa_curr = cmm_page_list;
-	while (pa_curr) {
-		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
-			if ((pa_curr->page[idx] < start_page) ||
-			    (pa_curr->page[idx] >= end_page))
-				continue;
-
-			plpar_page_set_active(__pa(pa_curr->page[idx]));
-			free_page(pa_curr->page[idx]);
-			freed++;
-			loaned_pages--;
-			totalram_pages_inc();
-			pa_curr->page[idx] = pa_last->page[--pa_last->index];
-			if (pa_last->index == 0) {
-				if (pa_curr == pa_last)
-					pa_curr = pa_last->next;
-				pa_last = pa_last->next;
-				free_page((unsigned long)cmm_page_list);
-				cmm_page_list = pa_last;
-			}
-		}
-		pa_curr = pa_curr->next;
-	}
-
-	/* Search for page list structures in the range to be offlined */
-	pa_last = NULL;
-	pa_curr = cmm_page_list;
-	while (pa_curr) {
-		if (((unsigned long)pa_curr >= start_page) &&
-		    ((unsigned long)pa_curr < end_page)) {
-			npa = (struct cmm_page_array *)__get_free_page(
-					GFP_NOIO | __GFP_NOWARN |
-					__GFP_NORETRY | __GFP_NOMEMALLOC);
-			if (!npa) {
-				spin_unlock(&cmm_lock);
-				cmm_dbg("Failed to allocate memory for list "
-					"management. Memory hotplug "
-					"failed.\n");
-				return -ENOMEM;
-			}
-			memcpy(npa, pa_curr, PAGE_SIZE);
-			if (pa_curr == cmm_page_list)
-				cmm_page_list = npa;
-			if (pa_last)
-				pa_last->next = npa;
-			free_page((unsigned long) pa_curr);
-			freed++;
-			pa_curr = npa;
-		}
-
-		pa_last = pa_curr;
-		pa_curr = pa_curr->next;
-	}
-
-	spin_unlock(&cmm_lock);
-	cmm_dbg("Released %ld pages in the search range.\n", freed);
-
-	return 0;
-}
-
-/**
  * cmm_memory_cb - Handle memory hotplug notifier calls
  * @self: notifier block struct
  * @action: action to take
@@ -635,7 +482,6 @@ static int cmm_memory_cb(struct notifier_block *self,
 	case MEM_GOING_OFFLINE:
 		mutex_lock(&hotplug_mutex);
 		hotplug_occurred = 1;
-		ret = cmm_mem_going_offline(arg);
 		break;
 	case MEM_OFFLINE:
 	case MEM_CANCEL_OFFLINE:
@@ -656,6 +502,106 @@ static struct notifier_block cmm_mem_nb = {
 	.priority = CMM_MEM_HOTPLUG_PRI
 };
 
+#ifdef CONFIG_BALLOON_COMPACTION
+static struct vfsmount *balloon_mnt;
+
+static int cmm_init_fs_context(struct fs_context *fc)
+{
+	return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type balloon_fs = {
+	.name = "ppc-cmm",
+	.init_fs_context = cmm_init_fs_context,
+	.kill_sb = kill_anon_super,
+};
+
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+			   struct page *newpage, struct page *page,
+			   enum migrate_mode mode)
+{
+	unsigned long flags;
+
+	/*
+	 * loan/"inflate" the newpage first.
+	 *
+	 * We might race against the cmm_thread who might discover after our
+	 * loan request that another page is to be unloaned. However, once
+	 * the cmm_thread runs again later, this error will automatically
+	 * be corrected.
+	 */
+	if (plpar_page_set_loaned(newpage)) {
+		/* Unlikely, but possible. Tell the caller not to retry now. */
+		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
+		return -EBUSY;
+	}
+
+	/* balloon page list reference */
+	get_page(newpage);
+
+	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	balloon_page_insert(b_dev_info, newpage);
+	balloon_page_delete(page);
+	b_dev_info->isolated_pages--;
+	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+	/*
+	 * activate/"deflate" the old page. We ignore any errors just like the
+	 * other callers.
+	 */
+	plpar_page_set_active(page);
+
+	/* balloon page list reference */
+	put_page(page);
+
+	return MIGRATEPAGE_SUCCESS;
+}
+
+static int cmm_balloon_compaction_init(void)
+{
+	int rc;
+
+	balloon_devinfo_init(&b_dev_info);
+	b_dev_info.migratepage = cmm_migratepage;
+
+	balloon_mnt = kern_mount(&balloon_fs);
+	if (IS_ERR(balloon_mnt)) {
+		rc = PTR_ERR(balloon_mnt);
+		balloon_mnt = NULL;
+		return rc;
+	}
+
+	b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+	if (IS_ERR(b_dev_info.inode)) {
+		rc = PTR_ERR(b_dev_info.inode);
+		b_dev_info.inode = NULL;
+		kern_unmount(balloon_mnt);
+		balloon_mnt = NULL;
+		return rc;
+	}
+
+	b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+	return 0;
+}
+static void cmm_balloon_compaction_deinit(void)
+{
+	if (b_dev_info.inode)
+		iput(b_dev_info.inode);
+	b_dev_info.inode = NULL;
+	kern_unmount(balloon_mnt);
+	balloon_mnt = NULL;
+}
+#else /* CONFIG_BALLOON_COMPACTION */
+static int cmm_balloon_compaction_init(void)
+{
+	return 0;
+}
+
+static void cmm_balloon_compaction_deinit(void)
+{
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
 /**
  * cmm_init - Module initialization
  *
@@ -664,26 +610,31 @@ static struct notifier_block cmm_mem_nb = {
  **/
 static int cmm_init(void)
 {
-	int rc = -ENOMEM;
+	int rc;
 
-	if (!firmware_has_feature(FW_FEATURE_CMO))
+	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
 		return -EOPNOTSUPP;
 
-	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+	rc = cmm_balloon_compaction_init();
+	if (rc)
 		return rc;
 
+	rc = register_oom_notifier(&cmm_oom_nb);
+	if (rc < 0)
+		goto out_balloon_compaction;
+
 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
 		goto out_oom_notifier;
 
 	if ((rc = cmm_sysfs_register(&cmm_dev)))
 		goto out_reboot_notifier;
 
-	if (register_memory_notifier(&cmm_mem_nb) ||
-	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+	rc = register_memory_notifier(&cmm_mem_nb);
+	if (rc)
 		goto out_unregister_notifier;
 
 	if (cmm_disabled)
-		return rc;
+		return 0;
 
 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 	if (IS_ERR(cmm_thread_ptr)) {
@@ -691,16 +642,16 @@ static int cmm_init(void)
 		goto out_unregister_notifier;
 	}
 
-	return rc;
-
+	return 0;
 out_unregister_notifier:
 	unregister_memory_notifier(&cmm_mem_nb);
-	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_unregister_sysfs(&cmm_dev);
 out_reboot_notifier:
 	unregister_reboot_notifier(&cmm_reboot_nb);
out_oom_notifier:
 	unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+	cmm_balloon_compaction_deinit();
 	return rc;
 }
 
@@ -717,9 +668,9 @@ static void cmm_exit(void)
 	unregister_oom_notifier(&cmm_oom_nb);
 	unregister_reboot_notifier(&cmm_reboot_nb);
 	unregister_memory_notifier(&cmm_mem_nb);
-	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
-	cmm_free_pages(loaned_pages);
+	cmm_free_pages(atomic_long_read(&loaned_pages));
 	cmm_unregister_sysfs(&cmm_dev);
+	cmm_balloon_compaction_deinit();
 }
 
 /**
@@ -739,7 +690,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
 		if (cmm_thread_ptr)
 			kthread_stop(cmm_thread_ptr);
 		cmm_thread_ptr = NULL;
-		cmm_free_pages(loaned_pages);
+		cmm_free_pages(atomic_long_read(&loaned_pages));
 	} else if (!disable && cmm_disabled) {
 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 		if (IS_ERR(cmm_thread_ptr))
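
For readers who want to try the new interface without a PowerVM hypervisor underneath, the sketch below is a hypothetical userspace helper (not part of this patch) that drives the simulation mode the diff adds. It assumes the module was loaded with simulate=1, that the caller is root, and that the attributes appear under /sys/devices/system/cmm/cmm0/ (a guess derived from the "cmm" subsystem/device name registered in cmm_sysfs_register; adjust if your kernel exposes them elsewhere). The attribute names loaned_kb, loaned_target_kb and simulate_loan_target_kb come from the diff itself; the file name cmm_sim_test.c is made up.

/*
 * cmm_sim_test.c - hypothetical userspace sketch, not part of the patch.
 *
 * Exercises the CMM simulation mode: write a loan target, then watch
 * cmm_thread inflate and deflate the balloon through sysfs.
 * Build with: cc -Wall -o cmm_sim_test cmm_sim_test.c
 * Run as root with the cmm module loaded with simulate=1.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Assumed sysfs location; derived from the "cmm" subsystem/device name. */
#define CMM_SYSFS "/sys/devices/system/cmm/cmm0/"

/* Read an unsigned long from a CMM sysfs attribute. */
static unsigned long read_attr(const char *name)
{
	char path[256];
	unsigned long val;
	FILE *f;

	snprintf(path, sizeof(path), CMM_SYSFS "%s", name);
	f = fopen(path, "r");
	if (!f || fscanf(f, "%lu", &val) != 1) {
		perror(path);
		exit(EXIT_FAILURE);
	}
	fclose(f);
	return val;
}

/* Write an unsigned long to a CMM sysfs attribute. */
static void write_attr(const char *name, unsigned long val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), CMM_SYSFS "%s", name);
	f = fopen(path, "w");
	if (!f || fprintf(f, "%lu\n", val) < 0 || fclose(f) != 0) {
		perror(path);
		exit(EXIT_FAILURE);
	}
}

int main(void)
{
	int i;

	/* Pretend the hypervisor asked for a 64 MiB loan. */
	write_attr("simulate_loan_target_kb", 64 * 1024);

	/* Give cmm_thread a few polling intervals to inflate the balloon. */
	for (i = 0; i < 5; i++) {
		sleep(2);
		printf("loaned_kb=%lu loaned_target_kb=%lu\n",
		       read_attr("loaned_kb"),
		       read_attr("loaned_target_kb"));
	}

	/* Drop the request again so the balloon deflates. */
	write_attr("simulate_loan_target_kb", 0);
	return 0;
}

With the default polling delay, loaned_kb should climb toward the written target over the first few iterations and drift back toward zero after the final write, which is a quick way to confirm that cmm_get_mpp()'s simulated loan path and the balloon enqueue/dequeue conversion behave as the diff intends.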