diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 08:27:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 08:27:23 -0800 |
commit | 60da5bf47dd3d301a1d3bd4f0a4b9e29a184515c (patch) | |
tree | 30de83370440aae5350d9ab3fbe6583abd439ee8 /mm | |
parent | 3c2e81ef344a90bb0a39d84af6878b4aeff568a2 (diff) | |
parent | cbae8d45d61f3a8c155caf267d01e5e0f0b2f4b7 (diff) |
Merge branch 'for-3.8/core' of git://git.kernel.dk/linux-block
Pull block layer core updates from Jens Axboe:
"Here are the core block IO bits for 3.8. The branch contains:
- The final version of the surprise device removal fixups from Bart.
- Don't hide EFI partitions under advanced partition types. It's
fairly widespread these days. This is especially dangerous for
systems that have both msdos and efi partition tables, where you
want to keep them in sync.
- Cleanup of using -1 instead of the proper NUMA_NO_NODE
- Export control of bdi flusher thread CPU mask and default to using
the home node (if known) from Jeff.
- Export unplug tracepoint for MD.
- Core improvements from Shaohua. Reinstate the recursive merge, as
the original bug has been fixed. Add plugging for discard and also
fix a problem handling non pow-of-2 discard limits.
There's a trivial merge in block/blk-exec.c due to a fix that went
into 3.7-rc at a later point than -rc4 where this is based."
* 'for-3.8/core' of git://git.kernel.dk/linux-block:
block: export block_unplug tracepoint
block: add plug for blkdev_issue_discard
block: discard granularity might not be power of 2
deadline: Allow 0ms deadline latency, increase the read speed
partitions: enable EFI/GPT support by default
bsg: Remove unused function bsg_goose_queue()
block: Make blk_cleanup_queue() wait until request_fn finished
block: Avoid scheduling delayed work on a dead queue
block: Avoid that request_fn is invoked on a dead queue
block: Let blk_drain_queue() caller obtain the queue lock
block: Rename queue dead flag
bdi: add a user-tunable cpu_list for the bdi flusher threads
block: use NUMA_NO_NODE instead of -1
block: recursive merge requests
block CFQ: avoid moving request to different queue
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca2b3ee176..bd6a6cabef71 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/writeback.h> #include <linux/device.h> +#include <linux/slab.h> #include <trace/events/writeback.h> static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); @@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio) +static ssize_t cpu_list_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + struct bdi_writeback *wb = &bdi->wb; + cpumask_var_t newmask; + ssize_t ret; + struct task_struct *task; + + if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) + return -ENOMEM; + + ret = cpulist_parse(buf, newmask); + if (!ret) { + spin_lock_bh(&bdi->wb_lock); + task = wb->task; + if (task) + get_task_struct(task); + spin_unlock_bh(&bdi->wb_lock); + + mutex_lock(&bdi->flusher_cpumask_lock); + if (task) { + ret = set_cpus_allowed_ptr(task, newmask); + put_task_struct(task); + } + if (ret == 0) { + cpumask_copy(bdi->flusher_cpumask, newmask); + ret = count; + } + mutex_unlock(&bdi->flusher_cpumask_lock); + + } + free_cpumask_var(newmask); + + return ret; +} + +static ssize_t cpu_list_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + ssize_t ret; + + mutex_lock(&bdi->flusher_cpumask_lock); + ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask); + mutex_unlock(&bdi->flusher_cpumask_lock); + + return ret; +} + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), + __ATTR_RW(cpu_list), __ATTR_NULL, }; @@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr) writeback_inodes_wb(&bdi->wb, 1024, WB_REASON_FORKER_THREAD); } else 
{ + int ret; /* * The spinlock makes sure we do not lose * wake-ups when racing with 'bdi_queue_work()'. @@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr) spin_lock_bh(&bdi->wb_lock); bdi->wb.task = task; spin_unlock_bh(&bdi->wb_lock); + mutex_lock(&bdi->flusher_cpumask_lock); + ret = set_cpus_allowed_ptr(task, + bdi->flusher_cpumask); + mutex_unlock(&bdi->flusher_cpumask_lock); + if (ret) + printk_once("%s: failed to bind flusher" + " thread %s, error %d\n", + __func__, task->comm, ret); wake_up_process(task); } bdi_clear_pending(bdi); @@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, dev_name(dev)); if (IS_ERR(wb->task)) return PTR_ERR(wb->task); + } else { + int node; + /* + * Set up a default cpumask for the flusher threads that + * includes all cpus on the same numa node as the device. + * The mask may be overridden via sysfs. + */ + node = dev_to_node(bdi->dev); + if (node != NUMA_NO_NODE) + cpumask_copy(bdi->flusher_cpumask, + cpumask_of_node(node)); } bdi_debug_register(bdi, dev_name(dev)); @@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); + if (!bdi_cap_flush_forker(bdi)) { + bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!bdi->flusher_cpumask) + return -ENOMEM; + cpumask_setall(bdi->flusher_cpumask); + mutex_init(&bdi->flusher_cpumask_lock); + } else + bdi->flusher_cpumask = NULL; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init(&bdi->bdi_stat[i], 0); if (err) @@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi) err: while (i--) percpu_counter_destroy(&bdi->bdi_stat[i]); + kfree(bdi->flusher_cpumask); } return err; @@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); + kfree(bdi->flusher_cpumask); + /* * If bdi_unregister() had already been called earlier, the * wakeup_timer could still be armed because bdi_prune_sb() |