diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-05-08 19:49:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-05-08 19:49:35 -0700 |
commit | 1daac193f21d6e3d0adc528a06a7e11522d4254d (patch) | |
tree | 4034f896bc92bc3568c0e9bc1cd1df0af884d625 | |
parent | 41c64bb19c740b5433f768032ecaf05375c955ee (diff) | |
parent | 0ff28d9f4674d781e492bcff6f32f0fe48cf0fed (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A collection of fixes since the merge window;
- fix for a double elevator module release, from Chao Yu. Ancient bug.
- the splice() MORE flag fix from Christophe Leroy.
- a fix for NVMe, fixing a patch that went in in the merge window.
From Keith.
- two fixes for blk-mq CPU hotplug handling, from Ming Lei.
- bdi vs blockdev lifetime fix from Neil Brown, fixing and oops in md.
- two blk-mq fixes from Shaohua, fixing a race on queue stop and a
bad merge issue with FUA writes.
- division-by-zero fix for writeback from Tejun.
- a block bounce page accounting fix, making sure we inc/dec after
bouncing so that pre/post IO pages match up. From Wang YanQing"
* 'for-linus' of git://git.kernel.dk/linux-block:
splice: sendfile() at once fails for big files
blk-mq: don't lose requests if a stopped queue restarts
blk-mq: fix FUA request hang
block: destroy bdi before blockdev is unregistered.
block:bounce: fix call inc_|dec_zone_page_state on different pages confuse value of NR_BOUNCE
elevator: fix double release of elevator module
writeback: use |1 instead of +1 to protect against div by zero
blk-mq: fix CPU hotplug handling
blk-mq: fix race between timeout and CPU hotplug
NVMe: Fix VPD B0 max sectors translation
-rw-r--r-- | block/blk-core.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 60 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/bounce.c | 2 | ||||
-rw-r--r-- | block/elevator.c | 6 | ||||
-rw-r--r-- | drivers/block/loop.c | 2 | ||||
-rw-r--r-- | drivers/block/nvme-scsi.c | 3 | ||||
-rw-r--r-- | drivers/md/md.c | 4 | ||||
-rw-r--r-- | fs/splice.c | 12 | ||||
-rw-r--r-- | include/linux/blk_types.h | 2 | ||||
-rw-r--r-- | mm/page-writeback.c | 6 |
11 files changed, 60 insertions, 41 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index fd154b94447a..7871603f0a29 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -552,6 +552,8 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); + bdi_destroy(&q->backing_dev_info); + /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } diff --git a/block/blk-mq.c b/block/blk-mq.c index ade8a2d1b0aa..e68b71b85a7e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -677,8 +677,11 @@ static void blk_mq_rq_timer(unsigned long priv) data.next = blk_rq_timeout(round_jiffies_up(data.next)); mod_timer(&q->timeout, data.next); } else { - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_tag_idle(hctx); + queue_for_each_hw_ctx(q, hctx, i) { + /* the hctx may be unmapped, so check it here */ + if (blk_mq_hw_queue_mapped(hctx)) + blk_mq_tag_idle(hctx); + } } } @@ -855,6 +858,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) spin_lock(&hctx->lock); list_splice(&rq_list, &hctx->dispatch); spin_unlock(&hctx->lock); + /* + * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but + * it's possible the queue is stopped and restarted again + * before this. Queue restart will dispatch requests. And since + * requests in rq_list aren't added into hctx->dispatch yet, + * the requests in rq_list might get lost. + * + * blk_mq_run_hw_queue() already checks the STOPPED bit + **/ + blk_mq_run_hw_queue(hctx, true); } } @@ -1571,22 +1584,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) return NOTIFY_OK; } -static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu) -{ - struct request_queue *q = hctx->queue; - struct blk_mq_tag_set *set = q->tag_set; - - if (set->tags[hctx->queue_num]) - return NOTIFY_OK; - - set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num); - if (!set->tags[hctx->queue_num]) - return NOTIFY_STOP; - - hctx->tags = set->tags[hctx->queue_num]; - return NOTIFY_OK; -} - static int blk_mq_hctx_notify(void *data, unsigned long action, unsigned int cpu) { @@ -1594,8 +1591,11 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) return blk_mq_hctx_cpu_offline(hctx, cpu); - else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) - return blk_mq_hctx_cpu_online(hctx, cpu); + + /* + * In case of CPU online, tags may be reallocated + * in blk_mq_map_swqueue() after mapping is updated. + */ return NOTIFY_OK; } @@ -1775,6 +1775,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) unsigned int i; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; + struct blk_mq_tag_set *set = q->tag_set; queue_for_each_hw_ctx(q, hctx, i) { cpumask_clear(hctx->cpumask); @@ -1803,16 +1804,20 @@ static void blk_mq_map_swqueue(struct request_queue *q) * disable it and free the request entries. */ if (!hctx->nr_ctx) { - struct blk_mq_tag_set *set = q->tag_set; - if (set->tags[i]) { blk_mq_free_rq_map(set, set->tags[i], i); set->tags[i] = NULL; - hctx->tags = NULL; } + hctx->tags = NULL; continue; } + /* unmapped hw queue can be remapped after CPU topo changed */ + if (!set->tags[i]) + set->tags[i] = blk_mq_init_rq_map(set, i); + hctx->tags = set->tags[i]; + WARN_ON(!hctx->tags); + /* * Set the map size to the number of mapped software queues. * This is more accurate and more efficient than looping @@ -2090,9 +2095,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, */ list_for_each_entry(q, &all_q_list, all_q_node) blk_mq_freeze_queue_start(q); - list_for_each_entry(q, &all_q_list, all_q_node) + list_for_each_entry(q, &all_q_list, all_q_node) { blk_mq_freeze_queue_wait(q); + /* + * timeout handler can't touch hw queue during the + * reinitialization + */ + del_timer_sync(&q->timeout); + } + list_for_each_entry(q, &all_q_list, all_q_node) blk_mq_queue_reinit(q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index faaf36ade7eb..2b8fd302f677 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -522,8 +522,6 @@ static void blk_release_queue(struct kobject *kobj) blk_trace_shutdown(q); - bdi_destroy(&q->backing_dev_info); - ida_simple_remove(&blk_queue_ida, q->id); call_rcu(&q->rcu_head, blk_free_queue_rcu); } diff --git a/block/bounce.c b/block/bounce.c index ab21ba203d5c..ed9dd8067120 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -221,8 +221,8 @@ bounce: if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) continue; - inc_zone_page_state(to->bv_page, NR_BOUNCE); to->bv_page = mempool_alloc(pool, q->bounce_gfp); + inc_zone_page_state(to->bv_page, NR_BOUNCE); if (rw == WRITE) { char *vto, *vfrom; diff --git a/block/elevator.c b/block/elevator.c index 59794d0d38e3..8985038f398c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -157,7 +157,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); if (unlikely(!eq)) - goto err; + return NULL; eq->type = e; kobject_init(&eq->kobj, &elv_ktype); @@ -165,10 +165,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, hash_init(eq->hash); return eq; -err: - kfree(eq); - elevator_put(e); - return NULL; } EXPORT_SYMBOL(elevator_alloc); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ae3fcb4199e9..d7173cb1ea76 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1620,8 +1620,8 @@ out: static void loop_remove(struct loop_device *lo) { - del_gendisk(lo->lo_disk); blk_cleanup_queue(lo->lo_queue); + del_gendisk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); put_disk(lo->lo_disk); kfree(lo); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 6b736b00f63e..88f13c525712 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -944,7 +944,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len) { - __be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue)); + __be32 max_sectors = cpu_to_be32( + nvme_block_nr(ns, queue_max_hw_sectors(ns->queue))); __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors); __be32 discard_desc_count = cpu_to_be32(0x100); diff --git a/drivers/md/md.c b/drivers/md/md.c index d4f31e195e26..593a02476c78 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4818,12 +4818,12 @@ static void md_free(struct kobject *ko) if (mddev->sysfs_state) sysfs_put(mddev->sysfs_state); + if (mddev->queue) + blk_cleanup_queue(mddev->queue); if (mddev->gendisk) { del_gendisk(mddev->gendisk); put_disk(mddev->gendisk); } - if (mddev->queue) - blk_cleanup_queue(mddev->queue); kfree(mddev); } diff --git a/fs/splice.c b/fs/splice.c index 476024bb6546..bfe62ae40f40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, long ret, bytes; umode_t i_mode; size_t len; - int i, flags; + int i, flags, more; /* * We require the input being a regular file, as we don't want to @@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * Don't block on output, we have to drain the direct pipe. */ sd->flags &= ~SPLICE_F_NONBLOCK; + more = sd->flags & SPLICE_F_MORE; while (len) { size_t read_len; @@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, sd->total_len = read_len; /* + * If more data is pending, set SPLICE_F_MORE + * If this is the last data and SPLICE_F_MORE was not set + * initially, clears it. + */ + if (read_len < len) + sd->flags |= SPLICE_F_MORE; + else if (!more) + sd->flags &= ~SPLICE_F_MORE; + /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1b25e35ea5f..b7299febc4b4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,7 +220,7 @@ enum rq_flag_bits { /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_THROTTLED (1ULL << __REQ_THROTTLED) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5daf5568b9e1..eb59f7eea508 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -580,7 +580,7 @@ static long long pos_ratio_polynom(unsigned long setpoint, long x; x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, - limit - setpoint + 1); + (limit - setpoint) | 1); pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; @@ -807,7 +807,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, * scale global setpoint to bdi's: * bdi_setpoint = setpoint * bdi_thresh / thresh */ - x = div_u64((u64)bdi_thresh << 16, thresh + 1); + x = div_u64((u64)bdi_thresh << 16, thresh | 1); bdi_setpoint = setpoint * (u64)x >> 16; /* * Use span=(8*write_bw) in single bdi case as indicated by @@ -822,7 +822,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, if (bdi_dirty < x_intercept - span / 4) { pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty), - x_intercept - bdi_setpoint + 1); + (x_intercept - bdi_setpoint) | 1); } else pos_ratio /= 4; |