From 7982e90c3a574c90b51aa1c77404e7e6189d58d5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 8 Mar 2014 17:20:01 -0700 Subject: block: fix q->flush_rq NULL pointer crash on dm-mpath flush Commit 1874198 ("blk-mq: rework flush sequencing logic") switched ->flush_rq from being an embedded member of the request_queue structure to being dynamically allocated in blk_init_queue_node(). Request-based DM multipath doesn't use blk_init_queue_node(), instead it uses blk_alloc_queue_node() + blk_init_allocated_queue(). Because commit 1874198 placed the dynamic allocation of ->flush_rq in blk_init_queue_node() any flush issued to a dm-mpath device would crash with a NULL pointer, e.g.: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] blk_rq_init+0x1e/0xb0 PGD bb3c7067 PUD bb01d067 PMD 0 Oops: 0002 [#1] SMP ... CPU: 5 PID: 5028 Comm: dt Tainted: G W O 3.14.0-rc3.snitm+ #10 ... task: ffff88032fb270e0 ti: ffff880079564000 task.ti: ffff880079564000 RIP: 0010:[] [] blk_rq_init+0x1e/0xb0 RSP: 0018:ffff880079565c98 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000030 RDX: ffff880260c74048 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff880079565ca8 R08: ffff880260aa1e98 R09: 0000000000000001 R10: ffff88032fa78500 R11: 0000000000000246 R12: 0000000000000000 R13: ffff880260aa1de8 R14: 0000000000000650 R15: 0000000000000000 FS: 00007f8d36a2a700(0000) GS:ffff88033fca0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000079b36000 CR4: 00000000000007e0 Stack: 0000000000000000 ffff880260c74048 ffff880079565cd8 ffffffff81257a47 ffff880260aa1de8 ffff880260c74048 0000000000000001 0000000000000000 ffff880079565d08 ffffffff81257c2d 0000000000000000 ffff880260aa1de8 Call Trace: [] blk_flush_complete_seq+0x2d7/0x2e0 [] blk_insert_flush+0x1dd/0x210 [] __elv_add_request+0x1f9/0x320 [] ? blk_account_io_start+0x111/0x190 [] blk_queue_bio+0x25b/0x330 [] dm_request+0x35/0x40 [dm_mod] [] generic_make_request+0xc0/0x100 [] submit_bio+0x73/0x140 [] submit_bio_wait+0x5d/0x80 [] blkdev_issue_flush+0x78/0xa0 [] blkdev_fsync+0x3f/0x60 [] vfs_fsync_range+0x1e/0x20 [] vfs_fsync+0x1c/0x20 [] do_fsync+0x41/0x80 [] ? SyS_lseek+0x7e/0x80 [] SyS_fsync+0x10/0x20 [] system_call_fastpath+0x16/0x1b Fix this by moving the ->flush_rq allocation from blk_init_queue_node() to blk_init_allocated_queue(). blk_init_queue_node() also calls blk_init_allocated_queue() so this change is functionality equivalent for all blk_init_queue_node() callers. Reported-by: Hannes Reinecke Reported-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 853f92749202..4cd5ffc18442 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -693,20 +693,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) if (!uninit_q) return NULL; - uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); - if (!uninit_q->flush_rq) - goto out_cleanup_queue; - q = blk_init_allocated_queue(uninit_q, rfn, lock); if (!q) - goto out_free_flush_rq; - return q; + blk_cleanup_queue(uninit_q); -out_free_flush_rq: - kfree(uninit_q->flush_rq); -out_cleanup_queue: - blk_cleanup_queue(uninit_q); - return NULL; + return q; } EXPORT_SYMBOL(blk_init_queue_node); @@ -717,6 +708,10 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, if (!q) return NULL; + q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); + if (!q->flush_rq) + return NULL; + if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) return NULL; -- cgit v1.2.3 From 10beafc190abcc4ad64024a053441520ba116127 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 8 Mar 2014 20:19:20 -0700 Subject: block: change flush sequence list addition back to front add Commit 18741986 inadvertently changed the rq flush insertion from a head to a tail insertion. Fix that back up. Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-flush.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index f598f794c3c6..43e6b4755e9a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -140,14 +140,17 @@ static void mq_flush_run(struct work_struct *work) blk_mq_insert_request(rq, false, true, false); } -static bool blk_flush_queue_rq(struct request *rq) +static bool blk_flush_queue_rq(struct request *rq, bool add_front) { if (rq->q->mq_ops) { INIT_WORK(&rq->mq_flush_work, mq_flush_run); kblockd_schedule_work(rq->q, &rq->mq_flush_work); return false; } else { - list_add_tail(&rq->queuelist, &rq->q->queue_head); + if (add_front) + list_add(&rq->queuelist, &rq->q->queue_head); + else + list_add_tail(&rq->queuelist, &rq->q->queue_head); return true; } } @@ -193,7 +196,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, case REQ_FSEQ_DATA: list_move_tail(&rq->flush.list, &q->flush_data_in_flight); - queued = blk_flush_queue_rq(rq); + queued = blk_flush_queue_rq(rq, true); break; case REQ_FSEQ_DONE: @@ -326,7 +329,7 @@ static bool blk_kick_flush(struct request_queue *q) q->flush_rq->rq_disk = first_rq->rq_disk; q->flush_rq->end_io = flush_end_io; - return blk_flush_queue_rq(q->flush_rq); + return blk_flush_queue_rq(q->flush_rq, false); } static void flush_data_end_io(struct request *rq, int error) -- cgit v1.2.3 From 7f328908f9cb69a72ce1c9279508c786cf85f1f5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Mar 2014 14:29:37 -0600 Subject: mtip32xx: fix bad use of smp_processor_id() mtip_pci_probe() dumps the current CPU when loaded, but it does so in a preemptible context. Hence smp_processor_id() correctly warns: BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/155 caller is mtip_pci_probe+0x53/0x880 [mtip32xx] Switch to raw_smp_processor_id(), since it's just informational and persistent accuracy isn't important. Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 516026954be6..d777bb7cea93 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4498,7 +4498,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, } dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), - cpu_to_node(smp_processor_id()), smp_processor_id()); + cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id()); dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { -- cgit v1.2.3