From 4f024f3797c43cb4b73cd2c50cec728842d0e49e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2013 15:44:27 -0700 Subject: block: Abstract out bvec iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Geert Uytterhoeven Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Ed L. Cashin" Cc: Nick Piggin Cc: Lars Ellenberg Cc: Jiri Kosina Cc: Matthew Wilcox Cc: Geoff Levand Cc: Yehuda Sadeh Cc: Sage Weil Cc: Alex Elder Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris Cc: Philip Kelleher Cc: Rusty Russell Cc: "Michael S. Tsirkin" Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Neil Brown Cc: Alasdair Kergon Cc: Mike Snitzer Cc: dm-devel@redhat.com Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: Boaz Harrosh Cc: Benny Halevy Cc: "James E.J. Bottomley" Cc: Greg Kroah-Hartman Cc: "Nicholas A. Bellinger" Cc: Alexander Viro Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jaegeuk Kim Cc: Steven Whitehouse Cc: Dave Kleikamp Cc: Joern Engel Cc: Prasad Joshi Cc: Trond Myklebust Cc: KONISHI Ryusuke Cc: Mark Fasheh Cc: Joel Becker Cc: Ben Myers Cc: xfs@oss.sgi.com Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Len Brown Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Herton Ronaldo Krzesinski Cc: Ben Hutchings Cc: Andrew Morton Cc: Guo Chao Cc: Tejun Heo Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Wei Yongjun Cc: "Roger Pau Monné" Cc: Jan Beulich Cc: Stefano Stabellini Cc: Ian Campbell Cc: Sebastian Ott Cc: Christian Borntraeger Cc: Minchan Kim Cc: Jiang Liu Cc: Nitin Gupta Cc: Jerome Marchand Cc: Joe Perches Cc: Peng Tao Cc: Andy Adamson Cc: fanchaoting Cc: Jie Liu Cc: Sunil Mushran Cc: "Martin K. 
Petersen" Cc: Namjae Jeon Cc: Pankaj Kumar Cc: Dan Magenheimer Cc: Mel Gorman 6 --- include/linux/bio.h | 16 ++++++++-------- include/linux/blk_types.h | 19 ++++++++++++------- include/trace/events/bcache.h | 26 +++++++++++++------------- include/trace/events/block.h | 26 +++++++++++++------------- include/trace/events/f2fs.h | 4 ++-- 5 files changed, 48 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 060ff695085c..e2e0bc642ed1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -62,19 +62,19 @@ * on highmem page vectors */ #define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) -#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_idx) +#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) #define bio_page(bio) bio_iovec((bio))->bv_page #define bio_offset(bio) bio_iovec((bio))->bv_offset -#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) -#define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio))) +#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_iter.bi_idx) +#define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) +#define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio->bi_vcnt) return bio_iovec(bio)->bv_len; else /* dataless requests such as discard */ - return bio->bi_size; + return bio->bi_iter.bi_size; } static inline void *bio_data(struct bio *bio) @@ -108,7 +108,7 @@ static inline void *bio_data(struct bio *bio) */ #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) -#define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) +#define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) /* Default implementation of BIOVEC_PHYS_MERGEABLE */ #define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ @@ -150,7 +150,7 @@ static inline void *bio_data(struct bio *bio) i++) #define 
bio_for_each_segment(bvl, bio, i) \ - for (i = (bio)->bi_idx; \ + for (i = (bio)->bi_iter.bi_idx; \ bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ i++) @@ -365,7 +365,7 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, #define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) #define bio_kmap_irq(bio, flags) \ - __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) + __bio_kmap_irq((bio), (bio)->bi_iter.bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) /* diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 238ef0ed62f8..29b5b84d8a29 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -28,13 +28,19 @@ struct bio_vec { unsigned int bv_offset; }; +struct bvec_iter { + sector_t bi_sector; /* device address in 512 byte + sectors */ + unsigned int bi_size; /* residual I/O count */ + + unsigned int bi_idx; /* current index into bvl_vec */ +}; + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) */ struct bio { - sector_t bi_sector; /* device address in 512 byte - sectors */ struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; unsigned long bi_flags; /* status, command, etc */ @@ -42,16 +48,13 @@ struct bio { * top bits priority */ - unsigned short bi_vcnt; /* how many bio_vec's */ - unsigned short bi_idx; /* current index into bvl_vec */ + struct bvec_iter bi_iter; /* Number of segments in this BIO after * physical address coalescing is performed. */ unsigned int bi_phys_segments; - unsigned int bi_size; /* residual I/O count */ - /* * To keep track of the max segment size, we account for the * sizes of the first and last mergeable segments in this bio. 
@@ -74,11 +77,13 @@ struct bio { struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif + unsigned short bi_vcnt; /* how many bio_vec's */ + /* * Everything starting with bi_max_vecs will be preserved by bio_reset() */ - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ atomic_t bi_cnt; /* pin count */ diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index e2b9576d00e2..095c6e4fe1e8 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -24,10 +24,10 @@ DECLARE_EVENT_CLASS(bcache_request, __entry->dev = bio->bi_bdev->bd_dev; __entry->orig_major = d->disk->major; __entry->orig_minor = d->disk->first_minor; - __entry->sector = bio->bi_sector; - __entry->orig_sector = bio->bi_sector - 16; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->sector = bio->bi_iter.bi_sector; + __entry->orig_sector = bio->bi_iter.bi_sector - 16; + __entry->nr_sector = bio->bi_iter.bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)", @@ -99,9 +99,9 @@ DECLARE_EVENT_CLASS(bcache_bio, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->sector = bio->bi_iter.bi_sector; + __entry->nr_sector = bio->bi_iter.bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u", @@ -134,9 +134,9 @@ TRACE_EVENT(bcache_read, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->sector = bio->bi_iter.bi_sector; + __entry->nr_sector = bio->bi_iter.bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, 
bio->bi_rw, bio->bi_iter.bi_size); __entry->cache_hit = hit; __entry->bypass = bypass; ), @@ -162,9 +162,9 @@ TRACE_EVENT(bcache_write, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->sector = bio->bi_iter.bi_sector; + __entry->nr_sector = bio->bi_iter.bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); __entry->writeback = writeback; __entry->bypass = bypass; ), diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 4c2301d2ef1a..e76ae19a8d6f 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -243,9 +243,9 @@ TRACE_EVENT(block_bio_bounce, TP_fast_assign( __entry->dev = bio->bi_bdev ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio->bi_sector; + __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -280,10 +280,10 @@ TRACE_EVENT(block_bio_complete, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; + __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = error; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u [%d]", @@ -308,9 +308,9 @@ DECLARE_EVENT_CLASS(block_bio_merge, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; + __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -375,9 +375,9 @@ 
TRACE_EVENT(block_bio_queue, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; + __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -403,7 +403,7 @@ DECLARE_EVENT_CLASS(block_get_rq, TP_fast_assign( __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio ? bio->bi_sector : 0; + __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, bio ? bio->bi_rw : 0, __entry->nr_sector); @@ -538,9 +538,9 @@ TRACE_EVENT(block_split, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; + __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -579,11 +579,11 @@ TRACE_EVENT(block_bio_remap, TP_fast_assign( __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; + __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e0dc355fa317..bd3ee4fbe7a7 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -616,8 +616,8 @@ TRACE_EVENT(f2fs_do_submit_bio, __entry->dev = sb->s_dev; __entry->btype = btype; __entry->sync = sync; - __entry->sector = bio->bi_sector; - __entry->size = bio->bi_size; + __entry->sector = bio->bi_iter.bi_sector; + __entry->size = 
bio->bi_iter.bi_size; ), TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", -- cgit v1.2.3 From a4ad39b1d10584dfcfcfb0d510faab2c7f034399 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 14:24:32 -0700 Subject: block: Convert bio_iovec() to bvec_iter For immutable biovecs, we'll be introducing a new bio_iovec() that uses our new bvec iterator to construct a biovec, taking into account bvec_iter->bi_bvec_done - this patch updates existing users for the new usage. Some of the existing users really do need a pointer into the bvec array - those uses are all going to be removed, but we'll need the functionality from immutable to remove them - so for now rename the existing bio_iovec() -> __bio_iovec(), and it'll be removed in a couple patches. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: "Ed L. Cashin" Cc: Alasdair Kergon Cc: dm-devel@redhat.com Cc: "James E.J. Bottomley" --- drivers/block/aoe/aoecmd.c | 2 +- drivers/md/bcache/io.c | 13 +++++++------ drivers/md/dm-verity.c | 2 +- drivers/scsi/sd.c | 2 +- fs/bio.c | 20 ++++++++++---------- include/linux/bio.h | 10 ++++++---- 6 files changed, 26 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 877ba119b3f8..77c24ab1898a 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -932,7 +932,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio) buf->resid = bio->bi_iter.bi_size; buf->sector = bio->bi_iter.bi_sector; bio_pageinc(bio); - buf->bv = bio_iovec(bio); + buf->bv = __bio_iovec(bio); buf->bv_resid = buf->bv->bv_len; WARN_ON(buf->bv_resid == 0); } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index cc4ba2da5fb6..dc44f0689eb7 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -22,11 +22,12 @@ static void bch_bi_idx_hack_endio(struct bio *bio, int error) static void bch_generic_make_request_hack(struct bio *bio) { if 
(bio->bi_iter.bi_idx) { + int i; + struct bio_vec *bv; struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio)); - memcpy(clone->bi_io_vec, - bio_iovec(bio), - bio_segments(bio) * sizeof(struct bio_vec)); + bio_for_each_segment(bv, bio, i) + clone->bi_io_vec[clone->bi_vcnt++] = *bv; clone->bi_iter.bi_sector = bio->bi_iter.bi_sector; clone->bi_bdev = bio->bi_bdev; @@ -97,7 +98,7 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, if (!ret) return NULL; - memcpy(ret->bi_io_vec, bio_iovec(bio), + memcpy(ret->bi_io_vec, __bio_iovec(bio), sizeof(struct bio_vec) * vcnt); break; @@ -106,7 +107,7 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, if (!ret) return NULL; - memcpy(ret->bi_io_vec, bio_iovec(bio), + memcpy(ret->bi_io_vec, __bio_iovec(bio), sizeof(struct bio_vec) * vcnt); ret->bi_io_vec[vcnt - 1].bv_len = nbytes; @@ -182,7 +183,7 @@ static unsigned bch_bio_max_sectors(struct bio *bio) ret = min(ret, queue_max_sectors(q)); WARN_ON(!ret); - ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9); + ret = max_t(int, ret, bio_iovec(bio).bv_len >> 9); return ret; } diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 132b3154d466..5392135924ca 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -524,7 +524,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) io->io_vec = io->io_vec_inline; else io->io_vec = mempool_alloc(v->vec_mempool, GFP_NOIO); - memcpy(io->io_vec, bio_iovec(bio), + memcpy(io->io_vec, __bio_iovec(bio), io->io_vec_size * sizeof(struct bio_vec)); verity_submit_prefetch(v, io); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e6c4bff04339..200d6bc81240 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -801,7 +801,7 @@ static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq) if (sdkp->device->no_write_same) return BLKPREP_KILL; - BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size); + BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != 
sdp->sector_size); sector >>= ilog2(sdp->sector_size) - 9; nr_sectors >>= ilog2(sdp->sector_size) - 9; diff --git a/fs/bio.c b/fs/bio.c index a402ad6e753f..7bb281fc3d5c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -821,12 +821,12 @@ void bio_advance(struct bio *bio, unsigned bytes) break; } - if (bytes >= bio_iovec(bio)->bv_len) { - bytes -= bio_iovec(bio)->bv_len; + if (bytes >= bio_iovec(bio).bv_len) { + bytes -= bio_iovec(bio).bv_len; bio->bi_iter.bi_idx++; } else { - bio_iovec(bio)->bv_len -= bytes; - bio_iovec(bio)->bv_offset += bytes; + bio_iovec(bio).bv_len -= bytes; + bio_iovec(bio).bv_offset += bytes; bytes = 0; } } @@ -879,8 +879,8 @@ void bio_copy_data(struct bio *dst, struct bio *src) unsigned src_offset, dst_offset, bytes; void *src_p, *dst_p; - src_bv = bio_iovec(src); - dst_bv = bio_iovec(dst); + src_bv = __bio_iovec(src); + dst_bv = __bio_iovec(dst); src_offset = src_bv->bv_offset; dst_offset = dst_bv->bv_offset; @@ -893,7 +893,7 @@ void bio_copy_data(struct bio *dst, struct bio *src) if (!src) break; - src_bv = bio_iovec(src); + src_bv = __bio_iovec(src); } src_offset = src_bv->bv_offset; @@ -906,7 +906,7 @@ void bio_copy_data(struct bio *dst, struct bio *src) if (!dst) break; - dst_bv = bio_iovec(dst); + dst_bv = __bio_iovec(dst); } dst_offset = dst_bv->bv_offset; @@ -1776,8 +1776,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) bp->bio1.bi_iter.bi_size = first_sectors << 9; if (bi->bi_vcnt != 0) { - bp->bv1 = *bio_iovec(bi); - bp->bv2 = *bio_iovec(bi); + bp->bv1 = bio_iovec(bi); + bp->bv2 = bio_iovec(bi); if (bio_is_rw(bi)) { bp->bv2.bv_offset += first_sectors << 9; diff --git a/include/linux/bio.h b/include/linux/bio.h index e2e0bc642ed1..9f182fcbe714 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -62,9 +62,11 @@ * on highmem page vectors */ #define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) -#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) -#define bio_page(bio) bio_iovec((bio))->bv_page 
-#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define __bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) +#define bio_iovec(bio) (*__bio_iovec(bio)) + +#define bio_page(bio) (bio_iovec((bio)).bv_page) +#define bio_offset(bio) (bio_iovec((bio)).bv_offset) #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_iter.bi_idx) #define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) #define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) @@ -72,7 +74,7 @@ static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio->bi_vcnt) - return bio_iovec(bio)->bv_len; + return bio_iovec(bio).bv_len; else /* dataless requests such as discard */ return bio->bi_iter.bi_size; } -- cgit v1.2.3 From 7988613b0e5b2638caf6cd493cc78e9595eba19c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 17:19:00 -0800 Subject: block: Convert bio_for_each_segment() to bvec_iter More prep work for immutable biovecs - with immutable bvecs drivers won't be able to use the biovec directly, they'll need to use helpers that take into account bio->bi_iter.bi_bvec_done. This updates callers for the new usage without changing the implementation yet. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Geert Uytterhoeven Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Ed L. Cashin" Cc: Nick Piggin Cc: Lars Ellenberg Cc: Jiri Kosina Cc: Paul Clements Cc: Jim Paris Cc: Geoff Levand Cc: Yehuda Sadeh Cc: Sage Weil Cc: Alex Elder Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris Cc: Philip Kelleher Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Neil Brown Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: Nagalakshmi Nandigama Cc: Sreekanth Reddy Cc: support@lsi.com Cc: "James E.J. 
Bottomley" Cc: Greg Kroah-Hartman Cc: Alexander Viro Cc: Steven Whitehouse Cc: Herton Ronaldo Krzesinski Cc: Tejun Heo Cc: Andrew Morton Cc: Guo Chao Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Matthew Wilcox Cc: Keith Busch Cc: Stephen Hemminger Cc: Quoc-Son Anh Cc: Sebastian Ott Cc: Nitin Gupta Cc: Minchan Kim Cc: Jerome Marchand Cc: Seth Jennings Cc: "Martin K. Petersen" Cc: Mike Snitzer Cc: Vivek Goyal Cc: "Darrick J. Wong" Cc: Chris Metcalf Cc: Jan Kara Cc: linux-m68k@lists.linux-m68k.org Cc: linuxppc-dev@lists.ozlabs.org Cc: drbd-user@lists.linbit.com Cc: nbd-general@lists.sourceforge.net Cc: cbe-oss-dev@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Cc: virtualization@lists.linux-foundation.org Cc: linux-raid@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: DL-MPTFusionLinux@lsi.com Cc: linux-scsi@vger.kernel.org Cc: devel@driverdev.osuosl.org Cc: linux-fsdevel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: linux-mm@kvack.org Acked-by: Geoff Levand --- arch/m68k/emu/nfblock.c | 11 ++--- arch/powerpc/sysdev/axonram.c | 18 ++++---- block/blk-core.c | 4 +- block/blk-merge.c | 49 ++++++++++---------- drivers/block/aoe/aoecmd.c | 16 +++---- drivers/block/brd.c | 12 ++--- drivers/block/drbd/drbd_main.c | 27 ++++++----- drivers/block/drbd/drbd_receiver.c | 13 +++--- drivers/block/drbd/drbd_worker.c | 8 ++-- drivers/block/floppy.c | 12 ++--- drivers/block/loop.c | 23 +++++----- drivers/block/mtip32xx/mtip32xx.c | 13 +++--- drivers/block/nbd.c | 12 ++--- drivers/block/nvme-core.c | 33 ++++++++------ drivers/block/ps3disk.c | 10 ++--- drivers/block/ps3vram.c | 10 ++--- drivers/block/rbd.c | 38 ++++++++-------- drivers/block/rsxx/dma.c | 11 ++--- drivers/md/bcache/btree.c | 4 +- drivers/md/bcache/debug.c | 19 ++++---- drivers/md/bcache/io.c | 69 ++++++++++++----------------- drivers/md/bcache/request.c | 26 +++++------ drivers/md/raid5.c | 12 ++--- drivers/s390/block/dasd_diag.c | 10 ++--- drivers/s390/block/dasd_eckd.c | 48 ++++++++++---------- 
drivers/s390/block/dasd_fba.c | 26 +++++------ drivers/s390/block/dcssblk.c | 16 +++---- drivers/s390/block/scm_blk.c | 8 ++-- drivers/s390/block/scm_blk_cluster.c | 4 +- drivers/s390/block/xpram.c | 10 ++--- drivers/scsi/mpt2sas/mpt2sas_transport.c | 31 ++++++------- drivers/scsi/mpt3sas/mpt3sas_transport.c | 31 ++++++------- drivers/staging/lustre/lustre/llite/lloop.c | 14 +++--- drivers/staging/zram/zram_drv.c | 19 ++++---- fs/bio-integrity.c | 30 +++++++------ fs/bio.c | 22 ++++----- include/linux/bio.h | 28 ++++++------ include/linux/blkdev.h | 7 +-- mm/bounce.c | 44 +++++++++--------- 39 files changed, 401 insertions(+), 397 deletions(-) (limited to 'include') diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 0a9d0b3c794b..2d75ae246167 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -62,17 +62,18 @@ struct nfhd_device { static void nfhd_make_request(struct request_queue *queue, struct bio *bio) { struct nfhd_device *dev = queue->queuedata; - struct bio_vec *bvec; - int i, dir, len, shift; + struct bio_vec bvec; + struct bvec_iter iter; + int dir, len, shift; sector_t sec = bio->bi_iter.bi_sector; dir = bio_data_dir(bio); shift = dev->bshift; - bio_for_each_segment(bvec, bio, i) { - len = bvec->bv_len; + bio_for_each_segment(bvec, bio, iter) { + len = bvec.bv_len; len >>= 9; nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift, - bvec_to_phys(bvec)); + bvec_to_phys(&bvec)); sec += len; } bio_endio(bio, 0); diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index f33bcbaa6a07..47b6b9f81d43 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -109,28 +109,28 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; unsigned long phys_mem, phys_end; void *user_mem; - struct bio_vec *vec; + struct bio_vec vec; unsigned int transfered; - unsigned short idx; + struct bvec_iter iter; 
phys_mem = bank->io_addr + (bio->bi_iter.bi_sector << AXON_RAM_SECTOR_SHIFT); phys_end = bank->io_addr + bank->size; transfered = 0; - bio_for_each_segment(vec, bio, idx) { - if (unlikely(phys_mem + vec->bv_len > phys_end)) { + bio_for_each_segment(vec, bio, iter) { + if (unlikely(phys_mem + vec.bv_len > phys_end)) { bio_io_error(bio); return; } - user_mem = page_address(vec->bv_page) + vec->bv_offset; + user_mem = page_address(vec.bv_page) + vec.bv_offset; if (bio_data_dir(bio) == READ) - memcpy(user_mem, (void *) phys_mem, vec->bv_len); + memcpy(user_mem, (void *) phys_mem, vec.bv_len); else - memcpy((void *) phys_mem, user_mem, vec->bv_len); + memcpy((void *) phys_mem, user_mem, vec.bv_len); - phys_mem += vec->bv_len; - transfered += vec->bv_len; + phys_mem += vec.bv_len; + transfered += vec.bv_len; } bio_endio(bio, 0); } diff --git a/block/blk-core.c b/block/blk-core.c index 5c2ab2c74066..5da8e900d3b1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2746,10 +2746,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, void rq_flush_dcache_pages(struct request *rq) { struct req_iterator iter; - struct bio_vec *bvec; + struct bio_vec bvec; rq_for_each_segment(bvec, rq, iter) - flush_dcache_page(bvec->bv_page); + flush_dcache_page(bvec.bv_page); } EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); #endif diff --git a/block/blk-merge.c b/block/blk-merge.c index 03bc083c28cf..a1ead9049ed6 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -12,10 +12,11 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, struct bio *bio) { - struct bio_vec *bv, *bvprv = NULL; - int cluster, i, high, highprv = 1; + struct bio_vec bv, bvprv = { NULL }; + int cluster, high, highprv = 1; unsigned int seg_size, nr_phys_segs; struct bio *fbio, *bbio; + struct bvec_iter iter; if (!bio) return 0; @@ -25,25 +26,23 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, seg_size = 0; nr_phys_segs = 0; for_each_bio(bio) { - 
bio_for_each_segment(bv, bio, i) { + bio_for_each_segment(bv, bio, iter) { /* * the trick here is making sure that a high page is * never considered part of another segment, since that * might change with the bounce page. */ - high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q); - if (high || highprv) - goto new_segment; - if (cluster) { - if (seg_size + bv->bv_len + high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q); + if (!high && !highprv && cluster) { + if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) + if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) goto new_segment; - seg_size += bv->bv_len; + seg_size += bv.bv_len; bvprv = bv; continue; } @@ -54,7 +53,7 @@ new_segment: nr_phys_segs++; bvprv = bv; - seg_size = bv->bv_len; + seg_size = bv.bv_len; highprv = high; } bbio = bio; @@ -110,21 +109,21 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, return 0; } -static void +static inline void __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, - struct scatterlist *sglist, struct bio_vec **bvprv, + struct scatterlist *sglist, struct bio_vec *bvprv, struct scatterlist **sg, int *nsegs, int *cluster) { int nbytes = bvec->bv_len; - if (*bvprv && *cluster) { + if (*sg && *cluster) { if ((*sg)->length + nbytes > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(*bvprv, bvec)) + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, *bvprv, bvec)) + if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) goto new_segment; (*sg)->length += nbytes; @@ -150,7 +149,7 @@ new_segment: sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); (*nsegs)++; } - *bvprv = bvec; + *bvprv = *bvec; } /* @@ -160,7 +159,7 @@ new_segment: int blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist) { - struct 
bio_vec *bvec, *bvprv; + struct bio_vec bvec, bvprv; struct req_iterator iter; struct scatterlist *sg; int nsegs, cluster; @@ -171,10 +170,9 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, /* * for each bio in rq */ - bvprv = NULL; sg = NULL; rq_for_each_segment(bvec, rq, iter) { - __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, + __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, &nsegs, &cluster); } /* segments in rq */ @@ -223,18 +221,17 @@ EXPORT_SYMBOL(blk_rq_map_sg); int blk_bio_map_sg(struct request_queue *q, struct bio *bio, struct scatterlist *sglist) { - struct bio_vec *bvec, *bvprv; + struct bio_vec bvec, bvprv; struct scatterlist *sg; int nsegs, cluster; - unsigned long i; + struct bvec_iter iter; nsegs = 0; cluster = blk_queue_cluster(q); - bvprv = NULL; sg = NULL; - bio_for_each_segment(bvec, bio, i) { - __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, + bio_for_each_segment(bvec, bio, iter) { + __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, &nsegs, &cluster); } /* segments in bio */ diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 77c24ab1898a..7a06aec1dedc 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -897,15 +897,15 @@ rqbiocnt(struct request *r) static void bio_pageinc(struct bio *bio) { - struct bio_vec *bv; + struct bio_vec bv; struct page *page; - int i; + struct bvec_iter iter; - bio_for_each_segment(bv, bio, i) { + bio_for_each_segment(bv, bio, iter) { /* Non-zero page count for non-head members of * compound pages is no longer allowed by the kernel. 
*/ - page = compound_trans_head(bv->bv_page); + page = compound_trans_head(bv.bv_page); atomic_inc(&page->_count); } } @@ -913,12 +913,12 @@ bio_pageinc(struct bio *bio) static void bio_pagedec(struct bio *bio) { - struct bio_vec *bv; struct page *page; - int i; + struct bio_vec bv; + struct bvec_iter iter; - bio_for_each_segment(bv, bio, i) { - page = compound_trans_head(bv->bv_page); + bio_for_each_segment(bv, bio, iter) { + page = compound_trans_head(bv.bv_page); atomic_dec(&page->_count); } } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 66f5aaae15a2..e73b85cf0756 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -328,9 +328,9 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; int rw; - struct bio_vec *bvec; + struct bio_vec bvec; sector_t sector; - int i; + struct bvec_iter iter; int err = -EIO; sector = bio->bi_iter.bi_sector; @@ -347,10 +347,10 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) if (rw == READA) rw = READ; - bio_for_each_segment(bvec, bio, i) { - unsigned int len = bvec->bv_len; - err = brd_do_bvec(brd, bvec->bv_page, len, - bvec->bv_offset, rw, sector); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + err = brd_do_bvec(brd, bvec.bv_page, len, + bvec.bv_offset, rw, sector); if (err) break; sector += len >> SECTOR_SHIFT; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9e3818b1bc83..f4e5440aba05 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1537,15 +1537,17 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) { - struct bio_vec *bvec; - int i; + struct bio_vec bvec; + struct bvec_iter iter; + /* hint all but last page with MSG_MORE */ - bio_for_each_segment(bvec, bio, i) { + 
bio_for_each_segment(bvec, bio, iter) { int err; - err = _drbd_no_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, - i == bio->bi_vcnt - 1 ? 0 : MSG_MORE); + err = _drbd_no_send_page(mdev, bvec.bv_page, + bvec.bv_offset, bvec.bv_len, + bio_iter_last(bio, iter) + ? 0 : MSG_MORE); if (err) return err; } @@ -1554,15 +1556,16 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) { - struct bio_vec *bvec; - int i; + struct bio_vec bvec; + struct bvec_iter iter; + /* hint all but last page with MSG_MORE */ - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment(bvec, bio, iter) { int err; - err = _drbd_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, - i == bio->bi_vcnt - 1 ? 0 : MSG_MORE); + err = _drbd_send_page(mdev, bvec.bv_page, + bvec.bv_offset, bvec.bv_len, + bio_iter_last(bio, iter) ? 0 : MSG_MORE); if (err) return err; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5326c22cdb9d..d073305ffd5e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1595,9 +1595,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, sector_t sector, int data_size) { - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; struct bio *bio; - int dgs, err, i, expect; + int dgs, err, expect; void *dig_in = mdev->tconn->int_dig_in; void *dig_vv = mdev->tconn->int_dig_vv; @@ -1617,11 +1618,11 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, bio = req->master_bio; D_ASSERT(sector == bio->bi_iter.bi_sector); - bio_for_each_segment(bvec, bio, i) { - void *mapped = kmap(bvec->bv_page) + bvec->bv_offset; - expect = min_t(int, data_size, bvec->bv_len); + bio_for_each_segment(bvec, bio, iter) { + void *mapped = kmap(bvec.bv_page) + bvec.bv_offset; + 
expect = min_t(int, data_size, bvec.bv_len); err = drbd_recv_all_warn(mdev->tconn, mapped, expect); - kunmap(bvec->bv_page); + kunmap(bvec.bv_page); if (err) return err; data_size -= expect; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 891c0ecaa292..84d3175d493a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -313,8 +313,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * { struct hash_desc desc; struct scatterlist sg; - struct bio_vec *bvec; - int i; + struct bio_vec bvec; + struct bvec_iter iter; desc.tfm = tfm; desc.flags = 0; @@ -322,8 +322,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * sg_init_table(&sg, 1); crypto_hash_init(&desc); - bio_for_each_segment(bvec, bio, i) { - sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); + bio_for_each_segment(bvec, bio, iter) { + sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); crypto_hash_update(&desc, &sg, sg.length); } crypto_hash_final(&desc, digest); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 6a86fe7b730f..6b29c4422828 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2351,7 +2351,7 @@ static void rw_interrupt(void) /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct bio_vec *bv; + struct bio_vec bv; int size; struct req_iterator iter; char *base; @@ -2360,10 +2360,10 @@ static int buffer_chain_size(void) size = 0; rq_for_each_segment(bv, current_req, iter) { - if (page_address(bv->bv_page) + bv->bv_offset != base + size) + if (page_address(bv.bv_page) + bv.bv_offset != base + size) break; - size += bv->bv_len; + size += bv.bv_len; } return size >> 9; @@ -2389,7 +2389,7 @@ static int transfer_size(int ssize, int max_sector, int max_size) static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct bio_vec *bv; + struct bio_vec bv; char *buffer; char *dma_buffer; int size; @@ -2427,10 +2427,10 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) if (!remaining) break; - size = bv->bv_len; + size = bv.bv_len; SUPBOUND(size, remaining); - buffer = page_address(bv->bv_page) + bv->bv_offset; + buffer = page_address(bv.bv_page) + bv.bv_offset; if (dma_buffer + size > floppy_track_buffer + (max_buffer_sectors << 10) || dma_buffer < floppy_track_buffer) { diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f5e39989adde..33fde3a39759 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -288,9 +288,10 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) { int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, struct page *page); - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; struct page *page = NULL; - int i, ret = 0; + int ret = 0; if (lo->transfer != transfer_none) { page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); @@ -302,11 +303,11 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) do_lo_send = do_lo_send_direct_write; } - bio_for_each_segment(bvec, bio, i) { - ret = do_lo_send(lo, bvec, pos, page); + bio_for_each_segment(bvec, bio, iter) { + ret = do_lo_send(lo, &bvec, pos, page); if (ret < 
0) break; - pos += bvec->bv_len; + pos += bvec.bv_len; } if (page) { kunmap(page); @@ -392,20 +393,20 @@ do_lo_receive(struct loop_device *lo, static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; ssize_t s; - int i; - bio_for_each_segment(bvec, bio, i) { - s = do_lo_receive(lo, bvec, bsize, pos); + bio_for_each_segment(bvec, bio, iter) { + s = do_lo_receive(lo, &bvec, bsize, pos); if (s < 0) return s; - if (s != bvec->bv_len) { + if (s != bvec.bv_len) { zero_fill_bio(bio); break; } - pos += bvec->bv_len; + pos += bvec.bv_len; } return 0; } diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 69e9eb5a6b34..52b2f2a71470 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3962,8 +3962,9 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) { struct driver_data *dd = queue->queuedata; struct scatterlist *sg; - struct bio_vec *bvec; - int i, nents = 0; + struct bio_vec bvec; + struct bvec_iter iter; + int nents = 0; int tag = 0, unaligned = 0; if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { @@ -4026,11 +4027,11 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) } /* Create the scatter list for this bio. 
*/ - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment(bvec, bio, iter) { sg_set_page(&sg[nents], - bvec->bv_page, - bvec->bv_len, - bvec->bv_offset); + bvec.bv_page, + bvec.bv_len, + bvec.bv_offset); nents++; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 2dc3b5153f0d..aa362f493216 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -271,7 +271,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) if (nbd_cmd(req) == NBD_CMD_WRITE) { struct req_iterator iter; - struct bio_vec *bvec; + struct bio_vec bvec; /* * we are really probing at internals to determine * whether to set MSG_MORE or not... @@ -281,8 +281,8 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) if (!rq_iter_last(req, iter)) flags = MSG_MORE; dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", - nbd->disk->disk_name, req, bvec->bv_len); - result = sock_send_bvec(nbd, bvec, flags); + nbd->disk->disk_name, req, bvec.bv_len); + result = sock_send_bvec(nbd, &bvec, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", @@ -378,10 +378,10 @@ static struct request *nbd_read_stat(struct nbd_device *nbd) nbd->disk->disk_name, req); if (nbd_cmd(req) == NBD_CMD_READ) { struct req_iterator iter; - struct bio_vec *bvec; + struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - result = sock_recv_bvec(nbd, bvec); + result = sock_recv_bvec(nbd, &bvec); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -389,7 +389,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd) return req; } dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", - nbd->disk->disk_name, req, bvec->bv_len); + nbd->disk->disk_name, req, bvec.bv_len); } } return req; diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 53d217381873..5539d2920872 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -550,9 +550,11 @@ 
static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, struct bio *bio, enum dma_data_direction dma_dir, int psegs) { - struct bio_vec *bvec, *bvprv = NULL; + struct bio_vec bvec, bvprv; + struct bvec_iter iter; struct scatterlist *sg = NULL; - int i, length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size; + int length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size; + int first = 1; if (nvmeq->dev->stripe_size) split_len = nvmeq->dev->stripe_size - @@ -560,25 +562,28 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, (nvmeq->dev->stripe_size - 1)); sg_init_table(iod->sg, psegs); - bio_for_each_segment(bvec, bio, i) { - if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) { - sg->length += bvec->bv_len; + bio_for_each_segment(bvec, bio, iter) { + if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) { + sg->length += bvec.bv_len; } else { - if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec)) - return nvme_split_and_submit(bio, nvmeq, i, - length, 0); + if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec)) + return nvme_split_and_submit(bio, nvmeq, + iter.bi_idx, + length, 0); sg = sg ? 
sg + 1 : iod->sg; - sg_set_page(sg, bvec->bv_page, bvec->bv_len, - bvec->bv_offset); + sg_set_page(sg, bvec.bv_page, + bvec.bv_len, bvec.bv_offset); nsegs++; } - if (split_len - length < bvec->bv_len) - return nvme_split_and_submit(bio, nvmeq, i, split_len, - split_len - length); - length += bvec->bv_len; + if (split_len - length < bvec.bv_len) + return nvme_split_and_submit(bio, nvmeq, iter.bi_idx, + split_len, + split_len - length); + length += bvec.bv_len; bvprv = bvec; + first = 0; } iod->nents = nsegs; sg_mark_end(sg); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 464be78a0836..1c6edb9a9960 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -94,7 +94,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, { unsigned int offset = 0; struct req_iterator iter; - struct bio_vec *bvec; + struct bio_vec bvec; unsigned int i = 0; size_t size; void *buf; @@ -106,14 +106,14 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, __func__, __LINE__, i, bio_segments(iter.bio), bio_sectors(iter.bio), iter.bio->bi_iter.bi_sector); - size = bvec->bv_len; - buf = bvec_kmap_irq(bvec, &flags); + size = bvec.bv_len; + buf = bvec_kmap_irq(&bvec, &flags); if (gather) memcpy(dev->bounce_buf+offset, buf, size); else memcpy(buf, dev->bounce_buf+offset, size); offset += size; - flush_kernel_dcache_page(bvec->bv_page); + flush_kernel_dcache_page(bvec.bv_page); bvec_kunmap_irq(buf, &flags); i++; } @@ -130,7 +130,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, #ifdef DEBUG unsigned int n = 0; - struct bio_vec *bv; + struct bio_vec bv; struct req_iterator iter; rq_for_each_segment(bv, req, iter) diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 320bbfc9b902..ef45cfb98fd2 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -555,14 +555,14 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev, const char *op = write ? 
"write" : "read"; loff_t offset = bio->bi_iter.bi_sector << 9; int error = 0; - struct bio_vec *bvec; - unsigned int i; + struct bio_vec bvec; + struct bvec_iter iter; struct bio *next; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment(bvec, bio, iter) { /* PS3 is ppc64, so we don't handle highmem */ - char *ptr = page_address(bvec->bv_page) + bvec->bv_offset; - size_t len = bvec->bv_len, retlen; + char *ptr = page_address(bvec.bv_page) + bvec.bv_offset; + size_t len = bvec.bv_len, retlen; dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op, len, offset); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a8f4fe2d4d1b..20e8ab35736b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1109,23 +1109,23 @@ static void bio_chain_put(struct bio *chain) */ static void zero_bio_chain(struct bio *chain, int start_ofs) { - struct bio_vec *bv; + struct bio_vec bv; + struct bvec_iter iter; unsigned long flags; void *buf; - int i; int pos = 0; while (chain) { - bio_for_each_segment(bv, chain, i) { - if (pos + bv->bv_len > start_ofs) { + bio_for_each_segment(bv, chain, iter) { + if (pos + bv.bv_len > start_ofs) { int remainder = max(start_ofs - pos, 0); - buf = bvec_kmap_irq(bv, &flags); + buf = bvec_kmap_irq(&bv, &flags); memset(buf + remainder, 0, - bv->bv_len - remainder); - flush_dcache_page(bv->bv_page); + bv.bv_len - remainder); + flush_dcache_page(bv.bv_page); bvec_kunmap_irq(buf, &flags); } - pos += bv->bv_len; + pos += bv.bv_len; } chain = chain->bi_next; @@ -1173,11 +1173,11 @@ static struct bio *bio_clone_range(struct bio *bio_src, unsigned int len, gfp_t gfpmask) { - struct bio_vec *bv; + struct bio_vec bv; + struct bvec_iter iter; + struct bvec_iter end_iter; unsigned int resid; - unsigned short idx; unsigned int voff; - unsigned short end_idx; unsigned short vcnt; struct bio *bio; @@ -1196,22 +1196,22 @@ static struct bio *bio_clone_range(struct bio *bio_src, /* Find first affected segment... 
*/ resid = offset; - bio_for_each_segment(bv, bio_src, idx) { - if (resid < bv->bv_len) + bio_for_each_segment(bv, bio_src, iter) { + if (resid < bv.bv_len) break; - resid -= bv->bv_len; + resid -= bv.bv_len; } voff = resid; /* ...and the last affected segment */ resid += len; - __bio_for_each_segment(bv, bio_src, end_idx, idx) { - if (resid <= bv->bv_len) + __bio_for_each_segment(bv, bio_src, end_iter, iter) { + if (resid <= bv.bv_len) break; - resid -= bv->bv_len; + resid -= bv.bv_len; } - vcnt = end_idx - idx + 1; + vcnt = end_iter.bi_idx - iter.bi_idx + 1; /* Build the clone */ @@ -1229,7 +1229,7 @@ static struct bio *bio_clone_range(struct bio *bio_src, * Copy over our part of the bio_vec, then update the first * and last (or only) entries. */ - memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], + memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[iter.bi_idx], vcnt * sizeof (struct bio_vec)); bio->bi_io_vec[0].bv_offset += voff; if (vcnt > 1) { diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 3716633be3c2..cf8cd293abb5 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -684,7 +684,8 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, void *cb_data) { struct list_head dma_list[RSXX_MAX_TARGETS]; - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; unsigned long long addr8; unsigned int laddr; unsigned int bv_len; @@ -722,9 +723,9 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, bv_len -= RSXX_HW_BLK_SIZE; } } else { - bio_for_each_segment(bvec, bio, i) { - bv_len = bvec->bv_len; - bv_off = bvec->bv_offset; + bio_for_each_segment(bvec, bio, iter) { + bv_len = bvec.bv_len; + bv_off = bvec.bv_offset; while (bv_len > 0) { tgt = rsxx_get_dma_tgt(card, addr8); @@ -736,7 +737,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, st = rsxx_queue_dma(card, &dma_list[tgt], bio_data_dir(bio), dma_off, dma_len, - laddr, bvec->bv_page, + laddr, bvec.bv_page, bv_off, cb, cb_data); if (st) goto bvec_err; 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 038a6d2aced3..b62f37925374 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -362,7 +362,7 @@ static void btree_node_write_done(struct closure *cl) struct bio_vec *bv; int n; - __bio_for_each_segment(bv, b->bio, n, 0) + bio_for_each_segment_all(bv, b->bio, n) __free_page(bv->bv_page); __btree_node_write_done(cl); @@ -421,7 +421,7 @@ static void do_btree_node_write(struct btree *b) struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); - bio_for_each_segment(bv, b->bio, j) + bio_for_each_segment_all(bv, b->bio, j) memcpy(page_address(bv->bv_page), base + j * PAGE_SIZE, PAGE_SIZE); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 92b3fd468a03..03cb4d114e16 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -173,7 +173,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) { char name[BDEVNAME_SIZE]; struct bio *check; - struct bio_vec *bv; + struct bio_vec bv, *bv2; + struct bvec_iter iter; int i; check = bio_clone(bio, GFP_NOIO); @@ -185,13 +186,13 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) submit_bio_wait(READ_SYNC, check); - bio_for_each_segment(bv, bio, i) { - void *p1 = kmap_atomic(bv->bv_page); - void *p2 = page_address(check->bi_io_vec[i].bv_page); + bio_for_each_segment(bv, bio, iter) { + void *p1 = kmap_atomic(bv.bv_page); + void *p2 = page_address(check->bi_io_vec[iter.bi_idx].bv_page); - cache_set_err_on(memcmp(p1 + bv->bv_offset, - p2 + bv->bv_offset, - bv->bv_len), + cache_set_err_on(memcmp(p1 + bv.bv_offset, + p2 + bv.bv_offset, + bv.bv_len), dc->disk.c, "verify failed at dev %s sector %llu", bdevname(dc->bdev, name), @@ -200,8 +201,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) kunmap_atomic(p1); } - bio_for_each_segment_all(bv, check, i) - __free_page(bv->bv_page); + bio_for_each_segment_all(bv2, check, i) + 
__free_page(bv2->bv_page); out_put: bio_put(check); } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index dc44f0689eb7..9b5b6a41a9b6 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -22,12 +22,12 @@ static void bch_bi_idx_hack_endio(struct bio *bio, int error) static void bch_generic_make_request_hack(struct bio *bio) { if (bio->bi_iter.bi_idx) { - int i; - struct bio_vec *bv; + struct bio_vec bv; + struct bvec_iter iter; struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio)); - bio_for_each_segment(bv, bio, i) - clone->bi_io_vec[clone->bi_vcnt++] = *bv; + bio_for_each_segment(bv, bio, iter) + clone->bi_io_vec[clone->bi_vcnt++] = bv; clone->bi_iter.bi_sector = bio->bi_iter.bi_sector; clone->bi_bdev = bio->bi_bdev; @@ -73,8 +73,9 @@ static void bch_generic_make_request_hack(struct bio *bio) struct bio *bch_bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { - unsigned idx = bio->bi_iter.bi_idx, vcnt = 0, nbytes = sectors << 9; - struct bio_vec *bv; + unsigned vcnt = 0, nbytes = sectors << 9; + struct bio_vec bv; + struct bvec_iter iter; struct bio *ret = NULL; BUG_ON(sectors <= 0); @@ -86,49 +87,35 @@ struct bio *bch_bio_split(struct bio *bio, int sectors, ret = bio_alloc_bioset(gfp, 1, bs); if (!ret) return NULL; - idx = 0; goto out; } - bio_for_each_segment(bv, bio, idx) { - vcnt = idx - bio->bi_iter.bi_idx; + bio_for_each_segment(bv, bio, iter) { + vcnt++; - if (!nbytes) { - ret = bio_alloc_bioset(gfp, vcnt, bs); - if (!ret) - return NULL; + if (nbytes <= bv.bv_len) + break; - memcpy(ret->bi_io_vec, __bio_iovec(bio), - sizeof(struct bio_vec) * vcnt); + nbytes -= bv.bv_len; + } - break; - } else if (nbytes < bv->bv_len) { - ret = bio_alloc_bioset(gfp, ++vcnt, bs); - if (!ret) - return NULL; + ret = bio_alloc_bioset(gfp, vcnt, bs); + if (!ret) + return NULL; - memcpy(ret->bi_io_vec, __bio_iovec(bio), - sizeof(struct bio_vec) * vcnt); + bio_for_each_segment(bv, bio, iter) { + ret->bi_io_vec[ret->bi_vcnt++] = 
bv; - ret->bi_io_vec[vcnt - 1].bv_len = nbytes; - bv->bv_offset += nbytes; - bv->bv_len -= nbytes; + if (ret->bi_vcnt == vcnt) break; - } - - nbytes -= bv->bv_len; } + + ret->bi_io_vec[ret->bi_vcnt - 1].bv_len = nbytes; out: ret->bi_bdev = bio->bi_bdev; ret->bi_iter.bi_sector = bio->bi_iter.bi_sector; ret->bi_iter.bi_size = sectors << 9; ret->bi_rw = bio->bi_rw; - ret->bi_vcnt = vcnt; - ret->bi_max_vecs = vcnt; - - bio->bi_iter.bi_sector += sectors; - bio->bi_iter.bi_size -= sectors << 9; - bio->bi_iter.bi_idx = idx; if (bio_integrity(bio)) { if (bio_integrity_clone(ret, bio, gfp)) { @@ -137,9 +124,10 @@ out: } bio_integrity_trim(ret, 0, bio_sectors(ret)); - bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio)); } + bio_advance(bio, ret->bi_iter.bi_size); + return ret; } @@ -155,12 +143,13 @@ static unsigned bch_bio_max_sectors(struct bio *bio) if (bio_segments(bio) > max_segments || q->merge_bvec_fn) { - struct bio_vec *bv; - int i, seg = 0; + struct bio_vec bv; + struct bvec_iter iter; + unsigned seg = 0; ret = 0; - bio_for_each_segment(bv, bio, i) { + bio_for_each_segment(bv, bio, iter) { struct bvec_merge_data bvm = { .bi_bdev = bio->bi_bdev, .bi_sector = bio->bi_iter.bi_sector, @@ -172,11 +161,11 @@ static unsigned bch_bio_max_sectors(struct bio *bio) break; if (q->merge_bvec_fn && - q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) + q->merge_bvec_fn(q, &bvm, &bv) < (int) bv.bv_len) break; seg++; - ret += bv->bv_len >> 9; + ret += bv.bv_len >> 9; } } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 47a9bbc75124..4c0a422fd49f 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -198,14 +198,14 @@ static bool verify(struct cached_dev *dc, struct bio *bio) static void bio_csum(struct bio *bio, struct bkey *k) { - struct bio_vec *bv; + struct bio_vec bv; + struct bvec_iter iter; uint64_t csum = 0; - int i; - bio_for_each_segment(bv, bio, i) { - void *d = kmap(bv->bv_page) + bv->bv_offset; - csum = 
bch_crc64_update(csum, d, bv->bv_len); - kunmap(bv->bv_page); + bio_for_each_segment(bv, bio, iter) { + void *d = kmap(bv.bv_page) + bv.bv_offset; + csum = bch_crc64_update(csum, d, bv.bv_len); + kunmap(bv.bv_page); } k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1); @@ -1182,17 +1182,17 @@ void bch_cached_dev_request_init(struct cached_dev *dc) static int flash_dev_cache_miss(struct btree *b, struct search *s, struct bio *bio, unsigned sectors) { - struct bio_vec *bv; - int i; + struct bio_vec bv; + struct bvec_iter iter; /* Zero fill bio */ - bio_for_each_segment(bv, bio, i) { - unsigned j = min(bv->bv_len >> 9, sectors); + bio_for_each_segment(bv, bio, iter) { + unsigned j = min(bv.bv_len >> 9, sectors); - void *p = kmap(bv->bv_page); - memset(p + bv->bv_offset, 0, j << 9); - kunmap(bv->bv_page); + void *p = kmap(bv.bv_page); + memset(p + bv.bv_offset, 0, j << 9); + kunmap(bv.bv_page); sectors -= j; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a5d9c0ee4d60..bef353c51c04 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -937,9 +937,9 @@ static struct dma_async_tx_descriptor * async_copy_data(int frombio, struct bio *bio, struct page *page, sector_t sector, struct dma_async_tx_descriptor *tx) { - struct bio_vec *bvl; + struct bio_vec bvl; + struct bvec_iter iter; struct page *bio_page; - int i; int page_offset; struct async_submit_ctl submit; enum async_tx_flags flags = 0; @@ -953,8 +953,8 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, flags |= ASYNC_TX_FENCE; init_async_submit(&submit, flags, tx, NULL, NULL, NULL); - bio_for_each_segment(bvl, bio, i) { - int len = bvl->bv_len; + bio_for_each_segment(bvl, bio, iter) { + int len = bvl.bv_len; int clen; int b_offset = 0; @@ -970,8 +970,8 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, clen = len; if (clen > 0) { - b_offset += bvl->bv_offset; - bio_page = bvl->bv_page; + b_offset += bvl.bv_offset; + bio_page = bvl.bv_page; if (frombio) tx = 
async_memcpy(page, bio_page, page_offset, b_offset, clen, &submit); diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 92bd22ce6760..9cbc567698ce 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -504,7 +504,7 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev, struct dasd_diag_req *dreq; struct dasd_diag_bio *dbio; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst; unsigned int count, datasize; sector_t recid, first_rec, last_rec; @@ -525,10 +525,10 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev, /* Check struct bio and count the number of blocks for the request. */ count = 0; rq_for_each_segment(bv, req, iter) { - if (bv->bv_len & (blksize - 1)) + if (bv.bv_len & (blksize - 1)) /* Fba can only do full blocks. */ return ERR_PTR(-EINVAL); - count += bv->bv_len >> (block->s2b_shift + 9); + count += bv.bv_len >> (block->s2b_shift + 9); } /* Paranoia. 
*/ if (count != last_rec - first_rec + 1) @@ -545,8 +545,8 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev, dbio = dreq->bio; recid = first_rec; rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; - for (off = 0; off < bv->bv_len; off += blksize) { + dst = page_address(bv.bv_page) + bv.bv_offset; + for (off = 0; off < bv.bv_len; off += blksize) { memset(dbio, 0, sizeof (struct dasd_diag_bio)); dbio->type = rw_cmd; dbio->block_number = recid + 1; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index cee7e2708a1f..70d177017329 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2551,7 +2551,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( struct dasd_ccw_req *cqr; struct ccw1 *ccw; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst; unsigned int off; int count, cidaw, cplength, datasize; @@ -2573,13 +2573,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( count = 0; cidaw = 0; rq_for_each_segment(bv, req, iter) { - if (bv->bv_len & (blksize - 1)) + if (bv.bv_len & (blksize - 1)) /* Eckd can only do full blocks. */ return ERR_PTR(-EINVAL); - count += bv->bv_len >> (block->s2b_shift + 9); + count += bv.bv_len >> (block->s2b_shift + 9); #if defined(CONFIG_64BIT) - if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) - cidaw += bv->bv_len >> (block->s2b_shift + 9); + if (idal_is_needed (page_address(bv.bv_page), bv.bv_len)) + cidaw += bv.bv_len >> (block->s2b_shift + 9); #endif } /* Paranoia. 
*/ @@ -2650,16 +2650,16 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( last_rec - recid + 1, cmd, basedev, blksize); } rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; + dst = page_address(bv.bv_page) + bv.bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, GFP_DMA | __GFP_NOWARN); if (copy && rq_data_dir(req) == WRITE) - memcpy(copy + bv->bv_offset, dst, bv->bv_len); + memcpy(copy + bv.bv_offset, dst, bv.bv_len); if (copy) - dst = copy + bv->bv_offset; + dst = copy + bv.bv_offset; } - for (off = 0; off < bv->bv_len; off += blksize) { + for (off = 0; off < bv.bv_len; off += blksize) { sector_t trkid = recid; unsigned int recoffs = sector_div(trkid, blk_per_trk); rcmd = cmd; @@ -2735,7 +2735,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( struct dasd_ccw_req *cqr; struct ccw1 *ccw; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst, *idaw_dst; unsigned int cidaw, cplength, datasize; unsigned int tlf; @@ -2813,8 +2813,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( idaw_dst = NULL; idaw_len = 0; rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; - seg_len = bv->bv_len; + dst = page_address(bv.bv_page) + bv.bv_offset; + seg_len = bv.bv_len; while (seg_len) { if (new_track) { trkid = recid; @@ -3039,7 +3039,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( { struct dasd_ccw_req *cqr; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst; unsigned int trkcount, ctidaw; unsigned char cmd; @@ -3125,8 +3125,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( new_track = 1; recid = first_rec; rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; - seg_len = bv->bv_len; + dst = page_address(bv.bv_page) + bv.bv_offset; + seg_len = bv.bv_len; while (seg_len) { if (new_track) { trkid = recid; @@ -3158,9 +3158,9 @@ static 
struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( } } else { rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; + dst = page_address(bv.bv_page) + bv.bv_offset; last_tidaw = itcw_add_tidaw(itcw, 0x00, - dst, bv->bv_len); + dst, bv.bv_len); if (IS_ERR(last_tidaw)) { ret = -EINVAL; goto out_error; @@ -3276,7 +3276,7 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, struct dasd_ccw_req *cqr; struct ccw1 *ccw; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst; unsigned char cmd; unsigned int trkcount; @@ -3376,8 +3376,8 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE); } rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; - seg_len = bv->bv_len; + dst = page_address(bv.bv_page) + bv.bv_offset; + seg_len = bv.bv_len; if (cmd == DASD_ECKD_CCW_READ_TRACK) memset(dst, 0, seg_len); if (!len_to_track_end) { @@ -3422,7 +3422,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) struct dasd_eckd_private *private; struct ccw1 *ccw; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst, *cda; unsigned int blksize, blk_per_trk, off; sector_t recid; @@ -3440,8 +3440,8 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) if (private->uses_cdl == 0 || recid > 2*blk_per_trk) ccw++; rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; - for (off = 0; off < bv->bv_len; off += blksize) { + dst = page_address(bv.bv_page) + bv.bv_offset; + for (off = 0; off < bv.bv_len; off += blksize) { /* Skip locate record. 
*/ if (private->uses_cdl && recid <= 2*blk_per_trk) ccw++; @@ -3452,7 +3452,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) cda = (char *)((addr_t) ccw->cda); if (dst != cda) { if (rq_data_dir(req) == READ) - memcpy(dst, cda, bv->bv_len); + memcpy(dst, cda, bv.bv_len); kmem_cache_free(dasd_page_cache, (void *)((addr_t)cda & PAGE_MASK)); } diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 9cbc8c32ba59..2c8e68bf9a1c 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -260,7 +260,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, struct dasd_ccw_req *cqr; struct ccw1 *ccw; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst; int count, cidaw, cplength, datasize; sector_t recid, first_rec, last_rec; @@ -283,13 +283,13 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, count = 0; cidaw = 0; rq_for_each_segment(bv, req, iter) { - if (bv->bv_len & (blksize - 1)) + if (bv.bv_len & (blksize - 1)) /* Fba can only do full blocks. */ return ERR_PTR(-EINVAL); - count += bv->bv_len >> (block->s2b_shift + 9); + count += bv.bv_len >> (block->s2b_shift + 9); #if defined(CONFIG_64BIT) - if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) - cidaw += bv->bv_len / blksize; + if (idal_is_needed (page_address(bv.bv_page), bv.bv_len)) + cidaw += bv.bv_len / blksize; #endif } /* Paranoia. 
*/ @@ -326,16 +326,16 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, } recid = first_rec; rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; + dst = page_address(bv.bv_page) + bv.bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, GFP_DMA | __GFP_NOWARN); if (copy && rq_data_dir(req) == WRITE) - memcpy(copy + bv->bv_offset, dst, bv->bv_len); + memcpy(copy + bv.bv_offset, dst, bv.bv_len); if (copy) - dst = copy + bv->bv_offset; + dst = copy + bv.bv_offset; } - for (off = 0; off < bv->bv_len; off += blksize) { + for (off = 0; off < bv.bv_len; off += blksize) { /* Locate record for stupid devices. */ if (private->rdc_data.mode.bits.data_chain == 0) { ccw[-1].flags |= CCW_FLAG_CC; @@ -384,7 +384,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) struct dasd_fba_private *private; struct ccw1 *ccw; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; char *dst, *cda; unsigned int blksize, off; int status; @@ -399,8 +399,8 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) if (private->rdc_data.mode.bits.data_chain != 0) ccw++; rq_for_each_segment(bv, req, iter) { - dst = page_address(bv->bv_page) + bv->bv_offset; - for (off = 0; off < bv->bv_len; off += blksize) { + dst = page_address(bv.bv_page) + bv.bv_offset; + for (off = 0; off < bv.bv_len; off += blksize) { /* Skip locate record. 
*/ if (private->rdc_data.mode.bits.data_chain == 0) ccw++; @@ -411,7 +411,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) cda = (char *)((addr_t) ccw->cda); if (dst != cda) { if (rq_data_dir(req) == READ) - memcpy(dst, cda, bv->bv_len); + memcpy(dst, cda, bv.bv_len); kmem_cache_free(dasd_page_cache, (void *)((addr_t)cda & PAGE_MASK)); } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 16814a8457f8..ebf41e228e55 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -808,12 +808,12 @@ static void dcssblk_make_request(struct request_queue *q, struct bio *bio) { struct dcssblk_dev_info *dev_info; - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; unsigned long index; unsigned long page_addr; unsigned long source_addr; unsigned long bytes_done; - int i; bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; @@ -844,21 +844,21 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) } index = (bio->bi_iter.bi_sector >> 3); - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment(bvec, bio, iter) { page_addr = (unsigned long) - page_address(bvec->bv_page) + bvec->bv_offset; + page_address(bvec.bv_page) + bvec.bv_offset; source_addr = dev_info->start + (index<<12) + bytes_done; - if (unlikely((page_addr & 4095) != 0) || (bvec->bv_len & 4095) != 0) + if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0) // More paranoia. 
goto fail; if (bio_data_dir(bio) == READ) { memcpy((void*)page_addr, (void*)source_addr, - bvec->bv_len); + bvec.bv_len); } else { memcpy((void*)source_addr, (void*)page_addr, - bvec->bv_len); + bvec.bv_len); } - bytes_done += bvec->bv_len; + bytes_done += bvec.bv_len; } bio_endio(bio, 0); return; diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index d0ab5019d885..76bed1743db1 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -130,7 +130,7 @@ static void scm_request_prepare(struct scm_request *scmrq) struct aidaw *aidaw = scmrq->aidaw; struct msb *msb = &scmrq->aob->msb[0]; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; msb->bs = MSB_BS_4K; scmrq->aob->request.msb_count = 1; @@ -142,9 +142,9 @@ static void scm_request_prepare(struct scm_request *scmrq) msb->data_addr = (u64) aidaw; rq_for_each_segment(bv, scmrq->request, iter) { - WARN_ON(bv->bv_offset); - msb->blk_count += bv->bv_len >> 12; - aidaw->data_addr = (u64) page_address(bv->bv_page); + WARN_ON(bv.bv_offset); + msb->blk_count += bv.bv_len >> 12; + aidaw->data_addr = (u64) page_address(bv.bv_page); aidaw++; } } diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c index 27f930cd657f..9aae909d47a5 100644 --- a/drivers/s390/block/scm_blk_cluster.c +++ b/drivers/s390/block/scm_blk_cluster.c @@ -122,7 +122,7 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq) struct aidaw *aidaw = scmrq->aidaw; struct msb *msb = &scmrq->aob->msb[0]; struct req_iterator iter; - struct bio_vec *bv; + struct bio_vec bv; int i = 0; u64 addr; @@ -163,7 +163,7 @@ static void scm_prepare_cluster_request(struct scm_request *scmrq) i++; } rq_for_each_segment(bv, req, iter) { - aidaw->data_addr = (u64) page_address(bv->bv_page); + aidaw->data_addr = (u64) page_address(bv.bv_page); aidaw++; i++; } diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index dd4e73fdb323..3e530f9da8c4 100644 --- 
a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -184,11 +184,11 @@ static unsigned long xpram_highest_page_index(void) static void xpram_make_request(struct request_queue *q, struct bio *bio) { xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data; - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; unsigned int index; unsigned long page_addr; unsigned long bytes; - int i; if ((bio->bi_iter.bi_sector & 7) != 0 || (bio->bi_iter.bi_size & 4095) != 0) @@ -200,10 +200,10 @@ static void xpram_make_request(struct request_queue *q, struct bio *bio) if ((bio->bi_iter.bi_sector >> 3) > 0xffffffffU - xdev->offset) goto fail; index = (bio->bi_iter.bi_sector >> 3) + xdev->offset; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment(bvec, bio, iter) { page_addr = (unsigned long) - kmap(bvec->bv_page) + bvec->bv_offset; - bytes = bvec->bv_len; + kmap(bvec.bv_page) + bvec.bv_offset; + bytes = bvec.bv_len; if ((page_addr & 4095) != 0 || (bytes & 4095) != 0) /* More paranoia. 
*/ goto fail; diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index 9d26637308be..7143e86af326 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -1901,7 +1901,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, struct MPT2SAS_ADAPTER *ioc = shost_priv(shost); Mpi2SmpPassthroughRequest_t *mpi_request; Mpi2SmpPassthroughReply_t *mpi_reply; - int rc, i; + int rc; u16 smid; u32 ioc_state; unsigned long timeleft; @@ -1916,7 +1916,8 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, void *pci_addr_out = NULL; u16 wait_state_count; struct request *rsp = req->next_rq; - struct bio_vec *bvec = NULL; + struct bio_vec bvec; + struct bvec_iter iter; if (!rsp) { printk(MPT2SAS_ERR_FMT "%s: the smp response space is " @@ -1955,11 +1956,11 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, goto out; } - bio_for_each_segment(bvec, req->bio, i) { + bio_for_each_segment(bvec, req->bio, iter) { memcpy(pci_addr_out + offset, - page_address(bvec->bv_page) + bvec->bv_offset, - bvec->bv_len); - offset += bvec->bv_len; + page_address(bvec.bv_page) + bvec.bv_offset, + bvec.bv_len); + offset += bvec.bv_len; } } else { dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio), @@ -2106,19 +2107,19 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, u32 offset = 0; u32 bytes_to_copy = le16_to_cpu(mpi_reply->ResponseDataLength); - bio_for_each_segment(bvec, rsp->bio, i) { - if (bytes_to_copy <= bvec->bv_len) { - memcpy(page_address(bvec->bv_page) + - bvec->bv_offset, pci_addr_in + + bio_for_each_segment(bvec, rsp->bio, iter) { + if (bytes_to_copy <= bvec.bv_len) { + memcpy(page_address(bvec.bv_page) + + bvec.bv_offset, pci_addr_in + offset, bytes_to_copy); break; } else { - memcpy(page_address(bvec->bv_page) + - bvec->bv_offset, pci_addr_in + - offset, bvec->bv_len); - bytes_to_copy -= bvec->bv_len; 
+ memcpy(page_address(bvec.bv_page) + + bvec.bv_offset, pci_addr_in + + offset, bvec.bv_len); + bytes_to_copy -= bvec.bv_len; } - offset += bvec->bv_len; + offset += bvec.bv_len; } } } else { diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index e771a88c6a74..196a67f2e95f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -1884,7 +1884,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); Mpi2SmpPassthroughRequest_t *mpi_request; Mpi2SmpPassthroughReply_t *mpi_reply; - int rc, i; + int rc; u16 smid; u32 ioc_state; unsigned long timeleft; @@ -1898,7 +1898,8 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, void *pci_addr_out = NULL; u16 wait_state_count; struct request *rsp = req->next_rq; - struct bio_vec *bvec = NULL; + struct bio_vec bvec; + struct bvec_iter iter; if (!rsp) { pr_err(MPT3SAS_FMT "%s: the smp response space is missing\n", @@ -1938,11 +1939,11 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, goto out; } - bio_for_each_segment(bvec, req->bio, i) { + bio_for_each_segment(bvec, req->bio, iter) { memcpy(pci_addr_out + offset, - page_address(bvec->bv_page) + bvec->bv_offset, - bvec->bv_len); - offset += bvec->bv_len; + page_address(bvec.bv_page) + bvec.bv_offset, + bvec.bv_len); + offset += bvec.bv_len; } } else { dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio), @@ -2067,19 +2068,19 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, u32 offset = 0; u32 bytes_to_copy = le16_to_cpu(mpi_reply->ResponseDataLength); - bio_for_each_segment(bvec, rsp->bio, i) { - if (bytes_to_copy <= bvec->bv_len) { - memcpy(page_address(bvec->bv_page) + - bvec->bv_offset, pci_addr_in + + bio_for_each_segment(bvec, rsp->bio, iter) { + if (bytes_to_copy <= bvec.bv_len) { + memcpy(page_address(bvec.bv_page) + + bvec.bv_offset, 
pci_addr_in + offset, bytes_to_copy); break; } else { - memcpy(page_address(bvec->bv_page) + - bvec->bv_offset, pci_addr_in + - offset, bvec->bv_len); - bytes_to_copy -= bvec->bv_len; + memcpy(page_address(bvec.bv_page) + + bvec.bv_offset, pci_addr_in + + offset, bvec.bv_len); + bytes_to_copy -= bvec.bv_len; } - offset += bvec->bv_len; + offset += bvec.bv_len; } } } else { diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index 53741be754b4..581ff78be1a2 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -194,10 +194,10 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) struct cl_object *obj = ll_i2info(inode)->lli_clob; pgoff_t offset; int ret; - int i; int rw; obd_count page_count = 0; - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; struct bio *bio; ssize_t bytes; @@ -221,14 +221,14 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) LASSERT(rw == bio->bi_rw); offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset; - bio_for_each_segment(bvec, bio, i) { - BUG_ON(bvec->bv_offset != 0); - BUG_ON(bvec->bv_len != PAGE_CACHE_SIZE); + bio_for_each_segment(bvec, bio, iter) { + BUG_ON(bvec.bv_offset != 0); + BUG_ON(bvec.bv_len != PAGE_CACHE_SIZE); - pages[page_count] = bvec->bv_page; + pages[page_count] = bvec.bv_page; offsets[page_count] = offset; page_count++; - offset += bvec->bv_len; + offset += bvec.bv_len; } LASSERT(page_count <= LLOOP_MAX_SEGMENTS); } diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index e9e6f984092b..6f988382b174 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -672,9 +672,10 @@ static ssize_t reset_store(struct device *dev, static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) { - int i, offset; + int offset; u32 index; - struct bio_vec *bvec; + struct bio_vec 
bvec; + struct bvec_iter iter; switch (rw) { case READ: @@ -689,33 +690,33 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment(bvec, bio, iter) { int max_transfer_size = PAGE_SIZE - offset; - if (bvec->bv_len > max_transfer_size) { + if (bvec.bv_len > max_transfer_size) { /* * zram_bvec_rw() can only make operation on a single * zram page. Split the bio vector. */ struct bio_vec bv; - bv.bv_page = bvec->bv_page; + bv.bv_page = bvec.bv_page; bv.bv_len = max_transfer_size; - bv.bv_offset = bvec->bv_offset; + bv.bv_offset = bvec.bv_offset; if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) goto out; - bv.bv_len = bvec->bv_len - max_transfer_size; + bv.bv_len = bvec.bv_len - max_transfer_size; bv.bv_offset += max_transfer_size; if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) goto out; } else - if (zram_bvec_rw(zram, bvec, index, offset, bio, rw) + if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw) < 0) goto out; - update_position(&index, &offset, bvec); + update_position(&index, &offset, &bvec); } set_bit(BIO_UPTODATE, &bio->bi_flags); diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 08e3d1388c65..9127db86f315 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -299,25 +299,26 @@ static void bio_integrity_generate(struct bio *bio) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity_exchg bix; - struct bio_vec *bv; + struct bio_vec bv; + struct bvec_iter iter; sector_t sector = bio->bi_iter.bi_sector; - unsigned int i, sectors, total; + unsigned int sectors, total; void *prot_buf = bio->bi_integrity->bip_buf; total = 0; bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; - bio_for_each_segment(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page); - bix.data_buf = kaddr + bv->bv_offset; - bix.data_size = 
bv->bv_len; + bio_for_each_segment(bv, bio, iter) { + void *kaddr = kmap_atomic(bv.bv_page); + bix.data_buf = kaddr + bv.bv_offset; + bix.data_size = bv.bv_len; bix.prot_buf = prot_buf; bix.sector = sector; bi->generate_fn(&bix); - sectors = bv->bv_len / bi->sector_size; + sectors = bv.bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; total += sectors * bi->tuple_size; @@ -441,19 +442,20 @@ static int bio_integrity_verify(struct bio *bio) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity_exchg bix; - struct bio_vec *bv; + struct bio_vec bv; + struct bvec_iter iter; sector_t sector = bio->bi_integrity->bip_sector; - unsigned int i, sectors, total, ret; + unsigned int sectors, total, ret; void *prot_buf = bio->bi_integrity->bip_buf; ret = total = 0; bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; - bio_for_each_segment(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page); - bix.data_buf = kaddr + bv->bv_offset; - bix.data_size = bv->bv_len; + bio_for_each_segment(bv, bio, iter) { + void *kaddr = kmap_atomic(bv.bv_page); + bix.data_buf = kaddr + bv.bv_offset; + bix.data_size = bv.bv_len; bix.prot_buf = prot_buf; bix.sector = sector; @@ -464,7 +466,7 @@ static int bio_integrity_verify(struct bio *bio) return ret; } - sectors = bv->bv_len / bi->sector_size; + sectors = bv.bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; total += sectors * bi->tuple_size; diff --git a/fs/bio.c b/fs/bio.c index 7bb281fc3d5c..8b7f14a95503 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -473,13 +473,13 @@ EXPORT_SYMBOL(bio_alloc_bioset); void zero_fill_bio(struct bio *bio) { unsigned long flags; - struct bio_vec *bv; - int i; + struct bio_vec bv; + struct bvec_iter iter; - bio_for_each_segment(bv, bio, i) { - char *data = bvec_kmap_irq(bv, &flags); - memset(data, 0, bv->bv_len); - flush_dcache_page(bv->bv_page); + bio_for_each_segment(bv, bio, iter) { + char 
*data = bvec_kmap_irq(&bv, &flags); + memset(data, 0, bv.bv_len); + flush_dcache_page(bv.bv_page); bvec_kunmap_irq(data, &flags); } } @@ -1687,11 +1687,11 @@ void bio_check_pages_dirty(struct bio *bio) #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE void bio_flush_dcache_pages(struct bio *bi) { - int i; - struct bio_vec *bvec; + struct bio_vec bvec; + struct bvec_iter iter; - bio_for_each_segment(bvec, bi, i) - flush_dcache_page(bvec->bv_page); + bio_for_each_segment(bvec, bi, iter) + flush_dcache_page(bvec.bv_page); } EXPORT_SYMBOL(bio_flush_dcache_pages); #endif @@ -1840,7 +1840,7 @@ void bio_trim(struct bio *bio, int offset, int size) bio->bi_iter.bi_idx = 0; } /* Make sure vcnt and last bv are not too big */ - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment_all(bvec, bio, i) { if (sofar + bvec->bv_len > size) bvec->bv_len = size - sofar; if (bvec->bv_len == 0) { diff --git a/include/linux/bio.h b/include/linux/bio.h index 9f182fcbe714..c16adb5f69f8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -63,10 +63,13 @@ */ #define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) #define __bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) -#define bio_iovec(bio) (*__bio_iovec(bio)) + +#define bio_iter_iovec(bio, iter) ((bio)->bi_io_vec[(iter).bi_idx]) #define bio_page(bio) (bio_iovec((bio)).bv_page) #define bio_offset(bio) (bio_iovec((bio)).bv_offset) +#define bio_iovec(bio) (*__bio_iovec(bio)) + #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_iter.bi_idx) #define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) #define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) @@ -133,15 +136,6 @@ static inline void *bio_data(struct bio *bio) #define bio_io_error(bio) bio_endio((bio), -EIO) -/* - * drivers should not use the __ version unless they _really_ know what - * they're doing - */ -#define __bio_for_each_segment(bvl, bio, i, start_idx) \ - for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ - i < 
(bio)->bi_vcnt; \ - bvl++, i++) - /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it @@ -151,10 +145,16 @@ static inline void *bio_data(struct bio *bio) bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ i++) -#define bio_for_each_segment(bvl, bio, i) \ - for (i = (bio)->bi_iter.bi_idx; \ - bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ - i++) +#define __bio_for_each_segment(bvl, bio, iter, start) \ + for (iter = (start); \ + bvl = bio_iter_iovec((bio), (iter)), \ + (iter).bi_idx < (bio)->bi_vcnt; \ + (iter).bi_idx++) + +#define bio_for_each_segment(bvl, bio, iter) \ + __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) + +#define bio_iter_last(bio, iter) ((iter).bi_idx == (bio)->bi_vcnt - 1) /* * get a reference to a bio, so it won't disappear. the intended use is diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1b135d49b279..337b92a54658 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -735,7 +735,7 @@ struct rq_map_data { }; struct req_iterator { - int i; + struct bvec_iter iter; struct bio *bio; }; @@ -748,10 +748,11 @@ struct req_iterator { #define rq_for_each_segment(bvl, _rq, _iter) \ __rq_for_each_bio(_iter.bio, _rq) \ - bio_for_each_segment(bvl, _iter.bio, _iter.i) + bio_for_each_segment(bvl, _iter.bio, _iter.iter) #define rq_iter_last(rq, _iter) \ - (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) + (_iter.bio->bi_next == NULL && \ + bio_iter_last(_iter.bio, _iter.iter)) #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" diff --git a/mm/bounce.c b/mm/bounce.c index 5a7d58fb883b..523918b8c6dc 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -98,27 +98,24 @@ int init_emergency_isa_pool(void) static void copy_to_high_bio_irq(struct bio *to, struct bio *from) { unsigned char *vfrom; - struct bio_vec *tovec, *fromvec; - int i; - - 
bio_for_each_segment(tovec, to, i) { - fromvec = from->bi_io_vec + i; - - /* - * not bounced - */ - if (tovec->bv_page == fromvec->bv_page) - continue; - - /* - * fromvec->bv_offset and fromvec->bv_len might have been - * modified by the block layer, so use the original copy, - * bounce_copy_vec already uses tovec->bv_len - */ - vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; + struct bio_vec tovec, *fromvec = from->bi_io_vec; + struct bvec_iter iter; + + bio_for_each_segment(tovec, to, iter) { + if (tovec.bv_page != fromvec->bv_page) { + /* + * fromvec->bv_offset and fromvec->bv_len might have + * been modified by the block layer, so use the original + * copy, bounce_copy_vec already uses tovec->bv_len + */ + vfrom = page_address(fromvec->bv_page) + + tovec.bv_offset; + + bounce_copy_vec(&tovec, vfrom); + flush_dcache_page(tovec.bv_page); + } - bounce_copy_vec(tovec, vfrom); - flush_dcache_page(tovec->bv_page); + fromvec++; } } @@ -201,13 +198,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, { struct bio *bio; int rw = bio_data_dir(*bio_orig); - struct bio_vec *to, *from; + struct bio_vec *to, from; + struct bvec_iter iter; unsigned i; if (force) goto bounce; - bio_for_each_segment(from, *bio_orig, i) - if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q)) + bio_for_each_segment(from, *bio_orig, iter) + if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q)) goto bounce; return; -- cgit v1.2.3 From 4550dd6c6b062fc5e5b647296d55da22616123c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 14:26:21 -0700 Subject: block: Immutable bio vecs This adds a mechanism by which we can advance a bio by an arbitrary number of bytes without modifying the biovec: bio->bi_iter.bi_bvec_done indicates the number of bytes completed in the current bvec. Various driver code still needs to be updated to not refer to the bvec directly before we can use this for interesting things, like efficient bio splitting. 
Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Lars Ellenberg Cc: Paul Clements Cc: drbd-user@lists.linbit.com Cc: nbd-general@lists.sourceforge.net --- Documentation/block/biovecs.txt | 111 ++++++++++++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_main.c | 4 +- drivers/block/nbd.c | 2 +- fs/bio.c | 27 +--------- include/linux/bio.h | 81 ++++++++++++++++++++++++++--- include/linux/blk_types.h | 3 ++ include/linux/blkdev.h | 4 +- 7 files changed, 194 insertions(+), 38 deletions(-) create mode 100644 Documentation/block/biovecs.txt (limited to 'include') diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.txt new file mode 100644 index 000000000000..74a32ad52f53 --- /dev/null +++ b/Documentation/block/biovecs.txt @@ -0,0 +1,111 @@ + +Immutable biovecs and biovec iterators: +======================================= + +Kent Overstreet + +As of 3.13, biovecs should never be modified after a bio has been submitted. +Instead, we have a new struct bvec_iter which represents a range of a biovec - +the iterator will be modified as the bio is completed, not the biovec. + +More specifically, old code that needed to partially complete a bio would +update bi_sector and bi_size, and advance bi_idx to the next biovec. If it +ended up partway through a biovec, it would increment bv_offset and decrement +bv_len by the number of bytes completed in that biovec. + +In the new scheme of things, everything that must be mutated in order to +partially complete a bio is segregated into struct bvec_iter: bi_sector, +bi_size and bi_idx have been moved there; and instead of modifying bv_offset +and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of +bytes completed in the current bvec. + +There are a bunch of new helper macros for hiding the gory details - in +particular, presenting the illusion of partially completed biovecs so that +normal code doesn't have to deal with bi_bvec_done. 
+ + * Driver code should no longer refer to biovecs directly; we now have + bio_iovec() and bio_iter_iovec() macros that return literal struct biovecs, + constructed from the raw biovecs but taking into account bi_bvec_done and + bi_size. + + bio_for_each_segment() has been updated to take a bvec_iter argument + instead of an integer (that corresponded to bi_idx); for a lot of code the + conversion just required changing the types of the arguments to + bio_for_each_segment(). + + * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a + wrapper around bio_advance_iter() that operates on bio->bi_iter, and also + advances the bio integrity's iter if present. + + There is a lower level advance function - bvec_iter_advance() - which takes + a pointer to a biovec, not a bio; this is used by the bio integrity code. + +What's all this get us? +======================= + +Having a real iterator, and making biovecs immutable, has a number of +advantages: + + * Before, iterating over bios was very awkward when you weren't processing + exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c, + which copies the contents of one bio into another. Because the biovecs + wouldn't necessarily be the same size, the old code was tricky and convoluted - + it had to walk two different bios at the same time, keeping both bi_idx and + an offset into the current biovec for each. + + The new code is much more straightforward - have a look. This sort of + pattern comes up in a lot of places; a lot of drivers were essentially open + coding bvec iterators before, and having a common implementation considerably + simplifies a lot of code. + + * Before, any code that might need to use the biovec after the bio had been + completed (perhaps to copy the data somewhere else, or perhaps to resubmit + it somewhere else if there was an error) had to save the entire bvec array + - again, this was being done in a fair number of places. 
+ + * Biovecs can be shared between multiple bios - a bvec iter can represent an + arbitrary range of an existing biovec, both starting and ending midway + through biovecs. This is what enables efficient splitting of arbitrary + bios. Note that this means we _only_ use bi_size to determine when we've + reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes + bi_size into account when constructing biovecs. + + * Splitting bios is now much simpler. The old bio_split() didn't even work on + bios with more than a single bvec! Now, we can efficiently split arbitrary + size bios - because the new bio can share the old bio's biovec. + + Care must be taken to ensure the biovec isn't freed while the split bio is + still using it, in case the original bio completes first, though. Using + bio_chain() when splitting bios helps with this. + + * Submitting partially completed bios is now perfectly fine - this comes up + occasionally in stacking block drivers, and various code (e.g. md and + bcache) had some ugly workarounds for this. + + It used to be the case that submitting a partially completed bio would work + fine to _most_ devices, but since accessing the raw bvec array was the + norm, not all drivers would respect bi_idx and those would break. Now, + since all drivers _must_ go through the bvec iterator - and have been + audited to make sure they are - submitting partially completed bios is + perfectly fine. + +Other implications: +=================== + + * Almost all usage of bi_idx is now incorrect and has been removed; instead, + where previously you would have used bi_idx you'd now use a bvec_iter, + probably passing it to one of the helper macros. + + I.e. instead of using bio_iovec_idx() (or bio->bi_io_vec[bio->bi_idx]), you + now use bio_iter_iovec(), which takes a bvec_iter and returns a + literal struct bio_vec - constructed on the fly from the raw biovec but + taking into account bi_bvec_done (and bi_size). 
+ + * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that + doesn't actually own the bio. The reason is twofold: firstly, it's not + actually needed for iterating over the bio anymore - we only use bi_size. + Secondly, when cloning a bio and reusing (a portion of) the original bio's + biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate + over all the biovecs in the new bio - which is silly as it's not needed. + + So, don't use bi_vcnt anymore. diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f4e5440aba05..929468e1512a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1546,7 +1546,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) err = _drbd_no_send_page(mdev, bvec.bv_page, bvec.bv_offset, bvec.bv_len, - bio_iter_last(bio, iter) + bio_iter_last(bvec, iter) ? 0 : MSG_MORE); if (err) return err; @@ -1565,7 +1565,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) err = _drbd_send_page(mdev, bvec.bv_page, bvec.bv_offset, bvec.bv_len, - bio_iter_last(bio, iter) ? 0 : MSG_MORE); + bio_iter_last(bvec, iter) ? 
0 : MSG_MORE); if (err) return err; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index aa362f493216..55298db36b2d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -278,7 +278,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) */ rq_for_each_segment(bvec, req, iter) { flags = 0; - if (!rq_iter_last(req, iter)) + if (!rq_iter_last(bvec, iter)) flags = MSG_MORE; dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", nbd->disk->disk_name, req, bvec.bv_len); diff --git a/fs/bio.c b/fs/bio.c index 8b7f14a95503..07b4b7afa695 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -532,13 +532,11 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) * most users will be overriding ->bi_bdev with a new target, * so we don't set nor calculate new physical/hw segment counts here */ - bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_bdev = bio_src->bi_bdev; bio->bi_flags |= 1 << BIO_CLONED; bio->bi_rw = bio_src->bi_rw; bio->bi_vcnt = bio_src->bi_vcnt; - bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - bio->bi_iter.bi_idx = bio_src->bi_iter.bi_idx; + bio->bi_iter = bio_src->bi_iter; } EXPORT_SYMBOL(__bio_clone); @@ -808,28 +806,7 @@ void bio_advance(struct bio *bio, unsigned bytes) if (bio_integrity(bio)) bio_integrity_advance(bio, bytes); - bio->bi_iter.bi_sector += bytes >> 9; - bio->bi_iter.bi_size -= bytes; - - if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) - return; - - while (bytes) { - if (unlikely(bio->bi_iter.bi_idx >= bio->bi_vcnt)) { - WARN_ONCE(1, "bio idx %d >= vcnt %d\n", - bio->bi_iter.bi_idx, bio->bi_vcnt); - break; - } - - if (bytes >= bio_iovec(bio).bv_len) { - bytes -= bio_iovec(bio).bv_len; - bio->bi_iter.bi_idx++; - } else { - bio_iovec(bio).bv_len -= bytes; - bio_iovec(bio).bv_offset += bytes; - bytes = 0; - } - } + bio_advance_iter(bio, &bio->bi_iter, bytes); } EXPORT_SYMBOL(bio_advance); diff --git a/include/linux/bio.h b/include/linux/bio.h index c16adb5f69f8..04e592e74c92 100644 --- 
a/include/linux/bio.h +++ b/include/linux/bio.h @@ -64,11 +64,38 @@ #define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) #define __bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) -#define bio_iter_iovec(bio, iter) ((bio)->bi_io_vec[(iter).bi_idx]) +#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) -#define bio_page(bio) (bio_iovec((bio)).bv_page) -#define bio_offset(bio) (bio_iovec((bio)).bv_offset) -#define bio_iovec(bio) (*__bio_iovec(bio)) +#define bvec_iter_page(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_page) + +#define bvec_iter_len(bvec, iter) \ + min((iter).bi_size, \ + __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) + +#define bvec_iter_offset(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) + +#define bvec_iter_bvec(bvec, iter) \ +((struct bio_vec) { \ + .bv_page = bvec_iter_page((bvec), (iter)), \ + .bv_len = bvec_iter_len((bvec), (iter)), \ + .bv_offset = bvec_iter_offset((bvec), (iter)), \ +}) + +#define bio_iter_iovec(bio, iter) \ + bvec_iter_bvec((bio)->bi_io_vec, (iter)) + +#define bio_iter_page(bio, iter) \ + bvec_iter_page((bio)->bi_io_vec, (iter)) +#define bio_iter_len(bio, iter) \ + bvec_iter_len((bio)->bi_io_vec, (iter)) +#define bio_iter_offset(bio, iter) \ + bvec_iter_offset((bio)->bi_io_vec, (iter)) + +#define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter) +#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) +#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_iter.bi_idx) #define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) @@ -145,16 +172,54 @@ static inline void *bio_data(struct bio *bio) bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ i++) +static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter, + unsigned bytes) +{ + WARN_ONCE(bytes > iter->bi_size, + "Attempted to advance past end of bvec iter\n"); + + while (bytes) { + unsigned len = 
min(bytes, bvec_iter_len(bv, *iter)); + + bytes -= len; + iter->bi_size -= len; + iter->bi_bvec_done += len; + + if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { + iter->bi_bvec_done = 0; + iter->bi_idx++; + } + } +} + +#define for_each_bvec(bvl, bio_vec, iter, start) \ + for ((iter) = start; \ + (bvl) = bvec_iter_bvec((bio_vec), (iter)), \ + (iter).bi_size; \ + bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) + + +static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, + unsigned bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + iter->bi_size -= bytes; + else + bvec_iter_advance(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ - bvl = bio_iter_iovec((bio), (iter)), \ - (iter).bi_idx < (bio)->bi_vcnt; \ - (iter).bi_idx++) + (iter).bi_size && \ + ((bvl = bio_iter_iovec((bio), (iter))), 1); \ + bio_advance_iter((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) -#define bio_iter_last(bio, iter) ((iter).bi_idx == (bio)->bi_vcnt - 1) +#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) /* * get a reference to a bio, so it won't disappear. 
the intended use is diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 29b5b84d8a29..d369f8f6af79 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -34,6 +34,9 @@ struct bvec_iter { unsigned int bi_size; /* residual I/O count */ unsigned int bi_idx; /* current index into bvl_vec */ + + unsigned int bi_bvec_done; /* number of bytes completed in + current bvec */ }; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 337b92a54658..02cb6f0ea71d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -750,9 +750,9 @@ struct req_iterator { __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_segment(bvl, _iter.bio, _iter.iter) -#define rq_iter_last(rq, _iter) \ +#define rq_iter_last(bvec, _iter) \ (_iter.bio->bi_next == NULL && \ - bio_iter_last(_iter.bio, _iter.iter)) + bio_iter_last(bvec, _iter.iter)) #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" -- cgit v1.2.3 From d57a5f7c6605f15f3b5134837e68b448a7cea88e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 17:20:16 -0800 Subject: bio-integrity: Convert to bvec_iter The bio integrity is also stored in a bvec array, so if we use the bvec iter code we just added, the integrity code won't need to implement its own iteration stuff (bio_integrity_mark_head(), bio_integrity_mark_tail()) Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: "Martin K. Petersen" Cc: "James E.J. 
Bottomley" --- block/blk-integrity.c | 40 ++++++++++--------- drivers/scsi/sd_dif.c | 30 +++++++------- fs/bio-integrity.c | 108 ++++++++++++-------------------------------------- include/linux/bio.h | 19 ++++----- 4 files changed, 71 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 03cf7179e8ef..7fbab84399e6 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -43,30 +43,32 @@ static const char *bi_unsupported_name = "unsupported"; */ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) { - struct bio_vec *iv, *ivprv = NULL; + struct bio_vec iv, ivprv = { NULL }; unsigned int segments = 0; unsigned int seg_size = 0; - unsigned int i = 0; + struct bvec_iter iter; + int prev = 0; - bio_for_each_integrity_vec(iv, bio, i) { + bio_for_each_integrity_vec(iv, bio, iter) { - if (ivprv) { - if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + if (prev) { + if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) goto new_segment; - if (seg_size + iv->bv_len > queue_max_segment_size(q)) + if (seg_size + iv.bv_len > queue_max_segment_size(q)) goto new_segment; - seg_size += iv->bv_len; + seg_size += iv.bv_len; } else { new_segment: segments++; - seg_size = iv->bv_len; + seg_size = iv.bv_len; } + prev = 1; ivprv = iv; } @@ -87,24 +89,25 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg); int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, struct scatterlist *sglist) { - struct bio_vec *iv, *ivprv = NULL; + struct bio_vec iv, ivprv = { NULL }; struct scatterlist *sg = NULL; unsigned int segments = 0; - unsigned int i = 0; + struct bvec_iter iter; + int prev = 0; - bio_for_each_integrity_vec(iv, bio, i) { + bio_for_each_integrity_vec(iv, bio, iter) { - if (ivprv) { - if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + if (prev) { + if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) goto new_segment; - if 
(!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) goto new_segment; - if (sg->length + iv->bv_len > queue_max_segment_size(q)) + if (sg->length + iv.bv_len > queue_max_segment_size(q)) goto new_segment; - sg->length += iv->bv_len; + sg->length += iv.bv_len; } else { new_segment: if (!sg) @@ -114,10 +117,11 @@ new_segment: sg = sg_next(sg); } - sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset); + sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset); segments++; } + prev = 1; ivprv = iv; } diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 6174ca4ea275..a7a691d0af7d 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -365,7 +365,6 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector, struct bio *bio; struct scsi_disk *sdkp; struct sd_dif_tuple *sdt; - unsigned int i, j; u32 phys, virt; sdkp = rq->bio->bi_bdev->bd_disk->private_data; @@ -376,19 +375,21 @@ void sd_dif_prepare(struct request *rq, sector_t hw_sector, phys = hw_sector & 0xffffffff; __rq_for_each_bio(bio, rq) { - struct bio_vec *iv; + struct bio_vec iv; + struct bvec_iter iter; + unsigned int j; /* Already remapped? 
*/ if (bio_flagged(bio, BIO_MAPPED_INTEGRITY)) break; - virt = bio->bi_integrity->bip_sector & 0xffffffff; + virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff; - bip_for_each_vec(iv, bio->bi_integrity, i) { - sdt = kmap_atomic(iv->bv_page) - + iv->bv_offset; + bip_for_each_vec(iv, bio->bi_integrity, iter) { + sdt = kmap_atomic(iv.bv_page) + + iv.bv_offset; - for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) { + for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) { if (be32_to_cpu(sdt->ref_tag) == virt) sdt->ref_tag = cpu_to_be32(phys); @@ -414,7 +415,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes) struct scsi_disk *sdkp; struct bio *bio; struct sd_dif_tuple *sdt; - unsigned int i, j, sectors, sector_sz; + unsigned int j, sectors, sector_sz; u32 phys, virt; sdkp = scsi_disk(scmd->request->rq_disk); @@ -430,15 +431,16 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes) phys >>= 3; __rq_for_each_bio(bio, scmd->request) { - struct bio_vec *iv; + struct bio_vec iv; + struct bvec_iter iter; - virt = bio->bi_integrity->bip_sector & 0xffffffff; + virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff; - bip_for_each_vec(iv, bio->bi_integrity, i) { - sdt = kmap_atomic(iv->bv_page) - + iv->bv_offset; + bip_for_each_vec(iv, bio->bi_integrity, iter) { + sdt = kmap_atomic(iv.bv_page) + + iv.bv_offset; - for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) { + for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) { if (sectors == 0) { kunmap_atomic(sdt); diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 9127db86f315..fed744b8c9e5 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -134,8 +134,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, return 0; } - iv = bip_vec_idx(bip, bip->bip_vcnt); - BUG_ON(iv == NULL); + iv = bip->bip_vec + bip->bip_vcnt; iv->bv_page = page; iv->bv_len = len; @@ -203,6 +202,12 @@ static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi, return 
sectors; } +static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, + unsigned int sectors) +{ + return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size; +} + /** * bio_integrity_tag_size - Retrieve integrity tag space * @bio: bio to inspect @@ -235,9 +240,9 @@ int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set) nr_sectors = bio_integrity_hw_sectors(bi, DIV_ROUND_UP(len, bi->tag_size)); - if (nr_sectors * bi->tuple_size > bip->bip_size) { - printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", - __func__, nr_sectors * bi->tuple_size, bip->bip_size); + if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) { + printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__, + nr_sectors * bi->tuple_size, bip->bip_iter.bi_size); return -1; } @@ -322,7 +327,7 @@ static void bio_integrity_generate(struct bio *bio) sector += sectors; prot_buf += sectors * bi->tuple_size; total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_size); + BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); kunmap_atomic(kaddr); } @@ -387,8 +392,8 @@ int bio_integrity_prep(struct bio *bio) bip->bip_owns_buf = 1; bip->bip_buf = buf; - bip->bip_size = len; - bip->bip_sector = bio->bi_iter.bi_sector; + bip->bip_iter.bi_size = len; + bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; /* Map it */ offset = offset_in_page(buf); @@ -444,7 +449,7 @@ static int bio_integrity_verify(struct bio *bio) struct blk_integrity_exchg bix; struct bio_vec bv; struct bvec_iter iter; - sector_t sector = bio->bi_integrity->bip_sector; + sector_t sector = bio->bi_integrity->bip_iter.bi_sector; unsigned int sectors, total, ret; void *prot_buf = bio->bi_integrity->bip_buf; @@ -470,7 +475,7 @@ static int bio_integrity_verify(struct bio *bio) sector += sectors; prot_buf += sectors * bi->tuple_size; total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_size); + BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); 
kunmap_atomic(kaddr); } @@ -534,56 +539,6 @@ void bio_integrity_endio(struct bio *bio, int error) } EXPORT_SYMBOL(bio_integrity_endio); -/** - * bio_integrity_mark_head - Advance bip_vec skip bytes - * @bip: Integrity vector to advance - * @skip: Number of bytes to advance it - */ -void bio_integrity_mark_head(struct bio_integrity_payload *bip, - unsigned int skip) -{ - struct bio_vec *iv; - unsigned int i; - - bip_for_each_vec(iv, bip, i) { - if (skip == 0) { - bip->bip_idx = i; - return; - } else if (skip >= iv->bv_len) { - skip -= iv->bv_len; - } else { /* skip < iv->bv_len) */ - iv->bv_offset += skip; - iv->bv_len -= skip; - bip->bip_idx = i; - return; - } - } -} - -/** - * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long - * @bip: Integrity vector to truncate - * @len: New length of integrity vector - */ -void bio_integrity_mark_tail(struct bio_integrity_payload *bip, - unsigned int len) -{ - struct bio_vec *iv; - unsigned int i; - - bip_for_each_vec(iv, bip, i) { - if (len == 0) { - bip->bip_vcnt = i; - return; - } else if (len >= iv->bv_len) { - len -= iv->bv_len; - } else { /* len < iv->bv_len) */ - iv->bv_len = len; - len = 0; - } - } -} - /** * bio_integrity_advance - Advance integrity vector * @bio: bio whose integrity vector to update @@ -597,13 +552,9 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { struct bio_integrity_payload *bip = bio->bi_integrity; struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - unsigned int nr_sectors; + unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); - BUG_ON(bip == NULL); - BUG_ON(bi == NULL); - - nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9); - bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size); + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } EXPORT_SYMBOL(bio_integrity_advance); @@ -623,16 +574,9 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset, { struct bio_integrity_payload *bip = bio->bi_integrity; struct 
blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - unsigned int nr_sectors; - BUG_ON(bip == NULL); - BUG_ON(bi == NULL); - BUG_ON(!bio_flagged(bio, BIO_CLONED)); - - nr_sectors = bio_integrity_hw_sectors(bi, sectors); - bip->bip_sector = bip->bip_sector + offset; - bio_integrity_mark_head(bip, offset * bi->tuple_size); - bio_integrity_mark_tail(bip, sectors * bi->tuple_size); + bio_integrity_advance(bio, offset << 9); + bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors); } EXPORT_SYMBOL(bio_integrity_trim); @@ -662,8 +606,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->bio1.bi_integrity = &bp->bip1; bp->bio2.bi_integrity = &bp->bip2; - bp->iv1 = bip->bip_vec[bip->bip_idx]; - bp->iv2 = bip->bip_vec[bip->bip_idx]; + bp->iv1 = bip->bip_vec[bip->bip_iter.bi_idx]; + bp->iv2 = bip->bip_vec[bip->bip_iter.bi_idx]; bp->bip1.bip_vec = &bp->iv1; bp->bip2.bip_vec = &bp->iv2; @@ -672,11 +616,12 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv2.bv_offset += sectors * bi->tuple_size; bp->iv2.bv_len -= sectors * bi->tuple_size; - bp->bip1.bip_sector = bio->bi_integrity->bip_sector; - bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors; + bp->bip1.bip_iter.bi_sector = bio->bi_integrity->bip_iter.bi_sector; + bp->bip2.bip_iter.bi_sector = + bio->bi_integrity->bip_iter.bi_sector + nr_sectors; bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1; - bp->bip1.bip_idx = bp->bip2.bip_idx = 0; + bp->bip1.bip_iter.bi_idx = bp->bip2.bip_iter.bi_idx = 0; } EXPORT_SYMBOL(bio_integrity_split); @@ -704,9 +649,8 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, memcpy(bip->bip_vec, bip_src->bip_vec, bip_src->bip_vcnt * sizeof(struct bio_vec)); - bip->bip_sector = bip_src->bip_sector; bip->bip_vcnt = bip_src->bip_vcnt; - bip->bip_idx = bip_src->bip_idx; + bip->bip_iter = bip_src->bip_iter; return 0; } diff --git a/include/linux/bio.h b/include/linux/bio.h index 04e592e74c92..930cb73c894b 100644 
--- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -244,16 +244,15 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, struct bio_integrity_payload { struct bio *bip_bio; /* parent bio */ - sector_t bip_sector; /* virtual start sector */ + struct bvec_iter bip_iter; + /* kill - should just use bip_vec */ void *bip_buf; /* generated integrity data */ - bio_end_io_t *bip_end_io; /* saved I/O completion fn */ - unsigned int bip_size; + bio_end_io_t *bip_end_io; /* saved I/O completion fn */ unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ - unsigned short bip_idx; /* current bip_vec index */ unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ @@ -626,16 +625,12 @@ struct biovec_slab { #if defined(CONFIG_BLK_DEV_INTEGRITY) -#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) -#define bip_vec(bip) bip_vec_idx(bip, 0) -#define __bip_for_each_vec(bvl, bip, i, start_idx) \ - for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx); \ - i < (bip)->bip_vcnt; \ - bvl++, i++) -#define bip_for_each_vec(bvl, bip, i) \ - __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) +#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) + +#define bip_for_each_vec(bvl, bip, iter) \ + for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter) #define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ for_each_bio(_bio) \ -- cgit v1.2.3 From 458b76ed2f9517becb74dcc8eedd70d3068ea6e4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Sep 2013 16:26:05 -0700 Subject: block: Kill bio_segments()/bi_vcnt usage When we start sharing biovecs, keeping bi_vcnt accurate for splits is going to be error prone - and unnecessary, if we refactor some code. So bio_segments() has to go - but most of the existing users just needed to know if the bio had multiple segments, which is easier - add a bio_multiple_segments() for them. 
(Two of the current uses of bio_segments() are going to go away in a couple patches, but the current implementation of bio_segments() is unsafe as soon as we start doing driver conversions for immutable biovecs - so implement a dumb version for bisectability, it'll go away in a couple patches) Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Neil Brown Cc: Nagalakshmi Nandigama Cc: Sreekanth Reddy Cc: "James E.J. Bottomley" --- drivers/block/ps3disk.c | 7 ++- drivers/md/bcache/io.c | 53 +++++++++------------ drivers/md/raid0.c | 2 +- drivers/md/raid10.c | 2 +- drivers/message/fusion/mptsas.c | 8 ++-- drivers/scsi/libsas/sas_expander.c | 8 ++-- drivers/scsi/mpt2sas/mpt2sas_transport.c | 10 ++-- drivers/scsi/mpt3sas/mpt3sas_transport.c | 8 ++-- fs/bio.c | 2 +- include/linux/bio.h | 81 +++++++++++++++++++------------- 10 files changed, 94 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 1c6edb9a9960..c120d70d3fb3 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -101,10 +101,9 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, rq_for_each_segment(bvec, req, iter) { unsigned long flags; - dev_dbg(&dev->sbd.core, - "%s:%u: bio %u: %u segs %u sectors from %lu\n", - __func__, __LINE__, i, bio_segments(iter.bio), - bio_sectors(iter.bio), iter.bio->bi_iter.bi_sector); + dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %lu\n", + __func__, __LINE__, i, bio_sectors(iter.bio), + iter.bio->bi_iter.bi_sector); size = bvec.bv_len; buf = bvec_kmap_irq(&bvec, &flags); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 9b5b6a41a9b6..6e04f3bb0286 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -24,7 +24,8 @@ static void bch_generic_make_request_hack(struct bio *bio) if (bio->bi_iter.bi_idx) { struct bio_vec bv; struct bvec_iter iter; - struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio)); + unsigned segs = bio_segments(bio); 
+ struct bio *clone = bio_alloc(GFP_NOIO, segs); bio_for_each_segment(bv, bio, iter) clone->bi_io_vec[clone->bi_vcnt++] = bv; @@ -32,7 +33,7 @@ static void bch_generic_make_request_hack(struct bio *bio) clone->bi_iter.bi_sector = bio->bi_iter.bi_sector; clone->bi_bdev = bio->bi_bdev; clone->bi_rw = bio->bi_rw; - clone->bi_vcnt = bio_segments(bio); + clone->bi_vcnt = segs; clone->bi_iter.bi_size = bio->bi_iter.bi_size; clone->bi_private = bio; @@ -133,40 +134,32 @@ out: static unsigned bch_bio_max_sectors(struct bio *bio) { - unsigned ret = bio_sectors(bio); struct request_queue *q = bdev_get_queue(bio->bi_bdev); - unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES, - queue_max_segments(q)); + struct bio_vec bv; + struct bvec_iter iter; + unsigned ret = 0, seg = 0; if (bio->bi_rw & REQ_DISCARD) - return min(ret, q->limits.max_discard_sectors); - - if (bio_segments(bio) > max_segments || - q->merge_bvec_fn) { - struct bio_vec bv; - struct bvec_iter iter; - unsigned seg = 0; - - ret = 0; + return min(bio_sectors(bio), q->limits.max_discard_sectors); - bio_for_each_segment(bv, bio, iter) { - struct bvec_merge_data bvm = { - .bi_bdev = bio->bi_bdev, - .bi_sector = bio->bi_iter.bi_sector, - .bi_size = ret << 9, - .bi_rw = bio->bi_rw, - }; - - if (seg == max_segments) - break; + bio_for_each_segment(bv, bio, iter) { + struct bvec_merge_data bvm = { + .bi_bdev = bio->bi_bdev, + .bi_sector = bio->bi_iter.bi_sector, + .bi_size = ret << 9, + .bi_rw = bio->bi_rw, + }; + + if (seg == min_t(unsigned, BIO_MAX_PAGES, + queue_max_segments(q))) + break; - if (q->merge_bvec_fn && - q->merge_bvec_fn(q, &bvm, &bv) < (int) bv.bv_len) - break; + if (q->merge_bvec_fn && + q->merge_bvec_fn(q, &bvm, &bv) < (int) bv.bv_len) + break; - seg++; - ret += bv.bv_len >> 9; - } + seg++; + ret += bv.bv_len >> 9; } ret = min(ret, queue_max_sectors(q)); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e38d1d3226f3..8ee1a6c658b4 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ 
-528,7 +528,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) sector_t sector = bio->bi_iter.bi_sector; struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ - if (bio_segments(bio) > 1) + if (bio_multiple_segments(bio)) goto bad_map; /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index dbf3b63c2754..ac4bfa438c57 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1188,7 +1188,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) || conf->prev.near_copies < conf->prev.raid_disks))) { struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ - if (bio_segments(bio) > 1) + if (bio_multiple_segments(bio)) goto bad_map; /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index dd239bdbfcb4..00d339c361fc 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, } /* do we need to support multiple segments? 
*/ - if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) { - printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n", - ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req), - bio_segments(rsp->bio), blk_rq_bytes(rsp)); + if (bio_multiple_segments(req->bio) || + bio_multiple_segments(rsp->bio)) { + printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u, rsp %u\n", + ioc->name, __func__, blk_rq_bytes(req), blk_rq_bytes(rsp)); return -EINVAL; } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 446b85110a1f..0cac7d8fd0f7 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2163,10 +2163,10 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, } /* do we need to support multiple segments? */ - if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) { - printk("%s: multiple segments req %u %u, rsp %u %u\n", - __func__, bio_segments(req->bio), blk_rq_bytes(req), - bio_segments(rsp->bio), blk_rq_bytes(rsp)); + if (bio_multiple_segments(req->bio) || + bio_multiple_segments(rsp->bio)) { + printk("%s: multiple segments req %u, rsp %u\n", + __func__, blk_rq_bytes(req), blk_rq_bytes(rsp)); return -EINVAL; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index 7143e86af326..410f4a3e8888 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -1943,7 +1943,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, ioc->transport_cmds.status = MPT2_CMD_PENDING; /* Check if the request is split across multiple segments */ - if (bio_segments(req->bio) > 1) { + if (bio_multiple_segments(req->bio)) { u32 offset = 0; /* Allocate memory and copy the request */ @@ -1975,7 +1975,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, /* Check if the response needs to be populated across * multiple segments */ - if 
(bio_segments(rsp->bio) > 1) { + if (bio_multiple_segments(rsp->bio)) { pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), &pci_dma_in); if (!pci_addr_in) { @@ -2042,7 +2042,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC); sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; - if (bio_segments(req->bio) > 1) { + if (bio_multiple_segments(req->bio)) { ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(req) - 4), pci_dma_out); } else { @@ -2058,7 +2058,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_END_OF_LIST); sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; - if (bio_segments(rsp->bio) > 1) { + if (bio_multiple_segments(rsp->bio)) { ioc->base_add_sg_single(psge, sgl_flags | (blk_rq_bytes(rsp) + 4), pci_dma_in); } else { @@ -2103,7 +2103,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, le16_to_cpu(mpi_reply->ResponseDataLength); /* check if the resp needs to be copied from the allocated * pci mem */ - if (bio_segments(rsp->bio) > 1) { + if (bio_multiple_segments(rsp->bio)) { u32 offset = 0; u32 bytes_to_copy = le16_to_cpu(mpi_reply->ResponseDataLength); diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index 196a67f2e95f..65170cb1a00f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -1926,7 +1926,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, ioc->transport_cmds.status = MPT3_CMD_PENDING; /* Check if the request is split across multiple segments */ - if (req->bio->bi_vcnt > 1) { + if (bio_multiple_segments(req->bio)) { u32 offset = 0; /* Allocate memory and copy the request */ @@ -1958,7 +1958,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy 
*rphy, /* Check if the response needs to be populated across * multiple segments */ - if (rsp->bio->bi_vcnt > 1) { + if (bio_multiple_segments(rsp->bio)) { pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), &pci_dma_in); if (!pci_addr_in) { @@ -2019,7 +2019,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, mpi_request->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4); psge = &mpi_request->SGL; - if (req->bio->bi_vcnt > 1) + if (bio_multiple_segments(req->bio)) ioc->build_sg(ioc, psge, pci_dma_out, (blk_rq_bytes(req) - 4), pci_dma_in, (blk_rq_bytes(rsp) + 4)); else @@ -2064,7 +2064,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, /* check if the resp needs to be copied from the allocated * pci mem */ - if (rsp->bio->bi_vcnt > 1) { + if (bio_multiple_segments(rsp->bio)) { u32 offset = 0; u32 bytes_to_copy = le16_to_cpu(mpi_reply->ResponseDataLength); diff --git a/fs/bio.c b/fs/bio.c index f61e59b38815..e32f2ffc3f33 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1733,7 +1733,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) trace_block_split(bdev_get_queue(bi->bi_bdev), bi, bi->bi_iter.bi_sector + first_sectors); - BUG_ON(bio_segments(bi) > 1); + BUG_ON(bio_multiple_segments(bi)); atomic_set(&bp->cnt, 3); bp->error = 0; bp->bio1 = *bi; diff --git a/include/linux/bio.h b/include/linux/bio.h index 930cb73c894b..aea9896a6289 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -97,13 +97,46 @@ #define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) #define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) -#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_iter.bi_idx) +#define bio_multiple_segments(bio) \ + ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len) #define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) #define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) +/* + * Check whether this bio carries any data or not. A NULL bio is allowed. 
+ */ +static inline bool bio_has_data(struct bio *bio) +{ + if (bio && + bio->bi_iter.bi_size && + !(bio->bi_rw & REQ_DISCARD)) + return true; + + return false; +} + +static inline bool bio_is_rw(struct bio *bio) +{ + if (!bio_has_data(bio)) + return false; + + if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + return false; + + return true; +} + +static inline bool bio_mergeable(struct bio *bio) +{ + if (bio->bi_rw & REQ_NOMERGE_FLAGS) + return false; + + return true; +} + static inline unsigned int bio_cur_bytes(struct bio *bio) { - if (bio->bi_vcnt) + if (bio_has_data(bio)) return bio_iovec(bio).bv_len; else /* dataless requests such as discard */ return bio->bi_iter.bi_size; @@ -111,7 +144,7 @@ static inline unsigned int bio_cur_bytes(struct bio *bio) static inline void *bio_data(struct bio *bio) { - if (bio->bi_vcnt) + if (bio_has_data(bio)) return page_address(bio_page(bio)) + bio_offset(bio); return NULL; @@ -221,6 +254,18 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) +static inline unsigned bio_segments(struct bio *bio) +{ + unsigned segs = 0; + struct bio_vec bv; + struct bvec_iter iter; + + bio_for_each_segment(bv, bio, iter) + segs++; + + return segs; +} + /* * get a reference to a bio, so it won't disappear. the intended use is * something like: @@ -434,36 +479,6 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_iter.bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) -/* - * Check whether this bio carries any data or not. A NULL bio is allowed. 
- */ -static inline bool bio_has_data(struct bio *bio) -{ - if (bio && bio->bi_vcnt) - return true; - - return false; -} - -static inline bool bio_is_rw(struct bio *bio) -{ - if (!bio_has_data(bio)) - return false; - - if (bio->bi_rw & REQ_WRITE_SAME) - return false; - - return true; -} - -static inline bool bio_mergeable(struct bio *bio) -{ - if (bio->bi_rw & REQ_NOMERGE_FLAGS) - return false; - - return true; -} - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * -- cgit v1.2.3 From 003b5c5719f159f4f4bf97511c4702a0638313dd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2013 15:45:43 -0700 Subject: block: Convert drivers to immutable biovecs Now that we've got a mechanism for immutable biovecs - bi_iter.bi_bvec_done - we need to convert drivers to use primitives that respect it instead of using the bvec array directly. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: NeilBrown Cc: Alasdair Kergon Cc: dm-devel@redhat.com --- drivers/block/umem.c | 50 ++++++++++++++++++++++-------------------------- drivers/md/dm-crypt.c | 49 +++++++++++++++++------------------------------ drivers/md/dm-io.c | 31 ++++++++++++++++-------------- drivers/md/dm-raid1.c | 8 ++++---- drivers/md/dm-verity.c | 52 ++++++++++++++------------------------------------ fs/bio.c | 14 +++++++++++--- include/linux/dm-io.h | 4 ++-- 7 files changed, 89 insertions(+), 119 deletions(-) (limited to 'include') diff --git a/drivers/block/umem.c b/drivers/block/umem.c index dab4f1afeae9..4cf81b5bf0f7 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -108,8 +108,7 @@ struct cardinfo { * have been written */ struct bio *bio, *currentbio, **biotail; - int current_idx; - sector_t current_sector; + struct bvec_iter current_iter; struct request_queue *queue; @@ -118,7 +117,7 @@ struct cardinfo { struct mm_dma_desc *desc; int cnt, headcnt; struct bio *bio, **biotail; - int idx; + struct bvec_iter iter; } mm_pages[2]; #define DESC_PER_PAGE 
((PAGE_SIZE*2)/sizeof(struct mm_dma_desc)) @@ -344,16 +343,13 @@ static int add_bio(struct cardinfo *card) dma_addr_t dma_handle; int offset; struct bio *bio; - struct bio_vec *vec; - int idx; + struct bio_vec vec; int rw; - int len; bio = card->currentbio; if (!bio && card->bio) { card->currentbio = card->bio; - card->current_idx = card->bio->bi_iter.bi_idx; - card->current_sector = card->bio->bi_iter.bi_sector; + card->current_iter = card->bio->bi_iter; card->bio = card->bio->bi_next; if (card->bio == NULL) card->biotail = &card->bio; @@ -362,18 +358,17 @@ static int add_bio(struct cardinfo *card) } if (!bio) return 0; - idx = card->current_idx; rw = bio_rw(bio); if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) return 0; - vec = bio_iovec_idx(bio, idx); - len = vec->bv_len; + vec = bio_iter_iovec(bio, card->current_iter); + dma_handle = pci_map_page(card->dev, - vec->bv_page, - vec->bv_offset, - len, + vec.bv_page, + vec.bv_offset, + vec.bv_len, (rw == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -381,7 +376,7 @@ static int add_bio(struct cardinfo *card) desc = &p->desc[p->cnt]; p->cnt++; if (p->bio == NULL) - p->idx = idx; + p->iter = card->current_iter; if ((p->biotail) != &bio->bi_next) { *(p->biotail) = bio; p->biotail = &(bio->bi_next); @@ -391,8 +386,8 @@ static int add_bio(struct cardinfo *card) desc->data_dma_handle = dma_handle; desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle); - desc->local_addr = cpu_to_le64(card->current_sector << 9); - desc->transfer_size = cpu_to_le32(len); + desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9); + desc->transfer_size = cpu_to_le32(vec.bv_len); offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc)); desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset)); desc->zero1 = desc->zero2 = 0; @@ -407,10 +402,9 @@ static int add_bio(struct cardinfo *card) desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); desc->sem_control_bits = desc->control_bits; - card->current_sector += 
(len >> 9); - idx++; - card->current_idx = idx; - if (idx >= bio->bi_vcnt) + + bio_advance_iter(bio, &card->current_iter, vec.bv_len); + if (!card->current_iter.bi_size) card->currentbio = NULL; return 1; @@ -439,23 +433,25 @@ static void process_page(unsigned long data) struct mm_dma_desc *desc = &page->desc[page->headcnt]; int control = le32_to_cpu(desc->sem_control_bits); int last = 0; - int idx; + struct bio_vec vec; if (!(control & DMASCR_DMA_COMPLETE)) { control = dma_status; last = 1; } + page->headcnt++; - idx = page->idx; - page->idx++; - if (page->idx >= bio->bi_vcnt) { + vec = bio_iter_iovec(bio, page->iter); + bio_advance_iter(bio, &page->iter, vec.bv_len); + + if (!page->iter.bi_size) { page->bio = bio->bi_next; if (page->bio) - page->idx = page->bio->bi_iter.bi_idx; + page->iter = page->bio->bi_iter; } pci_unmap_page(card->dev, desc->data_dma_handle, - bio_iovec_idx(bio, idx)->bv_len, + vec.bv_len, (control & DMASCR_TRANSFER_READ) ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (control & DMASCR_HARD_ERROR) { diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1e2e5465d28e..784695d22fde 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -39,10 +39,8 @@ struct convert_context { struct completion restart; struct bio *bio_in; struct bio *bio_out; - unsigned int offset_in; - unsigned int offset_out; - unsigned int idx_in; - unsigned int idx_out; + struct bvec_iter iter_in; + struct bvec_iter iter_out; sector_t cc_sector; atomic_t cc_pending; }; @@ -826,10 +824,10 @@ static void crypt_convert_init(struct crypt_config *cc, { ctx->bio_in = bio_in; ctx->bio_out = bio_out; - ctx->offset_in = 0; - ctx->offset_out = 0; - ctx->idx_in = bio_in ? bio_in->bi_iter.bi_idx : 0; - ctx->idx_out = bio_out ? 
bio_out->bi_iter.bi_idx : 0; + if (bio_in) + ctx->iter_in = bio_in->bi_iter; + if (bio_out) + ctx->iter_out = bio_out->bi_iter; ctx->cc_sector = sector + cc->iv_offset; init_completion(&ctx->restart); } @@ -857,8 +855,8 @@ static int crypt_convert_block(struct crypt_config *cc, struct convert_context *ctx, struct ablkcipher_request *req) { - struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); - struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); + struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in); + struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out); struct dm_crypt_request *dmreq; u8 *iv; int r; @@ -869,24 +867,15 @@ static int crypt_convert_block(struct crypt_config *cc, dmreq->iv_sector = ctx->cc_sector; dmreq->ctx = ctx; sg_init_table(&dmreq->sg_in, 1); - sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, - bv_in->bv_offset + ctx->offset_in); + sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT, + bv_in.bv_offset); sg_init_table(&dmreq->sg_out, 1); - sg_set_page(&dmreq->sg_out, bv_out->bv_page, 1 << SECTOR_SHIFT, - bv_out->bv_offset + ctx->offset_out); + sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT, + bv_out.bv_offset); - ctx->offset_in += 1 << SECTOR_SHIFT; - if (ctx->offset_in >= bv_in->bv_len) { - ctx->offset_in = 0; - ctx->idx_in++; - } - - ctx->offset_out += 1 << SECTOR_SHIFT; - if (ctx->offset_out >= bv_out->bv_len) { - ctx->offset_out = 0; - ctx->idx_out++; - } + bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT); + bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT); if (cc->iv_gen_ops) { r = cc->iv_gen_ops->generator(cc, iv, dmreq); @@ -937,8 +926,7 @@ static int crypt_convert(struct crypt_config *cc, atomic_set(&ctx->cc_pending, 1); - while(ctx->idx_in < ctx->bio_in->bi_vcnt && - ctx->idx_out < ctx->bio_out->bi_vcnt) { + while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { crypt_alloc_req(cc, ctx); @@ -1207,7 +1195,7 @@ 
static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) } /* crypt_convert should have filled the clone bio */ - BUG_ON(io->ctx.idx_out < clone->bi_vcnt); + BUG_ON(io->ctx.iter_out.bi_size); clone->bi_iter.bi_sector = cc->start + io->sector; @@ -1246,7 +1234,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) } io->ctx.bio_out = clone; - io->ctx.idx_out = 0; + io->ctx.iter_out = clone->bi_iter; remaining -= clone->bi_iter.bi_size; sector += bio_sectors(clone); @@ -1290,8 +1278,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(new_io); crypt_convert_init(cc, &new_io->ctx, NULL, io->base_bio, sector); - new_io->ctx.idx_in = io->ctx.idx_in; - new_io->ctx.offset_in = io->ctx.offset_in; + new_io->ctx.iter_in = io->ctx.iter_in; /* * Fragments after the first use the base_io diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 01558b093307..b2b8a10e8427 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -201,26 +201,29 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse /* * Functions for getting the pages from a bvec. 
*/ -static void bvec_get_page(struct dpages *dp, +static void bio_get_page(struct dpages *dp, struct page **p, unsigned long *len, unsigned *offset) { - struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; - *p = bvec->bv_page; - *len = bvec->bv_len; - *offset = bvec->bv_offset; + struct bio *bio = dp->context_ptr; + struct bio_vec bvec = bio_iovec(bio); + *p = bvec.bv_page; + *len = bvec.bv_len; + *offset = bvec.bv_offset; } -static void bvec_next_page(struct dpages *dp) +static void bio_next_page(struct dpages *dp) { - struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; - dp->context_ptr = bvec + 1; + struct bio *bio = dp->context_ptr; + struct bio_vec bvec = bio_iovec(bio); + + bio_advance(bio, bvec.bv_len); } -static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) +static void bio_dp_init(struct dpages *dp, struct bio *bio) { - dp->get_page = bvec_get_page; - dp->next_page = bvec_next_page; - dp->context_ptr = bvec; + dp->get_page = bio_get_page; + dp->next_page = bio_next_page; + dp->context_ptr = bio; } /* @@ -457,8 +460,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); break; - case DM_IO_BVEC: - bvec_dp_init(dp, io_req->mem.ptr.bvec); + case DM_IO_BIO: + bio_dp_init(dp, io_req->mem.ptr.bio); break; case DM_IO_VMA: diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9f6d8e6baa7d..f284e0bfb25f 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -526,8 +526,8 @@ static void read_async_bio(struct mirror *m, struct bio *bio) struct dm_io_region io; struct dm_io_request io_req = { .bi_rw = READ, - .mem.type = DM_IO_BVEC, - .mem.ptr.bvec = bio->bi_io_vec + bio->bi_iter.bi_idx, + .mem.type = DM_IO_BIO, + .mem.ptr.bio = bio, .notify.fn = read_callback, .notify.context = bio, .client = m->ms->io_client, @@ -629,8 +629,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio) struct mirror *m; struct dm_io_request io_req = { 
.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), - .mem.type = DM_IO_BVEC, - .mem.ptr.bvec = bio->bi_io_vec + bio->bi_iter.bi_idx, + .mem.type = DM_IO_BIO, + .mem.ptr.bio = bio, .notify.fn = write_callback, .notify.context = bio, .client = ms->io_client, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 5392135924ca..ac35e959d49b 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -73,15 +73,10 @@ struct dm_verity_io { sector_t block; unsigned n_blocks; - /* saved bio vector */ - struct bio_vec *io_vec; - unsigned io_vec_size; + struct bvec_iter iter; struct work_struct work; - /* A space for short vectors; longer vectors are allocated separately. */ - struct bio_vec io_vec_inline[DM_VERITY_IO_VEC_INLINE]; - /* * Three variably-size fields follow this struct: * @@ -284,9 +279,10 @@ release_ret_r: static int verity_verify_io(struct dm_verity_io *io) { struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, + v->ti->per_bio_data_size); unsigned b; int i; - unsigned vector = 0, offset = 0; for (b = 0; b < io->n_blocks; b++) { struct shash_desc *desc; @@ -336,31 +332,22 @@ test_block_hash: } todo = 1 << v->data_dev_block_bits; - do { - struct bio_vec *bv; + while (io->iter.bi_size) { u8 *page; - unsigned len; - - BUG_ON(vector >= io->io_vec_size); - bv = &io->io_vec[vector]; - page = kmap_atomic(bv->bv_page); - len = bv->bv_len - offset; - if (likely(len >= todo)) - len = todo; - r = crypto_shash_update(desc, - page + bv->bv_offset + offset, len); + struct bio_vec bv = bio_iter_iovec(bio, io->iter); + + page = kmap_atomic(bv.bv_page); + r = crypto_shash_update(desc, page + bv.bv_offset, + bv.bv_len); kunmap_atomic(page); + if (r < 0) { DMERR("crypto_shash_update failed: %d", r); return r; } - offset += len; - if (likely(offset == bv->bv_len)) { - offset = 0; - vector++; - } - todo -= len; - } while (todo); + + bio_advance_iter(bio, &io->iter, bv.bv_len); + } if (!v->version) { r = crypto_shash_update(desc, v->salt, 
v->salt_size); @@ -383,8 +370,6 @@ test_block_hash: return -EIO; } } - BUG_ON(vector != io->io_vec_size); - BUG_ON(offset); return 0; } @@ -400,9 +385,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; - if (io->io_vec != io->io_vec_inline) - mempool_free(io->io_vec, v->vec_mempool); - bio_endio(bio, error); } @@ -519,13 +501,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) bio->bi_end_io = verity_end_io; bio->bi_private = io; - io->io_vec_size = bio_segments(bio); - if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE) - io->io_vec = io->io_vec_inline; - else - io->io_vec = mempool_alloc(v->vec_mempool, GFP_NOIO); - memcpy(io->io_vec, __bio_iovec(bio), - io->io_vec_size * sizeof(struct bio_vec)); + io->iter = bio->bi_iter; verity_submit_prefetch(v, io); diff --git a/fs/bio.c b/fs/bio.c index e32f2ffc3f33..a082ce2d197b 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -525,8 +525,17 @@ EXPORT_SYMBOL(bio_phys_segments); */ void __bio_clone(struct bio *bio, struct bio *bio_src) { - memcpy(bio->bi_io_vec, bio_src->bi_io_vec, - bio_src->bi_max_vecs * sizeof(struct bio_vec)); + if (bio_is_rw(bio_src)) { + struct bio_vec bv; + struct bvec_iter iter; + + bio_for_each_segment(bv, bio_src, iter) + bio->bi_io_vec[bio->bi_vcnt++] = bv; + } else if (bio_has_data(bio_src)) { + memcpy(bio->bi_io_vec, bio_src->bi_io_vec, + bio_src->bi_max_vecs * sizeof(struct bio_vec)); + bio->bi_vcnt = bio_src->bi_vcnt; + } /* * most users will be overriding ->bi_bdev with a new target, @@ -535,7 +544,6 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) bio->bi_bdev = bio_src->bi_bdev; bio->bi_flags |= 1 << BIO_CLONED; bio->bi_rw = bio_src->bi_rw; - bio->bi_vcnt = bio_src->bi_vcnt; bio->bi_iter = bio_src->bi_iter; } EXPORT_SYMBOL(__bio_clone); diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index f4b0aa3126f5..a68cbe59e6ad 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h 
@@ -29,7 +29,7 @@ typedef void (*io_notify_fn)(unsigned long error, void *context); enum dm_io_mem_type { DM_IO_PAGE_LIST,/* Page list */ - DM_IO_BVEC, /* Bio vector */ + DM_IO_BIO, /* Bio vector */ DM_IO_VMA, /* Virtual memory area */ DM_IO_KMEM, /* Kernel memory */ }; @@ -41,7 +41,7 @@ struct dm_io_memory { union { struct page_list *pl; - struct bio_vec *bvec; + struct bio *bio; void *vma; void *addr; } ptr; -- cgit v1.2.3 From f38a5181d9f3e004b1f50f9d7e1f2a8492ce240a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 14:30:24 -0700 Subject: ceph: Convert to immutable biovecs Now that we've got a mechanism for immutable biovecs - bi_iter.bi_bvec_done - we need to convert drivers to use primitives that respect it instead of using the bvec array directly. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Sage Weil Cc: ceph-devel@vger.kernel.org --- include/linux/ceph/messenger.h | 4 ++-- net/ceph/messenger.c | 43 +++++++++++++++++------------------------- 2 files changed, 19 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 7c1420bb1dce..091fdb600d55 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -1,6 +1,7 @@ #ifndef __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H +#include #include #include #include @@ -119,8 +120,7 @@ struct ceph_msg_data_cursor { #ifdef CONFIG_BLOCK struct { /* bio */ struct bio *bio; /* bio from list */ - unsigned int vector_index; /* vector from bio */ - unsigned int vector_offset; /* bytes from vector */ + struct bvec_iter bvec_iter; }; #endif /* CONFIG_BLOCK */ struct { /* pages */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 4a5df7b1cc9f..18c039b95c22 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -777,13 +777,12 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, bio = data->bio; BUG_ON(!bio); - BUG_ON(!bio->bi_vcnt); cursor->resid 
= min(length, data->bio_length); cursor->bio = bio; - cursor->vector_index = 0; - cursor->vector_offset = 0; - cursor->last_piece = length <= bio->bi_io_vec[0].bv_len; + cursor->bvec_iter = bio->bi_iter; + cursor->last_piece = + cursor->resid <= bio_iter_len(bio, cursor->bvec_iter); } static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, @@ -792,71 +791,63 @@ static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, { struct ceph_msg_data *data = cursor->data; struct bio *bio; - struct bio_vec *bio_vec; - unsigned int index; + struct bio_vec bio_vec; BUG_ON(data->type != CEPH_MSG_DATA_BIO); bio = cursor->bio; BUG_ON(!bio); - index = cursor->vector_index; - BUG_ON(index >= (unsigned int) bio->bi_vcnt); + bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); - bio_vec = &bio->bi_io_vec[index]; - BUG_ON(cursor->vector_offset >= bio_vec->bv_len); - *page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset); + *page_offset = (size_t) bio_vec.bv_offset; BUG_ON(*page_offset >= PAGE_SIZE); if (cursor->last_piece) /* pagelist offset is always 0 */ *length = cursor->resid; else - *length = (size_t) (bio_vec->bv_len - cursor->vector_offset); + *length = (size_t) bio_vec.bv_len; BUG_ON(*length > cursor->resid); BUG_ON(*page_offset + *length > PAGE_SIZE); - return bio_vec->bv_page; + return bio_vec.bv_page; } static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) { struct bio *bio; - struct bio_vec *bio_vec; - unsigned int index; + struct bio_vec bio_vec; BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO); bio = cursor->bio; BUG_ON(!bio); - index = cursor->vector_index; - BUG_ON(index >= (unsigned int) bio->bi_vcnt); - bio_vec = &bio->bi_io_vec[index]; + bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); /* Advance the cursor offset */ BUG_ON(cursor->resid < bytes); cursor->resid -= bytes; - cursor->vector_offset += bytes; - if (cursor->vector_offset < bio_vec->bv_len) + + bio_advance_iter(bio, 
&cursor->bvec_iter, bytes); + + if (bytes < bio_vec.bv_len) return false; /* more bytes to process in this segment */ - BUG_ON(cursor->vector_offset != bio_vec->bv_len); /* Move on to the next segment, and possibly the next bio */ - if (++index == (unsigned int) bio->bi_vcnt) { + if (!cursor->bvec_iter.bi_size) { bio = bio->bi_next; - index = 0; + cursor->bvec_iter = bio->bi_iter; } cursor->bio = bio; - cursor->vector_index = index; - cursor->vector_offset = 0; if (!cursor->last_piece) { BUG_ON(!cursor->resid); BUG_ON(!bio); /* A short read is OK, so use <= rather than == */ - if (cursor->resid <= bio->bi_io_vec[index].bv_len) + if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter)) cursor->last_piece = true; } -- cgit v1.2.3 From f619d25460473788944e3b71b030398681e8809b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 14:30:33 -0700 Subject: block: Kill bio_iovec_idx(), __bio_iovec() bio_iovec_idx() and __bio_iovec() don't have any valid uses anymore - previous users have been converted to bio_iter_iovec() or other methods. __BVEC_END() has to go too - the bvec array can't be used directly for the last biovec because we might only be using the first portion of it, we have to iterate over the bvec array with bio_for_each_segment() which checks against the current value of bi_iter.bi_size. 
Signed-off-by: Kent Overstreet Cc: Jens Axboe --- block/blk-merge.c | 13 +++++++++++-- include/linux/bio.h | 26 ++++++++------------------ 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/block/blk-merge.c b/block/blk-merge.c index a1ead9049ed6..05c17be0eea4 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -86,6 +86,9 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { + struct bio_vec end_bv, nxt_bv; + struct bvec_iter iter; + if (!blk_queue_cluster(q)) return 0; @@ -96,14 +99,20 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, if (!bio_has_data(bio)) return 1; - if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) + bio_for_each_segment(end_bv, bio, iter) + if (end_bv.bv_len == iter.bi_size) + break; + + nxt_bv = bio_iovec(nxt); + + if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) return 0; /* * bio and nxt are contiguous in memory; check if the queue allows * these two to be merged into one */ - if (BIO_SEG_BOUNDARY(q, bio, nxt)) + if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) return 1; return 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index aea9896a6289..1a31f9d9e057 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -61,9 +61,6 @@ * various member access, note that bio_data should of course not be used * on highmem page vectors */ -#define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) -#define __bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) - #define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) #define bvec_iter_page(bvec, iter) \ @@ -162,19 +159,16 @@ static inline void *bio_data(struct bio *bio) * permanent PIO fall back, user is probably better off disabling highmem * I/O completely on that queue (see ide-dma for example) */ -#define __bio_kmap_atomic(bio, idx) \ - (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page) + \ - 
bio_iovec_idx((bio), (idx))->bv_offset) +#define __bio_kmap_atomic(bio, iter) \ + (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \ + bio_iter_iovec((bio), (iter)).bv_offset) -#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) +#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) /* * merge helpers etc */ -#define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) -#define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_iter.bi_idx) - /* Default implementation of BIOVEC_PHYS_MERGEABLE */ #define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) @@ -191,8 +185,6 @@ static inline void *bio_data(struct bio *bio) (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q))) -#define BIO_SEG_BOUNDARY(q, b1, b2) \ - BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2))) #define bio_io_error(bio) bio_endio((bio), -EIO) @@ -201,9 +193,7 @@ static inline void *bio_data(struct bio *bio) * before it got to the driver and the driver won't own all of it */ #define bio_for_each_segment_all(bvl, bio, i) \ - for (i = 0; \ - bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \ - i++) + for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++) static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter, unsigned bytes) @@ -468,15 +458,15 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) } #endif -static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, +static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, unsigned long *flags) { - return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags); + return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags); } #define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) #define bio_kmap_irq(bio, flags) \ - __bio_kmap_irq((bio), 
(bio)->bi_iter.bi_idx, (flags)) + __bio_kmap_irq((bio), (bio)->bi_iter, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) /* -- cgit v1.2.3 From 59d276fe02d7e887a4825ef05c80b8f8c54ba60a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 18:19:27 -0800 Subject: block: Add bio_clone_fast() bio_clone() just got more expensive - however, most users of bio_clone() don't actually need to modify the biovec. If they aren't modifying the biovec, and they can guarantee that the original bio isn't freed before the clone (also true in most cases), we can just point the clone at the original bio's biovec. Signed-off-by: Kent Overstreet --- drivers/md/bcache/request.c | 8 ++---- fs/bio.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 2 ++ 3 files changed, 64 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4c0a422fd49f..63451c724781 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -613,7 +613,6 @@ struct search { struct btree_op op; struct data_insert_op iop; - struct bio_vec bv[BIO_MAX_PAGES]; }; static void bch_cache_read_endio(struct bio *bio, int error) @@ -761,9 +760,7 @@ static void do_bio_hook(struct search *s) struct bio *bio = &s->bio.bio; bio_init(bio); - bio->bi_io_vec = s->bv; - bio->bi_max_vecs = BIO_MAX_PAGES; - __bio_clone(bio, s->orig_bio); + __bio_clone_fast(bio, s->orig_bio); bio->bi_end_io = request_endio; bio->bi_private = &s->cl; @@ -1065,8 +1062,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) closure_bio_submit(flush, cl, s->d); } } else { - s->iop.bio = bio_clone_bioset(bio, GFP_NOIO, - dc->disk.bio_split); + s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split); closure_bio_submit(bio, cl, s->d); } diff --git a/fs/bio.c b/fs/bio.c index 1628917e262a..00dc1893c6ee 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -548,6 +548,66 @@ void __bio_clone(struct 
bio *bio, struct bio *bio_src) } EXPORT_SYMBOL(__bio_clone); +/** + * __bio_clone_fast - clone a bio that shares the original bio's biovec + * @bio: destination bio + * @bio_src: bio to clone + * + * Clone a &bio. Caller will own the returned bio, but not + * the actual data it points to. Reference count of returned + * bio will be one. + * + * Caller must ensure that @bio_src is not freed before @bio. + */ +void __bio_clone_fast(struct bio *bio, struct bio *bio_src) +{ + BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE); + + /* + * most users will be overriding ->bi_bdev with a new target, + * so we don't set nor calculate new physical/hw segment counts here + */ + bio->bi_bdev = bio_src->bi_bdev; + bio->bi_flags |= 1 << BIO_CLONED; + bio->bi_rw = bio_src->bi_rw; + bio->bi_iter = bio_src->bi_iter; + bio->bi_io_vec = bio_src->bi_io_vec; +} +EXPORT_SYMBOL(__bio_clone_fast); + +/** + * bio_clone_fast - clone a bio that shares the original bio's biovec + * @bio: bio to clone + * @gfp_mask: allocation priority + * @bs: bio_set to allocate from + * + * Like __bio_clone_fast, only also allocates the returned bio + */ +struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) +{ + struct bio *b; + + b = bio_alloc_bioset(gfp_mask, 0, bs); + if (!b) + return NULL; + + __bio_clone_fast(b, bio); + + if (bio_integrity(bio)) { + int ret; + + ret = bio_integrity_clone(b, bio, gfp_mask); + + if (ret < 0) { + bio_put(b); + return NULL; + } + } + + return b; +} +EXPORT_SYMBOL(bio_clone_fast); + /** * bio_clone_bioset - clone a bio * @bio_src: bio to clone diff --git a/include/linux/bio.h b/include/linux/bio.h index 1a31f9d9e057..1f83f4a3083e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -328,6 +328,8 @@ extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); +extern void __bio_clone_fast(struct bio *, struct bio *); 
+extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern void __bio_clone(struct bio *, struct bio *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); -- cgit v1.2.3 From 1c3b13e64cf70d652fb04e32d13ae3e36810c2e4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 29 Oct 2013 17:17:49 -0700 Subject: dm: Refactor for new bio cloning/splitting We need to convert the dm code to the new bvec_iter primitives which respect bi_bvec_done; they also allow us to drastically simplify dm's bio splitting code. Also, it's no longer necessary to save/restore the bvec array anymore - driver conversions for immutable bvecs are done, so drivers should never be modifying it. Also kill bio_sector_offset(), dm was the only user and it doesn't make much sense anymore. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Alasdair Kergon Cc: dm-devel@redhat.com Reviewed-by: Mike Snitzer --- drivers/md/dm-bio-record.h | 25 ------- drivers/md/dm.c | 174 ++++++--------------------------------------- fs/bio.c | 72 ------------------- include/linux/bio.h | 2 - 4 files changed, 20 insertions(+), 253 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index 4f46e8e528de..dd3646111561 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -17,49 +17,24 @@ * original bio state. 
*/ -struct dm_bio_vec_details { -#if PAGE_SIZE < 65536 - __u16 bv_len; - __u16 bv_offset; -#else - unsigned bv_len; - unsigned bv_offset; -#endif -}; - struct dm_bio_details { struct block_device *bi_bdev; unsigned long bi_flags; struct bvec_iter bi_iter; - struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES]; }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) { - unsigned i; - bd->bi_bdev = bio->bi_bdev; bd->bi_flags = bio->bi_flags; bd->bi_iter = bio->bi_iter; - - for (i = 0; i < bio->bi_vcnt; i++) { - bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len; - bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset; - } } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) { - unsigned i; - bio->bi_bdev = bd->bi_bdev; bio->bi_flags = bd->bi_flags; bio->bi_iter = bd->bi_iter; - - for (i = 0; i < bio->bi_vcnt; i++) { - bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len; - bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset; - } } #endif diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ccd064ea4fe6..44a2fa6814ce 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1155,7 +1155,6 @@ struct clone_info { struct dm_io *io; sector_t sector; sector_t sector_count; - unsigned short idx; }; static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) @@ -1164,68 +1163,24 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) bio->bi_iter.bi_size = to_bytes(len); } -static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count) -{ - bio->bi_iter.bi_idx = idx; - bio->bi_vcnt = idx + bv_count; - bio->bi_flags &= ~(1 << BIO_SEG_VALID); -} - -static void clone_bio_integrity(struct bio *bio, struct bio *clone, - unsigned short idx, unsigned len, unsigned offset, - unsigned trim) -{ - if (!bio_integrity(bio)) - return; - - bio_integrity_clone(clone, bio, GFP_NOIO); - - if (trim) - bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len); -} - -/* 
- * Creates a little bio that just does part of a bvec. - */ -static void clone_split_bio(struct dm_target_io *tio, struct bio *bio, - sector_t sector, unsigned short idx, - unsigned offset, unsigned len) -{ - struct bio *clone = &tio->clone; - struct bio_vec *bv = bio->bi_io_vec + idx; - - *clone->bi_io_vec = *bv; - - bio_setup_sector(clone, sector, len); - - clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw; - clone->bi_vcnt = 1; - clone->bi_io_vec->bv_offset = offset; - clone->bi_io_vec->bv_len = clone->bi_iter.bi_size; - clone->bi_flags |= 1 << BIO_CLONED; - - clone_bio_integrity(bio, clone, idx, len, offset, 1); -} - /* * Creates a bio that consists of range of complete bvecs. */ static void clone_bio(struct dm_target_io *tio, struct bio *bio, - sector_t sector, unsigned short idx, - unsigned short bv_count, unsigned len) + sector_t sector, unsigned len) { struct bio *clone = &tio->clone; - unsigned trim = 0; - __bio_clone(clone, bio); - bio_setup_sector(clone, sector, len); - bio_setup_bv(clone, idx, bv_count); + __bio_clone_fast(clone, bio); + + if (bio_integrity(bio)) + bio_integrity_clone(clone, bio, GFP_NOIO); + + bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); + clone->bi_iter.bi_size = to_bytes(len); - if (idx != bio->bi_iter.bi_idx || - clone->bi_iter.bi_size < bio->bi_iter.bi_size) - trim = 1; - clone_bio_integrity(bio, clone, idx, len, 0, trim); + if (bio_integrity(bio)) + bio_integrity_trim(clone, 0, len); } static struct dm_target_io *alloc_tio(struct clone_info *ci, @@ -1258,7 +1213,7 @@ static void __clone_and_map_simple_bio(struct clone_info *ci, * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush * and discard, so no need for concern about wasted bvec allocations. 
*/ - __bio_clone(clone, ci->bio); + __bio_clone_fast(clone, ci->bio); if (len) bio_setup_sector(clone, ci->sector, len); @@ -1287,10 +1242,7 @@ static int __send_empty_flush(struct clone_info *ci) } static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, - sector_t sector, int nr_iovecs, - unsigned short idx, unsigned short bv_count, - unsigned offset, unsigned len, - unsigned split_bvec) + sector_t sector, unsigned len) { struct bio *bio = ci->bio; struct dm_target_io *tio; @@ -1304,11 +1256,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti num_target_bios = ti->num_write_bios(ti, bio); for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { - tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); - if (split_bvec) - clone_split_bio(tio, bio, sector, idx, offset, len); - else - clone_bio(tio, bio, sector, idx, bv_count, len); + tio = alloc_tio(ci, ti, 0, target_bio_nr); + clone_bio(tio, bio, sector, len); __map_bio(tio); } } @@ -1379,60 +1328,6 @@ static int __send_write_same(struct clone_info *ci) return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); } -/* - * Find maximum number of sectors / bvecs we can process with a single bio. 
- */ -static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx) -{ - struct bio *bio = ci->bio; - sector_t bv_len, total_len = 0; - - for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) { - bv_len = to_sector(bio->bi_io_vec[*idx].bv_len); - - if (bv_len > max) - break; - - max -= bv_len; - total_len += bv_len; - } - - return total_len; -} - -static int __split_bvec_across_targets(struct clone_info *ci, - struct dm_target *ti, sector_t max) -{ - struct bio *bio = ci->bio; - struct bio_vec *bv = bio->bi_io_vec + ci->idx; - sector_t remaining = to_sector(bv->bv_len); - unsigned offset = 0; - sector_t len; - - do { - if (offset) { - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; - - max = max_io_len(ci->sector, ti); - } - - len = min(remaining, max); - - __clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0, - bv->bv_offset + offset, len, 1); - - ci->sector += len; - ci->sector_count -= len; - offset += to_bytes(len); - } while (remaining -= len); - - ci->idx++; - - return 0; -} - /* * Select the correct strategy for processing a non-flush bio. */ @@ -1440,8 +1335,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) { struct bio *bio = ci->bio; struct dm_target *ti; - sector_t len, max; - int idx; + unsigned len; if (unlikely(bio->bi_rw & REQ_DISCARD)) return __send_discard(ci); @@ -1452,41 +1346,14 @@ static int __split_and_process_non_flush(struct clone_info *ci) if (!dm_target_is_valid(ti)) return -EIO; - max = max_io_len(ci->sector, ti); - - /* - * Optimise for the simple case where we can do all of - * the remaining io with a single clone. - */ - if (ci->sector_count <= max) { - __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, - ci->idx, bio->bi_vcnt - ci->idx, 0, - ci->sector_count, 0); - ci->sector_count = 0; - return 0; - } - - /* - * There are some bvecs that don't span targets. - * Do as many of these as possible. 
- */ - if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { - len = __len_within_target(ci, max, &idx); - - __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, - ci->idx, idx - ci->idx, 0, len, 0); + len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); - ci->sector += len; - ci->sector_count -= len; - ci->idx = idx; + __clone_and_map_data_bio(ci, ti, ci->sector, len); - return 0; - } + ci->sector += len; + ci->sector_count -= len; - /* - * Handle a bvec that must be split between two or more targets. - */ - return __split_bvec_across_targets(ci, ti, max); + return 0; } /* @@ -1512,7 +1379,6 @@ static void __split_and_process_bio(struct mapped_device *md, ci.io->md = md; spin_lock_init(&ci.io->endio_lock); ci.sector = bio->bi_iter.bi_sector; - ci.idx = bio->bi_iter.bi_idx; start_io_acct(ci.io); diff --git a/fs/bio.c b/fs/bio.c index 00dc1893c6ee..6e42b68ab0ac 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -514,40 +514,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) } EXPORT_SYMBOL(bio_phys_segments); -/** - * __bio_clone - clone a bio - * @bio: destination bio - * @bio_src: bio to clone - * - * Clone a &bio. Caller will own the returned bio, but not - * the actual data it points to. Reference count of returned - * bio will be one. 
- */ -void __bio_clone(struct bio *bio, struct bio *bio_src) -{ - if (bio_is_rw(bio_src)) { - struct bio_vec bv; - struct bvec_iter iter; - - bio_for_each_segment(bv, bio_src, iter) - bio->bi_io_vec[bio->bi_vcnt++] = bv; - } else if (bio_has_data(bio_src)) { - memcpy(bio->bi_io_vec, bio_src->bi_io_vec, - bio_src->bi_max_vecs * sizeof(struct bio_vec)); - bio->bi_vcnt = bio_src->bi_vcnt; - } - - /* - * most users will be overriding ->bi_bdev with a new target, - * so we don't set nor calculate new physical/hw segment counts here - */ - bio->bi_bdev = bio_src->bi_bdev; - bio->bi_flags |= 1 << BIO_CLONED; - bio->bi_rw = bio_src->bi_rw; - bio->bi_iter = bio_src->bi_iter; -} -EXPORT_SYMBOL(__bio_clone); - /** * __bio_clone_fast - clone a bio that shares the original bio's biovec * @bio: destination bio @@ -1921,44 +1887,6 @@ void bio_trim(struct bio *bio, int offset, int size) } EXPORT_SYMBOL_GPL(bio_trim); -/** - * bio_sector_offset - Find hardware sector offset in bio - * @bio: bio to inspect - * @index: bio_vec index - * @offset: offset in bv_page - * - * Return the number of hardware sectors between beginning of bio - * and an end point indicated by a bio_vec index and an offset - * within that vector's page. - */ -sector_t bio_sector_offset(struct bio *bio, unsigned short index, - unsigned int offset) -{ - unsigned int sector_sz; - struct bio_vec *bv; - sector_t sectors; - int i; - - sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue); - sectors = 0; - - if (index >= bio->bi_iter.bi_idx) - index = bio->bi_vcnt - 1; - - bio_for_each_segment_all(bv, bio, i) { - if (i == index) { - if (offset > bv->bv_offset) - sectors += (offset - bv->bv_offset) / sector_sz; - break; - } - - sectors += bv->bv_len / sector_sz; - } - - return sectors; -} -EXPORT_SYMBOL(bio_sector_offset); - /* * create memory pools for biovec's in a bio_set. * use the global biovec slabs created for general use. 
diff --git a/include/linux/bio.h b/include/linux/bio.h index 1f83f4a3083e..0c32a45a419c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -330,7 +330,6 @@ extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); -extern void __bio_clone(struct bio *, struct bio *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); extern struct bio_set *fs_bio_set; @@ -370,7 +369,6 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); -extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int, gfp_t); struct sg_iovec; -- cgit v1.2.3 From 196d38bccfcfa32faed8c561868336fdfa0fe8e4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 18:34:15 -0800 Subject: block: Generic bio chaining This adds a generic mechanism for chaining bio completions. This is going to be used for a bio_split() replacement, and it turns out to be very useful in a fair amount of driver code - a fair number of drivers were implementing this in their own roundabout ways, often painfully. Note that this means it's no longer possible to call bio_endio() more than once on the same bio! This can cause problems for drivers that save/restore bi_end_io. Arguably they shouldn't be saving/restoring bi_end_io at all - in all but the simplest cases they'd be better off just cloning the bio, and immutable biovecs is making bio cloning cheaper. But for now, we add a bio_endio_nodec() for these cases. 
Signed-off-by: Kent Overstreet Cc: Jens Axboe --- drivers/md/bcache/io.c | 2 +- drivers/md/dm-cache-target.c | 6 ++++ drivers/md/dm-snap.c | 1 + drivers/md/dm-thin.c | 8 +++-- drivers/md/dm-verity.c | 2 +- fs/bio-integrity.c | 2 +- fs/bio.c | 76 ++++++++++++++++++++++++++++++++++++++++---- include/linux/bio.h | 2 ++ include/linux/blk_types.h | 2 ++ 9 files changed, 90 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 0f0ab659914d..522f95778443 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -133,7 +133,7 @@ static void bch_bio_submit_split_done(struct closure *cl) s->bio->bi_end_io = s->bi_end_io; s->bio->bi_private = s->bi_private; - bio_endio(s->bio, 0); + bio_endio_nodec(s->bio, 0); closure_debug_destroy(&s->cl); mempool_free(s, s->p->bio_split_hook); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 86f9c83eb30c..bf3a206abd78 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -765,6 +765,12 @@ static void writethrough_endio(struct bio *bio, int err) dm_unhook_bio(&pb->hook_info, bio); + /* + * Must bump bi_remaining to allow bio to complete with + * restored bi_end_io. 
+ */ + atomic_inc(&bio->bi_remaining); + if (err) { bio_endio(bio, err); return; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 3ded8c729dfb..80b5cabbea29 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1415,6 +1415,7 @@ out: if (full_bio) { full_bio->bi_end_io = pe->full_bio_end_io; full_bio->bi_private = pe->full_bio_private; + atomic_inc(&full_bio->bi_remaining); } free_pending_exception(pe); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a65402480c8c..1abb4a24c338 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -611,8 +611,10 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { - if (m->bio) + if (m->bio) { m->bio->bi_end_io = m->saved_bi_end_io; + atomic_inc(&m->bio->bi_remaining); + } cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); @@ -626,8 +628,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) int r; bio = m->bio; - if (bio) + if (bio) { bio->bi_end_io = m->saved_bi_end_io; + atomic_inc(&bio->bi_remaining); + } if (m->err) { cell_error(pool, m->cell); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index ac35e959d49b..796007a5e0e1 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -385,7 +385,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; - bio_endio(bio, error); + bio_endio_nodec(bio, error); } static void verity_work(struct work_struct *w) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index fed744b8c9e5..9d547d2e357c 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -502,7 +502,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - bio_endio(bio, error); + bio_endio_nodec(bio, 
error); } /** diff --git a/fs/bio.c b/fs/bio.c index e6dfa06773ac..b0a16dbc71ef 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -273,6 +273,7 @@ void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; + atomic_set(&bio->bi_remaining, 1); atomic_set(&bio->bi_cnt, 1); } EXPORT_SYMBOL(bio_init); @@ -295,9 +296,35 @@ void bio_reset(struct bio *bio) memset(bio, 0, BIO_RESET_BYTES); bio->bi_flags = flags|(1 << BIO_UPTODATE); + atomic_set(&bio->bi_remaining, 1); } EXPORT_SYMBOL(bio_reset); +static void bio_chain_endio(struct bio *bio, int error) +{ + bio_endio(bio->bi_private, error); + bio_put(bio); +} + +/** + * bio_chain - chain bio completions + * + * The caller won't have a bi_end_io called when @bio completes - instead, + * @parent's bi_end_io won't be called until both @parent and @bio have + * completed; the chained bio will also be freed when it completes. + * + * The caller must not set bi_private or bi_end_io in @bio. + */ +void bio_chain(struct bio *bio, struct bio *parent) +{ + BUG_ON(bio->bi_private || bio->bi_end_io); + + bio->bi_private = parent; + bio->bi_end_io = bio_chain_endio; + atomic_inc(&parent->bi_remaining); +} +EXPORT_SYMBOL(bio_chain); + static void bio_alloc_rescue(struct work_struct *work) { struct bio_set *bs = container_of(work, struct bio_set, rescue_work); @@ -1719,16 +1746,53 @@ EXPORT_SYMBOL(bio_flush_dcache_pages); **/ void bio_endio(struct bio *bio, int error) { - if (error) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; + while (bio) { + BUG_ON(atomic_read(&bio->bi_remaining) <= 0); + + if (error) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + error = -EIO; + + if (!atomic_dec_and_test(&bio->bi_remaining)) + return; - if (bio->bi_end_io) - bio->bi_end_io(bio, error); + /* + * Need to have a real endio function for chained bios, + * otherwise various corner cases will break (like stacking 
+ * block devices that save/restore bi_end_io) - however, we want + * to avoid unbounded recursion and blowing the stack. Tail call + * optimization would handle this, but compiling with frame + * pointers also disables gcc's sibling call optimization. + */ + if (bio->bi_end_io == bio_chain_endio) { + struct bio *parent = bio->bi_private; + bio_put(bio); + bio = parent; + } else { + if (bio->bi_end_io) + bio->bi_end_io(bio, error); + bio = NULL; + } + } } EXPORT_SYMBOL(bio_endio); +/** + * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining + * @bio: bio + * @error: error, if any + * + * For code that has saved and restored bi_end_io; think hard before using this + * function, probably you should've cloned the entire bio. + **/ +void bio_endio_nodec(struct bio *bio, int error) +{ + atomic_inc(&bio->bi_remaining); + bio_endio(bio, error); +} +EXPORT_SYMBOL(bio_endio_nodec); + void bio_pair_release(struct bio_pair *bp) { if (atomic_dec_and_test(&bp->cnt)) { diff --git a/include/linux/bio.h b/include/linux/bio.h index 0c32a45a419c..64f5169c224b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -356,6 +356,7 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } extern void bio_endio(struct bio *, int); +extern void bio_endio_nodec(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -364,6 +365,7 @@ extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *); extern void bio_reset(struct bio *); +void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d369f8f6af79..bbc3a6c88fce 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -65,6 +65,8 @@ struct bio { unsigned int bi_seg_front_size; 
unsigned int bi_seg_back_size; + atomic_t bi_remaining; + bio_end_io_t *bi_end_io; void *bi_private; -- cgit v1.2.3 From ee67891bf132612feb7b999ee1f3350b40867cb4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 14:32:45 -0700 Subject: block: Rename bio_split() -> bio_pair_split() This is prep work for introducing a more general bio_split(). Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: NeilBrown Cc: Alasdair Kergon Cc: Lars Ellenberg Cc: Peter Osterlund Cc: Sage Weil --- drivers/block/pktcdvd.c | 2 +- drivers/md/linear.c | 2 +- drivers/md/raid0.c | 6 +++--- drivers/md/raid10.c | 2 +- fs/bio.c | 4 ++-- include/linux/bio.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index ce986bacf7b7..28789b82ae7d 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2399,7 +2399,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_iter.bi_sector; - bp = bio_split(bio, first_sectors); + bp = bio_pair_split(bio, first_sectors); BUG_ON(!bp); pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index fb3b0d04edfb..e9b53e9793bf 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -326,7 +326,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) rcu_read_unlock(); - bp = bio_split(bio, end_sector - bio->bi_iter.bi_sector); + bp = bio_pair_split(bio, end_sector - bio->bi_iter.bi_sector); linear_make_request(mddev, &bp->bio1); linear_make_request(mddev, &bp->bio2); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 8ee1a6c658b4..ea754dd1a5f5 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -534,11 +534,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) * refuse to split for us, so we need 
to split it. */ if (likely(is_power_of_2(chunk_sects))) - bp = bio_split(bio, chunk_sects - (sector & + bp = bio_pair_split(bio, chunk_sects - (sector & (chunk_sects-1))); else - bp = bio_split(bio, chunk_sects - - sector_div(sector, chunk_sects)); + bp = bio_pair_split(bio, chunk_sects - + sector_div(sector, chunk_sects)); raid0_make_request(mddev, &bp->bio1); raid0_make_request(mddev, &bp->bio2); bio_pair_release(bp); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ac4bfa438c57..69c1bc8da88f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1193,7 +1193,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, chunk_sects - + bp = bio_pair_split(bio, chunk_sects - (bio->bi_iter.bi_sector & (chunk_sects - 1))); /* Each of these 'make_request' calls will call 'wait_barrier'. diff --git a/fs/bio.c b/fs/bio.c index b0a16dbc71ef..a3e753f4d5a6 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1827,7 +1827,7 @@ static void bio_pair_end_2(struct bio *bi, int err) /* * split a bio - only worry about a bio with a single page in its iovec */ -struct bio_pair *bio_split(struct bio *bi, int first_sectors) +struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors) { struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); @@ -1874,7 +1874,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) return bp; } -EXPORT_SYMBOL(bio_split); +EXPORT_SYMBOL(bio_pair_split); /** * bio_trim - trim a bio diff --git a/include/linux/bio.h b/include/linux/bio.h index 64f5169c224b..aa67af0b31ac 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -317,7 +317,7 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); +extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern 
void bio_trim(struct bio *bio, int offset, int size); -- cgit v1.2.3 From 20d0189b1012a37d2533a87fb451f7852f2418d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 18:21:01 -0800 Subject: block: Introduce new bio_split() The new bio_split() can split arbitrary bios - it's not restricted to single page bios, like the old bio_split() (previously renamed to bio_pair_split()). It also has different semantics - it doesn't allocate a struct bio_pair, leaving it up to the caller to handle completions. Then convert the existing bio_pair_split() users to the new bio_split() - and also nvme, which was open coding bio splitting. (We have to take that BUG_ON() out of bio_integrity_trim() because this bio_split() needs to use it, and there's no reason it has to be used on bios marked as cloned; BIO_CLONED doesn't seem to have clearly documented semantics anyways.) Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Martin K. Petersen Cc: Matthew Wilcox Cc: Keith Busch Cc: Vishal Verma Cc: Jiri Kosina Cc: Neil Brown --- drivers/block/nvme-core.c | 106 +++------------------------------- drivers/block/pktcdvd.c | 136 ++++++++++++++++++++++++-------------------- drivers/md/bcache/bcache.h | 1 - drivers/md/bcache/io.c | 82 +------------------------- drivers/md/bcache/request.c | 12 ++-- drivers/md/linear.c | 96 +++++++++++++++---------------- drivers/md/raid0.c | 77 +++++++++---------------- drivers/md/raid10.c | 113 +++++++++++++++--------------------- fs/bio.c | 36 ++++++++++++ include/linux/bio.h | 22 +++++++ 10 files changed, 272 insertions(+), 409 deletions(-) (limited to 'include') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 5539d2920872..1f14ac403945 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -441,104 +441,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, return total_len; } -struct nvme_bio_pair { - struct bio b1, b2, *parent; - struct bio_vec *bv1, *bv2; - int err; 
- atomic_t cnt; -}; - -static void nvme_bio_pair_endio(struct bio *bio, int err) -{ - struct nvme_bio_pair *bp = bio->bi_private; - - if (err) - bp->err = err; - - if (atomic_dec_and_test(&bp->cnt)) { - bio_endio(bp->parent, bp->err); - kfree(bp->bv1); - kfree(bp->bv2); - kfree(bp); - } -} - -static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx, - int len, int offset) -{ - struct nvme_bio_pair *bp; - - BUG_ON(len > bio->bi_iter.bi_size); - BUG_ON(idx > bio->bi_vcnt); - - bp = kmalloc(sizeof(*bp), GFP_ATOMIC); - if (!bp) - return NULL; - bp->err = 0; - - bp->b1 = *bio; - bp->b2 = *bio; - - bp->b1.bi_iter.bi_size = len; - bp->b2.bi_iter.bi_size -= len; - bp->b1.bi_vcnt = idx; - bp->b2.bi_iter.bi_idx = idx; - bp->b2.bi_iter.bi_sector += len >> 9; - - if (offset) { - bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec), - GFP_ATOMIC); - if (!bp->bv1) - goto split_fail_1; - - bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec), - GFP_ATOMIC); - if (!bp->bv2) - goto split_fail_2; - - memcpy(bp->bv1, bio->bi_io_vec, - bio->bi_max_vecs * sizeof(struct bio_vec)); - memcpy(bp->bv2, bio->bi_io_vec, - bio->bi_max_vecs * sizeof(struct bio_vec)); - - bp->b1.bi_io_vec = bp->bv1; - bp->b2.bi_io_vec = bp->bv2; - bp->b2.bi_io_vec[idx].bv_offset += offset; - bp->b2.bi_io_vec[idx].bv_len -= offset; - bp->b1.bi_io_vec[idx].bv_len = offset; - bp->b1.bi_vcnt++; - } else - bp->bv1 = bp->bv2 = NULL; - - bp->b1.bi_private = bp; - bp->b2.bi_private = bp; - - bp->b1.bi_end_io = nvme_bio_pair_endio; - bp->b2.bi_end_io = nvme_bio_pair_endio; - - bp->parent = bio; - atomic_set(&bp->cnt, 2); - - return bp; - - split_fail_2: - kfree(bp->bv1); - split_fail_1: - kfree(bp); - return NULL; -} - static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, - int idx, int len, int offset) + int len) { - struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset); - if (!bp) + struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL); + if (!split) 
return -ENOMEM; + bio_chain(split, bio); + if (bio_list_empty(&nvmeq->sq_cong)) add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); - bio_list_add(&nvmeq->sq_cong, &bp->b1); - bio_list_add(&nvmeq->sq_cong, &bp->b2); + bio_list_add(&nvmeq->sq_cong, split); + bio_list_add(&nvmeq->sq_cong, bio); return 0; } @@ -568,8 +483,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, } else { if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec)) return nvme_split_and_submit(bio, nvmeq, - iter.bi_idx, - length, 0); + length); sg = sg ? sg + 1 : iod->sg; sg_set_page(sg, bvec.bv_page, @@ -578,9 +492,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, } if (split_len - length < bvec.bv_len) - return nvme_split_and_submit(bio, nvmeq, iter.bi_idx, - split_len, - split_len - length); + return nvme_split_and_submit(bio, nvmeq, split_len); length += bvec.bv_len; bvprv = bvec; first = 0; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 28789b82ae7d..3dda09a5ec41 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2338,75 +2338,29 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err) pkt_bio_finished(pd); } -static void pkt_make_request(struct request_queue *q, struct bio *bio) +static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) { - struct pktcdvd_device *pd; - char b[BDEVNAME_SIZE]; + struct bio *cloned_bio = bio_clone(bio, GFP_NOIO); + struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO); + + psd->pd = pd; + psd->bio = bio; + cloned_bio->bi_bdev = pd->bdev; + cloned_bio->bi_private = psd; + cloned_bio->bi_end_io = pkt_end_io_read_cloned; + pd->stats.secs_r += bio_sectors(bio); + pkt_queue_bio(pd, cloned_bio); +} + +static void pkt_make_request_write(struct request_queue *q, struct bio *bio) +{ + struct pktcdvd_device *pd = q->queuedata; sector_t zone; struct packet_data *pkt; int was_empty, blocked_bio; struct pkt_rb_node *node; - pd = 
q->queuedata; - if (!pd) { - pr_err("%s incorrect request queue\n", - bdevname(bio->bi_bdev, b)); - goto end_io; - } - - /* - * Clone READ bios so we can have our own bi_end_io callback. - */ - if (bio_data_dir(bio) == READ) { - struct bio *cloned_bio = bio_clone(bio, GFP_NOIO); - struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO); - - psd->pd = pd; - psd->bio = bio; - cloned_bio->bi_bdev = pd->bdev; - cloned_bio->bi_private = psd; - cloned_bio->bi_end_io = pkt_end_io_read_cloned; - pd->stats.secs_r += bio_sectors(bio); - pkt_queue_bio(pd, cloned_bio); - return; - } - - if (!test_bit(PACKET_WRITABLE, &pd->flags)) { - pkt_notice(pd, "WRITE for ro device (%llu)\n", - (unsigned long long)bio->bi_iter.bi_sector); - goto end_io; - } - - if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) { - pkt_err(pd, "wrong bio size\n"); - goto end_io; - } - - blk_queue_bounce(q, &bio); - zone = get_zone(bio->bi_iter.bi_sector, pd); - pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", - (unsigned long long)bio->bi_iter.bi_sector, - (unsigned long long)bio_end_sector(bio)); - - /* Check if we have to split the bio */ - { - struct bio_pair *bp; - sector_t last_zone; - int first_sectors; - - last_zone = get_zone(bio_end_sector(bio) - 1, pd); - if (last_zone != zone) { - BUG_ON(last_zone != zone + pd->settings.size); - first_sectors = last_zone - bio->bi_iter.bi_sector; - bp = bio_pair_split(bio, first_sectors); - BUG_ON(!bp); - pkt_make_request(q, &bp->bio1); - pkt_make_request(q, &bp->bio2); - bio_pair_release(bp); - return; - } - } /* * If we find a matching packet in state WAITING or READ_WAIT, we can @@ -2480,6 +2434,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) */ wake_up(&pd->wqueue); } +} + +static void pkt_make_request(struct request_queue *q, struct bio *bio) +{ + struct pktcdvd_device *pd; + char b[BDEVNAME_SIZE]; + struct bio *split; + + pd = q->queuedata; + if (!pd) { + pr_err("%s incorrect request queue\n", + 
bdevname(bio->bi_bdev, b)); + goto end_io; + } + + pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", + (unsigned long long)bio->bi_iter.bi_sector, + (unsigned long long)bio_end_sector(bio)); + + /* + * Clone READ bios so we can have our own bi_end_io callback. + */ + if (bio_data_dir(bio) == READ) { + pkt_make_request_read(pd, bio); + return; + } + + if (!test_bit(PACKET_WRITABLE, &pd->flags)) { + pkt_notice(pd, "WRITE for ro device (%llu)\n", + (unsigned long long)bio->bi_iter.bi_sector); + goto end_io; + } + + if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) { + pkt_err(pd, "wrong bio size\n"); + goto end_io; + } + + blk_queue_bounce(q, &bio); + + do { + sector_t zone = get_zone(bio->bi_iter.bi_sector, pd); + sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd); + + if (last_zone != zone) { + BUG_ON(last_zone != zone + pd->settings.size); + + split = bio_split(bio, last_zone - + bio->bi_iter.bi_sector, + GFP_NOIO, fs_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } + + pkt_make_request_write(q, split); + } while (split != bio); + return; end_io: bio_io_error(bio); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6b6fe935be73..964353c5329d 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -901,7 +901,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); void bch_bbio_free(struct bio *, struct cache_set *); struct bio *bch_bbio_alloc(struct cache_set *); -struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *); void bch_generic_make_request(struct bio *, struct bio_split_pool *); void __bch_submit_bbio(struct bio *, struct cache_set *); void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 522f95778443..fa028fa82df4 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -11,84 +11,6 @@ #include -/** - * bch_bio_split - split a bio - 
* @bio: bio to split - * @sectors: number of sectors to split from the front of @bio - * @gfp: gfp mask - * @bs: bio set to allocate from - * - * Allocates and returns a new bio which represents @sectors from the start of - * @bio, and updates @bio to represent the remaining sectors. - * - * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio - * unchanged. - * - * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a - * bvec boundry; it is the caller's responsibility to ensure that @bio is not - * freed before the split. - */ -struct bio *bch_bio_split(struct bio *bio, int sectors, - gfp_t gfp, struct bio_set *bs) -{ - unsigned vcnt = 0, nbytes = sectors << 9; - struct bio_vec bv; - struct bvec_iter iter; - struct bio *ret = NULL; - - BUG_ON(sectors <= 0); - - if (sectors >= bio_sectors(bio)) - return bio; - - if (bio->bi_rw & REQ_DISCARD) { - ret = bio_alloc_bioset(gfp, 1, bs); - if (!ret) - return NULL; - goto out; - } - - bio_for_each_segment(bv, bio, iter) { - vcnt++; - - if (nbytes <= bv.bv_len) - break; - - nbytes -= bv.bv_len; - } - - ret = bio_alloc_bioset(gfp, vcnt, bs); - if (!ret) - return NULL; - - bio_for_each_segment(bv, bio, iter) { - ret->bi_io_vec[ret->bi_vcnt++] = bv; - - if (ret->bi_vcnt == vcnt) - break; - } - - ret->bi_io_vec[ret->bi_vcnt - 1].bv_len = nbytes; -out: - ret->bi_bdev = bio->bi_bdev; - ret->bi_iter.bi_sector = bio->bi_iter.bi_sector; - ret->bi_iter.bi_size = sectors << 9; - ret->bi_rw = bio->bi_rw; - - if (bio_integrity(bio)) { - if (bio_integrity_clone(ret, bio, gfp)) { - bio_put(ret); - return NULL; - } - - bio_integrity_trim(ret, 0, bio_sectors(ret)); - } - - bio_advance(bio, ret->bi_iter.bi_size); - - return ret; -} - static unsigned bch_bio_max_sectors(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); @@ -172,8 +94,8 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) bio_get(bio); do { - n = bch_bio_split(bio, 
bch_bio_max_sectors(bio), - GFP_NOIO, s->p->bio_split); + n = bio_next_split(bio, bch_bio_max_sectors(bio), + GFP_NOIO, s->p->bio_split); n->bi_end_io = bch_bio_submit_split_endio; n->bi_private = &s->cl; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 63451c724781..5878cdb39529 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -371,7 +371,7 @@ static void bch_data_insert_start(struct closure *cl) op->writeback)) goto err; - n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); + n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split); n->bi_end_io = bch_data_insert_endio; n->bi_private = cl; @@ -679,9 +679,9 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k) if (KEY_DIRTY(k)) s->read_dirty_data = true; - n = bch_bio_split(bio, min_t(uint64_t, INT_MAX, - KEY_OFFSET(k) - bio->bi_iter.bi_sector), - GFP_NOIO, s->d->bio_split); + n = bio_next_split(bio, min_t(uint64_t, INT_MAX, + KEY_OFFSET(k) - bio->bi_iter.bi_sector), + GFP_NOIO, s->d->bio_split); bio_key = &container_of(n, struct bbio, bio)->key; bch_bkey_copy_single_ptr(bio_key, k, ptr); @@ -920,7 +920,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct bio *miss, *cache_bio; if (s->cache_miss || s->iop.bypass) { - miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); + miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split); ret = miss == bio ? MAP_DONE : MAP_CONTINUE; goto out_submit; } @@ -943,7 +943,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->iop.replace = true; - miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); + miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split); /* btree_search_recurse()'s btree iterator is no good anymore */ ret = miss == bio ? 
MAP_DONE : -EINTR; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index e9b53e9793bf..56f534b4a2d2 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -288,65 +288,65 @@ static int linear_stop (struct mddev *mddev) static void linear_make_request(struct mddev *mddev, struct bio *bio) { + char b[BDEVNAME_SIZE]; struct dev_info *tmp_dev; - sector_t start_sector; + struct bio *split; + sector_t start_sector, end_sector, data_offset; if (unlikely(bio->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bio); return; } - rcu_read_lock(); - tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector); - start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; - - - if (unlikely(bio->bi_iter.bi_sector >= (tmp_dev->end_sector) - || (bio->bi_iter.bi_sector < start_sector))) { - char b[BDEVNAME_SIZE]; - - printk(KERN_ERR - "md/linear:%s: make_request: Sector %llu out of bounds on " - "dev %s: %llu sectors, offset %llu\n", - mdname(mddev), - (unsigned long long)bio->bi_iter.bi_sector, - bdevname(tmp_dev->rdev->bdev, b), - (unsigned long long)tmp_dev->rdev->sectors, - (unsigned long long)start_sector); - rcu_read_unlock(); - bio_io_error(bio); - return; - } - if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) { - /* This bio crosses a device boundary, so we have to - * split it. 
- */ - struct bio_pair *bp; - sector_t end_sector = tmp_dev->end_sector; + do { + rcu_read_lock(); - rcu_read_unlock(); - - bp = bio_pair_split(bio, end_sector - bio->bi_iter.bi_sector); + tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector); + start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; + end_sector = tmp_dev->end_sector; + data_offset = tmp_dev->rdev->data_offset; + bio->bi_bdev = tmp_dev->rdev->bdev; - linear_make_request(mddev, &bp->bio1); - linear_make_request(mddev, &bp->bio2); - bio_pair_release(bp); - return; - } - - bio->bi_bdev = tmp_dev->rdev->bdev; - bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - start_sector - + tmp_dev->rdev->data_offset; - rcu_read_unlock(); + rcu_read_unlock(); - if (unlikely((bio->bi_rw & REQ_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { - /* Just ignore it */ - bio_endio(bio, 0); - return; - } + if (unlikely(bio->bi_iter.bi_sector >= end_sector || + bio->bi_iter.bi_sector < start_sector)) + goto out_of_bounds; + + if (unlikely(bio_end_sector(bio) > end_sector)) { + /* This bio crosses a device boundary, so we have to + * split it. 
+ */ + split = bio_split(bio, end_sector - + bio->bi_iter.bi_sector, + GFP_NOIO, fs_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } - generic_make_request(bio); + split->bi_iter.bi_sector = split->bi_iter.bi_sector - + start_sector + data_offset; + + if (unlikely((split->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { + /* Just ignore it */ + bio_endio(split, 0); + } else + generic_make_request(split); + } while (split != bio); + return; + +out_of_bounds: + printk(KERN_ERR + "md/linear:%s: make_request: Sector %llu out of bounds on " + "dev %s: %llu sectors, offset %llu\n", + mdname(mddev), + (unsigned long long)bio->bi_iter.bi_sector, + bdevname(tmp_dev->rdev->bdev, b), + (unsigned long long)tmp_dev->rdev->sectors, + (unsigned long long)start_sector); + bio_io_error(bio); } static void linear_status (struct seq_file *seq, struct mddev *mddev) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ea754dd1a5f5..407a99e46f69 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -513,65 +513,44 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev, static void raid0_make_request(struct mddev *mddev, struct bio *bio) { - unsigned int chunk_sects; - sector_t sector_offset; struct strip_zone *zone; struct md_rdev *tmp_dev; + struct bio *split; if (unlikely(bio->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bio); return; } - chunk_sects = mddev->chunk_sectors; - if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { + do { sector_t sector = bio->bi_iter.bi_sector; - struct bio_pair *bp; - /* Sanity check -- queue functions should prevent this happening */ - if (bio_multiple_segments(bio)) - goto bad_map; - /* This is a one page bio that upper layers - * refuse to split for us, so we need to split it. 
- */ - if (likely(is_power_of_2(chunk_sects))) - bp = bio_pair_split(bio, chunk_sects - (sector & - (chunk_sects-1))); - else - bp = bio_pair_split(bio, chunk_sects - - sector_div(sector, chunk_sects)); - raid0_make_request(mddev, &bp->bio1); - raid0_make_request(mddev, &bp->bio2); - bio_pair_release(bp); - return; - } - - sector_offset = bio->bi_iter.bi_sector; - zone = find_zone(mddev->private, &sector_offset); - tmp_dev = map_sector(mddev, zone, bio->bi_iter.bi_sector, - &sector_offset); - bio->bi_bdev = tmp_dev->bdev; - bio->bi_iter.bi_sector = sector_offset + zone->dev_start + - tmp_dev->data_offset; - - if (unlikely((bio->bi_rw & REQ_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { - /* Just ignore it */ - bio_endio(bio, 0); - return; - } - - generic_make_request(bio); - return; - -bad_map: - printk("md/raid0:%s: make_request bug: can't convert block across chunks" - " or bigger than %dk %llu %d\n", - mdname(mddev), chunk_sects / 2, - (unsigned long long)bio->bi_iter.bi_sector, - bio_sectors(bio) / 2); + unsigned chunk_sects = mddev->chunk_sectors; + + unsigned sectors = chunk_sects - + (likely(is_power_of_2(chunk_sects)) + ? 
(sector & (chunk_sects-1)) + : sector_div(sector, chunk_sects)); + + if (sectors < bio_sectors(bio)) { + split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } - bio_io_error(bio); - return; + zone = find_zone(mddev->private, &sector); + tmp_dev = map_sector(mddev, zone, sector, &sector); + split->bi_bdev = tmp_dev->bdev; + split->bi_iter.bi_sector = sector + zone->dev_start + + tmp_dev->data_offset; + + if (unlikely((split->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { + /* Just ignore it */ + bio_endio(split, 0); + } else + generic_make_request(split); + } while (split != bio); } static void raid0_status(struct seq_file *seq, struct mddev *mddev) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 69c1bc8da88f..6d43d88657aa 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) kfree(plug); } -static void make_request(struct mddev *mddev, struct bio * bio) +static void __make_request(struct mddev *mddev, struct bio *bio) { struct r10conf *conf = mddev->private; struct r10bio *r10_bio; struct bio *read_bio; int i; - sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask); - int chunk_sects = chunk_mask + 1; const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_fua = (bio->bi_rw & REQ_FUA); @@ -1174,69 +1172,6 @@ static void make_request(struct mddev *mddev, struct bio * bio) int max_sectors; int sectors; - if (unlikely(bio->bi_rw & REQ_FLUSH)) { - md_flush_request(mddev, bio); - return; - } - - /* If this request crosses a chunk boundary, we need to - * split it. This will only happen for 1 PAGE (or less) requests. 
- */ - if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + bio_sectors(bio) - > chunk_sects - && (conf->geo.near_copies < conf->geo.raid_disks - || conf->prev.near_copies < conf->prev.raid_disks))) { - struct bio_pair *bp; - /* Sanity check -- queue functions should prevent this happening */ - if (bio_multiple_segments(bio)) - goto bad_map; - /* This is a one page bio that upper layers - * refuse to split for us, so we need to split it. - */ - bp = bio_pair_split(bio, chunk_sects - - (bio->bi_iter.bi_sector & (chunk_sects - 1))); - - /* Each of these 'make_request' calls will call 'wait_barrier'. - * If the first succeeds but the second blocks due to the resync - * thread raising the barrier, we will deadlock because the - * IO to the underlying device will be queued in generic_make_request - * and will never complete, so will never reduce nr_pending. - * So increment nr_waiting here so no new raise_barriers will - * succeed, and so the second wait_barrier cannot block. - */ - spin_lock_irq(&conf->resync_lock); - conf->nr_waiting++; - spin_unlock_irq(&conf->resync_lock); - - make_request(mddev, &bp->bio1); - make_request(mddev, &bp->bio2); - - spin_lock_irq(&conf->resync_lock); - conf->nr_waiting--; - wake_up(&conf->wait_barrier); - spin_unlock_irq(&conf->resync_lock); - - bio_pair_release(bp); - return; - bad_map: - printk("md/raid10:%s: make_request bug: can't convert block across chunks" - " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, - (unsigned long long)bio->bi_iter.bi_sector, - bio_sectors(bio) / 2); - - bio_io_error(bio); - return; - } - - md_write_start(mddev, bio); - - /* - * Register the new request and wait if the reconstruction - * thread has put up a bar for new requests. - * Continue immediately if no resync is active currently. 
- */ - wait_barrier(conf); - sectors = bio_sectors(bio); while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && bio->bi_iter.bi_sector < conf->reshape_progress && @@ -1600,6 +1535,52 @@ retry_write: goto retry_write; } one_write_done(r10_bio); +} + +static void make_request(struct mddev *mddev, struct bio *bio) +{ + struct r10conf *conf = mddev->private; + sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask); + int chunk_sects = chunk_mask + 1; + + struct bio *split; + + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); + return; + } + + md_write_start(mddev, bio); + + /* + * Register the new request and wait if the reconstruction + * thread has put up a bar for new requests. + * Continue immediately if no resync is active currently. + */ + wait_barrier(conf); + + do { + + /* + * If this request crosses a chunk boundary, we need to split + * it. + */ + if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + + bio_sectors(bio) > chunk_sects + && (conf->geo.near_copies < conf->geo.raid_disks + || conf->prev.near_copies < + conf->prev.raid_disks))) { + split = bio_split(bio, chunk_sects - + (bio->bi_iter.bi_sector & + (chunk_sects - 1)), + GFP_NOIO, fs_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } + + __make_request(mddev, split); + } while (split != bio); /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); diff --git a/fs/bio.c b/fs/bio.c index a3e753f4d5a6..7b062befac82 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1793,6 +1793,42 @@ void bio_endio_nodec(struct bio *bio, int error) } EXPORT_SYMBOL(bio_endio_nodec); +/** + * bio_split - split a bio + * @bio: bio to split + * @sectors: number of sectors to split from the front of @bio + * @gfp: gfp mask + * @bs: bio set to allocate from + * + * Allocates and returns a new bio which represents @sectors from the start of + * @bio, and updates @bio to represent the remaining sectors. 
+ * + * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's + * responsibility to ensure that @bio is not freed before the split. + */ +struct bio *bio_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs) +{ + struct bio *split = NULL; + + BUG_ON(sectors <= 0); + BUG_ON(sectors >= bio_sectors(bio)); + + split = bio_clone_fast(bio, gfp, bs); + if (!split) + return NULL; + + split->bi_iter.bi_size = sectors << 9; + + if (bio_integrity(split)) + bio_integrity_trim(split, 0, sectors); + + bio_advance(bio, split->bi_iter.bi_size); + + return split; +} +EXPORT_SYMBOL(bio_split); + void bio_pair_release(struct bio_pair *bp) { if (atomic_dec_and_test(&bp->cnt)) { diff --git a/include/linux/bio.h b/include/linux/bio.h index aa67af0b31ac..19e31b2f5b2c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -321,6 +321,28 @@ extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern void bio_trim(struct bio *bio, int offset, int size); +extern struct bio *bio_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs); + +/** + * bio_next_split - get next @sectors from a bio, splitting if necessary + * @bio: bio to split + * @sectors: number of sectors to split from the front of @bio + * @gfp: gfp mask + * @bs: bio set to allocate from + * + * Returns a bio representing the next @sectors of @bio - if the bio is smaller + * than @sectors, returns the original bio unchanged. 
+ */ +static inline struct bio *bio_next_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs) +{ + if (sectors >= bio_sectors(bio)) + return bio; + + return bio_split(bio, sectors, gfp, bs); +} + extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); -- cgit v1.2.3 From 4b1faf931650d4a35b2a570318862821d6a962e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 14:33:00 -0700 Subject: block: Kill bio_pair_split() Signed-off-by: Kent Overstreet Cc: Jens Axboe --- fs/bio-integrity.c | 45 --------------------------- fs/bio.c | 90 ----------------------------------------------------- include/linux/bio.h | 30 ------------------ 3 files changed, 165 deletions(-) (limited to 'include') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 9d547d2e357c..80d972d739e5 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -580,51 +580,6 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset, } EXPORT_SYMBOL(bio_integrity_trim); -/** - * bio_integrity_split - Split integrity metadata - * @bio: Protected bio - * @bp: Resulting bio_pair - * @sectors: Offset - * - * Description: Splits an integrity page into a bio_pair. 
- */ -void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) -{ - struct blk_integrity *bi; - struct bio_integrity_payload *bip = bio->bi_integrity; - unsigned int nr_sectors; - - if (bio_integrity(bio) == 0) - return; - - bi = bdev_get_integrity(bio->bi_bdev); - BUG_ON(bi == NULL); - BUG_ON(bip->bip_vcnt != 1); - - nr_sectors = bio_integrity_hw_sectors(bi, sectors); - - bp->bio1.bi_integrity = &bp->bip1; - bp->bio2.bi_integrity = &bp->bip2; - - bp->iv1 = bip->bip_vec[bip->bip_iter.bi_idx]; - bp->iv2 = bip->bip_vec[bip->bip_iter.bi_idx]; - - bp->bip1.bip_vec = &bp->iv1; - bp->bip2.bip_vec = &bp->iv2; - - bp->iv1.bv_len = sectors * bi->tuple_size; - bp->iv2.bv_offset += sectors * bi->tuple_size; - bp->iv2.bv_len -= sectors * bi->tuple_size; - - bp->bip1.bip_iter.bi_sector = bio->bi_integrity->bip_iter.bi_sector; - bp->bip2.bip_iter.bi_sector = - bio->bi_integrity->bip_iter.bi_sector + nr_sectors; - - bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1; - bp->bip1.bip_iter.bi_idx = bp->bip2.bip_iter.bi_idx = 0; -} -EXPORT_SYMBOL(bio_integrity_split); - /** * bio_integrity_clone - Callback for cloning bios with integrity metadata * @bio: New bio diff --git a/fs/bio.c b/fs/bio.c index 7b062befac82..75c49a382239 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -38,8 +38,6 @@ */ #define BIO_INLINE_VECS 4 -static mempool_t *bio_split_pool __read_mostly; - /* * if you change this list, also change bvec_alloc or things will * break badly! 
cannot be bigger than what you can fit into an @@ -1829,89 +1827,6 @@ struct bio *bio_split(struct bio *bio, int sectors, } EXPORT_SYMBOL(bio_split); -void bio_pair_release(struct bio_pair *bp) -{ - if (atomic_dec_and_test(&bp->cnt)) { - struct bio *master = bp->bio1.bi_private; - - bio_endio(master, bp->error); - mempool_free(bp, bp->bio2.bi_private); - } -} -EXPORT_SYMBOL(bio_pair_release); - -static void bio_pair_end_1(struct bio *bi, int err) -{ - struct bio_pair *bp = container_of(bi, struct bio_pair, bio1); - - if (err) - bp->error = err; - - bio_pair_release(bp); -} - -static void bio_pair_end_2(struct bio *bi, int err) -{ - struct bio_pair *bp = container_of(bi, struct bio_pair, bio2); - - if (err) - bp->error = err; - - bio_pair_release(bp); -} - -/* - * split a bio - only worry about a bio with a single page in its iovec - */ -struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors) -{ - struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); - - if (!bp) - return bp; - - trace_block_split(bdev_get_queue(bi->bi_bdev), bi, - bi->bi_iter.bi_sector + first_sectors); - - BUG_ON(bio_multiple_segments(bi)); - atomic_set(&bp->cnt, 3); - bp->error = 0; - bp->bio1 = *bi; - bp->bio2 = *bi; - bp->bio2.bi_iter.bi_sector += first_sectors; - bp->bio2.bi_iter.bi_size -= first_sectors << 9; - bp->bio1.bi_iter.bi_size = first_sectors << 9; - - if (bi->bi_vcnt != 0) { - bp->bv1 = bio_iovec(bi); - bp->bv2 = bio_iovec(bi); - - if (bio_is_rw(bi)) { - bp->bv2.bv_offset += first_sectors << 9; - bp->bv2.bv_len -= first_sectors << 9; - bp->bv1.bv_len = first_sectors << 9; - } - - bp->bio1.bi_io_vec = &bp->bv1; - bp->bio2.bi_io_vec = &bp->bv2; - - bp->bio1.bi_max_vecs = 1; - bp->bio2.bi_max_vecs = 1; - } - - bp->bio1.bi_end_io = bio_pair_end_1; - bp->bio2.bi_end_io = bio_pair_end_2; - - bp->bio1.bi_private = bi; - bp->bio2.bi_private = bio_split_pool; - - if (bio_integrity(bi)) - bio_integrity_split(bi, bp, first_sectors); - - return bp; -} 
-EXPORT_SYMBOL(bio_pair_split); - /** * bio_trim - trim a bio * @bio: bio to trim @@ -2113,11 +2028,6 @@ static int __init init_bio(void) if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE)) panic("bio: can't create integrity pool\n"); - bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES, - sizeof(struct bio_pair)); - if (!bio_split_pool) - panic("bio: can't create split pool\n"); - return 0; } subsys_initcall(init_bio); diff --git a/include/linux/bio.h b/include/linux/bio.h index 19e31b2f5b2c..70654521dab6 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -297,30 +297,7 @@ struct bio_integrity_payload { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -/* - * A bio_pair is used when we need to split a bio. - * This can only happen for a bio that refers to just one - * page of data, and in the unusual situation when the - * page crosses a chunk/device boundary - * - * The address of the master bio is stored in bio1.bi_private - * The address of the pool the pair was allocated from is stored - * in bio2.bi_private - */ -struct bio_pair { - struct bio bio1, bio2; - struct bio_vec bv1, bv2; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - struct bio_integrity_payload bip1, bip2; - struct bio_vec iv1, iv2; -#endif - atomic_t cnt; - int error; -}; -extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors); -extern void bio_pair_release(struct bio_pair *dbio); extern void bio_trim(struct bio *bio, int offset, int size); - extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); @@ -677,7 +654,6 @@ extern int bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); -extern void bio_integrity_split(struct bio *, struct bio_pair *, int); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bioset_integrity_create(struct 
bio_set *, int); extern void bioset_integrity_free(struct bio_set *); @@ -721,12 +697,6 @@ static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, return 0; } -static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp, - int sectors) -{ - return; -} - static inline void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { -- cgit v1.2.3 From c241418b7c4fca9d3caa69be6eb9cf8e7aef1419 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 31 Dec 2013 11:38:50 +0800 Subject: blk-mq: Don't reserve a tag for flush request Reserving a tag (request) for flush to avoid dead lock is a overkill. A tag is valuable resource. We can track the number of flush requests and disallow having too many pending flush requests allocated. With this patch, blk_mq_alloc_request_pinned() could do a busy nop (but not a dead loop) if too many pending requests are allocated and new flush request is allocated. But this should not be a problem, too many pending flush requests are very rare case. I verified this can fix the deadlock caused by too many pending flush requests. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-flush.c | 8 +++++--- block/blk-mq.c | 46 ++++++++++++++++++++++++++++++---------------- include/linux/blk-mq.h | 3 +++ 3 files changed, 38 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/block/blk-flush.c b/block/blk-flush.c index fb6f3c0ffa49..701e5d9a80dd 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -284,9 +284,8 @@ static void mq_flush_work(struct work_struct *work) q = container_of(work, struct request_queue, mq_flush_work); - /* We don't need set REQ_FLUSH_SEQ, it's for consistency */ rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, - __GFP_WAIT|GFP_ATOMIC, true); + __GFP_WAIT|GFP_ATOMIC, false); rq->cmd_type = REQ_TYPE_FS; rq->end_io = flush_end_io; @@ -408,8 +407,11 @@ void blk_insert_flush(struct request *rq) /* * @policy now records what operations need to be done. 
Adjust * REQ_FLUSH and FUA for the driver. + * We keep REQ_FLUSH for mq to track flush requests. For !FUA, + * we never dispatch the request directly. */ - rq->cmd_flags &= ~REQ_FLUSH; + if (rq->cmd_flags & REQ_FUA) + rq->cmd_flags &= ~REQ_FLUSH; if (!(fflags & REQ_FUA)) rq->cmd_flags &= ~REQ_FUA; diff --git a/block/blk-mq.c b/block/blk-mq.c index c79126e11030..0d2e2b2864c0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -183,9 +183,27 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, } static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, - gfp_t gfp, bool reserved) + gfp_t gfp, bool reserved, + int rw) { - return blk_mq_alloc_rq(hctx, gfp, reserved); + struct request *req; + bool is_flush = false; + /* + * flush need allocate a request, leave at least one request for + * non-flush IO to avoid deadlock + */ + if ((rw & REQ_FLUSH) && !(rw & REQ_FLUSH_SEQ)) { + if (atomic_inc_return(&hctx->pending_flush) >= + hctx->queue_depth - hctx->reserved_tags - 1) { + atomic_dec(&hctx->pending_flush); + return NULL; + } + is_flush = true; + } + req = blk_mq_alloc_rq(hctx, gfp, reserved); + if (!req && is_flush) + atomic_dec(&hctx->pending_flush); + return req; } static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, @@ -198,7 +216,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved); + rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved, rw); if (rq) { blk_mq_rq_ctx_init(q, ctx, rq, rw); break; @@ -261,6 +279,9 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, const int tag = rq->tag; struct request_queue *q = rq->q; + if ((rq->cmd_flags & REQ_FLUSH) && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + atomic_dec(&hctx->pending_flush); + blk_mq_rq_init(hctx, rq); blk_mq_put_tag(hctx->tags, 
tag); @@ -928,14 +949,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) hctx = q->mq_ops->map_queue(q, ctx->cpu); trace_block_getrq(q, bio, rw); - rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); + rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false, bio->bi_rw); if (likely(rq)) - blk_mq_rq_ctx_init(q, ctx, rq, rw); + blk_mq_rq_ctx_init(q, ctx, rq, bio->bi_rw); else { blk_mq_put_ctx(ctx); trace_block_sleeprq(q, bio, rw); - rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC, - false); + rq = blk_mq_alloc_request_pinned(q, bio->bi_rw, + __GFP_WAIT|GFP_ATOMIC, false); ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); } @@ -1212,7 +1233,9 @@ static int blk_mq_init_hw_queues(struct request_queue *q, hctx->queue_num = i; hctx->flags = reg->flags; hctx->queue_depth = reg->queue_depth; + hctx->reserved_tags = reg->reserved_tags; hctx->cmd_size = reg->cmd_size; + atomic_set(&hctx->pending_flush, 0); blk_mq_init_cpu_notifier(&hctx->cpu_notifier, blk_mq_hctx_notify, hctx); @@ -1337,15 +1360,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, reg->queue_depth = BLK_MQ_MAX_DEPTH; } - /* - * Set aside a tag for flush requests. It will only be used while - * another flush request is in progress but outside the driver. 
- * - * TODO: only allocate if flushes are supported - */ - reg->queue_depth++; - reg->reserved_tags++; - if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) return ERR_PTR(-EINVAL); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ab0e9b2025b3..a5517b84e794 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -36,12 +36,15 @@ struct blk_mq_hw_ctx { struct list_head page_list; struct blk_mq_tags *tags; + atomic_t pending_flush; + unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 10 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; unsigned int queue_depth; + unsigned int reserved_tags; unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ -- cgit v1.2.3 From 3edcc0ce85c59d45d6dfc6a36a6b3f8b31ba9887 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 26 Dec 2013 21:31:38 +0800 Subject: block: blk-mq: don't export blk_mq_free_queue() blk_mq_free_queue() is called from release handler of queue kobject, so it needn't be called from drivers. 
Cc: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 - block/blk-mq.h | 1 + block/blk-sysfs.c | 1 + include/linux/blk-mq.h | 1 - 4 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index edbd2536f942..6914f9bd470a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1461,7 +1461,6 @@ void blk_mq_free_queue(struct request_queue *q) list_del_init(&q->all_q_node); mutex_unlock(&all_q_mutex); } -EXPORT_SYMBOL(blk_mq_free_queue); /* Basically redo blk_mq_init_queue with queue frozen */ static void blk_mq_queue_reinit(struct request_queue *q) diff --git a/block/blk-mq.h b/block/blk-mq.h index caa614f24409..e151a2f4f171 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,6 +28,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); +void blk_mq_free_queue(struct request_queue *q); /* * CPU hotplug helpers diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 97779522472f..8095c4a21fc0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -11,6 +11,7 @@ #include "blk.h" #include "blk-cgroup.h" +#include "blk-mq.h" struct queue_sysfs_entry { struct attribute attr; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ab0e9b2025b3..851d34b7ac26 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -113,7 +113,6 @@ enum { }; struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); -void blk_mq_free_queue(struct request_queue *); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); -- cgit v1.2.3 From 78365411b344df35a198b119133e6515c2dcfb9f Mon Sep 17 00:00:00 2001 
From: Kent Overstreet Date: Tue, 17 Dec 2013 01:29:34 -0800 Subject: bcache: Rework allocator reserves We need a reserve for allocating buckets for new btree nodes - and now that we've got multiple btrees, it really needs to be per btree. This reworks the reserves so we've got separate freelists for each reserve instead of watermarks, which seems to make things a bit cleaner, and it adds some code so that btree_split() can make sure the reserve is available before it starts. Signed-off-by: Kent Overstreet --- drivers/md/bcache/alloc.c | 72 ++++++++++++++++++++++++++----------------- drivers/md/bcache/bcache.h | 16 +++++----- drivers/md/bcache/btree.c | 34 ++++++++++++++++++-- drivers/md/bcache/btree.h | 4 +++ drivers/md/bcache/movinggc.c | 2 +- drivers/md/bcache/super.c | 21 ++++++++----- drivers/md/bcache/sysfs.c | 31 ------------------- include/trace/events/bcache.h | 8 ++--- 8 files changed, 105 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 4c9852d92b0a..bcfd96e2121b 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -132,10 +132,16 @@ bool bch_bucket_add_unused(struct cache *ca, struct bucket *b) { BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b)); - if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] && - CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) - return false; + if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) { + unsigned i; + + for (i = 0; i < RESERVE_NONE; i++) + if (!fifo_full(&ca->free[i])) + goto add; + return false; + } +add: b->prio = 0; if (can_inc_bucket_gen(b) && @@ -304,6 +310,21 @@ do { \ __set_current_state(TASK_RUNNING); \ } while (0) +static int bch_allocator_push(struct cache *ca, long bucket) +{ + unsigned i; + + /* Prios/gens are actually the most important reserve */ + if (fifo_push(&ca->free[RESERVE_PRIO], bucket)) + return true; + + for (i = 0; i < RESERVE_NR; i++) + if (fifo_push(&ca->free[i], bucket)) + return 
true; + + return false; +} + static int bch_allocator_thread(void *arg) { struct cache *ca = arg; @@ -336,9 +357,7 @@ static int bch_allocator_thread(void *arg) mutex_lock(&ca->set->bucket_lock); } - allocator_wait(ca, !fifo_full(&ca->free)); - - fifo_push(&ca->free, bucket); + allocator_wait(ca, bch_allocator_push(ca, bucket)); wake_up(&ca->set->bucket_wait); } @@ -365,34 +384,29 @@ static int bch_allocator_thread(void *arg) } } -long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait) +long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) { DEFINE_WAIT(w); struct bucket *b; long r; /* fastpath */ - if (fifo_used(&ca->free) > ca->watermark[watermark]) { - fifo_pop(&ca->free, r); + if (fifo_pop(&ca->free[RESERVE_NONE], r) || + fifo_pop(&ca->free[reserve], r)) goto out; - } if (!wait) return -1; - while (1) { - if (fifo_used(&ca->free) > ca->watermark[watermark]) { - fifo_pop(&ca->free, r); - break; - } - + do { prepare_to_wait(&ca->set->bucket_wait, &w, TASK_UNINTERRUPTIBLE); mutex_unlock(&ca->set->bucket_lock); schedule(); mutex_lock(&ca->set->bucket_lock); - } + } while (!fifo_pop(&ca->free[RESERVE_NONE], r) && + !fifo_pop(&ca->free[reserve], r)); finish_wait(&ca->set->bucket_wait, &w); out: @@ -401,12 +415,14 @@ out: if (expensive_debug_checks(ca->set)) { size_t iter; long i; + unsigned j; for (iter = 0; iter < prio_buckets(ca) * 2; iter++) BUG_ON(ca->prio_buckets[iter] == (uint64_t) r); - fifo_for_each(i, &ca->free, iter) - BUG_ON(i == r); + for (j = 0; j < RESERVE_NR; j++) + fifo_for_each(i, &ca->free[j], iter) + BUG_ON(i == r); fifo_for_each(i, &ca->free_inc, iter) BUG_ON(i == r); fifo_for_each(i, &ca->unused, iter) @@ -419,7 +435,7 @@ out: SET_GC_SECTORS_USED(b, ca->sb.bucket_size); - if (watermark <= WATERMARK_METADATA) { + if (reserve <= RESERVE_PRIO) { SET_GC_MARK(b, GC_MARK_METADATA); SET_GC_MOVE(b, 0); b->prio = BTREE_PRIO; @@ -445,7 +461,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k) } } -int 
__bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, +int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, struct bkey *k, int n, bool wait) { int i; @@ -459,7 +475,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, for (i = 0; i < n; i++) { struct cache *ca = c->cache_by_alloc[i]; - long b = bch_bucket_alloc(ca, watermark, wait); + long b = bch_bucket_alloc(ca, reserve, wait); if (b == -1) goto err; @@ -478,12 +494,12 @@ err: return -1; } -int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, +int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, struct bkey *k, int n, bool wait) { int ret; mutex_lock(&c->bucket_lock); - ret = __bch_bucket_alloc_set(c, watermark, k, n, wait); + ret = __bch_bucket_alloc_set(c, reserve, k, n, wait); mutex_unlock(&c->bucket_lock); return ret; } @@ -573,8 +589,8 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors, while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) { unsigned watermark = write_prio - ? WATERMARK_MOVINGGC - : WATERMARK_NONE; + ? 
RESERVE_MOVINGGC + : RESERVE_NONE; spin_unlock(&c->data_bucket_lock); @@ -689,7 +705,7 @@ int bch_cache_allocator_init(struct cache *ca) * Then 8 for btree allocations * Then half for the moving garbage collector */ - +#if 0 ca->watermark[WATERMARK_PRIO] = 0; ca->watermark[WATERMARK_METADATA] = prio_buckets(ca); @@ -699,6 +715,6 @@ int bch_cache_allocator_init(struct cache *ca) ca->watermark[WATERMARK_NONE] = ca->free.size / 2 + ca->watermark[WATERMARK_MOVINGGC]; - +#endif return 0; } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 9d062bc56261..94d346e2ea17 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -383,12 +383,12 @@ struct cached_dev { unsigned writeback_rate_p_term_inverse; }; -enum alloc_watermarks { - WATERMARK_PRIO, - WATERMARK_METADATA, - WATERMARK_MOVINGGC, - WATERMARK_NONE, - WATERMARK_MAX +enum alloc_reserve { + RESERVE_BTREE, + RESERVE_PRIO, + RESERVE_MOVINGGC, + RESERVE_NONE, + RESERVE_NR, }; struct cache { @@ -400,8 +400,6 @@ struct cache { struct kobject kobj; struct block_device *bdev; - unsigned watermark[WATERMARK_MAX]; - struct task_struct *alloc_thread; struct closure prio; @@ -430,7 +428,7 @@ struct cache { * because all the data they contained was overwritten), so we only * need to discard them before they can be moved to the free list. 
*/ - DECLARE_FIFO(long, free); + DECLARE_FIFO(long, free)[RESERVE_NR]; DECLARE_FIFO(long, free_inc); DECLARE_FIFO(long, unused); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 101231f0f399..6a0f5faf0bed 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -167,6 +167,8 @@ static inline bool should_split(struct btree *b) _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ } \ rw_unlock(_w, _b); \ + if (_r == -EINTR) \ + schedule(); \ bch_cannibalize_unlock(c); \ if (_r == -ENOSPC) { \ wait_event((c)->try_wait, \ @@ -175,6 +177,7 @@ static inline bool should_split(struct btree *b) } \ } while (_r == -EINTR); \ \ + finish_wait(&(c)->bucket_wait, &(op)->wait); \ _r; \ }) @@ -1075,7 +1078,7 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait) mutex_lock(&c->bucket_lock); retry: - if (__bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, wait)) + if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) goto err; bkey_put(c, &k.key); @@ -1132,6 +1135,28 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k) atomic_inc(&b->c->prio_blocked); } +static int btree_check_reserve(struct btree *b, struct btree_op *op) +{ + struct cache_set *c = b->c; + struct cache *ca; + unsigned i, reserve = c->root->level * 2 + 1; + int ret = 0; + + mutex_lock(&c->bucket_lock); + + for_each_cache(ca, c, i) + if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) { + if (op) + prepare_to_wait(&c->bucket_wait, &op->wait, + TASK_UNINTERRUPTIBLE); + ret = -EINTR; + break; + } + + mutex_unlock(&c->bucket_lock); + return ret; +} + /* Garbage collection */ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) @@ -1428,7 +1453,8 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, if (!IS_ERR(last->b)) { should_rewrite = btree_gc_mark_node(last->b, gc); - if (should_rewrite) { + if (should_rewrite && + !btree_check_reserve(b, NULL)) { n = 
btree_node_alloc_replacement(last->b, false); @@ -2071,6 +2097,10 @@ static int btree_split(struct btree *b, struct btree_op *op, closure_init_stack(&cl); bch_keylist_init(&parent_keys); + if (!b->level && + btree_check_reserve(b, op)) + return -EINTR; + n1 = btree_node_alloc_replacement(b, true); if (IS_ERR(n1)) goto err; diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index d68af7442f70..4f0378ac1f7b 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -241,6 +241,9 @@ void bkey_put(struct cache_set *c, struct bkey *k); /* Recursing down the btree */ struct btree_op { + /* for waiting on btree reserve in btree_split() */ + wait_queue_t wait; + /* Btree level at which we start taking write locks */ short lock; @@ -250,6 +253,7 @@ struct btree_op { static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) { memset(op, 0, sizeof(struct btree_op)); + init_wait(&op->wait); op->lock = write_lock_level; } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 052bd24d24b4..9eb60d102de8 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -211,7 +211,7 @@ void bch_moving_gc(struct cache_set *c) for_each_cache(ca, c, i) { unsigned sectors_to_move = 0; unsigned reserve_sectors = ca->sb.bucket_size * - min(fifo_used(&ca->free), ca->free.size / 2); + fifo_used(&ca->free[RESERVE_MOVINGGC]); ca->heap.used = 0; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index b057676fc67d..63ebef78df4a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -444,7 +444,7 @@ static int __uuid_write(struct cache_set *c) lockdep_assert_held(&bch_register_lock); - if (bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, true)) + if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true)) return 1; SET_KEY_SIZE(&k.key, c->sb.bucket_size); @@ -562,8 +562,8 @@ void bch_prio_write(struct cache *ca) atomic_long_add(ca->sb.bucket_size * 
prio_buckets(ca), &ca->meta_sectors_written); - pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free), - fifo_used(&ca->free_inc), fifo_used(&ca->unused)); + //pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free), + // fifo_used(&ca->free_inc), fifo_used(&ca->unused)); for (i = prio_buckets(ca) - 1; i >= 0; --i) { long bucket; @@ -582,7 +582,7 @@ void bch_prio_write(struct cache *ca) p->magic = pset_magic(&ca->sb); p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8); - bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, true); + bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true); BUG_ON(bucket == -1); mutex_unlock(&ca->set->bucket_lock); @@ -1767,6 +1767,7 @@ err: void bch_cache_release(struct kobject *kobj) { struct cache *ca = container_of(kobj, struct cache, kobj); + unsigned i; if (ca->set) ca->set->cache[ca->sb.nr_this_dev] = NULL; @@ -1780,7 +1781,9 @@ void bch_cache_release(struct kobject *kobj) free_heap(&ca->heap); free_fifo(&ca->unused); free_fifo(&ca->free_inc); - free_fifo(&ca->free); + + for (i = 0; i < RESERVE_NR; i++) + free_fifo(&ca->free[i]); if (ca->sb_bio.bi_inline_vecs[0].bv_page) put_page(ca->sb_bio.bi_io_vec[0].bv_page); @@ -1806,10 +1809,12 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) ca->journal.bio.bi_max_vecs = 8; ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; - free = roundup_pow_of_two(ca->sb.nbuckets) >> 9; - free = max_t(size_t, free, (prio_buckets(ca) + 8) * 2); + free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; - if (!init_fifo(&ca->free, free, GFP_KERNEL) || + if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) || + !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || + !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || !init_fifo(&ca->unused, free << 2, GFP_KERNEL) || !init_heap(&ca->heap, free << 3, GFP_KERNEL) || diff --git 
a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index a1f85612f0b3..d5dd282b176f 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -102,7 +102,6 @@ rw_attribute(bypass_torture_test); rw_attribute(key_merging_disabled); rw_attribute(gc_always_rewrite); rw_attribute(expensive_debug_checks); -rw_attribute(freelist_percent); rw_attribute(cache_replacement_policy); rw_attribute(btree_shrinker_disabled); rw_attribute(copy_gc_enabled); @@ -711,9 +710,6 @@ SHOW(__bch_cache) sysfs_print(io_errors, atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT); - sysfs_print(freelist_percent, ca->free.size * 100 / - ((size_t) ca->sb.nbuckets)); - if (attr == &sysfs_cache_replacement_policy) return bch_snprint_string_list(buf, PAGE_SIZE, cache_replacement_policies, @@ -820,32 +816,6 @@ STORE(__bch_cache) } } - if (attr == &sysfs_freelist_percent) { - DECLARE_FIFO(long, free); - long i; - size_t p = strtoul_or_return(buf); - - p = clamp_t(size_t, - ((size_t) ca->sb.nbuckets * p) / 100, - roundup_pow_of_two(ca->sb.nbuckets) >> 9, - ca->sb.nbuckets / 2); - - if (!init_fifo_exact(&free, p, GFP_KERNEL)) - return -ENOMEM; - - mutex_lock(&ca->set->bucket_lock); - - fifo_move(&free, &ca->free); - fifo_swap(&free, &ca->free); - - mutex_unlock(&ca->set->bucket_lock); - - while (fifo_pop(&free, i)) - atomic_dec(&ca->buckets[i].pin); - - free_fifo(&free); - } - if (attr == &sysfs_clear_stats) { atomic_long_set(&ca->sectors_written, 0); atomic_long_set(&ca->btree_sectors_written, 0); @@ -869,7 +839,6 @@ static struct attribute *bch_cache_files[] = { &sysfs_metadata_written, &sysfs_io_errors, &sysfs_clear_stats, - &sysfs_freelist_percent, &sysfs_cache_replacement_policy, NULL }; diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 095c6e4fe1e8..0c5cf2f63dc3 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -411,7 +411,7 @@ TRACE_EVENT(bcache_alloc_invalidate, ), TP_fast_assign( - __entry->free = 
fifo_used(&ca->free); + __entry->free = fifo_used(&ca->free[RESERVE_NONE]); __entry->free_inc = fifo_used(&ca->free_inc); __entry->free_inc_size = ca->free_inc.size; __entry->unused = fifo_used(&ca->unused); @@ -422,8 +422,8 @@ TRACE_EVENT(bcache_alloc_invalidate, ); TRACE_EVENT(bcache_alloc_fail, - TP_PROTO(struct cache *ca), - TP_ARGS(ca), + TP_PROTO(struct cache *ca, unsigned reserve), + TP_ARGS(ca, reserve), TP_STRUCT__entry( __field(unsigned, free ) @@ -433,7 +433,7 @@ TRACE_EVENT(bcache_alloc_fail, ), TP_fast_assign( - __entry->free = fifo_used(&ca->free); + __entry->free = fifo_used(&ca->free[reserve]); __entry->free_inc = fifo_used(&ca->free_inc); __entry->unused = fifo_used(&ca->unused); __entry->blocked = atomic_read(&ca->set->prio_blocked); -- cgit v1.2.3 From c78afc6261b09f74abff8c0719b80692a4959768 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 22:39:53 -0700 Subject: bcache/md: Use raid stripe size Now that we've got code for raid5/6 stripe awareness, bcache just needs to know about the stripes and when writing partial stripes is expensive - we probably don't want to enable this optimization for raid1 or 10, even though they have stripes. So add a flag to queue_limits. 
Signed-off-by: Kent Overstreet --- block/blk-settings.c | 4 ++++ drivers/md/bcache/super.c | 6 ++++++ drivers/md/raid5.c | 1 + include/linux/blkdev.h | 1 + 4 files changed, 12 insertions(+) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 05e826793e4e..5d21239bc859 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -592,6 +592,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ret = -1; } + t->raid_partial_stripes_expensive = + max(t->raid_partial_stripes_expensive, + b->raid_partial_stripes_expensive); + /* Find lowest common alignment_offset */ t->alignment_offset = lcm(t->alignment_offset, alignment) & (max(t->physical_block_size, t->io_min) - 1); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 63ebef78df4a..e363efcf2b76 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1134,6 +1134,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); } + dc->disk.stripe_size = q->limits.io_opt >> 9; + + if (dc->disk.stripe_size) + dc->partial_stripes_expensive = + q->limits.raid_partial_stripes_expensive; + ret = bcache_device_init(&dc->disk, block_size, dc->bdev->bd_part->nr_sects - dc->sb.data_offset); if (ret) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index eea63372e4d3..1cfb22c025b6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6101,6 +6101,7 @@ static int run(struct mddev *mddev) blk_queue_io_min(mddev->queue, chunk_size); blk_queue_io_opt(mddev->queue, chunk_size * (conf->raid_disks - conf->max_degraded)); + mddev->queue->limits.raid_partial_stripes_expensive = 1; /* * We can only discard a whole stripe. 
It doesn't make sense to * discard data disk but write parity disk diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 02cb6f0ea71d..0375654adb28 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -291,6 +291,7 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char cluster; unsigned char discard_zeroes_data; + unsigned char raid_partial_stripes_expensive; }; struct request_queue { -- cgit v1.2.3 From fafff81cead78157099df1ee10af16cc51893ddc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 17 Dec 2013 21:56:21 -0800 Subject: bcache: Bkey indexing renaming More refactoring: node() -> bset_bkey_idx() end() -> bset_bkey_last() Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 11 ++--------- drivers/md/bcache/bset.c | 28 ++++++++++++++-------------- drivers/md/bcache/bset.h | 30 ++++++++++++++++++++++-------- drivers/md/bcache/btree.c | 33 ++++++++++++++++++--------------- drivers/md/bcache/debug.c | 6 +++--- drivers/md/bcache/journal.c | 6 +++--- include/uapi/linux/bcache.h | 2 +- 7 files changed, 63 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 3fd87323368c..2b46c86ac440 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -724,9 +724,6 @@ struct bbio { #define __set_blocks(i, k, c) DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c)) #define set_blocks(i, c) __set_blocks(i, (i)->keys, c) -#define node(i, j) ((struct bkey *) ((i)->d + (j))) -#define end(i) node(i, (i)->keys) - #define btree_data_space(b) (PAGE_SIZE << (b)->page_order) #define prios_per_bucket(c) \ @@ -791,18 +788,14 @@ static inline bool ptr_available(struct cache_set *c, const struct bkey *k, /* Btree key macros */ -static inline void bkey_init(struct bkey *k) -{ - *k = ZERO_KEY; -} - /* * This is used for various on disk data structures - cache_sb, prio_set, bset, * jset: The checksum is _always_ the first 8 bytes of these 
structs */ #define csum_set(i) \ bch_crc64(((void *) (i)) + sizeof(uint64_t), \ - ((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t))) + ((void *) bset_bkey_last(i)) - \ + (((void *) (i)) + sizeof(uint64_t))) /* Error handling macros */ diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index f91347a55c41..bfee926e35f0 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -500,7 +500,7 @@ static void make_bfloat(struct bset_tree *t, unsigned j) : tree_to_prev_bkey(t, j >> ffs(j)); struct bkey *r = is_power_of_2(j + 1) - ? node(t->data, t->data->keys - bkey_u64s(&t->end)) + ? bset_bkey_idx(t->data, t->data->keys - bkey_u64s(&t->end)) : tree_to_bkey(t, j >> (ffz(j) + 1)); BUG_ON(m < l || m > r); @@ -559,7 +559,7 @@ static void bset_build_written_tree(struct btree *b) bset_alloc_tree(b, t); t->size = min_t(unsigned, - bkey_to_cacheline(t, end(t->data)), + bkey_to_cacheline(t, bset_bkey_last(t->data)), b->sets->tree + bset_tree_space(b) - t->tree); if (t->size < 2) { @@ -582,7 +582,7 @@ static void bset_build_written_tree(struct btree *b) t->tree[j].m = bkey_to_cacheline_offset(k); } - while (bkey_next(k) != end(t->data)) + while (bkey_next(k) != bset_bkey_last(t->data)) k = bkey_next(k); t->end = *k; @@ -600,7 +600,7 @@ void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k) unsigned inorder, j = 1; for (t = b->sets; t <= &b->sets[b->nsets]; t++) - if (k < end(t->data)) + if (k < bset_bkey_last(t->data)) goto found_set; BUG(); @@ -613,7 +613,7 @@ found_set: if (k == t->data->start) goto fix_left; - if (bkey_next(k) == end(t->data)) { + if (bkey_next(k) == bset_bkey_last(t->data)) { t->end = *k; goto fix_right; } @@ -679,7 +679,7 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k) /* Possibly add a new entry to the end of the lookup table */ for (k = table_to_bkey(t, t->size - 1); - k != end(t->data); + k != bset_bkey_last(t->data); k = bkey_next(k)) if (t->size == bkey_to_cacheline(t, k)) { t->prev[t->size] 
= bkey_to_cacheline_offset(k); @@ -715,7 +715,7 @@ static struct bset_search_iter bset_search_write_set(struct btree *b, unsigned li = 0, ri = t->size; BUG_ON(!b->nsets && - t->size < bkey_to_cacheline(t, end(t->data))); + t->size < bkey_to_cacheline(t, bset_bkey_last(t->data))); while (li + 1 != ri) { unsigned m = (li + ri) >> 1; @@ -728,7 +728,7 @@ static struct bset_search_iter bset_search_write_set(struct btree *b, return (struct bset_search_iter) { table_to_bkey(t, li), - ri < t->size ? table_to_bkey(t, ri) : end(t->data) + ri < t->size ? table_to_bkey(t, ri) : bset_bkey_last(t->data) }; } @@ -780,7 +780,7 @@ static struct bset_search_iter bset_search_tree(struct btree *b, f = &t->tree[inorder_next(j, t->size)]; r = cacheline_to_bkey(t, inorder, f->m); } else - r = end(t->data); + r = bset_bkey_last(t->data); } else { r = cacheline_to_bkey(t, inorder, f->m); @@ -816,7 +816,7 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, if (unlikely(!t->size)) { i.l = t->data->start; - i.r = end(t->data); + i.r = bset_bkey_last(t->data); } else if (bset_written(b, t)) { /* * Each node in the auxiliary search tree covers a certain range @@ -826,7 +826,7 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, */ if (unlikely(bkey_cmp(search, &t->end) >= 0)) - return end(t->data); + return bset_bkey_last(t->data); if (unlikely(bkey_cmp(search, t->data->start) < 0)) return t->data->start; @@ -842,7 +842,7 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, inorder_to_tree(bkey_to_cacheline(t, i.l), t)), search) > 0); - BUG_ON(i.r != end(t->data) && + BUG_ON(i.r != bset_bkey_last(t->data) && bkey_cmp(i.r, search) <= 0); } @@ -897,7 +897,7 @@ struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter, for (; start <= &b->sets[b->nsets]; start++) { ret = bch_bset_search(b, start, search); - bch_btree_iter_push(iter, ret, end(start->data)); + bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); } 
return ret; @@ -1067,7 +1067,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, } else { b->sets[start].data->keys = out->keys; memcpy(b->sets[start].data->start, out->start, - (void *) end(out) - (void *) out->start); + (void *) bset_bkey_last(out) - (void *) out->start); } if (used_mempool) diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 303d31a3b9e6..88b6edbf508b 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -190,14 +190,6 @@ struct bset_tree { struct bset *data; }; -static __always_inline int64_t bkey_cmp(const struct bkey *l, - const struct bkey *r) -{ - return unlikely(KEY_INODE(l) != KEY_INODE(r)) - ? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r) - : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); -} - /* Keylists */ struct keylist { @@ -261,6 +253,28 @@ struct bkey *bch_keylist_pop(struct keylist *); void bch_keylist_pop_front(struct keylist *); int __bch_keylist_realloc(struct keylist *, unsigned); +/* Bkey utility code */ + +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) + +static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) +{ + return bkey_idx(i->start, idx); +} + +static inline void bkey_init(struct bkey *k) +{ + *k = ZERO_KEY; +} + +static __always_inline int64_t bkey_cmp(const struct bkey *l, + const struct bkey *r) +{ + return unlikely(KEY_INODE(l) != KEY_INODE(r)) + ? 
(int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r) + : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); +} + void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, unsigned); bool __bch_cut_front(const struct bkey *, struct bkey *); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index f0a6399fdd3c..8aaaf16637a0 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -197,7 +197,7 @@ void bkey_put(struct cache_set *c, struct bkey *k) static uint64_t btree_csum_set(struct btree *b, struct bset *i) { uint64_t crc = b->key.ptr[0]; - void *data = (void *) i + 8, *end = end(i); + void *data = (void *) i + 8, *end = bset_bkey_last(i); crc = bch_crc64_update(crc, data, end - data); return crc ^ 0xffffffffffffffffULL; @@ -251,7 +251,7 @@ void bch_btree_node_read_done(struct btree *b) if (i != b->sets[0].data && !i->keys) goto err; - bch_btree_iter_push(iter, i->start, end(i)); + bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); b->written += set_blocks(i, b->c); } @@ -1310,7 +1310,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, if (i > 1) { for (k = n2->start; - k < end(n2); + k < bset_bkey_last(n2); k = bkey_next(k)) { if (__set_blocks(n1, n1->keys + keys + bkey_u64s(k), b->c) > blocks) @@ -1343,16 +1343,17 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, if (last) bkey_copy_key(&new_nodes[i]->key, last); - memcpy(end(n1), + memcpy(bset_bkey_last(n1), n2->start, - (void *) node(n2, keys) - (void *) n2->start); + (void *) bset_bkey_idx(n2, keys) - (void *) n2->start); n1->keys += keys; r[i].keys = n1->keys; memmove(n2->start, - node(n2, keys), - (void *) end(n2) - (void *) node(n2, keys)); + bset_bkey_idx(n2, keys), + (void *) bset_bkey_last(n2) - + (void *) bset_bkey_idx(n2, keys)); n2->keys -= keys; @@ -1830,7 +1831,7 @@ static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert) memmove((uint64_t *) where + bkey_u64s(insert), where, - (void *) end(i) - 
(void *) where); + (void *) bset_bkey_last(i) - (void *) where); i->keys += bkey_u64s(insert); bkey_copy(where, insert); @@ -2014,7 +2015,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), KEY_START(k), KEY_SIZE(k)); - while (m != end(i) && + while (m != bset_bkey_last(i) && bkey_cmp(k, &START_KEY(m)) > 0) prev = m, m = bkey_next(m); @@ -2028,12 +2029,12 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, goto merged; status = BTREE_INSERT_STATUS_OVERWROTE; - if (m != end(i) && + if (m != bset_bkey_last(i) && KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m)) goto copy; status = BTREE_INSERT_STATUS_FRONT_MERGE; - if (m != end(i) && + if (m != bset_bkey_last(i) && bch_bkey_try_merge(b, k, m)) goto copy; } else { @@ -2142,16 +2143,18 @@ static int btree_split(struct btree *b, struct btree_op *op, */ while (keys < (n1->sets[0].data->keys * 3) / 5) - keys += bkey_u64s(node(n1->sets[0].data, keys)); + keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data, + keys)); - bkey_copy_key(&n1->key, node(n1->sets[0].data, keys)); - keys += bkey_u64s(node(n1->sets[0].data, keys)); + bkey_copy_key(&n1->key, + bset_bkey_idx(n1->sets[0].data, keys)); + keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data, keys)); n2->sets[0].data->keys = n1->sets[0].data->keys - keys; n1->sets[0].data->keys = keys; memcpy(n2->sets[0].data->start, - end(n1->sets[0].data), + bset_bkey_last(n1->sets[0].data), n2->sets[0].data->keys * sizeof(uint64_t)); bkey_copy_key(&n2->key, &b->key); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 8887c550d56c..955fa1d31774 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -84,7 +84,7 @@ static void dump_bset(struct btree *b, struct bset *i, unsigned set) unsigned j; char buf[80]; - for (k = i->start; k < end(i); k = next) { + for (k = i->start; k < bset_bkey_last(i); k = next) { next = bkey_next(k); bch_bkey_to_text(buf, sizeof(buf), k); @@ -102,7 
+102,7 @@ static void dump_bset(struct btree *b, struct bset *i, unsigned set) printk(" %s\n", bch_ptr_status(b->c, k)); - if (next < end(i) && + if (next < bset_bkey_last(i) && bkey_cmp(k, !b->level ? &START_KEY(next) : next) > 0) printk(KERN_ERR "Key skipped backwards\n"); } @@ -162,7 +162,7 @@ void bch_btree_verify(struct btree *b) if (inmemory->keys != sorted->keys || memcmp(inmemory->start, sorted->start, - (void *) end(inmemory) - (void *) inmemory->start)) { + (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) { struct bset *i; unsigned j; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 9d32d5790822..5e14e3325ec1 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -284,7 +284,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) } for (k = i->j.start; - k < end(&i->j); + k < bset_bkey_last(&i->j); k = bkey_next(k)) { unsigned j; @@ -322,7 +322,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) n, i->j.seq - 1, start, end); for (k = i->j.start; - k < end(&i->j); + k < bset_bkey_last(&i->j); k = bkey_next(k)) { trace_bcache_journal_replay_key(k); @@ -751,7 +751,7 @@ atomic_t *bch_journal(struct cache_set *c, w = journal_wait_for_write(c, bch_keylist_nkeys(keys)); - memcpy(end(w->data), keys->keys, bch_keylist_bytes(keys)); + memcpy(bset_bkey_last(w->data), keys->keys, bch_keylist_bytes(keys)); w->data->keys += bch_keylist_nkeys(keys); ret = &fifo_back(&c->journal.pin); diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 164a7e263988..ae66311be82f 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -118,7 +118,7 @@ static inline struct bkey *bkey_next(const struct bkey *k) return (struct bkey *) (d + bkey_u64s(k)); } -static inline struct bkey *bkey_last(const struct bkey *k, unsigned nr_keys) +static inline struct bkey *bkey_idx(const struct bkey *k, unsigned nr_keys) { __u64 *d = (void *) k; return 
(struct bkey *) (d + nr_keys); -- cgit v1.2.3 From a85e968e66a175c86d0410719ea84a5bd0f1d070 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Dec 2013 17:28:16 -0800 Subject: bcache: Add struct btree_keys Soon, bset.c won't need to depend on struct btree. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/bset.c | 179 +++++++++++++++++++++++++----------------- drivers/md/bcache/bset.h | 119 ++++++++++++++++++++++++++-- drivers/md/bcache/btree.c | 153 +++++++++++++++++------------------- drivers/md/bcache/btree.h | 93 ++-------------------- drivers/md/bcache/debug.c | 18 ++--- drivers/md/bcache/extents.c | 19 +++-- drivers/md/bcache/sysfs.c | 2 +- include/trace/events/bcache.h | 2 +- 9 files changed, 323 insertions(+), 264 deletions(-) (limited to 'include') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 5c74d55cea7f..93b848419665 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -679,9 +679,9 @@ struct cache_set { unsigned error_decay; unsigned short journal_delay_ms; + bool expensive_debug_checks; unsigned verify:1; unsigned key_merging_disabled:1; - unsigned expensive_debug_checks:1; unsigned gc_always_rewrite:1; unsigned shrinker_disabled:1; unsigned copy_gc_enabled:1; diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index c2c42cbbe885..f34ef56560ed 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -149,33 +149,33 @@ struct bkey_float { #define BSET_CACHELINE 128 /* Space required for the btree node keys */ -static inline size_t btree_keys_bytes(struct btree *b) +static inline size_t btree_keys_bytes(struct btree_keys *b) { return PAGE_SIZE << b->page_order; } -static inline size_t btree_keys_cachelines(struct btree *b) +static inline size_t btree_keys_cachelines(struct btree_keys *b) { return btree_keys_bytes(b) / BSET_CACHELINE; } /* Space required for the auxiliary search trees */ -static inline size_t 
bset_tree_bytes(struct btree *b) +static inline size_t bset_tree_bytes(struct btree_keys *b) { return btree_keys_cachelines(b) * sizeof(struct bkey_float); } /* Space required for the prev pointers */ -static inline size_t bset_prev_bytes(struct btree *b) +static inline size_t bset_prev_bytes(struct btree_keys *b) { return btree_keys_cachelines(b) * sizeof(uint8_t); } /* Memory allocation */ -void bch_btree_keys_free(struct btree *b) +void bch_btree_keys_free(struct btree_keys *b) { - struct bset_tree *t = b->sets; + struct bset_tree *t = b->set; if (bset_prev_bytes(b) < PAGE_SIZE) kfree(t->prev); @@ -195,10 +195,11 @@ void bch_btree_keys_free(struct btree *b) t->tree = NULL; t->data = NULL; } +EXPORT_SYMBOL(bch_btree_keys_free); -int bch_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp) +int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp) { - struct bset_tree *t = b->sets; + struct bset_tree *t = b->set; BUG_ON(t->data); @@ -225,6 +226,29 @@ err: bch_btree_keys_free(b); return -ENOMEM; } +EXPORT_SYMBOL(bch_btree_keys_alloc); + +void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops, + bool *expensive_debug_checks) +{ + unsigned i; + + b->ops = ops; + b->expensive_debug_checks = expensive_debug_checks; + b->nsets = 0; + b->last_set_unwritten = 0; + + /* XXX: shouldn't be needed */ + for (i = 0; i < MAX_BSETS; i++) + b->set[i].size = 0; + /* + * Second loop starts at 1 because b->keys[0]->data is the memory we + * allocated + */ + for (i = 1; i < MAX_BSETS; i++) + b->set[i].data = NULL; +} +EXPORT_SYMBOL(bch_btree_keys_init); /* Binary tree stuff for auxiliary search trees */ @@ -448,9 +472,9 @@ static void make_bfloat(struct bset_tree *t, unsigned j) f->exponent = 127; } -static void bset_alloc_tree(struct btree *b, struct bset_tree *t) +static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t) { - if (t != b->sets) { + if (t != b->set) { unsigned j = roundup(t[-1].size, 64 / 
sizeof(struct bkey_float)); @@ -458,27 +482,30 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t) t->prev = t[-1].prev + j; } - while (t < b->sets + MAX_BSETS) + while (t < b->set + MAX_BSETS) t++->size = 0; } -static void bch_bset_build_unwritten_tree(struct btree *b) +static void bch_bset_build_unwritten_tree(struct btree_keys *b) { struct bset_tree *t = bset_tree_last(b); + BUG_ON(b->last_set_unwritten); + b->last_set_unwritten = 1; + bset_alloc_tree(b, t); - if (t->tree != b->sets->tree + btree_keys_cachelines(b)) { + if (t->tree != b->set->tree + btree_keys_cachelines(b)) { t->prev[0] = bkey_to_cacheline_offset(t->data->start); t->size = 1; } } -void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic) +void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic) { - if (i != b->sets->data) { - b->sets[++b->nsets].data = i; - i->seq = b->sets->data->seq; + if (i != b->set->data) { + b->set[++b->nsets].data = i; + i->seq = b->set->data->seq; } else get_random_bytes(&i->seq, sizeof(uint64_t)); @@ -488,18 +515,21 @@ void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic) bch_bset_build_unwritten_tree(b); } +EXPORT_SYMBOL(bch_bset_init_next); -static void bset_build_written_tree(struct btree *b) +void bch_bset_build_written_tree(struct btree_keys *b) { struct bset_tree *t = bset_tree_last(b); struct bkey *k = t->data->start; unsigned j, cacheline = 1; + b->last_set_unwritten = 0; + bset_alloc_tree(b, t); t->size = min_t(unsigned, bkey_to_cacheline(t, bset_bkey_last(t->data)), - b->sets->tree + btree_keys_cachelines(b) - t->tree); + b->set->tree + btree_keys_cachelines(b) - t->tree); if (t->size < 2) { t->size = 0; @@ -532,13 +562,14 @@ static void bset_build_written_tree(struct btree *b) j = inorder_next(j, t->size)) make_bfloat(t, j); } +EXPORT_SYMBOL(bch_bset_build_written_tree); -void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k) +void bch_bset_fix_invalidated_key(struct 
btree_keys *b, struct bkey *k) { struct bset_tree *t; unsigned inorder, j = 1; - for (t = b->sets; t <= bset_tree_last(b); t++) + for (t = b->set; t <= bset_tree_last(b); t++) if (k < bset_bkey_last(t->data)) goto found_set; @@ -577,8 +608,9 @@ fix_right: do { j = j * 2 + 1; } while (j < t->size); } +EXPORT_SYMBOL(bch_bset_fix_invalidated_key); -static void bch_bset_fix_lookup_table(struct btree *b, +static void bch_bset_fix_lookup_table(struct btree_keys *b, struct bset_tree *t, struct bkey *k) { @@ -613,7 +645,7 @@ static void bch_bset_fix_lookup_table(struct btree *b, } } - if (t->size == b->sets->tree + btree_keys_cachelines(b) - t->tree) + if (t->size == b->set->tree + btree_keys_cachelines(b) - t->tree) return; /* Possibly add a new entry to the end of the lookup table */ @@ -627,12 +659,12 @@ static void bch_bset_fix_lookup_table(struct btree *b, } } -void bch_bset_insert(struct btree *b, struct bkey *where, +void bch_bset_insert(struct btree_keys *b, struct bkey *where, struct bkey *insert) { struct bset_tree *t = bset_tree_last(b); - BUG_ON(t->data != write_block(b)); + BUG_ON(!b->last_set_unwritten); BUG_ON(bset_byte_offset(b, t->data) + __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) > PAGE_SIZE << b->page_order); @@ -645,20 +677,17 @@ void bch_bset_insert(struct btree *b, struct bkey *where, bkey_copy(where, insert); bch_bset_fix_lookup_table(b, t, where); } +EXPORT_SYMBOL(bch_bset_insert); struct bset_search_iter { struct bkey *l, *r; }; -static struct bset_search_iter bset_search_write_set(struct btree *b, - struct bset_tree *t, +static struct bset_search_iter bset_search_write_set(struct bset_tree *t, const struct bkey *search) { unsigned li = 0, ri = t->size; - BUG_ON(!b->nsets && - t->size < bkey_to_cacheline(t, bset_bkey_last(t->data))); - while (li + 1 != ri) { unsigned m = (li + ri) >> 1; @@ -674,8 +703,7 @@ static struct bset_search_iter bset_search_write_set(struct btree *b, }; } -static struct bset_search_iter bset_search_tree(struct 
btree *b, - struct bset_tree *t, +static struct bset_search_iter bset_search_tree(struct bset_tree *t, const struct bkey *search) { struct bkey *l, *r; @@ -759,7 +787,7 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, if (unlikely(!t->size)) { i.l = t->data->start; i.r = bset_bkey_last(t->data); - } else if (bset_written(b, t)) { + } else if (bset_written(&b->keys, t)) { /* * Each node in the auxiliary search tree covers a certain range * of bits, and keys above and below the set it covers might @@ -773,12 +801,16 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, if (unlikely(bkey_cmp(search, t->data->start) < 0)) return t->data->start; - i = bset_search_tree(b, t, search); - } else - i = bset_search_write_set(b, t, search); + i = bset_search_tree(t, search); + } else { + BUG_ON(!b->keys.nsets && + t->size < bkey_to_cacheline(t, bset_bkey_last(t->data))); + + i = bset_search_write_set(t, search); + } if (expensive_debug_checks(b->c)) { - BUG_ON(bset_written(b, t) && + BUG_ON(bset_written(&b->keys, t) && i.l != t->data->start && bkey_cmp(tree_to_prev_bkey(t, inorder_to_tree(bkey_to_cacheline(t, i.l), t)), @@ -794,6 +826,7 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, return i.l; } +EXPORT_SYMBOL(__bch_bset_search); /* Btree iterator */ @@ -833,7 +866,7 @@ static struct bkey *__bch_btree_iter_init(struct btree *b, iter->b = b; #endif - for (; start <= &b->sets[b->nsets]; start++) { + for (; start <= bset_tree_last(&b->keys); start++) { ret = bch_bset_search(b, start, search); bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); } @@ -845,8 +878,9 @@ struct bkey *bch_btree_iter_init(struct btree *b, struct btree_iter *iter, struct bkey *search) { - return __bch_btree_iter_init(b, iter, search, b->sets); + return __bch_btree_iter_init(b, iter, search, b->keys.set); } +EXPORT_SYMBOL(bch_btree_iter_init); static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, 
btree_iter_cmp_fn *cmp) @@ -879,9 +913,10 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter) return __bch_btree_iter_next(iter, btree_iter_cmp); } +EXPORT_SYMBOL(bch_btree_iter_next); struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, - struct btree *b, ptr_filter_fn fn) + struct btree_keys *b, ptr_filter_fn fn) { struct bkey *ret; @@ -913,15 +948,16 @@ int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order) return 0; } +EXPORT_SYMBOL(bch_bset_sort_state_init); -static void btree_mergesort(struct btree *b, struct bset *out, +static void btree_mergesort(struct btree_keys *b, struct bset *out, struct btree_iter *iter, bool fixup, bool remove_stale) { int i; struct bkey *k, *last = NULL; BKEY_PADDED(k) tmp; - bool (*bad)(struct btree *, const struct bkey *) = remove_stale + bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale ? bch_ptr_bad : bch_ptr_invalid; @@ -955,7 +991,7 @@ static void btree_mergesort(struct btree *b, struct bset *out, pr_debug("sorted %i keys", out->keys); } -static void __btree_sort(struct btree *b, struct btree_iter *iter, +static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, unsigned start, unsigned order, bool fixup, struct bset_sort_state *state) { @@ -968,7 +1004,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, out = page_address(mempool_alloc(state->pool, GFP_NOIO)); used_mempool = true; - order = ilog2(bucket_pages(b->c)); + order = state->page_order; } start_time = local_clock(); @@ -983,13 +1019,13 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, * memcpy() */ - out->magic = bset_magic(&b->c->sb); - out->seq = b->sets[0].data->seq; - out->version = b->sets[0].data->version; - swap(out, b->sets[0].data); + out->magic = b->set->data->magic; + out->seq = b->set->data->seq; + out->version = b->set->data->version; + swap(out, b->set->data); } else { - b->sets[start].data->keys = out->keys; - 
memcpy(b->sets[start].data->start, out->start, + b->set[start].data->keys = out->keys; + memcpy(b->set[start].data->start, out->start, (void *) bset_bkey_last(out) - (void *) out->start); } @@ -998,7 +1034,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, else free_pages((unsigned long) out, order); - bset_build_written_tree(b); + bch_bset_build_written_tree(b); if (!start) bch_time_stats_update(&state->time, start_time); @@ -1007,34 +1043,32 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, void bch_btree_sort_partial(struct btree *b, unsigned start, struct bset_sort_state *state) { - size_t order = b->page_order, keys = 0; + size_t order = b->keys.page_order, keys = 0; struct btree_iter iter; int oldsize = bch_count_data(b); - __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); - - BUG_ON(!bset_written(b, bset_tree_last(b)) && - (bset_tree_last(b)->size || b->nsets)); + __bch_btree_iter_init(b, &iter, NULL, &b->keys.set[start]); if (start) { unsigned i; - for (i = start; i <= b->nsets; i++) - keys += b->sets[i].data->keys; + for (i = start; i <= b->keys.nsets; i++) + keys += b->keys.set[i].data->keys; - order = roundup_pow_of_two(__set_bytes(b->sets->data, + order = roundup_pow_of_two(__set_bytes(b->keys.set->data, keys)) / PAGE_SIZE; if (order) order = ilog2(order); } - __btree_sort(b, &iter, start, order, false, state); + __btree_sort(&b->keys, &iter, start, order, false, state); EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); } EXPORT_SYMBOL(bch_btree_sort_partial); -void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter, +void bch_btree_sort_and_fix_extents(struct btree_keys *b, + struct btree_iter *iter, struct bset_sort_state *state) { __btree_sort(b, iter, 0, b->page_order, true, state); @@ -1048,11 +1082,11 @@ void bch_btree_sort_into(struct btree *b, struct btree *new, struct btree_iter iter; bch_btree_iter_init(b, &iter, NULL); - btree_mergesort(b, new->sets->data, 
&iter, false, true); + btree_mergesort(&b->keys, new->keys.set->data, &iter, false, true); bch_time_stats_update(&state->time, start_time); - new->sets->size = 0; + new->keys.set->size = 0; // XXX: why? } #define SORT_CRIT (4096 / sizeof(uint64_t)) @@ -1062,28 +1096,31 @@ void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state) unsigned crit = SORT_CRIT; int i; + b->keys.last_set_unwritten = 0; + /* Don't sort if nothing to do */ - if (!b->nsets) + if (!b->keys.nsets) goto out; - for (i = b->nsets - 1; i >= 0; --i) { + for (i = b->keys.nsets - 1; i >= 0; --i) { crit *= state->crit_factor; - if (b->sets[i].data->keys < crit) { + if (b->keys.set[i].data->keys < crit) { bch_btree_sort_partial(b, i, state); return; } } /* Sort if we'd overflow */ - if (b->nsets + 1 == MAX_BSETS) { + if (b->keys.nsets + 1 == MAX_BSETS) { bch_btree_sort(b, state); return; } out: - bset_build_written_tree(b); + bch_bset_build_written_tree(&b->keys); } +EXPORT_SYMBOL(bch_btree_sort_lazy); /* Sysfs stuff */ @@ -1102,12 +1139,12 @@ static int btree_bset_stats(struct btree_op *op, struct btree *b) stats->nodes++; - for (i = 0; i <= b->nsets; i++) { - struct bset_tree *t = &b->sets[i]; + for (i = 0; i <= b->keys.nsets; i++) { + struct bset_tree *t = &b->keys.set[i]; size_t bytes = t->data->keys * sizeof(uint64_t); size_t j; - if (bset_written(b, t)) { + if (bset_written(&b->keys, t)) { stats->sets_written++; stats->bytes_written += bytes; diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index b5797129e919..87da828477f3 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -145,6 +145,9 @@ */ struct btree; +struct btree_keys; +struct btree_iter; +struct btree_iter_set; struct bkey_float; #define MAX_BSETS 4U @@ -181,6 +184,74 @@ struct bset_tree { struct bset *data; }; +struct btree_keys_ops { + bool (*sort_cmp)(struct btree_iter_set, + struct btree_iter_set); + struct bkey *(*sort_fixup)(struct btree_iter *, struct bkey *); + bool 
(*key_invalid)(struct btree_keys *, + const struct bkey *); + bool (*key_bad)(struct btree_keys *, const struct bkey *); + bool (*key_merge)(struct btree_keys *, + struct bkey *, struct bkey *); + + /* + * Only used for deciding whether to use START_KEY(k) or just the key + * itself in a couple places + */ + bool is_extents; +}; + +struct btree_keys { + const struct btree_keys_ops *ops; + uint8_t page_order; + uint8_t nsets; + unsigned last_set_unwritten:1; + bool *expensive_debug_checks; + + /* + * Sets of sorted keys - the real btree node - plus a binary search tree + * + * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point + * to the memory we have allocated for this btree node. Additionally, + * set[0]->data points to the entire btree node as it exists on disk. + */ + struct bset_tree set[MAX_BSETS]; +}; + +static inline struct bset_tree *bset_tree_last(struct btree_keys *b) +{ + return b->set + b->nsets; +} + +static inline bool bset_written(struct btree_keys *b, struct bset_tree *t) +{ + return t <= b->set + b->nsets - b->last_set_unwritten; +} + +static inline bool bkey_written(struct btree_keys *b, struct bkey *k) +{ + return !b->last_set_unwritten || k < b->set[b->nsets].data->start; +} + +static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i) +{ + return ((size_t) i) - ((size_t) b->set->data); +} + +static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i) +{ + return bset_byte_offset(b, i) >> 9; +} + +static inline bool btree_keys_expensive_checks(struct btree_keys *b) +{ +#ifdef CONFIG_BCACHE_DEBUG + return *b->expensive_debug_checks; +#else + return false; +#endif +} + #define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t)) #define set_bytes(i) __set_bytes(i, i->keys) @@ -189,12 +260,34 @@ struct bset_tree { #define set_blocks(i, block_bytes) \ __set_blocks(i, (i)->keys, block_bytes) -void bch_btree_keys_free(struct btree *); -int bch_btree_keys_alloc(struct btree *, unsigned, 
gfp_t); +static inline struct bset *bset_next_set(struct btree_keys *b, + unsigned block_bytes) +{ + struct bset *i = bset_tree_last(b)->data; + + return ((void *) i) + roundup(set_bytes(i), block_bytes); +} + +void bch_btree_keys_free(struct btree_keys *); +int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t); +void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *, + bool *); -void bch_bset_fix_invalidated_key(struct btree *, struct bkey *); -void bch_bset_init_next(struct btree *, struct bset *, uint64_t); -void bch_bset_insert(struct btree *, struct bkey *, struct bkey *); +void bch_bset_init_next(struct btree_keys *, struct bset *, uint64_t); +void bch_bset_build_written_tree(struct btree_keys *); +void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *); +void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *); + +/* + * Tries to merge l and r: l should be lower than r + * Returns true if we were able to merge. If we did merge, l will be the merged + * key, r will be untouched. + */ +static inline bool bch_bkey_try_merge(struct btree_keys *b, + struct bkey *l, struct bkey *r) +{ + return b->ops->key_merge ? 
b->ops->key_merge(b, l, r) : false; +} /* Btree key iteration */ @@ -208,11 +301,11 @@ struct btree_iter { } data[MAX_BSETS]; }; -typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *); +typedef bool (*ptr_filter_fn)(struct btree_keys *, const struct bkey *); struct bkey *bch_btree_iter_next(struct btree_iter *); struct bkey *bch_btree_iter_next_filter(struct btree_iter *, - struct btree *, ptr_filter_fn); + struct btree_keys *, ptr_filter_fn); void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *); struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *, @@ -246,7 +339,7 @@ int bch_bset_sort_state_init(struct bset_sort_state *, unsigned); void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *); void bch_btree_sort_into(struct btree *, struct btree *, struct bset_sort_state *); -void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *, +void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *, struct bset_sort_state *); void bch_btree_sort_partial(struct btree *, unsigned, struct bset_sort_state *); @@ -311,6 +404,16 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) _ret; \ }) +static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k) +{ + return b->ops->key_invalid(b, k); +} + +static inline bool bch_ptr_bad(struct btree_keys *b, const struct bkey *k) +{ + return b->ops->key_bad(b, k); +} + /* Keylists */ struct keylist { diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 6734e2759b93..5d7dee8bb850 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -107,14 +107,6 @@ enum { static struct workqueue_struct *btree_io_wq; -static inline bool should_split(struct btree *b) -{ - struct bset *i = write_block(b); - return b->written >= btree_blocks(b) || - (b->written + __set_blocks(i, i->keys + 15, block_bytes(b->c)) - > btree_blocks(b)); -} - #define insert_lock(s, b) ((b)->level <= 
(s)->lock) /* @@ -182,6 +174,19 @@ static inline bool should_split(struct btree *b) _r; \ }) +static inline struct bset *write_block(struct btree *b) +{ + return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c); +} + +static inline bool should_split(struct btree *b) +{ + struct bset *i = write_block(b); + return b->written >= btree_blocks(b) || + (b->written + __set_blocks(i, i->keys + 15, block_bytes(b->c)) + > btree_blocks(b)); +} + /* Btree key manipulation */ void bkey_put(struct cache_set *c, struct bkey *k) @@ -222,7 +227,7 @@ void bch_btree_node_read_done(struct btree *b) goto err; for (; - b->written < btree_blocks(b) && i->seq == b->sets[0].data->seq; + b->written < btree_blocks(b) && i->seq == b->keys.set[0].data->seq; i = write_block(b)) { err = "unsupported bset version"; if (i->version > BCACHE_BSET_VERSION) @@ -250,7 +255,7 @@ void bch_btree_node_read_done(struct btree *b) } err = "empty set"; - if (i != b->sets[0].data && !i->keys) + if (i != b->keys.set[0].data && !i->keys) goto err; bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); @@ -260,21 +265,22 @@ void bch_btree_node_read_done(struct btree *b) err = "corrupted btree"; for (i = write_block(b); - bset_sector_offset(b, i) < KEY_SIZE(&b->key); + bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key); i = ((void *) i) + block_bytes(b->c)) - if (i->seq == b->sets[0].data->seq) + if (i->seq == b->keys.set[0].data->seq) goto err; - bch_btree_sort_and_fix_extents(b, iter, &b->c->sort); + bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); - i = b->sets[0].data; + i = b->keys.set[0].data; err = "short btree key"; - if (b->sets[0].size && - bkey_cmp(&b->key, &b->sets[0].end) < 0) + if (b->keys.set[0].size && + bkey_cmp(&b->key, &b->keys.set[0].end) < 0) goto err; if (b->written < btree_blocks(b)) - bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb)); + bch_bset_init_next(&b->keys, write_block(b), + bset_magic(&b->c->sb)); out: mempool_free(iter, b->c->fill_iter); 
return; @@ -308,7 +314,7 @@ static void bch_btree_node_read(struct btree *b) bio->bi_end_io = btree_node_read_endio; bio->bi_private = &cl; - bch_bio_map(bio, b->sets[0].data); + bch_bio_map(bio, b->keys.set[0].data); bch_submit_bbio(bio, b->c, &b->key, 0); closure_sync(&cl); @@ -427,7 +433,7 @@ static void do_btree_node_write(struct btree *b) bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + - bset_sector_offset(b, i)); + bset_sector_offset(&b->keys, i)); if (!bio_alloc_pages(b->bio, GFP_NOIO)) { int j; @@ -475,12 +481,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) do_btree_node_write(b); - b->written += set_blocks(i, block_bytes(b->c)); atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size, &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); + b->written += set_blocks(i, block_bytes(b->c)); + /* If not a leaf node, always sort */ - if (b->level && b->nsets) + if (b->level && b->keys.nsets) bch_btree_sort(b, &b->c->sort); else bch_btree_sort_lazy(b, &b->c->sort); @@ -489,11 +496,12 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) * do verify if there was more than one set initially (i.e. 
we did a * sort) and we sorted down to a single set: */ - if (i != b->sets->data && !b->nsets) + if (i != b->keys.set->data && !b->keys.nsets) bch_btree_verify(b); if (b->written < btree_blocks(b)) - bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb)); + bch_bset_init_next(&b->keys, write_block(b), + bset_magic(&b->c->sb)); } static void bch_btree_node_write_sync(struct btree *b) @@ -553,24 +561,6 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) * mca -> memory cache */ -static void mca_reinit(struct btree *b) -{ - unsigned i; - - b->flags = 0; - b->written = 0; - b->nsets = 0; - - for (i = 0; i < MAX_BSETS; i++) - b->sets[i].size = 0; - /* - * Second loop starts at 1 because b->sets[0]->data is the memory we - * allocated - */ - for (i = 1; i < MAX_BSETS; i++) - b->sets[i].data = NULL; -} - #define mca_reserve(c) (((c->root && c->root->level) \ ? c->root->level : 1) * 8 + 16) #define mca_can_free(c) \ @@ -580,7 +570,7 @@ static void mca_data_free(struct btree *b) { BUG_ON(b->io_mutex.count != 1); - bch_btree_keys_free(b); + bch_btree_keys_free(&b->keys); b->c->bucket_cache_used--; list_move(&b->list, &b->c->btree_cache_freed); @@ -602,7 +592,7 @@ static unsigned btree_order(struct bkey *k) static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) { - if (!bch_btree_keys_alloc(b, + if (!bch_btree_keys_alloc(&b->keys, max_t(unsigned, ilog2(b->c->btree_pages), btree_order(k)), @@ -642,9 +632,9 @@ static int mca_reap(struct btree *b, unsigned min_order, bool flush) if (!down_write_trylock(&b->lock)) return -ENOMEM; - BUG_ON(btree_node_dirty(b) && !b->sets[0].data); + BUG_ON(btree_node_dirty(b) && !b->keys.set[0].data); - if (b->page_order < min_order) + if (b->keys.page_order < min_order) goto out_unlock; if (!flush) { @@ -809,7 +799,7 @@ int bch_btree_cache_alloc(struct cache_set *c) c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); if (c->verify_data && - c->verify_data->sets[0].data) + 
c->verify_data->keys.set->data) list_del_init(&c->verify_data->list); else c->verify_data = NULL; @@ -907,7 +897,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level) list_for_each_entry(b, &c->btree_cache_freed, list) if (!mca_reap(b, 0, false)) { mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO); - if (!b->sets[0].data) + if (!b->keys.set[0].data) goto err; else goto out; @@ -918,7 +908,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level) goto err; BUG_ON(!down_write_trylock(&b->lock)); - if (!b->sets->data) + if (!b->keys.set->data) goto err; out: BUG_ON(b->io_mutex.count != 1); @@ -929,15 +919,17 @@ out: hlist_add_head_rcu(&b->hash, mca_hash(c, k)); lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); - b->level = level; b->parent = (void *) ~0UL; + b->flags = 0; + b->written = 0; + b->level = level; if (!b->level) - b->ops = &bch_extent_keys_ops; + bch_btree_keys_init(&b->keys, &bch_extent_keys_ops, + &b->c->expensive_debug_checks); else - b->ops = &bch_btree_keys_ops; - - mca_reinit(b); + bch_btree_keys_init(&b->keys, &bch_btree_keys_ops, + &b->c->expensive_debug_checks); return b; err: @@ -998,13 +990,13 @@ retry: b->accessed = 1; - for (; i <= b->nsets && b->sets[i].size; i++) { - prefetch(b->sets[i].tree); - prefetch(b->sets[i].data); + for (; i <= b->keys.nsets && b->keys.set[i].size; i++) { + prefetch(b->keys.set[i].tree); + prefetch(b->keys.set[i].data); } - for (; i <= b->nsets; i++) - prefetch(b->sets[i].data); + for (; i <= b->keys.nsets; i++) + prefetch(b->keys.set[i].data); if (btree_node_io_error(b)) { rw_unlock(write, b); @@ -1084,7 +1076,7 @@ retry: } b->accessed = 1; - bch_bset_init_next(b, b->sets->data, bset_magic(&b->c->sb)); + bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); mutex_unlock(&c->bucket_lock); @@ -1215,7 +1207,7 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) stale = max(stale, btree_mark_key(b, k)); keys++; - if 
(bch_ptr_bad(b, k)) + if (bch_ptr_bad(&b->keys, k)) continue; gc->key_bytes += bkey_u64s(k); @@ -1225,9 +1217,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) gc->data += KEY_SIZE(k); } - for (t = b->sets; t <= &b->sets[b->nsets]; t++) + for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++) btree_bug_on(t->size && - bset_written(b, t) && + bset_written(&b->keys, t) && bkey_cmp(&b->key, &t->end) < 0, b, "found short btree key in gc"); @@ -1271,7 +1263,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, blocks = btree_default_blocks(b->c) * 2 / 3; if (nodes < 2 || - __set_blocks(b->sets[0].data, keys, + __set_blocks(b->keys.set[0].data, keys, block_bytes(b->c)) > blocks * (nodes - 1)) return 0; @@ -1428,7 +1420,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, r[i].b = ERR_PTR(-EINTR); while (1) { - k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); + k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, k, b->level - 1, true); if (IS_ERR(r->b)) { @@ -1764,7 +1756,8 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, bch_btree_iter_init(b, &iter, NULL); do { - k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); + k = bch_btree_iter_next_filter(&iter, &b->keys, + bch_ptr_bad); if (k) btree_node_prefetch(b->c, k, b->level - 1); @@ -1894,7 +1887,7 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert)); - if (bkey_written(b, k)) { + if (bkey_written(&b->keys, k)) { /* * We insert a new key to cover the top of the * old key, and the old key is modified in place @@ -1907,19 +1900,20 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, * depends on us inserting a new key for the top * here. 
*/ - top = bch_bset_search(b, bset_tree_last(b), + top = bch_bset_search(b, + bset_tree_last(&b->keys), insert); - bch_bset_insert(b, top, k); + bch_bset_insert(&b->keys, top, k); } else { BKEY_PADDED(key) temp; bkey_copy(&temp.key, k); - bch_bset_insert(b, k, &temp.key); + bch_bset_insert(&b->keys, k, &temp.key); top = bkey_next(k); } bch_cut_front(insert, top); bch_cut_back(&START_KEY(insert), k); - bch_bset_fix_invalidated_key(b, k); + bch_bset_fix_invalidated_key(&b->keys, k); return false; } @@ -1929,7 +1923,7 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) old_offset = KEY_START(insert); - if (bkey_written(b, k) && + if (bkey_written(&b->keys, k) && bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) { /* * Completely overwrote, so we don't have to @@ -1938,7 +1932,7 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, bch_cut_front(k, k); } else { __bch_cut_back(&START_KEY(insert), k); - bch_bset_fix_invalidated_key(b, k); + bch_bset_fix_invalidated_key(&b->keys, k); } } @@ -1979,7 +1973,8 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, * the previous key. 
*/ prev = NULL; - m = bch_btree_iter_init(b, &iter, PRECEDING_KEY(&START_KEY(k))); + m = bch_btree_iter_init(b, &iter, + PRECEDING_KEY(&START_KEY(k))); if (fix_overlapping_extents(b, k, &iter, replace_key)) { op->insert_collision = true; @@ -2000,7 +1995,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, /* prev is in the tree, if we merge we're done */ status = BTREE_INSERT_STATUS_BACK_MERGE; if (prev && - bch_bkey_try_merge(b, prev, k)) + bch_bkey_try_merge(&b->keys, prev, k)) goto merged; status = BTREE_INSERT_STATUS_OVERWROTE; @@ -2010,14 +2005,14 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, status = BTREE_INSERT_STATUS_FRONT_MERGE; if (m != bset_bkey_last(i) && - bch_bkey_try_merge(b, k, m)) + bch_bkey_try_merge(&b->keys, k, m)) goto copy; } else { BUG_ON(replace_key); - m = bch_bset_search(b, bset_tree_last(b), k); + m = bch_bset_search(b, bset_tree_last(&b->keys), k); } -insert: bch_bset_insert(b, m, k); +insert: bch_bset_insert(&b->keys, m, k); copy: bkey_copy(m, k); merged: bch_check_keys(b, "%u for %s", status, @@ -2362,7 +2357,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, bch_btree_iter_init(b, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, b, + while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = btree(map_nodes_recurse, k, b, op, from, fn, flags); @@ -2395,7 +2390,7 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, bch_btree_iter_init(b, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) { + while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = !b->level ? 
fn(op, b, k) : btree(map_keys_recurse, k, b, op, from, fn, flags); diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 0b436079db71..04e81f8ab89a 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -113,28 +113,7 @@ struct btree_write { int prio_blocked; }; -struct btree_keys_ops { - bool (*sort_cmp)(struct btree_iter_set, - struct btree_iter_set); - struct bkey *(*sort_fixup)(struct btree_iter *, - struct bkey *); - bool (*key_invalid)(struct btree *, - const struct bkey *); - bool (*key_bad)(struct btree *, - const struct bkey *); - bool (*key_merge)(struct btree *, - struct bkey *, struct bkey *); - - - /* - * Only used for deciding whether to use START_KEY(k) or just the key - * itself in a couple places - */ - bool is_extents; -}; - struct btree { - const struct btree_keys_ops *ops; /* Hottest entries first */ struct hlist_node hash; @@ -151,17 +130,8 @@ struct btree { unsigned long flags; uint16_t written; /* would be nice to kill */ uint8_t level; - uint8_t nsets; - uint8_t page_order; - - /* - * Set of sorted keys - the real btree node - plus a binary search tree - * - * sets[0] is special; set[0]->tree, set[0]->prev and set[0]->data point - * to the memory we have allocated for this btree node. Additionally, - * set[0]->data points to the entire btree node as it exists on disk. 
- */ - struct bset_tree sets[MAX_BSETS]; + + struct btree_keys keys; /* For outstanding btree writes, used as a lock - protects write_idx */ struct closure io; @@ -201,49 +171,19 @@ static inline struct btree_write *btree_prev_write(struct btree *b) return b->writes + (btree_node_write_idx(b) ^ 1); } -static inline struct bset_tree *bset_tree_last(struct btree *b) -{ - return b->sets + b->nsets; -} - static inline struct bset *btree_bset_first(struct btree *b) { - return b->sets->data; + return b->keys.set->data; } static inline struct bset *btree_bset_last(struct btree *b) { - return bset_tree_last(b)->data; -} - -static inline unsigned bset_byte_offset(struct btree *b, struct bset *i) -{ - return ((size_t) i) - ((size_t) b->sets->data); -} - -static inline unsigned bset_sector_offset(struct btree *b, struct bset *i) -{ - return (((void *) i) - ((void *) btree_bset_first(b))) >> 9; + return bset_tree_last(&b->keys)->data; } static inline unsigned bset_block_offset(struct btree *b, struct bset *i) { - return bset_sector_offset(b, i) >> b->c->block_bits; -} - -static inline struct bset *write_block(struct btree *b) -{ - return ((void *) b->sets[0].data) + b->written * block_bytes(b->c); -} - -static inline bool bset_written(struct btree *b, struct bset_tree *t) -{ - return t->data < write_block(b); -} - -static inline bool bkey_written(struct btree *b, struct bkey *k) -{ - return k < write_block(b)->start; + return bset_sector_offset(&b->keys, i) >> b->c->block_bits; } static inline void set_gc_sectors(struct cache_set *c) @@ -251,27 +191,6 @@ static inline void set_gc_sectors(struct cache_set *c) atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16); } -static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k) -{ - return b->ops->key_invalid(b, k); -} - -static inline bool bch_ptr_bad(struct btree *b, const struct bkey *k) -{ - return b->ops->key_bad(b, k); -} - -/* - * Tries to merge l and r: l should be lower than r - * Returns true 
if we were able to merge. If we did merge, l will be the merged - * key, r will be untouched. - */ -static inline bool bch_bkey_try_merge(struct btree *b, - struct bkey *l, struct bkey *r) -{ - return b->ops->key_merge ? b->ops->key_merge(b, l, r) : false; -} - void bkey_put(struct cache_set *c, struct bkey *k); /* Looping macros */ @@ -284,7 +203,7 @@ void bkey_put(struct cache_set *c, struct bkey *k); #define for_each_key_filter(b, k, iter, filter) \ for (bch_btree_iter_init((b), (iter), NULL); \ - ((k) = bch_btree_iter_next_filter((iter), b, filter));) + ((k) = bch_btree_iter_next_filter((iter), &(b)->keys, filter));) #define for_each_key(b, k, iter) \ for (bch_btree_iter_init((b), (iter), NULL); \ diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 2c6587d016db..8acc18af07c1 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -113,9 +113,9 @@ static void bch_dump_bucket(struct btree *b) unsigned i; console_lock(); - for (i = 0; i <= b->nsets; i++) - dump_bset(b, b->sets[i].data, - bset_block_offset(b, b->sets[i].data)); + for (i = 0; i <= b->keys.nsets; i++) + dump_bset(b, b->keys.set[i].data, + bset_block_offset(b, b->keys.set[i].data)); console_unlock(); } @@ -139,13 +139,13 @@ void bch_btree_verify(struct btree *b) mutex_lock(&b->c->verify_lock); ondisk = b->c->verify_ondisk; - sorted = b->c->verify_data->sets->data; - inmemory = b->sets->data; + sorted = b->c->verify_data->keys.set->data; + inmemory = b->keys.set->data; bkey_copy(&v->key, &b->key); v->written = 0; v->level = b->level; - v->ops = b->ops; + v->keys.ops = b->keys.ops; bio = bch_bbio_alloc(b->c); bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; @@ -159,7 +159,7 @@ void bch_btree_verify(struct btree *b) memcpy(ondisk, sorted, KEY_SIZE(&v->key) << 9); bch_btree_node_read_done(v); - sorted = v->sets->data; + sorted = v->keys.set->data; if (inmemory->keys != sorted->keys || memcmp(inmemory->start, @@ -264,14 +264,14 @@ void __bch_check_keys(struct btree 
*b, const char *fmt, ...) if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0) goto bug; - if (bch_ptr_invalid(b, k)) + if (bch_ptr_invalid(&b->keys, k)) continue; err = "Overlapping keys"; if (p && bkey_cmp(p, &START_KEY(k)) > 0) goto bug; } else { - if (bch_ptr_bad(b, k)) + if (bch_ptr_bad(&b->keys, k)) continue; err = "Duplicate keys"; diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 8fe6aaece41d..ba3021128e7a 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -81,8 +81,9 @@ bad: return true; } -static bool bch_btree_ptr_invalid(struct btree *b, const struct bkey *k) +static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k) { + struct btree *b = container_of(bk, struct btree, keys); return __bch_btree_ptr_invalid(b->c, k); } @@ -118,13 +119,14 @@ err: return true; } -static bool bch_btree_ptr_bad(struct btree *b, const struct bkey *k) +static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k) { + struct btree *b = container_of(bk, struct btree, keys); unsigned i; if (!bkey_cmp(k, &ZERO_KEY) || !KEY_PTRS(k) || - bch_ptr_invalid(b, k)) + bch_ptr_invalid(bk, k)) return true; for (i = 0; i < KEY_PTRS(k); i++) @@ -209,8 +211,9 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, return NULL; } -static bool bch_extent_invalid(struct btree *b, const struct bkey *k) +static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) { + struct btree *b = container_of(bk, struct btree, keys); char buf[80]; if (!KEY_SIZE(k)) @@ -259,13 +262,14 @@ err: return true; } -static bool bch_extent_bad(struct btree *b, const struct bkey *k) +static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) { + struct btree *b = container_of(bk, struct btree, keys); struct bucket *g; unsigned i, stale; if (!KEY_PTRS(k) || - bch_extent_invalid(b, k)) + bch_extent_invalid(bk, k)) return true; for (i = 0; i < KEY_PTRS(k); i++) @@ -303,8 +307,9 @@ static 
uint64_t merge_chksums(struct bkey *l, struct bkey *r) ~((uint64_t)1 << 63); } -static bool bch_extent_merge(struct btree *b, struct bkey *l, struct bkey *r) +static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r) { + struct btree *b = container_of(bk, struct btree, keys); unsigned i; if (key_merging_disabled(b->c)) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 206c80fb27c1..7e175dbc76b0 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -433,7 +433,7 @@ lock_root: mutex_lock(&c->bucket_lock); list_for_each_entry(b, &c->btree_cache, list) - ret += 1 << (b->page_order + PAGE_SHIFT); + ret += 1 << (b->keys.page_order + PAGE_SHIFT); mutex_unlock(&c->bucket_lock); return ret; diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 0c5cf2f63dc3..7110897c3dfa 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -247,7 +247,7 @@ TRACE_EVENT(bcache_btree_write, TP_fast_assign( __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); __entry->block = b->written; - __entry->keys = b->sets[b->nsets].data->keys; + __entry->keys = b->keys.set[b->keys.nsets].data->keys; ), TP_printk("bucket %zu", __entry->bucket) -- cgit v1.2.3 From 59158fde429fb5d18064e2734b3dd5e6048affbd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Nov 2013 19:03:54 -0800 Subject: bcache: Add bch_btree_keys_u64s_remaining() Helper function to explicitly check how much space is free in a btree node Signed-off-by: Kent Overstreet --- drivers/md/bcache/bset.h | 15 +++++++++++++++ drivers/md/bcache/btree.c | 28 +++++++++++++++------------- include/uapi/linux/bcache.h | 1 + 3 files changed, 31 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 87da828477f3..4fc40fd719de 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -260,6 +260,21 @@ static inline bool 
btree_keys_expensive_checks(struct btree_keys *b) #define set_blocks(i, block_bytes) \ __set_blocks(i, (i)->keys, block_bytes) +static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b) +{ + struct bset_tree *t = bset_tree_last(b); + + BUG_ON((PAGE_SIZE << b->page_order) < + (bset_byte_offset(b, t->data) + set_bytes(t->data))); + + if (!b->last_set_unwritten) + return 0; + + return ((PAGE_SIZE << b->page_order) - + (bset_byte_offset(b, t->data) + set_bytes(t->data))) / + sizeof(u64); +} + static inline struct bset *bset_next_set(struct btree_keys *b, unsigned block_bytes) { diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 5d7dee8bb850..2c90003ff4ce 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -179,14 +179,6 @@ static inline struct bset *write_block(struct btree *b) return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c); } -static inline bool should_split(struct btree *b) -{ - struct bset *i = write_block(b); - return b->written >= btree_blocks(b) || - (b->written + __set_blocks(i, i->keys + 15, block_bytes(b->c)) - > btree_blocks(b)); -} - /* Btree key manipulation */ void bkey_put(struct cache_set *c, struct bkey *k) @@ -2026,6 +2018,19 @@ merged: return true; } +static size_t insert_u64s_remaining(struct btree *b) +{ + ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys); + + /* + * Might land in the middle of an existing extent and have to split it + */ + if (b->keys.ops->is_extents) + ret -= KEY_MAX_U64S; + + return max(ret, 0L); +} + static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, struct keylist *insert_keys, struct bkey *replace_key) @@ -2034,12 +2039,9 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, int oldsize = bch_count_data(b); while (!bch_keylist_empty(insert_keys)) { - struct bset *i = write_block(b); struct bkey *k = insert_keys->keys; - if (b->written + - __set_blocks(i, i->keys + bkey_u64s(k), - block_bytes(b->c)) > 
btree_blocks(b)) + if (bkey_u64s(k) > insert_u64s_remaining(b)) break; if (bkey_cmp(k, &b->key) <= 0) { @@ -2203,7 +2205,7 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op, { BUG_ON(b->level && replace_key); - if (should_split(b)) { + if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) { if (current->bio_list) { op->lock = b->c->root->level + 1; return -EAGAIN; diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index ae66311be82f..22b6ad31c706 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -39,6 +39,7 @@ static inline void SET_##name(struct bkey *k, unsigned i, __u64 v) \ } #define KEY_SIZE_BITS 16 +#define KEY_MAX_U64S 8 KEY_FIELD(KEY_PTRS, high, 60, 3) KEY_FIELD(HEADER_SIZE, high, 58, 2) -- cgit v1.2.3