Diffstat (limited to 'drivers/md/dm-bufio.c')
-rw-r--r-- | drivers/md/dm-bufio.c | 279 |
1 file changed, 122 insertions(+), 157 deletions(-)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index aa2032fa80d4..12aa9ca21d8c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -6,7 +6,7 @@
  * This file is released under the GPL.
  */

-#include "dm-bufio.h"
+#include <linux/dm-bufio.h>

 #include <linux/device-mapper.h>
 #include <linux/dm-io.h>
@@ -51,19 +51,6 @@
 #define DM_BUFIO_DEFAULT_RETAIN_BYTES	(256 * 1024)

 /*
- * The number of bvec entries that are embedded directly in the buffer.
- * If the chunk size is larger, dm-io is used to do the io.
- */
-#define DM_BUFIO_INLINE_VECS		16
-
-/*
- * Don't try to use kmem_cache_alloc for blocks larger than this.
- * For explanation, see alloc_buffer_data below.
- */
-#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT	(PAGE_SIZE >> 1)
-#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT	(PAGE_SIZE << (MAX_ORDER - 1))
-
-/*
  * Align buffer writes to this boundary.
  * Tests show that SSDs have the highest IOPS when using 4k writes.
  */
@@ -99,13 +86,12 @@ struct dm_bufio_client {

 	struct block_device *bdev;
 	unsigned block_size;
-	unsigned char sectors_per_block_bits;
-	unsigned char pages_per_block_bits;
-	unsigned char blocks_per_page_bits;
-	unsigned aux_size;
+	s8 sectors_per_block_bits;

 	void (*alloc_callback)(struct dm_buffer *);
 	void (*write_callback)(struct dm_buffer *);

+	struct kmem_cache *slab_buffer;
+	struct kmem_cache *slab_cache;
 	struct dm_io_client *dm_io;

 	struct list_head reserved_buffers;
@@ -148,11 +134,11 @@ struct dm_buffer {
 	struct list_head lru_list;
 	sector_t block;
 	void *data;
-	enum data_mode data_mode;
+	unsigned char data_mode;		/* DATA_MODE_* */
 	unsigned char list_mode;		/* LIST_* */
-	unsigned hold_count;
 	blk_status_t read_error;
 	blk_status_t write_error;
+	unsigned hold_count;
 	unsigned long state;
 	unsigned long last_accessed;
 	unsigned dirty_start;
@@ -161,8 +147,7 @@ struct dm_buffer {
 	unsigned write_end;
 	struct dm_bufio_client *c;
 	struct list_head write_list;
-	struct bio bio;
-	struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
+	void (*end_io)(struct dm_buffer *, blk_status_t);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
 	struct stack_trace stack_trace;
@@ -172,21 +157,6 @@ struct dm_buffer {

 /*----------------------------------------------------------------*/

-static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT];
-static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT];
-
-static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
-{
-	unsigned ret = c->blocks_per_page_bits - 1;
-
-	BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches));
-
-	return ret;
-}
-
-#define DM_BUFIO_CACHE(c)	(dm_bufio_caches[dm_bufio_cache_index(c)])
-#define DM_BUFIO_CACHE_NAME(c)	(dm_bufio_cache_names[dm_bufio_cache_index(c)])
-
 #define dm_bufio_in_request()	(!!current->bio_list)

 static void dm_bufio_lock(struct dm_bufio_client *c)
@@ -319,7 +289,7 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)

 /*----------------------------------------------------------------*/

-static void adjust_total_allocated(enum data_mode data_mode, long diff)
+static void adjust_total_allocated(unsigned char data_mode, long diff)
 {
 	static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
 		&dm_bufio_allocated_kmem_cache,
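The structural hunks above replace three derived per-size bit counts and the global per-size cache table with a single signed field: sectors_per_block_bits holds log2 of the block size in sectors for power-of-two sizes and -1 otherwise. A minimal userspace sketch of that encoding (encode_sectors_per_block_bits is a hypothetical name, not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

static int8_t encode_sectors_per_block_bits(unsigned block_size)
{
	if (block_size & (block_size - 1))	/* not a power of two */
		return -1;
	return (int8_t)(__builtin_ctz(block_size) - SECTOR_SHIFT);
}

int main(void)
{
	unsigned sizes[] = { 512, 4096, 65536, 1536 };
	unsigned i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("block_size %6u -> sectors_per_block_bits %d\n",
		       sizes[i], encode_sectors_per_block_bits(sizes[i]));
	return 0;
}

Every size-dependent computation in the rest of the patch branches on the sign of this field: shift when it is non-negative, multiply or divide when it is -1.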
@@ -384,18 +354,18 @@ static void __cache_size_refresh(void)
  * space.
  */
 static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
-			       enum data_mode *data_mode)
+			       unsigned char *data_mode)
 {
-	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
+	if (unlikely(c->slab_cache != NULL)) {
 		*data_mode = DATA_MODE_SLAB;
-		return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
+		return kmem_cache_alloc(c->slab_cache, gfp_mask);
 	}

-	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT &&
+	if (c->block_size <= KMALLOC_MAX_SIZE &&
 	    gfp_mask & __GFP_NORETRY) {
 		*data_mode = DATA_MODE_GET_FREE_PAGES;
 		return (void *)__get_free_pages(gfp_mask,
-						c->pages_per_block_bits);
+						c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
 	}

 	*data_mode = DATA_MODE_VMALLOC;
@@ -424,15 +394,16 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
  * Free buffer's data.
  */
 static void free_buffer_data(struct dm_bufio_client *c,
-			     void *data, enum data_mode data_mode)
+			     void *data, unsigned char data_mode)
 {
 	switch (data_mode) {
 	case DATA_MODE_SLAB:
-		kmem_cache_free(DM_BUFIO_CACHE(c), data);
+		kmem_cache_free(c->slab_cache, data);
 		break;

 	case DATA_MODE_GET_FREE_PAGES:
-		free_pages((unsigned long)data, c->pages_per_block_bits);
+		free_pages((unsigned long)data,
+			   c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
 		break;

 	case DATA_MODE_VMALLOC:
@@ -451,8 +422,7 @@ static void free_buffer_data(struct dm_bufio_client *c,
  */
 static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
 {
-	struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size,
-				      gfp_mask);
+	struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);

 	if (!b)
 		return NULL;
@@ -461,7 +431,7 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)

 	b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
 	if (!b->data) {
-		kfree(b);
+		kmem_cache_free(c->slab_buffer, b);
 		return NULL;
 	}

@@ -483,7 +453,7 @@ static void free_buffer(struct dm_buffer *b)
 	adjust_total_allocated(b->data_mode, -(long)c->block_size);

 	free_buffer_data(c, b->data, b->data_mode);
-	kfree(b);
+	kmem_cache_free(c->slab_buffer, b);
 }

 /*
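In the allocation hunks above, the removed pages_per_block_bits is recomputed on the fly as c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT). That expression is only reached on the __get_free_pages() path, i.e. for power-of-two block sizes of at least one page, where it equals the page order of one block. A small standalone check of that identity (userspace C, assuming 4 KiB pages; not part of the patch):

#include <assert.h>
#include <stdio.h>

#define SECTOR_SHIFT 9
#define PAGE_SHIFT 12	/* assumption: 4 KiB pages */

int main(void)
{
	unsigned block_size;

	for (block_size = 4096; block_size <= 65536; block_size <<= 1) {
		int sectors_per_block_bits = __builtin_ctz(block_size) - SECTOR_SHIFT;
		int order = sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT);

		/* 2^order pages must cover exactly one block */
		assert((1u << (order + PAGE_SHIFT)) == block_size);
		printf("block_size %5u -> page order %d\n", block_size, order);
	}
	return 0;
}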
@@ -540,10 +510,6 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
  *
  * the memory must be direct-mapped, not vmalloced;
  *
- * the I/O driver can reject requests spuriously if it thinks that
- * the requests are too big for the device or if they cross a
- * controller-defined memory boundary.
- *
  * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and
  * it is not vmalloced, try using the bio interface.
  *
@@ -561,12 +527,11 @@ static void dmio_complete(unsigned long error, void *context)
 {
 	struct dm_buffer *b = context;

-	b->bio.bi_status = error ? BLK_STS_IOERR : 0;
-	b->bio.bi_end_io(&b->bio);
+	b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
 }

 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
-		     unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
+		     unsigned n_sectors, unsigned offset)
 {
 	int r;
 	struct dm_io_request io_req = {
@@ -590,76 +555,77 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 		io_req.mem.ptr.vma = (char *)b->data + offset;
 	}

-	b->bio.bi_end_io = end_io;
-
 	r = dm_io(&io_req, 1, &region, NULL);
-	if (r) {
-		b->bio.bi_status = errno_to_blk_status(r);
-		end_io(&b->bio);
-	}
+	if (unlikely(r))
+		b->end_io(b, errno_to_blk_status(r));
 }

-static void inline_endio(struct bio *bio)
+static void bio_complete(struct bio *bio)
 {
-	bio_end_io_t *end_fn = bio->bi_private;
+	struct dm_buffer *b = bio->bi_private;
 	blk_status_t status = bio->bi_status;
-
-	/*
-	 * Reset the bio to free any attached resources
-	 * (e.g. bio integrity profiles).
-	 */
-	bio_reset(bio);
-
-	bio->bi_status = status;
-	end_fn(bio);
+	bio_put(bio);
+	b->end_io(b, status);
 }

-static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
-			   unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
+static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
+		    unsigned n_sectors, unsigned offset)
 {
+	struct bio *bio;
 	char *ptr;
-	unsigned len;
+	unsigned vec_size, len;

-	bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
-	b->bio.bi_iter.bi_sector = sector;
-	bio_set_dev(&b->bio, b->c->bdev);
-	b->bio.bi_end_io = inline_endio;
-	/*
-	 * Use of .bi_private isn't a problem here because
-	 * the dm_buffer's inline bio is local to bufio.
-	 */
-	b->bio.bi_private = end_io;
-	bio_set_op_attrs(&b->bio, rw, 0);
+	vec_size = b->c->block_size >> PAGE_SHIFT;
+	if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
+		vec_size += 2;
+
+	bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
+	if (!bio) {
+dmio:
+		use_dmio(b, rw, sector, n_sectors, offset);
+		return;
+	}
+
+	bio->bi_iter.bi_sector = sector;
+	bio_set_dev(bio, b->c->bdev);
+	bio_set_op_attrs(bio, rw, 0);
+	bio->bi_end_io = bio_complete;
+	bio->bi_private = b;

 	ptr = (char *)b->data + offset;
 	len = n_sectors << SECTOR_SHIFT;

 	do {
 		unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
-		if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
+		if (!bio_add_page(bio, virt_to_page(ptr), this_step,
 				  offset_in_page(ptr))) {
-			BUG_ON(b->c->block_size <= PAGE_SIZE);
-			use_dmio(b, rw, sector, n_sectors, offset, end_io);
-			return;
+			bio_put(bio);
+			goto dmio;
 		}

 		len -= this_step;
 		ptr += this_step;
 	} while (len > 0);

-	submit_bio(&b->bio);
+	submit_bio(bio);
 }

-static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
+static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
 {
 	unsigned n_sectors;
 	sector_t sector;
 	unsigned offset, end;

-	sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
+	b->end_io = end_io;
+
+	if (likely(b->c->sectors_per_block_bits >= 0))
+		sector = b->block << b->c->sectors_per_block_bits;
+	else
+		sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
+	sector += b->c->start;

 	if (rw != REQ_OP_WRITE) {
-		n_sectors = 1 << b->c->sectors_per_block_bits;
+		n_sectors = b->c->block_size >> SECTOR_SHIFT;
 		offset = 0;
 	} else {
 		if (b->c->write_callback)
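use_bio() above sizes the on-demand bio at block_size >> PAGE_SHIFT vecs, plus two when the block is smaller than a page or not a power of two (sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT): such buffers come from a slab and need not be page-aligned, so the data can overhang a page boundary at both ends. A standalone check of that worst case (userspace C, assuming 4 KiB pages; pages_spanned is a hypothetical helper):

#include <stdio.h>

#define PAGE_SIZE 4096u

/* number of pages touched by 'len' bytes starting 'off' bytes into a page */
static unsigned pages_spanned(unsigned off, unsigned len)
{
	return (off + len + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
	unsigned block_size = 6144;	/* 12 sectors, not a power of two */
	unsigned off, worst = 0;

	for (off = 0; off < PAGE_SIZE; off++) {
		unsigned n = pages_spanned(off, block_size);
		if (n > worst)
			worst = n;
	}
	printf("block_size %u: at most %u bio vecs (block_size >> PAGE_SHIFT plus 2)\n",
	       block_size, worst);
	return 0;
}

If even this GFP_NOWAIT allocation fails, or bio_add_page() cannot extend the bio, the code now falls back to dm-io via the dmio: label instead of hitting the old BUG_ON().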
@@ -676,11 +642,10 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
 		n_sectors = (end - offset) >> SECTOR_SHIFT;
 	}

-	if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
-	    b->data_mode != DATA_MODE_VMALLOC)
-		use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
+	if (b->data_mode != DATA_MODE_VMALLOC)
+		use_bio(b, rw, sector, n_sectors, offset);
 	else
-		use_dmio(b, rw, sector, n_sectors, offset, end_io);
+		use_dmio(b, rw, sector, n_sectors, offset);
 }

 /*----------------------------------------------------------------
@@ -693,16 +658,14 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
  * Set the error, clear B_WRITING bit and wake anyone who was waiting on
  * it.
  */
-static void write_endio(struct bio *bio)
+static void write_endio(struct dm_buffer *b, blk_status_t status)
 {
-	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
-
-	b->write_error = bio->bi_status;
-	if (unlikely(bio->bi_status)) {
+	b->write_error = status;
+	if (unlikely(status)) {
 		struct dm_bufio_client *c = b->c;

 		(void)cmpxchg(&c->async_write_error, 0,
-			      blk_status_to_errno(bio->bi_status));
+			      blk_status_to_errno(status));
 	}

 	BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -963,8 +926,11 @@ static void __get_memory_limit(struct dm_bufio_client *c,
 		}
 	}

-	buffers = dm_bufio_cache_size_per_client >>
-		  (c->sectors_per_block_bits + SECTOR_SHIFT);
+	buffers = dm_bufio_cache_size_per_client;
+	if (likely(c->sectors_per_block_bits >= 0))
+		buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT;
+	else
+		buffers /= c->block_size;

 	if (buffers < c->minimum_buffers)
 		buffers = c->minimum_buffers;
@@ -1076,11 +1042,9 @@ found_buffer:
  * The endio routine for reading: set the error, clear the bit and wake up
  * anyone waiting on the buffer.
  */
-static void read_endio(struct bio *bio)
+static void read_endio(struct dm_buffer *b, blk_status_t status)
 {
-	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
-
-	b->read_error = bio->bi_status;
+	b->read_error = status;

 	BUG_ON(!test_bit(B_READING, &b->state));

@@ -1482,13 +1446,13 @@ void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)

 	dm_bufio_unlock(c);
 }
-EXPORT_SYMBOL(dm_bufio_forget);
+EXPORT_SYMBOL_GPL(dm_bufio_forget);

 void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
 {
 	c->minimum_buffers = n;
 }
-EXPORT_SYMBOL(dm_bufio_set_minimum_buffers);
+EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);

 unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
 {
@@ -1498,8 +1462,12 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);

 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
 {
-	return i_size_read(c->bdev->bd_inode) >>
-	       (SECTOR_SHIFT + c->sectors_per_block_bits);
+	sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
+	if (likely(c->sectors_per_block_bits >= 0))
+		s >>= c->sectors_per_block_bits;
+	else
+		sector_div(s, c->block_size >> SECTOR_SHIFT);
+	return s;
 }
 EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);

@@ -1597,8 +1565,12 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)

 static unsigned long get_retain_buffers(struct dm_bufio_client *c)
 {
-	unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
-	return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
+	unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
+	if (likely(c->sectors_per_block_bits >= 0))
+		retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
+	else
+		retain_bytes /= c->block_size;
+	return retain_bytes;
 }

 static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
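The accounting hunks above all follow the same pattern: sizes are shifted by sectors_per_block_bits when it is non-negative and divided by the block size otherwise (sector_div() for the 64-bit sector count, plain division for the rest). A userspace model of the dm_bufio_get_device_size() arithmetic (blocks_on_device is a hypothetical name):

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

static uint64_t blocks_on_device(uint64_t device_bytes, unsigned block_size,
				 int sectors_per_block_bits)
{
	uint64_t s = device_bytes >> SECTOR_SHIFT;

	if (sectors_per_block_bits >= 0)
		s >>= sectors_per_block_bits;		/* fast path: shift */
	else
		s /= block_size >> SECTOR_SHIFT;	/* slow path: divide */
	return s;
}

int main(void)
{
	uint64_t bytes = 1ull << 30;	/* 1 GiB device */

	printf("4096-byte blocks: %llu\n",
	       (unsigned long long)blocks_on_device(bytes, 4096, 3));
	printf("1536-byte blocks: %llu\n",
	       (unsigned long long)blocks_on_device(bytes, 1536, -1));
	return 0;
}

Both paths agree with bytes / block_size; the signed field only selects the cheaper instruction on the common power-of-two case.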
@@ -1662,9 +1634,13 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 	int r;
 	struct dm_bufio_client *c;
 	unsigned i;
+	char slab_name[27];

-	BUG_ON(block_size < 1 << SECTOR_SHIFT ||
-	       (block_size & (block_size - 1)));
+	if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
+		DMERR("%s: block size not specified or is not multiple of 512b", __func__);
+		r = -EINVAL;
+		goto bad_client;
+	}

 	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c) {
@@ -1675,13 +1651,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign

 	c->bdev = bdev;
 	c->block_size = block_size;
-	c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
-	c->pages_per_block_bits = (__ffs(block_size) >= PAGE_SHIFT) ?
-				  __ffs(block_size) - PAGE_SHIFT : 0;
-	c->blocks_per_page_bits = (__ffs(block_size) < PAGE_SHIFT ?
-				  PAGE_SHIFT - __ffs(block_size) : 0);
+	if (is_power_of_2(block_size))
+		c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
+	else
+		c->sectors_per_block_bits = -1;

-	c->aux_size = aux_size;
 	c->alloc_callback = alloc_callback;
 	c->write_callback = write_callback;

@@ -1694,7 +1668,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 	INIT_LIST_HEAD(&c->reserved_buffers);
 	c->need_reserved_buffers = reserved_buffers;

-	c->minimum_buffers = DM_BUFIO_MIN_BUFFERS;
+	dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);

 	init_waitqueue_head(&c->free_buffer_wait);
 	c->async_write_error = 0;
@@ -1705,29 +1679,26 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 		goto bad_dm_io;
 	}

-	mutex_lock(&dm_bufio_clients_lock);
-	if (c->blocks_per_page_bits) {
-		if (!DM_BUFIO_CACHE_NAME(c)) {
-			DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size);
-			if (!DM_BUFIO_CACHE_NAME(c)) {
-				r = -ENOMEM;
-				mutex_unlock(&dm_bufio_clients_lock);
-				goto bad;
-			}
-		}
-
-		if (!DM_BUFIO_CACHE(c)) {
-			DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c),
-							      c->block_size,
-							      c->block_size, 0, NULL);
-			if (!DM_BUFIO_CACHE(c)) {
-				r = -ENOMEM;
-				mutex_unlock(&dm_bufio_clients_lock);
-				goto bad;
-			}
+	if (block_size <= KMALLOC_MAX_SIZE &&
+	    (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
+		snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
+		c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
+						  SLAB_RECLAIM_ACCOUNT, NULL);
+		if (!c->slab_cache) {
+			r = -ENOMEM;
+			goto bad;
 		}
 	}
-	mutex_unlock(&dm_bufio_clients_lock);
+	if (aux_size)
+		snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
+	else
+		snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
+	c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
+					   0, SLAB_RECLAIM_ACCOUNT, NULL);
+	if (!c->slab_buffer) {
+		r = -ENOMEM;
+		goto bad;
+	}

 	while (c->need_reserved_buffers) {
 		struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
@@ -1762,6 +1733,8 @@ bad:
 		list_del(&b->lru_list);
 		free_buffer(b);
 	}
+	kmem_cache_destroy(c->slab_cache);
+	kmem_cache_destroy(c->slab_buffer);
 	dm_io_client_destroy(c->dm_io);
 bad_dm_io:
 	mutex_destroy(&c->lock);
@@ -1808,6 +1781,8 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
 	for (i = 0; i < LIST_SIZE; i++)
 		BUG_ON(c->n_buffers[i]);

+	kmem_cache_destroy(c->slab_cache);
+	kmem_cache_destroy(c->slab_buffer);
 	dm_io_client_destroy(c->dm_io);
 	mutex_destroy(&c->lock);
 	kfree(c);
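The 27 bytes for slab_name in dm_bufio_client_create() above are exactly enough for the longest generated name: "dm_bufio_buffer-" is 16 characters, an unsigned aux_size prints as at most 10 digits, and the NUL terminator adds one. A quick standalone check of that bound:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	char slab_name[27];
	int n;

	n = snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", UINT_MAX);
	printf("\"%s\" uses %d of %zu bytes\n", slab_name, n + 1, sizeof slab_name);
	return 0;
}

The "dm_bufio_cache-%u" name is one character shorter, so it fits in the same buffer.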
@@ -1911,9 +1886,6 @@ static int __init dm_bufio_init(void)
 	dm_bufio_allocated_vmalloc = 0;
 	dm_bufio_current_allocated = 0;

-	memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
-	memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
-
 	mem = (__u64)mult_frac(totalram_pages - totalhigh_pages,
 			       DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;

@@ -1948,17 +1920,10 @@ static int __init dm_bufio_init(void)
 static void __exit dm_bufio_exit(void)
 {
 	int bug = 0;
-	int i;

 	cancel_delayed_work_sync(&dm_bufio_work);
 	destroy_workqueue(dm_bufio_wq);

-	for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++)
-		kmem_cache_destroy(dm_bufio_caches[i]);
-
-	for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
-		kfree(dm_bufio_cache_names[i]);
-
 	if (dm_bufio_client_count) {
 		DMCRIT("%s: dm_bufio_client_count leaked: %d",
 		       __func__, dm_bufio_client_count);
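For dm-bufio users, the visible effects of this diff are the new header location and the relaxed block-size rule: dm_bufio_client_create() now accepts any multiple of 512 bytes and fails with -EINVAL on a bad size instead of triggering the old BUG_ON(). A hypothetical caller (open_metadata_cache is illustrative, not from the tree):

#include <linux/dm-bufio.h>

/* 6 KiB blocks were impossible before this change; they now take the
 * sectors_per_block_bits == -1 (multiply/divide) paths shown above. */
static struct dm_bufio_client *open_metadata_cache(struct block_device *bdev)
{
	return dm_bufio_client_create(bdev, 12 << SECTOR_SHIFT /* 6 KiB */,
				      1 /* reserved_buffers */, 0 /* aux_size */,
				      NULL, NULL);
}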