diff options
Diffstat (limited to 'net/ceph')
-rw-r--r-- | net/ceph/Makefile | 1 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 8 | ||||
-rw-r--r-- | net/ceph/crypto.c | 6 | ||||
-rw-r--r-- | net/ceph/debugfs.c | 17 | ||||
-rw-r--r-- | net/ceph/messenger.c | 188 | ||||
-rw-r--r-- | net/ceph/mon_client.c | 2 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 67 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 71 | ||||
-rw-r--r-- | net/ceph/striper.c | 261 |
9 files changed, 453 insertions, 168 deletions
diff --git a/net/ceph/Makefile b/net/ceph/Makefile index b4bded4b5396..12bf49772d24 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ mon_client.o \ cls_lock_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ + striper.o \ debugfs.o \ auth.o auth_none.o \ crypto.o armor.o \ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4adf07826f4a..584fdbef2088 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -72,6 +72,7 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_MON_GET_VERSION: return "mon_get_version"; case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply"; case CEPH_MSG_MDS_MAP: return "mds_map"; + case CEPH_MSG_FS_MAP_USER: return "fs_map_user"; case CEPH_MSG_CLIENT_SESSION: return "client_session"; case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; case CEPH_MSG_CLIENT_REQUEST: return "client_request"; @@ -79,8 +80,13 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_CLIENT_REPLY: return "client_reply"; case CEPH_MSG_CLIENT_CAPS: return "client_caps"; case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; + case CEPH_MSG_CLIENT_QUOTA: return "client_quota"; case CEPH_MSG_CLIENT_SNAP: return "client_snap"; case CEPH_MSG_CLIENT_LEASE: return "client_lease"; + case CEPH_MSG_POOLOP_REPLY: return "poolop_reply"; + case CEPH_MSG_POOLOP: return "poolop"; + case CEPH_MSG_MON_COMMAND: return "mon_command"; + case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack"; case CEPH_MSG_OSD_MAP: return "osd_map"; case CEPH_MSG_OSD_OP: return "osd_op"; case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; @@ -217,7 +223,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid) if (i == 16) err = 0; - dout("parse_fsid ret %d got fsid %pU", err, fsid); + dout("parse_fsid ret %d got fsid %pU\n", err, fsid); return err; } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index bf9d079cbafd..02172c408ff2 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -347,10 +347,12 @@ struct key_type key_type_ceph = { .destroy = ceph_key_destroy, }; -int ceph_crypto_init(void) { +int __init ceph_crypto_init(void) +{ return register_key_type(&key_type_ceph); } -void ceph_crypto_shutdown(void) { +void ceph_crypto_shutdown(void) +{ unregister_key_type(&key_type_ceph); } diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 1eef6806aa1a..02952605d121 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show) CEPH_DEFINE_SHOW_FUNC(osdc_show) CEPH_DEFINE_SHOW_FUNC(client_options_show) -int ceph_debugfs_init(void) +int __init ceph_debugfs_init(void) { ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); if (!ceph_debugfs_dir) @@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->monc.debugfs_file = debugfs_create_file("monc", - 0600, + 0400, client->debugfs_dir, client, &monc_show_fops); @@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->osdc.debugfs_file = debugfs_create_file("osdc", - 0600, + 0400, client->debugfs_dir, client, &osdc_show_fops); @@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_monmap = debugfs_create_file("monmap", - 0600, + 0400, client->debugfs_dir, client, &monmap_show_fops); @@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_osdmap = debugfs_create_file("osdmap", - 0600, + 0400, client->debugfs_dir, client, &osdmap_show_fops); @@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_options = debugfs_create_file("client_options", - 0600, + 0400, client->debugfs_dir, client, &client_options_show_fops); @@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) #else /* CONFIG_DEBUG_FS */ -int ceph_debugfs_init(void) +int __init ceph_debugfs_init(void) { return 0; } @@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) } #endif /* CONFIG_DEBUG_FS */ - -EXPORT_SYMBOL(ceph_debugfs_init); -EXPORT_SYMBOL(ceph_debugfs_cleanup); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 8a4d3758030b..fcb40c12b1f8 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void) ceph_msgr_slab_exit(); } -int ceph_msgr_init(void) +int __init ceph_msgr_init(void) { if (ceph_msgr_slab_init()) return -ENOMEM; @@ -299,7 +299,6 @@ int ceph_msgr_init(void) return -ENOMEM; } -EXPORT_SYMBOL(ceph_msgr_init); void ceph_msgr_exit(void) { @@ -307,7 +306,6 @@ void ceph_msgr_exit(void) _ceph_msgr_exit(); } -EXPORT_SYMBOL(ceph_msgr_exit); void ceph_msgr_flush(void) { @@ -839,93 +837,112 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, size_t length) { struct ceph_msg_data *data = cursor->data; - struct bio *bio; + struct ceph_bio_iter *it = &cursor->bio_iter; - BUG_ON(data->type != CEPH_MSG_DATA_BIO); + cursor->resid = min_t(size_t, length, data->bio_length); + *it = data->bio_pos; + if (cursor->resid < it->iter.bi_size) + it->iter.bi_size = cursor->resid; - bio = data->bio; - BUG_ON(!bio); - - cursor->resid = min(length, data->bio_length); - cursor->bio = bio; - cursor->bvec_iter = bio->bi_iter; - cursor->last_piece = - cursor->resid <= bio_iter_len(bio, cursor->bvec_iter); + BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); + cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); } static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, size_t *page_offset, size_t *length) { - struct ceph_msg_data *data = cursor->data; - struct bio *bio; - struct bio_vec bio_vec; - - BUG_ON(data->type != CEPH_MSG_DATA_BIO); - - bio = cursor->bio; - BUG_ON(!bio); - - bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); - - *page_offset = (size_t) bio_vec.bv_offset; - BUG_ON(*page_offset >= PAGE_SIZE); - if (cursor->last_piece) /* pagelist offset is always 0 */ - *length = cursor->resid; - else - *length = (size_t) bio_vec.bv_len; - BUG_ON(*length > cursor->resid); - BUG_ON(*page_offset + *length > PAGE_SIZE); + struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio, + cursor->bio_iter.iter); - return bio_vec.bv_page; + *page_offset = bv.bv_offset; + *length = bv.bv_len; + return bv.bv_page; } static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) { - struct bio *bio; - struct bio_vec bio_vec; + struct ceph_bio_iter *it = &cursor->bio_iter; - BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO); + BUG_ON(bytes > cursor->resid); + BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); + cursor->resid -= bytes; + bio_advance_iter(it->bio, &it->iter, bytes); - bio = cursor->bio; - BUG_ON(!bio); + if (!cursor->resid) { + BUG_ON(!cursor->last_piece); + return false; /* no more data */ + } - bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); + if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) + return false; /* more bytes to process in this segment */ - /* Advance the cursor offset */ + if (!it->iter.bi_size) { + it->bio = it->bio->bi_next; + it->iter = it->bio->bi_iter; + if (cursor->resid < it->iter.bi_size) + it->iter.bi_size = cursor->resid; + } - BUG_ON(cursor->resid < bytes); - cursor->resid -= bytes; + BUG_ON(cursor->last_piece); + BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); + cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); + return true; +} +#endif /* CONFIG_BLOCK */ - bio_advance_iter(bio, &cursor->bvec_iter, bytes); +static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) +{ + struct ceph_msg_data *data = cursor->data; + struct bio_vec *bvecs = data->bvec_pos.bvecs; - if (bytes < bio_vec.bv_len) - return false; /* more bytes to process in this segment */ + cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size); + cursor->bvec_iter = data->bvec_pos.iter; + cursor->bvec_iter.bi_size = cursor->resid; - /* Move on to the next segment, and possibly the next bio */ + BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); + cursor->last_piece = + cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); +} - if (!cursor->bvec_iter.bi_size) { - bio = bio->bi_next; - cursor->bio = bio; - if (bio) - cursor->bvec_iter = bio->bi_iter; - else - memset(&cursor->bvec_iter, 0, - sizeof(cursor->bvec_iter)); - } +static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, + size_t *length) +{ + struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs, + cursor->bvec_iter); + + *page_offset = bv.bv_offset; + *length = bv.bv_len; + return bv.bv_page; +} + +static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; + + BUG_ON(bytes > cursor->resid); + BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); + cursor->resid -= bytes; + bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes); - if (!cursor->last_piece) { - BUG_ON(!cursor->resid); - BUG_ON(!bio); - /* A short read is OK, so use <= rather than == */ - if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter)) - cursor->last_piece = true; + if (!cursor->resid) { + BUG_ON(!cursor->last_piece); + return false; /* no more data */ } + if (!bytes || cursor->bvec_iter.bi_bvec_done) + return false; /* more bytes to process in this segment */ + + BUG_ON(cursor->last_piece); + BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); + cursor->last_piece = + cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); return true; } -#endif /* CONFIG_BLOCK */ /* * For a page array, a piece comes from the first page in the array @@ -1110,6 +1127,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) ceph_msg_data_bio_cursor_init(cursor, length); break; #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: + ceph_msg_data_bvecs_cursor_init(cursor, length); + break; case CEPH_MSG_DATA_NONE: default: /* BUG(); */ @@ -1158,14 +1178,19 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, page = ceph_msg_data_bio_next(cursor, page_offset, length); break; #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: + page = ceph_msg_data_bvecs_next(cursor, page_offset, length); + break; case CEPH_MSG_DATA_NONE: default: page = NULL; break; } + BUG_ON(!page); BUG_ON(*page_offset + *length > PAGE_SIZE); BUG_ON(!*length); + BUG_ON(*length > cursor->resid); if (last_piece) *last_piece = cursor->last_piece; @@ -1194,6 +1219,9 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, new_piece = ceph_msg_data_bio_advance(cursor, bytes); break; #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: + new_piece = ceph_msg_data_bvecs_advance(cursor, bytes); + break; case CEPH_MSG_DATA_NONE: default: BUG(); @@ -1575,13 +1603,18 @@ static int write_partial_message_data(struct ceph_connection *con) * been revoked, so use the zero page. */ crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0; - while (cursor->resid) { + while (cursor->total_resid) { struct page *page; size_t page_offset; size_t length; bool last_piece; int ret; + if (!cursor->resid) { + ceph_msg_data_advance(cursor, 0); + continue; + } + page = ceph_msg_data_next(cursor, &page_offset, &length, &last_piece); ret = ceph_tcp_sendpage(con->sock, page, page_offset, @@ -2297,7 +2330,12 @@ static int read_partial_msg_data(struct ceph_connection *con) if (do_datacrc) crc = con->in_data_crc; - while (cursor->resid) { + while (cursor->total_resid) { + if (!cursor->resid) { + ceph_msg_data_advance(cursor, 0); + continue; + } + page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); if (ret <= 0) { @@ -3262,16 +3300,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg, EXPORT_SYMBOL(ceph_msg_data_add_pagelist); #ifdef CONFIG_BLOCK -void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, - size_t length) +void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, + u32 length) { struct ceph_msg_data *data; - BUG_ON(!bio); - data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); BUG_ON(!data); - data->bio = bio; + data->bio_pos = *bio_pos; data->bio_length = length; list_add_tail(&data->links, &msg->data); @@ -3280,6 +3316,20 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, EXPORT_SYMBOL(ceph_msg_data_add_bio); #endif /* CONFIG_BLOCK */ +void ceph_msg_data_add_bvecs(struct ceph_msg *msg, + struct ceph_bvec_iter *bvec_pos) +{ + struct ceph_msg_data *data; + + data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS); + BUG_ON(!data); + data->bvec_pos = *bvec_pos; + + list_add_tail(&data->links, &msg->data); + msg->data_length += bvec_pos->iter.bi_size; +} +EXPORT_SYMBOL(ceph_msg_data_add_bvecs); + /* * construct a new message with given type, size * the new msg has a ref count of 1. diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 1547107f4854..b3dac24412d3 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -60,7 +60,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) num_mon = ceph_decode_32(&p); ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad); - if (num_mon >= CEPH_MAX_MON) + if (num_mon > CEPH_MAX_MON) goto bad; m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS); if (m == NULL) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2814dba5902d..ea2a6c9fb7ce 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -20,6 +20,7 @@ #include <linux/ceph/decode.h> #include <linux/ceph/auth.h> #include <linux/ceph/pagelist.h> +#include <linux/ceph/striper.h> #define OSD_OPREPLY_FRONT_LEN 512 @@ -103,13 +104,12 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *objnum, u64 *objoff, u64 *objlen) { u64 orig_len = *plen; - int r; + u32 xlen; /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum, - objoff, objlen); - if (r < 0) - return r; + ceph_calc_file_object_mapping(layout, off, orig_len, objnum, + objoff, &xlen); + *objlen = xlen; if (*objlen < orig_len) { *plen = *objlen; dout(" skipping last %llu, final file extent %llu~%llu\n", @@ -117,7 +117,6 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, } dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); - return 0; } @@ -148,14 +147,22 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, #ifdef CONFIG_BLOCK static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, - struct bio *bio, size_t bio_length) + struct ceph_bio_iter *bio_pos, + u32 bio_length) { osd_data->type = CEPH_OSD_DATA_TYPE_BIO; - osd_data->bio = bio; + osd_data->bio_pos = *bio_pos; osd_data->bio_length = bio_length; } #endif /* CONFIG_BLOCK */ +static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, + struct ceph_bvec_iter *bvec_pos) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_BVECS; + osd_data->bvec_pos = *bvec_pos; +} + #define osd_req_op_data(oreq, whch, typ, fld) \ ({ \ struct ceph_osd_request *__oreq = (oreq); \ @@ -218,16 +225,29 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, - unsigned int which, struct bio *bio, size_t bio_length) + unsigned int which, + struct ceph_bio_iter *bio_pos, + u32 bio_length) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); - ceph_osd_data_bio_init(osd_data, bio, bio_length); + ceph_osd_data_bio_init(osd_data, bio_pos, bio_length); } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); #endif /* CONFIG_BLOCK */ +void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, + unsigned int which, + struct ceph_bvec_iter *bvec_pos) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_bvecs_init(osd_data, bvec_pos); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); + static void osd_req_op_cls_request_info_pagelist( struct ceph_osd_request *osd_req, unsigned int which, struct ceph_pagelist *pagelist) @@ -265,6 +285,23 @@ void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_cls_request_data_pages); +void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, + unsigned int which, + struct bio_vec *bvecs, u32 bytes) +{ + struct ceph_osd_data *osd_data; + struct ceph_bvec_iter it = { + .bvecs = bvecs, + .iter = { .bi_size = bytes }, + }; + + osd_data = osd_req_op_data(osd_req, which, cls, request_data); + ceph_osd_data_bvecs_init(osd_data, &it); + osd_req->r_ops[which].cls.indata_len += bytes; + osd_req->r_ops[which].indata_len += bytes; +} +EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs); + void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) @@ -290,6 +327,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) case CEPH_OSD_DATA_TYPE_BIO: return (u64)osd_data->bio_length; #endif /* CONFIG_BLOCK */ + case CEPH_OSD_DATA_TYPE_BVECS: + return osd_data->bvec_pos.iter.bi_size; default: WARN(true, "unrecognized data type %d\n", (int)osd_data->type); return 0; @@ -828,8 +867,10 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, ceph_msg_data_add_pagelist(msg, osd_data->pagelist); #ifdef CONFIG_BLOCK } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { - ceph_msg_data_add_bio(msg, osd_data->bio, length); + ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length); #endif + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) { + ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos); } else { BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); } @@ -5065,7 +5106,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, } EXPORT_SYMBOL(ceph_osdc_writepages); -int ceph_osdc_setup(void) +int __init ceph_osdc_setup(void) { size_t size = sizeof(struct ceph_osd_request) + CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); @@ -5076,7 +5117,6 @@ int ceph_osdc_setup(void) return ceph_osd_request_cache ? 0 : -ENOMEM; } -EXPORT_SYMBOL(ceph_osdc_setup); void ceph_osdc_cleanup(void) { @@ -5084,7 +5124,6 @@ void ceph_osdc_cleanup(void) kmem_cache_destroy(ceph_osd_request_cache); ceph_osd_request_cache = NULL; } -EXPORT_SYMBOL(ceph_osdc_cleanup); /* * handle incoming message diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 0da27c66349a..9645ffd6acfb 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -4,7 +4,6 @@ #include <linux/module.h> #include <linux/slab.h> -#include <asm/div64.h> #include <linux/ceph/libceph.h> #include <linux/ceph/osdmap.h> @@ -2141,76 +2140,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, } /* - * calculate file layout from given offset, length. - * fill in correct oid, logical length, and object extent - * offset, length. - * - * for now, we write only a single su, until we can - * pass a stride back to the caller. - */ -int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 len, - u64 *ono, - u64 *oxoff, u64 *oxlen) -{ - u32 osize = layout->object_size; - u32 su = layout->stripe_unit; - u32 sc = layout->stripe_count; - u32 bl, stripeno, stripepos, objsetno; - u32 su_per_object; - u64 t, su_offset; - - dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, - osize, su); - if (su == 0 || sc == 0) - goto invalid; - su_per_object = osize / su; - if (su_per_object == 0) - goto invalid; - dout("osize %u / su %u = su_per_object %u\n", osize, su, - su_per_object); - - if ((su & ~PAGE_MASK) != 0) - goto invalid; - - /* bl = *off / su; */ - t = off; - do_div(t, su); - bl = t; - dout("off %llu / su %u = bl %u\n", off, su, bl); - - stripeno = bl / sc; - stripepos = bl % sc; - objsetno = stripeno / su_per_object; - - *ono = objsetno * sc + stripepos; - dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono); - - /* *oxoff = *off % layout->fl_stripe_unit; # offset in su */ - t = off; - su_offset = do_div(t, su); - *oxoff = su_offset + (stripeno % su_per_object) * su; - - /* - * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (len) or - * the remainder of the current stripe being written to. - */ - *oxlen = min_t(u64, len, su - su_offset); - - dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); - return 0; - -invalid: - dout(" invalid layout\n"); - *ono = 0; - *oxoff = 0; - *oxlen = 0; - return -EINVAL; -} -EXPORT_SYMBOL(ceph_calc_file_object_mapping); - -/* * Map an object into a PG. * * Should only be called with target_oid and target_oloc (as opposed to diff --git a/net/ceph/striper.c b/net/ceph/striper.c new file mode 100644 index 000000000000..c36462dc86b7 --- /dev/null +++ b/net/ceph/striper.c @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/ceph/ceph_debug.h> + +#include <linux/math64.h> +#include <linux/slab.h> + +#include <linux/ceph/striper.h> +#include <linux/ceph/types.h> + +/* + * Map a file extent to a stripe unit within an object. + * Fill in objno, offset into object, and object extent length (i.e. the + * number of bytes mapped, less than or equal to @l->stripe_unit). + * + * Example for stripe_count = 3, stripes_per_object = 4: + * + * blockno | 0 3 6 9 | 1 4 7 10 | 2 5 8 11 | 12 15 18 21 | 13 16 19 + * stripeno | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 4 5 6 7 | 4 5 6 + * stripepos | 0 | 1 | 2 | 0 | 1 + * objno | 0 | 1 | 2 | 3 | 4 + * objsetno | 0 | 1 + */ +void ceph_calc_file_object_mapping(struct ceph_file_layout *l, + u64 off, u64 len, + u64 *objno, u64 *objoff, u32 *xlen) +{ + u32 stripes_per_object = l->object_size / l->stripe_unit; + u64 blockno; /* which su in the file (i.e. globally) */ + u32 blockoff; /* offset into su */ + u64 stripeno; /* which stripe */ + u32 stripepos; /* which su in the stripe, + which object in the object set */ + u64 objsetno; /* which object set */ + u32 objsetpos; /* which stripe in the object set */ + + blockno = div_u64_rem(off, l->stripe_unit, &blockoff); + stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos); + objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos); + + *objno = objsetno * l->stripe_count + stripepos; + *objoff = objsetpos * l->stripe_unit + blockoff; + *xlen = min_t(u64, len, l->stripe_unit - blockoff); +} +EXPORT_SYMBOL(ceph_calc_file_object_mapping); + +/* + * Return the last extent with given objno (@object_extents is sorted + * by objno). If not found, return NULL and set @add_pos so that the + * new extent can be added with list_add(add_pos, new_ex). + */ +static struct ceph_object_extent * +lookup_last(struct list_head *object_extents, u64 objno, + struct list_head **add_pos) +{ + struct list_head *pos; + + list_for_each_prev(pos, object_extents) { + struct ceph_object_extent *ex = + list_entry(pos, typeof(*ex), oe_item); + + if (ex->oe_objno == objno) + return ex; + + if (ex->oe_objno < objno) + break; + } + + *add_pos = pos; + return NULL; +} + +static struct ceph_object_extent * +lookup_containing(struct list_head *object_extents, u64 objno, + u64 objoff, u32 xlen) +{ + struct ceph_object_extent *ex; + + list_for_each_entry(ex, object_extents, oe_item) { + if (ex->oe_objno == objno && + ex->oe_off <= objoff && + ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */ + return ex; + + if (ex->oe_objno > objno) + break; + } + + return NULL; +} + +/* + * Map a file extent to a sorted list of object extents. + * + * We want only one (or as few as possible) object extents per object. + * Adjacent object extents will be merged together, each returned object + * extent may reverse map to multiple different file extents. + * + * Call @alloc_fn for each new object extent and @action_fn for each + * mapped stripe unit, whether it was merged into an already allocated + * object extent or started a new object extent. + * + * Newly allocated object extents are added to @object_extents. + * To keep @object_extents sorted, successive calls to this function + * must map successive file extents (i.e. the list of file extents that + * are mapped using the same @object_extents must be sorted). + * + * The caller is responsible for @object_extents. + */ +int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + struct ceph_object_extent *alloc_fn(void *arg), + void *alloc_arg, + ceph_object_extent_fn_t action_fn, + void *action_arg) +{ + struct ceph_object_extent *last_ex, *ex; + + while (len) { + struct list_head *add_pos = NULL; + u64 objno, objoff; + u32 xlen; + + ceph_calc_file_object_mapping(l, off, len, &objno, &objoff, + &xlen); + + last_ex = lookup_last(object_extents, objno, &add_pos); + if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) { + ex = alloc_fn(alloc_arg); + if (!ex) + return -ENOMEM; + + ex->oe_objno = objno; + ex->oe_off = objoff; + ex->oe_len = xlen; + if (action_fn) + action_fn(ex, xlen, action_arg); + + if (!last_ex) + list_add(&ex->oe_item, add_pos); + else + list_add(&ex->oe_item, &last_ex->oe_item); + } else { + last_ex->oe_len += xlen; + if (action_fn) + action_fn(last_ex, xlen, action_arg); + } + + off += xlen; + len -= xlen; + } + + for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item), + ex = list_next_entry(last_ex, oe_item); + &ex->oe_item != object_extents; + last_ex = ex, ex = list_next_entry(ex, oe_item)) { + if (last_ex->oe_objno > ex->oe_objno || + (last_ex->oe_objno == ex->oe_objno && + last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) { + WARN(1, "%s: object_extents list not sorted!\n", + __func__); + return -EINVAL; + } + } + + return 0; +} +EXPORT_SYMBOL(ceph_file_to_extents); + +/* + * A stripped down, non-allocating version of ceph_file_to_extents(), + * for when @object_extents is already populated. + */ +int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + ceph_object_extent_fn_t action_fn, + void *action_arg) +{ + while (len) { + struct ceph_object_extent *ex; + u64 objno, objoff; + u32 xlen; + + ceph_calc_file_object_mapping(l, off, len, &objno, &objoff, + &xlen); + + ex = lookup_containing(object_extents, objno, objoff, xlen); + if (!ex) { + WARN(1, "%s: objno %llu %llu~%u not found!\n", + __func__, objno, objoff, xlen); + return -EINVAL; + } + + action_fn(ex, xlen, action_arg); + + off += xlen; + len -= xlen; + } + + return 0; +} +EXPORT_SYMBOL(ceph_iterate_extents); + +/* + * Reverse map an object extent to a sorted list of file extents. + * + * On success, the caller is responsible for: + * + * kfree(file_extents) + */ +int ceph_extent_to_file(struct ceph_file_layout *l, + u64 objno, u64 objoff, u64 objlen, + struct ceph_file_extent **file_extents, + u32 *num_file_extents) +{ + u32 stripes_per_object = l->object_size / l->stripe_unit; + u64 blockno; /* which su */ + u32 blockoff; /* offset into su */ + u64 stripeno; /* which stripe */ + u32 stripepos; /* which su in the stripe, + which object in the object set */ + u64 objsetno; /* which object set */ + u32 i = 0; + + if (!objlen) { + *file_extents = NULL; + *num_file_extents = 0; + return 0; + } + + *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) - + DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit); + *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents), + GFP_NOIO); + if (!*file_extents) + return -ENOMEM; + + div_u64_rem(objoff, l->stripe_unit, &blockoff); + while (objlen) { + u64 off, len; + + objsetno = div_u64_rem(objno, l->stripe_count, &stripepos); + stripeno = div_u64(objoff, l->stripe_unit) + + objsetno * stripes_per_object; + blockno = stripeno * l->stripe_count + stripepos; + off = blockno * l->stripe_unit + blockoff; + len = min_t(u64, objlen, l->stripe_unit - blockoff); + + (*file_extents)[i].fe_off = off; + (*file_extents)[i].fe_len = len; + + blockoff = 0; + objoff += len; + objlen -= len; + i++; + } + + BUG_ON(i != *num_file_extents); + return 0; +} +EXPORT_SYMBOL(ceph_extent_to_file); |