Diffstat (limited to 'drivers/infiniband/ulp')
20 files changed, 496 insertions, 413 deletions
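A recurring pattern in the ipoib hunks below is the conversion of ipoib_neigh's reference count from atomic_t to refcount_t, which traps overflow and underflow instead of silently wrapping. A minimal sketch of that pattern follows (hypothetical obj type, not part of the patch):

#include <linux/refcount.h>
#include <linux/slab.h>

struct obj {
	refcount_t refcnt;
};

static void obj_init(struct obj *o)
{
	refcount_set(&o->refcnt, 1);	/* one ref on behalf of the creator */
}

static struct obj *obj_get(struct obj *o)
{
	/* lookup path: fails once the count has already dropped to zero */
	if (!refcount_inc_not_zero(&o->refcnt))
		return NULL;
	return o;
}

static void obj_put(struct obj *o)
{
	/* refcount_t saturates and WARNs on over/underflow */
	if (refcount_dec_and_test(&o->refcnt))
		kfree(o);
}

This mirrors ipoib_neigh_ctor()/ipoib_neigh_get()/ipoib_neigh_put() as converted below.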
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 75cd44789661..44d8d151ff90 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -454,7 +454,7 @@ struct ipoib_neigh { struct list_head list; struct ipoib_neigh __rcu *hnext; struct rcu_head rcu; - atomic_t refcnt; + refcount_t refcnt; unsigned long alive; }; @@ -464,7 +464,7 @@ struct ipoib_neigh { void ipoib_neigh_dtor(struct ipoib_neigh *neigh); static inline void ipoib_neigh_put(struct ipoib_neigh *neigh) { - if (atomic_dec_and_test(&neigh->refcnt)) + if (refcount_dec_and_test(&neigh->refcnt)) ipoib_neigh_dtor(neigh); } struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 9dbc85a6b702..684c2ddb16f5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1503,7 +1503,7 @@ static void ipoib_cm_stale_task(struct work_struct *work) spin_unlock_irq(&priv->lock); } -static ssize_t show_mode(struct device *d, struct device_attribute *attr, +static ssize_t mode_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_device *dev = to_net_dev(d); @@ -1515,8 +1515,8 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr, return sysfs_emit(buf, "datagram\n"); } -static ssize_t set_mode(struct device *d, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t mode_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) { struct net_device *dev = to_net_dev(d); int ret; @@ -1542,7 +1542,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, return (!ret || ret == -EBUSY) ? count : ret; } -static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); +static DEVICE_ATTR_RW(mode); int ipoib_cm_add_mode_attr(struct net_device *dev) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bbb18087fdab..abf60f4d9203 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -316,7 +316,7 @@ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, return false; } -/** +/* * Find the master net_device on top of the given net_device. * @dev: base IPoIB net_device * @@ -361,8 +361,9 @@ static int ipoib_upper_walk(struct net_device *upper, } /** - * Find a net_device matching the given address, which is an upper device of - * the given net_device. + * ipoib_get_net_dev_match_addr - Find a net_device matching + * the given address, which is an upper device of the given net_device. + * * @addr: IP address to look for. 
* @dev: base IPoIB net_device * @@ -1287,7 +1288,7 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) neigh = rcu_dereference_bh(neigh->hnext)) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { /* found, take one ref on behalf of the caller */ - if (!atomic_inc_not_zero(&neigh->refcnt)) { + if (!refcount_inc_not_zero(&neigh->refcnt)) { /* deleted */ neigh = NULL; goto out_unlock; @@ -1382,7 +1383,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, INIT_LIST_HEAD(&neigh->list); ipoib_cm_set(neigh, NULL); /* one ref on behalf of the caller */ - atomic_set(&neigh->refcnt, 1); + refcount_set(&neigh->refcnt, 1); return neigh; } @@ -1414,7 +1415,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, lockdep_is_held(&priv->lock))) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { /* found, take one ref on behalf of the caller */ - if (!atomic_inc_not_zero(&neigh->refcnt)) { + if (!refcount_inc_not_zero(&neigh->refcnt)) { /* deleted */ neigh = NULL; break; @@ -1429,7 +1430,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, goto out_unlock; /* one ref on behalf of the hash table */ - atomic_inc(&neigh->refcnt); + refcount_inc(&neigh->refcnt); neigh->alive = jiffies; /* put in hash */ rcu_assign_pointer(neigh->hnext, @@ -2268,18 +2269,18 @@ void ipoib_intf_free(struct net_device *dev) kfree(priv); } -static ssize_t show_pkey(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t pkey_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); return sysfs_emit(buf, "0x%04x\n", priv->pkey); } -static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static DEVICE_ATTR_RO(pkey); -static ssize_t show_umcast(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t umcast_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); @@ -2300,9 +2301,8 @@ void ipoib_set_umcast(struct net_device *ndev, int umcast_val) clear_bit(IPOIB_FLAG_UMCAST, &priv->flags); } -static ssize_t set_umcast(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t umcast_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { unsigned long umcast_val = simple_strtoul(buf, NULL, 0); @@ -2310,7 +2310,7 @@ static ssize_t set_umcast(struct device *dev, return count; } -static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast); +static DEVICE_ATTR_RW(umcast); int ipoib_add_umcast_attr(struct net_device *dev) { @@ -2381,9 +2381,9 @@ static int ipoib_set_mac(struct net_device *dev, void *addr) return 0; } -static ssize_t create_child(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t create_child_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int pkey; int ret; @@ -2398,11 +2398,11 @@ static ssize_t create_child(struct device *dev, return ret ? 
ret : count; } -static DEVICE_ATTR(create_child, S_IWUSR, NULL, create_child); +static DEVICE_ATTR_WO(create_child); -static ssize_t delete_child(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t delete_child_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { int pkey; int ret; @@ -2418,7 +2418,7 @@ static ssize_t delete_child(struct device *dev, return ret ? ret : count; } -static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child); +static DEVICE_ATTR_WO(delete_child); int ipoib_add_pkey_attr(struct net_device *dev) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 5958840dbeed..0322dc75396f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -40,7 +40,7 @@ #include "ipoib.h" -static ssize_t show_parent(struct device *d, struct device_attribute *attr, +static ssize_t parent_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_device *dev = to_net_dev(d); @@ -48,7 +48,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, return sysfs_emit(buf, "%s\n", priv->parent->name); } -static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +static DEVICE_ATTR_RO(parent); static bool is_child_unique(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 136f6c4492e0..b44cbb8e84eb 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -761,7 +761,7 @@ void iser_conn_init(struct iser_conn *iser_conn) ib_conn->reg_cqe.done = iser_reg_comp; } - /** +/* * starts the process of connecting to the target * sleeps until the connection is established or rejected */ diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 18266f07c58d..636d590765f9 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -35,10 +35,10 @@ static const struct kernel_param_ops sg_tablesize_ops = { .get = param_get_int, }; -static int isert_sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE; +static int isert_sg_tablesize = ISCSI_ISER_MIN_SG_TABLESIZE; module_param_cb(sg_tablesize, &sg_tablesize_ops, &isert_sg_tablesize, 0644); MODULE_PARM_DESC(sg_tablesize, - "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 256, max: 4096)"); + "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 128, max: 4096)"); static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); @@ -2231,6 +2231,16 @@ isert_setup_id(struct isert_np *isert_np) } isert_dbg("id %p context %p\n", id, id->context); + /* + * Allow both IPv4 and IPv6 sockets to bind a single port + * at the same time. 
+ */ + ret = rdma_set_afonly(id, 1); + if (ret) { + isert_err("rdma_set_afonly() failed: %d\n", ret); + goto out_id; + } + ret = rdma_bind_addr(id, sa); if (ret) { isert_err("rdma_bind_addr() failed: %d\n", ret); @@ -2387,10 +2397,10 @@ accept_wait: spin_unlock_bh(&np->np_thread_lock); isert_dbg("np_thread_state %d\n", np->np_thread_state); - /** + /* * No point in stalling here when np_thread * is in state RESET/SHUTDOWN/EXIT - bail - **/ + */ return -ENODEV; } spin_unlock_bh(&np->np_thread_lock); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 6c5af13db4e0..ca8cfebe26ca 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -65,9 +65,6 @@ */ #define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) -/* Default I/O size is 1MB */ -#define ISCSI_ISER_DEF_SG_TABLESIZE 256 - /* Minimum I/O size is 512KB */ #define ISCSI_ISER_MIN_SG_TABLESIZE 128 diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 7d53d18a5004..4ee592ccf979 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -250,7 +250,6 @@ static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, const char *buf, size_t count) { struct rtrs_clt_sess *sess; - int ret; sess = container_of(kobj, struct rtrs_clt_sess, kobj); if (!sysfs_streq(buf, "1")) { @@ -258,9 +257,7 @@ static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, attr->attr.name, buf); return -EINVAL; } - ret = rtrs_clt_disconnect_from_sysfs(sess); - if (ret) - return ret; + rtrs_clt_close_conns(sess, true); return count; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 0a794d748a7a..f2c40e50f25e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -32,6 +32,8 @@ #define RTRS_RECONNECT_SEED 8 #define FIRST_CONN 0x01 +/* limit to 128 * 4k = 512k max IO */ +#define RTRS_MAX_SEGMENTS 128 MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -412,6 +414,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, req->inv_errno = errno; } + refcount_inc(&req->ref); err = rtrs_inv_rkey(req); if (unlikely(err)) { rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n", @@ -427,10 +430,14 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, return; } + if (!refcount_dec_and_test(&req->ref)) + return; } ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); } + if (!refcount_dec_and_test(&req->ref)) + return; if (sess->clt->mp_policy == MP_POLICY_MIN_INFLIGHT) atomic_dec(&sess->stats->inflight); @@ -438,10 +445,9 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, req->con = NULL; if (errno) { - rtrs_err_rl(con->c.sess, - "IO request failed: error=%d path=%s [%s:%u]\n", + rtrs_err_rl(con->c.sess, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n", errno, kobject_name(&sess->kobj), sess->hca_name, - sess->hca_port); + sess->hca_port, notify); } if (notify) @@ -480,7 +486,7 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con, return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, &sge, 1, rbuf->rkey, rbuf->addr + off, - imm, flags, wr); + imm, flags, wr, NULL); } static void process_io_rsp(struct rtrs_clt_sess *sess, u32 msg_id, @@ -655,7 +661,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n", 
err); rtrs_rdma_error_recovery(con); - break; } break; case IB_WC_RECV: @@ -814,6 +819,9 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it) int inflight; list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)) + continue; + if (unlikely(!list_empty(raw_cpu_ptr(sess->mp_skip_entry)))) continue; @@ -913,7 +921,7 @@ static inline void path_it_deinit(struct path_it *it) } /** - * rtrs_clt_init_req() Initialize an rtrs_clt_io_req holding information + * rtrs_clt_init_req() - Initialize an rtrs_clt_io_req holding information * about an inflight IO. * The user buffer holding user control message (not data) is copied into * the corresponding buffer of rtrs_iu (req->iu->buf), which later on will @@ -954,6 +962,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, req->need_inv = false; req->need_inv_comp = false; req->inv_errno = 0; + refcount_set(&req->ref, 1); iov_iter_kvec(&iter, READ, vec, 1, usr_len); len = _copy_from_iter(req->iu->buf, usr_len, &iter); @@ -997,9 +1006,10 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, } static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, - struct rtrs_clt_io_req *req, - struct rtrs_rbuf *rbuf, - u32 size, u32 imm) + struct rtrs_clt_io_req *req, + struct rtrs_rbuf *rbuf, bool fr_en, + u32 size, u32 imm, struct ib_send_wr *wr, + struct ib_send_wr *tail) { struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); struct ib_sge *sge = req->sge; @@ -1007,18 +1017,28 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, struct scatterlist *sg; size_t num_sge; int i; - - for_each_sg(req->sglist, sg, req->sg_cnt, i) { - sge[i].addr = sg_dma_address(sg); - sge[i].length = sg_dma_len(sg); - sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + struct ib_send_wr *ptail = NULL; + + if (fr_en) { + i = 0; + sge[i].addr = req->mr->iova; + sge[i].length = req->mr->length; + sge[i].lkey = req->mr->lkey; + i++; + num_sge = 2; + ptail = tail; + } else { + for_each_sg(req->sglist, sg, req->sg_cnt, i) { + sge[i].addr = sg_dma_address(sg); + sge[i].length = sg_dma_len(sg); + sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + } + num_sge = 1 + req->sg_cnt; } sge[i].addr = req->iu->dma_addr; sge[i].length = size; sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; - num_sge = 1 + req->sg_cnt; - /* * From time to time we have to post signalled sends, * or send queue will fill up and only QP reset can help. 
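The fr_en path above collapses the scatterlist into a single SGE backed by a fast-registration MR and then chains three work requests into one post: the MR registration, the RDMA write with immediate, and a signalled local invalidate as the tail. A sketch of that chain under assumed setup (MR already mapped and CQEs initialized elsewhere; hypothetical function name):

static int post_fr_write_chain(struct ib_qp *qp, struct ib_mr *mr,
			       struct ib_cqe *reg_cqe,
			       struct ib_cqe *inv_cqe,
			       struct ib_rdma_wr *write_wr)
{
	struct ib_reg_wr reg_wr = {
		.wr.opcode = IB_WR_REG_MR,
		.wr.wr_cqe = reg_cqe,
		.mr = mr,
		.key = mr->rkey,
		.access = IB_ACCESS_LOCAL_WRITE,
	};
	struct ib_send_wr inv_wr = {
		.opcode = IB_WR_LOCAL_INV,
		.wr_cqe = inv_cqe,
		.send_flags = IB_SEND_SIGNALED,
		.ex.invalidate_rkey = mr->rkey,
	};

	/* REG_MR -> RDMA_WRITE_WITH_IMM -> LOCAL_INV, one doorbell */
	reg_wr.wr.next = &write_wr->wr;
	write_wr->wr.next = &inv_wr;

	return ib_post_send(qp, &reg_wr.wr, NULL);
}

The signalled invalidate completion is what pairs with the extra refcount_inc(&req->ref) the write path takes before posting.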
@@ -1031,7 +1051,22 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, sge, num_sge, rbuf->rkey, rbuf->addr, imm, - flags, NULL); + flags, wr, ptail); +} + +static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) +{ + int nr; + + /* Align the MR to a 4K page size to match the block virt boundary */ + nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); + if (nr < 0) + return nr; + if (unlikely(nr < req->sg_cnt)) + return -EINVAL; + ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); + + return nr; } static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) @@ -1044,6 +1079,10 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) struct rtrs_rbuf *rbuf; int ret, count = 0; u32 imm, buf_id; + struct ib_reg_wr rwr; + struct ib_send_wr inv_wr; + struct ib_send_wr *wr = NULL; + bool fr_en = false; const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; @@ -1072,15 +1111,43 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) req->sg_size = tsize; rbuf = &sess->rbufs[buf_id]; + if (count) { + ret = rtrs_map_sg_fr(req, count); + if (ret < 0) { + rtrs_err_rl(s, + "Write request failed, failed to map fast reg. data, err: %d\n", + ret); + ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + req->sg_cnt, req->dir); + return ret; + } + inv_wr = (struct ib_send_wr) { + .opcode = IB_WR_LOCAL_INV, + .wr_cqe = &req->inv_cqe, + .send_flags = IB_SEND_SIGNALED, + .ex.invalidate_rkey = req->mr->rkey, + }; + req->inv_cqe.done = rtrs_clt_inv_rkey_done; + rwr = (struct ib_reg_wr) { + .wr.opcode = IB_WR_REG_MR, + .wr.wr_cqe = &fast_reg_cqe, + .mr = req->mr, + .key = req->mr->rkey, + .access = (IB_ACCESS_LOCAL_WRITE), + }; + wr = &rwr.wr; + fr_en = true; + refcount_inc(&req->ref); + } /* * Update stats now, after request is successfully sent it is not * safe anymore to touch it. */ rtrs_clt_update_all_stats(req, WRITE); - ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, - req->usr_len + sizeof(*msg), - imm); + ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, + req->usr_len + sizeof(*msg), + imm, wr, &inv_wr); if (unlikely(ret)) { rtrs_err_rl(s, "Write request failed: error=%d path=%s [%s:%u]\n", @@ -1096,21 +1163,6 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) return ret; } -static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) -{ - int nr; - - /* Align the MR to a 4K page size to match the block virt boundary */ - nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); - if (nr < 0) - return nr; - if (unlikely(nr < req->sg_cnt)) - return -EINVAL; - ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); - - return nr; -} - static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) { struct rtrs_clt_con *con = req->con; @@ -1219,7 +1271,7 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) } /** - * rtrs_clt_failover_req() Try to find an active path for a failed request + * rtrs_clt_failover_req() - Try to find an active path for a failed request * @clt: clt context * @fail_req: a failed io request. 
*/ @@ -1305,7 +1357,6 @@ static void free_sess_reqs(struct rtrs_clt_sess *sess) static int alloc_sess_reqs(struct rtrs_clt_sess *sess) { struct rtrs_clt_io_req *req; - struct rtrs_clt *clt = sess->clt; int i, err = -ENOMEM; sess->reqs = kcalloc(sess->queue_depth, sizeof(*sess->reqs), @@ -1322,8 +1373,7 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess) if (!req->iu) goto out; - req->sge = kmalloc_array(clt->max_segments + 1, - sizeof(*req->sge), GFP_KERNEL); + req->sge = kcalloc(2, sizeof(*req->sge), GFP_KERNEL); if (!req->sge) goto out; @@ -1415,7 +1465,8 @@ static void query_fast_reg_mode(struct rtrs_clt_sess *sess) sess->max_pages_per_mr = min3(sess->max_pages_per_mr, (u32)max_pages_per_mr, ib_dev->attrs.max_fast_reg_page_list_len); - sess->max_send_sge = ib_dev->attrs.max_send_sge; + sess->clt->max_segments = + min(sess->max_pages_per_mr, sess->clt->max_segments); } static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, @@ -1449,23 +1500,12 @@ static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess) rtrs_wq); } -static void rtrs_clt_start_hb(struct rtrs_clt_sess *sess) -{ - rtrs_start_hb(&sess->s); -} - -static void rtrs_clt_stop_hb(struct rtrs_clt_sess *sess) -{ - rtrs_stop_hb(&sess->s); -} - static void rtrs_clt_reconnect_work(struct work_struct *work); static void rtrs_clt_close_work(struct work_struct *work); static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, - const struct rtrs_addr *path, - size_t con_num, u16 max_segments, - u32 nr_poll_queues) + const struct rtrs_addr *path, + size_t con_num, u32 nr_poll_queues) { struct rtrs_clt_sess *sess; int err = -ENOMEM; @@ -1505,9 +1545,9 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, if (path->src) memcpy(&sess->s.src_addr, path->src, rdma_addr_size((struct sockaddr *)path->src)); - strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); + strscpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); sess->clt = clt; - sess->max_pages_per_mr = max_segments; + sess->max_pages_per_mr = RTRS_MAX_SEGMENTS; init_waitqueue_head(&sess->state_wq); sess->state = RTRS_CLT_CONNECTING; atomic_set(&sess->connected_cnt, 0); @@ -1581,20 +1621,13 @@ static void destroy_con(struct rtrs_clt_con *con) static int create_con_cq_qp(struct rtrs_clt_con *con) { struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - u32 max_send_wr, max_recv_wr, cq_size; + u32 max_send_wr, max_recv_wr, cq_num, max_send_sge, wr_limit; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; lockdep_assert_held(&con->con_mutex); if (con->c.cid == 0) { - /* - * One completion for each receive and two for each send - * (send request + registration) - * + 2 for drain and heartbeat - * in case qp gets into error state - */ - max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; - max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; + max_send_sge = 1; /* We must be the first here */ if (WARN_ON(sess->s.dev)) return -EINVAL; @@ -1613,6 +1646,17 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) } sess->s.dev_ref = 1; query_fast_reg_mode(sess); + wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; + /* + * Two (request + registration) completion for send + * Two for recv if always_invalidate is set on server + * or one for recv. + * + 2 for drain and heartbeat + * in case qp gets into error state. + */ + max_send_wr = + min_t(int, wr_limit, SERVICE_CON_QUEUE_DEPTH * 2 + 2); + max_recv_wr = max_send_wr; } else { /* * Here we assume that session members are correctly set. 
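Throughout create_con_cq_qp() the patch stops hard-coding queue sizes and instead clamps them against the device's max_qp_wr attribute. A sketch of the sizing rule for a data connection (illustrative helper, not in the patch):

static void clamp_wr_budget(struct ib_device *ibdev, u32 queue_depth,
			    u32 *max_send_wr, u32 *max_recv_wr)
{
	u32 wr_limit = ibdev->attrs.max_qp_wr;

	/* QD * (REQ + RSP + FR REGS or INVS) + drain */
	*max_send_wr = min_t(u32, wr_limit, queue_depth * 3 + 1);
	*max_recv_wr = min_t(u32, wr_limit, queue_depth * 3 + 1);
}

The CQ is then sized as max_send_wr + max_recv_wr (the new cq_num), so a completion slot exists for every outstanding WR even when the QP drops into the error state.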
@@ -1624,35 +1668,36 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) if (WARN_ON(!sess->queue_depth)) return -EINVAL; + wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; /* Shared between connections */ sess->s.dev_ref++; - max_send_wr = - min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, + max_send_wr = min_t(int, wr_limit, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ sess->queue_depth * 3 + 1); - max_recv_wr = - min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, + max_recv_wr = min_t(int, wr_limit, sess->queue_depth * 3 + 1); + max_send_sge = 2; } + cq_num = max_send_wr + max_recv_wr; /* alloc iu to recv new rkey reply when server reports flags set */ if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { - con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp), + con->rsp_ius = rtrs_iu_alloc(cq_num, sizeof(*rsp), GFP_KERNEL, sess->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_rdma_done); if (!con->rsp_ius) return -ENOMEM; - con->queue_size = max_recv_wr; + con->queue_num = cq_num; } - cq_size = max_send_wr + max_recv_wr; + cq_num = max_send_wr + max_recv_wr; cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; if (con->c.cid >= sess->s.irq_con_num) - err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, - cq_vector, cq_size, max_send_wr, + err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge, + cq_vector, cq_num, max_send_wr, max_recv_wr, IB_POLL_DIRECT); else - err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, - cq_vector, cq_size, max_send_wr, + err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge, + cq_vector, cq_num, max_send_wr, max_recv_wr, IB_POLL_SOFTIRQ); /* * In case of error we do not bother to clean previous allocations, @@ -1672,9 +1717,9 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { - rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size); + rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_num); con->rsp_ius = NULL; - con->queue_size = 0; + con->queue_num = 0; } if (sess->s.dev_ref && !--sess->s.dev_ref) { rtrs_ib_dev_put(sess->s.dev); @@ -1783,12 +1828,19 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, if (con->c.cid == 0) { queue_depth = le16_to_cpu(msg->queue_depth); - if (queue_depth > MAX_SESS_QUEUE_DEPTH) { - rtrs_err(clt, "Invalid RTRS message: queue=%d\n", - queue_depth); + if (sess->queue_depth > 0 && queue_depth != sess->queue_depth) { + rtrs_err(clt, "Error: queue depth changed\n"); + + /* + * Stop any more reconnection attempts + */ + sess->reconnect_attempts = -1; + rtrs_err(clt, + "Disabling auto-reconnect. Trigger a manual reconnect after issue is resolved\n"); return -ECONNRESET; } - if (!sess->rbufs || sess->queue_depth < queue_depth) { + + if (!sess->rbufs) { kfree(sess->rbufs); sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs), GFP_KERNEL); @@ -1802,7 +1854,7 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, sess->chunk_size = sess->max_io_size + sess->max_hdr_size; /* - * Global queue depth and IO size is always a minimum. + * Global IO size is always a minimum. * If while a reconnection server sends us a value a bit * higher - client does not care and uses cached minimum. * @@ -1810,8 +1862,7 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, * connections in parallel, use lock. 
*/ mutex_lock(&clt->paths_mutex); - clt->queue_depth = min_not_zero(sess->queue_depth, - clt->queue_depth); + clt->queue_depth = sess->queue_depth; clt->max_io_size = min_not_zero(sess->max_io_size, clt->max_io_size); mutex_unlock(&clt->paths_mutex); @@ -1869,7 +1920,7 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, return -ECONNRESET; } -static void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) +void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) { if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL)) queue_work(rtrs_wq, &sess->close_work); @@ -2098,7 +2149,7 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) */ synchronize_rcu(); - rtrs_clt_stop_hb(sess); + rtrs_stop_hb(&sess->s); /* * The order it utterly crucial: firstly disconnect and complete all @@ -2291,7 +2342,7 @@ static int init_conns(struct rtrs_clt_sess *sess) if (err) goto destroy; - rtrs_clt_start_hb(sess); + rtrs_start_hb(&sess->s); return 0; @@ -2465,7 +2516,7 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) int err; rx_sz = sizeof(struct rtrs_msg_info_rsp); - rx_sz += sizeof(u64) * MAX_SESS_QUEUE_DEPTH; + rx_sz += sizeof(struct rtrs_sg_desc) * sess->queue_depth; tx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), GFP_KERNEL, sess->s.dev->ib_dev, DMA_TO_DEVICE, @@ -2617,7 +2668,6 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, u16 port, size_t pdu_sz, void *priv, void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev), - unsigned int max_segments, unsigned int reconnect_delay_sec, unsigned int max_reconnect_attempts) { @@ -2646,13 +2696,13 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, clt->paths_up = MAX_PATHS_NUM; clt->port = port; clt->pdu_sz = pdu_sz; - clt->max_segments = max_segments; + clt->max_segments = RTRS_MAX_SEGMENTS; clt->reconnect_delay_sec = reconnect_delay_sec; clt->max_reconnect_attempts = max_reconnect_attempts; clt->priv = priv; clt->link_ev = link_ev; clt->mp_policy = MP_POLICY_MIN_INFLIGHT; - strlcpy(clt->sessname, sessname, sizeof(clt->sessname)); + strscpy(clt->sessname, sessname, sizeof(clt->sessname)); init_waitqueue_head(&clt->permits_wait); mutex_init(&clt->paths_ev_mutex); mutex_init(&clt->paths_mutex); @@ -2715,7 +2765,6 @@ static void free_clt(struct rtrs_clt *clt) * @port: port to be used by the RTRS session * @pdu_sz: Size of extra payload which can be accessed after permit allocation. * @reconnect_delay_sec: time between reconnect tries - * @max_segments: Max. 
number of segments per IO request * @max_reconnect_attempts: Number of times to reconnect on error before giving * up, 0 for * disabled, -1 for forever * @nr_poll_queues: number of polling mode connection using IB_POLL_DIRECT flag @@ -2730,7 +2779,6 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, const struct rtrs_addr *paths, size_t paths_num, u16 port, size_t pdu_sz, u8 reconnect_delay_sec, - u16 max_segments, s16 max_reconnect_attempts, u32 nr_poll_queues) { struct rtrs_clt_sess *sess, *tmp; @@ -2739,7 +2787,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, ops->link_ev, - max_segments, reconnect_delay_sec, + reconnect_delay_sec, max_reconnect_attempts); if (IS_ERR(clt)) { err = PTR_ERR(clt); @@ -2749,7 +2797,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, struct rtrs_clt_sess *sess; sess = alloc_sess(clt, &paths[i], nr_cpu_ids, - max_segments, nr_poll_queues); + nr_poll_queues); if (IS_ERR(sess)) { err = PTR_ERR(sess); goto close_all_sess; @@ -2762,6 +2810,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, if (err) { list_del_rcu(&sess->s.entry); rtrs_clt_close_conns(sess, true); + free_percpu(sess->stats->pcpu_stats); + kfree(sess->stats); free_sess(sess); goto close_all_sess; } @@ -2770,6 +2820,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, if (err) { list_del_rcu(&sess->s.entry); rtrs_clt_close_conns(sess, true); + free_percpu(sess->stats->pcpu_stats); + kfree(sess->stats); free_sess(sess); goto close_all_sess; } @@ -2841,13 +2893,6 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess) return err; } -int rtrs_clt_disconnect_from_sysfs(struct rtrs_clt_sess *sess) -{ - rtrs_clt_close_conns(sess, true); - - return 0; -} - int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, const struct attribute *sysfs_self) { @@ -3014,6 +3059,7 @@ int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr) return -ECOMM; attr->queue_depth = clt->queue_depth; + attr->max_segments = clt->max_segments; /* Cap max_io_size to min of remote buffer size and the fr pages */ attr->max_io_size = min_t(int, clt->max_io_size, clt->max_segments * SZ_4K); @@ -3028,7 +3074,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, struct rtrs_clt_sess *sess; int err; - sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments, 0); + sess = alloc_sess(clt, addr, nr_cpu_ids, 0); if (IS_ERR(sess)) return PTR_ERR(sess); @@ -3052,6 +3098,8 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, close_sess: rtrs_clt_remove_path_from_arr(sess); rtrs_clt_close_conns(sess, true); + free_percpu(sess->stats->pcpu_stats); + kfree(sess->stats); free_sess(sess); return err; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 4c52f30e4da1..e276a2dfcf7c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -71,7 +71,7 @@ struct rtrs_clt_stats { struct rtrs_clt_con { struct rtrs_con c; struct rtrs_iu *rsp_ius; - u32 queue_size; + u32 queue_num; unsigned int cpu; struct mutex con_mutex; atomic_t io_cnt; @@ -116,6 +116,7 @@ struct rtrs_clt_io_req { int inv_errno; bool need_inv_comp; bool need_inv; + refcount_t ref; }; struct rtrs_rbuf { @@ -141,7 +142,6 @@ struct rtrs_clt_sess { u32 chunk_size; size_t queue_depth; u32 max_pages_per_mr; - int max_send_sge; u32 flags; struct kobject kobj; u8 for_new_clt; @@ -202,7 +202,7 @@ static inline struct rtrs_permit *get_permit(struct rtrs_clt 
*clt, int idx) } int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess); -int rtrs_clt_disconnect_from_sysfs(struct rtrs_clt_sess *sess); +void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait); int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, struct rtrs_addr *addr); int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 86e65cf30cab..36f184a3b676 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -47,12 +47,16 @@ enum { MAX_PATHS_NUM = 128, /* - * With the size of struct rtrs_permit allocated on the client, 4K - * is the maximum number of rtrs_permits we can allocate. This number is - * also used on the client to allocate the IU for the user connection - * to receive the RDMA addresses from the server. + * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) + * and the minimum chunk size is 4096 (2^12). + * So the maximum sess_queue_depth is 65536 (2^16) in theory. + * But mempool_create, create_qp and ib_post_send fail with + * "cannot allocate memory" error if sess_queue_depth is too big. + * Therefore the practical max value of sess_queue_depth is + * somewhere between 1 and 65534 and it depends on the system. */ - MAX_SESS_QUEUE_DEPTH = 4096, + MAX_SESS_QUEUE_DEPTH = 65535, + MIN_CHUNK_SIZE = 8192, RTRS_HB_INTERVAL_MS = 5000, RTRS_HB_MISSED_MAX = 5, @@ -91,7 +95,7 @@ struct rtrs_con { struct ib_cq *cq; struct rdma_cm_id *cm_id; unsigned int cid; - u16 cq_size; + int nr_cqe; }; struct rtrs_sess { @@ -290,10 +294,10 @@ struct rtrs_msg_rdma_hdr { /* rtrs.c */ -struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, +struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t, struct ib_device *dev, enum dma_data_direction, void (*done)(struct ib_cq *cq, struct ib_wc *wc)); -void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size); +void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num); int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head); @@ -301,15 +305,16 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, struct ib_sge *sge, unsigned int num_sge, u32 rkey, u64 rdma_addr, u32 imm_data, enum ib_send_flags flags, - struct ib_send_wr *head); + struct ib_send_wr *head, + struct ib_send_wr *tail); int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe); int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, u32 imm_data, enum ib_send_flags flags, struct ib_send_wr *head); -int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, int cq_size, +int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, + u32 max_send_sge, int cq_vector, int nr_cqe, u32 max_send_wr, u32 max_recv_wr, enum ib_poll_context poll_ctx); void rtrs_cq_qp_destroy(struct rtrs_con *con); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c index e102b1368d0c..12c374b5eb6e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c @@ -27,12 +27,10 @@ ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page, size_t len) { struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; - struct rtrs_srv_sess *sess = stats->sess; - return
scnprintf(page, len, "%lld %lld %lld %lld %u\n", - (s64)atomic64_read(&r->dir[READ].cnt), - (s64)atomic64_read(&r->dir[READ].size_total), - (s64)atomic64_read(&r->dir[WRITE].cnt), - (s64)atomic64_read(&r->dir[WRITE].size_total), - atomic_read(&sess->ids_inflight)); + return sysfs_emit(page, "%lld %lld %lld %lld %u\n", + (s64)atomic64_read(&r->dir[READ].cnt), + (s64)atomic64_read(&r->dir[READ].size_total), + (s64)atomic64_read(&r->dir[WRITE].cnt), + (s64)atomic64_read(&r->dir[WRITE].size_total), 0); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index a9288175fbb5..20efd44297fb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -208,6 +208,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) device_del(&srv->dev); put_device(&srv->dev); } else { + put_device(&srv->dev); mutex_unlock(&srv->paths_mutex); } } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 0fa116cabc44..3df290086169 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -67,59 +67,33 @@ static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s) return container_of(s, struct rtrs_srv_sess, s); } -static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) +static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) { enum rtrs_srv_state old_state; bool changed = false; - lockdep_assert_held(&sess->state_lock); + spin_lock_irq(&sess->state_lock); old_state = sess->state; switch (new_state) { case RTRS_SRV_CONNECTED: - switch (old_state) { - case RTRS_SRV_CONNECTING: + if (old_state == RTRS_SRV_CONNECTING) changed = true; - fallthrough; - default: - break; - } break; case RTRS_SRV_CLOSING: - switch (old_state) { - case RTRS_SRV_CONNECTING: - case RTRS_SRV_CONNECTED: + if (old_state == RTRS_SRV_CONNECTING || + old_state == RTRS_SRV_CONNECTED) changed = true; - fallthrough; - default: - break; - } break; case RTRS_SRV_CLOSED: - switch (old_state) { - case RTRS_SRV_CLOSING: + if (old_state == RTRS_SRV_CLOSING) changed = true; - fallthrough; - default: - break; - } break; default: break; } if (changed) sess->state = new_state; - - return changed; -} - -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) -{ - bool changed; - - spin_lock_irq(&sess->state_lock); - changed = __rtrs_srv_change_state(sess, new_state); spin_unlock_irq(&sess->state_lock); return changed; @@ -137,7 +111,6 @@ static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess) struct rtrs_srv *srv = sess->srv; int i; - WARN_ON(atomic_read(&sess->ids_inflight)); if (sess->ops_ids) { for (i = 0; i < srv->queue_depth; i++) free_id(sess->ops_ids[i]); @@ -152,11 +125,19 @@ static struct ib_cqe io_comp_cqe = { .done = rtrs_srv_rdma_done }; +static inline void rtrs_srv_inflight_ref_release(struct percpu_ref *ref) +{ + struct rtrs_srv_sess *sess = container_of(ref, struct rtrs_srv_sess, ids_inflight_ref); + + percpu_ref_exit(&sess->ids_inflight_ref); + complete(&sess->complete_done); +} + static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) { struct rtrs_srv *srv = sess->srv; struct rtrs_srv_op *id; - int i; + int i, ret; sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids), GFP_KERNEL); @@ -170,8 +151,14 @@ static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) sess->ops_ids[i] = id; } -
init_waitqueue_head(&sess->ids_waitq); - atomic_set(&sess->ids_inflight, 0); + + ret = percpu_ref_init(&sess->ids_inflight_ref, + rtrs_srv_inflight_ref_release, 0, GFP_KERNEL); + if (ret) { + pr_err("Percpu reference init failed\n"); + goto err; + } + init_completion(&sess->complete_done); return 0; @@ -182,21 +169,14 @@ err: static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess) { - atomic_inc(&sess->ids_inflight); + percpu_ref_get(&sess->ids_inflight_ref); } static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess) { - if (atomic_dec_and_test(&sess->ids_inflight)) - wake_up(&sess->ids_waitq); + percpu_ref_put(&sess->ids_inflight_ref); } -static void rtrs_srv_wait_ops_ids(struct rtrs_srv_sess *sess) -{ - wait_event(sess->ids_waitq, !atomic_read(&sess->ids_inflight)); -} - - static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); @@ -773,7 +753,40 @@ static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) mutex_unlock(&srv->paths_ev_mutex); } +static bool exist_sessname(struct rtrs_srv_ctx *ctx, + const char *sessname, const uuid_t *path_uuid) +{ + struct rtrs_srv *srv; + struct rtrs_srv_sess *sess; + bool found = false; + + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { + mutex_lock(&srv->paths_mutex); + + /* when a client with same uuid and same sessname tried to add a path */ + if (uuid_equal(&srv->paths_uuid, path_uuid)) { + mutex_unlock(&srv->paths_mutex); + continue; + } + + list_for_each_entry(sess, &srv->paths_list, s.entry) { + if (strlen(sess->s.sessname) == strlen(sessname) && + !strcmp(sess->s.sessname, sessname)) { + found = true; + break; + } + } + mutex_unlock(&srv->paths_mutex); + if (found) + break; + } + mutex_unlock(&ctx->srv_mutex); + return found; +} + static int post_recv_sess(struct rtrs_srv_sess *sess); +static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno); static int process_info_req(struct rtrs_srv_con *con, struct rtrs_msg_info_req *msg) @@ -792,10 +805,17 @@ static int process_info_req(struct rtrs_srv_con *con, rtrs_err(s, "post_recv_sess(), err: %d\n", err); return err; } + + if (exist_sessname(sess->srv->ctx, + msg->sessname, &sess->srv->paths_uuid)) { + rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname); + return -EPERM; + } + strscpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); + rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL); if (unlikely(!rwr)) return -ENOMEM; - strlcpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); tx_sz = sizeof(*rsp); tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num; @@ -1276,7 +1296,7 @@ int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) list_for_each_entry(sess, &srv->paths_list, s.entry) { if (sess->state != RTRS_SRV_CONNECTED) continue; - strlcpy(sessname, sess->s.sessname, + strscpy(sessname, sess->s.sessname, min_t(size_t, sizeof(sess->s.sessname), len)); err = 0; break; @@ -1288,7 +1308,7 @@ int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) EXPORT_SYMBOL(rtrs_srv_get_sess_name); /** - * rtrs_srv_get_sess_qdepth() - Get rtrs_srv qdepth. + * rtrs_srv_get_queue_depth() - Get rtrs_srv qdepth. * @srv: Session */ int rtrs_srv_get_queue_depth(struct rtrs_srv *srv) @@ -1356,8 +1376,10 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, * If this request is not the first connection request from the * client for this session then fail and return error. 
*/ - if (!first_conn) + if (!first_conn) { + pr_err_ratelimited("Error: Not the first connection request for this session\n"); return ERR_PTR(-ENXIO); + } /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); @@ -1481,6 +1503,7 @@ static void free_sess(struct rtrs_srv_sess *sess) kobject_del(&sess->kobj); kobject_put(&sess->kobj); } else { + kfree(sess->stats); kfree(sess); } } @@ -1503,8 +1526,15 @@ static void rtrs_srv_close_work(struct work_struct *work) rdma_disconnect(con->c.cm_id); ib_drain_qp(con->c.qp); } - /* Wait for all inflights */ - rtrs_srv_wait_ops_ids(sess); + + /* + * Degrade ref count to the usual model with a single shared + * atomic_t counter + */ + percpu_ref_kill(&sess->ids_inflight_ref); + + /* Wait for all completions */ + wait_for_completion(&sess->complete_done); /* Notify upper layer if we are the last path */ rtrs_srv_sess_down(sess); @@ -1604,7 +1634,7 @@ static int create_con(struct rtrs_srv_sess *sess, struct rtrs_sess *s = &sess->s; struct rtrs_srv_con *con; - u32 cq_size, wr_queue_size; + u32 cq_num, max_send_wr, max_recv_wr, wr_limit; int err, cq_vector; con = kzalloc(sizeof(*con), GFP_KERNEL); @@ -1619,36 +1649,42 @@ static int create_con(struct rtrs_srv_sess *sess, con->c.sess = &sess->s; con->c.cid = cid; atomic_set(&con->wr_cnt, 1); + wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; if (con->c.cid == 0) { /* * All receive and all send (each requiring invalidate) * + 2 for drain and heartbeat */ - wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; - cq_size = wr_queue_size; + max_send_wr = min_t(int, wr_limit, + SERVICE_CON_QUEUE_DEPTH * 2 + 2); + max_recv_wr = max_send_wr; } else { + /* when always_invalidate is enabled, we need linv+rinv+mr+imm */ + if (always_invalidate) + max_send_wr = + min_t(int, wr_limit, + srv->queue_depth * (1 + 4) + 1); + else + max_send_wr = + min_t(int, wr_limit, + srv->queue_depth * (1 + 2) + 1); + + max_recv_wr = srv->queue_depth + 1; /* * If we have all receive requests posted and * all write requests posted and each read request * requires an invalidate request + drain * and qp gets into error state. */ - cq_size = srv->queue_depth * 3 + 1; - /* - * In theory we might have queue_depth * 32 - * outstanding requests if an unsafe global key is used - * and we have queue_depth read requests each consisting - * of 32 different addresses. div 3 for mlx5.
- */ - wr_queue_size = sess->s.dev->ib_dev->attrs.max_qp_wr / 3; } - atomic_set(&con->sq_wr_avail, wr_queue_size); + cq_num = max_send_wr + max_recv_wr; + atomic_set(&con->sq_wr_avail, max_send_wr); cq_vector = rtrs_srv_get_next_cq_vector(sess); /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ - err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size, - wr_queue_size, wr_queue_size, + err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_num, + max_send_wr, max_recv_wr, IB_POLL_WORKQUEUE); if (err) { rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); @@ -1728,7 +1764,7 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, path.src = &sess->s.src_addr; path.dst = &sess->s.dst_addr; rtrs_addr_to_str(&path, str, sizeof(str)); - strlcpy(sess->s.sessname, str, sizeof(sess->s.sessname)); + strscpy(sess->s.sessname, str, sizeof(sess->s.sessname)); sess->s.con_num = con_num; sess->s.recon_cnt = recon_cnt; @@ -1780,38 +1816,39 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, u16 version, con_num, cid; u16 recon_cnt; - int err; + int err = -ECONNRESET; if (len < sizeof(*msg)) { pr_err("Invalid RTRS connection request\n"); - goto reject_w_econnreset; + goto reject_w_err; } if (le16_to_cpu(msg->magic) != RTRS_MAGIC) { pr_err("Invalid RTRS magic\n"); - goto reject_w_econnreset; + goto reject_w_err; } version = le16_to_cpu(msg->version); if (version >> 8 != RTRS_PROTO_VER_MAJOR) { pr_err("Unsupported major RTRS version: %d, expected %d\n", version >> 8, RTRS_PROTO_VER_MAJOR); - goto reject_w_econnreset; + goto reject_w_err; } con_num = le16_to_cpu(msg->cid_num); if (con_num > 4096) { /* Sanity check */ pr_err("Too many connections requested: %d\n", con_num); - goto reject_w_econnreset; + goto reject_w_err; } cid = le16_to_cpu(msg->cid); if (cid >= con_num) { /* Sanity check */ pr_err("Incorrect cid: %d >= %d\n", cid, con_num); - goto reject_w_econnreset; + goto reject_w_err; } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); if (IS_ERR(srv)) { err = PTR_ERR(srv); + pr_err("get_or_create_srv(), error %d\n", err); goto reject_w_err; } mutex_lock(&srv->paths_mutex); @@ -1826,7 +1863,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, rtrs_err(s, "Session in wrong state: %s\n", rtrs_srv_state_str(sess->state)); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } /* * Sanity checks @@ -1835,13 +1872,13 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, rtrs_err(s, "Incorrect request: %d, %d\n", cid, con_num); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } if (s->con[cid]) { rtrs_err(s, "Connection already exists: %d\n", cid); mutex_unlock(&srv->paths_mutex); - goto reject_w_econnreset; + goto reject_w_err; } } else { sess = __alloc_sess(srv, cm_id, con_num, recon_cnt, @@ -1850,11 +1887,13 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, mutex_unlock(&srv->paths_mutex); put_srv(srv); err = PTR_ERR(sess); + pr_err("RTRS server session allocation failed: %d\n", err); goto reject_w_err; } } err = create_con(sess, cm_id, cid); if (err) { + rtrs_err((&sess->s), "create_con(), error %d\n", err); (void)rtrs_rdma_do_reject(cm_id, err); /* * Since session has other connections we follow normal way @@ -1865,6 +1904,7 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } err = rtrs_rdma_do_accept(sess, cm_id); if (err) { + rtrs_err((&sess->s), "rtrs_rdma_do_accept(), error %d\n", err); 
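On the server side, the inflight-IO accounting in these hunks switches from an atomic_t plus waitqueue to a percpu_ref, so the per-IO get/put becomes a cheap percpu operation and close pays for the atomic transition only once. A sketch of that lifecycle (hypothetical tracker type; the real code embeds the ref in rtrs_srv_sess):

#include <linux/percpu-refcount.h>
#include <linux/completion.h>

struct inflight_tracker {
	struct percpu_ref ref;
	struct completion done;
};

static void inflight_release(struct percpu_ref *ref)
{
	struct inflight_tracker *t =
		container_of(ref, struct inflight_tracker, ref);

	complete(&t->done);		/* last reference dropped */
}

static int inflight_init(struct inflight_tracker *t)
{
	init_completion(&t->done);
	return percpu_ref_init(&t->ref, inflight_release, 0, GFP_KERNEL);
}

/* per IO: percpu_ref_get(&t->ref) on submit, percpu_ref_put(&t->ref) on done */

static void inflight_shutdown(struct inflight_tracker *t)
{
	percpu_ref_kill(&t->ref);	/* switch to atomic mode, drop base ref */
	wait_for_completion(&t->done);	/* all inflight IOs have completed */
	percpu_ref_exit(&t->ref);
}

(The patch itself calls percpu_ref_exit() from the release callback instead; either ordering works as long as exit happens after the ref is dead.)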
(void)rtrs_rdma_do_reject(cm_id, err); /* * Since current connection was successfully added to the @@ -1882,9 +1922,6 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, reject_w_err: return rtrs_rdma_do_reject(cm_id, err); -reject_w_econnreset: - return rtrs_rdma_do_reject(cm_id, -ECONNRESET); - close_and_return_err: mutex_unlock(&srv->paths_mutex); close_sess(sess); @@ -2177,9 +2214,9 @@ static int check_module_params(void) sess_queue_depth, 1, MAX_SESS_QUEUE_DEPTH); return -EINVAL; } - if (max_chunk_size < 4096 || !is_power_of_2(max_chunk_size)) { + if (max_chunk_size < MIN_CHUNK_SIZE || !is_power_of_2(max_chunk_size)) { pr_err("Invalid max_chunk_size value %d, has to be >= %d and should be power of two.\n", - max_chunk_size, 4096); + max_chunk_size, MIN_CHUNK_SIZE); return -EINVAL; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 9543ae19996c..f8da2e3f0bda 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -81,8 +81,8 @@ struct rtrs_srv_sess { spinlock_t state_lock; int cur_cq_vector; struct rtrs_srv_op **ops_ids; - atomic_t ids_inflight; - wait_queue_head_t ids_waitq; + struct percpu_ref ids_inflight_ref; + struct completion complete_done; struct rtrs_srv_mr *mrs; unsigned int mrs_num; dma_addr_t *dma_addr; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index a7847282a2eb..61919ebd92b2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -18,7 +18,7 @@ MODULE_DESCRIPTION("RDMA Transport Core"); MODULE_LICENSE("GPL"); -struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, +struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask, struct ib_device *dma_dev, enum dma_data_direction dir, void (*done)(struct ib_cq *cq, struct ib_wc *wc)) @@ -26,10 +26,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, struct rtrs_iu *ius, *iu; int i; - ius = kcalloc(queue_size, sizeof(*ius), gfp_mask); + ius = kcalloc(iu_num, sizeof(*ius), gfp_mask); if (!ius) return NULL; - for (i = 0; i < queue_size; i++) { + for (i = 0; i < iu_num; i++) { iu = &ius[i]; iu->direction = dir; iu->buf = kzalloc(size, gfp_mask); @@ -50,7 +50,7 @@ err: } EXPORT_SYMBOL_GPL(rtrs_iu_alloc); -void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size) +void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_num) { struct rtrs_iu *iu; int i; @@ -58,7 +58,7 @@ void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size) if (!ius) return; - for (i = 0; i < queue_size; i++) { + for (i = 0; i < queue_num; i++) { iu = &ius[i]; ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction); kfree(iu->buf); @@ -105,18 +105,21 @@ int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe) EXPORT_SYMBOL_GPL(rtrs_post_recv_empty); static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head, - struct ib_send_wr *wr) + struct ib_send_wr *wr, struct ib_send_wr *tail) { if (head) { - struct ib_send_wr *tail = head; + struct ib_send_wr *next = head; - while (tail->next) - tail = tail->next; - tail->next = wr; + while (next->next) + next = next->next; + next->next = wr; } else { head = wr; } + if (tail) + wr->next = tail; + return ib_post_send(qp, head, NULL); } @@ -142,15 +145,16 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, .send_flags = IB_SEND_SIGNALED, }; - return rtrs_post_send(con->qp, 
head, &wr); + return rtrs_post_send(con->qp, head, &wr, NULL); } EXPORT_SYMBOL_GPL(rtrs_iu_post_send); int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, - struct ib_sge *sge, unsigned int num_sge, - u32 rkey, u64 rdma_addr, u32 imm_data, - enum ib_send_flags flags, - struct ib_send_wr *head) + struct ib_sge *sge, unsigned int num_sge, + u32 rkey, u64 rdma_addr, u32 imm_data, + enum ib_send_flags flags, + struct ib_send_wr *head, + struct ib_send_wr *tail) { struct ib_rdma_wr wr; int i; @@ -174,7 +178,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, if (WARN_ON(sge[i].length == 0)) return -EINVAL; - return rtrs_post_send(con->qp, head, &wr.wr); + return rtrs_post_send(con->qp, head, &wr.wr, tail); } EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm); @@ -191,7 +195,7 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, .wr.ex.imm_data = cpu_to_be32(imm_data), }; - return rtrs_post_send(con->qp, head, &wr.wr); + return rtrs_post_send(con->qp, head, &wr.wr, NULL); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); @@ -212,20 +216,20 @@ static void qp_event_handler(struct ib_event *ev, void *ctx) } } -static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size, +static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, enum ib_poll_context poll_ctx) { struct rdma_cm_id *cm_id = con->cm_id; struct ib_cq *cq; - cq = ib_cq_pool_get(cm_id->device, cq_size, cq_vector, poll_ctx); + cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); if (IS_ERR(cq)) { rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n", PTR_ERR(cq)); return PTR_ERR(cq); } con->cq = cq; - con->cq_size = cq_size; + con->nr_cqe = nr_cqe; return 0; } @@ -260,20 +264,20 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, } int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_sge, int cq_vector, int nr_cqe, u32 max_send_wr, u32 max_recv_wr, enum ib_poll_context poll_ctx) { int err; - err = create_cq(con, cq_vector, cq_size, poll_ctx); + err = create_cq(con, cq_vector, nr_cqe, poll_ctx); if (err) return err; err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, max_send_sge); if (err) { - ib_cq_pool_put(con->cq, con->cq_size); + ib_cq_pool_put(con->cq, con->nr_cqe); con->cq = NULL; return err; } @@ -290,7 +294,7 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con) con->qp = NULL; } if (con->cq) { - ib_cq_pool_put(con->cq, con->cq_size); + ib_cq_pool_put(con->cq, con->nr_cqe); con->cq = NULL; } } @@ -376,7 +380,6 @@ void rtrs_stop_hb(struct rtrs_sess *sess) { cancel_delayed_work_sync(&sess->hb_dwork); sess->hb_missed_cnt = 0; - sess->hb_missed_max = 0; } EXPORT_SYMBOL_GPL(rtrs_stop_hb); diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h index dc3e1af1a85b..859c79685daf 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.h +++ b/drivers/infiniband/ulp/rtrs/rtrs.h @@ -57,7 +57,6 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, const struct rtrs_addr *paths, size_t path_cnt, u16 port, size_t pdu_sz, u8 reconnect_delay_sec, - u16 max_segments, s16 max_reconnect_attempts, u32 nr_poll_queues); void rtrs_clt_close(struct rtrs_clt *sess); @@ -110,6 +109,7 @@ int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index); struct rtrs_attrs { u32 queue_depth; u32 max_io_size; + u32 max_segments; }; int rtrs_clt_query(struct rtrs_clt *sess, struct rtrs_attrs *attr); diff --git 
a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 31f8aa2c40ed..6ba48a09eac4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -965,68 +965,52 @@ static void srp_disconnect_target(struct srp_target_port *target) } } -static void srp_free_req_data(struct srp_target_port *target, - struct srp_rdma_ch *ch) +static int srp_exit_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { + struct srp_target_port *target = host_to_target(shost); struct srp_device *dev = target->srp_host->srp_dev; struct ib_device *ibdev = dev->dev; - struct srp_request *req; - int i; - - if (!ch->req_ring) - return; + struct srp_request *req = scsi_cmd_priv(cmd); - for (i = 0; i < target->req_ring_size; ++i) { - req = &ch->req_ring[i]; - if (dev->use_fast_reg) - kfree(req->fr_list); - if (req->indirect_dma_addr) { - ib_dma_unmap_single(ibdev, req->indirect_dma_addr, - target->indirect_size, - DMA_TO_DEVICE); - } - kfree(req->indirect_desc); + kfree(req->fr_list); + if (req->indirect_dma_addr) { + ib_dma_unmap_single(ibdev, req->indirect_dma_addr, + target->indirect_size, + DMA_TO_DEVICE); } + kfree(req->indirect_desc); - kfree(ch->req_ring); - ch->req_ring = NULL; + return 0; } -static int srp_alloc_req_data(struct srp_rdma_ch *ch) +static int srp_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { - struct srp_target_port *target = ch->target; + struct srp_target_port *target = host_to_target(shost); struct srp_device *srp_dev = target->srp_host->srp_dev; struct ib_device *ibdev = srp_dev->dev; - struct srp_request *req; - void *mr_list; + struct srp_request *req = scsi_cmd_priv(cmd); dma_addr_t dma_addr; - int i, ret = -ENOMEM; - - ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), - GFP_KERNEL); - if (!ch->req_ring) - goto out; + int ret = -ENOMEM; - for (i = 0; i < target->req_ring_size; ++i) { - req = &ch->req_ring[i]; - mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), + if (srp_dev->use_fast_reg) { + req->fr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), GFP_KERNEL); - if (!mr_list) - goto out; - if (srp_dev->use_fast_reg) - req->fr_list = mr_list; - req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); - if (!req->indirect_desc) - goto out; - - dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, - target->indirect_size, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(ibdev, dma_addr)) + if (!req->fr_list) goto out; + } + req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); + if (!req->indirect_desc) + goto out; - req->indirect_dma_addr = dma_addr; + dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, + target->indirect_size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(ibdev, dma_addr)) { + srp_exit_cmd_priv(shost, cmd); + goto out; } + + req->indirect_dma_addr = dma_addr; ret = 0; out: @@ -1068,10 +1052,6 @@ static void srp_remove_target(struct srp_target_port *target) } cancel_work_sync(&target->tl_err_work); srp_rport_put(target->rport); - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - srp_free_req_data(target, ch); - } kfree(target->ch); target->ch = NULL; @@ -1290,22 +1270,32 @@ static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, } } -static void srp_terminate_io(struct srp_rport *rport) +struct srp_terminate_context { + struct srp_target_port *srp_target; + int scsi_result; +}; + +static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr, + bool reserved) { - struct srp_target_port *target = 
@@ -1290,22 +1270,32 @@ static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
         }
 }
 
-static void srp_terminate_io(struct srp_rport *rport)
+struct srp_terminate_context {
+        struct srp_target_port *srp_target;
+        int scsi_result;
+};
+
+static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr,
+                              bool reserved)
 {
-        struct srp_target_port *target = rport->lld_data;
-        struct srp_rdma_ch *ch;
-        int i, j;
+        struct srp_terminate_context *context = context_ptr;
+        struct srp_target_port *target = context->srp_target;
+        u32 tag = blk_mq_unique_tag(scmnd->request);
+        struct srp_rdma_ch *ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
+        struct srp_request *req = scsi_cmd_priv(scmnd);
 
-        for (i = 0; i < target->ch_count; i++) {
-                ch = &target->ch[i];
+        srp_finish_req(ch, req, NULL, context->scsi_result);
 
-                for (j = 0; j < target->req_ring_size; ++j) {
-                        struct srp_request *req = &ch->req_ring[j];
+        return true;
+}
 
-                        srp_finish_req(ch, req, NULL,
-                                       DID_TRANSPORT_FAILFAST << 16);
-                }
-        }
+static void srp_terminate_io(struct srp_rport *rport)
+{
+        struct srp_target_port *target = rport->lld_data;
+        struct srp_terminate_context context = { .srp_target = target,
+                .scsi_result = DID_TRANSPORT_FAILFAST << 16 };
+
+        scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd, &context);
 }
 
 /* Calculate maximum initiator to target information unit length. */
@@ -1361,13 +1351,12 @@ static int srp_rport_reconnect(struct srp_rport *rport)
                 ch = &target->ch[i];
                 ret += srp_new_cm_id(ch);
         }
-        for (i = 0; i < target->ch_count; i++) {
-                ch = &target->ch[i];
-                for (j = 0; j < target->req_ring_size; ++j) {
-                        struct srp_request *req = &ch->req_ring[j];
+        {
+                struct srp_terminate_context context = {
+                        .srp_target = target, .scsi_result = DID_RESET << 16};
 
-                        srp_finish_req(ch, req, NULL, DID_RESET << 16);
-                }
+                scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd,
+                                    &context);
         }
         for (i = 0; i < target->ch_count; i++) {
                 ch = &target->ch[i];
@@ -1963,13 +1952,10 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
                 spin_unlock_irqrestore(&ch->lock, flags);
         } else {
                 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
-                if (scmnd && scmnd->host_scribble) {
-                        req = (void *)scmnd->host_scribble;
+                if (scmnd) {
+                        req = scsi_cmd_priv(scmnd);
                         scmnd = srp_claim_req(ch, req, NULL, scmnd);
                 } else {
-                        scmnd = NULL;
-                }
-                if (!scmnd) {
                         shost_printk(KERN_ERR, target->scsi_host,
                                      "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
                                      rsp->tag, ch - target->ch, ch->qp->qp_num);
@@ -2001,7 +1987,6 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
 
                 srp_free_req(ch, req, scmnd,
                              be32_to_cpu(rsp->req_lim_delta));
-                scmnd->host_scribble = NULL;
                 scmnd->scsi_done(scmnd);
         }
 }
@@ -2169,13 +2154,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
         struct srp_target_port *target = host_to_target(shost);
         struct srp_rdma_ch *ch;
-        struct srp_request *req;
+        struct srp_request *req = scsi_cmd_priv(scmnd);
         struct srp_iu *iu;
         struct srp_cmd *cmd;
         struct ib_device *dev;
         unsigned long flags;
         u32 tag;
-        u16 idx;
         int len, ret;
 
         scmnd->result = srp_chkready(target->rport);
@@ -2185,10 +2169,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
         WARN_ON_ONCE(scmnd->request->tag < 0);
         tag = blk_mq_unique_tag(scmnd->request);
         ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
-        idx = blk_mq_unique_tag_to_tag(tag);
-        WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
-                  dev_name(&shost->shost_gendev), tag, idx,
-                  target->req_ring_size);
 
         spin_lock_irqsave(&ch->lock, flags);
         iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
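Note how srp_terminate_io() and srp_rport_reconnect() above stop walking a private request ring and instead ask the mid-layer to visit every outstanding command. A minimal sketch of that idiom, assuming the three-argument callback signature scsi_host_busy_iter() has in the kernel series this diff targets (my_* names are hypothetical, and completing the command is left out):

    #include <linux/blk-mq.h>
    #include <scsi/scsi.h>
    #include <scsi/scsi_cmnd.h>
    #include <scsi/scsi_host.h>

    struct my_term_ctx {
            int scsi_result;
    };

    static bool my_terminate_one(struct scsi_cmnd *scmnd, void *ctx_ptr,
                                 bool reserved)
    {
            struct my_term_ctx *ctx = ctx_ptr;
            u32 tag = blk_mq_unique_tag(scmnd->request);

            /* the upper 16 bits of the unique tag select the hardware queue */
            pr_debug("failing cmd on hwq %u\n", blk_mq_unique_tag_to_hwq(tag));
            scmnd->result = ctx->scsi_result;
            return true;    /* keep iterating over busy commands */
    }

    static void my_terminate_all(struct Scsi_Host *shost)
    {
            struct my_term_ctx ctx = { .scsi_result = DID_TRANSPORT_FAILFAST << 16 };

            scsi_host_busy_iter(shost, my_terminate_one, &ctx);
    }

The real srp_terminate_cmd() uses the same hwq decode to find the right RDMA channel and then hands each request to srp_finish_req() with the caller-supplied SCSI result.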
@@ -2197,13 +2177,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
         if (!iu)
                 goto err;
 
-        req = &ch->req_ring[idx];
         dev = target->srp_host->srp_dev->dev;
         ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
                                    DMA_TO_DEVICE);
 
-        scmnd->host_scribble = (void *) req;
-
         cmd = iu->buf;
         memset(cmd, 0, sizeof *cmd);
@@ -2891,7 +2868,7 @@ static int srp_slave_configure(struct scsi_device *sdev)
         return 0;
 }
 
-static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
+static ssize_t id_ext_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2899,7 +2876,9 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
 }
 
-static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
+static DEVICE_ATTR_RO(id_ext);
+
+static ssize_t ioc_guid_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2907,7 +2886,9 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
 }
 
-static ssize_t show_service_id(struct device *dev,
+static DEVICE_ATTR_RO(ioc_guid);
+
+static ssize_t service_id_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2918,7 +2899,9 @@ static ssize_t show_service_id(struct device *dev,
                           be64_to_cpu(target->ib_cm.service_id));
 }
 
-static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
+static DEVICE_ATTR_RO(service_id);
+
+static ssize_t pkey_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2929,7 +2912,9 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
 }
 
-static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
+static DEVICE_ATTR_RO(pkey);
+
+static ssize_t sgid_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2937,7 +2922,9 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "%pI6\n", target->sgid.raw);
 }
 
-static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
+static DEVICE_ATTR_RO(sgid);
+
+static ssize_t dgid_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2949,8 +2936,10 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
 }
 
-static ssize_t show_orig_dgid(struct device *dev,
-                              struct device_attribute *attr, char *buf)
+static DEVICE_ATTR_RO(dgid);
+
+static ssize_t orig_dgid_show(struct device *dev, struct device_attribute *attr,
+                              char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2960,8 +2949,10 @@ static ssize_t show_orig_dgid(struct device *dev,
         return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
 }
 
-static ssize_t show_req_lim(struct device *dev,
-                            struct device_attribute *attr, char *buf)
+static DEVICE_ATTR_RO(orig_dgid);
+
+static ssize_t req_lim_show(struct device *dev, struct device_attribute *attr,
+                            char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
         struct srp_rdma_ch *ch;
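The show_* to *_show renames in these hunks are not cosmetic: DEVICE_ATTR_RO(name) derives both the attribute variable and the callback name, so the function must be called <name>_show. Roughly, for a hypothetical attribute foo:

    #include <linux/device.h>
    #include <linux/sysfs.h>

    static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
    {
            return sysfs_emit(buf, "%d\n", 42);     /* placeholder value */
    }
    static DEVICE_ATTR_RO(foo);
    /*
     * expands to roughly:
     *   static struct device_attribute dev_attr_foo =
     *           __ATTR(foo, 0444, foo_show, NULL);
     */

That is also why a later hunk can delete the whole block of explicit DEVICE_ATTR(..., S_IRUGO, show_*, NULL) definitions while srp_host_attrs[] keeps referring to dev_attr_id_ext and friends unchanged.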
@@ -2975,7 +2966,9 @@ static ssize_t show_req_lim(struct device *dev,
         return sysfs_emit(buf, "%d\n", req_lim);
 }
 
-static ssize_t show_zero_req_lim(struct device *dev,
+static DEVICE_ATTR_RO(req_lim);
+
+static ssize_t zero_req_lim_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2983,7 +2976,9 @@ static ssize_t show_zero_req_lim(struct device *dev,
         return sysfs_emit(buf, "%d\n", target->zero_req_lim);
 }
 
-static ssize_t show_local_ib_port(struct device *dev,
+static DEVICE_ATTR_RO(zero_req_lim);
+
+static ssize_t local_ib_port_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -2991,7 +2986,9 @@ static ssize_t show_local_ib_port(struct device *dev,
         return sysfs_emit(buf, "%d\n", target->srp_host->port);
 }
 
-static ssize_t show_local_ib_device(struct device *dev,
+static DEVICE_ATTR_RO(local_ib_port);
+
+static ssize_t local_ib_device_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -3000,7 +2997,9 @@ static ssize_t show_local_ib_device(struct device *dev,
                           dev_name(&target->srp_host->srp_dev->dev->dev));
 }
 
-static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
+static DEVICE_ATTR_RO(local_ib_device);
+
+static ssize_t ch_count_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -3008,7 +3007,9 @@ static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "%d\n", target->ch_count);
 }
 
-static ssize_t show_comp_vector(struct device *dev,
+static DEVICE_ATTR_RO(ch_count);
+
+static ssize_t comp_vector_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -3016,7 +3017,9 @@ static ssize_t show_comp_vector(struct device *dev,
         return sysfs_emit(buf, "%d\n", target->comp_vector);
 }
 
-static ssize_t show_tl_retry_count(struct device *dev,
+static DEVICE_ATTR_RO(comp_vector);
+
+static ssize_t tl_retry_count_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -3024,7 +3027,9 @@ static ssize_t show_tl_retry_count(struct device *dev,
         return sysfs_emit(buf, "%d\n", target->tl_retry_count);
 }
 
-static ssize_t show_cmd_sg_entries(struct device *dev,
+static DEVICE_ATTR_RO(tl_retry_count);
+
+static ssize_t cmd_sg_entries_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -3032,7 +3037,9 @@ static ssize_t show_cmd_sg_entries(struct device *dev,
         return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt);
 }
 
-static ssize_t show_allow_ext_sg(struct device *dev,
+static DEVICE_ATTR_RO(cmd_sg_entries);
+
+static ssize_t allow_ext_sg_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
 {
         struct srp_target_port *target = host_to_target(class_to_shost(dev));
@@ -3040,22 +3047,7 @@ static ssize_t show_allow_ext_sg(struct device *dev,
         return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
 }
 
"true" : "false"); } -static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); -static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); -static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); -static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); -static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); -static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); -static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); -static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); -static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); -static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); -static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); -static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); -static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); -static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); -static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); -static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); +static DEVICE_ATTR_RO(allow_ext_sg); static struct device_attribute *srp_host_attrs[] = { &dev_attr_id_ext, @@ -3084,6 +3076,8 @@ static struct scsi_host_template srp_template = { .target_alloc = srp_target_alloc, .slave_configure = srp_slave_configure, .info = srp_target_info, + .init_cmd_priv = srp_init_cmd_priv, + .exit_cmd_priv = srp_exit_cmd_priv, .queuecommand = srp_queuecommand, .change_queue_depth = srp_change_queue_depth, .eh_timed_out = srp_timed_out, @@ -3097,6 +3091,7 @@ static struct scsi_host_template srp_template = { .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, .shost_attrs = srp_host_attrs, .track_queue_depth = 1, + .cmd_size = sizeof(struct srp_request), }; static int srp_sdev_count(struct Scsi_Host *host) @@ -3617,9 +3612,9 @@ out: return ret; } -static ssize_t srp_create_target(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t add_target_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) { struct srp_host *host = container_of(dev, struct srp_host, dev); @@ -3676,8 +3671,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto out; - target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; - if (!srp_conn_unique(target->srp_host, target)) { if (target->using_rdma_cm) { shost_printk(KERN_INFO, target->scsi_host, @@ -3780,10 +3773,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_disconnect; - ret = srp_alloc_req_data(ch); - if (ret) - goto err_disconnect; - ret = srp_connect_ch(ch, max_iu_len, multich); if (ret) { char dst[64]; @@ -3802,7 +3791,6 @@ static ssize_t srp_create_target(struct device *dev, goto free_ch; } else { srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); target->ch_count = ch - target->ch; goto connected; } @@ -3863,16 +3851,15 @@ free_ch: for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); } kfree(target->ch); goto out; } -static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); +static DEVICE_ATTR_WO(add_target); -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, +static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, char *buf) { struct srp_host *host = container_of(dev, struct srp_host, dev); @@ -3880,9 +3867,9 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%s\n", 
@@ -3880,9 +3867,9 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
 }
 
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR_RO(ibdev);
 
-static ssize_t show_port(struct device *dev, struct device_attribute *attr,
+static ssize_t port_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
 {
         struct srp_host *host = container_of(dev, struct srp_host, dev);
@@ -3890,7 +3877,7 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr,
         return sysfs_emit(buf, "%d\n", host->port);
 }
 
-static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+static DEVICE_ATTR_RO(port);
 
 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
 {
@@ -4078,10 +4065,13 @@ static int __init srp_init_module(void)
 {
         int ret;
 
+        BUILD_BUG_ON(sizeof(struct srp_aer_req) != 36);
+        BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
         BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
+        BUILD_BUG_ON(sizeof(struct srp_indirect_buf) != 20);
         BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
         BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
-        BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
+        BUILD_BUG_ON(sizeof(struct srp_rsp) != 36);
 
         if (srp_sg_tablesize) {
                 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 6818cac0a3b7..abccddeea1e3 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -174,7 +174,6 @@ struct srp_rdma_ch {
 
         struct srp_iu          **tx_ring;
         struct srp_iu          **rx_ring;
-        struct srp_request      *req_ring;
         int                     comp_vector;
 
         u64                     tsk_mgmt_tag;
@@ -220,7 +219,6 @@ struct srp_target_port {
         int                     mr_pool_size;
         int                     mr_per_cmd;
         int                     queue_size;
-        int                     req_ring_size;
         int                     comp_vector;
         int                     tl_retry_count;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index ea447805d4ea..3cadf1295417 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2858,7 +2858,6 @@ static void srpt_queue_response(struct se_cmd *cmd)
                             &ch->sq_wr_avail) < 0)) {
                 pr_warn("%s: IB send queue full (needed %d)\n",
                         __func__, ioctx->n_rdma);
-                ret = -ENOMEM;
                 goto out;
         }
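The reordered and extended BUILD_BUG_ON() list in srp_init_module() pins the on-the-wire sizes of the SRP protocol structures at compile time, so a stray padding or field change cannot silently corrupt the format. The idiom, with a hypothetical example_hdr:

    #include <linux/build_bug.h>
    #include <linux/init.h>
    #include <linux/types.h>

    struct example_hdr {
            __u8    opcode;
            __u8    flags;
            __be16  len;
    } __packed;

    static int __init example_init(void)
    {
            /* the build fails, rather than misbehaves, if the layout drifts */
            BUILD_BUG_ON(sizeof(struct example_hdr) != 4);
            return 0;
    }

The final ib_srpt.c hunk is independent of the rest: it drops an assignment to ret that is apparently never read after the goto, a dead store.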