summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeon Romanovsky <leonro@mellanox.com>2019-02-18 22:25:43 +0200
committerJason Gunthorpe <jgg@mellanox.com>2019-02-18 21:04:36 -0700
commitfd47c2f99f04249d1ba82c422d1818dcbe193908 (patch)
treeba51844cae66a900a3c8ceeda5fb973417a5aef7
parent5f8f5499005c51656645a011bca81ffb66fcaaca (diff)
RDMA/restrack: Convert internal DB from hash to XArray
The additions of .doit callbacks posses new access pattern to the resource entries by some user visible index. Back then, the legacy DB was implemented as hash because per-index access wasn't needed and XArray wasn't accepted yet. Acceptance of XArray together with per-index access requires the refresh of DB implementation. Signed-off-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/infiniband/core/nldev.c13
-rw-r--r--drivers/infiniband/core/restrack.c103
-rw-r--r--include/rdma/restrack.h22
3 files changed, 93 insertions, 45 deletions
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 5601fa968244..4bf890ae6e28 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -970,6 +970,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
int start = cb->args[0];
bool has_cap_net_admin;
struct nlmsghdr *nlh;
+ unsigned long id;
u32 index, port = 0;
bool filled = false;
@@ -1020,7 +1021,12 @@ static int res_get_common_dumpit(struct sk_buff *skb,
has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
down_read(&device->res.rwsem);
- hash_for_each_possible(device->res.hash, res, node, res_type) {
+ /*
+ * FIXME: if the skip ahead is something common this loop should
+ * use xas_for_each & xas_pause to optimize, we can have a lot of
+ * objects.
+ */
+ xa_for_each(&device->res.xa[res_type], id, res) {
if (idx < start)
goto next;
@@ -1047,11 +1053,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
rdma_restrack_put(res);
if (ret == -EMSGSIZE)
- /*
- * There is a chance to optimize here.
- * It can be done by using list_prepare_entry
- * and list_for_each_entry_continue afterwards.
- */
break;
if (ret)
goto res_err;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index f80b37d437ac..b4f302811858 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -12,6 +12,28 @@
#include "cma_priv.h"
+static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
+ u32 *next)
+{
+ int err;
+
+ *id = *next;
+ if (*next == U32_MAX)
+ *id = 0;
+
+ xa_lock(xa);
+ err = __xa_alloc(xa, id, U32_MAX, entry, GFP_KERNEL);
+ if (err && *next != U32_MAX) {
+ *id = 0;
+ err = __xa_alloc(xa, id, *next, entry, GFP_KERNEL);
+ }
+
+ if (!err)
+ *next = *id + 1;
+ xa_unlock(xa);
+ return err;
+}
+
/**
* rdma_restrack_init() - initialize resource tracking
* @dev: IB device
@@ -19,6 +41,10 @@
void rdma_restrack_init(struct ib_device *dev)
{
struct rdma_restrack_root *res = &dev->res;
+ int i;
+
+ for (i = 0 ; i < RDMA_RESTRACK_MAX; i++)
+ xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC);
init_rwsem(&res->rwsem);
}
@@ -46,33 +72,42 @@ void rdma_restrack_clean(struct ib_device *dev)
struct rdma_restrack_root *res = &dev->res;
struct rdma_restrack_entry *e;
char buf[TASK_COMM_LEN];
+ bool found = false;
const char *owner;
- int bkt;
-
- if (hash_empty(res->hash))
- return;
-
- dev = container_of(res, struct ib_device, res);
- pr_err("restrack: %s", CUT_HERE);
- dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
- hash_for_each(res->hash, bkt, e, node) {
- if (rdma_is_kernel_res(e)) {
- owner = e->kern_name;
- } else {
- /*
- * There is no need to call get_task_struct here,
- * because we can be here only if there are more
- * get_task_struct() call than put_task_struct().
- */
- get_task_comm(buf, e->task);
- owner = buf;
+ int i;
+
+ for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
+ if (!xa_empty(&res->xa[i])) {
+ unsigned long index;
+
+ if (!found) {
+ pr_err("restrack: %s", CUT_HERE);
+ dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
+ }
+ xa_for_each(&res->xa[i], index, e) {
+ if (rdma_is_kernel_res(e)) {
+ owner = e->kern_name;
+ } else {
+ /*
+ * There is no need to call get_task_struct here,
+ * because we can be here only if there are more
+ * get_task_struct() call than put_task_struct().
+ */
+ get_task_comm(buf, e->task);
+ owner = buf;
+ }
+
+ pr_err("restrack: %s %s object allocated by %s is not freed\n",
+ rdma_is_kernel_res(e) ? "Kernel" :
+ "User",
+ type2str(e->type), owner);
+ }
+ found = true;
}
-
- pr_err("restrack: %s %s object allocated by %s is not freed\n",
- rdma_is_kernel_res(e) ? "Kernel" : "User",
- type2str(e->type), owner);
+ xa_destroy(&res->xa[i]);
}
- pr_err("restrack: %s", CUT_HERE);
+ if (found)
+ pr_err("restrack: %s", CUT_HERE);
}
/**
@@ -86,10 +121,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
{
struct rdma_restrack_root *res = &dev->res;
struct rdma_restrack_entry *e;
+ unsigned long index = 0;
u32 cnt = 0;
down_read(&res->rwsem);
- hash_for_each_possible(res->hash, e, node, type) {
+ xa_for_each(&res->xa[type], index, e) {
if (ns == &init_pid_ns ||
(!rdma_is_kernel_res(e) &&
ns == task_active_pid_ns(e->task)))
@@ -166,16 +202,20 @@ EXPORT_SYMBOL(rdma_restrack_set_task);
static void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
+ int ret;
if (!dev)
return;
kref_init(&res->kref);
init_completion(&res->comp);
- res->valid = true;
down_write(&dev->res.rwsem);
- hash_add(dev->res.hash, &res->node, res->type);
+ ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res,
+ &dev->res.next_id[res->type]);
+
+ if (!ret)
+ res->valid = true;
up_write(&dev->res.rwsem);
}
@@ -241,15 +281,14 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
if (!dev)
return;
- rdma_restrack_put(res);
-
- wait_for_completion(&res->comp);
-
down_write(&dev->res.rwsem);
- hash_del(&res->node);
+ xa_erase(&dev->res.xa[res->type], res->id);
res->valid = false;
up_write(&dev->res.rwsem);
+ rdma_restrack_put(res);
+ wait_for_completion(&res->comp);
+
out:
if (res->task) {
put_task_struct(res->task);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index cc66cc7a11d3..16e11b4c3ec3 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -13,6 +13,7 @@
#include <linux/completion.h>
#include <linux/sched/task.h>
#include <uapi/rdma/rdma_netlink.h>
+#include <linux/xarray.h>
/**
* enum rdma_restrack_type - HW objects to track
@@ -48,7 +49,6 @@ enum rdma_restrack_type {
RDMA_RESTRACK_MAX
};
-#define RDMA_RESTRACK_HASH_BITS 8
struct ib_device;
struct rdma_restrack_entry;
@@ -62,9 +62,17 @@ struct rdma_restrack_root {
*/
struct rw_semaphore rwsem;
/**
- * @hash: global database for all resources per-device
+ * @xa: Array of XArray structures to hold restrack entries.
+ * We want to use array of XArrays because insertion is type
+ * dependent. For types with xisiting unique ID (like QPN),
+ * we will insert to that unique index. For other types,
+ * we insert based on pointers and auto-allocate unique index.
*/
- DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
+ struct xarray xa[RDMA_RESTRACK_MAX];
+ /**
+ * @next_id: Next ID to support cyclic allocation
+ */
+ u32 next_id[RDMA_RESTRACK_MAX];
};
/**
@@ -103,10 +111,6 @@ struct rdma_restrack_entry {
*/
const char *kern_name;
/**
- * @node: hash table entry
- */
- struct hlist_node node;
- /**
* @type: various objects in restrack database
*/
enum rdma_restrack_type type;
@@ -114,6 +118,10 @@ struct rdma_restrack_entry {
* @user: user resource
*/
bool user;
+ /**
+ * @id: ID to expose to users
+ */
+ u32 id;
};
void rdma_restrack_init(struct ib_device *dev);