Skip to content

Commit

Permalink
RDMA/restrack: Add general infrastructure to track RDMA resources
Browse files Browse the repository at this point in the history
The RDMA subsystem has a very strict set of objects to work with, but it
completely lacks tracking facilities and has no visibility into resource
utilization.

The following patch adds such infrastructure to keep track of RDMA
resources to help with debugging of user space applications. The primary
user of this infrastructure is RDMA nldev netlink (following patches), to
be exposed to userspace via rdmatool, but it is not limited to that.

At this stage, the main three objects (PD, CQ and QP) are added, and more
will be added later.

Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
  • Loading branch information
Leon Romanovsky authored and Jason Gunthorpe committed Jan 30, 2018
1 parent f66c8ba commit 02d8883
Show file tree
Hide file tree
Showing 6 changed files with 346 additions and 1 deletion.
2 changes: 1 addition & 1 deletion drivers/infiniband/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
security.o nldev.o
security.o nldev.o restrack.o

ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/core/core_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/restrack.h>
#include "mad_priv.h"

/* Total number of ports combined across all struct ib_devices's */
Expand Down
4 changes: 4 additions & 0 deletions drivers/infiniband/core/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ struct ib_device *ib_alloc_device(size_t size)
if (!device)
return NULL;

rdma_restrack_init(&device->res);

device->dev.class = &ib_class;
device_initialize(&device->dev);

Expand Down Expand Up @@ -596,6 +598,8 @@ void ib_unregister_device(struct ib_device *device)
}
up_read(&lists_rwsem);

rdma_restrack_clean(&device->res);

ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);

Expand Down
164 changes: 164 additions & 0 deletions drivers/infiniband/core/restrack.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
/*
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/

#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/pid_namespace.h>

/**
 * rdma_restrack_init() - initialize a resource tracking root
 * @res: resource tracking root to initialize
 *
 * Sets up the read/write semaphore that guards the root's hash table.
 * The hash table itself is not touched here.
 * NOTE(review): presumably the caller hands in zeroed memory so that the
 * hash buckets start out empty - confirm against the allocation site.
 */
void rdma_restrack_init(struct rdma_restrack_root *res)
{
init_rwsem(&res->rwsem);
}

/**
 * rdma_restrack_clean() - sanity check a resource tracking root on teardown
 * @res: resource tracking root to check
 *
 * Emits a one-time warning if the hash table still contains entries.
 * Nothing is removed or released by this function; it only reports.
 */
void rdma_restrack_clean(struct rdma_restrack_root *res)
{
/* Any entry still hashed at this point was never deleted by its owner */
WARN_ON_ONCE(!hash_empty(res->hash));
}

/**
 * rdma_restrack_count() - count tracked resources of one type
 * @res:  resource tracking root to walk
 * @type: resource type to count
 * @ns:   PID namespace the caller is asking on behalf of
 *
 * An entry is counted when @ns is the initial PID namespace, or when the
 * entry is not a kernel resource and the active PID namespace of its owning
 * task equals @ns.
 *
 * The walk is done with the root's rwsem held for reading.
 *
 * Return: number of matching entries.
 */
int rdma_restrack_count(struct rdma_restrack_root *res,
			enum rdma_restrack_type type,
			struct pid_namespace *ns)
{
	struct rdma_restrack_entry *e;
	u32 cnt = 0;

	down_read(&res->rwsem);
	hash_for_each_possible(res->hash, e, node, type) {
		/*
		 * hash_for_each_possible() visits every entry in the bucket
		 * selected by @type, so entries of another type that land in
		 * the same bucket must be filtered out explicitly.
		 */
		if (e->type != type)
			continue;

		if (ns == &init_pid_ns ||
		    (!rdma_is_kernel_res(e) &&
		     ns == task_active_pid_ns(e->task)))
			cnt++;
	}
	up_read(&res->rwsem);
	return cnt;
}
EXPORT_SYMBOL(rdma_restrack_count);

/*
 * Fill in the kernel owner name of a QP entry by copying the name pointer
 * from the QP's PD. Entries of any other type are left untouched.
 */
static void set_kern_name(struct rdma_restrack_entry *res)
{
	struct ib_qp *qp;

	/* PD and CQ types already have this name embedded in */
	if (res->type != RDMA_RESTRACK_QP)
		return;

	qp = container_of(res, struct ib_qp, res);
	if (qp->pd) {
		res->kern_name = qp->pd->res.kern_name;
		return;
	}

	/* A QP without a PD has no name to inherit */
	WARN_ONCE(true, "XRC QPs are not supported\n");
	/* Survive, despite the programmer's error */
	res->kern_name = " ";
}

static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
enum rdma_restrack_type type = res->type;
struct ib_device *dev;
struct ib_xrcd *xrcd;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;

switch (type) {
case RDMA_RESTRACK_PD:
pd = container_of(res, struct ib_pd, res);
dev = pd->device;
break;
case RDMA_RESTRACK_CQ:
cq = container_of(res, struct ib_cq, res);
dev = cq->device;
break;
case RDMA_RESTRACK_QP:
qp = container_of(res, struct ib_qp, res);
dev = qp->device;
break;
case RDMA_RESTRACK_XRCD:
xrcd = container_of(res, struct ib_xrcd, res);
dev = xrcd->device;
break;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
return NULL;
}

return dev;
}

/**
 * rdma_restrack_add() - start tracking a resource
 * @res: tracking entry embedded in the resource; its type must already be set
 *
 * Records the owner of the resource, initializes the entry's reference count
 * and completion, marks it valid and inserts it into the owning device's hash
 * table keyed by the entry type. Does nothing when the owning device cannot
 * be resolved from the entry.
 */
void rdma_restrack_add(struct rdma_restrack_entry *res)
{
	struct ib_device *dev = res_to_dev(res);

	if (!dev)
		return;

	if (uaccess_kernel()) {
		/* Kernel owner: identified by name, no task attached */
		set_kern_name(res);
		res->task = NULL;
	} else {
		/* Otherwise pin the current task and record it as owner */
		get_task_struct(current);
		res->task = current;
		res->kern_name = NULL;
	}

	kref_init(&res->kref);
	init_completion(&res->comp);
	res->valid = true;

	/* Insertion is serialized against readers by the write lock */
	down_write(&dev->res.rwsem);
	hash_add(dev->res.hash, &res->node, res->type);
	up_write(&dev->res.rwsem);
}
EXPORT_SYMBOL(rdma_restrack_add);

/**
 * rdma_restrack_get() - try to take a reference on a tracking entry
 * @res: tracking entry
 *
 * Return: the result of kref_get_unless_zero() on the entry's kref; the
 * caller is required to check it.
 */
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
	struct kref *ref = &res->kref;

	return kref_get_unless_zero(ref);
}
EXPORT_SYMBOL(rdma_restrack_get);

/*
 * kref release callback: signal the entry's completion so that a waiter in
 * rdma_restrack_del() can proceed. The entry itself is not freed here.
 */
static void restrack_release(struct kref *kref)
{
	struct rdma_restrack_entry *entry =
		container_of(kref, struct rdma_restrack_entry, kref);

	complete(&entry->comp);
}

/**
 * rdma_restrack_put() - drop a reference on a tracking entry
 * @res: tracking entry
 *
 * Return: the result of kref_put() on the entry's kref, with
 * restrack_release() as the release callback.
 */
int rdma_restrack_put(struct rdma_restrack_entry *res)
{
	struct kref *ref = &res->kref;

	return kref_put(ref, restrack_release);
}
EXPORT_SYMBOL(rdma_restrack_put);

void rdma_restrack_del(struct rdma_restrack_entry *res)
{
struct ib_device *dev;

if (!res->valid)
return;

dev = res_to_dev(res);
if (!dev)
return;

rdma_restrack_put(res);

wait_for_completion(&res->comp);

down_write(&dev->res.rwsem);
hash_del(&res->node);
res->valid = false;
if (res->task)
put_task_struct(res->task);
up_write(&dev->res.rwsem);
}
EXPORT_SYMBOL(rdma_restrack_del);
19 changes: 19 additions & 0 deletions include/rdma/ib_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
#include <linux/uaccess.h>
#include <linux/cgroup_rdma.h>
#include <uapi/rdma/ib_user_verbs.h>
#include <rdma/restrack.h>

#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN

Expand Down Expand Up @@ -1525,6 +1526,7 @@ struct ib_pd {
* Implementation details of the RDMA core, don't use in drivers:
*/
struct ib_mr *__internal_mr;
struct rdma_restrack_entry res;
};

struct ib_xrcd {
Expand All @@ -1534,6 +1536,10 @@ struct ib_xrcd {

struct mutex tgt_qp_mutex;
struct list_head tgt_qp_list;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
};

struct ib_ah {
Expand Down Expand Up @@ -1565,6 +1571,10 @@ struct ib_cq {
struct irq_poll iop;
struct work_struct work;
};
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
};

struct ib_srq {
Expand Down Expand Up @@ -1741,6 +1751,11 @@ struct ib_qp {
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_qp_security *qp_sec;
u8 port;

/*
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
};

struct ib_mr {
Expand Down Expand Up @@ -2347,6 +2362,10 @@ struct ib_device {
#endif

u32 index;
/*
* Implementation details of the RDMA core, don't use in drivers
*/
struct rdma_restrack_root res;

/**
* The following mandatory functions are used only at device
Expand Down
Loading

0 comments on commit 02d8883

Please sign in to comment.