Skip to content

Commit

Permalink
RDMA/rxe: Stop lookup of partially built objects
Browse files Browse the repository at this point in the history
Currently the rdma_rxe driver has a security weakness: it assigns indices
to objects which are only partially initialized, allowing external actors
to gain access to them by sending packets which refer to their index
(e.g. qpn, rkey, etc.), causing unpredictable results.

This patch adds a new API rxe_finalize(obj) which enables looking up pool
objects from indices using rxe_pool_get_index() for AH, QP, MR, and
MW. The rxe_finalize() calls are made in the create verbs only after the
objects are fully initialized.

It also adds a wait for completion to the destroy/dealloc verbs to ensure
that all references have been dropped before returning to rdma_core. This
is done by implementing a new rxe_pool API rxe_cleanup() which drops a
reference to the object and then waits for all other references to be
dropped.  When the last reference is dropped the kref release callback
signals the object's completion.  After that rxe_cleanup() cleans up the
object and, if it was locally allocated, frees the memory. In the special
case of address handle objects the wait is implemented as a polled delay
when the destroy_ah call is not sleepable.

Combined with deferring cleanup code to type specific cleanup routines
this allows all pending activity referring to objects to complete before
returning to rdma_core.

Link: https://lore.kernel.org/r/20220612223434.31462-2-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
  • Loading branch information
Bob Pearson authored and Jason Gunthorpe committed Jun 30, 2022
1 parent 80a14dd commit 215d0a7
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 27 deletions.
2 changes: 1 addition & 1 deletion drivers/infiniband/sw/rxe/rxe_mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (atomic_read(&mr->num_mw) > 0)
return -EINVAL;

rxe_put(mr);
rxe_cleanup(mr);

return 0;
}
Expand Down
4 changes: 3 additions & 1 deletion drivers/infiniband/sw/rxe/rxe_mw.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,16 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
spin_lock_init(&mw->lock);

rxe_finalize(mw);

return 0;
}

int rxe_dealloc_mw(struct ib_mw *ibmw)
{
struct rxe_mw *mw = to_rmw(ibmw);

rxe_put(mw);
rxe_cleanup(mw);

return 0;
}
Expand Down
100 changes: 94 additions & 6 deletions drivers/infiniband/sw/rxe/rxe_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "rxe.h"

#define RXE_POOL_TIMEOUT (200)
#define RXE_POOL_ALIGN (16)

static const struct rxe_type_info {
Expand Down Expand Up @@ -136,8 +137,12 @@ void *rxe_alloc(struct rxe_pool *pool)
elem->pool = pool;
elem->obj = obj;
kref_init(&elem->ref_cnt);
init_completion(&elem->complete);

err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
/* allocate index in array but leave pointer as NULL so it
* can't be looked up until rxe_finalize() is called
*/
err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
&pool->next, GFP_KERNEL);
if (err < 0)
goto err_free;
Expand All @@ -151,9 +156,11 @@ void *rxe_alloc(struct rxe_pool *pool)
return NULL;
}

int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
bool sleepable)
{
int err;
gfp_t gfp_flags;

if (WARN_ON(pool->type == RXE_TYPE_MR))
return -EINVAL;
Expand All @@ -164,9 +171,18 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
elem->pool = pool;
elem->obj = (u8 *)elem - pool->elem_offset;
kref_init(&elem->ref_cnt);

err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
&pool->next, GFP_KERNEL);
init_completion(&elem->complete);

/* AH objects are unique in that the create_ah verb
* can be called in atomic context. If the create_ah
* call is not sleepable use GFP_ATOMIC.
*/
gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC;

if (sleepable)
might_sleep();
err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
&pool->next, gfp_flags);
if (err < 0)
goto err_cnt;

Expand Down Expand Up @@ -198,9 +214,67 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
static void rxe_elem_release(struct kref *kref)
{
struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);

complete(&elem->complete);
}

int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
{
struct rxe_pool *pool = elem->pool;
struct xarray *xa = &pool->xa;
static int timeout = RXE_POOL_TIMEOUT;
unsigned long flags;
int ret, err = 0;
void *xa_ret;

xa_erase(&pool->xa, elem->index);
if (sleepable)
might_sleep();

/* erase xarray entry to prevent looking up
* the pool elem from its index
*/
xa_lock_irqsave(xa, flags);
xa_ret = __xa_erase(xa, elem->index);
xa_unlock_irqrestore(xa, flags);
WARN_ON(xa_err(xa_ret));

/* if this is the last call to rxe_put complete the
* object. It is safe to touch obj->elem after this since
* it is freed below
*/
__rxe_put(elem);

/* wait until all references to the object have been
* dropped before final object specific cleanup and
* return to rdma-core
*/
if (sleepable) {
if (!completion_done(&elem->complete) && timeout) {
ret = wait_for_completion_timeout(&elem->complete,
timeout);

/* Shouldn't happen. There are still references to
* the object but, rather than deadlock, free the
* object or pass back to rdma-core.
*/
if (WARN_ON(!ret))
err = -EINVAL;
}
} else {
unsigned long until = jiffies + timeout;

/* AH objects are unique in that the destroy_ah verb
* can be called in atomic context. This delay
* replaces the wait_for_completion call above
* when the destroy_ah call is not sleepable
*/
while (!completion_done(&elem->complete) &&
time_before(jiffies, until))
mdelay(1);

if (WARN_ON(!completion_done(&elem->complete)))
err = -EINVAL;
}

if (pool->cleanup)
pool->cleanup(elem);
Expand All @@ -209,6 +283,8 @@ static void rxe_elem_release(struct kref *kref)
kfree(elem->obj);

atomic_dec(&pool->num_elem);

return err;
}

int __rxe_get(struct rxe_pool_elem *elem)
Expand All @@ -220,3 +296,15 @@ int __rxe_put(struct rxe_pool_elem *elem)
{
return kref_put(&elem->ref_cnt, rxe_elem_release);
}

void __rxe_finalize(struct rxe_pool_elem *elem)
{
struct xarray *xa = &elem->pool->xa;
unsigned long flags;
void *ret;

xa_lock_irqsave(xa, flags);
ret = __xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL);
xa_unlock_irqrestore(xa, flags);
WARN_ON(xa_err(ret));
}
18 changes: 13 additions & 5 deletions drivers/infiniband/sw/rxe/rxe_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ struct rxe_pool_elem {
void *obj;
struct kref ref_cnt;
struct list_head list;
struct completion complete;
u32 index;
};

Expand Down Expand Up @@ -57,21 +58,28 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
void *rxe_alloc(struct rxe_pool *pool);

/* connect already allocated object to pool */
int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem);

#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem)
int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
bool sleepable);
#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem, true)
#define rxe_add_to_pool_ah(pool, obj, sleepable) __rxe_add_to_pool(pool, \
&(obj)->elem, sleepable)

/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);

int __rxe_get(struct rxe_pool_elem *elem);

#define rxe_get(obj) __rxe_get(&(obj)->elem)

int __rxe_put(struct rxe_pool_elem *elem);

#define rxe_put(obj) __rxe_put(&(obj)->elem)

int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable);
#define rxe_cleanup(obj) __rxe_cleanup(&(obj)->elem, true)
#define rxe_cleanup_ah(obj, sleepable) __rxe_cleanup(&(obj)->elem, sleepable)

#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt)

void __rxe_finalize(struct rxe_pool_elem *elem);
#define rxe_finalize(obj) __rxe_finalize(&(obj)->elem)

#endif /* RXE_POOL_H */
39 changes: 25 additions & 14 deletions drivers/infiniband/sw/rxe/rxe_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
struct rxe_ucontext *uc = to_ruc(ibuc);

rxe_put(uc);
rxe_cleanup(uc);
}

static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
Expand Down Expand Up @@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);

rxe_put(pd);
rxe_cleanup(pd);
return 0;
}

Expand All @@ -176,7 +176,8 @@ static int rxe_create_ah(struct ib_ah *ibah,
if (err)
return err;

err = rxe_add_to_pool(&rxe->ah_pool, ah);
err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
if (err)
return err;

Expand All @@ -188,7 +189,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
sizeof(uresp->ah_num));
if (err) {
rxe_put(ah);
rxe_cleanup(ah);
return -EFAULT;
}
} else if (ah->is_user) {
Expand All @@ -197,6 +198,8 @@ static int rxe_create_ah(struct ib_ah *ibah,
}

rxe_init_av(init_attr->ah_attr, &ah->av);
rxe_finalize(ah);

return 0;
}

Expand Down Expand Up @@ -228,7 +231,8 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);

rxe_put(ah);
rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);

return 0;
}

Expand Down Expand Up @@ -308,12 +312,13 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,

err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
if (err)
goto err_put;
goto err_cleanup;

return 0;

err_put:
rxe_put(srq);
err_cleanup:
rxe_cleanup(srq);

return err;
}

Expand Down Expand Up @@ -362,7 +367,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);

rxe_put(srq);
rxe_cleanup(srq);
return 0;
}

Expand Down Expand Up @@ -429,10 +434,11 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
if (err)
goto qp_init;

rxe_finalize(qp);
return 0;

qp_init:
rxe_put(qp);
rxe_cleanup(qp);
return err;
}

Expand Down Expand Up @@ -485,7 +491,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (ret)
return ret;

rxe_put(qp);
rxe_cleanup(qp);
return 0;
}

Expand Down Expand Up @@ -803,7 +809,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)

rxe_cq_disable(cq);

rxe_put(cq);
rxe_cleanup(cq);
return 0;
}

Expand Down Expand Up @@ -898,6 +904,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)

rxe_get(pd);
rxe_mr_init_dma(pd, access, mr);
rxe_finalize(mr);

return &mr->ibmr;
}
Expand Down Expand Up @@ -926,11 +933,13 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
if (err)
goto err3;

rxe_finalize(mr);

return &mr->ibmr;

err3:
rxe_put(pd);
rxe_put(mr);
rxe_cleanup(mr);
err2:
return ERR_PTR(err);
}
Expand Down Expand Up @@ -958,11 +967,13 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
if (err)
goto err2;

rxe_finalize(mr);

return &mr->ibmr;

err2:
rxe_put(pd);
rxe_put(mr);
rxe_cleanup(mr);
err1:
return ERR_PTR(err);
}
Expand Down

0 comments on commit 215d0a7

Please sign in to comment.