Skip to content

Commit

Permalink
IB/core: Add "type 2" memory windows support
Browse files Browse the repository at this point in the history
This patch enhances the IB core support for Memory Windows (MWs).

MWs allow an application to have better/flexible control over remote
access to memory.

Two types of MWs are supported, with the second type having two flavors:

    Type 1  - associated with PD only
    Type 2A - associated with QPN only
    Type 2B - associated with PD and QPN

Applications can allocate a MW once, and then repeatedly bind the MW
to different ranges in MRs that are associated to the same PD. Type 1
windows are bound through a verb, while type 2 windows are bound by
posting a work request.

The 32-bit memory key is composed of a 24-bit index and an 8-bit
key. The key is changed with each bind, thus allowing more control
over the peer's use of the memory key.

The changes introduced are the following:

* add memory window type enum and a corresponding parameter to ib_alloc_mw.
* type 2 memory window bind work request support.
* create a struct that contains the common part of the bind verb struct
  ibv_mw_bind and the bind work request into a single struct.
* add the ib_inc_rkey helper function to advance the tag part of an rkey.

Consumer interface details:

* new device capability flags IB_DEVICE_MEM_WINDOW_TYPE_2A and
  IB_DEVICE_MEM_WINDOW_TYPE_2B are added to indicate device support
  for these features.

  Devices can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or
  IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B
  memory windows. It can set neither to indicate it doesn't support
  type 2 windows at all.

* modify existing provides and consumers code to the new param of
  ib_alloc_mw and the ib_mw_bind_info structure

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
  • Loading branch information
Shani Michaeli authored and Roland Dreier committed Feb 21, 2013
1 parent 836dc9e commit 7083e42
Show file tree
Hide file tree
Showing 10 changed files with 110 additions and 41 deletions.
5 changes: 3 additions & 2 deletions drivers/infiniband/core/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1099,18 +1099,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list);

/* Memory windows */

struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct ib_mw *mw;

if (!pd->device->alloc_mw)
return ERR_PTR(-ENOSYS);

mw = pd->device->alloc_mw(pd);
mw = pd->device->alloc_mw(pd, type);
if (!IS_ERR(mw)) {
mw->device = pd->device;
mw->pd = pd;
mw->uobject = NULL;
mw->type = type;
atomic_inc(&pd->usecnt);
}

Expand Down
5 changes: 4 additions & 1 deletion drivers/infiniband/hw/cxgb3/iwch_provider.c
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
return ibmr;
}

static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
struct iwch_pd *php;
Expand All @@ -747,6 +747,9 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
u32 stag = 0;
int ret;

if (type != IB_MW_TYPE_1)
return ERR_PTR(-EINVAL);

php = to_iwch_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
Expand Down
15 changes: 8 additions & 7 deletions drivers/infiniband/hw/cxgb3/iwch_qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,18 +567,19 @@ int iwch_bind_mw(struct ib_qp *qp,
if (mw_bind->send_flags & IB_SEND_SIGNALED)
t3_wr_flags = T3_COMPLETION_FLAG;

sgl.addr = mw_bind->addr;
sgl.lkey = mw_bind->mr->lkey;
sgl.length = mw_bind->length;
sgl.addr = mw_bind->bind_info.addr;
sgl.lkey = mw_bind->bind_info.mr->lkey;
sgl.length = mw_bind->bind_info.length;
wqe->bind.reserved = 0;
wqe->bind.type = TPT_VATO;

/* TBD: check perms */
wqe->bind.perms = iwch_ib_to_tpt_bind_access(mw_bind->mw_access_flags);
wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
wqe->bind.perms = iwch_ib_to_tpt_bind_access(
mw_bind->bind_info.mw_access_flags);
wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
if (err) {
spin_unlock_irqrestore(&qhp->lock, flag);
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/cxgb4/iw_cxgb4.h
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
int page_list_len);
struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
int c4iw_dealloc_mw(struct ib_mw *mw);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
struct ib_udata *udata);
Expand Down
5 changes: 4 additions & 1 deletion drivers/infiniband/hw/cxgb4/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}

struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
Expand All @@ -659,6 +659,9 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
u32 stag = 0;
int ret;

if (type != IB_MW_TYPE_1)
return ERR_PTR(-EINVAL);

php = to_c4iw_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/ehca/ehca_iverbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);

int ehca_dereg_mr(struct ib_mr *mr);

struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);

int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
Expand Down
5 changes: 4 additions & 1 deletion drivers/infiniband/hw/ehca/ehca_mrmw.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ int ehca_dereg_mr(struct ib_mr *mr)

/*----------------------------------------------------------------------*/

struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct ib_mw *ib_mw;
u64 h_ret;
Expand All @@ -698,6 +698,9 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
container_of(pd->device, struct ehca_shca, ib_device);
struct ehca_mw_hipzout_parms hipzout;

if (type != IB_MW_TYPE_1)
return ERR_PTR(-EINVAL);

e_mw = ehca_mw_new();
if (!e_mw) {
ib_mw = ERR_PTR(-ENOMEM);
Expand Down
19 changes: 11 additions & 8 deletions drivers/infiniband/hw/nes/nes_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
/**
* nes_alloc_mw
*/
static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type)
{
struct nes_pd *nespd = to_nespd(ibpd);
struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
struct nes_device *nesdev = nesvnic->nesdev;
Expand All @@ -71,6 +72,9 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
u32 driver_key = 0;
u8 stag_key = 0;

if (type != IB_MW_TYPE_1)
return ERR_PTR(-EINVAL);

get_random_bytes(&next_stag_index, sizeof(next_stag_index));
stag_key = (u8)next_stag_index;

Expand Down Expand Up @@ -244,20 +248,19 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;

if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) {
if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
}
if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) {
if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
}

set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
ibmw_bind->bind_info.mr->lkey);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
ibmw_bind->length);
ibmw_bind->bind_info.length);
wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
u64temp = (u64)ibmw_bind->addr;
u64temp = (u64)ibmw_bind->bind_info.addr;
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);

head++;
Expand Down
73 changes: 64 additions & 9 deletions include/rdma/ib_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ enum ib_device_cap_flags {
IB_DEVICE_XRC = (1<<20),
IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24)
};

enum ib_atomic_cap {
Expand Down Expand Up @@ -715,6 +717,11 @@ enum ib_mig_state {
IB_MIG_ARMED
};

enum ib_mw_type {
IB_MW_TYPE_1 = 1,
IB_MW_TYPE_2 = 2
};

struct ib_qp_attr {
enum ib_qp_state qp_state;
enum ib_qp_state cur_qp_state;
Expand Down Expand Up @@ -758,6 +765,7 @@ enum ib_wr_opcode {
IB_WR_FAST_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
IB_WR_BIND_MW,
};

enum ib_send_flags {
Expand All @@ -780,6 +788,23 @@ struct ib_fast_reg_page_list {
unsigned int max_page_list_len;
};

/**
* struct ib_mw_bind_info - Parameters for a memory window bind operation.
* @mr: A memory region to bind the memory window to.
* @addr: The address where the memory window should begin.
* @length: The length of the memory window, in bytes.
* @mw_access_flags: Access flags from enum ib_access_flags for the window.
*
* This struct contains the shared parameters for type 1 and type 2
* memory window bind operations.
*/
struct ib_mw_bind_info {
struct ib_mr *mr;
u64 addr;
u64 length;
int mw_access_flags;
};

struct ib_send_wr {
struct ib_send_wr *next;
u64 wr_id;
Expand Down Expand Up @@ -823,6 +848,12 @@ struct ib_send_wr {
int access_flags;
u32 rkey;
} fast_reg;
struct {
struct ib_mw *mw;
/* The new rkey for the memory window. */
u32 rkey;
struct ib_mw_bind_info bind_info;
} bind_mw;
} wr;
u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
Expand All @@ -839,7 +870,8 @@ enum ib_access_flags {
IB_ACCESS_REMOTE_WRITE = (1<<1),
IB_ACCESS_REMOTE_READ = (1<<2),
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
IB_ACCESS_MW_BIND = (1<<4)
IB_ACCESS_MW_BIND = (1<<4),
IB_ZERO_BASED = (1<<5)
};

struct ib_phys_buf {
Expand All @@ -862,13 +894,16 @@ enum ib_mr_rereg_flags {
IB_MR_REREG_ACCESS = (1<<2)
};

/**
* struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
* @wr_id: Work request id.
* @send_flags: Flags from ib_send_flags enum.
* @bind_info: More parameters of the bind operation.
*/
struct ib_mw_bind {
struct ib_mr *mr;
u64 wr_id;
u64 addr;
u32 length;
int send_flags;
int mw_access_flags;
u64 wr_id;
int send_flags;
struct ib_mw_bind_info bind_info;
};

struct ib_fmr_attr {
Expand Down Expand Up @@ -991,6 +1026,7 @@ struct ib_mw {
struct ib_pd *pd;
struct ib_uobject *uobject;
u32 rkey;
enum ib_mw_type type;
};

struct ib_fmr {
Expand Down Expand Up @@ -1202,7 +1238,8 @@ struct ib_device {
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
struct ib_mw * (*alloc_mw)(struct ib_pd *pd);
struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
enum ib_mw_type type);
int (*bind_mw)(struct ib_qp *qp,
struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
Expand Down Expand Up @@ -2019,6 +2056,8 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
* ib_dereg_mr - Deregisters a memory region and removes it from the
* HCA translation table.
* @mr: The memory region to deregister.
*
* This function can fail, if the memory region has memory windows bound to it.
*/
int ib_dereg_mr(struct ib_mr *mr);

Expand Down Expand Up @@ -2070,11 +2109,23 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
mr->rkey = (mr->rkey & 0xffffff00) | newkey;
}

/**
* ib_inc_rkey - increments the key portion of the given rkey. Can be used
* for calculating a new rkey for type 2 memory windows.
* @rkey - the rkey to increment.
*/
static inline u32 ib_inc_rkey(u32 rkey)
{
const u32 mask = 0x000000ff;
return ((rkey + 1) & mask) | (rkey & ~mask);
}

/**
* ib_alloc_mw - Allocates a memory window.
* @pd: The protection domain associated with the memory window.
* @type: The type of the memory window (1 or 2).
*/
struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);

/**
* ib_bind_mw - Posts a work request to the send queue of the specified
Expand All @@ -2084,6 +2135,10 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
* @mw: The memory window to bind.
* @mw_bind: Specifies information about the memory window, including
* its address range, remote access rights, and associated memory region.
*
* If there is no immediate error, the function will update the rkey member
* of the mw parameter to its new value. The bind operation can still fail
* asynchronously.
*/
static inline int ib_bind_mw(struct ib_qp *qp,
struct ib_mw *mw,
Expand Down
20 changes: 10 additions & 10 deletions net/sunrpc/xprtrdma/verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
case RPCRDMA_MEMWINDOWS:
/* Allocate one extra request's worth, for full cycling */
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
r->r.mw = ib_alloc_mw(ia->ri_pd);
r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1);
if (IS_ERR(r->r.mw)) {
rc = PTR_ERR(r->r.mw);
dprintk("RPC: %s: ib_alloc_mw"
Expand Down Expand Up @@ -1673,12 +1673,12 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,

*nsegs = 1;
rpcrdma_map_one(ia, seg, writing);
param.mr = ia->ri_bind_mem;
param.bind_info.mr = ia->ri_bind_mem;
param.wr_id = 0ULL; /* no send cookie */
param.addr = seg->mr_dma;
param.length = seg->mr_len;
param.bind_info.addr = seg->mr_dma;
param.bind_info.length = seg->mr_len;
param.send_flags = 0;
param.mw_access_flags = mem_priv;
param.bind_info.mw_access_flags = mem_priv;

DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
Expand All @@ -1690,7 +1690,7 @@ rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
rpcrdma_unmap_one(ia, seg);
} else {
seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
seg->mr_base = param.addr;
seg->mr_base = param.bind_info.addr;
seg->mr_nsegs = 1;
}
return rc;
Expand All @@ -1706,10 +1706,10 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
int rc;

BUG_ON(seg->mr_nsegs != 1);
param.mr = ia->ri_bind_mem;
param.addr = 0ULL; /* unbind */
param.length = 0;
param.mw_access_flags = 0;
param.bind_info.mr = ia->ri_bind_mem;
param.bind_info.addr = 0ULL; /* unbind */
param.bind_info.length = 0;
param.bind_info.mw_access_flags = 0;
if (*r) {
param.wr_id = (u64) (unsigned long) *r;
param.send_flags = IB_SEND_SIGNALED;
Expand Down

0 comments on commit 7083e42

Please sign in to comment.