Skip to content

Commit

Permalink
IB/iser: Introduce fast memory registration model (FRWR)
Browse files Browse the repository at this point in the history
Newer HCAs and Virtual functions may not support FMRs but rather a fast
registration model, which we call FRWR - "Fast Registration Work Requests".

This model was introduced in 00f7ec3 ("RDMA/core: Add memory management
extensions support") and works when the IB device supports the
IB_DEVICE_MEM_MGT_EXTENSIONS capability.

When creating the iser device, iser tests whether the HCA supports
FMRs.  If FMRs are not supported, it checks whether
IB_DEVICE_MEM_MGT_EXTENSIONS is supported and assigns function pointers
that handle fast registration and allocation of the appropriate
resources (fast_reg descriptors).

Registration is done by posting IB_WR_FAST_REG_MR to the QP, and
invalidation by posting IB_WR_LOCAL_INV.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
  • Loading branch information
Sagi Grimberg authored and Roland Dreier committed Aug 10, 2013
1 parent e657571 commit 5587856
Show file tree
Hide file tree
Showing 3 changed files with 287 additions and 12 deletions.
21 changes: 20 additions & 1 deletion drivers/infiniband/ulp/iser/iscsi_iser.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ struct iser_mem_reg {
u64 va;
u64 len;
void *mem_h;
int is_fmr;
int is_mr;
};

struct iser_regd_buf {
Expand Down Expand Up @@ -277,6 +277,15 @@ struct iser_device {
enum iser_data_dir cmd_dir);
};

/*
 * Per-registration resources for the fast registration (FRWR) model.
 * Descriptors are kept on the connection's fastreg.frwr.pool list and are
 * taken off it while a registration is in use.
 */
struct fast_reg_descriptor {
/* Linkage into the connection's FRWR descriptor pool */
struct list_head list;
/* For fast registration - FRWR */
/* MR whose lkey/rkey are handed out for the registered data */
struct ib_mr *data_mr;
/* Page list filled from the SG list and passed in the fast reg WR */
struct ib_fast_reg_page_list *data_frpl;
/*
 * Valid for fast registration flag: when false, iser_fast_reg_mr()
 * chains a local invalidate in front of the next fast registration.
 * It is set to false once a registration has been posted.
 */
bool valid;
};

struct iser_conn {
struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */
struct iscsi_endpoint *ep;
Expand Down Expand Up @@ -307,6 +316,10 @@ struct iser_conn {
struct iser_page_vec *page_vec; /* represents SG to fmr maps*
* maps serialized as tx is*/
} fmr;
struct {
struct list_head pool;
int pool_size;
} frwr;
} fastreg;
};

Expand Down Expand Up @@ -393,6 +406,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,

int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);

int iser_connect(struct iser_conn *ib_conn,
struct sockaddr_in *src_addr,
Expand All @@ -405,6 +420,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,

void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);

int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
Expand All @@ -421,4 +438,6 @@ int iser_initialize_task_headers(struct iscsi_task *task,
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
void iser_free_fmr_pool(struct iser_conn *ib_conn);
int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
void iser_free_frwr_pool(struct iser_conn *ib_conn);
#endif
140 changes: 137 additions & 3 deletions drivers/infiniband/ulp/iser/iser_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf = &iser_task->rdma_regd[cmd_dir];

aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents ||
(!ib_conn->fastreg.fmr.pool && mem->dma_nents > 1)) {
if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev,
cmd_dir, aligned_len);
if (err) {
Expand All @@ -414,7 +413,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
regd_buf->reg.is_fmr = 0;
regd_buf->reg.is_mr = 0;

iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
Expand Down Expand Up @@ -444,3 +443,138 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
}
return 0;
}

/*
 * iser_fast_reg_mr - post a fast registration work request for @desc
 *
 * Builds an IB_WR_FAST_REG_MR work request over the descriptor's page list
 * and posts it on the connection QP. When the descriptor is not marked
 * valid, an IB_WR_LOCAL_INV for the current rkey is chained in front of it
 * and the low byte of the MR key is incremented before the registration
 * request picks up the rkey. On success the resulting keys, address and
 * length are recorded in @regd_buf and the descriptor is marked not valid.
 *
 * returns 0 on success, the ib_post_send() error code on failure
 */
static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
			    struct iser_conn *ib_conn,
			    struct iser_regd_buf *regd_buf,
			    u32 offset, unsigned int data_size,
			    unsigned int page_list_len)
{
	struct ib_mr *mr = desc->data_mr;
	struct ib_send_wr linv_wr, reg_wr;
	struct ib_send_wr *first_wr = &reg_wr;
	struct ib_send_wr *failed_wr;
	u64 iova = desc->data_frpl->page_list[0] + offset;
	int nr_wrs = 1;
	int rc;

	if (!desc->valid) {
		/* Invalidate the previous registration under its current rkey */
		memset(&linv_wr, 0, sizeof(linv_wr));
		linv_wr.opcode = IB_WR_LOCAL_INV;
		linv_wr.send_flags = IB_SEND_SIGNALED;
		linv_wr.ex.invalidate_rkey = mr->rkey;
		linv_wr.next = &reg_wr;
		first_wr = &linv_wr;
		nr_wrs = 2;

		/* Bump the key byte; must happen after the old rkey was captured */
		ib_update_fast_reg_key(mr, (u8)((mr->rkey & 0x000000FF) + 1));
	}

	/* Fast registration request, always last in the chain */
	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.opcode = IB_WR_FAST_REG_MR;
	reg_wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.wr.fast_reg.iova_start = iova;
	reg_wr.wr.fast_reg.page_list = desc->data_frpl;
	reg_wr.wr.fast_reg.page_list_len = page_list_len;
	reg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	reg_wr.wr.fast_reg.length = data_size;
	reg_wr.wr.fast_reg.rkey = mr->rkey;
	reg_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
					  IB_ACCESS_REMOTE_WRITE |
					  IB_ACCESS_REMOTE_READ;

	/* Account for every signaled WR in the chain before posting */
	atomic_add(nr_wrs, &ib_conn->post_send_buf_count);

	rc = ib_post_send(ib_conn->qp, first_wr, &failed_wr);
	if (rc) {
		/*
		 * Undo the accounting for the requests that were not posted:
		 * a failed WR with a successor means neither went out.
		 */
		atomic_sub(failed_wr->next ? 2 : 1,
			   &ib_conn->post_send_buf_count);
		iser_err("fast registration failed, ret:%d\n", rc);
		return rc;
	}

	/* The MR is now in use; it must be invalidated before its next use */
	desc->valid = false;

	regd_buf->reg.mem_h = desc;
	regd_buf->reg.lkey = mr->lkey;
	regd_buf->reg.rkey = mr->rkey;
	regd_buf->reg.va = iova;
	regd_buf->reg.len = data_size;
	regd_buf->reg.is_mr = 1;

	return 0;
}

/**
* iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
* using Fast Registration WR (if possible) obtaining rkey and va
*
* returns 0 on success, errno code on failure
*/
int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
struct fast_reg_descriptor *desc;
unsigned int data_size, page_list_len;
int err, aligned_len;
unsigned long flags;
u32 offset;

aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev,
cmd_dir, aligned_len);
if (err) {
iser_err("failed to allocate bounce buffer\n");
return err;
}
mem = &iser_task->data_copy[cmd_dir];
}

/* if there a single dma entry, dma mr suffices */
if (mem->dma_nents == 1) {
struct scatterlist *sg = (struct scatterlist *)mem->buf;

regd_buf->reg.lkey = device->mr->lkey;
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
regd_buf->reg.is_mr = 0;
} else {
spin_lock_irqsave(&ib_conn->lock, flags);
desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
struct fast_reg_descriptor, list);
list_del(&desc->list);
spin_unlock_irqrestore(&ib_conn->lock, flags);
page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
desc->data_frpl->page_list,
&offset, &data_size);

if (page_list_len * SIZE_4K < data_size) {
iser_err("fast reg page_list too short to hold this SG\n");
err = -EINVAL;
goto err_reg;
}

err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
offset, data_size, page_list_len);
if (err)
goto err_reg;
}

return 0;
err_reg:
spin_lock_irqsave(&ib_conn->lock, flags);
list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
spin_unlock_irqrestore(&ib_conn->lock, flags);
return err;
}
Loading

0 comments on commit 5587856

Please sign in to comment.