Commit 86b0154
---
r: 214381
b: refs/heads/master
c: 15133f6
h: refs/heads/master
i:
  214379: a254b13
v: v3
Andy Grover committed Sep 9, 2010
1 parent ab03251 commit 86b0154
Showing 10 changed files with 322 additions and 24 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: a63273d4992603979ddb181b6a8f07082839b39f
+refs/heads/master: 15133f6e67d8d646d0744336b4daa3135452cb0d
19 changes: 19 additions & 0 deletions trunk/include/linux/rds.h
@@ -73,6 +73,8 @@
 #define RDS_CMSG_RDMA_MAP               3
 #define RDS_CMSG_RDMA_STATUS            4
 #define RDS_CMSG_CONG_UPDATE            5
+#define RDS_CMSG_ATOMIC_FADD            6
+#define RDS_CMSG_ATOMIC_CSWP            7
 
 #define RDS_INFO_FIRST                  10000
 #define RDS_INFO_COUNTERS               10000
@@ -237,6 +239,23 @@ struct rds_rdma_args {
         u_int64_t user_token;
 };
 
+struct rds_atomic_args {
+        rds_rdma_cookie_t cookie;
+        uint64_t local_addr;
+        uint64_t remote_addr;
+        union {
+                struct {
+                        uint64_t compare;
+                        uint64_t swap;
+                } cswp;
+                struct {
+                        uint64_t add;
+                } fadd;
+        };
+        uint64_t flags;
+        uint64_t user_token;
+};
+
 struct rds_rdma_notify {
         u_int64_t user_token;
         int32_t status;
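The struct above is consumed from userspace as a control message on an RDS socket. As a rough, hypothetical illustration (not part of this commit), a caller might issue a compare-and-swap like the sketch below; the helper name rds_do_cswp, the SOL_RDS fallback define, and the pre-existing socket, destination address, and MR cookie are all assumptions for the example.

/* Hypothetical userspace sketch (not from this patch): request an
 * 8-byte remote compare-and-swap through an RDS socket. Assumes an
 * already bound/connected RDS socket fd, a registered MR cookie, and
 * an 8-byte-aligned local result buffer at local_addr. */
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/rds.h>

#ifndef SOL_RDS
#define SOL_RDS 276     /* value from linux/socket.h */
#endif

static ssize_t rds_do_cswp(int fd, struct sockaddr_in *dest,
                           rds_rdma_cookie_t cookie,
                           uint64_t local_addr, uint64_t remote_addr,
                           uint64_t expect, uint64_t newval)
{
        struct rds_atomic_args args;
        char cbuf[CMSG_SPACE(sizeof(args))];
        struct msghdr msg;
        struct cmsghdr *cmsg;

        memset(&args, 0, sizeof(args));
        args.cookie = cookie;
        args.local_addr = local_addr;   /* kernel checks 8-byte alignment */
        args.remote_addr = remote_addr;
        args.cswp.compare = expect;
        args.cswp.swap = newval;
        args.flags = RDS_RDMA_NOTIFY_ME; /* ask for a completion notifier */
        args.user_token = 0;

        memset(cbuf, 0, sizeof(cbuf));
        memset(&msg, 0, sizeof(msg));
        msg.msg_name = dest;
        msg.msg_namelen = sizeof(*dest);
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);

        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_RDS;
        cmsg->cmsg_type = RDS_CMSG_ATOMIC_CSWP;
        cmsg->cmsg_len = CMSG_LEN(sizeof(args));
        memcpy(CMSG_DATA(cmsg), &args, sizeof(args));

        return sendmsg(fd, &msg, 0);
}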
1 change: 1 addition & 0 deletions trunk/net/rds/ib.c
@@ -264,6 +264,7 @@ struct rds_transport rds_ib_transport = {
         .xmit = rds_ib_xmit,
         .xmit_cong_map = NULL,
         .xmit_rdma = rds_ib_xmit_rdma,
+        .xmit_atomic = rds_ib_xmit_atomic,
         .recv = rds_ib_recv,
         .conn_alloc = rds_ib_conn_alloc,
         .conn_free = rds_ib_conn_free,
1 change: 1 addition & 0 deletions trunk/net/rds/ib.h
@@ -336,6 +336,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
 void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
 int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
                              u32 *adv_credits, int need_posted, int max_posted);
+int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
 
 /* ib_stats.c */
 DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
4 changes: 3 additions & 1 deletion trunk/net/rds/ib_rdma.c
@@ -298,7 +298,9 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
         ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
                         (IB_ACCESS_LOCAL_WRITE |
                          IB_ACCESS_REMOTE_READ |
-                         IB_ACCESS_REMOTE_WRITE),
+                         IB_ACCESS_REMOTE_WRITE|
+                         IB_ACCESS_REMOTE_ATOMIC),
+
                         &pool->fmr_attr);
         if (IS_ERR(ibmr->fmr)) {
                 err = PTR_ERR(ibmr->fmr);
140 changes: 124 additions & 16 deletions trunk/net/rds/ib_send.c
@@ -62,15 +62,17 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
         rds_rdma_send_complete(rm, notify_status);
 }
 
-static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
-                                   struct rds_rdma_op *op)
+static void rds_ib_send_atomic_complete(struct rds_message *rm,
+                                        int wc_status)
 {
-        if (op->r_mapped) {
-                ib_dma_unmap_sg(ic->i_cm_id->device,
-                                op->r_sg, op->r_nents,
-                                op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-                op->r_mapped = 0;
-        }
+        int notify_status;
+
+        if (wc_status != IB_WC_SUCCESS)
+                notify_status = RDS_RDMA_OTHER_ERROR;
+        else
+                notify_status = RDS_RDMA_SUCCESS;
+
+        rds_atomic_send_complete(rm, notify_status);
 }
 
 static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
@@ -86,7 +88,14 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
                             DMA_TO_DEVICE);
 
         if (rm->rdma.m_rdma_op.r_active) {
-                rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
+                struct rds_rdma_op *op = &rm->rdma.m_rdma_op;
+
+                if (op->r_mapped) {
+                        ib_dma_unmap_sg(ic->i_cm_id->device,
+                                        op->r_sg, op->r_nents,
+                                        op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+                        op->r_mapped = 0;
+                }
 
                 /* If the user asked for a completion notification on this
                  * message, we can implement three different semantics:
@@ -116,6 +125,24 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
                         rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
         }
 
+        if (rm->atomic.op_active) {
+                struct rm_atomic_op *op = &rm->atomic;
+
+                /* unmap atomic recvbuf */
+                if (op->op_mapped) {
+                        ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
+                                        DMA_FROM_DEVICE);
+                        op->op_mapped = 0;
+                }
+
+                rds_ib_send_atomic_complete(rm, wc_status);
+
+                if (rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP)
+                        rds_stats_inc(s_atomic_cswp);
+                else
+                        rds_stats_inc(s_atomic_fadd);
+        }
+
         /* If anyone waited for this message to get flushed out, wake
          * them up now */
         rds_message_unmapped(rm);
@@ -158,12 +185,9 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
         u32 i;
 
         for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
-                if (send->s_wr.opcode == 0xdead)
+                if (!send->s_rm || send->s_wr.opcode == 0xdead)
                         continue;
-                if (send->s_rm)
-                        rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
-                if (send->s_op)
-                        rds_ib_send_unmap_rdma(ic, send->s_op);
+                rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
         }
 }
 
@@ -218,6 +242,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
                         break;
                 case IB_WR_RDMA_WRITE:
                 case IB_WR_RDMA_READ:
+                case IB_WR_ATOMIC_FETCH_AND_ADD:
+                case IB_WR_ATOMIC_CMP_AND_SWP:
                         /* Nothing to be done - the SG list will be unmapped
                          * when the SEND completes. */
                         break;
@@ -243,8 +269,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 
                         rm = rds_send_get_message(conn, send->s_op);
                         if (rm) {
-                                if (rm->rdma.m_rdma_op.r_active)
-                                        rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
+                                rds_ib_send_unmap_rm(ic, send, wc.status);
                                 rds_ib_send_rdma_complete(rm, wc.status);
                                 rds_message_put(rm);
                         }
@@ -736,6 +761,89 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
         return ret;
 }
 
+/*
+ * Issue atomic operation.
+ * A simplified version of the rdma case, we always map 1 SG, and
+ * only 8 bytes, for the return value from the atomic operation.
+ */
+int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
+{
+        struct rds_ib_connection *ic = conn->c_transport_data;
+        struct rds_ib_send_work *send = NULL;
+        struct ib_send_wr *failed_wr;
+        struct rds_ib_device *rds_ibdev;
+        u32 pos;
+        u32 work_alloc;
+        int ret;
+
+        rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
+
+        work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
+        if (work_alloc != 1) {
+                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+                rds_ib_stats_inc(s_ib_tx_ring_full);
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        /* address of send request in ring */
+        send = &ic->i_sends[pos];
+        send->s_queued = jiffies;
+
+        if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
+                send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP;
+                send->s_wr.wr.atomic.compare_add = op->op_compare;
+                send->s_wr.wr.atomic.swap = op->op_swap_add;
+        } else { /* FADD */
+                send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD;
+                send->s_wr.wr.atomic.compare_add = op->op_swap_add;
+                send->s_wr.wr.atomic.swap = 0;
+        }
+        send->s_wr.send_flags = IB_SEND_SIGNALED;
+        send->s_wr.num_sge = 1;
+        send->s_wr.next = NULL;
+        send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
+        send->s_wr.wr.atomic.rkey = op->op_rkey;
+
+        /* map 8 byte retval buffer to the device */
+        ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
+        rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
+        if (ret != 1) {
+                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+                rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
+                ret = -ENOMEM; /* XXX ? */
+                goto out;
+        }
+
+        /* Convert our struct scatterlist to struct ib_sge */
+        send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
+        send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+        send->s_sge[0].lkey = ic->i_mr->lkey;
+
+        rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
+                 send->s_sge[0].addr, send->s_sge[0].length);
+
+        failed_wr = &send->s_wr;
+        ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
+        rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
+                 send, &send->s_wr, ret, failed_wr);
+        BUG_ON(failed_wr != &send->s_wr);
+        if (ret) {
+                printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
+                       "returned %d\n", &conn->c_faddr, ret);
+                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+                goto out;
+        }
+
+        if (unlikely(failed_wr != &send->s_wr)) {
+                printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
+                BUG_ON(failed_wr != &send->s_wr);
+        }
+
+out:
+        return ret;
+}
+
 int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 {
         struct rds_ib_connection *ic = conn->c_transport_data;
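For readers who know userspace verbs better than the kernel API, the work request built in rds_ib_xmit_atomic() maps almost one-to-one onto a libibverbs atomic. The following self-contained sketch is an analogy only, not code from this commit; post_fetch_add and all of its parameters (a connected RC QP, a registered MR covering the 8-byte result buffer, the peer's rkey and address) are assumptions for illustration.

/* Illustrative libibverbs analog (not from this patch) of the
 * fetch-and-add posting above. The old remote value is written into
 * local_buf when the signaled completion is reaped from the send CQ. */
#include <stdint.h>
#include <infiniband/verbs.h>

static int post_fetch_add(struct ibv_qp *qp, struct ibv_mr *mr,
                          uint64_t *local_buf, uint64_t remote_addr,
                          uint32_t rkey, uint64_t add_value)
{
        struct ibv_sge sge = {
                .addr   = (uintptr_t)local_buf, /* old value lands here */
                .length = sizeof(uint64_t),     /* IB atomics are always 8 bytes */
                .lkey   = mr->lkey,
        };
        struct ibv_send_wr wr = {
                .opcode     = IBV_WR_ATOMIC_FETCH_AND_ADD,
                .send_flags = IBV_SEND_SIGNALED,
                .sg_list    = &sge,
                .num_sge    = 1,
        };
        struct ibv_send_wr *bad_wr;

        wr.wr.atomic.remote_addr = remote_addr;
        wr.wr.atomic.compare_add = add_value; /* swap field unused for FADD */
        wr.wr.atomic.rkey        = rkey;

        return ibv_post_send(qp, &wr, &bad_wr);
}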
73 changes: 73 additions & 0 deletions trunk/net/rds/rdma.c
@@ -719,3 +719,76 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
 
         return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr);
 }
+
+/*
+ * Fill in rds_message for an atomic request.
+ */
+int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
+                    struct cmsghdr *cmsg)
+{
+        struct page *page = NULL;
+        struct rds_atomic_args *args;
+        int ret = 0;
+
+        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
+            || rm->atomic.op_active)
+                return -EINVAL;
+
+        args = CMSG_DATA(cmsg);
+
+        if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) {
+                rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+                rm->atomic.op_swap_add = args->cswp.swap;
+                rm->atomic.op_compare = args->cswp.compare;
+        } else {
+                rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+                rm->atomic.op_swap_add = args->fadd.add;
+        }
+
+        rm->m_rdma_cookie = args->cookie;
+        rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+        rm->atomic.op_recverr = rs->rs_recverr;
+        rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+
+        /* verify 8 byte-aligned */
+        if (args->local_addr & 0x7) {
+                ret = -EFAULT;
+                goto err;
+        }
+
+        ret = rds_pin_pages(args->local_addr, 1, &page, 1);
+        if (ret != 1)
+                goto err;
+        ret = 0;
+
+        sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
+
+        if (rm->atomic.op_notify || rm->atomic.op_recverr) {
+                /* We allocate an uninitialized notifier here, because
+                 * we don't want to do that in the completion handler. We
+                 * would have to use GFP_ATOMIC there, and don't want to deal
+                 * with failed allocations.
+                 */
+                rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
+                if (!rm->atomic.op_notifier) {
+                        ret = -ENOMEM;
+                        goto err;
+                }
+
+                rm->atomic.op_notifier->n_user_token = args->user_token;
+                rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
+        }
+
+        rm->atomic.op_rkey = rds_rdma_cookie_key(rm->m_rdma_cookie);
+        rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
+
+        rm->atomic.op_active = 1;
+
+        return ret;
+err:
+        if (page)
+                put_page(page);
+        kfree(rm->atomic.op_notifier);
+
+        return ret;
+}
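Because the notifier filled in here reuses the rds_rdma_notify machinery (n_user_token, n_status), a completion requested with RDS_RDMA_NOTIFY_ME should reach userspace the way RDMA completions do: as an RDS_CMSG_RDMA_STATUS control message on a later recvmsg(). A hedged sketch of reaping it follows; the helper name rds_reap_notify and the SOL_RDS fallback define are assumptions for the example.

/* Hypothetical userspace sketch (not from this patch): reap the
 * completion notification queued by the atomic's notifier. It arrives
 * as an RDS_CMSG_RDMA_STATUS cmsg carrying struct rds_rdma_notify. */
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/rds.h>

#ifndef SOL_RDS
#define SOL_RDS 276     /* value from linux/socket.h */
#endif

static int rds_reap_notify(int fd, uint64_t *token, int32_t *status)
{
        struct rds_rdma_notify notify;
        char cbuf[CMSG_SPACE(sizeof(notify))];
        struct msghdr msg;
        struct cmsghdr *cmsg;

        memset(&msg, 0, sizeof(msg));
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);

        if (recvmsg(fd, &msg, MSG_DONTWAIT) < 0)
                return -1;

        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                if (cmsg->cmsg_level == SOL_RDS &&
                    cmsg->cmsg_type == RDS_CMSG_RDMA_STATUS) {
                        memcpy(&notify, CMSG_DATA(cmsg), sizeof(notify));
                        *token = notify.user_token;
                        *status = notify.status;
                        return 0;
                }
        }
        return -1;      /* no status cmsg in this datagram */
}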
0 comments on commit 86b0154