IB/umad: Add support for large RMPP transfers
Add support for sending and receiving large RMPP transfers.  The old
code supports transfers only as large as a single contiguous kernel
memory allocation.  This patch uses a linked list of memory buffers
when sending and receiving data to avoid needing contiguous pages for
larger transfers.

  Receive side: copy the arriving MADs in chunks instead of coalescing
  them into one large buffer in kernel space (a sketch follows below).
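
  A minimal sketch of the chunked receive-side copy, assuming the received
  RMPP segments are kept on a linked list instead of being coalesced.  The
  names recv_seg and copy_recv_segments are illustrative, not identifiers
  from the patch; the real receive path is in parts of the patch not shown
  in this excerpt.

      /* Needs <linux/list.h> and <linux/uaccess.h>. */
      struct recv_seg {                       /* hypothetical received RMPP chunk */
              struct list_head list;
              int              len;           /* payload bytes in this chunk */
              u8               data[0];
      };

      /*
       * Copy a multi-segment MAD to a user buffer one chunk at a time,
       * so no single large contiguous kernel buffer is ever needed.
       */
      static ssize_t copy_recv_segments(struct list_head *seg_list,
                                        char __user *buf, size_t count)
      {
              struct recv_seg *seg;
              size_t copied = 0;

              list_for_each_entry(seg, seg_list, list) {
                      size_t len = min_t(size_t, seg->len, count - copied);

                      if (copy_to_user(buf + copied, seg->data, len))
                              return -EFAULT;
                      copied += len;
                      if (copied == count)
                              break;
              }
              return copied;
      }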

  Send side: split a multipacket MAD buffer into a list of segments
  (multipacket_list) and send these using a gather list of size 2.
  Also, save a pointer to the last sent segment, and retrieve requested
  segments by walking the list starting from that segment.  Finally,
  save a pointer to the last-acked segment; when retrying, retrieve
  segments for resending relative to this pointer, and when updating
  the last ack, start the walk at this pointer (a sketch follows
  below).
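
  A hedged sketch of the last-ack bookkeeping described above, using the
  last_ack_seg and last_ack fields that the mad_priv.h hunk below adds to
  ib_mad_send_wr_private.  The function name is an assumption; the real
  ACK processing lives in a part of the patch not shown in this excerpt.

      /*
       * Advance the cached last_ack_seg cursor to the newly acked segment
       * instead of rescanning the segment list from its head.  The caller
       * is assumed to have validated seg_num against send_buf.seg_count.
       */
      static void adjust_last_ack_sketch(struct ib_mad_send_wr_private *wr,
                                         int seg_num)
      {
              struct list_head *list;

              if (seg_num <= wr->last_ack)
                      return;                 /* stale or duplicate ACK */

              wr->last_ack = seg_num;
              list = &wr->last_ack_seg->list;
              list_for_each_entry(wr->last_ack_seg, list, list)
                      if (wr->last_ack_seg->num == seg_num)
                              break;
      }

  Retries work the same way: segments to resend are looked up relative to
  last_ack_seg rather than by walking the whole list from its head.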

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Jack Morgenstein authored and Roland Dreier committed Mar 20, 2006
1 parent 6ecb0c8 commit f36e179
Showing 5 changed files with 376 additions and 227 deletions.
166 changes: 135 additions & 31 deletions drivers/infiniband/core/mad.c
@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
-* $Id: mad.c 2817 2005-07-07 11:29:26Z halr $
+* $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#include <linux/dma-mapping.h>

@@ -765,18 +765,67 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
return ret;
}

-static int get_buf_length(int hdr_len, int data_len)
+static int get_pad_size(int hdr_len, int data_len)
{
int seg_size, pad;

seg_size = sizeof(struct ib_mad) - hdr_len;
if (data_len && seg_size) {
pad = seg_size - data_len % seg_size;
-if (pad == seg_size)
-pad = 0;
+return pad == seg_size ? 0 : pad;
} else
-pad = seg_size;
-return hdr_len + data_len + pad;
+return seg_size;
}

static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_segment *s, *t;

list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
list_del(&s->list);
kfree(s);
}
}

static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
gfp_t gfp_mask)
{
struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
struct ib_rmpp_segment *seg = NULL;
int left, seg_size, pad;

send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
seg_size = send_buf->seg_size;
pad = send_wr->pad;

/* Allocate data segments. */
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
"alloc failed for len %zd, gfp %#x\n",
sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
seg->num = ++send_buf->seg_count;
list_add_tail(&seg->list, &send_wr->rmpp_list);
}

/* Zero any padding */
if (pad)
memset(seg->data + seg_size - pad, 0, pad);

rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
agent.rmpp_version;
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);

send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
struct ib_rmpp_segment, list);
send_wr->last_ack_seg = send_wr->cur_seg;
return 0;
}

struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
@@ -787,46 +836,52 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
-int buf_size;
+int pad, message_size, ret, size;
void *buf;

mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
agent);
-buf_size = get_buf_length(hdr_len, data_len);
+pad = get_pad_size(hdr_len, data_len);
+message_size = hdr_len + data_len + pad;

if ((!mad_agent->rmpp_version &&
-(rmpp_active || buf_size > sizeof(struct ib_mad))) ||
-(!rmpp_active && buf_size > sizeof(struct ib_mad)))
+(rmpp_active || message_size > sizeof(struct ib_mad))) ||
+(!rmpp_active && message_size > sizeof(struct ib_mad)))
return ERR_PTR(-EINVAL);

-buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
+size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
+buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
if (!buf)
return ERR_PTR(-ENOMEM);

-mad_send_wr = buf + buf_size;
+mad_send_wr = buf + size;
INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
mad_send_wr->send_buf.mad = buf;
mad_send_wr->send_buf.hdr_len = hdr_len;
mad_send_wr->send_buf.data_len = data_len;
mad_send_wr->pad = pad;

mad_send_wr->mad_agent_priv = mad_agent_priv;
-mad_send_wr->sg_list[0].length = buf_size;
+mad_send_wr->sg_list[0].length = hdr_len;
mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;

mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-mad_send_wr->send_wr.num_sge = 1;
+mad_send_wr->send_wr.num_sge = 2;
mad_send_wr->send_wr.opcode = IB_WR_SEND;
mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;

if (rmpp_active) {
-struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
-rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
-IB_MGMT_RMPP_HDR + data_len);
-rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
-rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
-ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
-IB_MGMT_RMPP_FLAG_ACTIVE);
+ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
+if (ret) {
+kfree(buf);
+return ERR_PTR(ret);
+}
}

mad_send_wr->send_buf.mad_agent = mad_agent;
@@ -835,14 +890,50 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
EXPORT_SYMBOL(ib_create_send_mad);

void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
{
struct ib_mad_send_wr_private *mad_send_wr;
struct list_head *list;

mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
send_buf);
list = &mad_send_wr->cur_seg->list;

if (mad_send_wr->cur_seg->num < seg_num) {
list_for_each_entry(mad_send_wr->cur_seg, list, list)
if (mad_send_wr->cur_seg->num == seg_num)
break;
} else if (mad_send_wr->cur_seg->num > seg_num) {
list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
if (mad_send_wr->cur_seg->num == seg_num)
break;
}
return mad_send_wr->cur_seg->data;
}
EXPORT_SYMBOL(ib_get_rmpp_segment);

static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
{
if (mad_send_wr->send_buf.seg_count)
return ib_get_rmpp_segment(&mad_send_wr->send_buf,
mad_send_wr->seg_num);
else
return mad_send_wr->send_buf.mad +
mad_send_wr->send_buf.hdr_len;
}

void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;

mad_agent_priv = container_of(send_buf->mad_agent,
struct ib_mad_agent_private, agent);
-kfree(send_buf->mad);
+mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+send_buf);
+
+free_send_rmpp_list(mad_send_wr);
+kfree(send_buf->mad);
if (atomic_dec_and_test(&mad_agent_priv->refcount))
wake_up(&mad_agent_priv->wait);
}
Expand All @@ -865,10 +956,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)

mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
-sge->addr = dma_map_single(mad_agent->device->dma_device,
-mad_send_wr->send_buf.mad, sge->length,
-DMA_TO_DEVICE);
-pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+sge[0].addr = dma_map_single(mad_agent->device->dma_device,
+mad_send_wr->send_buf.mad,
+sge[0].length,
+DMA_TO_DEVICE);
+pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
+
+sge[1].addr = dma_map_single(mad_agent->device->dma_device,
+ib_get_payload(mad_send_wr),
+sge[1].length,
+DMA_TO_DEVICE);
+pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);

spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -885,11 +983,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
-if (ret)
+if (ret) {
dma_unmap_single(mad_agent->device->dma_device,
-pci_unmap_addr(mad_send_wr, mapping),
-sge->length, DMA_TO_DEVICE);
-
+pci_unmap_addr(mad_send_wr, header_mapping),
+sge[0].length, DMA_TO_DEVICE);
+dma_unmap_single(mad_agent->device->dma_device,
+pci_unmap_addr(mad_send_wr, payload_mapping),
+sge[1].length, DMA_TO_DEVICE);
+}
return ret;
}

@@ -1860,8 +1961,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,

retry:
dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-pci_unmap_addr(mad_send_wr, mapping),
+pci_unmap_addr(mad_send_wr, header_mapping),
mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+pci_unmap_addr(mad_send_wr, payload_mapping),
+mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
queued_send_wr = NULL;
spin_lock_irqsave(&send_queue->lock, flags);
list_del(&mad_list->list);
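
For context before the mad_priv.h changes below, here is how a sender could
fill the per-segment payload through the new ib_get_rmpp_segment() helper
exported above.  This is an illustrative caller-side sketch, not code from
the patch; fill_rmpp_payload and the payload buffer are assumptions, while
seg_count, seg_size and ib_get_rmpp_segment() come from the diff.

    /*
     * Copy an outgoing payload into the RMPP segments of a send buffer
     * created with rmpp_active set.  Segment numbers are 1-based and each
     * segment holds up to send_buf->seg_size payload bytes; the padding in
     * the final segment was already zeroed by alloc_send_rmpp_list().
     */
    static void fill_rmpp_payload(struct ib_mad_send_buf *send_buf,
                                  const u8 *payload, int data_len)
    {
            int seg_num, copied = 0;

            for (seg_num = 1; seg_num <= send_buf->seg_count; seg_num++) {
                    int len = min(send_buf->seg_size, data_len - copied);

                    memcpy(ib_get_rmpp_segment(send_buf, seg_num),
                           payload + copied, len);
                    copied += len;
            }
    }
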
16 changes: 12 additions & 4 deletions drivers/infiniband/core/mad_priv.h
@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
-* $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $
+* $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
*/

#ifndef __IB_MAD_PRIV_H__
@@ -85,6 +85,12 @@ struct ib_mad_private {
} mad;
} __attribute__ ((packed));

struct ib_rmpp_segment {
struct list_head list;
u32 num;
u8 data[0];
};

struct ib_mad_agent_private {
struct list_head agent_list;
struct ib_mad_agent agent;
@@ -119,7 +125,8 @@ struct ib_mad_send_wr_private {
struct list_head agent_list;
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_buf send_buf;
-DECLARE_PCI_UNMAP_ADDR(mapping)
+DECLARE_PCI_UNMAP_ADDR(header_mapping)
+DECLARE_PCI_UNMAP_ADDR(payload_mapping)
struct ib_send_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
@@ -130,11 +137,12 @@ struct ib_mad_send_wr_private {
enum ib_wc_status status;

/* RMPP control */
struct list_head rmpp_list;
struct ib_rmpp_segment *last_ack_seg;
struct ib_rmpp_segment *cur_seg;
int last_ack;
int seg_num;
int newwin;
-int total_seg;
-int data_offset;
int pad;
};

(Diffs for the remaining three changed files are not shown in this excerpt.)
