IB/ipath: Performance improvements via mmap of queues
Improve performance of userspace post receive, post SRQ receive, and
poll CQ operations for ipath by allowing userspace to directly mmap()
receive queues and completion queues.  This eliminates the copying
between userspace and the kernel in the data path.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
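
The kernel-side half of the new data path is visible in the ipath_cq.c diff below: at create time the driver returns the ring's address as a __u64 "offset" through udata, and userspace is expected to mmap() that offset to reach the shared queue. The following is a minimal, hypothetical sketch of the userspace side, under the assumption that the provider library maps the offset on the verbs device command fd; none of the names below come from this commit.

/*
 * Hypothetical userspace side of the queue mmap handshake.
 */
#include <stdint.h>
#include <stddef.h>
#include <sys/types.h>
#include <sys/mman.h>

/*
 * cmd_fd: verbs device file descriptor (assumed mapping target)
 * offset: the __u64 returned through udata at create time
 * size:   page-aligned size of the ring (header plus entries)
 */
static void *map_queue(int cmd_fd, uint64_t offset, size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, cmd_fd, (off_t) offset);
	return p == MAP_FAILED ? NULL : p;
}

Once mapped, post-receive and poll operations become loads and stores on shared memory instead of commands copied into the kernel, which is where the copy elimination described above comes from.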
Ralph Campbell authored and Roland Dreier committed Sep 22, 2006
1 parent 9bc57e2 commit 373d991
Showing 9 changed files with 785 additions and 382 deletions.
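
The ipath_cq.c diff below manipulates a new shared structure, struct ipath_cq_wc, through cq->queue. Its definition (presumably added to the driver's verbs header, which is not rendered on this page) can be reconstructed approximately from how the diff allocates and indexes it; treat the sketch below as an inference, not the commit's exact declaration.

/*
 * Approximate layout of the user-mappable CQ ring, inferred from
 * vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries) and the
 * head/tail handling in ipath_cq_enter() and ipath_poll_cq().
 */
struct ipath_cq_wc {
	u32 head;		/* index of next entry to fill */
	u32 tail;		/* index of next entry to be consumed */
	struct ib_wc queue[1];	/* sized at allocation to hold cqe + 1 entries */
};

Because head and tail now live in memory a user process can write, the kernel clamps them to the valid range before trusting them, as the enter, poll, and resize paths below all do.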
1 change: 1 addition & 0 deletions drivers/infiniband/hw/ipath/Makefile
@@ -25,6 +25,7 @@ ib_ipath-y := \
ipath_cq.o \
ipath_keys.o \
ipath_mad.o \
ipath_mmap.o \
ipath_mr.o \
ipath_qp.o \
ipath_rc.o \
176 changes: 133 additions & 43 deletions drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,28 @@
* @entry: work completion entry to add
* @sig: true if @entry is a solicitated entry
*
* This may be called with one of the qp->s_lock or qp->r_rq.lock held.
* This may be called with qp->s_lock held.
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
struct ipath_cq_wc *wc = cq->queue;
unsigned long flags;
u32 head;
u32 next;

spin_lock_irqsave(&cq->lock, flags);

if (cq->head == cq->ibcq.cqe)
/*
* Note that the head pointer might be writable by user processes.
* Take care to verify it is a sane value.
*/
head = wc->head;
if (head >= (unsigned) cq->ibcq.cqe) {
head = cq->ibcq.cqe;
next = 0;
else
next = cq->head + 1;
if (unlikely(next == cq->tail)) {
} else
next = head + 1;
if (unlikely(next == wc->tail)) {
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
@@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
}
return;
}
cq->queue[cq->head] = *entry;
cq->head = next;
wc->queue[head] = *entry;
wc->head = next;

if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
struct ipath_cq_wc *wc = cq->queue;
unsigned long flags;
int npolled;

spin_lock_irqsave(&cq->lock, flags);

for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
if (cq->tail == cq->head)
if (wc->tail == wc->head)
break;
*entry = cq->queue[cq->tail];
if (cq->tail == cq->ibcq.cqe)
cq->tail = 0;
*entry = wc->queue[wc->tail];
if (wc->tail >= cq->ibcq.cqe)
wc->tail = 0;
else
cq->tail++;
wc->tail++;
}

spin_unlock_irqrestore(&cq->lock, flags);
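
The kernel loop above still serves kernel CQ consumers; a userspace consumer of a mapped CQ runs essentially the same loop against the shared ring without entering the kernel. Below is a self-contained illustration with placeholder types; a real provider must use the driver's shared work-completion layout and consider memory ordering, both of which are glossed over here.

#include <stdint.h>

/* Placeholder for the driver's shared work-completion entry. */
struct wc_entry {
	unsigned char raw[64];
};

/* Assumed ring layout: head/tail header followed by cqe + 1 entries. */
struct mapped_cq {
	uint32_t head;			/* advanced by the kernel producer */
	uint32_t tail;			/* advanced by the userspace consumer */
	struct wc_entry queue[];
};

/* Drain up to nent completions; cqe is the largest valid ring index. */
static int poll_mapped_cq(struct mapped_cq *cq, uint32_t cqe,
			  struct wc_entry *dest, int nent)
{
	uint32_t tail = cq->tail;
	int n = 0;

	while (n < nent && tail != cq->head) {
		dest[n++] = cq->queue[tail];
		tail = (tail >= cqe) ? 0 : tail + 1;	/* wrap like the kernel loop */
	}
	cq->tail = tail;	/* publish consumption back to the producer */
	return n;
}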
@@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
{
struct ipath_ibdev *dev = to_idev(ibdev);
struct ipath_cq *cq;
struct ib_wc *wc;
struct ipath_cq_wc *wc;
struct ib_cq *ret;

if (entries > ib_ipath_max_cqes) {
ret = ERR_PTR(-EINVAL);
goto bail;
goto done;
}

if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
ret = ERR_PTR(-ENOMEM);
goto bail;
goto done;
}

/*
* Need to use vmalloc() if we want to support large #s of
* entries.
*/
/* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
ret = ERR_PTR(-ENOMEM);
goto bail;
goto done;
}

/*
* Need to use vmalloc() if we want to support large #s of entries.
* Allocate the completion queue entries and head/tail pointers.
* This is allocated separately so that it can be resized and
* also mapped into user space.
* We need to use vmalloc() in order to support mmap and large
* numbers of entries.
*/
wc = vmalloc(sizeof(*wc) * (entries + 1));
wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
if (!wc) {
kfree(cq);
ret = ERR_PTR(-ENOMEM);
goto bail;
goto bail_cq;
}

/*
* Return the address of the WC as the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
struct ipath_mmap_info *ip;
__u64 offset = (__u64) wc;
int err;

err = ib_copy_to_udata(udata, &offset, sizeof(offset));
if (err) {
ret = ERR_PTR(err);
goto bail_wc;
}

/* Allocate info for ipath_mmap(). */
ip = kmalloc(sizeof(*ip), GFP_KERNEL);
if (!ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wc;
}
cq->ip = ip;
ip->context = context;
ip->obj = wc;
kref_init(&ip->ref);
ip->mmap_cnt = 0;
ip->size = PAGE_ALIGN(sizeof(*wc) +
sizeof(struct ib_wc) * entries);
spin_lock_irq(&dev->pending_lock);
ip->next = dev->pending_mmaps;
dev->pending_mmaps = ip;
spin_unlock_irq(&dev->pending_lock);
} else
cq->ip = NULL;

/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
cq->triggered = 0;
spin_lock_init(&cq->lock);
tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
cq->head = 0;
cq->tail = 0;
wc->head = 0;
wc->tail = 0;
cq->queue = wc;

ret = &cq->ibcq;

dev->n_cqs_allocated++;
goto done;

bail:
bail_wc:
vfree(wc);

bail_cq:
kfree(cq);

done:
return ret;
}

@@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)

tasklet_kill(&cq->comptask);
dev->n_cqs_allocated--;
vfree(cq->queue);
if (cq->ip)
kref_put(&cq->ip->ref, ipath_release_mmap_info);
else
vfree(cq->queue);
kfree(cq);

return 0;
@@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
spin_lock_irqsave(&cq->lock, flags);
/*
* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
* any other transitions.
* any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
*/
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify;
@@ -264,46 +319,81 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
struct ib_wc *wc, *old_wc;
u32 n;
struct ipath_cq_wc *old_wc = cq->queue;
struct ipath_cq_wc *wc;
u32 head, tail, n;
int ret;

/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
wc = vmalloc(sizeof(*wc) * (cqe + 1));
wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
if (!wc) {
ret = -ENOMEM;
goto bail;
}

/*
* Return the address of the WC as the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
__u64 offset = (__u64) wc;

ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
if (ret)
goto bail;
}

spin_lock_irq(&cq->lock);
if (cq->head < cq->tail)
n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
/*
* Make sure head and tail are sane since they
* might be user writable.
*/
head = old_wc->head;
if (head > (u32) cq->ibcq.cqe)
head = (u32) cq->ibcq.cqe;
tail = old_wc->tail;
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
if (head < tail)
n = cq->ibcq.cqe + 1 + head - tail;
else
n = cq->head - cq->tail;
n = head - tail;
if (unlikely((u32)cqe < n)) {
spin_unlock_irq(&cq->lock);
vfree(wc);
ret = -EOVERFLOW;
goto bail;
}
for (n = 0; cq->tail != cq->head; n++) {
wc[n] = cq->queue[cq->tail];
if (cq->tail == cq->ibcq.cqe)
cq->tail = 0;
for (n = 0; tail != head; n++) {
wc->queue[n] = old_wc->queue[tail];
if (tail == (u32) cq->ibcq.cqe)
tail = 0;
else
cq->tail++;
tail++;
}
cq->ibcq.cqe = cqe;
cq->head = n;
cq->tail = 0;
old_wc = cq->queue;
wc->head = n;
wc->tail = 0;
cq->queue = wc;
spin_unlock_irq(&cq->lock);

vfree(old_wc);

if (cq->ip) {
struct ipath_ibdev *dev = to_idev(ibcq->device);
struct ipath_mmap_info *ip = cq->ip;

ip->obj = wc;
ip->size = PAGE_ALIGN(sizeof(*wc) +
sizeof(struct ib_wc) * cqe);
spin_lock_irq(&dev->pending_lock);
ip->next = dev->pending_mmaps;
dev->pending_mmaps = ip;
spin_unlock_irq(&dev->pending_lock);
}

ret = 0;

bail:
(The ipath_cq.c diff is truncated here; diffs for the remaining changed files did not load and are not shown.)

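The new ipath_mmap.c named in the Makefile change is among the diffs that did not load. Based on how ipath_create_cq() and ipath_resize_cq() above queue a struct ipath_mmap_info on dev->pending_mmaps and hand the vmalloc address back as the mmap offset, its handler plausibly looks like the sketch below; the control flow and anything not visible in the diffs above (including vm_ops cleanup) are assumptions, not the commit's actual code.

/*
 * Hypothetical sketch of ipath_mmap(): find the pending object whose
 * recorded address matches the requested offset, then remap the
 * vmalloc()ed buffer into the caller's address space.
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>

#include "ipath_verbs.h"

int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct ipath_ibdev *dev = to_idev(context->device);
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long size = vma->vm_end - vma->vm_start;
	struct ipath_mmap_info *ip, **pp;
	int ret = -EINVAL;

	spin_lock_irq(&dev->pending_lock);
	/* The list is short: a create call is normally followed by mmap(). */
	for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
		/* Only the creating context may map the object. */
		if (ip->context != context ||
		    (unsigned long) ip->obj != offset || size > ip->size)
			continue;
		*pp = ip->next;		/* claim the entry */
		break;
	}
	spin_unlock_irq(&dev->pending_lock);

	if (!ip)
		goto done;

	ret = remap_vmalloc_range(vma, ip->obj, 0);
	/* A complete handler would also install vm_ops so the kref taken
	 * at create time is dropped when the mapping goes away. */
done:
	return ret;
}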