Skip to content

Commit

Permalink
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kern…
Browse files Browse the repository at this point in the history
…el/git/roland/infiniband

Pull infiniband updates from Roland Dreier:
 "Main batch of InfiniBand/RDMA changes for 3.19:

   - On-demand paging support in core midlayer and mlx5 driver.  This
     lets userspace create non-pinned memory regions and have the
     adapter HW trigger page faults.
   - iSER and IPoIB updates and fixes.
   - Low-level HW driver updates for cxgb4, mlx4 and ocrdma.
   - Other miscellaneous fixes"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (56 commits)
  IB/mlx5: Implement on demand paging by adding support for MMU notifiers
  IB/mlx5: Add support for RDMA read/write responder page faults
  IB/mlx5: Handle page faults
  IB/mlx5: Page faults handling infrastructure
  IB/mlx5: Add mlx5_ib_update_mtt to update page tables after creation
  IB/mlx5: Changes in memory region creation to support on-demand paging
  IB/mlx5: Implement the ODP capability query verb
  mlx5_core: Add support for page faults events and low level handling
  mlx5_core: Re-add MLX5_DEV_CAP_FLAG_ON_DMND_PG flag
  IB/srp: Allow newline separator for connection string
  IB/core: Implement support for MMU notifiers regarding on demand paging regions
  IB/core: Add support for on demand paging regions
  IB/core: Add flags for on demand paging support
  IB/core: Add support for extended query device caps
  IB/mlx5: Add function to read WQE from user-space
  IB/core: Add umem function to read data from user-space
  IB/core: Replace ib_umem's offset field with a full address
  IB/mlx5: Enhance UMR support to allow partial page table update
  IB/mlx5: Remove per-MR pas and dma pointers
  RDMA/ocrdma: Always resolve destination mac from GRH for UD QPs
  ...
  • Loading branch information
Linus Torvalds committed Dec 19, 2014
2 parents 018cb13 + a7cfef2 commit 4c929fe
Show file tree
Hide file tree
Showing 53 changed files with 3,511 additions and 457 deletions.
11 changes: 11 additions & 0 deletions drivers/infiniband/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ config INFINIBAND_USER_MEM
depends on INFINIBAND_USER_ACCESS != n
default y

config INFINIBAND_ON_DEMAND_PAGING
bool "InfiniBand on-demand paging support"
depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER
default y
---help---
On demand paging support for the InfiniBand subsystem.
Together with driver support this allows registration of
memory regions without pinning their pages, fetching the
pages on demand instead.

config INFINIBAND_ADDR_TRANS
bool
depends on INFINIBAND
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o

ib_mad-y := mad.o smi.o agent.o mad_rmpp.o

Expand Down
4 changes: 2 additions & 2 deletions drivers/infiniband/core/addr.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ static void set_timeout(unsigned long time)
unsigned long delay;

delay = time - jiffies;
if ((long)delay <= 0)
delay = 1;
if ((long)delay < 0)
delay = 0;

mod_delayed_work(addr_wq, &work, delay);
}
Expand Down
11 changes: 8 additions & 3 deletions drivers/infiniband/core/multicast.c
Original file line number Diff line number Diff line change
Expand Up @@ -525,17 +525,22 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
if (status)
process_join_error(group, status);
else {
int mgids_changed, is_mgid0;
ib_find_pkey(group->port->dev->device, group->port->port_num,
be16_to_cpu(rec->pkey), &pkey_index);

spin_lock_irq(&group->port->lock);
group->rec = *rec;
if (group->state == MCAST_BUSY &&
group->pkey_index == MCAST_INVALID_PKEY_INDEX)
group->pkey_index = pkey_index;
if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
sizeof(group->rec.mgid));
group->rec = *rec;
if (mgids_changed) {
rb_erase(&group->node, &group->port->table);
mcast_insert(group->port, group, 1);
is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
sizeof(mgid0));
mcast_insert(group->port, group, is_mgid0);
}
spin_unlock_irq(&group->port->lock);
}
Expand Down
72 changes: 66 additions & 6 deletions drivers/infiniband/core/umem.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <linux/hugetlb.h>
#include <linux/dma-attrs.h>
#include <linux/slab.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"

Expand Down Expand Up @@ -69,6 +70,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d

/**
* ib_umem_get - Pin and DMA map userspace memory.
*
* If access flags indicate ODP memory, avoid pinning. Instead, stores
* the mm for future page fault handling in conjunction with MMU notifiers.
*
* @context: userspace context to pin memory for
* @addr: userspace virtual address to start at
* @size: length of region to pin
Expand Down Expand Up @@ -103,17 +108,30 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,

umem->context = context;
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->address = addr;
umem->page_size = PAGE_SIZE;
umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
umem->writable = !!(access &
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));

if (access & IB_ACCESS_ON_DEMAND) {
ret = ib_umem_odp_get(context, umem);
if (ret) {
kfree(umem);
return ERR_PTR(ret);
}
return umem;
}

umem->odp_data = NULL;

/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
Expand All @@ -132,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!vma_list)
umem->hugetlb = 0;

npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
npages = ib_umem_num_pages(umem);

down_write(&current->mm->mmap_sem);

Expand Down Expand Up @@ -235,6 +253,11 @@ void ib_umem_release(struct ib_umem *umem)
struct task_struct *task;
unsigned long diff;

if (umem->odp_data) {
ib_umem_odp_release(umem);
return;
}

__ib_umem_release(umem->context->device, umem, 1);

task = get_pid_task(umem->pid, PIDTYPE_PID);
Expand All @@ -246,7 +269,7 @@ void ib_umem_release(struct ib_umem *umem)
if (!mm)
goto out;

diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
diff = ib_umem_num_pages(umem);

/*
* We may be called with the mm's mmap_sem already held. This
Expand Down Expand Up @@ -283,6 +306,9 @@ int ib_umem_page_count(struct ib_umem *umem)
int n;
struct scatterlist *sg;

if (umem->odp_data)
return ib_umem_num_pages(umem);

shift = ilog2(umem->page_size);

n = 0;
Expand All @@ -292,3 +318,37 @@ int ib_umem_page_count(struct ib_umem *umem)
return n;
}
EXPORT_SYMBOL(ib_umem_page_count);

/*
* Copy from the given ib_umem's pages to the given buffer.
*
* umem - the umem to copy from
* offset - offset to start copying from
* dst - destination buffer
* length - buffer length
*
* Returns 0 on success, or an error code.
*/
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length)
{
size_t end = offset + length;
int ret;

if (offset > umem->length || length > umem->length - offset) {
pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
offset, umem->length, end);
return -EINVAL;
}

ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
offset + ib_umem_offset(umem));

if (ret < 0)
return ret;
else if (ret != length)
return -EINVAL;
else
return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);
Loading

0 comments on commit 4c929fe

Please sign in to comment.