Skip to content

Commit

Permalink
IB/mlx5: Add contiguous ODP support
Browse files Browse the repository at this point in the history
Currently ODP supports only regular MMU pages.
Add ODP support for regions consisting of physically contiguous chunks
of arbitrary order (huge pages for instance) to improve performance.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
  • Loading branch information
Artemy Kovalyov authored and Doug Ledford committed Apr 25, 2017
1 parent 403cd12 commit b2ac918
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 18 deletions.
9 changes: 4 additions & 5 deletions drivers/infiniband/hw/mlx5/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,12 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
int entry;
unsigned long page_shift = umem->page_shift;

/* With ODP we must always match OS page size. */
if (umem->odp_data) {
*count = ib_umem_page_count(umem);
*shift = PAGE_SHIFT;
*ncont = *count;
*ncont = ib_umem_page_count(umem);
*count = *ncont << (page_shift - PAGE_SHIFT);
*shift = page_shift;
if (order)
*order = ilog2(roundup_pow_of_two(*count));
*order = ilog2(roundup_pow_of_two(*ncont));

return;
}
Expand Down
28 changes: 15 additions & 13 deletions drivers/infiniband/hw/mlx5/odp.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
*/

for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The huristic here can be improved to
Expand All @@ -218,8 +218,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,

if (in_block && umr_offset == 0) {
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx,
PAGE_SHIFT,
idx - blk_start_idx, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
Expand All @@ -228,8 +227,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
}
if (in_block)
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1,
PAGE_SHIFT,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
/*
Expand Down Expand Up @@ -516,7 +514,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
* Returns number of pages retrieved on success. The caller may continue to
* Returns number of OS pages retrieved on success. The caller may continue to
* the next data segment.
* Can return the following error codes:
* -EAGAIN to designate a temporary error. The caller will abort handling the
Expand All @@ -531,13 +529,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
{
int srcu_key;
unsigned int current_seq = 0;
u64 start_idx;
u64 start_idx, page_mask;
int npages = 0, ret = 0;
struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
struct ib_umem_odp *odp;
int implicit = 0;
size_t size;
int page_shift;

srcu_key = srcu_read_lock(&dev->mr_srcu);
mr = mlx5_ib_odp_find_mr_lkey(dev, key);
Expand Down Expand Up @@ -583,6 +582,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
odp = mr->umem->odp_data;
}

page_shift = mr->umem->page_shift;
page_mask = ~(BIT(page_shift) - 1);

next_mr:
current_seq = READ_ONCE(odp->notifiers_seq);
/*
Expand All @@ -592,7 +594,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
smp_rmb();

size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;

if (mr->umem->writable)
access_mask |= ODP_WRITE_ALLOWED_BIT;
Expand All @@ -614,7 +616,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
* checks this.
*/
ret = mlx5_ib_update_xlt(mr, start_idx, np,
PAGE_SHIFT,
page_shift,
MLX5_IB_UPD_XLT_ATOMIC);
} else {
ret = -EAGAIN;
Expand All @@ -625,14 +627,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
mlx5_ib_err(dev, "Failed to update mkey page tables\n");
goto srcu_unlock;
}

if (bytes_mapped) {
u32 new_mappings = np * PAGE_SIZE -
(io_virt - round_down(io_virt, PAGE_SIZE));
u32 new_mappings = (np << page_shift) -
(io_virt - round_down(io_virt,
1 << page_shift));
*bytes_mapped += min_t(u32, new_mappings, size);
}

npages += np;
npages += np << (page_shift - PAGE_SHIFT);
}

bcnt -= size;
Expand Down

0 comments on commit b2ac918

Please sign in to comment.