Skip to content

Commit

Permalink
RDMA/odp: Lift umem_mutex out of ib_umem_odp_unmap_dma_pages()
Browse files Browse the repository at this point in the history
This fixes a race of the form:
    CPU0                               CPU1
mlx5_ib_invalidate_range()     mlx5_ib_invalidate_range()
				 // This one actually makes npages == 0
				 ib_umem_odp_unmap_dma_pages()
				 if (npages == 0 && !dying)
  // This one does nothing
  ib_umem_odp_unmap_dma_pages()
  if (npages == 0 && !dying)
     dying = 1;
                                    dying = 1;
				    schedule_work(&umem_odp->work);
     // Double schedule of the same work
     schedule_work(&umem_odp->work);  // BOOM

npages and dying must be read and written under the umem_mutex lock.

Since whenever ib_umem_odp_unmap_dma_pages() is called mlx5 must also call
mlx5_ib_update_xlt, and both need to be done in the same locking region,
hoist the lock out of unmap.

This avoids an expensive double critical section in
mlx5_ib_invalidate_range().

Fixes: 81713d3 ("IB/mlx5: Add implicit MR support")
Link: https://lore.kernel.org/r/20191001153821.23621-4-jgg@ziepe.ca
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
  • Loading branch information
Jason Gunthorpe committed Oct 4, 2019
1 parent f28b193 commit 9dc775e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
6 changes: 4 additions & 2 deletions drivers/infiniband/core/umem_odp.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* that the hardware will not attempt to access the MR any more.
*/
if (!umem_odp->is_implicit_odp) {
mutex_lock(&umem_odp->umem_mutex);
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
ib_umem_end(umem_odp));
mutex_unlock(&umem_odp->umem_mutex);
kvfree(umem_odp->dma_list);
kvfree(umem_odp->page_list);
}
Expand Down Expand Up @@ -719,14 +721,15 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 addr;
struct ib_device *dev = umem_odp->umem.ibdev;

lockdep_assert_held(&umem_odp->umem_mutex);

virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
/* Note that during the run of this function, the
* notifiers_count of the MR is > 0, preventing any racing
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
if (umem_odp->page_list[idx]) {
Expand Down Expand Up @@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
umem_odp->npages--;
}
}
mutex_unlock(&umem_odp->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);

Expand Down
12 changes: 8 additions & 4 deletions drivers/infiniband/hw/mlx5/odp.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&umem_odp->umem_mutex);
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
Expand All @@ -319,10 +318,11 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,

if (unlikely(!umem_odp->npages && mr->parent &&
!umem_odp->dying)) {
WRITE_ONCE(umem_odp->dying, 1);
umem_odp->dying = 1;
atomic_inc(&mr->parent->num_leaf_free);
schedule_work(&umem_odp->work);
}
mutex_unlock(&umem_odp->umem_mutex);
}

void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
Expand Down Expand Up @@ -585,15 +585,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
if (mr->parent != imr)
continue;

mutex_lock(&umem_odp->umem_mutex);
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
ib_umem_end(umem_odp));

if (umem_odp->dying)
if (umem_odp->dying) {
mutex_unlock(&umem_odp->umem_mutex);
continue;
}

WRITE_ONCE(umem_odp->dying, 1);
umem_odp->dying = 1;
atomic_inc(&imr->num_leaf_free);
schedule_work(&umem_odp->work);
mutex_unlock(&umem_odp->umem_mutex);
}
up_read(&per_mm->umem_rwsem);

Expand Down

0 comments on commit 9dc775e

Please sign in to comment.