RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow
Currently, when deregistering an MR, if the mkey doesn't belong to a cache
entry it is destroyed.  As a result, restarting an application with many
non-cached mkeys is inefficient, since all the mkeys are destroyed and then
recreated.  This takes a long time (for 100,000 MRs, ~20 seconds for dereg
and ~28 seconds for re-reg).
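
For reference, the workload behind these numbers is just a large
register/deregister loop over libibverbs.  A minimal sketch is below; the
device selection, MR count, buffer size, and access flags are illustrative
assumptions and not part of this patch.

/*
 * Illustrative only: register and deregister many MRs, the kind of
 * workload whose restart time this change shortens.
 */
#include <stdlib.h>
#include <infiniband/verbs.h>

#define NUM_MRS 100000
#define MR_SIZE 4096

int main(void)
{
	struct ibv_device **devs = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_mr **mrs;
	char *buf;
	long i;

	if (!devs || !devs[0])
		return 1;
	ctx = ibv_open_device(devs[0]);
	pd = ctx ? ibv_alloc_pd(ctx) : NULL;
	buf = calloc(NUM_MRS, MR_SIZE);
	mrs = calloc(NUM_MRS, sizeof(*mrs));
	if (!pd || !buf || !mrs)
		return 1;

	/* Register many MRs; each one consumes (or creates) an mkey. */
	for (i = 0; i < NUM_MRS; i++)
		mrs[i] = ibv_reg_mr(pd, buf + i * MR_SIZE, MR_SIZE,
				    IBV_ACCESS_LOCAL_WRITE);

	/*
	 * Deregister them all.  With this patch, cacheable mkeys go back
	 * into the mkey cache instead of being destroyed, so the next
	 * registration pass (e.g. after an application restart) is much
	 * faster.
	 */
	for (i = 0; i < NUM_MRS; i++)
		if (mrs[i])
			ibv_dereg_mr(mrs[i]);

	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(devs);
	free(mrs);
	free(buf);
	return 0;
}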

To shorten the restart runtime, insert all cacheable mkeys into the cache
on deregistration.  If no existing entry fits the mkey's properties, create
a temporary entry that does.
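
As a rough picture of that policy, the sketch below models the
find-or-create step as self-contained userspace C.  The types, the list
walk, and the two-field key are simplifications (the kernel keys an rb-tree
on mlx5r_cache_rb_key; the real logic is cache_ent_find_and_store() in mr.c
below).

/*
 * Illustrative model only: a flat list stands in for the kernel's
 * rb-tree, and the key is reduced to two fields of mlx5r_cache_rb_key.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct rb_key {
	unsigned int access_flags;
	unsigned int ndescs;
};

struct cache_ent {
	struct rb_key key;
	bool is_tmp;		/* temporary entries are not shown in sysfs */
	struct cache_ent *next;
};

static struct cache_ent *cache_head;

/* Reuse an entry that exactly fits the key, else create a temporary one. */
static struct cache_ent *find_or_create_ent(struct rb_key key)
{
	struct cache_ent *ent;

	for (ent = cache_head; ent; ent = ent->next)
		if (ent->key.access_flags == key.access_flags &&
		    ent->key.ndescs == key.ndescs)
			return ent;

	ent = calloc(1, sizeof(*ent));
	if (!ent)
		return NULL;
	ent->key = key;
	ent->is_tmp = true;
	ent->next = cache_head;
	cache_head = ent;
	return ent;
}

int main(void)
{
	struct rb_key key = { .access_flags = 0x7, .ndescs = 16 };
	struct cache_ent *first = find_or_create_ent(key);
	struct cache_ent *second = find_or_create_ent(key);

	/* The second lookup reuses the entry created by the first. */
	printf("same entry: %d, temporary: %d\n",
	       first == second, first ? first->is_tmp : -1);
	return 0;
}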

After a predetermined timeout, the cache entries shrink back to their
initial high limit.

The mkeys will still be in the cache when they are consumed again after an
application restart, so registration is much faster (for 100,000 MRs, ~4
seconds for dereg and ~5 seconds for re-reg).

The temporary cache entries created to store the non-cached mkeys are not
exposed through sysfs like the default cache entries are.

Link: https://lore.kernel.org/r/20230125222807.6921-6-michaelgur@nvidia.com
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Michael Guralnik authored and Jason Gunthorpe committed Jan 27, 2023
1 parent 73d09b2 commit dd1b913
Showing 2 changed files with 44 additions and 13 deletions.
drivers/infiniband/hw/mlx5/mlx5_ib.h (2 additions, 0 deletions)
@@ -630,6 +630,8 @@ struct mlx5_ib_mkey {
 	unsigned int ndescs;
 	struct wait_queue_head wait;
 	refcount_t usecount;
+	/* User Mkey must hold either a rb_key or a cache_ent. */
+	struct mlx5r_cache_rb_key rb_key;
 	struct mlx5_cache_ent *cache_ent;
 };

drivers/infiniband/hw/mlx5/mr.c (42 additions, 13 deletions)
Expand Up @@ -1110,15 +1110,14 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
ent = mkey_cache_ent_from_rb_key(dev, rb_key);
/*
* Matches access in alloc_cache_mr(). If the MR can't come from the
* cache then synchronously create an uncached one.
* If the MR can't come from the cache then synchronously create an uncached
* one.
*/
if (!ent || ent->limit == 0 ||
!mlx5r_umr_can_reconfig(dev, 0, access_flags) ||
mlx5_umem_needs_ats(dev, umem, access_flags)) {
if (!ent) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size, false);
mutex_unlock(&dev->slow_path_mutex);
mr->mmkey.rb_key = rb_key;
return mr;
}

@@ -1209,6 +1208,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
 		goto err_2;
 	}
 	mr->mmkey.type = MLX5_MKEY_MR;
+	mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift);
 	mr->umem = umem;
 	set_mr_fields(dev, mr, umem->length, access_flags, iova);
 	kvfree(in);
@@ -1746,6 +1746,40 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 	}
 }
 
+static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
+				    struct mlx5_ib_mr *mr)
+{
+	struct mlx5_mkey_cache *cache = &dev->cache;
+	struct mlx5_cache_ent *ent;
+
+	if (mr->mmkey.cache_ent) {
+		xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+		mr->mmkey.cache_ent->in_use--;
+		xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
+		goto end;
+	}
+
+	mutex_lock(&cache->rb_lock);
+	ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
+	mutex_unlock(&cache->rb_lock);
+	if (ent) {
+		if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
+			mr->mmkey.cache_ent = ent;
+			goto end;
+		}
+	}
+
+	ent = mlx5r_cache_create_ent(dev, mr->mmkey.rb_key, false);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	mr->mmkey.cache_ent = ent;
+
+end:
+	return push_mkey(mr->mmkey.cache_ent, false,
+			 xa_mk_value(mr->mmkey.key));
+}
+
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 {
 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1791,16 +1825,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 	}
 
 	/* Stop DMA */
-	if (mr->mmkey.cache_ent) {
-		xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
-		mr->mmkey.cache_ent->in_use--;
-		xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
-
+	if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
 		if (mlx5r_umr_revoke_mr(mr) ||
-		    push_mkey(mr->mmkey.cache_ent, false,
-			      xa_mk_value(mr->mmkey.key)))
+		    cache_ent_find_and_store(dev, mr))
 			mr->mmkey.cache_ent = NULL;
-	}
+
 	if (!mr->mmkey.cache_ent) {
 		rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
 		if (rc)
