RDMA/mlx5: Allow MRs to be created in the cache synchronously
If the cache is completely out of MRs, and we are running in cache mode,
then directly, and synchronously, create an MR that is compatible with the
cache bucket using a sleeping mailbox command. This ensures that the
thread that is waiting for the MR absolutely will get one.

When an MR allocated in this way is freed, it is compatible with the
cache bucket and will be recycled back into it.

This deletes the very buggy ent->compl scheme that was previously used for
synchronous MR allocation.

Link: https://lore.kernel.org/r/20200310082238.239865-13-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Jason Gunthorpe committed Mar 13, 2020
1 parent 1c78a21 commit aad719d
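
The core of the change is the fallback in mlx5_mr_cache_alloc() and
alloc_mr_from_cache(): take an MR from the cache bucket if one is available,
otherwise create one synchronously so the caller is guaranteed an MR instead
of blocking on the removed ent->compl completion. Below is a minimal,
self-contained sketch of that allocation pattern; the types and helpers here
(struct mr, struct cache_ent, cache_alloc) are simplified stand-ins for
illustration only, not the driver's real structures, mailbox commands, or
locking.

#include <stdio.h>
#include <stdlib.h>

struct mr {                     /* stand-in for struct mlx5_ib_mr */
	struct mr *next;
	int order;
};

struct cache_ent {              /* stand-in for struct mlx5_cache_ent */
	struct mr *head;        /* MRs currently sitting in this bucket */
	int order;              /* all MRs in the bucket share this order */
};

/* Slow path: synchronously build an MR that is compatible with the bucket,
 * so it can be recycled back into the bucket when it is freed. */
static struct mr *create_cache_mr(struct cache_ent *ent)
{
	struct mr *mr = calloc(1, sizeof(*mr));

	if (!mr)
		return NULL;
	mr->order = ent->order;
	return mr;
}

/* Fast path first: pop an MR from the bucket; fall back to a synchronous
 * create when the bucket is empty, so the caller always gets an MR. */
static struct mr *cache_alloc(struct cache_ent *ent)
{
	struct mr *mr = ent->head;

	if (mr) {
		ent->head = mr->next;
		return mr;
	}
	return create_cache_mr(ent);
}

int main(void)
{
	struct cache_ent ent = { .head = NULL, .order = 5 };
	struct mr *mr = cache_alloc(&ent);      /* empty bucket -> sync create */

	printf("got MR of order %d\n", mr ? mr->order : -1);
	free(mr);
	return 0;
}
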
Showing 2 changed files with 87 additions and 61 deletions.
1 change: 0 additions & 1 deletion drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -722,7 +722,6 @@ struct mlx5_cache_ent {
struct mlx5_ib_dev *dev;
struct work_struct work;
struct delayed_work dwork;
struct completion compl;
};

struct mlx5_mr_cache {
147 changes: 87 additions & 60 deletions drivers/infiniband/hw/mlx5/mr.c
@@ -139,14 +139,34 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
queue_adjust_cache_locked(ent);
ent->pending--;
spin_unlock_irqrestore(&ent->lock, flags);
}

static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
{
struct mlx5_ib_mr *mr;

mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return NULL;
mr->order = ent->order;
mr->cache_ent = ent;
mr->dev = ent->dev;

MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);

if (!completion_done(&ent->compl))
complete(&ent->compl);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
MLX5_SET(mkc, mkc, log_page_size, ent->page);
return mr;
}

/* Asynchronously schedule new MRs to be populated in the cache. */
static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
@@ -159,25 +179,11 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)

mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
mr = alloc_cache_mr(ent, mkc);
if (!mr) {
err = -ENOMEM;
break;
}
mr->order = ent->order;
mr->cache_ent = ent;
mr->dev = ent->dev;

MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2,
(ent->access_mode >> 2) & 0x7);

MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
MLX5_SET(mkc, mkc, log_page_size, ent->page);

spin_lock_irq(&ent->lock);
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
@@ -205,6 +211,44 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
return err;
}

/* Synchronously create a MR in the cache */
static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
{
size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
int err;

in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return ERR_PTR(-ENOMEM);
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

mr = alloc_cache_mr(ent, mkc);
if (!mr) {
err = -ENOMEM;
goto free_in;
}

err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
if (err)
goto free_mr;

mr->mmkey.type = MLX5_MKEY_MR;
WRITE_ONCE(ent->dev->cache.last_add, jiffies);
spin_lock_irq(&ent->lock);
ent->total_mrs++;
spin_unlock_irq(&ent->lock);
kfree(in);
return mr;
free_mr:
kfree(mr);
free_in:
kfree(in);
return ERR_PTR(err);
}

static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_mr *mr;
@@ -427,12 +471,12 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
if (ent->disabled)
goto out;
if (err) {
if (err == -EAGAIN) {
mlx5_ib_dbg(dev, "returned eagain, order %d\n",
ent->order);
queue_delayed_work(cache->wq, &ent->dwork,
msecs_to_jiffies(3));
} else {
/*
* EAGAIN only happens if pending is positive, so we
* will be rescheduled from reg_mr_callback(). The only
* failure path here is ENOMEM.
*/
if (err != -EAGAIN) {
mlx5_ib_warn(
dev,
"command failed order %d, err %d\n",
@@ -495,36 +539,30 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
int err;

if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
entry >= ARRAY_SIZE(cache->ent)))
return ERR_PTR(-EINVAL);

ent = &cache->ent[entry];
while (1) {
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);

err = add_keys(ent, 1);
if (err && err != -EAGAIN)
return ERR_PTR(err);

wait_for_completion(&ent->compl);
} else {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
list);
list_del(&mr->list);
ent->available_mrs--;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
mr = create_cache_mr(ent);
if (IS_ERR(mr))
return mr;
}
} else {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->available_mrs--;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
}
return mr;
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent)
/* Return a MR already available in the cache */
static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
{
struct mlx5_ib_dev *dev = req_ent->dev;
struct mlx5_ib_mr *mr = NULL;
@@ -676,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->dev = dev;
ent->limit = 0;

init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

@@ -939,26 +976,16 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
struct mlx5_ib_mr *mr;
int err = 0;
int i;

if (!ent)
return ERR_PTR(-E2BIG);
for (i = 0; i < 1; i++) {
mr = alloc_cached_mr(ent);
if (mr)
break;

err = add_keys(ent, 1);
if (err && err != -EAGAIN) {
mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
break;
}
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
if (IS_ERR(mr))
return mr;
}

if (!mr)
return ERR_PTR(-EAGAIN);

mr->ibmr.pd = pd;
mr->umem = umem;
mr->access_flags = access_flags;
