IB/mlx5: Fix long EEH recover time with NVMe offloads
On NVMe offload connections with many IO queues, EEH takes a long time to
recover. The culprit is the synchronize_srcu() call in destroy_mkey(): every
mkey destruction waits out a full SRCU grace period, even though only ODP
mkeys have SRCU readers (the page-fault handlers). The solution is to call
synchronize_srcu() only for ODP mkeys.

Fixes: b4cfe44 ("IB/mlx5: Implement on demand paging by adding support for MMU notifiers")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Huy Nguyen authored and Jason Gunthorpe committed Dec 20, 2018
1 parent 842a9c8 commit bb7e22a
Showing 1 changed file with 16 additions and 3 deletions.
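
For background, synchronize_srcu() blocks until every SRCU read-side critical
section that began before the call has completed, so each mkey destruction
used to pay a full grace period; with mkeys belonging to many NVMe IO queues,
EEH recovery serialized many such waits. Only ODP mkeys are ever dereferenced
under dev->mr_srcu (by the page-fault handlers), which is why plain mkeys can
skip the wait. Below is a minimal, self-contained sketch of the SRCU
publish/read/retire pattern involved; demo_srcu, demo_obj and the function
names are hypothetical stand-ins, not the driver's code.

#include <linux/slab.h>
#include <linux/srcu.h>

struct demo_obj {
	int data;
};

static struct demo_obj __rcu *demo_ptr;	/* stands in for an mkey lookup slot */
DEFINE_SRCU(demo_srcu);			/* stands in for dev->mr_srcu */

/* Reader side: roughly what an ODP page-fault handler does. */
static int demo_read(void)
{
	struct demo_obj *obj;
	int idx, val = -1;

	idx = srcu_read_lock(&demo_srcu);
	obj = srcu_dereference(demo_ptr, &demo_srcu);
	if (obj)
		val = obj->data;	/* safe: the grace period cannot end yet */
	srcu_read_unlock(&demo_srcu, idx);
	return val;
}

/* Updater side: the cost destroy_mkey() used to pay on every call. */
static void demo_destroy(void)
{
	struct demo_obj *obj = rcu_dereference_protected(demo_ptr, true);

	RCU_INIT_POINTER(demo_ptr, NULL);	/* unpublish */
	synchronize_srcu(&demo_srcu);		/* wait out current readers */
	kfree(obj);				/* no reader can still hold it */
}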
drivers/infiniband/hw/mlx5/mr.c (16 additions & 3 deletions)

@@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	/* Wait until all page fault handlers using the mr complete. */
-	synchronize_srcu(&dev->mr_srcu);
+	if (mr->umem && mr->umem->is_odp)
+		synchronize_srcu(&dev->mr_srcu);
 #endif
 
 	return err;
@@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent = &cache->ent[c];
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	bool odp_mkey_exist = false;
+#endif
 	struct mlx5_ib_mr *tmp_mr;
 	struct mlx5_ib_mr *mr;
 	LIST_HEAD(del_list);
@@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 			break;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+		if (mr->umem && mr->umem->is_odp)
+			odp_mkey_exist = true;
+#endif
		list_move(&mr->list, &del_list);
 		ent->cur--;
 		ent->size--;
@@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 	}
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	synchronize_srcu(&dev->mr_srcu);
+	if (odp_mkey_exist)
+		synchronize_srcu(&dev->mr_srcu);
 #endif
 
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent = &cache->ent[c];
+	bool odp_mkey_exist = false;
 	struct mlx5_ib_mr *tmp_mr;
 	struct mlx5_ib_mr *mr;
 	LIST_HEAD(del_list);
@@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 			break;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+		if (mr->umem && mr->umem->is_odp)
+			odp_mkey_exist = true;
 		list_move(&mr->list, &del_list);
 		ent->cur--;
 		ent->size--;
@@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 	}
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	synchronize_srcu(&dev->mr_srcu);
+	if (odp_mkey_exist)
+		synchronize_srcu(&dev->mr_srcu);
 #endif
 
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {

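A note on the design choice in remove_keys() and clean_keys(): the ODP check
is hoisted out of the per-mkey loop, which only records whether the batch
contained any ODP mkey, so the whole batch pays at most one grace period
rather than one per entry. A hypothetical sketch of that batched pattern
(struct obj, has_srcu_readers and destroy_batch() are illustrative names,
not the driver's):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/srcu.h>

struct obj {
	struct list_head list;
	bool has_srcu_readers;	/* plays the role of mr->umem->is_odp */
};

/* Scan the batch once, then wait for at most one SRCU grace period. */
static void destroy_batch(struct list_head *del_list, struct srcu_struct *sp)
{
	struct obj *o, *tmp;
	bool need_sync = false;

	list_for_each_entry(o, del_list, list)
		if (o->has_srcu_readers)
			need_sync = true;

	if (need_sync)
		synchronize_srcu(sp);	/* one wait covers the whole batch */

	list_for_each_entry_safe(o, tmp, del_list, list) {
		list_del(&o->list);
		kfree(o);
	}
}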