IB/mlx5: Device memory support in mlx5_ib

This patch adds the mlx5_ib driver implementation for the device
memory allocation API.
It implements the ib_device callbacks for the allocation and
deallocation operations, as well as support for a new mmap command
that allows mapping allocated device memory into a VMA.
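
A minimal sketch of how userspace reaches these callbacks through the
rdma-core wrappers (ibv_alloc_dm()/ibv_memcpy_to_dm()/ibv_free_dm();
the sizes and error handling are illustrative, not part of this patch):

    /* Allocate 4KB of device memory, copy a host buffer into it,
     * then release it. Error handling is trimmed for brevity.
     */
    #include <infiniband/verbs.h>

    int dm_example(struct ibv_context *ctx)
    {
            struct ibv_alloc_dm_attr attr = {
                    .length = 4096,      /* bytes of device memory */
                    .log_align_req = 12, /* request 4KB alignment */
            };
            char buf[64] = "hello MEMIC";
            struct ibv_dm *dm;

            dm = ibv_alloc_dm(ctx, &attr); /* reaches mlx5_ib_alloc_dm() */
            if (!dm)
                    return -1;

            /* Host-to-device copy through the mmap'ed MEMIC window. */
            if (ibv_memcpy_to_dm(dm, 0, buf, sizeof(buf)))
                    return -1;

            return ibv_free_dm(dm);      /* reaches mlx5_ib_dealloc_dm() */
    }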

The change also reports the device memory maximum size and alignment
parameters in the device capabilities.
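
The new limit surfaces to userspace through the extended device
attributes; a sketch of querying it (assuming rdma-core exposes the
field as max_dm_size in struct ibv_device_attr_ex):

    #include <infiniband/verbs.h>
    #include <stdio.h>

    void print_dm_limit(struct ibv_context *ctx)
    {
            struct ibv_device_attr_ex attr = {};

            if (ibv_query_device_ex(ctx, NULL, &attr))
                    return;

            /* 0 means the device reports no device-memory support. */
            printf("max device memory: %llu bytes\n",
                   (unsigned long long)attr.max_dm_size);
    }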

The allocation and deallocation operations use new firmware commands
to allocate and release MEMIC memory on the device.
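
One detail of the firmware interface shown below: the device reads the
ALLOC_MEMIC alignment field as 64 * 2^value, so the driver normalizes
the caller's log2 alignment before issuing the command. An illustrative
restatement of that encoding (MLX5_MEMIC_BASE_ALIGN is log2(64) = 6 in
this patch; the helper name here is hypothetical):

    /* Requested log2 alignments below 6 map to 0; larger values have
     * 6 subtracted. E.g. log_alignment = 12 (4KB) becomes 6 on the
     * wire, since 64 * 2^6 == 4096.
     */
    static u32 memic_log_align_to_fw(u32 log_alignment)
    {
            return (log_alignment < MLX5_MEMIC_BASE_ALIGN) ?
                   0 : log_alignment - MLX5_MEMIC_BASE_ALIGN;
    }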

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

Ariel Levkovich authored and Jason Gunthorpe committed Apr 5, 2018
1 parent e72bd81 commit 24da001
Showing 7 changed files with 347 additions and 3 deletions.
106 changes: 106 additions & 0 deletions drivers/infiniband/hw/mlx5/cmd.c
@@ -66,3 +66,109 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,

return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
}

int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
u64 length, u32 alignment)
{
struct mlx5_core_dev *dev = memic->dev;
u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
>> PAGE_SHIFT;
u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
u32 mlx5_alignment;
u64 page_idx = 0;
int ret = 0;

if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
return -EINVAL;

/* mlx5 device sets alignment as 64*2^driver_value
* so normalizing is needed.
*/
mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
alignment - MLX5_MEMIC_BASE_ALIGN;
if (mlx5_alignment > max_alignment)
return -EINVAL;

MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
MLX5_SET(alloc_memic_in, in, memic_size, length);
MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
mlx5_alignment);

do {
spin_lock(&memic->memic_lock);
page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages,
num_memic_hw_pages,
page_idx,
num_pages, 0);

if (page_idx + num_pages <= num_memic_hw_pages)
bitmap_set(memic->memic_alloc_pages,
page_idx, num_pages);
else
ret = -ENOMEM;

spin_unlock(&memic->memic_lock);

if (ret)
return ret;

MLX5_SET64(alloc_memic_in, in, range_start_addr,
hw_start_addr + (page_idx * PAGE_SIZE));

ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (ret) {
spin_lock(&memic->memic_lock);
bitmap_clear(memic->memic_alloc_pages,
page_idx, num_pages);
spin_unlock(&memic->memic_lock);

if (ret == -EAGAIN) {
page_idx++;
continue;
}

return ret;
}

*addr = pci_resource_start(dev->pdev, 0) +
MLX5_GET64(alloc_memic_out, out, memic_start_addr);

return ret;
} while (page_idx < num_memic_hw_pages);

return ret;
}

int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
{
struct mlx5_core_dev *dev = memic->dev;
u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0};
u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0};
u64 start_page_idx;
int err;

addr -= pci_resource_start(dev->pdev, 0);
start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;

MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
MLX5_SET(dealloc_memic_in, in, memic_size, length);

err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));

if (!err) {
spin_lock(&memic->memic_lock);
bitmap_clear(memic->memic_alloc_pages,
start_page_idx, num_pages);
spin_unlock(&memic->memic_lock);
}

return err;
}
4 changes: 4 additions & 0 deletions drivers/infiniband/hw/mlx5/cmd.h
Expand Up @@ -33,6 +33,7 @@
#ifndef MLX5_IB_CMD_H
#define MLX5_IB_CMD_H

#include "mlx5_ib.h"
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>

@@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
u64 length, u32 alignment);
int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
#endif /* MLX5_IB_CMD_H */
143 changes: 142 additions & 1 deletion drivers/infiniband/hw/mlx5/main.c
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
@@ -891,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
}

if (MLX5_CAP_DEV_MEM(mdev, memic)) {
props->max_dm_size =
MLX5_CAP_DEV_MEM(mdev, max_memic_size);
}

if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;

@@ -2014,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
return "best effort WC";
case MLX5_IB_MMAP_NC_PAGE:
return "NC";
case MLX5_IB_MMAP_DEVICE_MEM:
return "Device Memory";
default:
return NULL;
}
@@ -2172,6 +2180,34 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
return err;
}

static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *mctx = to_mucontext(context);
struct mlx5_ib_dev *dev = to_mdev(context->device);
u16 page_idx = get_extended_index(vma->vm_pgoff);
size_t map_size = vma->vm_end - vma->vm_start;
u32 npages = map_size >> PAGE_SHIFT;
phys_addr_t pfn;
pgprot_t prot;

if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
page_idx + npages)
return -EINVAL;

pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
PAGE_SHIFT) +
page_idx;
prot = pgprot_writecombine(vma->vm_page_prot);
vma->vm_page_prot = prot;

if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
vma->vm_page_prot))
return -EAGAIN;

return mlx5_ib_set_vma_data(vma, mctx);
}

static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2216,13 +2252,97 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);

case MLX5_IB_MMAP_DEVICE_MEM:
return dm_mmap(ibcontext, vma);

default:
return -EINVAL;
}

return 0;
}

struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_dm_alloc_attr *attr,
struct uverbs_attr_bundle *attrs)
{
u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
struct mlx5_memic *memic = &to_mdev(ibdev)->memic;
phys_addr_t memic_addr;
struct mlx5_ib_dm *dm;
u64 start_offset;
u32 page_idx;
int err;

dm = kzalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
return ERR_PTR(-ENOMEM);

mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n",
attr->length, act_size, attr->alignment);

err = mlx5_cmd_alloc_memic(memic, &memic_addr,
act_size, attr->alignment);
if (err)
goto err_free;

start_offset = memic_addr & ~PAGE_MASK;
page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;

err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
&start_offset, sizeof(start_offset));
if (err)
goto err_dealloc;

err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
&page_idx, sizeof(page_idx));
if (err)
goto err_dealloc;

bitmap_set(to_mucontext(context)->dm_pages, page_idx,
DIV_ROUND_UP(act_size, PAGE_SIZE));

dm->dev_addr = memic_addr;

return &dm->ibdm;

err_dealloc:
mlx5_cmd_dealloc_memic(memic, memic_addr,
act_size);
err_free:
kfree(dm);
return ERR_PTR(err);
}

int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
{
struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic;
struct mlx5_ib_dm *dm = to_mdm(ibdm);
u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE);
u32 page_idx;
int ret;

ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size);
if (ret)
return ret;

page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
page_idx,
DIV_ROUND_UP(act_size, PAGE_SIZE));

kfree(dm);

return 0;
}

static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
@@ -4834,13 +4954,22 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}

ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
UVERBS_METHOD_DM_ALLOC,
&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
UVERBS_ATTR_TYPE(u64),
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
UVERBS_ATTR_TYPE(u16),
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));

ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
UVERBS_ATTR_TYPE(u64),
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));

-#define NUM_TREES 1
+#define NUM_TREES 2
static int populate_specs_root(struct mlx5_ib_dev *dev)
{
const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
@@ -4851,6 +4980,10 @@ static int populate_specs_root(struct mlx5_ib_dev *dev)
!WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
default_root[num_trees++] = &mlx5_ib_flow_action;

if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
!WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
default_root[num_trees++] = &mlx5_ib_dm;

dev->ib_dev.specs_root =
uverbs_alloc_spec_tree(num_trees, default_root);

@@ -4925,6 +5058,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);

spin_lock_init(&dev->memic.memic_lock);
dev->memic.dev = mdev;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
err = init_srcu_struct(&dev->mr_srcu);
if (err)
@@ -5087,6 +5223,11 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}

if (MLX5_CAP_DEV_MEM(mdev, memic)) {
dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
}

dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.uverbs_ex_cmd_mask |=
(Diff truncated here; the remaining hunks of main.c and the other four
changed files are not shown.)