Skip to content

Commit

Permalink
IB/mlx5: Implement fragmented completion queue (CQ)
Browse files Browse the repository at this point in the history
The current implementation of create CQ requires contiguous
memory. This requirement is problematic once memory is
fragmented or the system is low on memory, because it causes
failures in dma_zalloc_coherent().

This patch implements a new scheme of fragmented CQs to overcome
this issue by introducing a new type, 'struct mlx5_frag_buf_ctrl',
to allocate fragmented buffers rather than contiguous ones.

Base the Completion Queues (CQs) on this new fragmented buffer.

It fixes the following crashes:
kworker/29:0: page allocation failure: order:6, mode:0x80d0
CPU: 29 PID: 8374 Comm: kworker/29:0 Tainted: G OE 3.10.0
Workqueue: ib_cm cm_work_handler [ib_cm]
Call Trace:
[<>] dump_stack+0x19/0x1b
[<>] warn_alloc_failed+0x110/0x180
[<>] __alloc_pages_slowpath+0x6b7/0x725
[<>] __alloc_pages_nodemask+0x405/0x420
[<>] dma_generic_alloc_coherent+0x8f/0x140
[<>] x86_swiotlb_alloc_coherent+0x21/0x50
[<>] mlx5_dma_zalloc_coherent_node+0xad/0x110 [mlx5_core]
[<>] ? mlx5_db_alloc_node+0x69/0x1b0 [mlx5_core]
[<>] mlx5_buf_alloc_node+0x3e/0xa0 [mlx5_core]
[<>] mlx5_buf_alloc+0x14/0x20 [mlx5_core]
[<>] create_cq_kernel+0x90/0x1f0 [mlx5_ib]
[<>] mlx5_ib_create_cq+0x3b0/0x4e0 [mlx5_ib]

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Yonatan Cohen authored and Saeed Mahameed committed Feb 15, 2018
1 parent 3ec5693 commit 388ca8b
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 85 deletions.
64 changes: 39 additions & 25 deletions drivers/infiniband/hw/mlx5/cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
}
}

static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
return mlx5_buf_offset(&buf->buf, n * size);
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

static u8 sw_ownership_bit(int n, int nent)
Expand Down Expand Up @@ -403,7 +398,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_buf_free(dev->mdev, &buf->buf);
mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
}

static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
Expand Down Expand Up @@ -724,12 +719,25 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return ret;
}

static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
int nent, int cqe_size)
static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
struct mlx5_ib_cq_buf *buf,
int nent,
int cqe_size)
{
struct mlx5_frag_buf_ctrl *c = &buf->fbc;
struct mlx5_frag_buf *frag_buf = &c->frag_buf;
u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
int err;

err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);

mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);

err = mlx5_frag_buf_alloc_node(dev->mdev,
nent * cqe_size,
frag_buf,
dev->mdev->priv.numa_node);
if (err)
return err;

Expand Down Expand Up @@ -862,14 +870,15 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
ib_umem_release(cq->buf.umem);
}

static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;

for (i = 0; i < buf->nent; i++) {
cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
cqe = get_cqe(cq, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
Expand All @@ -891,26 +900,28 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->mcq.arm_db = cq->db.db + 1;
cq->mcq.cqe_sz = cqe_size;

err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
if (err)
goto err_db;

init_cq_buf(cq, &cq->buf);
init_cq_frag_buf(cq, &cq->buf);

*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
cq->buf.fbc.frag_buf.npages;
*cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
goto err_buf;
}

pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
mlx5_fill_page_array(&cq->buf.buf, pas);
mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);

cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
cq->buf.fbc.frag_buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);

*index = dev->mdev->priv.uar->index;

Expand Down Expand Up @@ -1207,11 +1218,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (!cq->resize_buf)
return -ENOMEM;

err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;

init_cq_buf(cq, cq->resize_buf);
init_cq_frag_buf(cq, cq->resize_buf);

return 0;

Expand Down Expand Up @@ -1256,9 +1267,8 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
}

while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
dcqe = get_cqe_from_buf(cq->resize_buf,
(i + 1) & (cq->resize_buf->nent),
dsize);
dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
(i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
memcpy(dcqe, scqe, dsize);
Expand Down Expand Up @@ -1324,8 +1334,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
npas = cq->resize_buf->buf.npages;
page_shift = cq->resize_buf->buf.page_shift;
struct mlx5_frag_buf_ctrl *c;

c = &cq->resize_buf->fbc;
npas = c->frag_buf.npages;
page_shift = c->frag_buf.page_shift;
}
}

Expand All @@ -1346,7 +1359,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
pas, 0);
else
mlx5_fill_page_array(&cq->resize_buf->buf, pas);
mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
pas);

MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
Expand Down
6 changes: 3 additions & 3 deletions drivers/infiniband/hw/mlx5/mlx5_ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ struct mlx5_ib_qp {
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
};
struct mlx5_buf buf;
struct mlx5_frag_buf buf;

struct mlx5_db db;
struct mlx5_ib_wq rq;
Expand Down Expand Up @@ -413,7 +413,7 @@ struct mlx5_ib_qp {
};

struct mlx5_ib_cq_buf {
struct mlx5_buf buf;
struct mlx5_frag_buf_ctrl fbc;
struct ib_umem *umem;
int cqe_size;
int nent;
Expand Down Expand Up @@ -495,7 +495,7 @@ struct mlx5_ib_wc {
struct mlx5_ib_srq {
struct ib_srq ibsrq;
struct mlx5_core_srq msrq;
struct mlx5_buf buf;
struct mlx5_frag_buf buf;
struct mlx5_db db;
u64 *wrid;
/* protect SRQ hanlding
Expand Down
37 changes: 25 additions & 12 deletions drivers/net/ethernet/mellanox/mlx5/core/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,38 +71,49 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
}

int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_buf *buf, int node)
struct mlx5_frag_buf *buf, int node)
{
dma_addr_t t;

buf->size = size;
buf->npages = 1;
buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size,
&t, node);
if (!buf->direct.buf)

buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
if (!buf->frags)
return -ENOMEM;

buf->direct.map = t;
buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
&t, node);
if (!buf->frags->buf)
goto err_out;

buf->frags->map = t;

while (t & ((1 << buf->page_shift) - 1)) {
--buf->page_shift;
buf->npages *= 2;
}

return 0;
err_out:
kfree(buf->frags);
return -ENOMEM;
}

int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
int mlx5_buf_alloc(struct mlx5_core_dev *dev,
int size, struct mlx5_frag_buf *buf)
{
return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
}
EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
EXPORT_SYMBOL(mlx5_buf_alloc);

void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
buf->direct.map);
dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
buf->frags->map);

kfree(buf->frags);
}
EXPORT_SYMBOL_GPL(mlx5_buf_free);

Expand Down Expand Up @@ -147,6 +158,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
err_out:
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);

void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
Expand All @@ -162,6 +174,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
}
kfree(buf->frags);
}
EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);

static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
int node)
Expand Down Expand Up @@ -275,13 +288,13 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
}
EXPORT_SYMBOL_GPL(mlx5_db_free);

void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
{
u64 addr;
int i;

for (i = 0; i < buf->npages; i++) {
addr = buf->direct.map + (i << buf->page_shift);
addr = buf->frags->map + (i << buf->page_shift);

pas[i] = cpu_to_be64(addr);
}
Expand Down
11 changes: 6 additions & 5 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
void *data)
{
u32 ci = cqcc & cq->wq.sz_m1;
u32 ci = cqcc & cq->wq.fbc.sz_m1;

memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
}
Expand All @@ -74,9 +74,10 @@ static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)

static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
{
u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
u32 wq_sz = 1 << cq->wq.log_sz;
u32 ci = cqcc & cq->wq.sz_m1;
struct mlx5_frag_buf_ctrl *fbc = &cq->wq.fbc;
u8 op_own = (cqcc >> fbc->log_sz) & 1;
u32 wq_sz = 1 << fbc->log_sz;
u32 ci = cqcc & fbc->sz_m1;
u32 ci_top = min_t(u32, wq_sz, ci + n);

for (; ci < ci_top; ci++, n--) {
Expand All @@ -101,7 +102,7 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
cq->title.op_own &= 0xf0;
cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz);
cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);

if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
Expand Down
18 changes: 7 additions & 11 deletions drivers/net/ethernet/mellanox/mlx5/core/wq.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)

u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
{
return wq->sz_m1 + 1;
return wq->fbc.sz_m1 + 1;
}

u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
Expand All @@ -62,7 +62,7 @@ static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)

static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
{
return mlx5_cqwq_get_size(wq) << wq->log_stride;
return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
}

static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
Expand Down Expand Up @@ -92,7 +92,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}

wq->buf = wq_ctrl->buf.direct.buf;
wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;

wq_ctrl->mdev = mdev;
Expand Down Expand Up @@ -130,7 +130,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}

wq->rq.buf = wq_ctrl->buf.direct.buf;
wq->rq.buf = wq_ctrl->buf.frags->buf;
wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
Expand All @@ -151,11 +151,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
{
int err;

wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
wq->sz_m1 = (1 << wq->log_sz) - 1;
wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
wq->frag_sz_m1 = (1 << wq->log_frag_strides) - 1;
mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);

err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
Expand All @@ -172,7 +168,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}

wq->frag_buf = wq_ctrl->frag_buf;
wq->fbc.frag_buf = wq_ctrl->frag_buf;
wq->db = wq_ctrl->db.db;

wq_ctrl->mdev = mdev;
Expand Down Expand Up @@ -209,7 +205,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}

wq->buf = wq_ctrl->buf.direct.buf;
wq->buf = wq_ctrl->buf.frags->buf;
wq->db = wq_ctrl->db.db;

for (i = 0; i < wq->sz_m1; i++) {
Expand Down
Loading

0 comments on commit 388ca8b

Please sign in to comment.