Merge tag 'mlx5-fixes-2020-09-30' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

From: Saeed Mahameed <saeedm@nvidia.com>

====================
This series introduces some fixes to mlx5 driver.

v1->v2:
 - Patch #1: Don't return while a mutex is held. (Dave)

v2->v3:
 - Drop patch #1; will consider a better approach (Jakub)
 - Use cpu_relax() instead of cond_resched() (Jakub)
 - Use while (i--) to reverse a loop (Jakub); see the sketch below
 - Drop the old Mellanox email sign-off and change the committer email
   (Jakub)
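
For reference, the while (i--) idiom unwinds a partially completed setup
loop in reverse order, which is the shape used by the request_irqs error
flow fix in this series. A minimal sketch of the pattern (hypothetical
helper names, not the actual mlx5 code):

static int setup_vectors(struct mlx5_core_dev *dev, int count)
{
	int i, err;

	for (i = 0; i < count; i++) {
		err = setup_one_vector(dev, i);	/* hypothetical helper */
		if (err)
			goto err_unwind;
	}
	return 0;

err_unwind:
	/* i is the index that failed; tear down 0..i-1 in reverse */
	while (i--)
		teardown_one_vector(dev, i);
	return err;
}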

Please pull and let me know if there is any problem.

For -stable v4.15
 ('net/mlx5e: Fix VLAN cleanup flow')
 ('net/mlx5e: Fix VLAN create flow')

For -stable v4.16
 ('net/mlx5: Fix request_irqs error flow')

For -stable v5.4
 ('net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU')
 ('net/mlx5: Avoid possible free of command entry while timeout comp handler')

For -stable v5.7
 ('net/mlx5e: Fix return status when setting unsupported FEC mode')

For -stable v5.8
 ('net/mlx5e: Fix race condition on nhe->n pointer in neigh update')
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Oct 2, 2020
2 parents 9d8c05a + 1253935 commit ab0faf5
Showing 13 changed files with 350 additions and 119 deletions.
198 changes: 144 additions & 54 deletions drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -69,12 +69,10 @@ enum {
MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
};

static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
struct mlx5_cmd_msg *in,
struct mlx5_cmd_msg *out,
void *uout, int uout_size,
mlx5_cmd_cbk_t cbk,
void *context, int page_queue)
static struct mlx5_cmd_work_ent *
cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in,
struct mlx5_cmd_msg *out, void *uout, int uout_size,
mlx5_cmd_cbk_t cbk, void *context, int page_queue)
{
gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
struct mlx5_cmd_work_ent *ent;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
if (!ent)
return ERR_PTR(-ENOMEM);

ent->idx = -EINVAL;
ent->in = in;
ent->out = out;
ent->uout = uout;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
ent->context = context;
ent->cmd = cmd;
ent->page_queue = page_queue;
refcount_set(&ent->refcnt, 1);

return ent;
}

static void cmd_free_ent(struct mlx5_cmd_work_ent *ent)
{
kfree(ent);
}

static u8 alloc_token(struct mlx5_cmd *cmd)
{
u8 token;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
return token;
}

static int alloc_ent(struct mlx5_cmd *cmd)
static int cmd_alloc_index(struct mlx5_cmd *cmd)
{
unsigned long flags;
int ret;
@@ -123,7 +128,7 @@
return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
}

static void free_ent(struct mlx5_cmd *cmd, int idx)
static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
unsigned long flags;

@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
}

static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
{
refcount_inc(&ent->refcnt);
}

static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
{
if (!refcount_dec_and_test(&ent->refcnt))
return;

if (ent->idx >= 0)
cmd_free_index(ent->cmd, ent->idx);

cmd_free_ent(ent);
}

static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
{
return cmd->cmd_buf + (idx << cmd->log_stride);
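
Taken together, cmd_alloc_ent()/cmd_free_ent() and cmd_ent_get()/cmd_ent_put()
put each command entry under a reference count: the creator holds one
reference, every asynchronous consumer (the timeout work, the firmware
completion) pins the entry before it may run, and only the last put releases
the entry index and frees the memory. A hedged sketch of the consumer side
(simplified; not the literal driver code):

static void timeout_work_sketch(struct mlx5_cmd_work_ent *ent)
{
	/* ... handle the timeout for ent ... */

	/* drop the reference taken when the delayed work was queued */
	cmd_ent_put(ent);
}

static void fw_completion_sketch(struct mlx5_cmd_work_ent *ent)
{
	/* ... deliver status to the waiter or the callback ... */

	/* drop the reference taken before ringing the doorbell;
	 * whichever put runs last frees the index and the entry
	 */
	cmd_ent_put(ent);
}
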
@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
ent->ret = -ETIMEDOUT;
}

static void free_cmd(struct mlx5_cmd_work_ent *ent)
{
kfree(ent);
}

static int verify_signature(struct mlx5_cmd_work_ent *ent)
{
struct mlx5_cmd_mailbox *next = ent->out->next;
@@ -837,11 +853,22 @@ static void cb_timeout_handler(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
cmd);

mlx5_cmd_eq_recover(dev);

/* Maybe it was handled by EQ recovery? */
if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx,
mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
goto out; /* phew, already handled */
}

ent->ret = -ETIMEDOUT;
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
msg_to_opcode(ent->in));
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);

out:
cmd_ent_put(ent); /* for the cmd_ent_get() taken when the delayed work was scheduled */
}

static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
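
The MLX5_CMD_ENT_STATE_PENDING_COMP bit is the handshake that lets the
timeout path above detect a completion that raced with it: the submit path
sets the bit before ringing the doorbell, the completion handler clears it,
and a clear bit therefore means "already handled". A hedged sketch of the
handshake (the completion side is compressed into test_and_clear_bit() for
brevity; the real handler does more bookkeeping):

/* submit path: announce that a completion is expected */
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);

/* completion path: claim the pending completion exactly once */
if (test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state))
	complete(&ent->done);

/* timeout path: a clear bit means the completion won the race */
if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state))
	return;
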
@@ -856,6 +883,32 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
return cmd->allowed_opcode == opcode;
}

static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
{
unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
int idx;

retry:
idx = cmd_alloc_index(cmd);
if (idx < 0 && time_before(jiffies, alloc_end)) {
/* Index allocation can fail on heavy load of commands. This is a temporary
* situation as the current command already holds the semaphore, meaning that
* another command completion is being handled and it is expected to release
* the entry index soon.
*/
cpu_relax();
goto retry;
}
return idx;
}

bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
{
return pci_channel_offline(dev->pdev) ||
dev->cmd.state != MLX5_CMDIF_STATE_UP ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
}

static void cmd_work_handler(struct work_struct *work)
{
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
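
cmd_alloc_index_retry() bounds its busy-wait with a jiffies deadline and
spins with cpu_relax(), which merely hints the CPU and never sleeps (the
v3 changelog swapped it in for cond_resched()). The same shape in
isolation, as a hedged sketch built on the helpers above:

static int alloc_index_bounded(struct mlx5_cmd *cmd)
{
	unsigned long deadline = jiffies + msecs_to_jiffies(1000);
	int idx;

	do {
		idx = cmd_alloc_index(cmd);
		if (idx >= 0)
			return idx;	/* got a free command slot */
		cpu_relax();		/* spin without sleeping */
	} while (time_before(jiffies, deadline));

	return idx;	/* still -ENOMEM after ~1s: give up */
}
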
@@ -873,14 +926,14 @@ static void cmd_work_handler(struct work_struct *work)
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
alloc_ret = alloc_ent(cmd);
alloc_ret = cmd_alloc_index_retry(cmd);
if (alloc_ret < 0) {
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
if (ent->callback) {
ent->callback(-EAGAIN, ent->context);
mlx5_free_cmd_msg(dev, ent->out);
free_msg(dev, ent->in);
free_cmd(ent);
cmd_ent_put(ent);
} else {
ent->ret = -EAGAIN;
complete(&ent->done);
@@ -916,15 +969,12 @@ static void cmd_work_handler(struct work_struct *work)
ent->ts1 = ktime_get_ns();
cmd_mode = cmd->mode;

if (ent->callback)
schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
cmd_ent_get(ent);
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);

/* Skip sending command to fw if internal error */
if (pci_channel_offline(dev->pdev) ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
cmd->state != MLX5_CMDIF_STATE_UP ||
!opcode_allowed(&dev->cmd, ent->op)) {
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
u8 status = 0;
u32 drv_synd;

@@ -933,13 +983,10 @@ static void cmd_work_handler(struct work_struct *work)
MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);

mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
/* no doorbell, no need to keep the entry */
free_ent(cmd, ent->idx);
if (ent->callback)
free_cmd(ent);
return;
}

cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
@@ -983,6 +1030,35 @@ static const char *deliv_status_to_str(u8 status)
}
}

enum {
MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000,
};

static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
struct mlx5_cmd_work_ent *ent)
{
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);

mlx5_cmd_eq_recover(dev);

/* Re-wait on the ent->done after executing the recovery flow. If the
* recovery flow (or any other recovery flow running simultaneously)
* has recovered an EQE, it should cause the entry to be completed by
* the command interface.
*/
if (wait_for_completion_timeout(&ent->done, timeout)) {
mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx,
mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
return;
}

mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));

ent->ret = -ETIMEDOUT;
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
}

static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
{
unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
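
The re-wait above works because wait_for_completion_timeout() returns 0
only on timeout; any non-zero return is the number of jiffies that were
left, meaning the completion fired and the recovery flow (or a racing
handler) delivered the lost event. A sketch of that calling convention
(illustrative only):

unsigned long left = wait_for_completion_timeout(&ent->done, timeout);

if (left)
	pr_debug("cmd completed, %lu jiffies to spare\n", left);
else
	pr_debug("cmd timed out, forcing completion\n");
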
@@ -994,12 +1070,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
ent->ret = -ECANCELED;
goto out_err;
}
if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
if (cmd->mode == CMD_MODE_POLLING || ent->polling)
wait_for_completion(&ent->done);
} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
ent->ret = -ETIMEDOUT;
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
}
else if (!wait_for_completion_timeout(&ent->done, timeout))
wait_func_handle_exec_timeout(dev, ent);

out_err:
err = ent->ret;
@@ -1039,11 +1113,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (callback && page_queue)
return -EINVAL;

ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
page_queue);
ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
callback, context, page_queue);
if (IS_ERR(ent))
return PTR_ERR(ent);

/* the put for this ent is done by its final consumer, depending on the use case:
 * 1) (!callback) blocking flow: by the caller after wait_func() completes
 * 2) (callback) flow: by mlx5_cmd_comp_handler() when the ent is handled
 */

ent->token = token;
ent->polling = force_polling;

@@ -1062,12 +1141,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
}

if (callback)
goto out;
goto out; /* mlx5_cmd_comp_handler() will put(ent) */

err = wait_func(dev, ent);
if (err == -ETIMEDOUT)
goto out;
if (err == -ECANCELED)
if (err == -ETIMEDOUT || err == -ECANCELED)
goto out_free;

ds = ent->ts2 - ent->ts1;
@@ -1085,7 +1162,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
*status = ent->status;

out_free:
free_cmd(ent);
cmd_ent_put(ent);
out:
return err;
}
@@ -1516,14 +1593,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
if (!forced) {
mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
ent->idx);
free_ent(cmd, ent->idx);
free_cmd(ent);
cmd_ent_put(ent);
}
continue;
}

if (ent->callback)
cancel_delayed_work(&ent->cb_timeout_work);
if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work))
cmd_ent_put(ent); /* timeout work was canceled */

if (!forced || /* Real FW completion */
pci_channel_offline(dev->pdev) || /* FW is inaccessible */
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
cmd_ent_put(ent);

if (ent->page_queue)
sem = &cmd->pages_sem;
else
@@ -1545,10 +1627,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
ent->ret, deliv_status_to_str(ent->status), ent->status);
}

/* only real completion will free the entry slot */
if (!forced)
free_ent(cmd, ent->idx);

if (ent->callback) {
ds = ent->ts2 - ent->ts1;
if (ent->op < MLX5_CMD_OP_MAX) {
@@ -1576,10 +1654,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
free_msg(dev, ent->in);

err = err ? err : ent->status;
if (!forced)
free_cmd(ent);
/* final consumer is done, release ent */
cmd_ent_put(ent);
callback(err, context);
} else {
/* release wait_func() so mlx5_cmd_invoke()
* can make the final ent_put()
*/
complete(&ent->done);
}
up(sem);
@@ -1589,8 +1670,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force

void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
{
struct mlx5_cmd *cmd = &dev->cmd;
unsigned long bitmask;
unsigned long flags;
u64 vector;
int i;

/* wait for pending handlers to complete */
mlx5_eq_synchronize_cmd_irq(dev);
@@ -1599,11 +1683,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
if (!vector)
goto no_trig;

bitmask = vector;
/* we must increment the allocated entries refcount before triggering the completions
* to guarantee pending commands will not get freed in the meanwhile.
* For that reason, it also has to be done inside the alloc_lock.
*/
for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
cmd_ent_get(cmd->ent_arr[i]);
vector |= MLX5_TRIGGERED_CMD_COMP;
spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);

mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
mlx5_cmd_comp_handler(dev, vector, true);
for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
cmd_ent_put(cmd->ent_arr[i]);
return;

no_trig:
@@ -1711,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
u8 token;

opcode = MLX5_GET(mbox_in, in, opcode);
if (pci_channel_offline(dev->pdev) ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
dev->cmd.state != MLX5_CMDIF_STATE_UP ||
!opcode_allowed(&dev->cmd, opcode)) {
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) {
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
MLX5_SET(mbox_out, out, status, status);
MLX5_SET(mbox_out, out, syndrome, drv_synd);