Skip to content

Commit

Permalink
net/mlx5: Avoid using pending command interface slots
Browse files Browse the repository at this point in the history
Currently when firmware command gets stuck or it takes long time to
complete, the driver command will get timeout and the command slot is
freed and can be used for new commands, and if the firmware receive new
command on the old busy slot its behavior is unexpected and this could
be harmful.
To fix this when the driver command gets timeout we return failure,
but we don't free the command slot and we wait for the firmware to
explicitly respond to that command.
Once all the entries are busy we will stop processing new firmware
commands.

Fixes: 9cba4eb ('net/mlx5: Fix potential deadlock in command mode change')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Mohamad Haj Yahia authored and Saeed Mahameed committed May 23, 2017
1 parent b57fe69 commit 73dd3a4
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 8 deletions.
41 changes: 36 additions & 5 deletions drivers/net/ethernet/mellanox/mlx5/core/cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work)
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
msg_to_opcode(ent->in));
mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
}

static void cmd_work_handler(struct work_struct *work)
Expand Down Expand Up @@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work)
}

cmd->ent_arr[ent->idx] = ent;
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
lay = get_inst(cmd, ent->idx);
ent->lay = lay;
memset(lay, 0, sizeof(*lay));
Expand All @@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work)
if (ent->callback)
schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);

/* Skip sending command to fw if internal error */
if (pci_channel_offline(dev->pdev) ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
u8 status = 0;
u32 drv_synd;

ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
MLX5_SET(mbox_out, ent->out, status, status);
MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);

mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
return;
}

/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
Expand All @@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work)
poll_timeout(ent);
/* make sure we read the descriptor after ownership is SW */
rmb();
mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
}
}

Expand Down Expand Up @@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
wait_for_completion(&ent->done);
} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
ent->ret = -ETIMEDOUT;
mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
}

err = ent->ret;
Expand Down Expand Up @@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
}
}

void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
{
struct mlx5_cmd *cmd = &dev->cmd;
struct mlx5_cmd_work_ent *ent;
Expand All @@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
struct semaphore *sem;

ent = cmd->ent_arr[i];

/* if we already completed the command, ignore it */
if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
&ent->state)) {
/* only real completion can free the cmd slot */
if (!forced) {
mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
ent->idx);
free_ent(cmd, ent->idx);
}
continue;
}

if (ent->callback)
cancel_delayed_work(&ent->cb_timeout_work);
if (ent->page_queue)
Expand All @@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
ent->ret, deliv_status_to_str(ent->status), ent->status);
}
free_ent(cmd, ent->idx);

/* only real completion will free the entry slot */
if (!forced)
free_ent(cmd, ent->idx);

if (ent->callback) {
ds = ent->ts2 - ent->ts1;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
break;

case MLX5_EVENT_TYPE_CMD:
mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
break;

case MLX5_EVENT_TYPE_PORT_CHANGE:
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/health.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev)
spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);

mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
mlx5_cmd_comp_handler(dev, vector);
mlx5_cmd_comp_handler(dev, vector, true);
return;

no_trig:
Expand Down
7 changes: 6 additions & 1 deletion include/linux/mlx5/driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,12 @@ enum {

typedef void (*mlx5_cmd_cbk_t)(int status, void *context);

enum {
MLX5_CMD_ENT_STATE_PENDING_COMP,
};

struct mlx5_cmd_work_ent {
unsigned long state;
struct mlx5_cmd_msg *in;
struct mlx5_cmd_msg *out;
void *uout;
Expand Down Expand Up @@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
int nent, u64 mask, const char *name,
Expand Down

0 comments on commit 73dd3a4

Please sign in to comment.