Skip to content

Commit

Permalink
Merge tag 'mlx5-updates-2024-02-01' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Browse files Browse the repository at this point in the history

Saeed Mahameed says:

====================
mlx5-updates-2024-02-01

1) IPSec global stats for xfrm and mlx5
2) XSK memory improvements for non-linear SKBs
3) Software steering debug dump to use seq_file ops
4) Various code clean-ups

* tag 'mlx5-updates-2024-02-01' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5e: XDP, Exclude headroom and tailroom from memory calculations
  net/mlx5e: XSK, Exclude tailroom from non-linear SKBs memory calculations
  net/mlx5: DR, Change SWS usage to debug fs seq_file interface
  net/mlx5: Change missing SyncE capability print to debug
  net/mlx5: Remove initial segmentation duplicate definitions
  net/mlx5: Return specific error code for timeout on wait_fw_init
  net/mlx5: SF, Stop waiting for FW as teardown was called
  net/mlx5: remove fw reporter dump option for non PF
  net/mlx5: remove fw_fatal reporter dump option for non PF
  net/mlx5: Rename mlx5_sf_dev_remove
  Documentation: Fix counter name of mlx5 vnic reporter
  net/mlx5e: Delete obsolete IPsec code
  net/mlx5e: Connect mlx5 IPsec statistics with XFRM core
  xfrm: get global statistics from the offloaded device
  xfrm: generalize xdo_dev_state_update_curlft to allow statistics update
====================

Link: https://lore.kernel.org/r/20240206005527.1353368-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Feb 8, 2024
2 parents 313fb18 + a90f559 commit 006e896
Show file tree
Hide file tree
Showing 23 changed files with 766 additions and 240 deletions.
5 changes: 3 additions & 2 deletions Documentation/networking/devlink/mlx5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ them in realtime.

Description of the vnic counters:

- total_q_under_processor_handle
- total_error_queues
number of queues in an error state due to
an async error or errored command.
- send_queue_priority_update_flow
Expand All @@ -259,7 +259,8 @@ Description of the vnic counters:
number of times CQ entered an error state due to an overflow.
- async_eq_overrun
number of times an EQ mapped to async events was overrun.
comp_eq_overrun number of times an EQ mapped to completion events was
- comp_eq_overrun
number of times an EQ mapped to completion events was
overrun.
- quota_exceeded_command
number of commands issued and failed due to quota exceeded.
Expand Down
4 changes: 2 additions & 2 deletions Documentation/networking/xfrm_device.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ Callbacks to implement
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
void (*xdo_dev_state_update_stats) (struct xfrm_state *x);

/* Solely packet offload callbacks */
void (*xdo_dev_state_update_curlft) (struct xfrm_state *x);
int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack);
void (*xdo_dev_policy_delete) (struct xfrm_policy *x);
void (*xdo_dev_policy_free) (struct xfrm_policy *x);
Expand Down Expand Up @@ -191,6 +191,6 @@ xdo_dev_policy_free() on any remaining offloaded states.

Because the HW handles the packets, the XFRM core can't count hard and soft limits.
The HW/driver is responsible for counting them and for providing accurate data when
xdo_dev_state_update_curlft() is called. In case of one of these limits
xdo_dev_state_update_stats() is called. In case of one of these limits
occurred, the driver needs to call xfrm_state_check_expire() to make sure
that XFRM performs rekeying sequence.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ static bool is_dpll_supported(struct mlx5_core_dev *dev)
return false;

if (!MLX5_CAP_MCAM_REG2(dev, synce_registers)) {
mlx5_core_warn(dev, "Missing SyncE capability\n");
mlx5_core_dbg(dev, "Missing SyncE capability\n");
return false;
}

Expand Down
24 changes: 19 additions & 5 deletions drivers/net/ethernet/mellanox/mlx5/core/en/params.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,14 @@ static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
return xsk->headroom + hw_mtu;
}

static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool no_head_tail_room)
{
/* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
u16 headroom;

if (no_head_tail_room)
return SKB_DATA_ALIGN(hw_mtu);
headroom = mlx5e_get_linear_rq_headroom(params, NULL);

return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
}
Expand All @@ -254,6 +257,7 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk,
bool mpwqe)
{
bool no_head_tail_room;
u32 sz;

/* XSK frames are mapped as individual pages, because frames may come in
Expand All @@ -262,7 +266,13 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
if (xsk)
return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;

sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
no_head_tail_room = params->xdp_prog && mpwqe && !mlx5e_rx_is_linear_skb(mdev, params, xsk);

/* When no_head_tail_room is set, headroom and tailroom are excluded from skb calculations.
* no_head_tail_room should be set in the case of XDP with Striding RQ
* when SKB is not linear. This is because another page is allocated for the linear part.
*/
sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, no_head_tail_room));

/* XDP in mlx5e doesn't support multiple packets per page.
* Do not assume sz <= PAGE_SIZE if params->xdp_prog is set.
Expand All @@ -289,7 +299,11 @@ bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
return false;

/* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
/* Call mlx5e_rx_get_linear_sz_skb with the no_head_tail_room parameter set
* to exclude headroom and tailroom from calculations.
* no_head_tail_room is true when SKB is built on XDP_PASS on XSK RQs
* since packet data buffers don't have headroom and tailroom reserved for the SKB.
* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
* must fit into a CPU page.
*/
if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
Expand Down
26 changes: 23 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
Original file line number Diff line number Diff line change
Expand Up @@ -984,21 +984,41 @@ static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
queue_work(sa_entry->ipsec->wq, &work->work);
}

static void mlx5e_xfrm_update_curlft(struct xfrm_state *x)
static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
struct net *net = dev_net(x->xso.dev);
u64 packets, bytes, lastuse;

lockdep_assert(lockdep_is_held(&x->lock) ||
lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex));
lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) ||
lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock));

if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
return;

if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
mlx5_fc_query_cached(ipsec_rule->auth.fc, &bytes, &packets, &lastuse);
x->stats.integrity_failed += packets;
XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, packets);

mlx5_fc_query_cached(ipsec_rule->trailer.fc, &bytes, &packets, &lastuse);
XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, packets);
}

if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
return;

mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
x->curlft.packets += packets;
x->curlft.bytes += bytes;

if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
mlx5_fc_query_cached(ipsec_rule->replay.fc, &bytes, &packets, &lastuse);
x->stats.replay += packets;
XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, packets);
}
}

static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
Expand Down Expand Up @@ -1156,7 +1176,7 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
.xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,

.xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft,
.xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
Expand Down
1 change: 0 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ struct mlx5e_ipsec_hw_stats {
struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_rx_drop_sp_alloc;
atomic64_t ipsec_rx_drop_sadb_miss;
atomic64_t ipsec_rx_drop_syndrome;
atomic64_t ipsec_tx_drop_bundle;
atomic64_t ipsec_tx_drop_no_state;
atomic64_t ipsec_tx_drop_not_ip;
Expand Down
25 changes: 2 additions & 23 deletions drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,6 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
return false;
}

enum {
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED,
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED,
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER,
};

void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
u32 ipsec_meta_data)
Expand Down Expand Up @@ -343,20 +337,7 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,

xo = xfrm_offload(skb);
xo->flags = CRYPTO_DONE;

switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
xo->status = CRYPTO_SUCCESS;
break;
case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED:
xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
break;
case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER:
xo->status = CRYPTO_INVALID_PACKET_SYNTAX;
break;
default:
atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_syndrome);
}
xo->status = CRYPTO_SUCCESS;
}

int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata)
Expand All @@ -374,8 +355,6 @@ int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metada
return err;
}

*metadata = MLX5_IPSEC_METADATA_CREATE(ipsec_obj_id,
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED);

*metadata = ipsec_obj_id;
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1)
#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0))
#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0))
#define MLX5_IPSEC_METADATA_CREATE(id, syndrome) ((id) | ((syndrome) << 24))

struct mlx5e_accel_tx_ipsec_state {
struct xfrm_offload *xo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
Expand Down
6 changes: 3 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/fw.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,18 +366,18 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
return -EIO;
}

mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED);
mlx5_set_nic_state(dev, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED);

/* Loop until device state turns to disable */
end = jiffies + msecs_to_jiffies(delay_ms);
do {
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
break;

cond_resched();
} while (!time_after(jiffies, end));

if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) {
dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
mlx5_get_nic_state(dev), delay_ms);
return -EIO;
Expand Down
45 changes: 30 additions & 15 deletions drivers/net/ethernet/mellanox/mlx5/core/health.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
return MLX5_SENSOR_PCI_COMM_ERR;
if (pci_channel_offline(dev->pdev))
return MLX5_SENSOR_PCI_ERR;
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
return MLX5_SENSOR_NIC_DISABLED;
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET)
return MLX5_SENSOR_NIC_SW_RESET;
if (sensor_fw_synd_rfr(dev))
return MLX5_SENSOR_FW_SYND_RFR;
Expand Down Expand Up @@ -185,7 +185,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
/* Write the NIC interface field to initiate the reset, the command
* interface address also resides here, don't overwrite it.
*/
mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
mlx5_set_nic_state(dev, MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET);

return true;
}
Expand Down Expand Up @@ -246,13 +246,13 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
/* Recover from SW reset */
end = jiffies + msecs_to_jiffies(delay_ms);
do {
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
break;

msleep(20);
} while (!time_after(jiffies, end));

if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
if (mlx5_get_nic_state(dev) != MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED) {
dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
mlx5_get_nic_state(dev), delay_ms);
}
Expand All @@ -272,26 +272,26 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
u8 nic_interface = mlx5_get_nic_state(dev);

switch (nic_interface) {
case MLX5_NIC_IFC_FULL:
case MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER:
mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
break;

case MLX5_NIC_IFC_DISABLED:
case MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED:
mlx5_core_warn(dev, "starting teardown\n");
break;

case MLX5_NIC_IFC_NO_DRAM_NIC:
case MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC:
mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
break;

case MLX5_NIC_IFC_SW_RESET:
case MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET:
/* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
* 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
* and this is a VF), this is not recoverable by SW reset.
* Logging of this is handled elsewhere.
* 2. FW reset has been issued by another function, driver can
* be reloaded to recover after the mode switches to
* MLX5_NIC_IFC_DISABLED.
* MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED.
*/
if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
mlx5_core_warn(dev, "NIC SW reset in progress\n");
Expand Down Expand Up @@ -555,12 +555,17 @@ static void mlx5_fw_reporter_err_work(struct work_struct *work)
&fw_reporter_ctx);
}

static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
static const struct devlink_health_reporter_ops mlx5_fw_reporter_pf_ops = {
.name = "fw",
.diagnose = mlx5_fw_reporter_diagnose,
.dump = mlx5_fw_reporter_dump,
};

/* "fw" health reporter ops that provide only a diagnose callback (no .dump).
 * mlx5_fw_reporters_create() selects this table for VF/SF functions.
 */
static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
.name = "fw",
.diagnose = mlx5_fw_reporter_diagnose,
};

static int
mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
void *priv_ctx,
Expand Down Expand Up @@ -646,42 +651,52 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
}
}

static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = {
.name = "fw_fatal",
.recover = mlx5_fw_fatal_reporter_recover,
.dump = mlx5_fw_fatal_reporter_dump,
};

/* "fw_fatal" health reporter ops that provide only a recover callback (no .dump).
 * mlx5_fw_reporters_create() selects this table for VF/SF functions.
 */
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
.name = "fw_fatal",
.recover = mlx5_fw_fatal_reporter_recover,
};

#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD

void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
const struct devlink_health_reporter_ops *fw_fatal_ops;
struct mlx5_core_health *health = &dev->priv.health;
const struct devlink_health_reporter_ops *fw_ops;
struct devlink *devlink = priv_to_devlink(dev);
u64 grace_period;

fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops;
fw_ops = &mlx5_fw_reporter_pf_ops;
if (mlx5_core_is_ecpf(dev)) {
grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
} else if (mlx5_core_is_pf(dev)) {
grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
} else {
/* VF or SF */
grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
fw_fatal_ops = &mlx5_fw_fatal_reporter_ops;
fw_ops = &mlx5_fw_reporter_ops;
}

health->fw_reporter =
devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
0, dev);
devl_health_reporter_create(devlink, fw_ops, 0, dev);
if (IS_ERR(health->fw_reporter))
mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
PTR_ERR(health->fw_reporter));

health->fw_fatal_reporter =
devl_health_reporter_create(devlink,
&mlx5_fw_fatal_reporter_ops,
fw_fatal_ops,
grace_period,
dev);
if (IS_ERR(health->fw_fatal_reporter))
Expand Down
Loading

0 comments on commit 006e896

Please sign in to comment.