Skip to content

Commit

Permalink
Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/tnguy/next-queue

Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2025-01-08 (ice)

This series contains updates to ice driver only.

Przemek reworks the implementation so that ice_init_hw() is called before
ice_adapter initialization. The motivation is to have the ability to act
on the number of PFs during ice_adapter initialization. This is not done
here, but the code is also a bit cleaner.

Michal adds priority to be considered when matching recipes for proper
differentiation.

Konrad adds devlink health reporting for firmware generated events.

R Sundar utilizes string helpers over open coded versions.

Jake adds implementation to utilize a lower latency interface to program
PHY timer when supported.

Additional information can be found on the original cover letter:

  https://lore.kernel.org/intel-wired-lan/20241216145453.333745-1-anton.nadezhdin@intel.com/

Karol adds and allows for different PTP delay values to be used per pin.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  ice: Add in/out PTP pin delays
  ice: implement low latency PHY timer updates
  ice: check low latency PHY timer update firmware capability
  ice: add lock to protect low latency interface
  ice: rename TS_LL_READ* macros to REG_LL_PROXY_H_*
  ice: use read_poll_timeout_atomic in ice_read_phy_tstamp_ll_e810
  ice: use string choice helpers
  ice: add fw and port health reporters
  ice: add recipe priority check in search
  ice: ice_probe: init ice_adapter after HW init
  ice: minor: rename goto labels from err to unroll
  ice: split ice_init_hw() out from ice_init_dev()
  ice: c827: move wait for FW to ice_init_hw()
====================

Link: https://patch.msgid.link/20250115000844.714530-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Jan 16, 2025
2 parents 3440fa3 + 9146394 commit 1da742e
Show file tree
Hide file tree
Showing 14 changed files with 804 additions and 216 deletions.
10 changes: 9 additions & 1 deletion drivers/net/ethernet/intel/ice/devlink/devlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -1207,9 +1207,15 @@ static int ice_devlink_reinit_up(struct ice_pf *pf)
struct ice_vsi *vsi = ice_get_main_vsi(pf);
int err;

err = ice_init_hw(&pf->hw);
if (err) {
dev_err(ice_pf_to_dev(pf), "ice_init_hw failed: %d\n", err);
return err;
}

err = ice_init_dev(pf);
if (err)
return err;
goto unroll_hw_init;

vsi->flags = ICE_VSI_FLAG_INIT;

Expand All @@ -1232,6 +1238,8 @@ static int ice_devlink_reinit_up(struct ice_pf *pf)
rtnl_unlock();
err_vsi_cfg:
ice_deinit_dev(pf);
unroll_hw_init:
ice_deinit_hw(&pf->hw);
return err;
}

Expand Down
295 changes: 288 additions & 7 deletions drivers/net/ethernet/intel/ice/devlink/health.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,270 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024, Intel Corporation. */

#include "health.h"
#include "ice.h"
#include "ice_adminq_cmd.h" /* for enum ice_aqc_health_status_elem */
#include "health.h"

/*
 * Pass one field of @obj to devlink_fmsg_put(): the field name is stringified
 * and used as the second argument, the field value as the third.
 */
#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \
devlink_fmsg_put(fmsg, #name, (obj)->name)

#define ICE_HEALTH_STATUS_DATA_SIZE 2

/*
 * One row of the health status lookup table: a health status code together
 * with the texts and data labels emitted for it.
 */
struct ice_health_status {
/* health status code; the key the lookup table is searched by */
enum ice_aqc_health_status code;
/* text emitted under the "Description" label */
const char *description;
/* text emitted under the "Possible Solution" label; skipped when NULL */
const char *solution;
/* labels for the two internal data words; a NULL slot falls back to "Aux Data N" */
const char *data_label[ICE_HEALTH_STATUS_DATA_SIZE];
};

/*
* In addition to the health status codes provided below, the firmware might
* generate Health Status Codes that are not pertinent to the end-user.
* For instance, Health Code 0x1002 is triggered when the command fails.
* Such codes should be disregarded by the end-user.
* The lookup table below must be kept sorted by code.
*/

static const char *const ice_common_port_solutions =
"Check your cable connection. Change or replace the module or cable. Manually set speed and duplex.";
static const char *const ice_port_number_label = "Port Number";
static const char *const ice_update_nvm_solution = "Update to the latest NVM image.";

/*
 * Health status codes that are reported to the user, with their description,
 * suggested solution and labels for the two internal data words.
 *
 * ice_get_health_status() searches this array with bsearch(), so new entries
 * must be inserted in ascending order of their code.
 */
static const struct ice_health_status ice_health_status_lookup[] = {
	{ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT, "An unsupported module was detected.",
		ice_common_port_solutions, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE, "Module type is not supported.",
		"Change or replace the module or cable.", {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL, "Module is not qualified.",
		ice_common_port_solutions, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM,
		"Device cannot communicate with the module.",
		ice_common_port_solutions,
		{ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT, "Unresolved module conflict.",
		"Manually set speed/duplex or change the port option. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.",
		{ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT, "Module is not present.",
		"Check that the module is inserted correctly. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.",
		{ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED, "Underutilized module.",
		"Change or replace the module or cable. Change the port option.",
		{ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT, "An unsupported module was detected.",
		ice_common_port_solutions, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG, "Invalid link configuration.",
		NULL, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS, "Port hardware access error.",
		ice_update_nvm_solution, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE, "A port is unreachable.",
		"Change the port option. Update to the latest NVM image."},
	{ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED, "Port speed is limited due to module.",
		"Change the module or configure the port option to match the current module speed. Change the port option.",
		{ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT,
		"All configured link modes were attempted but failed to establish link. The device will restart the process to establish link.",
		"Check link partner connection and configuration.",
		{ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED,
		"Port speed is limited by PHY capabilities.",
		"Change the module to align to port option.", {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO, "LOM topology netlist is corrupted.",
		ice_update_nvm_solution, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_NETLIST, "Unrecoverable netlist error.",
		ice_update_nvm_solution, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT, "Port topology conflict.",
		"Change the port option. Update to the latest NVM image."},
	{ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS, "Unrecoverable hardware access error.",
		ice_update_nvm_solution, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME, "Unrecoverable runtime error.",
		ice_update_nvm_solution, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT, "Link management engine failed to initialize.",
		ice_update_nvm_solution, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_ERR_PHY_FW_LOAD,
		"Failed to load the firmware image in the external PHY.",
		ice_update_nvm_solution, {ice_port_number_label}},
	{ICE_AQC_HEALTH_STATUS_INFO_RECOVERY, "The device is in firmware recovery mode.",
		ice_update_nvm_solution, {"Extended Error"}},
	{ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS, "The flash chip cannot be accessed.",
		"If issue persists, call customer support.", {"Access Type"}},
	{ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH, "NVM authentication failed.",
		ice_update_nvm_solution},
	{ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH, "Option ROM authentication failed.",
		ice_update_nvm_solution},
	{ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH, "DDP package authentication failed.",
		"Update to latest base driver and DDP package."},
	{ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT, "NVM image is incompatible.",
		ice_update_nvm_solution},
	{ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT, "Option ROM is incompatible.",
		ice_update_nvm_solution, {"Expected PCI Device ID", "Expected Module ID"}},
	{ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB,
		"Supplied MIB file is invalid. DCB reverted to default configuration.",
		"Disable FW-LLDP and check DCBx system configuration.",
		{ice_port_number_label, "MIB ID"}},
};

static int ice_health_status_lookup_compare(const void *a, const void *b)
{
return ((struct ice_health_status *)a)->code - ((struct ice_health_status *)b)->code;
}

/*
 * Find the lookup table entry for a health status code.
 *
 * Returns the matching entry of ice_health_status_lookup, or NULL when
 * bsearch() finds no entry with that code.
 */
static const struct ice_health_status *ice_get_health_status(u16 code)
{
	struct ice_health_status needle = { .code = code };

	return bsearch(&needle, ice_health_status_lookup,
		       ARRAY_SIZE(ice_health_status_lookup),
		       sizeof(ice_health_status_lookup[0]),
		       ice_health_status_lookup_compare);
}

/*
 * Write a human readable description of one health status element to @fmsg.
 *
 * The raw status code is always emitted as "Syndrome". For a non-zero code
 * that is present in the lookup table, the description, the optional solution
 * and every internal data word that is not
 * ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA follow.
 */
static void ice_describe_status_code(struct devlink_fmsg *fmsg,
				     struct ice_aqc_health_status_elem *hse)
{
	static const char *const aux_label[] = { "Aux Data 1", "Aux Data 2" };
	const struct ice_health_status *entry;
	u32 aux_data[2];
	u16 status_code;

	status_code = le16_to_cpu(hse->health_status_code);
	devlink_fmsg_put(fmsg, "Syndrome", status_code);

	/* A zero code carries nothing further to describe */
	if (!status_code)
		return;

	entry = ice_get_health_status(status_code);
	if (!entry)
		return;

	aux_data[0] = le32_to_cpu(hse->internal_data1);
	aux_data[1] = le32_to_cpu(hse->internal_data2);

	devlink_fmsg_string_pair_put(fmsg, "Description", entry->description);
	if (entry->solution)
		devlink_fmsg_string_pair_put(fmsg, "Possible Solution",
					     entry->solution);

	for (size_t i = 0; i < ICE_HEALTH_STATUS_DATA_SIZE; i++) {
		const char *label = entry->data_label[i];

		if (aux_data[i] == ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA)
			continue;

		/* Entries without a dedicated label use the generic one */
		if (!label)
			label = aux_label[i];

		devlink_fmsg_u32_pair_put(fmsg, label, aux_data[i]);
	}
}

/*
 * Diagnose callback of the "port" health reporter: describe the port status
 * element stored in the PF. Always returns 0.
 */
static int
ice_port_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
			   struct netlink_ext_ack *extack)
{
	struct ice_aqc_health_status_elem *last_status;
	struct ice_pf *pf;

	pf = devlink_health_reporter_priv(reporter);
	last_status = &pf->health_reporters.port_status;
	ice_describe_status_code(fmsg, last_status);

	return 0;
}

/*
 * Dump callback of the "port" health reporter: describe the port status
 * element stored in the PF. @priv_ctx is not used. Always returns 0.
 */
static int
ice_port_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
		       void *priv_ctx, struct netlink_ext_ack __always_unused *extack)
{
	struct ice_aqc_health_status_elem *last_status;
	struct ice_pf *pf;

	pf = devlink_health_reporter_priv(reporter);
	last_status = &pf->health_reporters.port_status;
	ice_describe_status_code(fmsg, last_status);

	return 0;
}

/*
 * Diagnose callback of the "fw" health reporter: describe the firmware status
 * element stored in the PF. Always returns 0.
 */
static int
ice_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
			 struct netlink_ext_ack *extack)
{
	struct ice_aqc_health_status_elem *last_status;
	struct ice_pf *pf;

	pf = devlink_health_reporter_priv(reporter);
	last_status = &pf->health_reporters.fw_status;
	ice_describe_status_code(fmsg, last_status);

	return 0;
}

/*
 * Dump callback of the "fw" health reporter: describe the firmware status
 * element stored in the PF. @priv_ctx is not used. Always returns 0.
 */
static int
ice_fw_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg,
		     void *priv_ctx, struct netlink_ext_ack *extack)
{
	struct ice_aqc_health_status_elem *last_status;
	struct ice_pf *pf;

	pf = devlink_health_reporter_priv(reporter);
	last_status = &pf->health_reporters.fw_status;
	ice_describe_status_code(fmsg, last_status);

	return 0;
}

/*
 * Send the health status configuration to firmware.
 *
 * With @enable set, both the PF-specific and the global mask bits are passed
 * to ice_aq_set_health_status_cfg(); otherwise no bits are set. A failing
 * command is logged and not propagated to the caller.
 */
static void ice_config_health_events(struct ice_pf *pf, bool enable)
{
	u8 mask = enable ? (ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
			    ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK) : 0;
	int status;

	status = ice_aq_set_health_status_cfg(&pf->hw, mask);
	if (!status)
		return;

	dev_err(ice_pf_to_dev(pf), "Failed to %s firmware health events, err %d aq_err %s\n",
		str_enable_disable(enable), status,
		ice_aq_str(pf->hw.adminq.sq_last_status));
}

/**
 * ice_process_health_status_event - Process the health status event from FW
 * @pf: pointer to the PF structure
 * @event: event structure containing the Health Status Event opcode
 *
 * Walk the health status elements carried in the event buffer. An element
 * whose code is found in the lookup table is stored in @pf as the latest FW
 * or port status and reported through the matching devlink health reporter.
 * Any other code is only logged at debug level.
 *
 * The event is dropped with an error message when the advertised element
 * count does not fit into the event buffer.
 */
void ice_process_health_status_event(struct ice_pf *pf, struct ice_rq_event_info *event)
{
	const struct ice_aqc_health_status_elem *health_info;
	u16 count;

	health_info = (const struct ice_aqc_health_status_elem *)event->msg_buf;
	count = le16_to_cpu(event->desc.params.get_health_status.health_status_count);

	/* Do not read past the end of the buffer on a bogus count */
	if (count > (event->buf_len / sizeof(*health_info))) {
		dev_err(ice_pf_to_dev(pf), "Received a health status event with invalid element count\n");
		return;
	}

	for (size_t i = 0; i < count; i++) {
		const struct ice_health_status *health_code;
		u16 status_code;

		status_code = le16_to_cpu(health_info->health_status_code);
		health_code = ice_get_health_status(status_code);

		if (health_code) {
			switch (le16_to_cpu(health_info->event_source)) {
			case ICE_AQC_HEALTH_STATUS_GLOBAL:
				pf->health_reporters.fw_status = *health_info;
				devlink_health_report(pf->health_reporters.fw,
						      "FW syndrome reported", NULL);
				break;
			case ICE_AQC_HEALTH_STATUS_PF:
			case ICE_AQC_HEALTH_STATUS_PORT:
				pf->health_reporters.port_status = *health_info;
				devlink_health_report(pf->health_reporters.port,
						      "Port syndrome reported", NULL);
				break;
			default:
				dev_err(ice_pf_to_dev(pf), "Health code with unknown source\n");
				break;
			}
		} else {
			u32 data1, data2;
			u16 source;

			source = le16_to_cpu(health_info->event_source);
			data1 = le32_to_cpu(health_info->internal_data1);
			data2 = le32_to_cpu(health_info->internal_data2);
			/* Codes missing from the lookup table are only logged */
			dev_dbg(ice_pf_to_dev(pf),
				"Received internal health status code 0x%08x, source: 0x%08x, data1: 0x%08x, data2: 0x%08x\n",
				status_code, source, data1, data2);
		}
		health_info++;
	}
}

/**
* ice_devlink_health_report - boilerplate to call given @reporter
*
Expand Down Expand Up @@ -203,14 +461,26 @@ ice_init_devlink_rep(struct ice_pf *pf,
return rep;
}

#define ICE_DEFINE_HEALTH_REPORTER_OPS(_name) \
static const struct devlink_health_reporter_ops ice_ ## _name ## _reporter_ops = { \
#define ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field) \
._field = ice_##_name##_reporter_##_field,

#define ICE_DEFINE_HEALTH_REPORTER_OPS_1(_name, _field1) \
static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \
.name = #_name, \
.dump = ice_ ## _name ## _reporter_dump, \
}
ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \
}

#define ICE_DEFINE_HEALTH_REPORTER_OPS_2(_name, _field1, _field2) \
static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \
.name = #_name, \
ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \
ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field2) \
}

ICE_DEFINE_HEALTH_REPORTER_OPS(mdd);
ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang);
ICE_DEFINE_HEALTH_REPORTER_OPS_1(mdd, dump);
ICE_DEFINE_HEALTH_REPORTER_OPS_1(tx_hang, dump);
ICE_DEFINE_HEALTH_REPORTER_OPS_2(fw, dump, diagnose);
ICE_DEFINE_HEALTH_REPORTER_OPS_2(port, dump, diagnose);

/**
* ice_health_init - allocate and init all ice devlink health reporters and
Expand All @@ -224,6 +494,12 @@ void ice_health_init(struct ice_pf *pf)

reps->mdd = ice_init_devlink_rep(pf, &ice_mdd_reporter_ops);
reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops);

if (ice_is_fw_health_report_supported(&pf->hw)) {
reps->fw = ice_init_devlink_rep(pf, &ice_fw_reporter_ops);
reps->port = ice_init_devlink_rep(pf, &ice_port_reporter_ops);
ice_config_health_events(pf, true);
}
}

/**
Expand All @@ -246,6 +522,11 @@ void ice_health_deinit(struct ice_pf *pf)
{
ice_deinit_devl_reporter(pf->health_reporters.mdd);
ice_deinit_devl_reporter(pf->health_reporters.tx_hang);
if (ice_is_fw_health_report_supported(&pf->hw)) {
ice_deinit_devl_reporter(pf->health_reporters.fw);
ice_deinit_devl_reporter(pf->health_reporters.port);
ice_config_health_events(pf, false);
}
}

static
Expand Down
Loading

0 comments on commit 1da742e

Please sign in to comment.