Skip to content

Commit

Permalink
bnxt_en: Event handler for Thermal event
Browse files Browse the repository at this point in the history
Newer FW will send a new async event when it detects that
the chip's temperature has crossed the configured threshold value.
The driver will now notify hwmon and will log a warning message.

Link: https://lore.kernel.org/netdev/20230815045658.80494-13-michael.chan@broadcom.com/
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: linux-hwmon@vger.kernel.org
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Kalesh AP authored and David S. Miller committed Oct 4, 2023
1 parent 3d9cf96 commit a19b480
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
52 changes: 52 additions & 0 deletions drivers/net/ethernet/broadcom/bnxt/bnxt.c
Original file line number Diff line number Diff line change
Expand Up @@ -2129,6 +2129,24 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id)
return INVALID_HW_RING_ID;
}

#define BNXT_EVENT_THERMAL_CURRENT_TEMP(data2) \
((data2) & \
ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK)

#define BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2) \
(((data2) & \
ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_MASK) >>\
ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_THRESHOLD_TEMP_SFT)

#define EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1) \
((data1) & \
ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_MASK)

#define EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1) \
(((data1) & \
ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR) ==\
ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_TRANSITION_DIR_INCREASING)

static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
{
u32 err_type = BNXT_EVENT_ERROR_REPORT_TYPE(data1);
Expand All @@ -2144,6 +2162,40 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD:
netdev_warn(bp->dev, "One or more MMIO doorbells dropped by the device!\n");
break;
case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD: {
u32 type = EVENT_DATA1_THERMAL_THRESHOLD_TYPE(data1);
char *threshold_type;
char *dir_str;

switch (type) {
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
threshold_type = "warning";
break;
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
threshold_type = "critical";
break;
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
threshold_type = "fatal";
break;
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
threshold_type = "shutdown";
break;
default:
netdev_err(bp->dev, "Unknown Thermal threshold type event\n");
return;
}
if (EVENT_DATA1_THERMAL_THRESHOLD_DIR_INCREASING(data1))
dir_str = "above";
else
dir_str = "below";
netdev_warn(bp->dev, "Chip temperature has gone %s the %s thermal threshold!\n",
dir_str, threshold_type);
netdev_warn(bp->dev, "Temperature (In Celsius), Current: %lu, threshold: %lu\n",
BNXT_EVENT_THERMAL_CURRENT_TEMP(data2),
BNXT_EVENT_THERMAL_THRESHOLD_TEMP(data2));
bnxt_hwmon_notify_event(bp, type);
break;
}
default:
netdev_err(bp->dev, "FW reported unknown error type %u\n",
err_type);
Expand Down
25 changes: 25 additions & 0 deletions drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,31 @@
#include "bnxt_hwrm.h"
#include "bnxt_hwmon.h"

void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
{
u32 attr;

if (!bp->hwmon_dev)
return;

switch (type) {
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_WARN:
attr = hwmon_temp_max_alarm;
break;
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_CRITICAL:
attr = hwmon_temp_crit_alarm;
break;
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_FATAL:
case ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA1_THRESHOLD_TYPE_SHUTDOWN:
attr = hwmon_temp_emergency_alarm;
break;
default:
return;
}

hwmon_notify_event(&bp->pdev->dev, hwmon_temp, attr, 0);
}

static int bnxt_hwrm_temp_query(struct bnxt *bp, u8 *temp)
{
struct hwrm_temp_monitor_query_output *resp;
Expand Down
5 changes: 5 additions & 0 deletions drivers/net/ethernet/broadcom/bnxt/bnxt_hwmon.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@
#define BNXT_HWMON_H

#ifdef CONFIG_BNXT_HWMON
void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type);
void bnxt_hwmon_uninit(struct bnxt *bp);
void bnxt_hwmon_init(struct bnxt *bp);
#else
static inline void bnxt_hwmon_notify_event(struct bnxt *bp, u32 type)
{
}

static inline void bnxt_hwmon_uninit(struct bnxt *bp)
{
}
Expand Down

0 comments on commit a19b480

Please sign in to comment.