Skip to content

Commit

Permalink
iwlwifi: jiffies based tx queues watchdog
Browse files Browse the repository at this point in the history
This patch replace monitor/recover timer by watchdog based on time
stamp. New code allow to discover hangs more precisely.

Timeout values are currently doubled monitoring period values of
previous timer. This have to be tuned based of firmware timing
capabilities.

Tested on 3945, 4965, 5300, 6300.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
  • Loading branch information
Stanislaw Gruszka authored and John W. Linville committed Dec 6, 2010
1 parent abc471d commit 22de94d
Show file tree
Hide file tree
Showing 13 changed files with 91 additions and 138 deletions.
3 changes: 1 addition & 2 deletions drivers/net/wireless/iwlwifi/iwl-1000.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,6 @@ static struct iwl_lib_ops iwl1000_lib = {
.bt_stats_read = iwl_ucode_bt_stats_read,
.reply_tx_error = iwl_reply_tx_error_read,
},
.recover_from_tx_stall = iwl_bg_monitor_recover,
.check_plcp_health = iwl_good_plcp_health,
.check_ack_health = iwl_good_ack_health,
.txfifo_flush = iwlagn_txfifo_flush,
Expand Down Expand Up @@ -262,7 +261,7 @@ static struct iwl_base_params iwl1000_base_params = {
.support_ct_kill_exit = true,
.plcp_delta_threshold = IWL_MAX_PLCP_ERR_EXT_LONG_THRESHOLD_DEF,
.chain_noise_scale = 1000,
.monitor_recover_period = IWL_DEF_MONITORING_PERIOD,
.wd_timeout = IWL_DEF_WD_TIMEOUT,
.max_event_log_size = 128,
.ucode_tracing = true,
.sensitivity_calib_by_driver = true,
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/wireless/iwlwifi/iwl-3945.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ static void iwl3945_rx_reply_tx(struct iwl_priv *priv,
return;
}

txq->time_stamp = jiffies;
info = IEEE80211_SKB_CB(txq->txb[txq->q.read_ptr].skb);
ieee80211_tx_info_clear_status(info);

Expand Down Expand Up @@ -2733,7 +2734,6 @@ static struct iwl_lib_ops iwl3945_lib = {
.isr_ops = {
.isr = iwl_isr_legacy,
},
.recover_from_tx_stall = iwl_bg_monitor_recover,
.check_plcp_health = iwl3945_good_plcp_health,

.debugfs_ops = {
Expand Down Expand Up @@ -2776,7 +2776,7 @@ static struct iwl_base_params iwl3945_base_params = {
.led_compensation = 64,
.broken_powersave = true,
.plcp_delta_threshold = IWL_MAX_PLCP_ERR_LONG_THRESHOLD_DEF,
.monitor_recover_period = IWL_DEF_MONITORING_PERIOD,
.wd_timeout = IWL_DEF_WD_TIMEOUT,
.max_event_log_size = 512,
.tx_power_by_driver = true,
};
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/wireless/iwlwifi/iwl-4965.c
Original file line number Diff line number Diff line change
Expand Up @@ -2198,6 +2198,7 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv,
return;
}

txq->time_stamp = jiffies;
info = IEEE80211_SKB_CB(txq->txb[txq->q.read_ptr].skb);
memset(&info->status, 0, sizeof(info->status));

Expand Down Expand Up @@ -2554,7 +2555,6 @@ static struct iwl_lib_ops iwl4965_lib = {
.bt_stats_read = iwl_ucode_bt_stats_read,
.reply_tx_error = iwl_reply_tx_error_read,
},
.recover_from_tx_stall = iwl_bg_monitor_recover,
.check_plcp_health = iwl_good_plcp_health,
};

Expand Down Expand Up @@ -2609,7 +2609,7 @@ static struct iwl_base_params iwl4965_base_params = {
.led_compensation = 61,
.chain_noise_num_beacons = IWL4965_CAL_NUM_BEACONS,
.plcp_delta_threshold = IWL_MAX_PLCP_ERR_THRESHOLD_DEF,
.monitor_recover_period = IWL_DEF_MONITORING_PERIOD,
.wd_timeout = IWL_DEF_WD_TIMEOUT,
.temperature_kelvin = true,
.max_event_log_size = 512,
.tx_power_by_driver = true,
Expand Down
4 changes: 1 addition & 3 deletions drivers/net/wireless/iwlwifi/iwl-5000.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,6 @@ static struct iwl_lib_ops iwl5000_lib = {
.bt_stats_read = iwl_ucode_bt_stats_read,
.reply_tx_error = iwl_reply_tx_error_read,
},
.recover_from_tx_stall = iwl_bg_monitor_recover,
.check_plcp_health = iwl_good_plcp_health,
.check_ack_health = iwl_good_ack_health,
.txfifo_flush = iwlagn_txfifo_flush,
Expand Down Expand Up @@ -472,7 +471,6 @@ static struct iwl_lib_ops iwl5150_lib = {
.bt_stats_read = iwl_ucode_bt_stats_read,
.reply_tx_error = iwl_reply_tx_error_read,
},
.recover_from_tx_stall = iwl_bg_monitor_recover,
.check_plcp_health = iwl_good_plcp_health,
.check_ack_health = iwl_good_ack_health,
.txfifo_flush = iwlagn_txfifo_flush,
Expand Down Expand Up @@ -511,7 +509,7 @@ static struct iwl_base_params iwl5000_base_params = {
.chain_noise_num_beacons = IWL_CAL_NUM_BEACONS,
.plcp_delta_threshold = IWL_MAX_PLCP_ERR_LONG_THRESHOLD_DEF,
.chain_noise_scale = 1000,
.monitor_recover_period = IWL_LONG_MONITORING_PERIOD,
.wd_timeout = IWL_LONG_WD_TIMEOUT,
.max_event_log_size = 512,
.ucode_tracing = true,
.sensitivity_calib_by_driver = true,
Expand Down
8 changes: 3 additions & 5 deletions drivers/net/wireless/iwlwifi/iwl-6000.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,6 @@ static struct iwl_lib_ops iwl6000_lib = {
.bt_stats_read = iwl_ucode_bt_stats_read,
.reply_tx_error = iwl_reply_tx_error_read,
},
.recover_from_tx_stall = iwl_bg_monitor_recover,
.check_plcp_health = iwl_good_plcp_health,
.check_ack_health = iwl_good_ack_health,
.txfifo_flush = iwlagn_txfifo_flush,
Expand Down Expand Up @@ -412,7 +411,6 @@ static struct iwl_lib_ops iwl6000g2b_lib = {
.bt_stats_read = iwl_ucode_bt_stats_read,
.reply_tx_error = iwl_reply_tx_error_read,
},
.recover_from_tx_stall = iwl_bg_monitor_recover,
.check_plcp_health = iwl_good_plcp_health,
.check_ack_health = iwl_good_ack_health,
.txfifo_flush = iwlagn_txfifo_flush,
Expand Down Expand Up @@ -482,7 +480,7 @@ static struct iwl_base_params iwl6000_base_params = {
.support_ct_kill_exit = true,
.plcp_delta_threshold = IWL_MAX_PLCP_ERR_THRESHOLD_DEF,
.chain_noise_scale = 1000,
.monitor_recover_period = IWL_DEF_MONITORING_PERIOD,
.wd_timeout = IWL_DEF_WD_TIMEOUT,
.max_event_log_size = 512,
.ucode_tracing = true,
.sensitivity_calib_by_driver = true,
Expand All @@ -506,7 +504,7 @@ static struct iwl_base_params iwl6050_base_params = {
.support_ct_kill_exit = true,
.plcp_delta_threshold = IWL_MAX_PLCP_ERR_THRESHOLD_DEF,
.chain_noise_scale = 1500,
.monitor_recover_period = IWL_DEF_MONITORING_PERIOD,
.wd_timeout = IWL_DEF_WD_TIMEOUT,
.max_event_log_size = 1024,
.ucode_tracing = true,
.sensitivity_calib_by_driver = true,
Expand All @@ -529,7 +527,7 @@ static struct iwl_base_params iwl6000_coex_base_params = {
.support_ct_kill_exit = true,
.plcp_delta_threshold = IWL_MAX_PLCP_ERR_THRESHOLD_DEF,
.chain_noise_scale = 1000,
.monitor_recover_period = IWL_LONG_MONITORING_PERIOD,
.wd_timeout = IWL_LONG_WD_TIMEOUT,
.max_event_log_size = 512,
.ucode_tracing = true,
.sensitivity_calib_by_driver = true,
Expand Down
1 change: 1 addition & 0 deletions drivers/net/wireless/iwlwifi/iwl-agn-lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ static void iwlagn_rx_reply_tx(struct iwl_priv *priv,
return;
}

txq->time_stamp = jiffies;
info = IEEE80211_SKB_CB(txq->txb[txq->q.read_ptr].skb);
memset(&info->status, 0, sizeof(info->status));

Expand Down
21 changes: 6 additions & 15 deletions drivers/net/wireless/iwlwifi/iwl-agn.c
Original file line number Diff line number Diff line change
Expand Up @@ -2654,13 +2654,8 @@ static void iwl_alive_start(struct iwl_priv *priv)
/* After the ALIVE response, we can send host commands to the uCode */
set_bit(STATUS_ALIVE, &priv->status);

if (priv->cfg->ops->lib->recover_from_tx_stall) {
/* Enable timer to monitor the driver queues */
mod_timer(&priv->monitor_recover,
jiffies +
msecs_to_jiffies(
priv->cfg->base_params->monitor_recover_period));
}
/* Enable watchdog to monitor the driver tx queues */
iwl_setup_watchdog(priv);

if (iwl_is_rfkill(priv))
return;
Expand Down Expand Up @@ -2755,8 +2750,7 @@ static void __iwl_down(struct iwl_priv *priv)

/* Stop TX queues watchdog. We need to have STATUS_EXIT_PENDING bit set
* to prevent rearm timer */
if (priv->cfg->ops->lib->recover_from_tx_stall)
del_timer_sync(&priv->monitor_recover);
del_timer_sync(&priv->watchdog);

iwl_clear_ucode_stations(priv, NULL);
iwl_dealloc_bcast_stations(priv);
Expand Down Expand Up @@ -3742,12 +3736,9 @@ static void iwl_setup_deferred_work(struct iwl_priv *priv)
priv->ucode_trace.data = (unsigned long)priv;
priv->ucode_trace.function = iwl_bg_ucode_trace;

if (priv->cfg->ops->lib->recover_from_tx_stall) {
init_timer(&priv->monitor_recover);
priv->monitor_recover.data = (unsigned long)priv;
priv->monitor_recover.function =
priv->cfg->ops->lib->recover_from_tx_stall;
}
init_timer(&priv->watchdog);
priv->watchdog.data = (unsigned long)priv;
priv->watchdog.function = iwl_bg_watchdog;

if (!priv->cfg->base_params->use_isr_legacy)
tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
Expand Down
111 changes: 49 additions & 62 deletions drivers/net/wireless/iwlwifi/iwl-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1894,77 +1894,58 @@ int iwl_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
}
EXPORT_SYMBOL(iwl_mac_change_interface);

/**
* iwl_bg_monitor_recover - Timer callback to check for stuck queue and recover
*
* During normal condition (no queue is stuck), the timer is continually set to
* execute every monitor_recover_period milliseconds after the last timer
* expired. When the queue read_ptr is at the same place, the timer is
* shorten to 100mSecs. This is
* 1) to reduce the chance that the read_ptr may wrap around (not stuck)
* 2) to detect the stuck queues quicker before the station and AP can
* disassociate each other.
*
* This function monitors all the tx queues and recover from it if any
* of the queues are stuck.
* 1. It first check the cmd queue for stuck conditions. If it is stuck,
* it will recover by resetting the firmware and return.
* 2. Then, it checks for station association. If it associates it will check
* other queues. If any queue is stuck, it will recover by resetting
* the firmware.
* Note: It the number of times the queue read_ptr to be at the same place to
* be MAX_REPEAT+1 in order to consider to be stuck.
*/
/*
* The maximum number of times the read pointer of the tx queue at the
* same place without considering to be stuck.
* On every watchdog tick we check (latest) time stamp. If it does not
* change during timeout period and queue is not empty we reset firmware.
*/
#define MAX_REPEAT (2)
static int iwl_check_stuck_queue(struct iwl_priv *priv, int cnt)
{
struct iwl_tx_queue *txq;
struct iwl_queue *q;
struct iwl_tx_queue *txq = &priv->txq[cnt];
struct iwl_queue *q = &txq->q;
unsigned long timeout;
int ret;

txq = &priv->txq[cnt];
q = &txq->q;
/* queue is empty, skip */
if (q->read_ptr == q->write_ptr)
if (q->read_ptr == q->write_ptr) {
txq->time_stamp = jiffies;
return 0;
}

if (q->read_ptr == q->last_read_ptr) {
/* a queue has not been read from last time */
if (q->repeat_same_read_ptr > MAX_REPEAT) {
IWL_ERR(priv,
"queue %d stuck %d time. Fw reload.\n",
q->id, q->repeat_same_read_ptr);
q->repeat_same_read_ptr = 0;
iwl_force_reset(priv, IWL_FW_RESET, false);
} else {
q->repeat_same_read_ptr++;
IWL_DEBUG_RADIO(priv,
"queue %d, not read %d time\n",
q->id,
q->repeat_same_read_ptr);
mod_timer(&priv->monitor_recover,
jiffies + msecs_to_jiffies(
IWL_ONE_HUNDRED_MSECS));
return 1;
}
} else {
q->last_read_ptr = q->read_ptr;
q->repeat_same_read_ptr = 0;
timeout = txq->time_stamp +
msecs_to_jiffies(priv->cfg->base_params->wd_timeout);

if (time_after(jiffies, timeout)) {
IWL_ERR(priv, "Queue %d stuck for %u ms.\n",
q->id, priv->cfg->base_params->wd_timeout);
ret = iwl_force_reset(priv, IWL_FW_RESET, false);
return (ret == -EAGAIN) ? 0 : 1;
}

return 0;
}

void iwl_bg_monitor_recover(unsigned long data)
/*
* Making watchdog tick be a quarter of timeout assure we will
* discover the queue hung between timeout and 1.25*timeout
*/
#define IWL_WD_TICK(timeout) ((timeout) / 4)

/*
* Watchdog timer callback, we check each tx queue for stuck, if if hung
* we reset the firmware. If everything is fine just rearm the timer.
*/
void iwl_bg_watchdog(unsigned long data)
{
struct iwl_priv *priv = (struct iwl_priv *)data;
int cnt;
unsigned long timeout;

if (test_bit(STATUS_EXIT_PENDING, &priv->status))
return;

timeout = priv->cfg->base_params->wd_timeout;
if (timeout == 0)
return;

/* monitor and check for stuck cmd queue */
if (iwl_check_stuck_queue(priv, priv->cmd_queue))
return;
Expand All @@ -1979,17 +1960,23 @@ void iwl_bg_monitor_recover(unsigned long data)
return;
}
}
if (priv->cfg->base_params->monitor_recover_period) {
/*
* Reschedule the timer to occur in
* priv->cfg->base_params->monitor_recover_period
*/
mod_timer(&priv->monitor_recover, jiffies + msecs_to_jiffies(
priv->cfg->base_params->monitor_recover_period));
}

mod_timer(&priv->watchdog, jiffies +
msecs_to_jiffies(IWL_WD_TICK(timeout)));
}
EXPORT_SYMBOL(iwl_bg_monitor_recover);
EXPORT_SYMBOL(iwl_bg_watchdog);

void iwl_setup_watchdog(struct iwl_priv *priv)
{
unsigned int timeout = priv->cfg->base_params->wd_timeout;

if (timeout)
mod_timer(&priv->watchdog,
jiffies + msecs_to_jiffies(IWL_WD_TICK(timeout)));
else
del_timer(&priv->watchdog);
}
EXPORT_SYMBOL(iwl_setup_watchdog);

/*
* extended beacon time format
Expand Down
10 changes: 4 additions & 6 deletions drivers/net/wireless/iwlwifi/iwl-core.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,6 @@ struct iwl_lib_ops {

/* temperature */
struct iwl_temp_ops temp_ops;
/* recover from tx queue stall */
void (*recover_from_tx_stall)(unsigned long data);
/* check for plcp health */
bool (*check_plcp_health)(struct iwl_priv *priv,
struct iwl_rx_packet *pkt);
Expand Down Expand Up @@ -280,7 +278,7 @@ struct iwl_mod_params {
* @plcp_delta_threshold: plcp error rate threshold used to trigger
* radio tuning when there is a high receiving plcp error rate
* @chain_noise_scale: default chain noise scale used for gain computation
* @monitor_recover_period: default timer used to check stuck queues
* @wd_timeout: TX queues watchdog timeout
* @temperature_kelvin: temperature report by uCode in kelvin
* @max_event_log_size: size of event log buffer size for ucode event logging
* @tx_power_by_driver: tx power calibration performed by driver
Expand Down Expand Up @@ -315,8 +313,7 @@ struct iwl_base_params {
const bool support_wimax_coexist;
u8 plcp_delta_threshold;
s32 chain_noise_scale;
/* timer period for monitor the driver queues */
u32 monitor_recover_period;
unsigned int wd_timeout;
bool temperature_kelvin;
u32 max_event_log_size;
const bool tx_power_by_driver;
Expand Down Expand Up @@ -546,6 +543,7 @@ int iwl_tx_queue_init(struct iwl_priv *priv, struct iwl_tx_queue *txq,
void iwl_tx_queue_reset(struct iwl_priv *priv, struct iwl_tx_queue *txq,
int slots_num, u32 txq_id);
void iwl_tx_queue_free(struct iwl_priv *priv, int txq_id);
void iwl_setup_watchdog(struct iwl_priv *priv);
/*****************************************************
* TX power
****************************************************/
Expand Down Expand Up @@ -625,7 +623,7 @@ static inline u16 iwl_pcie_link_ctl(struct iwl_priv *priv)
return pci_lnk_ctl;
}

void iwl_bg_monitor_recover(unsigned long data);
void iwl_bg_watchdog(unsigned long data);
u32 iwl_usecs_to_beacons(struct iwl_priv *priv, u32 usec, u32 beacon_interval);
__le32 iwl_add_beacon_time(struct iwl_priv *priv, u32 base,
u32 addon, u32 beacon_interval);
Expand Down
Loading

0 comments on commit 22de94d

Please sign in to comment.