Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net

Jeff Kirsher says:

====================
Intel Wired LAN Driver Updates 2015-02-26

This series contains fixes for i40e and i40evf only.

Alexey Khoroshilov found a possible leak of 'cmd_buf' when copy_from_user()
fails in i40e_dbg_command_write(); it is resolved by calling kfree() on the
error path.
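A minimal sketch of the pattern (illustrative only, not the driver's exact
code: the buffer name and error handling here are assumptions, and the
generic idiom treats any nonzero copy_from_user() return as failure):

	buf = kmalloc(count + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	if (copy_from_user(buf, user_buffer, count)) {
		/* free the command buffer on the error path to avoid the leak */
		kfree(buf);
		return -EFAULT;
	}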

Shannon provides a fix to ensure the shift and bitwise-AND precedences do not
work backwards on us, by adding parens.  Also fixed the driver to prevent
stray interrupts and system logs from un-handled interrupts, by combining the
ICR0 shutdown with the standard interrupt shutdown and adding interrupt
clearing to the PCI shutdown path.  Fixed an issue where an NVM write times
out before the transaction can complete: Shannon added logic to reacquire the
semaphore and retry the write once; if that one retry fails, we give up.
Also adds checks on pointers before their use, to ensure we do not
dereference NULL pointers when returning values from the AdminQ calls.
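The precedence pitfall is easy to reproduce on its own: in C, >> binds more
tightly than &, so an unparenthesized mask-and-shift shifts the mask rather
than the masked value.  A standalone sketch (the register value, mask, and
shift below are made up for illustration, not the i40e definitions):

	#include <stdio.h>

	#define DEL_MASK  0xE0u		/* illustrative mask, not the i40e one */
	#define DEL_SHIFT 5

	int main(void)
	{
		unsigned int reg = 0x47u;	/* pretend register read */
		/* parses as reg & (DEL_MASK >> DEL_SHIFT): the bug */
		unsigned int wrong = reg & DEL_MASK >> DEL_SHIFT;
		/* the intended (reg & DEL_MASK) >> DEL_SHIFT */
		unsigned int right = (reg & DEL_MASK) >> DEL_SHIFT;

		printf("wrong=%u right=%u\n", wrong, right);	/* wrong=7 right=2 */
		return 0;
	}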

Akeem adds a check to bail out if the device is already down when running
the Tx hang check subtask.

Anjali fixes an issue where TSO packets carry more than 8 frags per segment.
The hardware has some limitations which the driver needs to adhere to:
  1) no more than 8 descriptors per packet on the wire
  2) no header can span more than 3 descriptors
If either condition is violated, the hardware generates an internal error
and freezes the Tx queue, so Anjali fixes this by linearizing the skb to
avoid these situations (the math is sketched after this paragraph).  Also
fixed an issue where the per-Traffic-Class queue count was higher than the
number of queues enabled, which fixes a warning in multi-function (MFP) mode,
where systems regularly have more cores than vectors.  Fixed TCP/IPv6 over
VXLAN Tx checksum offload, where we were checking the outer protocol flags
to decide the flow for the inner header.
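The first-pass arithmetic behind the linearize decision can be sketched in
isolation (a simplification of the full i40e_chk_linearize() in the diff
below, which also walks the frag list; the function name here is made up):

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define MAX_BUFFER_TXD		8	/* HW limit: descriptors per wire packet */

	/* Rough average of frags per TSO segment: e.g. 17 frags across 2 GSO
	 * segments gives DIV_ROUND_UP(17 + 2, 2) = 10 > 8, so linearize. */
	static int too_many_frags_per_segment(unsigned int num_frags,
					      unsigned int gso_segs)
	{
		return DIV_ROUND_UP(num_frags + gso_segs, gso_segs) > MAX_BUFFER_TXD;
	}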

Jesse fixes a race condition in the transmit hang detection.  Previously we
were seeing false Tx hang detections; now the driver makes the
forward-progress checks more direct, reading the head write-back address and
the tail register when determining progress.  This avoids Tx hangs where the
software simply got behind, because we are checking hardware state directly
when determining a hang.
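The wraparound arithmetic behind that progress check can be sketched
standalone (a simplified version of the i40e_get_tx_pending() logic in the
diff below; the function name is made up):

	/* Descriptors still owned by hardware, given the head write-back value
	 * and the tail register, accounting for ring wraparound. */
	static unsigned int tx_descs_pending(unsigned int head, unsigned int tail,
					     unsigned int count)
	{
		if (head == tail)
			return 0;	/* hardware has caught up; nothing pending */
		return (head < tail) ? tail - head : tail + count - head;
	}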

Neerav fixes the transmit ring Qset handle when DCB is reconfigured.  The
issue was that when DCB is reconfigured to a single traffic class (TC), the
driver did not reset the Tx ring Qset handle to correct the mapping, which
caused Tx queue disable timeouts.  Also, as part of the DCB reconfiguration
flow, if the Tx queue disable does time out, a PF reset is issued to provide
some level of recovery.

Mitch stops flow director on shutdown because, in some cases, the hardware
would continue to try to access the FDIR ring after entering D3Hot state,
which would cause either PCIe errors or NMIs, depending upon the system
configuration.

* NOTE * I have verified that this series of patches for net will not cause
any merge issues when you sync up your net tree with your net-next tree.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Feb 27, 2015
2 parents c30e76a + 65d1346 commit 061c1a6
Showing 9 changed files with 280 additions and 76 deletions.
7 changes: 4 additions & 3 deletions drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -868,8 +868,9 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw)
 	 * The grst delay value is in 100ms units, and we'll wait a
 	 * couple counts longer to be sure we don't just miss the end.
 	 */
-	grst_del = rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK
-		   >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;
+	grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) &
+		    I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >>
+		   I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;
 	for (cnt = 0; cnt < grst_del + 2; cnt++) {
 		reg = rd32(hw, I40E_GLGEN_RSTAT);
 		if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
@@ -2846,7 +2847,7 @@ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
 
 	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
-	if (!status)
+	if (!status && filter_index)
 		*filter_index = resp->index;
 
 	return status;
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -40,7 +40,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay)
 	u32 val;
 
 	val = rd32(hw, I40E_PRTDCB_GENC);
-	*delay = (u16)(val & I40E_PRTDCB_GENC_PFCLDA_MASK >>
+	*delay = (u16)((val & I40E_PRTDCB_GENC_PFCLDA_MASK) >>
 		       I40E_PRTDCB_GENC_PFCLDA_SHIFT);
 }
 
4 changes: 3 additions & 1 deletion drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -989,8 +989,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 	if (!cmd_buf)
 		return count;
 	bytes_not_copied = copy_from_user(cmd_buf, buffer, count);
-	if (bytes_not_copied < 0)
+	if (bytes_not_copied < 0) {
+		kfree(cmd_buf);
 		return bytes_not_copied;
+	}
 	if (bytes_not_copied > 0)
 		count -= bytes_not_copied;
 	cmd_buf[count] = '\0';
44 changes: 33 additions & 11 deletions drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1512,7 +1512,12 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 	vsi->tc_config.numtc = numtc;
 	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
 	/* Number of queues per enabled TC */
-	num_tc_qps = vsi->alloc_queue_pairs/numtc;
+	/* In MFP case we can have a much lower count of MSIx
+	 * vectors available and so we need to lower the used
+	 * q count.
+	 */
+	qcount = min_t(int, vsi->alloc_queue_pairs, pf->num_lan_msix);
+	num_tc_qps = qcount / numtc;
 	num_tc_qps = min_t(int, num_tc_qps, I40E_MAX_QUEUES_PER_TC);
 
 	/* Setup queue offset/count for all TCs for given VSI */
@@ -2684,8 +2689,15 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
 	u16 qoffset, qcount;
 	int i, n;
 
-	if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED))
-		return;
+	if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
+		/* Reset the TC information */
+		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			rx_ring = vsi->rx_rings[i];
+			tx_ring = vsi->tx_rings[i];
+			rx_ring->dcb_tc = 0;
+			tx_ring->dcb_tc = 0;
+		}
+	}
 
 	for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
 		if (!(vsi->tc_config.enabled_tc & (1 << n)))
@@ -3830,6 +3842,12 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
 {
 	int i;
 
+	i40e_stop_misc_vector(pf);
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		synchronize_irq(pf->msix_entries[0].vector);
+		free_irq(pf->msix_entries[0].vector, pf);
+	}
+
 	i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
 	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i])
@@ -5254,8 +5272,14 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
 
 	/* Wait for the PF's Tx queues to be disabled */
 	ret = i40e_pf_wait_txq_disabled(pf);
-	if (!ret)
+	if (ret) {
+		/* Schedule PF reset to recover */
+		set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+		i40e_service_event_schedule(pf);
+	} else {
 		i40e_pf_unquiesce_all_vsi(pf);
+	}
+
 exit:
 	return ret;
 }
@@ -5587,7 +5611,8 @@ static void i40e_check_hang_subtask(struct i40e_pf *pf)
 	int i, v;
 
 	/* If we're down or resetting, just bail */
-	if (test_bit(__I40E_CONFIG_BUSY, &pf->state))
+	if (test_bit(__I40E_DOWN, &pf->state) ||
+	    test_bit(__I40E_CONFIG_BUSY, &pf->state))
 		return;
 
 	/* for each VSI/netdev
@@ -9533,6 +9558,7 @@ static void i40e_remove(struct pci_dev *pdev)
 	set_bit(__I40E_DOWN, &pf->state);
 	del_timer_sync(&pf->service_timer);
 	cancel_work_sync(&pf->service_task);
+	i40e_fdir_teardown(pf);
 
 	if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
 		i40e_free_vfs(pf);
@@ -9559,12 +9585,6 @@ static void i40e_remove(struct pci_dev *pdev)
 	if (pf->vsi[pf->lan_vsi])
 		i40e_vsi_release(pf->vsi[pf->lan_vsi]);
 
-	i40e_stop_misc_vector(pf);
-	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
-		synchronize_irq(pf->msix_entries[0].vector);
-		free_irq(pf->msix_entries[0].vector, pf);
-	}
-
 	/* shutdown and destroy the HMC */
 	if (pf->hw.hmc.hmc_obj) {
 		ret_code = i40e_shutdown_lan_hmc(&pf->hw);
@@ -9718,6 +9738,8 @@ static void i40e_shutdown(struct pci_dev *pdev)
 	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
 	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
 
+	i40e_clear_interrupt_scheme(pf);
+
 	if (system_state == SYSTEM_POWER_OFF) {
 		pci_wake_from_d3(pdev, pf->wol_en);
 		pci_set_power_state(pdev, PCI_D3hot);
35 changes: 35 additions & 0 deletions drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -679,9 +679,11 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
 {
 	i40e_status status;
 	enum i40e_nvmupd_cmd upd_cmd;
+	bool retry_attempt = false;
 
 	upd_cmd = i40e_nvmupd_validate_command(hw, cmd, errno);
 
+retry:
 	switch (upd_cmd) {
 	case I40E_NVMUPD_WRITE_CON:
 		status = i40e_nvmupd_nvm_write(hw, cmd, bytes, errno);
@@ -725,6 +727,39 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw,
 		*errno = -ESRCH;
 		break;
 	}
+
+	/* In some circumstances, a multi-write transaction takes longer
+	 * than the default 3 minute timeout on the write semaphore. If
+	 * the write failed with an EBUSY status, this is likely the problem,
+	 * so here we try to reacquire the semaphore then retry the write.
+	 * We only do one retry, then give up.
+	 */
+	if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) &&
+	    !retry_attempt) {
+		i40e_status old_status = status;
+		u32 old_asq_status = hw->aq.asq_last_status;
+		u32 gtime;
+
+		gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+		if (gtime >= hw->nvm.hw_semaphore_timeout) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "NVMUPD: write semaphore expired (%d >= %lld), retrying\n",
+				   gtime, hw->nvm.hw_semaphore_timeout);
+			i40e_release_nvm(hw);
+			status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE);
+			if (status) {
+				i40e_debug(hw, I40E_DEBUG_ALL,
+					   "NVMUPD: write semaphore reacquire failed aq_err = %d\n",
+					   hw->aq.asq_last_status);
+				status = old_status;
+				hw->aq.asq_last_status = old_asq_status;
+			} else {
+				retry_attempt = true;
+				goto retry;
+			}
+		}
+	}
+
 	return status;
 }
119 changes: 95 additions & 24 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -585,6 +585,20 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 	}
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_get_tx_pending - how many tx descriptors not processed
  * @tx_ring: the ring of descriptors
@@ -594,10 +608,16 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 **/
 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
 {
-	u32 ntu = ((ring->next_to_clean <= ring->next_to_use)
-		   ? ring->next_to_use
-		   : ring->next_to_use + ring->count);
-	return ntu - ring->next_to_clean;
+	u32 head, tail;
+
+	head = i40e_get_head(ring);
+	tail = readl(ring->tail);
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+
+	return 0;
 }
 
 /**
@@ -606,6 +626,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring)
 **/
 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 {
+	u32 tx_done = tx_ring->stats.packets;
+	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
 	u32 tx_pending = i40e_get_tx_pending(tx_ring);
 	struct i40e_pf *pf = tx_ring->vsi->back;
 	bool ret = false;
@@ -623,41 +645,25 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	 * run the check_tx_hang logic with a transmit completion
 	 * pending but without time to complete it yet.
 	 */
-	if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
-	    (tx_pending >= I40E_MIN_DESC_PENDING)) {
+	if ((tx_done_old == tx_done) && tx_pending) {
 		/* make sure it is true for two checks in a row */
 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
 				       &tx_ring->state);
-	} else if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
-		   (tx_pending < I40E_MIN_DESC_PENDING) &&
-		   (tx_pending > 0)) {
+	} else if (tx_done_old == tx_done &&
+		   (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
 		if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
 			dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d",
 				 tx_pending, tx_ring->queue_index);
 		pf->tx_sluggish_count++;
 	} else {
 		/* update completed stats and disarm the hang check */
-		tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
+		tx_ring->tx_stats.tx_done_old = tx_done;
 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
 	}
 
 	return ret;
 }
 
-/**
- * i40e_get_head - Retrieve head from head writeback
- * @tx_ring: tx ring to fetch head of
- *
- * Returns value of Tx ring head based on value stored
- * in head write-back location
- **/
-static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
-{
-	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
-
-	return le32_to_cpu(*(volatile __le32 *)head);
-}
-
 #define WB_STRIDE 0x3
 
 /**
@@ -2139,6 +2145,67 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
 	return __i40e_maybe_stop_tx(tx_ring, size);
 }
 
+/**
+ * i40e_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb: send buffer
+ * @tx_flags: collected send information
+ * @hdr_len: size of the packet header
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ **/
+static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
+			       const u8 hdr_len)
+{
+	struct skb_frag_struct *frag;
+	bool linearize = false;
+	unsigned int size = 0;
+	u16 num_frags;
+	u16 gso_segs;
+
+	num_frags = skb_shinfo(skb)->nr_frags;
+	gso_segs = skb_shinfo(skb)->gso_segs;
+
+	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
+		u16 j = 1;
+
+		if (num_frags < (I40E_MAX_BUFFER_TXD))
+			goto linearize_chk_done;
+		/* try the simple math, if we have too many frags per segment */
+		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
+		    I40E_MAX_BUFFER_TXD) {
+			linearize = true;
+			goto linearize_chk_done;
+		}
+		frag = &skb_shinfo(skb)->frags[0];
+		size = hdr_len;
+		/* we might still have more fragments per segment */
+		do {
+			size += skb_frag_size(frag);
+			frag++; j++;
+			if (j == I40E_MAX_BUFFER_TXD) {
+				if (size < skb_shinfo(skb)->gso_size) {
+					linearize = true;
+					break;
+				}
+				j = 1;
+				size -= skb_shinfo(skb)->gso_size;
+				if (size)
+					j++;
+				size += hdr_len;
+			}
+			num_frags--;
+		} while (num_frags);
+	} else {
+		if (num_frags >= I40E_MAX_BUFFER_TXD)
+			linearize = true;
+	}
+
+linearize_chk_done:
+	return linearize;
+}
+
 /**
  * i40e_tx_map - Build the Tx descriptor
  * @tx_ring: ring to send buffer on
@@ -2396,6 +2463,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	if (tsyn)
 		tx_flags |= I40E_TX_FLAGS_TSYN;
 
+	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
+		if (skb_linearize(skb))
+			goto out_drop;
+
 	skb_tx_timestamp(skb);
 
 	/* always enable CRC insertion offload */
1 change: 1 addition & 0 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -112,6 +112,7 @@ enum i40e_dyn_idx_t {
 
 #define i40e_rx_desc i40e_32byte_rx_desc
 
+#define I40E_MAX_BUFFER_TXD	8
 #define I40E_MIN_TX_LEN		17
 #define I40E_MAX_DATA_PER_TXD	8192
 