Skip to content

Commit

Permalink
Merge branch 'tso_fix'
Browse files Browse the repository at this point in the history
Vladislav Yasevich says:

====================
Fix TSO and checksum issues with non-accelerated vlan traffic.

I've recently ran across something rather interesting when testing vlans
from inside VMs.  In some scenarios I was getting awfull thruput.
Some debugging uncovered a very scary packet corruption.  I was
seeing packets that had original TSO length as IP total length
and their ip checksum was 0.  This was with e1000e driver.

A bit more debugging uncovered an assumption made by that driver
that skb->protocol will contain l3 protocol information.  This
was not the case in my setup since I manually turned off vlan
tx acceleration for the device.  This caused the driver to not
initialize the tso information correctly and resulted in
corrupt TSO frames on the wire.

I decided to do some auditing of the usage of skb->protocols
in the drivers.  Out of all the drivers, the included 8 appear
to be effected.  They all allow user to control vlan acceleration
settings, all support TSO on vlan devices, and all use
skb->protocol to decide how to encode TSO information.  Some
also have similar problems when initializing hw checksum information.
On such device, it is simple enough to reproduce the issue.
Simply turn off TX VLAN acceleration on the device, create a vlan,
and run you favorite network performance tool.

There is 1 driver I ran across that I belive will trigger a BUG
in the system when used with vlans.  That driver is tile/tilepro.c
I have not changed it in this patch set and would hope that
the maintainer has time to look at it.

V2: Fix i40ev using the wrong function name.  Full build.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 26, 2014
2 parents 2b7890e + 1ee1cfe commit 8dbb200
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 31 deletions.
7 changes: 4 additions & 3 deletions drivers/net/ethernet/brocade/bna/bnad.c
Original file line number Diff line number Diff line change
Expand Up @@ -2506,7 +2506,7 @@ bnad_tso_prepare(struct bnad *bnad, struct sk_buff *skb)
* For TSO, the TCP checksum field is seeded with pseudo-header sum
* excluding the length field.
*/
if (skb->protocol == htons(ETH_P_IP)) {
if (vlan_get_protocol(skb) == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);

/* Do we really need these? */
Expand Down Expand Up @@ -2870,12 +2870,13 @@ bnad_txq_wi_prepare(struct bnad *bnad, struct bna_tcb *tcb,
}

if (skb->ip_summed == CHECKSUM_PARTIAL) {
__be16 net_proto = vlan_get_protocol(skb);
u8 proto = 0;

if (skb->protocol == htons(ETH_P_IP))
if (net_proto == htons(ETH_P_IP))
proto = ip_hdr(skb)->protocol;
#ifdef NETIF_F_IPV6_CSUM
else if (skb->protocol == htons(ETH_P_IPV6)) {
else if (net_proto == htons(ETH_P_IPV6)) {
/* nexthdr may not be TCP immediately. */
proto = ipv6_hdr(skb)->nexthdr;
}
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/ibm/ehea/ehea_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1994,7 +1994,7 @@ static void xmit_common(struct sk_buff *skb, struct ehea_swqe *swqe)
{
swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT | EHEA_SWQE_CRC;

if (skb->protocol != htons(ETH_P_IP))
if (vlan_get_protocol(skb) != htons(ETH_P_IP))
return;

if (skb->ip_summed == CHECKSUM_PARTIAL)
Expand Down
19 changes: 11 additions & 8 deletions drivers/net/ethernet/intel/e1000/e1000_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2674,7 +2674,8 @@ static void e1000_set_itr(struct e1000_adapter *adapter)
#define E1000_TX_FLAGS_VLAN_SHIFT 16

static int e1000_tso(struct e1000_adapter *adapter,
struct e1000_tx_ring *tx_ring, struct sk_buff *skb)
struct e1000_tx_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
Expand All @@ -2692,7 +2693,7 @@ static int e1000_tso(struct e1000_adapter *adapter,

hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
mss = skb_shinfo(skb)->gso_size;
if (skb->protocol == htons(ETH_P_IP)) {
if (protocol == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = 0;
iph->check = 0;
Expand All @@ -2702,7 +2703,7 @@ static int e1000_tso(struct e1000_adapter *adapter,
0);
cmd_length = E1000_TXD_CMD_IP;
ipcse = skb_transport_offset(skb) - 1;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
} else if (skb_is_gso_v6(skb)) {
ipv6_hdr(skb)->payload_len = 0;
tcp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
Expand Down Expand Up @@ -2745,7 +2746,8 @@ static int e1000_tso(struct e1000_adapter *adapter,
}

static bool e1000_tx_csum(struct e1000_adapter *adapter,
struct e1000_tx_ring *tx_ring, struct sk_buff *skb)
struct e1000_tx_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
Expand All @@ -2756,7 +2758,7 @@ static bool e1000_tx_csum(struct e1000_adapter *adapter,
if (skb->ip_summed != CHECKSUM_PARTIAL)
return false;

switch (skb->protocol) {
switch (protocol) {
case cpu_to_be16(ETH_P_IP):
if (ip_hdr(skb)->protocol == IPPROTO_TCP)
cmd_len |= E1000_TXD_CMD_TCP;
Expand Down Expand Up @@ -3097,6 +3099,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
int count = 0;
int tso;
unsigned int f;
__be16 protocol = vlan_get_protocol(skb);

/* This goes back to the question of how to logically map a Tx queue
* to a flow. Right now, performance is impacted slightly negatively
Expand Down Expand Up @@ -3210,7 +3213,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,

first = tx_ring->next_to_use;

tso = e1000_tso(adapter, tx_ring, skb);
tso = e1000_tso(adapter, tx_ring, skb, protocol);
if (tso < 0) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
Expand All @@ -3220,10 +3223,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
if (likely(hw->mac_type != e1000_82544))
tx_ring->last_tx_tso = true;
tx_flags |= E1000_TX_FLAGS_TSO;
} else if (likely(e1000_tx_csum(adapter, tx_ring, skb)))
} else if (likely(e1000_tx_csum(adapter, tx_ring, skb, protocol)))
tx_flags |= E1000_TX_FLAGS_CSUM;

if (likely(skb->protocol == htons(ETH_P_IP)))
if (protocol == htons(ETH_P_IP))
tx_flags |= E1000_TX_FLAGS_IPV4;

if (unlikely(skb->no_fcs))
Expand Down
21 changes: 9 additions & 12 deletions drivers/net/ethernet/intel/e1000e/netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -5164,7 +5164,8 @@ static void e1000_watchdog_task(struct work_struct *work)
#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000
#define E1000_TX_FLAGS_VLAN_SHIFT 16

static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
Expand All @@ -5183,7 +5184,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)

hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
mss = skb_shinfo(skb)->gso_size;
if (skb->protocol == htons(ETH_P_IP)) {
if (protocol == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = 0;
iph->check = 0;
Expand Down Expand Up @@ -5231,24 +5232,19 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
return 1;
}

static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb)
static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_adapter *adapter = tx_ring->adapter;
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
unsigned int i;
u8 css;
u32 cmd_len = E1000_TXD_CMD_DEXT;
__be16 protocol;

if (skb->ip_summed != CHECKSUM_PARTIAL)
return false;

if (skb->protocol == cpu_to_be16(ETH_P_8021Q))
protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
else
protocol = skb->protocol;

switch (protocol) {
case cpu_to_be16(ETH_P_IP):
if (ip_hdr(skb)->protocol == IPPROTO_TCP)
Expand Down Expand Up @@ -5546,6 +5542,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
int count = 0;
int tso;
unsigned int f;
__be16 protocol = vlan_get_protocol(skb);

if (test_bit(__E1000_DOWN, &adapter->state)) {
dev_kfree_skb_any(skb);
Expand Down Expand Up @@ -5620,22 +5617,22 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,

first = tx_ring->next_to_use;

tso = e1000_tso(tx_ring, skb);
tso = e1000_tso(tx_ring, skb, protocol);
if (tso < 0) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

if (tso)
tx_flags |= E1000_TX_FLAGS_TSO;
else if (e1000_tx_csum(tx_ring, skb))
else if (e1000_tx_csum(tx_ring, skb, protocol))
tx_flags |= E1000_TX_FLAGS_CSUM;

/* Old method was to assume IPv4 packet by default if TSO was enabled.
* 82571 hardware supports TSO capabilities for IPv6 as well...
* no longer assume, we must.
*/
if (skb->protocol == htons(ETH_P_IP))
if (protocol == htons(ETH_P_IP))
tx_flags |= E1000_TX_FLAGS_IPV4;

if (unlikely(skb->no_fcs))
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -2295,7 +2295,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
goto out_drop;

/* obtain protocol of skb */
protocol = skb->protocol;
protocol = vlan_get_protocol(skb);

/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_bi[tx_ring->next_to_use];
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40evf/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
goto out_drop;

/* obtain protocol of skb */
protocol = skb->protocol;
protocol = vlan_get_protocol(skb);

/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_bi[tx_ring->next_to_use];
Expand Down
7 changes: 4 additions & 3 deletions drivers/net/ethernet/marvell/mvneta.c
Original file line number Diff line number Diff line change
Expand Up @@ -1371,15 +1371,16 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_PARTIAL) {
int ip_hdr_len = 0;
__be16 l3_proto = vlan_get_protocol(skb);
u8 l4_proto;

if (skb->protocol == htons(ETH_P_IP)) {
if (l3_proto == htons(ETH_P_IP)) {
struct iphdr *ip4h = ip_hdr(skb);

/* Calculate IPv4 checksum and L4 checksum */
ip_hdr_len = ip4h->ihl;
l4_proto = ip4h->protocol;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
} else if (l3_proto == htons(ETH_P_IPV6)) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);

/* Read l4_protocol from one of IPv6 extra headers */
Expand All @@ -1390,7 +1391,7 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb)
return MVNETA_TX_L4_CSUM_NOT;

return mvneta_txq_desc_csum(skb_network_offset(skb),
skb->protocol, ip_hdr_len, l4_proto);
l3_proto, ip_hdr_len, l4_proto);
}

return MVNETA_TX_L4_CSUM_NOT;
Expand Down
5 changes: 3 additions & 2 deletions drivers/net/ethernet/qlogic/qlge/qlge_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2556,6 +2556,7 @@ static int ql_tso(struct sk_buff *skb, struct ob_mac_tso_iocb_req *mac_iocb_ptr)

if (skb_is_gso(skb)) {
int err;
__be16 l3_proto = vlan_get_protocol(skb);

err = skb_cow_head(skb, 0);
if (err < 0)
Expand All @@ -2572,15 +2573,15 @@ static int ql_tso(struct sk_buff *skb, struct ob_mac_tso_iocb_req *mac_iocb_ptr)
<< OB_MAC_TRANSPORT_HDR_SHIFT);
mac_iocb_ptr->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
mac_iocb_ptr->flags2 |= OB_MAC_TSO_IOCB_LSO;
if (likely(skb->protocol == htons(ETH_P_IP))) {
if (likely(l3_proto == htons(ETH_P_IP))) {
struct iphdr *iph = ip_hdr(skb);
iph->check = 0;
mac_iocb_ptr->flags1 |= OB_MAC_TSO_IOCB_IP4;
tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
iph->daddr, 0,
IPPROTO_TCP,
0);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
} else if (l3_proto == htons(ETH_P_IPV6)) {
mac_iocb_ptr->flags1 |= OB_MAC_TSO_IOCB_IP6;
tcp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
Expand Down

0 comments on commit 8dbb200

Please sign in to comment.