Skip to content

Commit

Permalink
Merge branch 'csums-next'
Browse files Browse the repository at this point in the history
Tom Herbert says:

====================
net: Checksum offload changes - Part VI

I am working on overhauling RX checksum offload. Goals of this effort
are:

- Specify what exactly it means when driver returns CHECKSUM_UNNECESSARY
- Preserve CHECKSUM_COMPLETE through encapsulation layers
- Don't do skb_checksum more than once per packet
- Unify GRO and non-GRO csum verification as much as possible
- Unify the checksum functions (checksum_init)
- Simplify code

What is in this sixth patch set:

- Clarify the specific requirements of devices returning
  CHECKSUM_UNNECESSARY (comments in skbuff.h).
- Add csum_level field to skbuff. This is used to express how
  many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1).
- Change __skb_checksum_validate_needed to "consume" each checksum
  as indicated by csum_level as layers of the packet are parsed.
- Remove skb_pop_rcv_encapsulation, no longer needed in the new
  csum_level model.
- Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY
  and to report new verified checksums for use in normal path fallback.
- Add proper support to SCTP to accept CHECKSUM_UNNECESSARY to validate
  header CRC.
- Modify drivers to set skb->csum_level instead of setting
  skb->encapsulation to indicate validation of an encapsulated
  checksum on receive.

v2:

Allocate a new 16 bits for flags in skbuff.

Please review carefully and test if possible, mucking with basic
checksum functions is always a little precarious :-)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 30, 2014
2 parents 96d4922 + 71d7a27 commit 030824e
Show file tree
Hide file tree
Showing 13 changed files with 101 additions and 64 deletions.
4 changes: 2 additions & 2 deletions drivers/net/ethernet/emulex/benet/be_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1683,7 +1683,7 @@ static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
if (netdev->features & NETIF_F_RXHASH)
skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);

skb->encapsulation = rxcp->tunneled;
skb->csum_level = rxcp->tunneled;
skb_mark_napi_id(skb, napi);

if (rxcp->vlanf)
Expand Down Expand Up @@ -1741,7 +1741,7 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
if (adapter->netdev->features & NETIF_F_RXHASH)
skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);

skb->encapsulation = rxcp->tunneled;
skb->csum_level = rxcp->tunneled;
skb_mark_napi_id(skb, napi);

if (rxcp->vlanf)
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1241,7 +1241,6 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
(rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);

skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
skb->ip_summed = CHECKSUM_NONE;

/* Rx csum enabled and ip headers found? */
Expand Down Expand Up @@ -1315,6 +1314,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
}

skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ipv4_tunnel || ipv6_tunnel;

return;

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/i40evf/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,6 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
(rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);

skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
skb->ip_summed = CHECKSUM_NONE;

/* Rx csum enabled and ip headers found? */
Expand Down Expand Up @@ -820,6 +819,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
}

skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ipv4_tunnel || ipv6_tunnel;

return;

Expand Down
6 changes: 3 additions & 3 deletions drivers/net/ethernet/mellanox/mlx4/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
gro_skb->ip_summed = CHECKSUM_UNNECESSARY;

if (l2_tunnel)
gro_skb->encapsulation = 1;
gro_skb->csum_level = 1;
if ((cqe->vlan_my_qpn &
cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
Expand Down Expand Up @@ -823,8 +823,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
skb->protocol = eth_type_trans(skb, dev);
skb_record_rx_queue(skb, cq->ring);

if (l2_tunnel)
skb->encapsulation = 1;
if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
skb->csum_level = 1;

if (dev->features & NETIF_F_RXHASH)
skb_set_hash(skb,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1753,7 +1753,7 @@ qlcnic_83xx_process_rcv(struct qlcnic_adapter *adapter,

if (qlcnic_encap_length(sts_data[1]) &&
skb->ip_summed == CHECKSUM_UNNECESSARY) {
skb->encapsulation = 1;
skb->csum_level = 1;
adapter->stats.encap_rx_csummed++;
}

Expand Down
2 changes: 0 additions & 2 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1158,8 +1158,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!vs)
goto drop;

skb_pop_rcv_encapsulation(skb);

vs->rcv(vs, skb, vxh->vx_vni);
return 0;

Expand Down
26 changes: 12 additions & 14 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1883,8 +1883,8 @@ struct napi_gro_cb {
/* GRO checksum is valid */
u8 csum_valid:1;

/* Number encapsulation layers crossed */
u8 encapsulation;
/* Number of checksums via CHECKSUM_UNNECESSARY */
u8 csum_cnt:3;

/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
Expand Down Expand Up @@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
__sum16 check)
{
return (skb->ip_summed != CHECKSUM_PARTIAL &&
(skb->ip_summed != CHECKSUM_UNNECESSARY ||
(NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
(!zero_okay || check));
}

Expand All @@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
return __skb_gro_checksum_complete(skb);
}

/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level
* checksum or an encapsulated one during GRO. This saves work
* if we fallback to normal path with the packet.
*/
static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
if (NAPI_GRO_CB(skb)->encapsulation)
skb->encapsulation = 1;
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->encapsulation = 0;
if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
/* Consume a checksum from CHECKSUM_UNNECESSARY */
NAPI_GRO_CB(skb)->csum_cnt--;
} else {
/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
* verified a new top level checksum or an encapsulated one
* during GRO. This saves work if we fallback to normal path.
*/
__skb_incr_checksum_unnecessary(skb);
}
}

Expand Down
76 changes: 54 additions & 22 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,29 @@
*
* The hardware you're dealing with doesn't calculate the full checksum
* (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
* for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will
* set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still
* undefined in this case though. It is a bad option, but, unfortunately,
* nowadays most vendors do this. Apparently with the secret goal to sell
* you new devices, when you will add new protocol to your host, f.e. IPv6 8)
* for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
* if their checksums are okay. skb->csum is still undefined in this case
* though. It is a bad option, but, unfortunately, nowadays most vendors do
* this. Apparently with the secret goal to sell you new devices, when you
* will add new protocol to your host, f.e. IPv6 8)
*
* CHECKSUM_UNNECESSARY is applicable to the following protocols:
* TCP: IPv6 and IPv4.
* UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
* zero UDP checksum for either IPv4 or IPv6, the networking stack
* may perform further validation in this case.
* GRE: only if the checksum is present in the header.
* SCTP: indicates the CRC in SCTP header has been validated.
*
* skb->csum_level indicates the number of consecutive checksums found in
* the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
* For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
* and a device is able to verify the checksums for UDP (possibly zero),
* GRE (checksum flag is set), and TCP-- skb->csum_level would be set to
* two. If the device were only able to verify the UDP checksum and not
* GRE, either because it doesn't support GRE checksum or because GRE
* checksum is bad, skb->csum_level would be set to zero (TCP checksum is
* not considered in this case).
*
* CHECKSUM_COMPLETE:
*
Expand Down Expand Up @@ -112,6 +130,9 @@
#define CHECKSUM_COMPLETE 2
#define CHECKSUM_PARTIAL 3

/* Maximum value in skb->csum_level */
#define SKB_MAX_CSUM_LEVEL 3

#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES)
#define SKB_WITH_OVERHEAD(X) \
((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
Expand Down Expand Up @@ -571,11 +592,7 @@ struct sk_buff {
__u8 wifi_acked:1;
__u8 no_fcs:1;
__u8 head_frag:1;
/* Encapsulation protocol and NIC drivers should use
* this flag to indicate to each other if the skb contains
* encapsulated packet or not and maybe use the inner packet
* headers if needed
*/
/* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
Expand All @@ -598,6 +615,11 @@ struct sk_buff {
__u32 reserved_tailroom;
};

kmemcheck_bitfield_begin(flags3);
__u8 csum_level:2;
/* 14 bit hole */
kmemcheck_bitfield_end(flags3);

__be16 inner_protocol;
__u16 inner_transport_header;
__u16 inner_network_header;
Expand Down Expand Up @@ -1862,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
return pskb_may_pull(skb, skb_network_offset(skb) + len);
}

/* Reset receive-side checksum state when an encapsulation header is popped.
 *
 * CHECKSUM_UNNECESSARY is kept only when skb->encapsulation is set; otherwise
 * ip_summed is downgraded to CHECKSUM_NONE. In every case the encapsulation
 * and csum_valid flags are cleared afterwards.
 */
static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
{
	/* Without the encapsulation flag, a CHECKSUM_UNNECESSARY indication
	 * is not carried past this header.
	 */
	if (!skb->encapsulation && skb->ip_summed == CHECKSUM_UNNECESSARY)
		skb->ip_summed = CHECKSUM_NONE;

	skb->csum_valid = 0;
	skb->encapsulation = 0;
}

/*
* CPUs often take a performance hit when accessing unaligned memory
* locations. The actual performance hit varies, it can be small if the
Expand Down Expand Up @@ -2794,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
0 : __skb_checksum_complete(skb);
}

/* Consume one checksum from a CHECKSUM_UNNECESSARY indication.
 *
 * Does nothing unless skb->ip_summed is CHECKSUM_UNNECESSARY. Otherwise
 * csum_level is decremented, or, when it is already zero, ip_summed is
 * reset to CHECKSUM_NONE.
 */
static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
{
	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
		return;

	if (skb->csum_level != 0)
		skb->csum_level--;
	else
		skb->ip_summed = CHECKSUM_NONE;
}

/* Record one more verified checksum on the skb.
 *
 * - CHECKSUM_UNNECESSARY: raise csum_level by one, but never beyond
 *   SKB_MAX_CSUM_LEVEL.
 * - CHECKSUM_NONE: switch to CHECKSUM_UNNECESSARY with csum_level zero.
 * - Any other ip_summed value: leave the skb untouched.
 */
static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
{
	switch (skb->ip_summed) {
	case CHECKSUM_UNNECESSARY:
		if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
			skb->csum_level++;
		break;
	case CHECKSUM_NONE:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level = 0;
		break;
	default:
		break;
	}
}

/* Check if we need to perform checksum complete validation.
*
* Returns true if checksum complete is needed, false otherwise
Expand All @@ -2805,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
{
if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
skb->csum_valid = 1;
__skb_decr_checksum_unnecessary(skb);
return false;
}

Expand Down
24 changes: 16 additions & 8 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3962,13 +3962,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff

gro_list_prepare(napi, skb);

if (skb->ip_summed == CHECKSUM_COMPLETE) {
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
} else {
NAPI_GRO_CB(skb)->csum_valid = 0;
}

rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
if (ptype->type != type || !ptype->callbacks.gro_receive)
Expand All @@ -3980,7 +3973,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->udp_mark = 0;
NAPI_GRO_CB(skb)->encapsulation = 0;

/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
NAPI_GRO_CB(skb)->csum_cnt = 0;
break;
case CHECKSUM_UNNECESSARY:
NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
NAPI_GRO_CB(skb)->csum_valid = 0;
break;
default:
NAPI_GRO_CB(skb)->csum_cnt = 0;
NAPI_GRO_CB(skb)->csum_valid = 0;
}

pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
Expand Down
1 change: 0 additions & 1 deletion net/ipv4/gre_demux.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
*csum_err = true;
return -EINVAL;
}
skb_pop_rcv_encapsulation(skb);
options++;
}

Expand Down
7 changes: 2 additions & 5 deletions net/ipv4/gre_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,9 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
}

/* Don't bother verifying checksum if we're going to flush anyway. */
if (greh->flags & GRE_CSUM) {
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_simple_validate(skb))
if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_simple_validate(skb))
goto out_unlock;
NAPI_GRO_CB(skb)->encapsulation++;
}

flush = 0;

Expand Down
5 changes: 3 additions & 2 deletions net/ipv4/udp_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,12 +238,13 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
int flush = 1;

if (NAPI_GRO_CB(skb)->udp_mark ||
(!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid))
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid))
goto out;

/* mark that this skb passed once through the udp gro layer */
NAPI_GRO_CB(skb)->udp_mark = 1;
NAPI_GRO_CB(skb)->encapsulation++;

rcu_read_lock();
uo_priv = rcu_dereference(udp_offload_base);
Expand Down
8 changes: 6 additions & 2 deletions net/sctp/input.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,13 @@ int sctp_rcv(struct sk_buff *skb)
__skb_pull(skb, skb_transport_offset(skb));
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
sctp_rcv_checksum(net, skb) < 0)

skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;

skb_pull(skb, sizeof(struct sctphdr));

Expand Down

0 comments on commit 030824e

Please sign in to comment.