Skip to content

Commit

Permalink
Merge branch 'csums-next'
Browse files Browse the repository at this point in the history
Tom Herbert says:

====================
net: Checksum offload changes - Part VI

I am working on overhauling RX checksum offload. Goals of this effort
are:

- Specify what exactly it means when driver returns CHECKSUM_UNNECESSARY
- Preserve CHECKSUM_COMPLETE through encapsulation layers
- Don't do skb_checksum more than once per packet
- Unify GRO and non-GRO csum verification as much as possible
- Unify the checksum functions (checksum_init)
- Simplify code

What is in this sixth patch set:

- Add skb->csum_bad. This allows a device or GRO to indicate that an
  invalid checksum was detected.
- Checksum unnecessary to checksum complete conversions.

With these changes, I believe that the third goal of the overhaul is
now mostly achieved. In the case of no encapsulation or one layer of
encapsulation, there should only be at most one skb_checksum over
each packet (between GRO and normal path). In the case of two layers
of encapsulation, it is still possible with the right combination of
non-zero and zero UDP checksums to have >1 skb_checksum. For instance:
IP>GRE(with csum)>IP>UDP(zero csum)>VXLAN>IP>UDP(non-zero csum),
would likely necessitate an skb_checksum in GRO and normal path.
This doesn't seem like a common scenario at all so I'm inclined to
not address this now, if multiple layers of encapsulation becomes
popular we can reassess.

Note that checksum conversion shows a nice improvement for RX VXLAN when
outer UDP checksum is enabled (12.65% CPU compared to 20.94%). This
is not only from the fact that we don't need checksum calculation on
the host, but also allows GRO for VXLAN in this case. Checksum
conversion does not help send side (which still needs to perform
a checksum on host). For that we will implement remote checksum offload
in a later patch
(http://tools.ietf.org/html/draft-herbert-remotecsumoffload-00).

Please review carefully and test if possible, mucking with basic
checksum functions is always a little precarious :-)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Sep 2, 2014
2 parents 52aec12 + 72297c5 commit c5a6568
Show file tree
Hide file tree
Showing 12 changed files with 135 additions and 21 deletions.
2 changes: 2 additions & 0 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -2370,6 +2370,8 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
/* Disable multicast loopback */
inet_sk(sock->sk)->mc_loop = 0;

udp_set_convert_csum(sock->sk, true);

return sock;
}

Expand Down
24 changes: 23 additions & 1 deletion include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -2216,7 +2216,9 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \
__ret = __skb_gro_checksum_validate_complete(skb, \
compute_pseudo(skb, proto)); \
if (!__ret) \
if (__ret) \
__skb_mark_checksum_bad(skb); \
else \
skb_gro_incr_csum_unnecessary(skb); \
__ret; \
})
Expand All @@ -2231,6 +2233,26 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
#define skb_gro_checksum_simple_validate(skb) \
__skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo)

/* Decide whether a GRO checksum conversion should be attempted for @skb.
 *
 * Returns true only when the GRO control block's csum_cnt is zero and its
 * csum_valid flag is not already set; false otherwise.
 */
static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
{
	if (NAPI_GRO_CB(skb)->csum_cnt != 0)
		return false;

	return !NAPI_GRO_CB(skb)->csum_valid;
}

/* Record a converted checksum in the GRO control block of @skb.
 *
 * Sets csum_valid and stores the one's complement of @pseudo as the GRO
 * checksum value. @check is accepted for interface symmetry but is not
 * used by this function.
 */
static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
					      __sum16 check, __wsum pseudo)
{
	NAPI_GRO_CB(skb)->csum_valid = 1;
	NAPI_GRO_CB(skb)->csum = ~pseudo;
}

/* Attempt a checksum conversion in the GRO path.
 *
 * When __skb_gro_checksum_convert_check(skb) is true, calls
 * __skb_gro_checksum_convert() with the pseudo value produced by
 * compute_pseudo(skb, proto); compute_pseudo is only invoked in that
 * case. Otherwise the macro does nothing.
 *
 * The do { } while (0) wrapper makes the expansion a single statement.
 * Note that skb is expanded more than once, so pass an expression
 * without side effects.
 */
#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo) \
do { \
if (__skb_gro_checksum_convert_check(skb)) \
__skb_gro_checksum_convert(skb, check, \
compute_pseudo(skb, proto)); \
} while (0)

static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
const void *daddr, const void *saddr,
Expand Down
41 changes: 40 additions & 1 deletion include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,8 @@ struct sk_buff {

kmemcheck_bitfield_begin(flags3);
__u8 csum_level:2;
/* 14 bit hole */
__u8 csum_bad:1;
/* 13 bit hole */
kmemcheck_bitfield_end(flags3);

__be16 inner_protocol;
Expand Down Expand Up @@ -2825,6 +2826,21 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
}
}

/* Flag the current checksum of @skb as bad (typically called from the
 * GRO path).
 *
 * The flag is only set when ip_summed is CHECKSUM_NONE or
 * CHECKSUM_UNNECESSARY; for any other ip_summed value the skb is left
 * untouched.
 *
 * With CHECKSUM_NONE this must be the first checksum encountered in the
 * packet. With CHECKSUM_UNNECESSARY it is the first checksum after the
 * last one validated. For UDP, a zero checksum can not be marked as bad.
 */
static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
{
	if (skb->ip_summed != CHECKSUM_NONE &&
	    skb->ip_summed != CHECKSUM_UNNECESSARY)
		return;

	skb->csum_bad = 1;
}

/* Check if we need to perform checksum complete validation.
*
* Returns true if checksum complete is needed, false otherwise
Expand Down Expand Up @@ -2866,6 +2882,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
skb->csum_valid = 1;
return 0;
}
} else if (skb->csum_bad) {
/* ip_summed == CHECKSUM_NONE in this case */
return 1;
}

skb->csum = psum;
Expand Down Expand Up @@ -2923,6 +2942,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
#define skb_checksum_simple_validate(skb) \
__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)

/* Decide whether @skb is eligible for checksum conversion.
 *
 * Returns true only when all of the following hold:
 *   - ip_summed is CHECKSUM_NONE,
 *   - csum_valid is set,
 *   - csum_bad is not set.
 */
static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
{
	if (skb->ip_summed != CHECKSUM_NONE)
		return false;

	if (!skb->csum_valid)
		return false;

	return !skb->csum_bad;
}

/* Convert @skb to CHECKSUM_COMPLETE.
 *
 * Switches ip_summed to CHECKSUM_COMPLETE and stores the one's
 * complement of @pseudo in skb->csum. @check is accepted for interface
 * symmetry but is not used by this function.
 */
static inline void __skb_checksum_convert(struct sk_buff *skb,
					  __sum16 check, __wsum pseudo)
{
	skb->ip_summed = CHECKSUM_COMPLETE;
	skb->csum = ~pseudo;
}

/* Attempt a checksum conversion in the normal (non-GRO) receive path.
 *
 * When __skb_checksum_convert_check(skb) is true, calls
 * __skb_checksum_convert() with the pseudo value produced by
 * compute_pseudo(skb, proto); compute_pseudo is only invoked in that
 * case. Otherwise the macro does nothing.
 *
 * The do { } while (0) wrapper makes the expansion a single statement.
 * Note that skb is expanded more than once, so pass an expression
 * without side effects.
 */
#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \
do { \
if (__skb_checksum_convert_check(skb)) \
__skb_checksum_convert(skb, check, \
compute_pseudo(skb, proto)); \
} while (0)

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
void nf_conntrack_destroy(struct nf_conntrack *nfct);
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
Expand Down
16 changes: 15 additions & 1 deletion include/linux/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ struct udp_sock {
unsigned int corkflag; /* Cork is required */
__u8 encap_type; /* Is this an Encapsulation socket? */
unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
convert_csum:1;/* On receive, convert checksum
* unnecessary to checksum complete
* if possible.
*/
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
Expand Down Expand Up @@ -98,6 +102,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
return udp_sk(sk)->no_check6_rx;
}

/* Set the convert_csum flag of the UDP socket @sk to @val.
 *
 * convert_csum is a one-bit field, so the boolean is stored as 1 or 0.
 */
static inline void udp_set_convert_csum(struct sock *sk, bool val)
{
	udp_sk(sk)->convert_csum = val ? 1 : 0;
}

/* Return true when the convert_csum flag of the UDP socket @sk is set,
 * false otherwise.
 */
static inline bool udp_get_convert_csum(struct sock *sk)
{
	return udp_sk(sk)->convert_csum != 0;
}

#define udp_portaddr_for_each_entry(__sk, node, list) \
hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)

Expand Down
2 changes: 1 addition & 1 deletion net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3918,7 +3918,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;

if (skb_is_gso(skb) || skb_has_frag_list(skb))
if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
goto normal;

gro_list_prepare(napi, skb);
Expand Down
4 changes: 4 additions & 0 deletions net/ipv4/gre_demux.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
*csum_err = true;
return -EINVAL;
}

skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
null_compute_pseudo);

options++;
}

Expand Down
8 changes: 6 additions & 2 deletions net/ipv4/gre_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
}

/* Don't bother verifying checksum if we're going to flush anyway. */
if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_simple_validate(skb))
if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush) {
if (skb_gro_checksum_simple_validate(skb))
goto out_unlock;

skb_gro_checksum_try_convert(skb, IPPROTO_GRE, 0,
null_compute_pseudo);
}

flush = 0;

for (p = *head; p; p = p->next) {
Expand Down
4 changes: 4 additions & 0 deletions net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1788,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;

if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
inet_compute_pseudo);

ret = udp_queue_rcv_skb(sk, skb);
sock_put(sk);

Expand Down
25 changes: 17 additions & 8 deletions net/ipv4/udp_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,16 +290,25 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
{
struct udphdr *uh = udp_gro_udphdr(skb);

/* Don't bother verifying checksum if we're going to flush anyway. */
if (unlikely(!uh) ||
(!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
inet_gro_compute_pseudo))) {
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
/* No UDP header could be obtained: flush this packet. */
if (unlikely(!uh))
	goto flush;

/* Don't bother verifying checksum if we're going to flush anyway:
 * when the flush flag is already set, skip validation and hand the
 * packet straight to udp_gro_receive(). The test must NOT be negated,
 * otherwise validation is skipped for exactly the packets that are
 * not going to be flushed.
 */
if (NAPI_GRO_CB(skb)->flush)
	goto skip;

/* A non-zero return from validation means the packet must be flushed.
 * On success, try the checksum conversion only for a non-zero UDP
 * checksum field.
 */
if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
					 inet_gro_compute_pseudo))
	goto flush;
else if (uh->check)
	skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
				     inet_gro_compute_pseudo);
skip:
return udp_gro_receive(head, skb, uh);

flush:
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}

int udp_gro_complete(struct sk_buff *skb, int nhoff)
Expand Down
4 changes: 4 additions & 0 deletions net/ipv6/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto csum_error;
}

if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
ip6_compute_pseudo);

ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);

Expand Down
24 changes: 17 additions & 7 deletions net/ipv6/udp_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,26 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
{
struct udphdr *uh = udp_gro_udphdr(skb);

if (unlikely(!uh))
goto flush;

/* Don't bother verifying checksum if we're going to flush anyway. */
if (unlikely(!uh) ||
(!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
ip6_gro_compute_pseudo))) {
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
/* Skip checksum validation only when the packet is already going to be
 * flushed. The test must NOT be negated, otherwise validation is
 * skipped for exactly the packets that are not going to be flushed.
 */
if (NAPI_GRO_CB(skb)->flush)
	goto skip;

/* A non-zero return from validation means the packet must be flushed.
 * On success, try the checksum conversion only for a non-zero UDP
 * checksum field.
 */
if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check,
					 ip6_gro_compute_pseudo))
	goto flush;
else if (uh->check)
	skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
				     ip6_gro_compute_pseudo);

skip:
return udp_gro_receive(head, skb, uh);

flush:
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}

int udp6_gro_complete(struct sk_buff *skb, int nhoff)
Expand Down
2 changes: 2 additions & 0 deletions net/l2tp/l2tp_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1392,6 +1392,8 @@ static int l2tp_tunnel_sock_create(struct net *net,
if (err < 0)
goto out;

udp_set_convert_csum(sock->sk, true);

break;

case L2TP_ENCAPTYPE_IP:
Expand Down

0 comments on commit c5a6568

Please sign in to comment.