From 573e8fca255a27e3573b51f9b183d62641c47a3d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:33:47 -0700 Subject: [PATCH 1/6] net: skb_gro_checksum_* functions Add skb_gro_checksum_validate, skb_gro_checksum_validate_zero_check, and skb_gro_checksum_simple_validate, and __skb_gro_checksum_complete. These are the cognates of the normal checksum functions but are used in the gro_receive path and operate on GRO related fields in sk_buffs. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 76 +++++++++++++++++++++++++++++++++++++-- net/core/dev.c | 34 +++++++++++++++++- 2 files changed, 107 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e2b0b8b5cd71..eb73444e1bd09 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,7 +1883,13 @@ struct napi_gro_cb { u16 proto; /* Used in udp_gro_receive */ - u16 udp_mark; + u8 udp_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number encapsulation layers crossed */ + u8 encapsulation; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2154,11 +2160,77 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (NAPI_GRO_CB(skb)->csum_valid) NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(start, len, 0)); } +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. + */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return (skb->ip_summed != CHECKSUM_PARTIAL && + (skb->ip_summed != CHECKSUM_UNNECESSARY || + (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level + * checksum or an encapsulated one during GRO. This saves work + * if we fallback to normal path with the packet. + */ +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (NAPI_GRO_CB(skb)->encapsulation) + skb->encapsulation = 1; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->encapsulation = 0; + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/net/core/dev.c b/net/core/dev.c index 1421dad4cb29c..b6a718ec11c1d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,7 +3962,13 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; gro_list_prepare(napi, skb); - NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + } else { + NAPI_GRO_CB(skb)->csum_valid = 0; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3975,6 +3981,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encapsulation = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -4205,6 +4212,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* Compute the checksum from gro_offset and return the folded value + * after adding in any pseudo checksum. + */ +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb) +{ + __wsum wsum; + __sum16 sum; + + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); + + /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ + sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + NAPI_GRO_CB(skb)->csum = wsum; + NAPI_GRO_CB(skb)->csum_valid = 1; + + return sum; +} +EXPORT_SYMBOL(__skb_gro_checksum_complete); + /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. From 1933a7852ce6a81349855431b25122d7666bbfca Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:04 -0700 Subject: [PATCH 2/6] net: add gro_compute_pseudo functions Add inet_gro_compute_pseudo and ip6_gro_compute_pseudo. These are the logical equivalents of inet_compute_pseudo and ip6_compute_pseudo for GRO path. The IP header is taken from skb_gro_network_header. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip.h | 8 ++++++++ include/net/ip6_checksum.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index db4a771b9ef3a..c8fd6112bd0b4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -364,6 +364,14 @@ static inline void inet_set_txhash(struct sock *sk) sk->sk_txhash = flow_hash_from_keys(&keys); } +static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), proto, 0); +} + /* * Map a multicast IP onto multicast MAC for type ethernet. */ diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index 55236cb711745..1a49b73f7f6e3 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -48,6 +48,14 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0)); } +static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), proto, 0)); +} + static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, From 758f75d1ffa9ef482ae095f40087cf217e1f41b0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:22 -0700 Subject: [PATCH 3/6] gre: call skb_gro_checksum_simple_validate Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 43 +++++++----------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6556263c8fa52..d1bd16937d932 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -119,28 +119,6 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, return segs; } -/* Compute the whole skb csum in s/w and store it, then verify GRO csum - * starting from gro_offset. - */ -static __sum16 gro_skb_checksum(struct sk_buff *skb) -{ - __sum16 sum; - - skb->csum = skb_checksum(skb, 0, skb->len, 0); - NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, - csum_partial(skb->data, skb_gro_offset(skb), 0)); - sum = csum_fold(NAPI_GRO_CB(skb)->csum); - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { - if (unlikely(!sum) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); - } else { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - } - - return sum; -} - static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -192,22 +170,15 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if (unlikely(!greh)) goto out_unlock; } - if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ - __sum16 csum = 0; - - if (skb->ip_summed == CHECKSUM_COMPLETE) - csum = csum_fold(NAPI_GRO_CB(skb)->csum); - /* Don't trust csum error calculated/reported by h/w */ - if (skb->ip_summed == CHECKSUM_NONE || csum != 0) - csum = gro_skb_checksum(skb); - - /* GRE CSUM is the 1's complement of the 1's complement sum - * of the GRE hdr plus payload so it should add up to 0xffff - * (and 0 after csum_fold()) just like the IPv4 hdr csum. - */ - if (csum) + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (greh->flags & GRE_CSUM) { + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_simple_validate(skb)) goto out_unlock; + NAPI_GRO_CB(skb)->encapsulation++; } + flush = 0; for (p = *head; p; p = p->next) { From 149d0774a729497c6a876260d3884826088724b6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:30 -0700 Subject: [PATCH 4/6] tcp: Call skb_gro_checksum_validate In tcp[64]_gro_receive call skb_gro_checksum_validate to validate TCP checksum in the gro context. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 27 +++------------------------ net/ipv6/tcpv6_offload.c | 26 +++----------------------- 2 files changed, 6 insertions(+), 47 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc1b83cb8309f..72912533a191b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -288,35 +288,14 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - /* Use the IP hdr immediately proceeding for this transport */ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - 0); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + inet_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 01b0ff9a0c2c0..dbb3d9262bf66 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -35,34 +35,14 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct ipv6hdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - wsum); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + ip6_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } From 57c67ff4bd92af634f7c91c40eb02a96dd785dda Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:44 -0700 Subject: [PATCH 5/6] udp: additional GRO support Implement GRO for UDPv6. Add UDP checksum verification in gro_receive for both UDP4 and UDP6 calling skb_gro_checksum_validate_zero_check. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/udp.h | 18 +++++++++++++ net/ipv4/udp.c | 1 + net/ipv4/udp_offload.c | 61 ++++++++++++++++++++++++++++++------------ net/ipv6/udp_offload.c | 33 +++++++++++++++++++++++ 4 files changed, 96 insertions(+), 17 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 70f941368ace4..16f4e80f0519c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -158,6 +158,24 @@ static inline __sum16 udp_v4_check(int len, __be32 saddr, void udp_set_csum(bool nocheck, struct sk_buff *skb, __be32 saddr, __be32 daddr, int len); +struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, + struct udphdr *uh); +int udp_gro_complete(struct sk_buff *skb, int nhoff); + +static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) +{ + struct udphdr *uh; + unsigned int hlen, off; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*uh); + uh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) + uh = skb_gro_header_slow(skb, hlen, off); + + return uh; +} + /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline void udp_lib_hash(struct sock *sk) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32f9571e776b8..3549c21fe5f73 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 59035bc3008d2..8ed460e3753ce 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -228,29 +228,22 @@ void udp_del_offload(struct udp_offload *uo) } EXPORT_SYMBOL(udp_del_offload); -static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, + struct udphdr *uh) { struct udp_offload_priv *uo_priv; struct sk_buff *p, **pp = NULL; - struct udphdr *uh, *uh2; - unsigned int hlen, off; + struct udphdr *uh2; + unsigned int off = skb_gro_offset(skb); int flush = 1; if (NAPI_GRO_CB(skb)->udp_mark || - (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE)) + (!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the udp gro layer */ NAPI_GRO_CB(skb)->udp_mark = 1; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - uh = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!uh)) - goto out; - } + NAPI_GRO_CB(skb)->encapsulation++; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); @@ -269,7 +262,12 @@ static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *s continue; uh2 = (struct udphdr *)(p->data + off); - if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) { + + /* Match ports and either checksums are either both zero + * or nonzero. + */ + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || + (!uh->check ^ !uh2->check)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -286,7 +284,24 @@ static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *s return pp; } -static int udp_gro_complete(struct sk_buff *skb, int nhoff) +static struct sk_buff **udp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (unlikely(!uh) || + (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo))) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return udp_gro_receive(head, skb, uh); +} + +int udp_gro_complete(struct sk_buff *skb, int nhoff) { struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); @@ -311,12 +326,24 @@ static int udp_gro_complete(struct sk_buff *skb, int nhoff) return err; } +int udp4_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct iphdr *iph = ip_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, + iph->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv4_offload = { .callbacks = { .gso_send_check = udp4_ufo_send_check, .gso_segment = udp4_ufo_fragment, - .gro_receive = udp_gro_receive, - .gro_complete = udp_gro_complete, + .gro_receive = udp4_gro_receive, + .gro_complete = udp4_gro_complete, }, }; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0ae3d98f83e00..b13e377e9c537 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -10,6 +10,7 @@ * UDPv6 GSO support */ #include +#include #include #include #include @@ -127,10 +128,42 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, out: return segs; } + +static struct sk_buff **udp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (unlikely(!uh) || + (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo))) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return udp_gro_receive(head, skb, uh); +} + +int udp6_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, + &ipv6h->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv6_offload = { .callbacks = { .gso_send_check = udp6_ufo_send_check, .gso_segment = udp6_ufo_fragment, + .gro_receive = udp6_gro_receive, + .gro_complete = udp6_gro_complete, }, }; From 48a5fc773190bd5339869003fa65d38559bb8890 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:34:52 -0700 Subject: [PATCH 6/6] gre: When GRE csum is present count as encap layer wrt csum In GRE demux if the GRE checksum pop rcv encapsulation so that any encapsulated checksums are treated as tunnel checksums. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/gre_demux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 0485bf7f8f030..7c1a8ff974dd1 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -125,6 +125,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, *csum_err = true; return -EINVAL; } + skb_pop_rcv_encapsulation(skb); options++; }