Skip to content

Commit

Permalink
net: fix possible wrong checksum generation
Browse files Browse the repository at this point in the history
Pravin Shelar mentioned that GSO could potentially generate
wrong TX checksum if skb has fragments that are overwritten
by the user between the checksum computation and transmit.

He suggested to linearize skbs but this extra copy can be
avoided for normal tcp skbs cooked by tcp_sendmsg().

This patch introduces a new SKB_GSO_SHARED_FRAG flag, set
in skb_shinfo(skb)->gso_type if at least one frag can be
modified by the user.

Typical sources of such possible overwrites are {vm}splice(),
sendfile(), and macvtap/tun/virtio_net drivers.

Tested:

$ netperf -H 7.7.8.84
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
7.7.8.84 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3959.52

$ netperf -H 7.7.8.84 -t TCP_SENDFILE
TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 ()
port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3216.80

Performance of the SENDFILE is impacted by the extra allocation and
copy, and because we use order-0 pages, while the TCP_STREAM uses
bigger pages.

Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and David S. Miller committed Jan 28, 2013
1 parent 6155002 commit cef401d
Show file tree
Hide file tree
Showing 13 changed files with 65 additions and 15 deletions.
3 changes: 2 additions & 1 deletion drivers/net/macvtap.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
skb->data_len += len;
skb->len += len;
skb->truesize += truesize;
skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
atomic_add(truesize, &skb->sk->sk_wmem_alloc);
while (len) {
int off = base & ~PAGE_MASK;
Expand Down Expand Up @@ -598,7 +599,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,

if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
skb_shinfo(skb)->gso_type = gso_type;
skb_shinfo(skb)->gso_type |= gso_type;

/* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
Expand Down
12 changes: 8 additions & 4 deletions drivers/net/tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
skb->data_len += len;
skb->len += len;
skb->truesize += truesize;
skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
atomic_add(truesize, &skb->sk->sk_wmem_alloc);
while (len) {
int off = base & ~PAGE_MASK;
Expand Down Expand Up @@ -1150,16 +1151,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
}

if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
unsigned short gso_type = 0;

pr_debug("GSO!\n");
switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
gso_type = SKB_GSO_TCPV4;
break;
case VIRTIO_NET_HDR_GSO_TCPV6:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
gso_type = SKB_GSO_TCPV6;
break;
case VIRTIO_NET_HDR_GSO_UDP:
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
gso_type = SKB_GSO_UDP;
break;
default:
tun->dev->stats.rx_frame_errors++;
Expand All @@ -1168,9 +1171,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
}

if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
gso_type |= SKB_GSO_TCP_ECN;

skb_shinfo(skb)->gso_size = gso.gso_size;
skb_shinfo(skb)->gso_type |= gso_type;
if (skb_shinfo(skb)->gso_size == 0) {
tun->dev->stats.rx_frame_errors++;
kfree_skb(skb);
Expand Down
12 changes: 8 additions & 4 deletions drivers/net/virtio_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
skb->len += size;
skb->truesize += PAGE_SIZE;
skb_shinfo(skb)->nr_frags++;
skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
*len -= size;
}

Expand Down Expand Up @@ -379,16 +380,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
ntohs(skb->protocol), skb->len, skb->pkt_type);

if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
unsigned short gso_type = 0;

pr_debug("GSO!\n");
switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
gso_type = SKB_GSO_TCPV4;
break;
case VIRTIO_NET_HDR_GSO_UDP:
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
gso_type = SKB_GSO_UDP;
break;
case VIRTIO_NET_HDR_GSO_TCPV6:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
gso_type = SKB_GSO_TCPV6;
break;
default:
net_warn_ratelimited("%s: bad gso type %u.\n",
Expand All @@ -397,14 +400,15 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
}

if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
gso_type |= SKB_GSO_TCP_ECN;

skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
if (skb_shinfo(skb)->gso_size == 0) {
net_warn_ratelimited("%s: zero gso size.\n", dev->name);
goto frame_err;
}

skb_shinfo(skb)->gso_type |= gso_type;
/* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
skb_shinfo(skb)->gso_segs = 0;
Expand Down
19 changes: 19 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,13 @@ enum {
SKB_GSO_TCPV6 = 1 << 4,

SKB_GSO_FCOE = 1 << 5,

/* This indicates at least one fragment might be overwritten
* (as in vmsplice(), sendfile() ...)
* If we need to compute a TX checksum, we'll need to copy
* all frags to avoid possible bad checksum
*/
SKB_GSO_SHARED_FRAG = 1 << 6,
};

#if BITS_PER_LONG > 32
Expand Down Expand Up @@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb)
return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
}

/**
* skb_has_shared_frag - can any frag be overwritten
* @skb: buffer to test
*
* Return true if the skb has at least one frag that might be modified
* by an external entity (as in vmsplice()/sendfile())
*/
static inline bool skb_has_shared_frag(const struct sk_buff *skb)
{
return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
}

/**
* skb_linearize_cow - make sure skb is linear and writable
* @skb: buffer to process
Expand Down
9 changes: 9 additions & 0 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -2271,6 +2271,15 @@ int skb_checksum_help(struct sk_buff *skb)
return -EINVAL;
}

/* Before computing a checksum, we should make sure no frag could
* be modified by an external entity : checksum could be wrong.
*/
if (skb_has_shared_frag(skb)) {
ret = __skb_linearize(skb);
if (ret)
goto out;
}

offset = skb_checksum_start_offset(skb);
BUG_ON(offset >= skb_headlen(skb));
csum = skb_checksum(skb, offset, skb->len - offset, 0);
Expand Down
4 changes: 4 additions & 0 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);

skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type;

if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */
Expand Down Expand Up @@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_copy_from_linear_data_offset(skb, offset,
skb_put(nskb, hsize), hsize);

skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;

while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
__skb_frag_ref(frag);
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_SHARED_FRAG |
0)))
goto out;

Expand Down
4 changes: 3 additions & 1 deletion net/ipv4/ip_gre.c
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ static int ipgre_rcv(struct sk_buff *skb)
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *old_iph = ip_hdr(skb);
const struct iphdr *old_iph;
const struct iphdr *tiph;
struct flowi4 fl4;
u8 tos;
Expand All @@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
skb_checksum_help(skb))
goto tx_error;

old_iph = ip_hdr(skb);

if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;

Expand Down
4 changes: 3 additions & 1 deletion net/ipv4/ipip.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
__be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
const struct iphdr *old_iph = ip_hdr(skb);
const struct iphdr *old_iph;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr;
Expand All @@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_checksum_help(skb))
goto tx_error;

old_iph = ip_hdr(skb);

if (tos & 1)
tos = old_iph->tos;

Expand Down
3 changes: 3 additions & 0 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
skb_fill_page_desc(skb, i, page, offset, copy);
}

skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;

skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
Expand Down Expand Up @@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
SKB_GSO_SHARED_FRAG |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
*/
if (!skb_shinfo(prev)->gso_size) {
skb_shinfo(prev)->gso_size = mss;
skb_shinfo(prev)->gso_type = sk->sk_gso_type;
skb_shinfo(prev)->gso_type |= sk->sk_gso_type;
}

/* CHECKME: To clear or not to clear? Mimics normal skb currently */
if (skb_shinfo(skb)->gso_segs <= 1) {
skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
}

/* Difference in this won't matter, both ACKed by the same cumul. ACK */
Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1133,18 +1133,18 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
if (skb->len <= mss_now || !sk_can_gso(sk) ||
skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
skb_shinfo(skb)->gso_segs = 1;
skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_type = 0;
} else {
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
skb_shinfo(skb)->gso_size = mss_now;
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
skb_shinfo(skb)->gso_type |= sk->sk_gso_type;
}
}

Expand Down
1 change: 1 addition & 0 deletions net/ipv6/ip6_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
SKB_GSO_SHARED_FRAG |
0)))
goto out;

Expand Down

0 comments on commit cef401d

Please sign in to comment.