From 16cc1400456a4d5d6df9f5e7f1b2c7cb8c50404b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Feb 2016 18:02:14 -0500 Subject: [PATCH 1/4] packet: move vnet_hdr code to helper functions packet_snd and packet_rcv support virtio net headers for GSO. Move this logic into helper functions to be able to reuse it in tpacket_snd and tpacket_rcv. This is a straighforward code move with one exception. Instead of creating and passing a separate gso_type variable, reuse vnet_hdr.gso_type after conversion from virtio to kernel gso type. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 261 +++++++++++++++++++++++------------------ 1 file changed, 148 insertions(+), 113 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 992396aa635ce..bd3de7b4fb79e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1960,6 +1960,64 @@ static unsigned int run_filter(struct sk_buff *skb, return res; } +static int __packet_rcv_vnet(const struct sk_buff *skb, + struct virtio_net_hdr *vnet_hdr) +{ + *vnet_hdr = (const struct virtio_net_hdr) { 0 }; + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + /* This is a hint as to how much should be linear. */ + vnet_hdr->hdr_len = + __cpu_to_virtio16(vio_le(), skb_headlen(skb)); + vnet_hdr->gso_size = + __cpu_to_virtio16(vio_le(), sinfo->gso_size); + + if (sinfo->gso_type & SKB_GSO_TCPV4) + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; + else if (sinfo->gso_type & SKB_GSO_FCOE) + return -EINVAL; + else + BUG(); + + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } else + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(), + skb_checksum_start_offset(skb)); + vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(), + skb->csum_offset); + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; + } /* else everything is zero */ + + return 0; +} + +static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, + size_t *len) +{ + struct virtio_net_hdr vnet_hdr; + + if (*len < sizeof(vnet_hdr)) + return -EINVAL; + *len -= sizeof(vnet_hdr); + + if (__packet_rcv_vnet(skb, &vnet_hdr)) + return -EINVAL; + + return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr)); +} + /* * This function makes lazy skb cloning in hope that most of packets * are discarded by BPF. @@ -2347,6 +2405,84 @@ static void tpacket_set_protocol(const struct net_device *dev, } } +static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) +{ + unsigned short gso_type = 0; + + if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > + __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len))) + vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(), + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2); + + if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) + return -EINVAL; + + if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + gso_type = SKB_GSO_TCPV4; + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + gso_type = SKB_GSO_TCPV6; + break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; + default: + return -EINVAL; + } + + if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + gso_type |= SKB_GSO_TCP_ECN; + + if (vnet_hdr->gso_size == 0) + return -EINVAL; + } + + vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */ + return 0; +} + +static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, + struct virtio_net_hdr *vnet_hdr) +{ + int n; + + if (*len < sizeof(*vnet_hdr)) + return -EINVAL; + *len -= sizeof(*vnet_hdr); + + n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter); + if (n != sizeof(*vnet_hdr)) + return -EFAULT; + + return __packet_snd_vnet_parse(vnet_hdr, *len); +} + +static int packet_snd_vnet_gso(struct sk_buff *skb, + struct virtio_net_hdr *vnet_hdr) +{ + if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start); + u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset); + + if (!skb_partial_csum_set(skb, s, o)) + return -EINVAL; + } + + skb_shinfo(skb)->gso_size = + __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size); + skb_shinfo(skb)->gso_type = vnet_hdr->gso_type; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + return 0; +} + static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) @@ -2643,12 +2779,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; - int vnet_hdr_len; struct packet_sock *po = pkt_sk(sk); - unsigned short gso_type = 0; int hlen, tlen; int extra_len = 0; - ssize_t n; /* * Get and verify the address. @@ -2686,53 +2819,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; if (po->has_vnet_hdr) { - vnet_hdr_len = sizeof(vnet_hdr); - - err = -EINVAL; - if (len < vnet_hdr_len) - goto out_unlock; - - len -= vnet_hdr_len; - - err = -EFAULT; - n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter); - if (n != vnet_hdr_len) - goto out_unlock; - - if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) + - __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 > - __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len))) - vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(), - __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) + - __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2); - - err = -EINVAL; - if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len) + err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); + if (err) goto out_unlock; - - if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { - switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { - case VIRTIO_NET_HDR_GSO_TCPV4: - gso_type = SKB_GSO_TCPV4; - break; - case VIRTIO_NET_HDR_GSO_TCPV6: - gso_type = SKB_GSO_TCPV6; - break; - case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; - break; - default: - goto out_unlock; - } - - if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) - gso_type |= SKB_GSO_TCP_ECN; - - if (vnet_hdr.gso_size == 0) - goto out_unlock; - - } } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { @@ -2744,7 +2833,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) } err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) + if (!vnet_hdr.gso_type && + (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; @@ -2775,7 +2865,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (!gso_type && (len > dev->mtu + reserve + extra_len) && + if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_free; @@ -2789,24 +2879,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) packet_pick_tx_queue(dev, skb); if (po->has_vnet_hdr) { - if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start); - u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset); - if (!skb_partial_csum_set(skb, s, o)) { - err = -EINVAL; - goto out_free; - } - } - - skb_shinfo(skb)->gso_size = - __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size); - skb_shinfo(skb)->gso_type = gso_type; - - /* Header must be checked, and gso_segs computed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; - - len += vnet_hdr_len; + err = packet_snd_vnet_gso(skb, &vnet_hdr); + if (err) + goto out_free; + len += sizeof(vnet_hdr); } skb_probe_transport_header(skb, reserve); @@ -3177,51 +3253,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, packet_rcv_has_room(pkt_sk(sk), NULL); if (pkt_sk(sk)->has_vnet_hdr) { - struct virtio_net_hdr vnet_hdr = { 0 }; - - err = -EINVAL; - vnet_hdr_len = sizeof(vnet_hdr); - if (len < vnet_hdr_len) - goto out_free; - - len -= vnet_hdr_len; - - if (skb_is_gso(skb)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - /* This is a hint as to how much should be linear. */ - vnet_hdr.hdr_len = - __cpu_to_virtio16(vio_le(), skb_headlen(skb)); - vnet_hdr.gso_size = - __cpu_to_virtio16(vio_le(), sinfo->gso_size); - if (sinfo->gso_type & SKB_GSO_TCPV4) - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - else if (sinfo->gso_type & SKB_GSO_TCPV6) - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; - else if (sinfo->gso_type & SKB_GSO_FCOE) - goto out_free; - else - BUG(); - if (sinfo->gso_type & SKB_GSO_TCP_ECN) - vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; - } else - vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(), - skb_checksum_start_offset(skb)); - vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(), - skb->csum_offset); - } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; - } /* else everything is zero */ - - err = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_len); - if (err < 0) + err = packet_rcv_vnet(msg, skb, &len); + if (err) goto out_free; + vnet_hdr_len = sizeof(struct virtio_net_hdr); } /* You lose any data beyond the buffer you gave. If it worries From 58d19b19cd99b438541eea4cdbf5c171900b25e5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Feb 2016 18:02:15 -0500 Subject: [PATCH 2/4] packet: vnet_hdr support for tpacket_rcv Support socket option PACKET_VNET_HDR together with PACKET_RX_RING. When enabled, a struct virtio_net_hdr will precede the data in the packet ring slots. Verified with test program at github.com/wdebruij/kerneltools/blob/master/tests/psock_rxring_vnet.c pkt: 1454269209.798420 len=5066 vnet: gso_type=tcpv4 gso_size=1448 hlen=66 ecn=off csum: start=34 off=16 eth: proto=0x800 ip: src= dst= proto=6 len=5052 Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bd3de7b4fb79e..b26df32cc64f6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2206,7 +2206,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, unsigned int maclen = skb_network_offset(skb); netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + - po->tp_reserve; + po->tp_reserve; + if (po->has_vnet_hdr) + netoff += sizeof(struct virtio_net_hdr); macoff = netoff - maclen; } if (po->tp_version <= TPACKET_V2) { @@ -2243,7 +2245,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.raw = packet_current_rx_frame(po, skb, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) - goto ring_is_full; + goto drop_n_account; if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2262,6 +2264,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } spin_unlock(&sk->sk_receive_queue.lock); + if (po->has_vnet_hdr) { + if (__packet_rcv_vnet(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr))) { + spin_lock(&sk->sk_receive_queue.lock); + goto drop_n_account; + } + } + skb_copy_bits(skb, 0, h.raw + macoff, snaplen); if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) @@ -2357,7 +2367,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, kfree_skb(skb); return 0; -ring_is_full: +drop_n_account: po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); @@ -3587,7 +3597,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv } if (optlen < len) return -EINVAL; - if (pkt_sk(sk)->has_vnet_hdr) + if (pkt_sk(sk)->has_vnet_hdr && + optname == PACKET_TX_RING) return -EINVAL; if (copy_from_user(&req_u.req, optval, len)) return -EFAULT; From 8d39b4a6b83c141acaf0b5e8f90fd5e67721ff90 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Feb 2016 18:02:16 -0500 Subject: [PATCH 3/4] packet: parse tpacket header before skb alloc GSO packet headers must be stored in the linear skb segment. Move tpacket header parsing before sock_alloc_send_skb. The GSO follow-on patch will later increase the skb linear argument to sock_alloc_send_skb if needed for large packets. The header parsing code does not require an allocated skb, so is safe to move. Later pass to tpacket_fill_skb the computed data start and length. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 111 ++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 46 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b26df32cc64f6..89377bf283c82 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2494,14 +2494,13 @@ static int packet_snd_vnet_gso(struct sk_buff *skb, } static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, - void *frame, struct net_device *dev, int size_max, + void *frame, struct net_device *dev, void *data, int tp_len, __be16 proto, unsigned char *addr, int hlen) { union tpacket_uhdr ph; - int to_write, offset, len, tp_len, nr_frags, len_max; + int to_write, offset, len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; - void *data; int err; ph.raw = frame; @@ -2513,51 +2512,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; - switch (po->tp_version) { - case TPACKET_V2: - tp_len = ph.h2->tp_len; - break; - default: - tp_len = ph.h1->tp_len; - break; - } - if (unlikely(tp_len > size_max)) { - pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); - return -EMSGSIZE; - } - skb_reserve(skb, hlen); skb_reset_network_header(skb); - if (unlikely(po->tp_tx_has_off)) { - int off_min, off_max, off; - off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); - off_max = po->tx_ring.frame_size - tp_len; - if (sock->type == SOCK_DGRAM) { - switch (po->tp_version) { - case TPACKET_V2: - off = ph.h2->tp_net; - break; - default: - off = ph.h1->tp_net; - break; - } - } else { - switch (po->tp_version) { - case TPACKET_V2: - off = ph.h2->tp_mac; - break; - default: - off = ph.h1->tp_mac; - break; - } - } - if (unlikely((off < off_min) || (off_max < off))) - return -EINVAL; - data = ph.raw + off; - } else { - data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); - } to_write = tp_len; if (sock->type == SOCK_DGRAM) { @@ -2615,6 +2572,61 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static int tpacket_parse_header(struct packet_sock *po, void *frame, + int size_max, void **data) +{ + union tpacket_uhdr ph; + int tp_len, off; + + ph.raw = frame; + + switch (po->tp_version) { + case TPACKET_V2: + tp_len = ph.h2->tp_len; + break; + default: + tp_len = ph.h1->tp_len; + break; + } + if (unlikely(tp_len > size_max)) { + pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); + return -EMSGSIZE; + } + + if (unlikely(po->tp_tx_has_off)) { + int off_min, off_max; + + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); + off_max = po->tx_ring.frame_size - tp_len; + if (po->sk.sk_type == SOCK_DGRAM) { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_net; + break; + default: + off = ph.h1->tp_net; + break; + } + } else { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_mac; + break; + default: + off = ph.h1->tp_mac; + break; + } + } + if (unlikely((off < off_min) || (off_max < off))) + return -EINVAL; + } else { + off = po->tp_hdrlen - sizeof(struct sockaddr_ll); + } + + *data = frame + off; + return tp_len; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; @@ -2626,6 +2638,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int tp_len, size_max; unsigned char *addr; + void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen; @@ -2673,6 +2686,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) continue; } + skb = NULL; + tp_len = tpacket_parse_header(po, ph, size_max, &data); + if (tp_len < 0) + goto tpacket_error; + status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; @@ -2686,7 +2704,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) err = len_sum; goto out_status; } - tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, + tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, addr, hlen); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && @@ -2694,6 +2712,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { +tpacket_error: if (po->tp_loss) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); From 1d036d25e5609ba73fee6a88db01c306b140d512 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Feb 2016 18:02:17 -0500 Subject: [PATCH 4/4] packet: tpacket_snd gso and checksum offload Support socket option PACKET_VNET_HDR together with PACKET_TX_RING. When enabled, a struct virtio_net_hdr is expected to precede the data in the ring. The vnet option must be set before the ring is created. The implementation reuses the existing skb_copy_bits code that is used when dev->hard_header_len is non-zero. Move this ll_header check to before the skb alloc and combine it with a test for vnet_hdr->hdr_len. Allocate and copy the max of the two. Verified with test program at github.com/wdebruij/kerneltools/blob/master/tests/psock_txring_vnet.c Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 53 +++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 89377bf283c82..b7e7851ddc5d0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2495,7 +2495,7 @@ static int packet_snd_vnet_gso(struct sk_buff *skb, static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, - __be16 proto, unsigned char *addr, int hlen) + __be16 proto, unsigned char *addr, int hlen, int copylen) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; @@ -2522,20 +2522,17 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, NULL, tp_len); if (unlikely(err < 0)) return -EINVAL; - } else if (dev->hard_header_len) { - if (ll_header_truncated(dev, tp_len)) - return -EINVAL; - + } else if (copylen) { skb_push(skb, dev->hard_header_len); - err = skb_store_bits(skb, 0, data, - dev->hard_header_len); + skb_put(skb, copylen - dev->hard_header_len); + err = skb_store_bits(skb, 0, data, copylen); if (unlikely(err)) return err; if (!skb->protocol) tpacket_set_protocol(dev, skb); - data += dev->hard_header_len; - to_write -= dev->hard_header_len; + data += copylen; + to_write -= copylen; } offset = offset_in_page(data); @@ -2631,6 +2628,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; + struct virtio_net_hdr *vnet_hdr = NULL; __be16 proto; int err, reserve = 0; void *ph; @@ -2641,7 +2639,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; - int hlen, tlen; + int hlen, tlen, copylen = 0; mutex_lock(&po->pg_vec_lock); @@ -2674,7 +2672,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if (size_max > dev->mtu + reserve + VLAN_HLEN) + if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) size_max = dev->mtu + reserve + VLAN_HLEN; do { @@ -2694,8 +2692,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; + if (po->has_vnet_hdr) { + vnet_hdr = data; + data += sizeof(*vnet_hdr); + tp_len -= sizeof(*vnet_hdr); + if (tp_len < 0 || + __packet_snd_vnet_parse(vnet_hdr, tp_len)) { + tp_len = -EINVAL; + goto tpacket_error; + } + copylen = __virtio16_to_cpu(vio_le(), + vnet_hdr->hdr_len); + } + if (dev->hard_header_len) { + if (ll_header_truncated(dev, tp_len)) { + tp_len = -EINVAL; + goto tpacket_error; + } + copylen = max_t(int, copylen, dev->hard_header_len); + } skb = sock_alloc_send_skb(&po->sk, - hlen + tlen + sizeof(struct sockaddr_ll), + hlen + tlen + sizeof(struct sockaddr_ll) + + (copylen - dev->hard_header_len), !need_wait, &err); if (unlikely(skb == NULL)) { @@ -2705,9 +2723,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, - addr, hlen); + addr, hlen, copylen); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && + !po->has_vnet_hdr && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; @@ -2726,6 +2745,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } + if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) { + tp_len = -EINVAL; + goto tpacket_error; + } + packet_pick_tx_queue(dev, skb); skb->destructor = tpacket_destruct_skb; @@ -3616,9 +3640,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv } if (optlen < len) return -EINVAL; - if (pkt_sk(sk)->has_vnet_hdr && - optname == PACKET_TX_RING) - return -EINVAL; if (copy_from_user(&req_u.req, optval, len)) return -EFAULT; return packet_set_ring(sk, &req_u, 0,