From 366e41d9774d7010cb63112b6db2fce6dc7809c0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:13 -0500 Subject: [PATCH 1/6] ipv6: pull cork initialization into its own function. Pull IPv6 cork initialization into its own function that can be re-used. IPv6 specific cork data did not have an explicit data structure. This patch creats eone so that just ipv6 cork data can be as arguemts. Also, since IPv6 tries to save the flow label into inet_cork_full tructure, pass the full cork. Adjust ip6_cork_release() to take cork data structures. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/ipv6.h | 12 ++-- net/ipv6/ip6_output.c | 158 ++++++++++++++++++++++++------------------ 2 files changed, 96 insertions(+), 74 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2805062c013fa..4d5169f5d7d18 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -125,6 +125,12 @@ struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; +struct inet6_cork { + struct ipv6_txoptions *opt; + u8 hop_limit; + u8 tclass; +}; + /** * struct ipv6_pinfo - ipv6 private area * @@ -217,11 +223,7 @@ struct ipv6_pinfo { struct ipv6_txoptions *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; - struct { - struct ipv6_txoptions *opt; - u8 hop_limit; - u8 tclass; - } cork; + struct inet6_cork cork; }; /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ce69a12ae48c2..f9f08c43c6e15 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1135,6 +1135,74 @@ static void ip6_append_data_mtu(unsigned int *mtu, } } +static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, + struct inet6_cork *v6_cork, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct rt6_info *rt, struct flowi6 *fl6) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + unsigned int mtu; + + /* + * setup for corking + */ + if (opt) { + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + + v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(v6_cork->opt == NULL)) + return -ENOBUFS; + + v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + + v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, + sk->sk_allocation); + if (opt->dst0opt && !v6_cork->opt->dst0opt) + return -ENOBUFS; + + v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, + sk->sk_allocation); + if (opt->dst1opt && !v6_cork->opt->dst1opt) + return -ENOBUFS; + + v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, + sk->sk_allocation); + if (opt->hopopt && !v6_cork->opt->hopopt) + return -ENOBUFS; + + v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, + sk->sk_allocation); + if (opt->srcrt && !v6_cork->opt->srcrt) + return -ENOBUFS; + + /* need source address above miyazawa*/ + } + dst_hold(&rt->dst); + cork->base.dst = &rt->dst; + cork->fl.u.ip6 = *fl6; + v6_cork->hop_limit = hlimit; + v6_cork->tclass = tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; + cork->base.length = 0; + + return 0; +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -1162,59 +1230,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* * setup for corking */ - if (opt) { - if (WARN_ON(np->cork.opt)) - return -EINVAL; - - np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(np->cork.opt == NULL)) - return -ENOBUFS; - - np->cork.opt->tot_len = opt->tot_len; - np->cork.opt->opt_flen = opt->opt_flen; - np->cork.opt->opt_nflen = opt->opt_nflen; - - np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !np->cork.opt->dst0opt) - return -ENOBUFS; - - np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !np->cork.opt->dst1opt) - return -ENOBUFS; - - np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !np->cork.opt->hopopt) - return -ENOBUFS; - - np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !np->cork.opt->srcrt) - return -ENOBUFS; - - /* need source address above miyazawa*/ - } - dst_hold(&rt->dst); - cork->dst = &rt->dst; - inet->cork.fl.u.ip6 = *fl6; - np->cork.hop_limit = hlimit; - np->cork.tclass = tclass; - if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); - else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } - cork->fragsize = mtu; - if (dst_allfrag(rt->dst.path)) - cork->flags |= IPCORK_ALLFRAG; - cork->length = 0; + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; exthdrlen = (opt ? opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; @@ -1226,8 +1245,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; - mtu = cork->fragsize; } + mtu = cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1503,23 +1522,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } EXPORT_SYMBOL_GPL(ip6_append_data); -static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +static void ip6_cork_release(struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { - if (np->cork.opt) { - kfree(np->cork.opt->dst0opt); - kfree(np->cork.opt->dst1opt); - kfree(np->cork.opt->hopopt); - kfree(np->cork.opt->srcrt); - kfree(np->cork.opt); - np->cork.opt = NULL; + if (v6_cork->opt) { + kfree(v6_cork->opt->dst0opt); + kfree(v6_cork->opt->dst1opt); + kfree(v6_cork->opt->hopopt); + kfree(v6_cork->opt->srcrt); + kfree(v6_cork->opt); + v6_cork->opt = NULL; } - if (inet->cork.base.dst) { - dst_release(inet->cork.base.dst); - inet->cork.base.dst = NULL; - inet->cork.base.flags &= ~IPCORK_ALLFRAG; + if (cork->base.dst) { + dst_release(cork->base.dst); + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + memset(&cork->fl, 0, sizeof(cork->fl)); } int ip6_push_pending_frames(struct sock *sk) @@ -1599,7 +1619,7 @@ int ip6_push_pending_frames(struct sock *sk) } out: - ip6_cork_release(inet, np); + ip6_cork_release(&inet->cork, &np->cork); return err; error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); @@ -1618,6 +1638,6 @@ void ip6_flush_pending_frames(struct sock *sk) kfree_skb(skb); } - ip6_cork_release(inet_sk(sk), inet6_sk(sk)); + ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); From 0bbe84a67b0b5460c2e67101b3aa95cb828845f3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:14 -0500 Subject: [PATCH 2/6] ipv6: Append sending data to arbitrary queue Add the ability to append data to arbitrary queue. This will be needed later to implement lockless UDP sends. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 106 ++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 39 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f9f08c43c6e15..1b66453a695d1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1041,6 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -1056,7 +1057,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, @@ -1079,7 +1080,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; } @@ -1203,49 +1204,36 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, return 0; } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, - int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) +static int __ip6_append_data(struct sock *sk, + struct flowi6 *fl6, + struct sk_buff_head *queue, + struct inet_cork *cork, + struct inet6_cork *v6_cork, + struct page_frag *pfrag, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags, int dontfrag) { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; - int exthdrlen; - int dst_exthdrlen; + int exthdrlen = 0; + int dst_exthdrlen = 0; int hh_len; int copy; int err; int offset = 0; __u8 tx_flags = 0; u32 tskey = 0; + struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct ipv6_txoptions *opt = v6_cork->opt; - if (flags&MSG_PROBE) - return 0; - cork = &inet->cork.base; - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking - */ - err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, - tclass, opt, rt, fl6); - if (err) - return err; - exthdrlen = (opt ? opt->opt_flen : 0); - length += exthdrlen; - transhdrlen += exthdrlen; + skb = skb_peek_tail(queue); + if (!skb) { + exthdrlen = opt ? opt->opt_flen : 0; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; - } else { - rt = (struct rt6_info *)cork->dst; - fl6 = &inet->cork.fl.u.ip6; - opt = np->cork.opt; - transhdrlen = 0; - exthdrlen = 0; - dst_exthdrlen = 0; } + mtu = cork->fragsize; orig_mtu = mtu; @@ -1311,13 +1299,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip6_ufo_append_data(sk, getfrag, from, length, + err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); if (err) @@ -1458,7 +1445,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* * Put the packet on the pending queue */ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1477,7 +1464,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } } else { int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) @@ -1520,6 +1506,42 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } + +int ip6_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, int hlimit, + int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, int dontfrag) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + int exthdrlen; + int err; + + if (flags&MSG_PROBE) + return 0; + if (skb_queue_empty(&sk->sk_write_queue)) { + /* + * setup for corking + */ + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; + + exthdrlen = (opt ? opt->opt_flen : 0); + length += exthdrlen; + transhdrlen += exthdrlen; + } else { + fl6 = &inet->cork.fl.u.ip6; + transhdrlen = 0; + } + + return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + &np->cork, sk_page_frag(sk), getfrag, + from, length, transhdrlen, flags, dontfrag); +} EXPORT_SYMBOL_GPL(ip6_append_data); static void ip6_cork_release(struct inet_cork_full *cork, @@ -1627,11 +1649,12 @@ int ip6_push_pending_frames(struct sock *sk) } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); -void ip6_flush_pending_frames(struct sock *sk) +static void __ip6_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { + while ((skb = __skb_dequeue_tail(queue)) != NULL) { if (skb_dst(skb)) IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); @@ -1640,4 +1663,9 @@ void ip6_flush_pending_frames(struct sock *sk) ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); } + +void ip6_flush_pending_frames(struct sock *sk) +{ + __ip6_flush_pending_frames(sk, &sk->sk_write_queue); +} EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); From 6422398c2ab09268a55112f98cbf96bbf0184328 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:15 -0500 Subject: [PATCH 3/6] ipv6: introduce ipv6_make_skb This commit is very similar to commit 1c32c5ad6fac8cee1a77449f5abf211e911ff830 Author: Herbert Xu Date: Tue Mar 1 02:36:47 2011 +0000 inet: Add ip_make_skb and ip_finish_skb It adds IPv6 version of the helpers ip6_make_skb and ip6_finish_skb. The job of ip6_make_skb is to collect messages into an ipv6 packet and poplulate ipv6 eader. The job of ip6_finish_skb is to transmit the generated skb. Together they replicated the job of ip6_push_pending_frames() while also provide the capability to be called independently. This will be needed to add lockless UDP sendmsg support. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/net/ipv6.h | 19 ++++++++ net/ipv6/ip6_output.c | 103 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 103 insertions(+), 19 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4292929392b01..8027ca53e31f5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk); void ip6_flush_pending_frames(struct sock *sk); +int ip6_send_skb(struct sk_buff *skb); + +struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork); +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct flowi6 *fl6, struct rt6_info *rt, + unsigned int flags, int dontfrag); + +static inline struct sk_buff *ip6_finish_skb(struct sock *sk) +{ + return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork, + &inet6_sk(sk)->cork); +} + int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1b66453a695d1..b89d3c27dac7a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1564,22 +1564,23 @@ static void ip6_cork_release(struct inet_cork_full *cork, memset(&cork->fl, 0, sizeof(cork->fl)); } -int ip6_push_pending_frames(struct sock *sk) +struct sk_buff *__ip6_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; - struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct ipv6_txoptions *opt = v6_cork->opt; + struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; - int err = 0; - skb = __skb_dequeue(&sk->sk_write_queue); + skb = __skb_dequeue(queue); if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); @@ -1587,7 +1588,7 @@ int ip6_push_pending_frames(struct sock *sk) /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1612,10 +1613,10 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, + ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, np->autoflowlabel)); - hdr->hop_limit = np->cork.hop_limit; + hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; @@ -1632,25 +1633,45 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } + ip6_cork_release(cork, v6_cork); +out: + return skb; +} + +int ip6_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + int err; + err = ip6_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP6_INC_STATS(net, rt->rt6i_idev, + IPSTATS_MIB_OUTDISCARDS); } -out: - ip6_cork_release(&inet->cork, &np->cork); return err; -error: - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - goto out; +} + +int ip6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + + skb = ip6_finish_skb(sk); + if (!skb) + return 0; + + return ip6_send_skb(skb); } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); static void __ip6_flush_pending_frames(struct sock *sk, - struct sk_buff_head *queue) + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb; @@ -1661,11 +1682,55 @@ static void __ip6_flush_pending_frames(struct sock *sk, kfree_skb(skb); } - ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); + ip6_cork_release(cork, v6_cork); } void ip6_flush_pending_frames(struct sock *sk) { - __ip6_flush_pending_frames(sk, &sk->sk_write_queue); + __ip6_flush_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); + +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, + struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + int dontfrag) +{ + struct inet_cork_full cork; + struct inet6_cork v6_cork; + struct sk_buff_head queue; + int exthdrlen = (opt ? opt->opt_flen : 0); + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); + if (err) + return ERR_PTR(err); + + if (dontfrag < 0) + dontfrag = inet6_sk(sk)->dontfrag; + + err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + ¤t->task_frag, getfrag, from, + length + exthdrlen, transhdrlen + exthdrlen, + flags, dontfrag); + if (err) { + __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + return ERR_PTR(err); + } + + return __ip6_make_skb(sk, &queue, &cork, &v6_cork); +} From d39d938c8228a4c5860138a53cf2b9ae4c4baec2 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:16 -0500 Subject: [PATCH 4/6] ipv6: Introduce udpv6_send_skb() Now that we can individually construct IPv6 skbs to send, add a udpv6_send_skb() function to populate the udp header and send the skb. This allows udp_v6_push_pending_frames() to re-use this function as well as enables us to add lockless sendmsg() support. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/udp.c | 67 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e41f017cd479c..67a3d70f7ac48 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, { unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct sk_buff *frags = skb_shinfo(skb)->frag_list; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); @@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } + do { + csum = csum_add(csum, frags->csum); + } while ((frags = frags->next)); uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum); @@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * Sending */ -static int udp_v6_push_pending_frames(struct sock *sk) +static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) { - struct sk_buff *skb; + struct sock *sk = skb->sk; struct udphdr *uh; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; - - if (up->pending == AF_INET) - return udp_push_pending_frames(sk); - - fl6 = &inet->cork.fl.u.ip6; - - /* Grab the skbuff where UDP header space exists. */ - skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) - goto out; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; /* * Create a UDP header @@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh = udp_hdr(skb); uh->source = fl6->fl6_sport; uh->dest = fl6->fl6_dport; - uh->len = htons(up->len); + uh->len = htons(len); uh->check = 0; if (is_udplite) - csum = udplite_csum_outgoing(sk, skb); - else if (up->no_check6_tx) { /* UDP csum disabled */ + csum = udplite_csum(skb); + else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, - up->len); + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; } else - csum = udp_csum_outgoing(sk, skb); + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, - up->len, fl6->flowi6_proto, csum); + len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip6_push_pending_frames(sk); + err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { UDP6_INC_STATS_USER(sock_net(sk), @@ -1082,6 +1071,30 @@ static int udp_v6_push_pending_frames(struct sock *sk) } else UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +static int udp_v6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + struct udp_sock *up = udp_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + /* ip6_finish_skb will release the cork, so make a copy of + * fl6 here. + */ + fl6 = inet_sk(sk)->cork.fl.u.ip6; + + skb = ip6_finish_skb(sk); + if (!skb) + goto out; + + err = udp_v6_send_skb(skb, &fl6); + out: up->len = 0; up->pending = 0; From 03485f2adcde0c2d4e9228b659be78e872486bbb Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:17 -0500 Subject: [PATCH 5/6] udpv6: Add lockless sendmsg() support This commit adds the same functionaliy to IPv6 that commit 903ab86d195cca295379699299c5fc10beba31c7 Author: Herbert Xu Date: Tue Mar 1 02:36:48 2011 +0000 udp: Add lockless transmit path added to IPv4. UDP transmit path can now run without a socket lock, thus allowing multiple threads to send to a single socket more efficiently. This is only used when corking/MSG_MORE is not used. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/udp.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 67a3d70f7ac48..d048d46779fc5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1177,6 +1177,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { /* * There are pending frames. @@ -1307,6 +1308,20 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, goto do_confirm; back_from_confirm: + /* Lockless fast path for the non-corking case */ + if (!corkreq) { + struct sk_buff *skb; + + skb = ip6_make_skb(sk, getfrag, msg, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, + &fl6, (struct rt6_info *)dst, + msg->msg_flags, dontfrag); + err = PTR_ERR(skb); + if (!IS_ERR_OR_NULL(skb)) + err = udp_v6_send_skb(skb, &fl6); + goto release_dst; + } + lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ @@ -1324,7 +1339,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (dontfrag < 0) dontfrag = np->dontfrag; up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, @@ -1336,6 +1350,11 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; + if (err > 0) + err = np->recverr ? net_xmit_errno(err) : 0; + release_sock(sk); + +release_dst: if (dst) { if (connected) { ip6_dst_store(sk, dst, @@ -1352,9 +1371,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, dst = NULL; } - if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; - release_sock(sk); out: dst_release(dst); fl6_sock_release(flowlabel); From 32dce968dd987adfb0c00946d78dad9154f64759 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:18 -0500 Subject: [PATCH 6/6] ipv6: Allow for partial checksums on non-ufo packets Currntly, if we are not doing UFO on the packet, all UDP packets will start with CHECKSUM_NONE and thus perform full checksum computations in software even if device support IPv6 checksum offloading. Let's start start with CHECKSUM_PARTIAL if the device supports it and we are sending only a single packet at or below mtu size. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b89d3c27dac7a..1a036f35d833e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1227,6 +1227,7 @@ static int __ip6_append_data(struct sock *sk, u32 tskey = 0; struct rt6_info *rt = (struct rt6_info *)cork->dst; struct ipv6_txoptions *opt = v6_cork->opt; + int csummode = CHECKSUM_NONE; skb = skb_peek_tail(queue); if (!skb) { @@ -1283,6 +1284,14 @@ static int __ip6_append_data(struct sock *sk, tskey = sk->sk_tskey++; } + /* If this is the first and only packet and device + * supports checksum offloading, let's use it. + */ + if (!skb && + length + fragheaderlen < mtu && + rt->dst.dev->features & NETIF_F_V6_CSUM && + !exthdrlen) + csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1395,7 +1404,7 @@ static int __ip6_append_data(struct sock *sk, * Fill in the control structures */ skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +