Skip to content

Commit

Permalink
net: avoid a pair of dst_hold()/dst_release() in ip_append_data()
Browse files Browse the repository at this point in the history
We can reduce pressure on dst entry refcount that slowdown UDP transmit
path on SMP machines. This pressure is visible on RTP servers when
delivering content to mediagateways, especially big ones, handling
thousand of streams. Several cpus send UDP frames to the same
destination, hence use the same dst entry.

This patch makes ip_append_data() eventually steal the refcount its
callers had to take on the dst entry.

This doesnt avoid all refcounting, but still gives speedups on SMP,
on UDP/RAW transmit path

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and David S. Miller committed Nov 24, 2008
1 parent 4db0acf commit 2e77d89
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 10 deletions.
2 changes: 1 addition & 1 deletion include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ extern int ip_append_data(struct sock *sk,
int odd, struct sk_buff *skb),
void *from, int len, int protolen,
struct ipcm_cookie *ipc,
struct rtable *rt,
struct rtable **rt,
unsigned int flags);
extern int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb);
extern ssize_t ip_append_page(struct sock *sk, struct page *page,
Expand Down
8 changes: 4 additions & 4 deletions net/ipv4/icmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
}

static void icmp_push_reply(struct icmp_bxm *icmp_param,
struct ipcm_cookie *ipc, struct rtable *rt)
struct ipcm_cookie *ipc, struct rtable **rt)
{
struct sock *sk;
struct sk_buff *skb;

sk = icmp_sk(dev_net(rt->u.dst.dev));
sk = icmp_sk(dev_net((*rt)->u.dst.dev));
if (ip_append_data(sk, icmp_glue_bits, icmp_param,
icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len,
Expand Down Expand Up @@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
}
if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
icmp_param->data.icmph.code))
icmp_push_reply(icmp_param, &ipc, rt);
icmp_push_reply(icmp_param, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
icmp_xmit_unlock(sk);
Expand Down Expand Up @@ -635,7 +635,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param.data_len = room;
icmp_param.head_len = sizeof(struct icmphdr);

icmp_push_reply(&icmp_param, &ipc, rt);
icmp_push_reply(&icmp_param, &ipc, &rt);
ende:
ip_rt_put(rt);
out_unlock:
Expand Down
11 changes: 8 additions & 3 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
struct ipcm_cookie *ipc, struct rtable *rt,
struct ipcm_cookie *ipc, struct rtable **rtp,
unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
Expand All @@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk,
int offset = 0;
unsigned int maxfraglen, fragheaderlen;
int csummode = CHECKSUM_NONE;
struct rtable *rt;

if (flags&MSG_PROBE)
return 0;
Expand All @@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk,
inet->cork.flags |= IPCORK_OPT;
inet->cork.addr = ipc->addr;
}
dst_hold(&rt->u.dst);
rt = *rtp;
/*
* We steal reference to this route, caller should not release it
*/
*rtp = NULL;
inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
rt->u.dst.dev->mtu :
dst_mtu(rt->u.dst.path);
Expand Down Expand Up @@ -1391,7 +1396,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
&ipc, rt, MSG_DONTWAIT);
&ipc, &rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
if (arg->csumoffset >= 0)
*((__sum16 *)skb_transport_header(skb) +
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = rt->rt_dst;
lock_sock(sk);
err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
&ipc, rt, msg->msg_flags);
&ipc, &rt, msg->msg_flags);
if (err)
ip_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), &ipc, rt,
sizeof(struct udphdr), &ipc, &rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_flush_pending_frames(sk);
Expand Down

0 comments on commit 2e77d89

Please sign in to comment.