Skip to content

Commit

Permalink
udp: enable MSG_PEEK at non-zero offset
Browse files Browse the repository at this point in the history
Enable peeking at UDP datagrams at the offset specified with socket
option SOL_SOCKET/SO_PEEK_OFF. Peek at any datagram in the queue, up
to the end of the given datagram.

Implement the SO_PEEK_OFF semantics introduced in commit ef64a54
("sock: Introduce the SO_PEEK_OFF sock option"). Increase the offset
on peek, decrease it on regular reads.

When peeking, always checksum the packet immediately, to avoid
recomputation on subsequent peeks and final read.

The socket lock is not held for the duration of udp_recvmsg, so
peek and read operations can run concurrently. Only the last store
to sk_peek_off is preserved.

Signed-off-by: Sam Kumar <samanthakumar@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
samanthakumar authored and David S. Miller committed Apr 5, 2016
1 parent e6afc8a commit 627d2d6
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 26 deletions.
7 changes: 6 additions & 1 deletion include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from, int len);
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len);
static inline void skb_free_datagram_locked(struct sock *sk,
struct sk_buff *skb)
{
__skb_free_datagram_locked(sk, skb, 0);
}
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
Expand Down
2 changes: 2 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ struct sock {
#define SK_CAN_REUSE 1
#define SK_FORCE_REUSE 2

int sk_set_peek_off(struct sock *sk, int val);

static inline int sk_peek_offset(struct sock *sk, int flags)
{
if (unlikely(flags & MSG_PEEK)) {
Expand Down
9 changes: 6 additions & 3 deletions net/core/datagram.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,24 +301,27 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(skb_free_datagram);

void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
bool slow;

if (likely(atomic_read(&skb->users) == 1))
smp_rmb();
else if (likely(!atomic_dec_and_test(&skb->users)))
else if (likely(!atomic_dec_and_test(&skb->users))) {
sk_peek_offset_bwd(sk, len);
return;
}

slow = lock_sock_fast(sk);
sk_peek_offset_bwd(sk, len);
skb_orphan(skb);
sk_mem_reclaim_partial(sk);
unlock_sock_fast(sk, slow);

/* skb is now orphaned, can be freed outside of locked section */
__kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);
EXPORT_SYMBOL(__skb_free_datagram_locked);

/**
* skb_kill_datagram - Free a datagram skbuff forcibly
Expand Down
9 changes: 9 additions & 0 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -2187,6 +2187,15 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
}
EXPORT_SYMBOL(__sk_mem_reclaim);

int sk_set_peek_off(struct sock *sk, int val)
{
if (val < 0)
return -EINVAL;

sk->sk_peek_off = val;
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);

/*
* Set of default routines for initialising struct proto_ops when
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,7 @@ const struct proto_ops inet_dgram_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
Expand Down
22 changes: 11 additions & 11 deletions net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1294,7 +1294,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
struct sk_buff *skb;
unsigned int ulen, copied;
int peeked, off = 0;
int peeked, peeking, off;
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
Expand All @@ -1304,15 +1304,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
return ip_recv_error(sk, msg, len, addr_len);

try_again:
peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err);
if (!skb)
goto out;
return err;

ulen = skb->len;
copied = len;
if (copied > ulen)
copied = ulen;
if (copied > ulen - off)
copied = ulen - off;
else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;

Expand All @@ -1322,16 +1323,16 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
* coverage checksum (UDP-Lite), do it before the copy.
*/

if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
}

if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, 0, msg, copied);
err = skb_copy_datagram_msg(skb, off, msg, copied);
else {
err = skb_copy_and_csum_datagram_msg(skb, 0, msg);
err = skb_copy_and_csum_datagram_msg(skb, off, msg);

if (err == -EINVAL)
goto csum_copy_err;
Expand All @@ -1344,7 +1345,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
}
goto out_free;
skb_free_datagram_locked(sk, skb);
return err;
}

if (!peeked)
Expand All @@ -1368,9 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
if (flags & MSG_TRUNC)
err = ulen;

out_free:
skb_free_datagram_locked(sk, skb);
out:
__skb_free_datagram_locked(sk, skb, peeking ? -err : err);
return err;

csum_copy_err:
Expand Down
1 change: 1 addition & 0 deletions net/ipv6/af_inet6.c
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
Expand Down
22 changes: 11 additions & 11 deletions net/ipv6/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
unsigned int ulen, copied;
int peeked, off = 0;
int peeked, peeking, off;
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
Expand All @@ -371,15 +371,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);

try_again:
peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err);
if (!skb)
goto out;
return err;

ulen = skb->len;
copied = len;
if (copied > ulen)
copied = ulen;
if (copied > ulen - off)
copied = ulen - off;
else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;

Expand All @@ -391,16 +392,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
* coverage checksum (UDP-Lite), do it before the copy.
*/

if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
}

if (checksum_valid || skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, 0, msg, copied);
err = skb_copy_datagram_msg(skb, off, msg, copied);
else {
err = skb_copy_and_csum_datagram_msg(skb, 0, msg);
err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL)
goto csum_copy_err;
}
Expand All @@ -417,7 +418,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
UDP_MIB_INERRORS,
is_udplite);
}
goto out_free;
skb_free_datagram_locked(sk, skb);
return err;
}
if (!peeked) {
if (is_udp4)
Expand Down Expand Up @@ -465,9 +467,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (flags & MSG_TRUNC)
err = ulen;

out_free:
skb_free_datagram_locked(sk, skb);
out:
__skb_free_datagram_locked(sk, skb, peeking ? -err : err);
return err;

csum_copy_err:
Expand Down

0 comments on commit 627d2d6

Please sign in to comment.