Skip to content

Commit

Permalink
Merge branch 'af_unix_sendpage'
Browse the repository at this point in the history
Hannes Frederic Sowa says:

====================
net: af_unix: zerocopy stream bits

This series implements zerocopy support for AF_UNIX SOCK_STREAM sockets.

Changelog in the specific patches. Thanks to all the reviewers!
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 25, 2015
2 parents d98c3ed + 2b51457 commit b10e3d6
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 54 deletions.
1 change: 1 addition & 0 deletions fs/splice.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,

return ret;
}
EXPORT_SYMBOL_GPL(splice_to_pipe);

void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
Expand Down
14 changes: 12 additions & 2 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <linux/netdev_features.h>
#include <linux/sched.h>
#include <net/flow_dissector.h>
#include <linux/splice.h>

/* A. Checksumming of received packets by device.
*
Expand Down Expand Up @@ -861,6 +862,9 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
int len, int odd, struct sk_buff *skb),
void *from, int length);

int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
int offset, size_t size);

struct skb_seq_state {
__u32 lower_offset;
__u32 upper_offset;
Expand Down Expand Up @@ -2696,9 +2700,15 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
int len, __wsum csum);
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
ssize_t skb_socket_splice(struct sock *sk,
struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd);
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int len,
unsigned int flags);
unsigned int flags,
ssize_t (*splice_cb)(struct sock *,
struct pipe_inode_info *,
struct splice_pipe_desc *));
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
Expand Down
64 changes: 47 additions & 17 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -1870,15 +1870,39 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
return false;
}

/*
 * Splice callback for socket-backed skbs: hand @spd to @pipe through
 * splice_to_pipe() while the socket lock is dropped.
 *
 * @sk:   socket whose lock is held on entry (released and re-taken here)
 * @pipe: destination pipe
 * @spd:  descriptor of the pages to splice
 *
 * Returns the value produced by splice_to_pipe().
 */
ssize_t skb_socket_splice(struct sock *sk,
			  struct pipe_inode_info *pipe,
			  struct splice_pipe_desc *spd)
{
	/* ssize_t matches both splice_to_pipe() and this function's return
	 * type, so the result is passed through without narrowing.
	 */
	ssize_t ret;

	/* Drop the socket lock, otherwise we have reverse
	 * locking dependencies between sk_lock and i_mutex
	 * here as compared to sendfile(). We enter here
	 * with the socket lock held, and splice_to_pipe() will
	 * grab the pipe inode lock. For sendfile() emulation,
	 * we call into ->sendpage() with the i_mutex lock held
	 * and networking will grab the socket lock.
	 */
	release_sock(sk);
	ret = splice_to_pipe(pipe, spd);
	lock_sock(sk);

	return ret;
}

/*
* Map data from the skb to a pipe. Should handle the linear part,
* the fragments, and the frag list. It does NOT handle frag lists within
* the frag list, if such a thing exists. We'd probably need to recurse to
* handle that cleanly.
*/
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
unsigned int flags)
unsigned int flags,
ssize_t (*splice_cb)(struct sock *,
struct pipe_inode_info *,
struct splice_pipe_desc *))
{
struct partial_page partial[MAX_SKB_FRAGS];
struct page *pages[MAX_SKB_FRAGS];
Expand All @@ -1891,7 +1915,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
.spd_release = sock_spd_release,
};
struct sk_buff *frag_iter;
struct sock *sk = skb->sk;
int ret = 0;

/*
Expand All @@ -1914,23 +1937,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
}

done:
if (spd.nr_pages) {
/*
* Drop the socket lock, otherwise we have reverse
* locking dependencies between sk_lock and i_mutex
* here as compared to sendfile(). We enter here
* with the socket lock held, and splice_to_pipe() will
* grab the pipe inode lock. For sendfile() emulation,
* we call into ->sendpage() with the i_mutex lock held
* and networking will grab the socket lock.
*/
release_sock(sk);
ret = splice_to_pipe(pipe, &spd);
lock_sock(sk);
}
if (spd.nr_pages)
ret = splice_cb(sk, pipe, &spd);

return ret;
}
EXPORT_SYMBOL_GPL(skb_splice_bits);

/**
* skb_store_bits - store bits from kernel buffer to skb
Expand Down Expand Up @@ -2915,6 +2927,24 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_append_datato_frags);

/*
 * Add @size bytes of @page, starting at @offset, to the page fragments
 * of @skb.
 *
 * If skb_can_coalesce() accepts the (page, offset) pair for the current
 * fragment count, the last existing fragment is grown by @size and no
 * new slot is used. Otherwise, if a slot is still free, get_page() is
 * called on @page and the next fragment descriptor is filled in.
 *
 * Returns 0 on success, or -EMSGSIZE when the data cannot be coalesced
 * and all MAX_SKB_FRAGS slots are already in use.
 */
int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
			 int offset, size_t size)
{
	int nr_frags = skb_shinfo(skb)->nr_frags;

	/* Coalescing path: extend the last fragment in place. */
	if (skb_can_coalesce(skb, nr_frags, page, offset)) {
		skb_frag_size_add(&skb_shinfo(skb)->frags[nr_frags - 1], size);
		return 0;
	}

	/* Every fragment slot is taken and coalescing was not possible. */
	if (nr_frags >= MAX_SKB_FRAGS)
		return -EMSGSIZE;

	/* New fragment: get_page() first, then record it in the free slot. */
	get_page(page);
	skb_fill_page_desc(skb, nr_frags, page, offset, size);

	return 0;
}
EXPORT_SYMBOL_GPL(skb_append_pagefrags);

/**
* skb_pull_rcsum - pull skb and update receive checksum
* @skb: buffer to update
Expand Down
15 changes: 4 additions & 11 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -1233,11 +1233,9 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
}

while (size > 0) {
int i;

if (skb_is_gso(skb))
if (skb_is_gso(skb)) {
len = size;
else {
} else {

/* Check if the remaining data fits into current packet. */
len = mtu - skb->len;
Expand Down Expand Up @@ -1289,15 +1287,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
continue;
}

i = skb_shinfo(skb)->nr_frags;
if (len > size)
len = size;
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
} else if (i < MAX_SKB_FRAGS) {
get_page(page);
skb_fill_page_desc(skb, i, page, offset, len);
} else {

if (skb_append_pagefrags(skb, page, offset, len)) {
err = -EMSGSIZE;
goto error;
}
Expand Down
5 changes: 3 additions & 2 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -695,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
struct tcp_splice_state *tss = rd_desc->arg.data;
int ret;

ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
tss->flags);
ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
min(rd_desc->count, len), tss->flags,
skb_socket_splice);
if (ret > 0)
rd_desc->count -= ret;
return ret;
Expand Down
Loading

0 comments on commit b10e3d6

Please sign in to comment.