Skip to content

Commit

Permalink
pipe: Allow pipes to have kernel-reserved slots
Browse files Browse the repository at this point in the history
Split pipe->ring_size into two numbers:

 (1) pipe->ring_size - indicates the hard size of the pipe ring.

 (2) pipe->max_usage - indicates the maximum number of pipe ring slots that
     userspace-orchestrated events can fill.

This allows for a pipe that is writable both by the general kernel
notification facility and by userspace, allowing plenty of ring space for
notifications to be added whilst preventing userspace from being able to
pin too much unswappable kernel space.

Signed-off-by: David Howells <dhowells@redhat.com>
  • Loading branch information
David Howells committed Nov 15, 2019
1 parent 8cefc10 commit 6718b6f
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 24 deletions.
8 changes: 4 additions & 4 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->pipebufs++;
cs->nr_segs--;
} else {
if (cs->nr_segs >= cs->pipe->ring_size)
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;

page = alloc_page(GFP_HIGHUSER);
Expand Down Expand Up @@ -879,7 +879,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
struct pipe_buffer *buf;
int err;

if (cs->nr_segs >= cs->pipe->ring_size)
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;

err = unlock_request(cs->req);
Expand Down Expand Up @@ -1341,7 +1341,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!fud)
return -EPERM;

bufs = kvmalloc_array(pipe->ring_size, sizeof(struct pipe_buffer),
bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
GFP_KERNEL);
if (!bufs)
return -ENOMEM;
Expand All @@ -1353,7 +1353,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;

if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->ring_size) {
if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
ret = -EIO;
goto out;
}
Expand Down
10 changes: 6 additions & 4 deletions fs/pipe.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)

tail = pipe->tail;
head = pipe->head;
max_usage = pipe->ring_size;
max_usage = pipe->max_usage;
mask = pipe->ring_size - 1;

/* We try to merge small writes */
Expand Down Expand Up @@ -571,7 +571,7 @@ pipe_poll(struct file *filp, poll_table *wait)
}

if (filp->f_mode & FMODE_WRITE) {
if (!pipe_full(head, tail, pipe->ring_size))
if (!pipe_full(head, tail, pipe->max_usage))
mask |= EPOLLOUT | EPOLLWRNORM;
/*
* Most Unices do not set EPOLLERR for FIFOs but on Linux they
Expand Down Expand Up @@ -696,6 +696,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
if (pipe->bufs) {
init_waitqueue_head(&pipe->wait);
pipe->r_counter = pipe->w_counter = 1;
pipe->max_usage = pipe_bufs;
pipe->ring_size = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
Expand Down Expand Up @@ -1150,9 +1151,10 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
kfree(pipe->bufs);
pipe->bufs = bufs;
pipe->ring_size = nr_slots;
pipe->max_usage = nr_slots;
pipe->tail = tail;
pipe->head = head;
return pipe->ring_size * PAGE_SIZE;
return pipe->max_usage * PAGE_SIZE;

out_revert_acct:
(void) account_pipe_buffers(pipe->user, nr_slots, pipe->ring_size);
Expand Down Expand Up @@ -1185,7 +1187,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
ret = pipe_set_size(pipe, arg);
break;
case F_GETPIPE_SZ:
ret = pipe->ring_size * PAGE_SIZE;
ret = pipe->max_usage * PAGE_SIZE;
break;
default:
ret = -EINVAL;
Expand Down
26 changes: 13 additions & 13 deletions fs/splice.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
goto out;
}

while (!pipe_full(head, tail, pipe->ring_size)) {
while (!pipe_full(head, tail, pipe->max_usage)) {
struct pipe_buffer *buf = &pipe->bufs[head & mask];

buf->page = spd->pages[page_nr];
Expand Down Expand Up @@ -239,7 +239,7 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
if (unlikely(!pipe->readers)) {
send_sig(SIGPIPE, current, 0);
ret = -EPIPE;
} else if (pipe_full(head, tail, pipe->ring_size)) {
} else if (pipe_full(head, tail, pipe->max_usage)) {
ret = -EAGAIN;
} else {
pipe->bufs[head & mask] = *buf;
Expand All @@ -257,7 +257,7 @@ EXPORT_SYMBOL(add_to_pipe);
*/
int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
{
unsigned int max_usage = READ_ONCE(pipe->ring_size);
unsigned int max_usage = READ_ONCE(pipe->max_usage);

spd->nr_pages_max = max_usage;
if (max_usage <= PIPE_DEF_BUFFERS)
Expand Down Expand Up @@ -381,7 +381,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
ssize_t res;
int i;

if (pipe_full(pipe->head, pipe->tail, pipe->ring_size))
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return -EAGAIN;

/*
Expand Down Expand Up @@ -698,7 +698,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
.pos = *ppos,
.u.file = out,
};
int nbufs = pipe->ring_size;
int nbufs = pipe->max_usage;
struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
GFP_KERNEL);
ssize_t ret;
Expand All @@ -721,9 +721,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
if (ret <= 0)
break;

if (unlikely(nbufs < pipe->ring_size)) {
if (unlikely(nbufs < pipe->max_usage)) {
kfree(array);
nbufs = pipe->ring_size;
nbufs = pipe->max_usage;
array = kcalloc(nbufs, sizeof(struct bio_vec),
GFP_KERNEL);
if (!array) {
Expand Down Expand Up @@ -963,7 +963,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
loff_t pos = sd->pos, prev_pos = pos;

/* Don't try to read more than the pipe has space for. */
p_space = pipe->ring_size -
p_space = pipe->max_usage -
pipe_occupancy(pipe->head, pipe->tail);
read_len = min_t(size_t, len, p_space << PAGE_SHIFT);
ret = do_splice_to(in, &pos, pipe, read_len, flags);
Expand Down Expand Up @@ -1090,7 +1090,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
send_sig(SIGPIPE, current, 0);
return -EPIPE;
}
if (!pipe_full(pipe->head, pipe->tail, pipe->ring_size))
if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return 0;
if (flags & SPLICE_F_NONBLOCK)
return -EAGAIN;
Expand Down Expand Up @@ -1498,13 +1498,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
* Check pipe occupancy without the inode lock first. This function
* is speculative anyways, so missing one is ok.
*/
if (pipe_full(pipe->head, pipe->tail, pipe->ring_size))
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return 0;

ret = 0;
pipe_lock(pipe);

while (pipe_full(pipe->head, pipe->tail, pipe->ring_size)) {
while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
ret = -EPIPE;
Expand Down Expand Up @@ -1584,7 +1584,7 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
* pipe is empty or the output pipe is full.
*/
if (pipe_empty(i_head, i_tail) ||
pipe_full(o_head, o_tail, opipe->ring_size)) {
pipe_full(o_head, o_tail, opipe->max_usage)) {
/* Already processed some buffers, break */
if (ret)
break;
Expand Down Expand Up @@ -1706,7 +1706,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* output room, break.
*/
if (pipe_empty(i_head, i_tail) ||
pipe_full(o_head, o_tail, opipe->ring_size))
pipe_full(o_head, o_tail, opipe->max_usage))
break;

ibuf = &ipipe->bufs[i_tail & i_mask];
Expand Down
6 changes: 5 additions & 1 deletion include/linux/pipe_fs_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct pipe_buffer {
* @wait: reader/writer wait point in case of empty/full pipe
* @head: The point of buffer production
* @tail: The point of buffer consumption
* @max_usage: The maximum number of slots that may be used in the ring
* @ring_size: total number of buffers (should be a power of 2)
* @tmp_page: cached released page
* @readers: number of current readers of this pipe
Expand All @@ -50,6 +51,7 @@ struct pipe_inode_info {
wait_queue_head_t wait;
unsigned int head;
unsigned int tail;
unsigned int max_usage;
unsigned int ring_size;
unsigned int readers;
unsigned int writers;
Expand Down Expand Up @@ -150,9 +152,11 @@ static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int t
unsigned int p_occupancy, p_space;

p_occupancy = pipe_occupancy(head, tail);
if (p_occupancy >= pipe->ring_size)
if (p_occupancy >= pipe->max_usage)
return 0;
p_space = pipe->ring_size - p_occupancy;
if (p_space > pipe->max_usage)
p_space = pipe->max_usage;
return p_space;
}

Expand Down
4 changes: 2 additions & 2 deletions lib/iov_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
i_head++;
buf = &pipe->bufs[i_head & p_mask];
}
if (pipe_full(i_head, p_tail, pipe->ring_size))
if (pipe_full(i_head, p_tail, pipe->max_usage))
return 0;

buf->ops = &page_cache_pipe_buf_ops;
Expand Down Expand Up @@ -528,7 +528,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size,
pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
iter_head++;
}
while (!pipe_full(iter_head, p_tail, pipe->ring_size)) {
while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
struct page *page = alloc_page(GFP_USER);
if (!page)
Expand Down

0 comments on commit 6718b6f

Please sign in to comment.