From 7cabc61e01a0a8b663bd2b4c982aa53048218734 Mon Sep 17 00:00:00 2001 From: Robert Doebbelin Date: Mon, 7 Mar 2016 09:50:56 +0100 Subject: [PATCH 1/3] fuse: do not use iocb after it may have been freed There's a race in fuse_direct_IO(), whereby is_sync_kiocb() is called on an iocb that could have been freed if async io has already completed. The fix in this case is simple and obvious: cache the result before starting io. It was discovered by KASan: kernel: ================================================================== kernel: BUG: KASan: use after free in fuse_direct_IO+0xb1a/0xcc0 at addr ffff88036c414390 Signed-off-by: Robert Doebbelin Signed-off-by: Miklos Szeredi Fixes: bcba24ccdc82 ("fuse: enable asynchronous processing direct IO") Cc: # 3.10+ --- fs/fuse/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b03d253ece159..34a23df361ca2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2843,6 +2843,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) loff_t i_size; size_t count = iov_iter_count(iter); struct fuse_io_priv *io; + bool is_sync = is_sync_kiocb(iocb); pos = offset; inode = file->f_mapping->host; @@ -2882,11 +2883,11 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && + if (!is_sync && (offset + count > i_size) && iov_iter_rw(iter) == WRITE) io->async = false; - if (io->async && is_sync_kiocb(iocb)) + if (io->async && is_sync) io->done = &wait; if (iov_iter_rw(iter) == WRITE) { @@ -2900,7 +2901,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (!is_sync_kiocb(iocb)) + if (!is_sync) return -EIOCBQUEUED; wait_for_completion(&wait); From 744742d692e37ad5c20630e57d526c8f2e2fe3c9 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Fri, 11 Mar 2016 10:35:34 -0600 Subject: [PATCH 2/3] fuse: Add reference counting for fuse_io_priv The 'reqs' member of fuse_io_priv serves two purposes. First is to track the number of oustanding async requests to the server and to signal that the io request is completed. The second is to be a reference count on the structure to know when it can be freed. For sync io requests these purposes can be at odds. fuse_direct_IO() wants to block until the request is done, and since the signal is sent when 'reqs' reaches 0 it cannot keep a reference to the object. Yet it needs to use the object after the userspace server has completed processing requests. This leads to some handshaking and special casing that it needlessly complicated and responsible for at least one race condition. It's much cleaner and safer to maintain a separate reference count for the object lifecycle and to let 'reqs' just be a count of outstanding requests to the userspace server. Then we can know for sure when it is safe to free the object without any handshaking or special cases. The catch here is that most of the time these objects are stack allocated and should not be freed. Initializing these objects with a single reference that is never released prevents accidental attempts to free the objects. Fixes: 9d5722b7777e ("fuse: handle synchronous iocbs internally") Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Seth Forshee Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 4 ++-- fs/fuse/file.c | 28 +++++++++++++++++++++------- fs/fuse/fuse_i.h | 9 +++++++++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 8e3ee1936c7e3..c5b6b71654893 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -90,7 +90,7 @@ static struct list_head *cuse_conntbl_head(dev_t devt) static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to) { - struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp); loff_t pos = 0; return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE); @@ -98,7 +98,7 @@ static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to) static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from) { - struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp); loff_t pos = 0; /* * No locking or generic_write_checks(), the server is diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34a23df361ca2..416108b424128 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -528,6 +528,11 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } } +static void fuse_io_release(struct kref *kref) +{ + kfree(container_of(kref, struct fuse_io_priv, refcnt)); +} + static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io) { if (io->err) @@ -585,8 +590,9 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) } io->iocb->ki_complete(io->iocb, res, 0); - kfree(io); } + + kref_put(&io->refcnt, fuse_io_release); } static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) @@ -613,6 +619,7 @@ static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req, size_t num_bytes, struct fuse_io_priv *io) { spin_lock(&io->lock); + kref_get(&io->refcnt); io->size += num_bytes; io->reqs++; spin_unlock(&io->lock); @@ -691,7 +698,7 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode, static int fuse_do_readpage(struct file *file, struct page *page) { - struct fuse_io_priv io = { .async = 0, .file = file }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file); struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; @@ -984,7 +991,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, size_t res; unsigned offset; unsigned i; - struct fuse_io_priv io = { .async = 0, .file = file }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file); for (i = 0; i < req->num_pages; i++) fuse_wait_on_page_writeback(inode, req->pages[i]->index); @@ -1398,7 +1405,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb->ki_filp); return __fuse_direct_read(&io, to, &iocb->ki_pos); } @@ -1406,7 +1413,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct fuse_io_priv io = { .async = 0, .file = file }; + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file); ssize_t res; if (is_bad_inode(inode)) @@ -2864,6 +2871,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) if (!io) return -ENOMEM; spin_lock_init(&io->lock); + kref_init(&io->refcnt); io->reqs = 1; io->bytes = -1; io->size = 0; @@ -2887,8 +2895,14 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) iov_iter_rw(iter) == WRITE) io->async = false; - if (io->async && is_sync) + if (io->async && is_sync) { + /* + * Additional reference to keep io around after + * calling fuse_aio_complete() + */ + kref_get(&io->refcnt); io->done = &wait; + } if (iov_iter_rw(iter) == WRITE) { ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); @@ -2908,7 +2922,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) ret = fuse_get_res_by_io(io); } - kfree(io); + kref_put(&io->refcnt, fuse_io_release); if (iov_iter_rw(iter) == WRITE) { if (ret > 0) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ce394b5fe6b43..eddbe02c40289 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -22,6 +22,7 @@ #include #include #include +#include /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -243,6 +244,7 @@ struct fuse_args { /** The request IO state (for asynchronous processing) */ struct fuse_io_priv { + struct kref refcnt; int async; spinlock_t lock; unsigned reqs; @@ -256,6 +258,13 @@ struct fuse_io_priv { struct completion *done; }; +#define FUSE_IO_PRIV_SYNC(f) \ +{ \ + .refcnt = { ATOMIC_INIT(1) }, \ + .async = 0, \ + .file = f, \ +} + /** * Request flags * From 742f992708dff0ada8b426228900ffb009c7167b Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Mon, 14 Mar 2016 21:57:35 -0700 Subject: [PATCH 3/3] fuse: return patrial success from fuse_direct_io() If a user calls writev/readv in direct io mode with partially valid data in the iovec array such that any vector other than the first one in the array contains invalid data, we currently return the error for the invalid iovec. Instead, we should return the number of bytes already written/read and not the error as we do in the non direct_io case. Reported-by: Alexey Kodanev Signed-off-by: Ashish Samant Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 416108b424128..9dde38f12c07b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1247,6 +1247,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t *nbytesp, int write) { size_t nbytes = 0; /* # bytes already packed in req */ + ssize_t ret = 0; /* Special case for kernel I/O: can copy directly into the buffer */ if (ii->type & ITER_KVEC) { @@ -1266,13 +1267,12 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; size_t start; - ssize_t ret = iov_iter_get_pages(ii, - &req->pages[req->num_pages], + ret = iov_iter_get_pages(ii, &req->pages[req->num_pages], *nbytesp - nbytes, req->max_pages - req->num_pages, &start); if (ret < 0) - return ret; + break; iov_iter_advance(ii, ret); nbytes += ret; @@ -1295,7 +1295,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, *nbytesp = nbytes; - return 0; + return ret; } static inline int fuse_iter_npages(const struct iov_iter *ii_p) @@ -1319,6 +1319,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; ssize_t res = 0; struct fuse_req *req; + int err = 0; if (io->async) req = fuse_get_req_for_background(fc, fuse_iter_npages(iter)); @@ -1339,11 +1340,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, size_t nres; fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); - int err = fuse_get_user_pages(req, iter, &nbytes, write); - if (err) { - res = err; + err = fuse_get_user_pages(req, iter, &nbytes, write); + if (err && !nbytes) break; - } if (write) nres = fuse_send_write(req, io, pos, nbytes, owner); @@ -1353,11 +1352,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!io->async) fuse_release_user_pages(req, !write); if (req->out.h.error) { - if (!res) - res = req->out.h.error; + err = req->out.h.error; break; } else if (nres > nbytes) { - res = -EIO; + res = 0; + err = -EIO; break; } count -= nres; @@ -1381,7 +1380,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (res > 0) *ppos = pos; - return res; + return res > 0 ? res : err; } EXPORT_SYMBOL_GPL(fuse_direct_io);