From 112e72373d1f60f1e4558d0a7f0de5da39a1224d Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 11 Oct 2019 14:18:26 -0400 Subject: [PATCH 01/14] virtio-fs: Change module name to virtiofs.ko We have been calling it virtio_fs and even file name is virtio_fs.c. Module name is virtio_fs.ko but when registering file system user is supposed to specify filesystem type as "virtiofs". Masayoshi Mizuma reported that he specified filesytem type as "virtio_fs" and got this warning on console. ------------[ cut here ]------------ request_module fs-virtio_fs succeeded, but still no fs? WARNING: CPU: 1 PID: 1234 at fs/filesystems.c:274 get_fs_type+0x12c/0x138 Modules linked in: ... virtio_fs fuse virtio_net net_failover ... CPU: 1 PID: 1234 Comm: mount Not tainted 5.4.0-rc1 #1 So looks like kernel could find the module virtio_fs.ko but could not find filesystem type after that. It probably is better to rename module name to virtiofs.ko so that above warning goes away in case user ends up specifying wrong fs name. Reported-by: Masayoshi Mizuma Suggested-by: Stefan Hajnoczi Signed-off-by: Vivek Goyal Tested-by: Masayoshi Mizuma Reviewed-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- fs/fuse/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 6419a2b3510de..3e8cebfb59b7b 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o -obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o +obj-$(CONFIG_VIRTIO_FS) += virtiofs.o fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o +virtiofs-y += virtio_fs.o From 3f22c7467136adfa6d2a7baf7cd5c573f0641bd1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 15 Oct 2019 16:11:41 +0200 Subject: [PATCH 02/14] virtio-fs: don't show mount options Virtio-fs does not accept any mount options, so it's confusing and wrong to show any in /proc/mounts. Reported-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 4 ++++ fs/fuse/inode.c | 4 ++++ fs/fuse/virtio_fs.c | 1 + 3 files changed, 9 insertions(+) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 956aeaf961ae1..d148188cfca45 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -479,6 +479,7 @@ struct fuse_fs_context { bool destroy:1; bool no_control:1; bool no_force_umount:1; + bool no_mount_options:1; unsigned int max_read; unsigned int blksize; const char *subtype; @@ -713,6 +714,9 @@ struct fuse_conn { /** Do not allow MNT_FORCE umount */ unsigned int no_force_umount:1; + /* Do not show mount options */ + unsigned int no_mount_options:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e040e2a2b6210..16aec32f7f3d7 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -558,6 +558,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) struct super_block *sb = root->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); + if (fc->no_mount_options) + return 0; + seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id)); seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id)); if (fc->default_permissions) @@ -1180,6 +1183,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) fc->destroy = ctx->destroy; fc->no_control = ctx->no_control; fc->no_force_umount = ctx->no_force_umount; + fc->no_mount_options = ctx->no_mount_options; err = -ENOMEM; root = fuse_get_root_inode(sb, ctx->rootmode); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6af3f131e468a..e22a0c003c3d2 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -992,6 +992,7 @@ static int virtio_fs_fill_super(struct super_block *sb) .destroy = true, .no_control = true, .no_force_umount = true, + .no_mount_options = true, }; mutex_lock(&virtio_fs_mutex); From 2b319d1f6f92a4ced9897678113d176ee16ae85d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Oct 2019 09:11:40 +0200 Subject: [PATCH 03/14] fuse: don't dereference req->args on finished request Move the check for async request after check for the request being already finished and done with. Reported-by: syzbot+ae0bb7aae3de6b4594e2@syzkaller.appspotmail.com Fixes: d49937749fef ("fuse: stop copying args to fuse_req") Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index dadd617d826c9..ed1abc9e33cfc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,10 +276,12 @@ static void flush_bg_queue(struct fuse_conn *fc) void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_iqueue *fiq = &fc->iq; - bool async = req->args->end; + bool async; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; + + async = req->args->end; /* * test_and_set_bit() implies smp_mb() between bit * changing and below intr_entry check. Pairs with From 6c26f71759a6efc04b888dd2c1cc4f1cac38cdf0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Oct 2019 15:57:07 +0200 Subject: [PATCH 04/14] fuse: don't advise readdirplus for negative lookup If the FUSE_READDIRPLUS_AUTO feature is enabled, then lookups on a directory before/during readdir are used as an indication that READDIRPLUS should be used instead of READDIR. However if the lookup turns out to be negative, then selecting READDIRPLUS makes no sense. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d572c900bb0f2..b77954a275384 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -405,7 +405,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, else fuse_invalidate_entry_cache(entry); - fuse_advise_use_readdirplus(dir); + if (inode) + fuse_advise_use_readdirplus(dir); return newent; out_iput: From 51fecdd2555b3e0e05a78d30093c638d164a32f9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:22 -0400 Subject: [PATCH 05/14] virtiofs: Do not end request in submission context Submission context can hold some locks which end request code tries to hold again and deadlock can occur. For example, fc->bg_lock. If a background request is being submitted, it might hold fc->bg_lock and if we could not submit request (because device went away) and tried to end request, then deadlock happens. During testing, I also got a warning from deadlock detection code. So put requests on a list and end requests from a worker thread. I got following warning from deadlock detector. [ 603.137138] WARNING: possible recursive locking detected [ 603.137142] -------------------------------------------- [ 603.137144] blogbench/2036 is trying to acquire lock: [ 603.137149] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_request_end+0xdf/0x1c0 [fuse] [ 603.140701] [ 603.140701] but task is already holding lock: [ 603.140703] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_simple_background+0x92/0x1d0 [fuse] [ 603.140713] [ 603.140713] other info that might help us debug this: [ 603.140714] Possible unsafe locking scenario: [ 603.140714] [ 603.140715] CPU0 [ 603.140716] ---- [ 603.140716] lock(&(&fc->bg_lock)->rlock); [ 603.140718] lock(&(&fc->bg_lock)->rlock); [ 603.140719] [ 603.140719] *** DEADLOCK *** Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index e22a0c003c3d2..7ea58606cc1d3 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -30,6 +30,7 @@ struct virtio_fs_vq { struct virtqueue *vq; /* protected by ->lock */ struct work_struct done_work; struct list_head queued_reqs; + struct list_head end_reqs; /* End these requests */ struct delayed_work dispatch_work; struct fuse_dev *fud; bool connected; @@ -259,8 +260,27 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work) spin_unlock(&fsvq->lock); } -static void virtio_fs_dummy_dispatch_work(struct work_struct *work) +static void virtio_fs_request_dispatch_work(struct work_struct *work) { + struct fuse_req *req; + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + dispatch_work.work); + struct fuse_conn *fc = fsvq->fud->fc; + + pr_debug("virtio-fs: worker %s called.\n", __func__); + while (1) { + spin_lock(&fsvq->lock); + req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, + list); + if (!req) { + spin_unlock(&fsvq->lock); + return; + } + + list_del_init(&req->list); + spin_unlock(&fsvq->lock); + fuse_request_end(fc, req); + } } static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) @@ -502,6 +522,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs); INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, virtio_fs_hiprio_dispatch_work); spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); @@ -511,8 +532,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, spin_lock_init(&fs->vqs[i].lock); INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, - virtio_fs_dummy_dispatch_work); + virtio_fs_request_dispatch_work); INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); + INIT_LIST_HEAD(&fs->vqs[i].end_reqs); snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), "requests.%u", i - VQ_REQUEST); callbacks[i] = virtio_fs_vq_done; @@ -918,6 +940,7 @@ __releases(fiq->lock) struct fuse_conn *fc; struct fuse_req *req; struct fuse_pqueue *fpq; + struct virtio_fs_vq *fsvq; int ret; WARN_ON(list_empty(&fiq->pending)); @@ -951,7 +974,8 @@ __releases(fiq->lock) smp_mb__after_atomic(); retry: - ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req); + fsvq = &fs->vqs[queue_id]; + ret = virtio_fs_enqueue_req(fsvq, req); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { /* Virtqueue full. Retry submission */ @@ -965,7 +989,12 @@ __releases(fiq->lock) clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); spin_unlock(&fpq->lock); - fuse_request_end(fc, req); + + /* Can't end request in submission context. Use a worker */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->end_reqs); + schedule_delayed_work(&fsvq->dispatch_work, 0); + spin_unlock(&fsvq->lock); return; } } From 7ee1e2e631dbf0ff0df2a67a1e01ba3c1dce7a46 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:23 -0400 Subject: [PATCH 06/14] virtiofs: No need to check fpq->connected state In virtiofs we keep per queue connected state in virtio_fs_vq->connected and use that to end request if queue is not connected. And virtiofs does not even touch fpq->connected state. We probably need to merge these two at some point of time. For now, simplify the code a bit and do not worry about checking state of fpq->connected. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 7ea58606cc1d3..a2724b77221d6 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -960,13 +960,6 @@ __releases(fiq->lock) fpq = &fs->vqs[queue_id].fud->pq; spin_lock(&fpq->lock); - if (!fpq->connected) { - spin_unlock(&fpq->lock); - req->out.h.error = -ENODEV; - pr_err("virtio-fs: %s disconnected\n", __func__); - fuse_request_end(fc, req); - return; - } list_add_tail(&req->list, fpq->processing); spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); From 5dbe190f341206a7896f7e40c1e3a36933d812f3 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:24 -0400 Subject: [PATCH 07/14] virtiofs: Set FR_SENT flag only after request has been sent FR_SENT flag should be set when request has been sent successfully sent over virtqueue. This is used by interrupt logic to figure out if interrupt request should be sent or not. Also add it to fqp->processing list after sending it successfully. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index a2724b77221d6..6d153e70c87b0 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -857,6 +857,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, unsigned int i; int ret; bool notify; + struct fuse_pqueue *fpq; /* Does the sglist fit on the stack? */ total_sgs = sg_count_fuse_req(req); @@ -911,6 +912,15 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, goto out; } + /* Request successfully sent. */ + fpq = &fsvq->fud->pq; + spin_lock(&fpq->lock); + list_add_tail(&req->list, fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + fsvq->in_flight++; notify = virtqueue_kick_prepare(vq); @@ -939,7 +949,6 @@ __releases(fiq->lock) struct virtio_fs *fs; struct fuse_conn *fc; struct fuse_req *req; - struct fuse_pqueue *fpq; struct virtio_fs_vq *fsvq; int ret; @@ -958,14 +967,6 @@ __releases(fiq->lock) req->in.h.nodeid, req->in.h.len, fuse_len_args(req->args->out_numargs, req->args->out_args)); - fpq = &fs->vqs[queue_id].fud->pq; - spin_lock(&fpq->lock); - list_add_tail(&req->list, fpq->processing); - spin_unlock(&fpq->lock); - set_bit(FR_SENT, &req->flags); - /* matches barrier in request_wait_answer() */ - smp_mb__after_atomic(); - retry: fsvq = &fs->vqs[queue_id]; ret = virtio_fs_enqueue_req(fsvq, req); @@ -978,10 +979,6 @@ __releases(fiq->lock) } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); - spin_lock(&fpq->lock); - clear_bit(FR_SENT, &req->flags); - list_del_init(&req->list); - spin_unlock(&fpq->lock); /* Can't end request in submission context. Use a worker */ spin_lock(&fsvq->lock); From c17ea009610366146ec409fd6dc277e0f2510b10 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:25 -0400 Subject: [PATCH 08/14] virtiofs: Count pending forgets as in_flight forgets If virtqueue is full, we put forget requests on a list and these forgets are dispatched later using a worker. As of now we don't count these forgets in fsvq->in_flight variable. This means when queue is being drained, we have to have special logic to first drain these pending requests and then wait for fsvq->in_flight to go to zero. By counting pending forgets in fsvq->in_flight, we can get rid of special logic and just wait for in_flight to go to zero. Worker thread will kick and drain all the forgets anyway, leading in_flight to zero. I also need similar logic for normal request queue in next patch where I am about to defer request submission in the worker context if queue is full. This simplifies the code a bit. Also add two helper functions to inc/dec in_flight. Decrement in_flight helper will later used to call completion when in_flight reaches zero. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6d153e70c87b0..3ea613d5e34f2 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -67,6 +67,19 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) return &vq_to_fsvq(vq)->fud->pq; } +/* Should be called with fsvq->lock held. */ +static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) +{ + fsvq->in_flight++; +} + +/* Should be called with fsvq->lock held. */ +static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) +{ + WARN_ON(fsvq->in_flight <= 0); + fsvq->in_flight--; +} + static void release_virtio_fs_obj(struct kref *ref) { struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); @@ -110,22 +123,6 @@ static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) flush_delayed_work(&fsvq->dispatch_work); } -static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq) -{ - struct virtio_fs_forget *forget; - - spin_lock(&fsvq->lock); - while (1) { - forget = list_first_entry_or_null(&fsvq->queued_reqs, - struct virtio_fs_forget, list); - if (!forget) - break; - list_del(&forget->list); - kfree(forget); - } - spin_unlock(&fsvq->lock); -} - static void virtio_fs_drain_all_queues(struct virtio_fs *fs) { struct virtio_fs_vq *fsvq; @@ -133,9 +130,6 @@ static void virtio_fs_drain_all_queues(struct virtio_fs *fs) for (i = 0; i < fs->nvqs; i++) { fsvq = &fs->vqs[i]; - if (i == VQ_HIPRIO) - drain_hiprio_queued_reqs(fsvq); - virtio_fs_drain_queue(fsvq); } } @@ -254,7 +248,7 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work) while ((req = virtqueue_get_buf(vq, &len)) != NULL) { kfree(req); - fsvq->in_flight--; + dec_in_flight_req(fsvq); } } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); spin_unlock(&fsvq->lock); @@ -306,6 +300,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) list_del(&forget->list); if (!fsvq->connected) { + dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); kfree(forget); continue; @@ -327,13 +322,13 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) } else { pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", ret); + dec_in_flight_req(fsvq); kfree(forget); } spin_unlock(&fsvq->lock); return; } - fsvq->in_flight++; notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -472,7 +467,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work) fuse_request_end(fc, req); spin_lock(&fsvq->lock); - fsvq->in_flight--; + dec_in_flight_req(fsvq); spin_unlock(&fsvq->lock); } } @@ -730,6 +725,7 @@ __releases(fiq->lock) list_add_tail(&forget->list, &fsvq->queued_reqs); schedule_delayed_work(&fsvq->dispatch_work, msecs_to_jiffies(1)); + inc_in_flight_req(fsvq); } else { pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", ret); @@ -739,7 +735,7 @@ __releases(fiq->lock) goto out; } - fsvq->in_flight++; + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -921,7 +917,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); - fsvq->in_flight++; + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); From a9bfd9dd3417561d06c81de04f6d6c1e0c9b3d44 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Oct 2019 13:46:26 -0400 Subject: [PATCH 09/14] virtiofs: Retry request submission from worker context If regular request queue gets full, currently we sleep for a bit and retrying submission in submitter's context. This assumes submitter is not holding any spin lock. But this assumption is not true for background requests. For background requests, we are called with fc->bg_lock held. This can lead to deadlock where one thread is trying submission with fc->bg_lock held while request completion thread has called fuse_request_end() which tries to acquire fc->bg_lock and gets blocked. As request completion thread gets blocked, it does not make further progress and that means queue does not get empty and submitter can't submit more requests. To solve this issue, retry submission with the help of a worker, instead of retrying in submitter's context. We already do this for hiprio/forget requests. Reported-by: Chirantan Ekbote Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 61 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 3ea613d5e34f2..2de8fc0d6a246 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -55,6 +55,9 @@ struct virtio_fs_forget { struct list_head list; }; +static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, + struct fuse_req *req, bool in_flight); + static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) { struct virtio_fs *fs = vq->vdev->priv; @@ -260,6 +263,7 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work) struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, dispatch_work.work); struct fuse_conn *fc = fsvq->fud->fc; + int ret; pr_debug("virtio-fs: worker %s called.\n", __func__); while (1) { @@ -268,13 +272,45 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work) list); if (!req) { spin_unlock(&fsvq->lock); - return; + break; } list_del_init(&req->list); spin_unlock(&fsvq->lock); fuse_request_end(fc, req); } + + /* Dispatch pending requests */ + while (1) { + spin_lock(&fsvq->lock); + req = list_first_entry_or_null(&fsvq->queued_reqs, + struct fuse_req, list); + if (!req) { + spin_unlock(&fsvq->lock); + return; + } + list_del_init(&req->list); + spin_unlock(&fsvq->lock); + + ret = virtio_fs_enqueue_req(fsvq, req, true); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; + } + req->out.h.error = ret; + spin_lock(&fsvq->lock); + dec_in_flight_req(fsvq); + spin_unlock(&fsvq->lock); + pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", + ret); + fuse_request_end(fc, req); + } + } } static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) @@ -837,7 +873,7 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg, /* Add a request to a virtqueue and kick the device */ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, - struct fuse_req *req) + struct fuse_req *req, bool in_flight) { /* requests need at least 4 elements */ struct scatterlist *stack_sgs[6]; @@ -917,7 +953,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); - inc_in_flight_req(fsvq); + if (!in_flight) + inc_in_flight_req(fsvq); notify = virtqueue_kick_prepare(vq); spin_unlock(&fsvq->lock); @@ -963,15 +1000,21 @@ __releases(fiq->lock) req->in.h.nodeid, req->in.h.len, fuse_len_args(req->args->out_numargs, req->args->out_args)); -retry: fsvq = &fs->vqs[queue_id]; - ret = virtio_fs_enqueue_req(fsvq, req); + ret = virtio_fs_enqueue_req(fsvq, req, false); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOSPC) { - /* Virtqueue full. Retry submission */ - /* TODO use completion instead of timeout */ - usleep_range(20, 30); - goto retry; + /* + * Virtqueue full. Retry submission from worker + * context as we might be holding fc->bg_lock. + */ + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->queued_reqs); + inc_in_flight_req(fsvq); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + spin_unlock(&fsvq->lock); + return; } req->out.h.error = ret; pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); From 80da5a809d193c60d090cbdf4fe316781bc07965 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 23 Oct 2019 10:02:49 +0800 Subject: [PATCH 10/14] virtiofs: Remove set but not used variable 'fc' Fixes gcc '-Wunused-but-set-variable' warning: fs/fuse/virtio_fs.c: In function virtio_fs_wake_pending_and_unlock: fs/fuse/virtio_fs.c:983:20: warning: variable fc set but not used [-Wunused-but-set-variable] It is not used since commit 7ee1e2e631db ("virtiofs: No need to check fpq->connected state") Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 2de8fc0d6a246..a5c86048b96ed 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -980,7 +980,6 @@ __releases(fiq->lock) { unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ struct virtio_fs *fs; - struct fuse_conn *fc; struct fuse_req *req; struct virtio_fs_vq *fsvq; int ret; @@ -993,7 +992,6 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); fs = fiq->priv; - fc = fs->vqs[queue_id].fud->fc; pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", __func__, req->in.h.opcode, req->in.h.unique, From b24e7598db62386a95a3c8b9c75630c5d56fe077 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Oct 2019 14:26:37 +0200 Subject: [PATCH 11/14] fuse: flush dirty data/metadata before non-truncate setattr If writeback cache is enabled, then writes might get reordered with chmod/chown/utimes. The problem with this is that performing the write in the fuse daemon might itself change some of these attributes. In such case the following sequence of operations will result in file ending up with the wrong mode, for example: int fd = open ("suid", O_WRONLY|O_CREAT|O_EXCL); write (fd, "1", 1); fchown (fd, 0, 0); fchmod (fd, 04755); close (fd); This patch fixes this by flushing pending writes before performing chown/chmod/utimes. Reported-by: Giuseppe Scrivano Tested-by: Giuseppe Scrivano Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") Cc: # v3.15+ Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b77954a275384..54d638f9ba1ca 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1522,6 +1522,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, is_truncate = true; } + /* Flush dirty data/metadata before non-truncate SETATTR */ + if (is_wb && S_ISREG(inode->i_mode) && + attr->ia_valid & + (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | + ATTR_TIMES_SET)) { + err = write_inode_now(inode, true); + if (err) + return err; + + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); + } + if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); From e4648309b85a78f8c787457832269a8712a8673e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Oct 2019 14:26:37 +0200 Subject: [PATCH 12/14] fuse: truncate pending writes on O_TRUNC Make sure cached writes are not reordered around open(..., O_TRUNC), with the obvious wrong results. Fixes: 4d99ff8f12eb ("fuse: Turn writeback cache on") Cc: # v3.15+ Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0f0225686aeeb..6edf949b91397 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -217,7 +217,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); int err; - bool lock_inode = (file->f_flags & O_TRUNC) && + bool is_wb_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && fc->writeback_cache; @@ -225,16 +225,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; - if (lock_inode) + if (is_wb_truncate) { inode_lock(inode); + fuse_set_nowrite(inode); + } err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (!err) fuse_finish_open(inode, file); - if (lock_inode) + if (is_wb_truncate) { + fuse_release_nowrite(inode); inode_unlock(inode); + } return err; } From 9de55a37fcc5f1550a743910f493197223f5e384 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Mon, 19 Aug 2019 11:10:30 -0600 Subject: [PATCH 13/14] fuse: Add changelog entries for protocols 7.1 - 7.8 Retroactively add changelog entry for FUSE protocols 7.1 through 7.8. Signed-off-by: Alan Somers Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 802b0377a49e5..373cada898159 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -38,6 +38,43 @@ * * Protocol changelog: * + * 7.1: + * - add the following messages: + * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, + * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, + * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, + * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, + * FUSE_RELEASEDIR + * - add padding to messages to accommodate 32-bit servers on 64-bit kernels + * + * 7.2: + * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags + * - add FUSE_FSYNCDIR message + * + * 7.3: + * - add FUSE_ACCESS message + * - add FUSE_CREATE message + * - add filehandle to fuse_setattr_in + * + * 7.4: + * - add frsize to fuse_kstatfs + * - clean up request size limit checking + * + * 7.5: + * - add flags and max_write to fuse_init_out + * + * 7.6: + * - add max_readahead to fuse_init_in and fuse_init_out + * + * 7.7: + * - add FUSE_INTERRUPT message + * - add POSIX file lock support + * + * 7.8: + * - add lock_owner and flags fields to fuse_release_in + * - add FUSE_BMAP message + * - add FUSE_DESTROY message + * * 7.9: * - new fuse_getattr_in input argument of GETATTR * - add lk_flags in fuse_lk_in From 091d1a7267726ba162b12ce9332d76cdae602789 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 19 Aug 2019 08:48:26 +0300 Subject: [PATCH 14/14] fuse: redundant get_fuse_inode() calls in fuse_writepages_fill() Currently fuse_writepages_fill() calls get_fuse_inode() few times with the same argument. Signed-off-by: Vasily Averin Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6edf949b91397..db48a5cf86209 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2001,7 +2001,7 @@ static int fuse_writepages_fill(struct page *page, if (!data->ff) { err = -EIO; - data->ff = fuse_write_file_get(fc, get_fuse_inode(inode)); + data->ff = fuse_write_file_get(fc, fi); if (!data->ff) goto out_unlock; } @@ -2046,8 +2046,6 @@ static int fuse_writepages_fill(struct page *page, * under writeback, so we can release the page lock. */ if (data->wpa == NULL) { - struct fuse_inode *fi = get_fuse_inode(inode); - err = -ENOMEM; wpa = fuse_writepage_args_alloc(); if (!wpa) {