
---
r: 29774
b: refs/heads/master
c: 51eb01e
h: refs/heads/master
v: v3
Miklos Szeredi authored and Linus Torvalds committed Jun 25, 2006
1 parent ca780c9 commit 1c818fa
Showing 7 changed files with 113 additions and 254 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 3e8c54fad89144b8d63cc41619f363df1ec7cc42
refs/heads/master: 51eb01e73599efb88c6c20b1c226d20309a75450
40 changes: 6 additions & 34 deletions trunk/Documentation/filesystems/fuse.txt
@@ -304,25 +304,7 @@ Scenario 1 - Simple deadlock
| | for "file"]
| | *DEADLOCK*

The solution for this is to allow requests to be interrupted while
they are in userspace:

| [interrupted by signal] |
| <fuse_unlink() |
| [release semaphore] | [semaphore acquired]
| <sys_unlink() |
| | >fuse_unlink()
| | [queue req on fc->pending]
| | [wake up fc->waitq]
| | [sleep on req->waitq]

If the filesystem daemon is single threaded, this will stop here,
since there's no other thread to dequeue and execute the request.
In this case the solution is to kill the FUSE daemon as well. If
there are multiple serving threads, you just have to keep killing
them as long as any remain.

Moral: a filesystem which deadlocks can soon find itself dead.
The solution for this is to allow the filesystem to be aborted.
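
The abort mechanism referred to here is the connection's sysfs
'abort' attribute. As a rough sketch of triggering it from a helper
program (the per-connection number in the example path is
hypothetical, and the helper name is made up for illustration):

/*
 * Illustrative sketch: abort a FUSE connection by writing to its
 * sysfs 'abort' attribute.  A real caller must discover the
 * connection number first.
 */
#include <fcntl.h>
#include <unistd.h>

static int abort_fuse_conn(const char *attr)
{
        /* attr: e.g. "/sys/fs/fuse/connections/42/abort" */
        int fd = open(attr, O_WRONLY);
        if (fd < 0)
                return -1;
        ssize_t n = write(fd, "1", 1);
        close(fd);
        return n == 1 ? 0 : -1;
}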

Scenario 2 - Tricky deadlock
----------------------------
@@ -355,24 +337,14 @@ but is caused by a pagefault.
| | [lock page]
| | * DEADLOCK *

Solution is again to let the request be interrupted (not
elaborated further).
Solution is basically the same as above.

An additional problem is that while the write buffer is being
copied to the request, the request must not be interrupted. This
is because the destination address of the copy may not be valid
after the request is interrupted.

This is solved by doing the copy atomically, and allowing
interruption while the page(s) belonging to the write buffer are
faulted with get_user_pages(). The 'req->locked' flag indicates
when the copy is taking place, and interruption is delayed until
this flag is unset.

Scenario 3 - Tricky deadlock with asynchronous read
---------------------------------------------------

The same situation as above, except thread-1 will wait on page lock
and hence it will be uninterruptible as well. The solution is to
abort the connection with forced umount (if mount is attached) or
through the abort attribute in sysfs.
This is solved by doing the copy atomically, and allowing abort
while the page(s) belonging to the write buffer are faulted with
get_user_pages(). The 'req->locked' flag indicates when the copy is
taking place, and abort is delayed until this flag is unset.
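
The 'req->locked' protocol described above can be modeled in plain
userspace C. The following is only a sketch -- pthread primitives
stand in for fc->lock and the wait queues, and all names are
illustrative rather than the kernel's -- but the ordering is the one
the text describes: the copier marks the request locked before
touching its buffers, and an abort must wait for the flag to clear.

#include <pthread.h>

struct model_req {
        pthread_mutex_t lock;      /* stands in for fc->lock */
        pthread_cond_t  unlocked;  /* an aborter sleeps here */
        int locked;                /* copy in progress, abort must wait */
        int aborted;               /* connection torn down */
};

/* Copier side: refuse to start if already aborted, else mark locked. */
static int lock_request(struct model_req *req)
{
        int err = 0;

        pthread_mutex_lock(&req->lock);
        if (req->aborted)
                err = -1;          /* too late, the connection is gone */
        else
                req->locked = 1;
        pthread_mutex_unlock(&req->lock);
        return err;
}

static void unlock_request(struct model_req *req)
{
        pthread_mutex_lock(&req->lock);
        req->locked = 0;
        pthread_cond_broadcast(&req->unlocked); /* wake a waiting aborter */
        pthread_mutex_unlock(&req->lock);
}

/* Abort side: teardown is delayed until no copy is in flight. */
static void abort_request(struct model_req *req)
{
        pthread_mutex_lock(&req->lock);
        req->aborted = 1;
        while (req->locked)
                pthread_cond_wait(&req->unlocked, &req->lock);
        pthread_mutex_unlock(&req->lock);
        /* now safe to end the request and free its buffers */
}

The copy path is then lock_request(), copy the pages,
unlock_request() -- exactly the window during which abort is delayed.
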
157 changes: 44 additions & 113 deletions trunk/fs/fuse/dev.c
@@ -64,18 +64,6 @@ static void restore_sigs(sigset_t *oldset)
sigprocmask(SIG_SETMASK, oldset, NULL);
}

/*
* Reset request, so that it can be reused
*
* The caller must be _very_ careful to make sure, that it is holding
* the only reference to req
*/
void fuse_reset_request(struct fuse_req *req)
{
BUG_ON(atomic_read(&req->count) != 1);
fuse_request_init(req);
}

static void __fuse_get_request(struct fuse_req *req)
{
atomic_inc(&req->count);
@@ -103,6 +91,10 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
if (intr)
goto out;

err = -ENOTCONN;
if (!fc->connected)
goto out;

req = fuse_request_alloc();
err = -ENOMEM;
if (!req)
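
The -ENOTCONN check added in this hunk matters because an abort now
wakes sleepers on fc->blocked_waitq (see fuse_abort_conn() below): a
wakeup can mean "aborted" rather than "no longer blocked", so the
connection state must be rechecked. A rough userspace model of that
ordering follows; the struct and function names are illustrative, and
signal handling and allocation are elided.

#include <errno.h>
#include <pthread.h>

struct model_conn {
        pthread_mutex_t lock;
        pthread_cond_t  blocked_waitq;
        int connected;
        int blocked;
};

/* Returns 0 on success, -ENOTCONN if the connection was aborted while
 * the caller slept.  The real code also returns -EINTR on signals and
 * -ENOMEM on allocation failure; both are elided here. */
static int get_req(struct model_conn *fc)
{
        int err = 0;

        pthread_mutex_lock(&fc->lock);
        while (fc->blocked)             /* throttled: sleep */
                pthread_cond_wait(&fc->blocked_waitq, &fc->lock);
        if (!fc->connected)             /* woken by an abort? */
                err = -ENOTCONN;        /* the newly added check */
        pthread_mutex_unlock(&fc->lock);
        return err;
}
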
@@ -128,114 +120,39 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
}
}

/*
* Called with sbput_sem held for read (request_end) or write
* (fuse_put_super). By the time fuse_put_super() is finished, all
* inodes belonging to background requests must be released, so the
* iputs have to be done within the locked region.
*/
void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
{
iput(req->inode);
iput(req->inode2);
spin_lock(&fc->lock);
list_del(&req->bg_entry);
if (fc->num_background == FUSE_MAX_BACKGROUND) {
fc->blocked = 0;
wake_up_all(&fc->blocked_waitq);
}
fc->num_background--;
spin_unlock(&fc->lock);
}

/*
* This function is called when a request is finished. Either a reply
* has arrived or it was interrupted (and not yet sent) or some error
* occurred during communication with userspace, or the device file
* was closed. In case of a background request the references to the
* stored objects are released. The requester thread is woken up (if
* still waiting), the 'end' callback is called if given, else the
* reference to the request is released
*
* Releasing extra reference for foreground requests must be done
* within the same locked region as setting state to finished. This
* is because fuse_reset_request() may be called after the request is
* finished and it must be the sole possessor. If the request is
* interrupted and put in the background, it will return with an error
* and hence never be reset and reused.
* was closed. The requester thread is woken up (if still waiting),
* the 'end' callback is called if given, else the reference to the
* request is released
*
* Called with fc->lock, unlocks it
*/
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
{
void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
req->end = NULL;
list_del(&req->list);
req->state = FUSE_REQ_FINISHED;
if (!req->background) {
spin_unlock(&fc->lock);
wake_up(&req->waitq);
fuse_put_request(fc, req);
} else {
void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
req->end = NULL;
spin_unlock(&fc->lock);
down_read(&fc->sbput_sem);
if (fc->mounted)
fuse_release_background(fc, req);
up_read(&fc->sbput_sem);

/* fput must go outside sbput_sem, otherwise it can deadlock */
if (req->file)
fput(req->file);

if (end)
end(fc, req);
else
fuse_put_request(fc, req);
if (req->background) {
if (fc->num_background == FUSE_MAX_BACKGROUND) {
fc->blocked = 0;
wake_up_all(&fc->blocked_waitq);
}
fc->num_background--;
}
}

/*
* Unfortunately request interruption doesn't just solve the deadlock
* problem, it causes problems too. These stem from the fact that an
* interrupted request continues to be processed in userspace,
* while all the locks and object references (inode and file) held
* during the operation are released.
*
* Releasing the locks is exactly why there's a need to interrupt the
* request, so there's not a lot that can be done about this, except
* introduce additional locking in userspace.
*
* More important is to keep inode and file references until userspace
* has replied, otherwise FORGET and RELEASE could be sent while the
* inode/file is still used by the filesystem.
*
* For this reason the concept of "background" request is introduced.
* An interrupted request is backgrounded if it has already been sent
* to userspace. Backgrounding involves getting an extra reference to
* inode(s) or file used in the request, and adding the request to
* fc->background list. When a reply is received for a background
* request, the object references are released, and the request is
* removed from the list. If the filesystem is unmounted while there
* are still background requests, the list is walked and references
* are released as if a reply was received.
*
* There's one more use for a background request. The RELEASE message is
* always sent as background, since it doesn't return an error or
* data.
*/
static void background_request(struct fuse_conn *fc, struct fuse_req *req)
{
req->background = 1;
list_add(&req->bg_entry, &fc->background);
fc->num_background++;
if (fc->num_background == FUSE_MAX_BACKGROUND)
fc->blocked = 1;
if (req->inode)
req->inode = igrab(req->inode);
if (req->inode2)
req->inode2 = igrab(req->inode2);
spin_unlock(&fc->lock);
dput(req->dentry);
mntput(req->vfsmount);
if (req->file)
get_file(req->file);
fput(req->file);
wake_up(&req->waitq);
if (end)
end(fc, req);
else
fuse_put_request(fc, req);
}
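
The invariant the removed scheme maintained was that an abandoned but
already-sent request keeps its objects pinned until the reply (or
unmount) releases them. A bare-bones refcount model of that idea,
with grab()/drop() standing in for igrab()/iput() and
get_file()/fput() (all names here are illustrative):

struct obj {
        int refs;
};

static void grab(struct obj *o) { o->refs++; } /* ~igrab / get_file */
static void drop(struct obj *o) { o->refs--; } /* ~iput  / fput     */

/* Backgrounding an interrupted-but-sent request: the caller is about
 * to return with an error and drop its own references, so take extra
 * ones that live until the eventual reply (or unmount). */
static void background(struct obj *inode, struct obj *file)
{
        grab(inode);
        grab(file);
}

/* On reply, or when unmount walks the background list: release. */
static void background_end(struct obj *inode, struct obj *file)
{
        drop(inode);
        drop(file);
}

With interruption gone, the requester itself now waits until the
reply arrives, so its own references suffice and this machinery can
go.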

/* Called with fc->lock held. Releases, and then reacquires it. */
@@ -244,9 +161,14 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
sigset_t oldset;

spin_unlock(&fc->lock);
block_sigs(&oldset);
wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
restore_sigs(&oldset);
if (req->force)
wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
else {
block_sigs(&oldset);
wait_event_interruptible(req->waitq,
req->state == FUSE_REQ_FINISHED);
restore_sigs(&oldset);
}
spin_lock(&fc->lock);
if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
return;
@@ -268,8 +190,11 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
if (req->state == FUSE_REQ_PENDING) {
list_del(&req->list);
__fuse_put_request(req);
} else if (req->state == FUSE_REQ_SENT)
background_request(fc, req);
} else if (req->state == FUSE_REQ_SENT) {
spin_unlock(&fc->lock);
wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
spin_lock(&fc->lock);
}
}
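
The policy this function now implements can be summarized per request
state: forced requests (req->force) skip the interruptible phase
entirely, and for the rest a signal's effect depends on how far the
request got. A compilable sketch of just that decision table (the
enum loosely mirrors the FUSE_REQ_* states; the function is
illustrative):

/* Illustrative decision table for a signal arriving mid-wait. */
enum req_state { REQ_PENDING, REQ_SENT, REQ_FINISHED };

static const char *on_signal(enum req_state s)
{
        switch (s) {
        case REQ_PENDING:
                return "dequeue and put the request: userspace never saw it";
        case REQ_SENT:
                return "too late to take it back: wait uninterruptibly";
        case REQ_FINISHED:
                return "nothing to do: the reply already arrived";
        }
        return "unreachable";
}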

static unsigned len_args(unsigned numargs, struct fuse_arg *args)
@@ -327,8 +252,12 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
spin_lock(&fc->lock);
background_request(fc, req);
if (fc->connected) {
req->background = 1;
fc->num_background++;
if (fc->num_background == FUSE_MAX_BACKGROUND)
fc->blocked = 1;

queue_request(fc, req);
spin_unlock(&fc->lock);
} else {
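
The accounting in this hunk pairs with the request_end() changes
above: queuing the FUSE_MAX_BACKGROUND-th background request sets
fc->blocked, and completing it clears the flag and wakes the
throttled callers. A userspace model of that pairing (the constant's
value and all names are illustrative):

#include <pthread.h>

#define MAX_BACKGROUND 12 /* stands in for FUSE_MAX_BACKGROUND */

struct throttle {
        pthread_mutex_t lock;
        pthread_cond_t  blocked_waitq;
        int num_background;
        int blocked;
};

/* Queuing side, as in request_send_nowait(). */
static void bg_queue(struct throttle *t)
{
        pthread_mutex_lock(&t->lock);
        t->num_background++;
        if (t->num_background == MAX_BACKGROUND)
                t->blocked = 1; /* new callers will sleep in get_req */
        pthread_mutex_unlock(&t->lock);
}

/* Completion side, as in request_end(). */
static void bg_end(struct throttle *t)
{
        pthread_mutex_lock(&t->lock);
        if (t->num_background == MAX_BACKGROUND) {
                t->blocked = 0;
                pthread_cond_broadcast(&t->blocked_waitq);
        }
        t->num_background--; /* decrement after the check, as here */
        pthread_mutex_unlock(&t->lock);
}
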
@@ -883,10 +812,12 @@ void fuse_abort_conn(struct fuse_conn *fc)
spin_lock(&fc->lock);
if (fc->connected) {
fc->connected = 0;
fc->blocked = 0;
end_io_requests(fc);
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
spin_unlock(&fc->lock);
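
Note how the two added lines pair with the fuse_get_req() change: an
abort must clear fc->blocked and wake blocked_waitq, or a throttled
caller could sleep forever on a dead connection; the woken caller
then trips the new -ENOTCONN check. The teardown ordering, as a
userspace model (types and names illustrative; failing the queued
requests is elided):

#include <pthread.h>

struct model_conn {
        pthread_mutex_t lock;
        pthread_cond_t  waitq;         /* readers of /dev/fuse         */
        pthread_cond_t  blocked_waitq; /* callers throttled in get_req */
        int connected;
        int blocked;
};

static void abort_conn(struct model_conn *fc)
{
        pthread_mutex_lock(&fc->lock);
        if (fc->connected) {
                fc->connected = 0; /* no new requests can be queued  */
                fc->blocked = 0;   /* nobody stays throttled forever */
                /* end pending and processing requests here (elided) */
                pthread_cond_broadcast(&fc->waitq);         /* readers */
                pthread_cond_broadcast(&fc->blocked_waitq); /* get_req */
        }
        pthread_mutex_unlock(&fc->lock);
}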
