Skip to content

Commit

Permalink
fuse: separate queue for FORGET requests
Browse files Browse the repository at this point in the history
Terje Malmedal reports that a fuse filesystem with 32 million inodes
on a machine with lots of memory can go unresponsive for up to 30
minutes when all those inodes are evicted from the icache.

The reason is that FORGET messages, sent when the inode is evicted,
are queued up together with regular filesystem requests, and while the
huge queue of FORGET messages is being processed no other filesystem
operation can proceed.

Since a full fuse request structure is allocated for each inode, these
take up quite a bit of memory as well.

To solve these issues, create a slim 'fuse_forget_link' structure
containing just the minimum of information required to send the FORGET
request and chain these on a separate queue.

When userspace asks for a request, make sure that FORGET and
non-FORGET requests are selected fairly: for every 8 non-FORGET
requests allow 16 FORGET requests.  This makes sure FORGETs do not
pile up, yet other requests are also allowed to proceed while the
queued FORGETs are processed.

Reported-by: Terje Malmedal <terje.malmedal@usit.uio.no>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
  • Loading branch information
Miklos Szeredi committed Dec 7, 2010
1 parent 8ac8350 commit 07e77dc
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 64 deletions.
86 changes: 77 additions & 9 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,20 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

/*
 * Queue a FORGET for @nodeid on the connection's dedicated forget list.
 *
 * @fc:      connection whose forget queue receives the entry
 * @forget:  caller-provided link; it is handed over to the queue and is
 *           freed by whoever dequeues it
 * @nodeid:  node ID stored in the link
 * @nlookup: lookup count stored in the link
 *
 * The link is appended at the tail under fc->lock, after which waiters on
 * fc->waitq are woken and SIGIO is raised through fc->fasync.
 *
 * NOTE(review): forget->next is not written here, yet a NULL ->next is
 * what marks the end of the list - presumably the allocator hands out
 * zeroed links; confirm against fuse_alloc_forget().
 */
void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
		       u64 nodeid, u64 nlookup)
{
	/* Fill in the payload before the link becomes reachable from the list */
	forget->nlookup = nlookup;
	forget->nodeid = nodeid;

	spin_lock(&fc->lock);

	/* Append behind the current tail so entries leave in FIFO order */
	fc->forget_list_tail->next = forget;
	fc->forget_list_tail = forget;

	/* Notify sleepers on the wait queue and async (SIGIO) listeners */
	wake_up(&fc->waitq);
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);

	spin_unlock(&fc->lock);
}

static void flush_bg_queue(struct fuse_conn *fc)
{
while (fc->active_background < fc->max_background &&
Expand Down Expand Up @@ -438,12 +452,6 @@ static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
}
}

/*
 * Send a request for which no reply is expected.
 *
 * Clears req->isreply and then hands the request to
 * fuse_request_send_nowait().  The flag must be set before the request is
 * handed over, so the order of the two statements matters.  Compare with
 * fuse_request_send_background(), which sets isreply to 1 instead.
 */
void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 0;
fuse_request_send_nowait(fc, req);
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 1;
Expand Down Expand Up @@ -896,9 +904,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
return err;
}

/*
 * Tell whether at least one FORGET is waiting on the connection's forget
 * queue.  The queue is empty exactly when the embedded list head has no
 * successor.
 *
 * Returns 1 if a FORGET is queued, 0 otherwise.
 */
static int forget_pending(struct fuse_conn *fc)
{
	const struct fuse_forget_link *first = fc->forget_list_head.next;

	return first != NULL;
}

/*
 * Tell whether there is anything for userspace to read on this connection:
 * a regular request on fc->pending, an interrupt on fc->interrupts, or a
 * FORGET on the separate forget queue.
 *
 * The forget queue has to be part of this test; a stale early return that
 * looked only at the two list_heads made the forget check unreachable, so
 * queued FORGETs would never be reported as pending.
 *
 * Returns non-zero if any of the three queues is non-empty, 0 otherwise.
 */
static int request_pending(struct fuse_conn *fc)
{
	return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
		forget_pending(fc);
}

/* Wait until a request is available on the pending list */
Expand Down Expand Up @@ -960,6 +974,50 @@ __releases(fc->lock)
return err ? err : reqsize;
}

/*
 * Detach and return the oldest entry of the connection's forget queue.
 *
 * The queue must not be empty: the first link is dereferenced without a
 * NULL check, so callers are expected to test forget_pending() first.
 * When the last entry is removed the tail pointer is reset to the embedded
 * list head, so that the next enqueue appends directly behind the head.
 *
 * The returned link is no longer referenced by the queue; the caller owns
 * it (the visible callers kfree() it).
 */
static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc)
{
	struct fuse_forget_link *head = &fc->forget_list_head;
	struct fuse_forget_link *first = head->next;

	/* Unlink the first entry */
	head->next = first->next;

	/* Queue drained: tail must point back at the head */
	if (!head->next)
		fc->forget_list_tail = head;

	return first;
}

/*
 * Transfer one queued FORGET into the buffer described by @cs.
 *
 * Entered with fc->lock held and a non-empty forget queue; the lock is
 * released here.  The first link is dequeued and its nodeid/nlookup are
 * copied into an on-stack header and argument, with the unique ID obtained
 * from fuse_get_unique() before the lock is dropped.  The link is then
 * freed, since nothing of it is needed for the copy itself.
 *
 * @fc:     connection to take the FORGET from
 * @cs:     copy state to which header and argument are copied
 * @nbytes: size of the reader's buffer
 *
 * Returns the total message length (header plus argument) on success,
 * -EINVAL if @nbytes is smaller than that length, or the error reported
 * by fuse_copy_one().
 *
 * NOTE: the link has already been dequeued and freed when the size check
 * fails, so a too-small read buffer drops that FORGET.
 */
static int fuse_read_single_forget(struct fuse_conn *fc,
				   struct fuse_copy_state *cs,
				   size_t nbytes)
__releases(fc->lock)
{
	struct fuse_forget_link *link = dequeue_forget(fc);
	struct fuse_forget_in payload = {
		.nlookup = link->nlookup,
	};
	struct fuse_in_header hdr = {
		.opcode = FUSE_FORGET,
		.nodeid = link->nodeid,
		.unique = fuse_get_unique(fc),
		.len = sizeof(hdr) + sizeof(payload),
	};
	int rc;

	/* Everything needed now lives on the stack; drop the lock and the link */
	spin_unlock(&fc->lock);
	kfree(link);

	if (nbytes < hdr.len)
		return -EINVAL;

	/* Header first, then the argument; skip the latter if the header failed */
	rc = fuse_copy_one(cs, &hdr, sizeof(hdr));
	if (rc == 0)
		rc = fuse_copy_one(cs, &payload, sizeof(payload));

	/* Always finish the copy state, even on error */
	fuse_copy_finish(cs);

	if (!rc)
		return hdr.len;

	return rc;
}

/*
* Read a single request into the userspace filesystem's buffer. This
* function waits until a request is available, then removes it from
Expand Down Expand Up @@ -998,6 +1056,14 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
return fuse_read_interrupt(fc, cs, nbytes, req);
}

if (forget_pending(fc)) {
if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
return fuse_read_single_forget(fc, cs, nbytes);

if (fc->forget_batch <= -8)
fc->forget_batch = 16;
}

req = list_entry(fc->pending.next, struct fuse_req, list);
req->state = FUSE_REQ_READING;
list_move(&req->list, &fc->io);
Expand Down Expand Up @@ -1090,7 +1156,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!fc)
return -EPERM;

bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;

Expand Down Expand Up @@ -1626,7 +1692,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
if (!fc)
return -EPERM;

bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs)
return -ENOMEM;

Expand Down Expand Up @@ -1770,6 +1836,8 @@ __acquires(fc->lock)
flush_bg_queue(fc);
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
while (forget_pending(fc))
kfree(dequeue_forget(fc));
}

/*
Expand Down
53 changes: 26 additions & 27 deletions fs/fuse/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/namei.h>
#include <linux/slab.h>

#if BITS_PER_LONG >= 64
static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
Expand Down Expand Up @@ -165,7 +165,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
struct fuse_entry_out outarg;
struct fuse_conn *fc;
struct fuse_req *req;
struct fuse_req *forget_req;
struct fuse_forget_link *forget;
struct dentry *parent;
u64 attr_version;

Expand All @@ -178,8 +178,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (IS_ERR(req))
return 0;

forget_req = fuse_get_req(fc);
if (IS_ERR(forget_req)) {
forget = fuse_alloc_forget();
if (!forget) {
fuse_put_request(fc, req);
return 0;
}
Expand All @@ -199,15 +199,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (!err) {
struct fuse_inode *fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) {
fuse_send_forget(fc, forget_req,
outarg.nodeid, 1);
fuse_queue_forget(fc, forget, outarg.nodeid, 1);
return 0;
}
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
}
fuse_put_request(fc, forget_req);
kfree(forget);
if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
return 0;

Expand Down Expand Up @@ -259,7 +258,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_req *req;
struct fuse_req *forget_req;
struct fuse_forget_link *forget;
u64 attr_version;
int err;

Expand All @@ -273,9 +272,9 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
if (IS_ERR(req))
goto out;

forget_req = fuse_get_req(fc);
err = PTR_ERR(forget_req);
if (IS_ERR(forget_req)) {
forget = fuse_alloc_forget();
err = -ENOMEM;
if (!forget) {
fuse_put_request(fc, req);
goto out;
}
Expand All @@ -301,13 +300,13 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
attr_version);
err = -ENOMEM;
if (!*inode) {
fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
fuse_queue_forget(fc, forget, outarg->nodeid, 1);
goto out;
}
err = 0;

out_put_forget:
fuse_put_request(fc, forget_req);
kfree(forget);
out:
return err;
}
Expand Down Expand Up @@ -374,7 +373,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_req *req;
struct fuse_req *forget_req;
struct fuse_forget_link *forget;
struct fuse_create_in inarg;
struct fuse_open_out outopen;
struct fuse_entry_out outentry;
Expand All @@ -388,9 +387,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (flags & O_DIRECT)
return -EINVAL;

forget_req = fuse_get_req(fc);
if (IS_ERR(forget_req))
return PTR_ERR(forget_req);
forget = fuse_alloc_forget();
if (!forget)
return -ENOMEM;

req = fuse_get_req(fc);
err = PTR_ERR(req);
Expand Down Expand Up @@ -448,10 +447,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(ff, flags);
fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
fuse_queue_forget(fc, forget, outentry.nodeid, 1);
return -ENOMEM;
}
fuse_put_request(fc, forget_req);
kfree(forget);
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir);
Expand All @@ -469,7 +468,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
out_put_request:
fuse_put_request(fc, req);
out_put_forget_req:
fuse_put_request(fc, forget_req);
kfree(forget);
return err;
}

Expand All @@ -483,12 +482,12 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
struct fuse_entry_out outarg;
struct inode *inode;
int err;
struct fuse_req *forget_req;
struct fuse_forget_link *forget;

forget_req = fuse_get_req(fc);
if (IS_ERR(forget_req)) {
forget = fuse_alloc_forget();
if (!forget) {
fuse_put_request(fc, req);
return PTR_ERR(forget_req);
return -ENOMEM;
}

memset(&outarg, 0, sizeof(outarg));
Expand All @@ -515,10 +514,10 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr, entry_attr_timeout(&outarg), 0);
if (!inode) {
fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
fuse_queue_forget(fc, forget, outarg.nodeid, 1);
return -ENOMEM;
}
fuse_put_request(fc, forget_req);
kfree(forget);

if (S_ISDIR(inode->i_mode)) {
struct dentry *alias;
Expand All @@ -541,7 +540,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
return 0;

out_put_forget_req:
fuse_put_request(fc, forget_req);
kfree(forget);
return err;
}

Expand Down
28 changes: 19 additions & 9 deletions fs/fuse/fuse_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ extern struct mutex fuse_mutex;
extern unsigned max_user_bgreq;
extern unsigned max_user_congthresh;

/*
 * One queued FORGET request.
 *
 * Holds only the two values that end up in the FUSE_FORGET message plus
 * the link to the next queued entry.
 */
struct fuse_forget_link {
/** Node ID placed in the request header of the FORGET message */
u64 nodeid;
/** Value sent as fuse_forget_in.nlookup */
u64 nlookup;
/** Next entry on the singly linked forget queue; NULL ends the list */
struct fuse_forget_link *next;
};

/** FUSE inode */
struct fuse_inode {
/** Inode data */
Expand All @@ -66,7 +73,7 @@ struct fuse_inode {
u64 nlookup;

/** The request used for sending the FORGET message */
struct fuse_req *forget_req;
struct fuse_forget_link *forget;

/** Time in jiffies until the file attributes are valid */
u64 i_time;
Expand Down Expand Up @@ -255,7 +262,6 @@ struct fuse_req {

/** Data for asynchronous requests */
union {
struct fuse_forget_in forget_in;
struct {
struct fuse_release_in in;
struct path path;
Expand Down Expand Up @@ -369,6 +375,13 @@ struct fuse_conn {
/** Pending interrupts */
struct list_head interrupts;

/** Queue of pending forgets */
struct fuse_forget_link forget_list_head;
struct fuse_forget_link *forget_list_tail;

/** Batching of FORGET requests (positive indicates FORGET batch) */
int forget_batch;

/** Flag indicating if connection is blocked. This will be
the case before the INIT reply is received, and if there
are too many outstanding background requests */
Expand Down Expand Up @@ -543,8 +556,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
/**
* Send FORGET command
*/
void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
u64 nodeid, u64 nlookup);
void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
u64 nodeid, u64 nlookup);

struct fuse_forget_link *fuse_alloc_forget(void);

/**
* Initialize READ or READDIR request
Expand Down Expand Up @@ -655,11 +670,6 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
*/
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);

/**
* Send a request with no reply
*/
void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);

/**
* Send a request in the background
*/
Expand Down
Loading

0 comments on commit 07e77dc

Please sign in to comment.