Skip to content

Commit

Permalink
fuse: implement NFS-like readdirplus support
Browse files Browse the repository at this point in the history
This patch implements readdirplus support in FUSE, similar to NFS.
The payload returned by a readdirplus call contains a
'fuse_entry_out' structure for each entry, thereby providing all the
inputs necessary for 'faking' a lookup() operation on the spot.

If the dentry and inode already exist (e.g. in a re-run of ls -l),
then only the inode attribute timeout and the dentry timeout are refreshed.

With a simple client->network->server implementation of a FUSE based
filesystem, the following performance observations were made:

Test: Performing a filesystem crawl over 20,000 files with

sh# time ls -lR /mnt

Without readdirplus:
Run 1: 18.1s
Run 2: 16.0s
Run 3: 16.2s

With readdirplus:
Run 1: 4.1s
Run 2: 3.8s
Run 3: 3.8s

The performance improvement is significant because it avoids 20,000
upcalls (lookups). Cache consistency is no worse than it already is.

Signed-off-by: Anand V. Avati <avati@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
  • Loading branch information
Anand V. Avati authored and Miklos Szeredi committed Jan 24, 2013
1 parent ff7532c commit 0b05b18
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 5 deletions.
19 changes: 19 additions & 0 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
fuse_request_send_nowait_locked(fc, req);
}

/*
 * Send a FUSE_FORGET for @nodeid that drops exactly one lookup count.
 *
 * @file:   open file used to find the connection and to obtain a request
 *          via fuse_get_req_nofail()
 * @nodeid: node id to forget
 *
 * The forget argument is built in a local (on-stack) fuse_forget_in and the
 * request only stores a pointer to it.  The request therefore has to be
 * fully consumed before this function returns; queueing it with a "nowait"
 * send would leave the request pointing at a dead stack frame.
 */
void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	struct fuse_forget_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.nlookup = 1;

	req = fuse_get_req_nofail(fc, file);
	req->in.h.opcode = FUSE_FORGET;
	req->in.h.nodeid = nodeid;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;	/* points into this stack frame */
	req->isreply = 0;		/* no reply is expected for FORGET */

	/*
	 * Send synchronously so that inarg is still alive while the request
	 * is being read.
	 * NOTE(review): relies on __fuse_request_send() (static helper in
	 * dev.c) waiting until a no-reply request has been handed to
	 * userspace or aborted, and on the caller keeping its own reference
	 * -- confirm against the helper.
	 */
	__fuse_request_send(fc, req);
	/* ignore errors */
	fuse_put_request(fc, req);
}

/*
* Lock the request. Up to the next unlock_request() there mustn't be
* anything that could cause a page-fault. If the request was already
Expand Down
160 changes: 156 additions & 4 deletions fs/fuse/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
return 0;
}

/*
 * Instantiate or refresh the dentry/inode described by one READDIRPLUS
 * record, as if a lookup had just been performed on it.
 *
 * @file:         the open directory being read
 * @direntplus:   record holding the fuse_entry_out and the directory entry
 * @attr_version: attribute version sampled by the caller before the request
 *
 * Returns 0 on success (including the cases where nothing is done) or a
 * negative errno.  The caller sends a FORGET for the node id when this
 * fails.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	int err;
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = parent->d_inode;
	struct fuse_conn *fc;
	struct inode *inode;

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT.  Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}
	fc = get_fuse_conn(dir);

	name.hash = full_name_hash(name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (dentry) {
		inode = dentry->d_inode;
		if (inode && get_node_id(inode) == o->nodeid) {
			struct fuse_inode *fi;

			fi = get_fuse_inode(inode);
			spin_lock(&fc->lock);
			fi->nlookup++;
			spin_unlock(&fc->lock);

			/*
			 * The other branch to 'found' comes via fuse_iget()
			 * which bumps nlookup inside
			 */
			goto found;
		}

		/*
		 * Either a negative dentry (no inode) or one that refers to a
		 * different node id.  In both cases it must be invalidated
		 * and its reference dropped before a fresh dentry is
		 * allocated; previously a negative dentry fell through with
		 * its reference still held and was overwritten by d_alloc()
		 * below, leaking it.
		 * NOTE(review): assumes d_invalidate() just unhashes a
		 * negative dentry and returns 0 -- confirm.
		 */
		err = d_invalidate(dentry);
		if (err)
			goto out;
		dput(dentry);
		dentry = NULL;
	}

	dentry = d_alloc(parent, &name);
	err = -ENOMEM;
	if (!dentry)
		goto out;

	/* err is still -ENOMEM if fuse_iget() fails */
	inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
			  &o->attr, entry_attr_timeout(o), attr_version);
	if (!inode)
		goto out;

	alias = d_materialise_unique(dentry, inode);
	err = PTR_ERR(alias);
	if (IS_ERR(alias))
		goto out;
	if (alias) {
		/* an existing alias is used in place of the new dentry */
		dput(dentry);
		dentry = alias;
	}

found:
	fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
			       attr_version);

	fuse_change_entry_timeout(dentry, o);

	err = 0;
out:
	if (dentry)
		dput(dentry);
	return err;
}

/*
 * Walk a buffer of READDIRPLUS records, passing each directory entry to
 * @filldir and linking each entry via fuse_direntplus_link().
 *
 * @buf:          start of the reply payload
 * @nbytes:       number of valid bytes in @buf
 * @file:         the open directory; file->f_pos is advanced as entries are
 *                accepted by @filldir
 * @dstbuf:       opaque destination handed through to @filldir
 * @filldir:      callback that stores one entry; non-zero return means the
 *                entry was not accepted
 * @attr_version: attribute version sampled before the request was sent
 *
 * Returns 0, or -EIO if a record carries an empty or over-long name.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     void *dstbuf, filldir_t filldir, u64 attr_version)
{
	struct fuse_direntplus *direntplus;
	struct fuse_dirent *dirent;
	size_t reclen;
	int over = 0;
	int ret;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		direntplus = (struct fuse_direntplus *) buf;
		dirent = &direntplus->dirent;
		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		/* truncated trailing record: stop here */
		if (reclen > nbytes)
			break;

		if (!over) {
			/* We fill entries into dstbuf only as much as
			   it can hold. But we still continue iterating
			   over remaining entries to link them. If not,
			   we need to send a FORGET for each of those
			   which we did not link.
			*/
			over = filldir(dstbuf, dirent->name, dirent->namelen,
				       file->f_pos, dirent->ino,
				       dirent->type);
			/*
			 * Advance the position only past entries that were
			 * actually accepted; otherwise the rejected entry
			 * would be skipped by the next readdir call.
			 */
			if (!over)
				file->f_pos = dirent->off;
		}

		buf += reclen;
		nbytes -= reclen;

		/* on failure, give back the lookup count of this record */
		ret = fuse_direntplus_link(file, direntplus, attr_version);
		if (ret)
			fuse_force_forget(file, direntplus->entry_out.nodeid);
	}

	return 0;
}

static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
{
int err;
Expand All @@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
struct inode *inode = file->f_path.dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
u64 attr_version = 0;

if (is_bad_inode(inode))
return -EIO;
Expand All @@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
if (fc->do_readdirplus) {
attr_version = fuse_get_attr_version(fc);
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
FUSE_READDIR);
}
fuse_request_send(fc, req);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err)
err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
filldir);
if (!err) {
if (fc->do_readdirplus) {
err = parse_dirplusfile(page_address(page), nbytes,
file, dstbuf, filldir,
attr_version);
} else {
err = parse_dirfile(page_address(page), nbytes, file,
dstbuf, filldir);
}
}

__free_page(page);
fuse_invalidate_attr(inode); /* atime changed */
Expand Down
6 changes: 6 additions & 0 deletions fs/fuse/fuse_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,9 @@ struct fuse_conn {
/** Use enhanced/automatic page cache invalidation. */
unsigned auto_inval_data:1;

/** Does the filesystem support readdir-plus? */
unsigned do_readdirplus:1;

/** The number of requests waiting for completion */
atomic_t num_waiting;

Expand Down Expand Up @@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,

struct fuse_forget_link *fuse_alloc_forget(void);

/* Used by READDIRPLUS */
void fuse_force_forget(struct file *file, u64 nodeid);

/**
* Initialize READ or READDIR request
*/
Expand Down
5 changes: 4 additions & 1 deletion fs/fuse/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->dont_mask = 1;
if (arg->flags & FUSE_AUTO_INVAL_DATA)
fc->auto_inval_data = 1;
if (arg->flags & FUSE_DO_READDIRPLUS)
fc->do_readdirplus = 1;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
Expand All @@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA;
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
Expand Down
12 changes: 12 additions & 0 deletions include/uapi/linux/fuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ struct fuse_file_lock {
#define FUSE_FLOCK_LOCKS (1 << 10)
#define FUSE_HAS_IOCTL_DIR (1 << 11)
#define FUSE_AUTO_INVAL_DATA (1 << 12)
#define FUSE_DO_READDIRPLUS (1 << 13)

/**
* CUSE INIT request/reply flags
Expand Down Expand Up @@ -299,6 +300,7 @@ enum fuse_opcode {
FUSE_NOTIFY_REPLY = 41,
FUSE_BATCH_FORGET = 42,
FUSE_FALLOCATE = 43,
FUSE_READDIRPLUS = 44,

/* CUSE specific operations */
CUSE_INIT = 4096,
Expand Down Expand Up @@ -630,6 +632,16 @@ struct fuse_dirent {
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)

/*
 * One record of a READDIRPLUS reply: the lookup result for an entry
 * (entry_out) immediately followed by the ordinary directory entry
 * (dirent).  The entry name is reached through dirent.name.
 */
struct fuse_direntplus {
struct fuse_entry_out entry_out;
struct fuse_dirent dirent;
};

/*
 * FUSE_NAME_OFFSET_DIRENTPLUS: byte offset of the entry name inside a
 * struct fuse_direntplus, i.e. the size of the part preceding the name.
 *
 * FUSE_DIRENTPLUS_SIZE(d): size of the record pointed to by d, computed as
 * the name offset plus d->dirent.namelen and passed through
 * FUSE_DIRENT_ALIGN (defined elsewhere in this header).
 */
#define FUSE_NAME_OFFSET_DIRENTPLUS \
offsetof(struct fuse_direntplus, dirent.name)
#define FUSE_DIRENTPLUS_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)

struct fuse_notify_inval_inode_out {
__u64 ino;
__s64 off;
Expand Down

0 comments on commit 0b05b18

Please sign in to comment.