Skip to content

Commit

Permalink
Merge tag 'fuse-update-4.18' of git://git.kernel.org/pub/scm/linux/ke…
Browse files Browse the repository at this point in the history
…rnel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:
 "The most interesting part of this update is user namespace support,
  mostly done by Eric Biederman. This enables safe unprivileged fuse
  mounts within a user namespace.

  There are also a couple of fixes for bugs found by syzbot and
  miscellaneous fixes and cleanups"

* tag 'fuse-update-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: don't keep dead fuse_conn at fuse_fill_super().
  fuse: fix control dir setup and teardown
  fuse: fix congested state leak on aborted connections
  fuse: Allow fully unprivileged mounts
  fuse: Ensure posix acls are translated outside of init_user_ns
  fuse: add writeback documentation
  fuse: honor AT_STATX_FORCE_SYNC
  fuse: honor AT_STATX_DONT_SYNC
  fuse: Restrict allow_other to the superblock's namespace or a descendant
  fuse: Support fuse filesystems outside of init_user_ns
  fuse: Fail all requests with invalid uids or gids
  fuse: Remove the buggy retranslation of pids in fuse_dev_do_read
  fuse: return -ECONNABORTED on /dev/fuse read after abort
  fuse: atomic_o_trunc should truncate pagecache
  • Loading branch information
Linus Torvalds committed Jun 7, 2018
2 parents 1c8c5a9 + 543b8f8 commit da315f6
Show file tree
Hide file tree
Showing 11 changed files with 209 additions and 63 deletions.
38 changes: 38 additions & 0 deletions Documentation/filesystems/fuse-io.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
Fuse supports the following I/O modes:

- direct-io
- cached
+ write-through
+ writeback-cache

The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the
FUSE_OPEN reply.

In direct-io mode the page cache is completely bypassed for reads and writes.
No read-ahead takes place. Shared mmap is disabled.

In cached mode reads may be satisfied from the page cache, and data may be
read-ahead by the kernel to fill the cache. The cache is always kept consistent
after any writes to the file. All mmap modes are supported.

The cached mode has two sub modes controlling how writes are handled. The
write-through mode is the default and is supported on all kernels. The
writeback-cache mode may be selected by the FUSE_WRITEBACK_CACHE flag in the
FUSE_INIT reply.

In write-through mode each write is immediately sent to userspace as one or more
WRITE requests, as well as updating any cached pages (and caching previously
uncached, but fully written pages). No READ requests are ever sent for writes,
so when an uncached page is partially written, the page is discarded.

In writeback-cache mode (enabled by the FUSE_WRITEBACK_CACHE flag) writes go to
the cache only, which means that the write(2) syscall can often complete very
fast. Dirty pages are written back implicitly (background writeback or page
reclaim on memory pressure) or explicitly (invoked by close(2), fsync(2) and
when the last ref to the file is being released on munmap(2)). This mode
assumes that all changes to the filesystem go through the FUSE kernel module
(size and atime/ctime/mtime attributes are kept up-to-date by the kernel), so
it's generally not suitable for network filesystems. If a partial page is
written, then the page needs to be first read from userspace. This means, that
even for files opened for O_WRONLY it is possible that READ requests will be
generated by the kernel.
4 changes: 2 additions & 2 deletions fs/fuse/acl.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
return ERR_PTR(-ENOMEM);
size = fuse_getxattr(inode, name, value, PAGE_SIZE);
if (size > 0)
acl = posix_acl_from_xattr(&init_user_ns, value, size);
acl = posix_acl_from_xattr(fc->user_ns, value, size);
else if ((size == 0) || (size == -ENODATA) ||
(size == -EOPNOTSUPP && fc->no_getxattr))
acl = NULL;
Expand Down Expand Up @@ -81,7 +81,7 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (!value)
return -ENOMEM;

ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
ret = posix_acl_to_xattr(fc->user_ns, acl, value, size);
if (ret < 0) {
kfree(value);
return ret;
Expand Down
15 changes: 11 additions & 4 deletions fs/fuse/control.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf,
{
struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
if (fc) {
fuse_abort_conn(fc);
fuse_abort_conn(fc, true);
fuse_conn_put(fc);
}
return count;
Expand Down Expand Up @@ -211,10 +211,11 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
if (!dentry)
return NULL;

fc->ctl_dentry[fc->ctl_ndents++] = dentry;
inode = new_inode(fuse_control_sb);
if (!inode)
if (!inode) {
dput(dentry);
return NULL;
}

inode->i_ino = get_next_ino();
inode->i_mode = mode;
Expand All @@ -228,6 +229,9 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
set_nlink(inode, nlink);
inode->i_private = fc;
d_add(dentry, inode);

fc->ctl_dentry[fc->ctl_ndents++] = dentry;

return dentry;
}

Expand Down Expand Up @@ -284,7 +288,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
for (i = fc->ctl_ndents - 1; i >= 0; i--) {
struct dentry *dentry = fc->ctl_dentry[i];
d_inode(dentry)->i_private = NULL;
d_drop(dentry);
if (!i) {
/* Get rid of submounts: */
d_invalidate(dentry);
}
dput(dentry);
}
drop_nlink(d_inode(fuse_control_sb->s_root));
Expand Down
11 changes: 8 additions & 3 deletions fs/fuse/cuse.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/user_namespace.h>

#include "fuse_i.h"

Expand Down Expand Up @@ -406,7 +407,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
err_region:
unregister_chrdev_region(devt, 1);
err:
fuse_abort_conn(fc);
fuse_abort_conn(fc, false);
goto out;
}

Expand Down Expand Up @@ -498,7 +499,11 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
if (!cc)
return -ENOMEM;

fuse_conn_init(&cc->fc);
/*
* Limit the cuse channel to requests that can
* be represented in file->f_cred->user_ns.
*/
fuse_conn_init(&cc->fc, file->f_cred->user_ns);

fud = fuse_dev_alloc(&cc->fc);
if (!fud) {
Expand Down Expand Up @@ -581,7 +586,7 @@ static ssize_t cuse_class_abort_store(struct device *dev,
{
struct cuse_conn *cc = dev_get_drvdata(dev);

fuse_abort_conn(&cc->fc);
fuse_abort_conn(&cc->fc, false);
return count;
}
static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
Expand Down
43 changes: 21 additions & 22 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,6 @@ static void __fuse_put_request(struct fuse_req *req)
refcount_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
{
req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
}

void fuse_set_initialized(struct fuse_conn *fc)
{
/* Make sure stores before this are seen on another CPU */
Expand Down Expand Up @@ -163,11 +156,19 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
goto out;
}

fuse_req_init_context(fc, req);
req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);

__set_bit(FR_WAITING, &req->flags);
if (for_background)
__set_bit(FR_BACKGROUND, &req->flags);

if (unlikely(req->in.h.uid == ((uid_t)-1) ||
req->in.h.gid == ((gid_t)-1))) {
fuse_put_request(fc, req);
return ERR_PTR(-EOVERFLOW);
}
return req;

out:
Expand Down Expand Up @@ -256,7 +257,10 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
if (!req)
req = get_reserved_req(fc, file);

fuse_req_init_context(fc, req);
req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);

__set_bit(FR_WAITING, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
return req;
Expand Down Expand Up @@ -381,8 +385,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
wake_up(&fc->blocked_waitq);

if (fc->num_background == fc->congestion_threshold &&
fc->connected && fc->sb) {
if (fc->num_background == fc->congestion_threshold && fc->sb) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
}
Expand Down Expand Up @@ -1234,9 +1237,10 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
if (err)
goto err_unlock;

err = -ENODEV;
if (!fiq->connected)
if (!fiq->connected) {
err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
goto err_unlock;
}

if (!list_empty(&fiq->interrupts)) {
req = list_entry(fiq->interrupts.next, struct fuse_req,
Expand All @@ -1260,12 +1264,6 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
in = &req->in;
reqsize = in->h.len;

if (task_active_pid_ns(current) != fc->pid_ns) {
rcu_read_lock();
in->h.pid = pid_vnr(find_pid_ns(in->h.pid, fc->pid_ns));
rcu_read_unlock();
}

/* If request is too large, reply with an error and restart the read */
if (nbytes < reqsize) {
req->out.h.error = -EIO;
Expand All @@ -1287,7 +1285,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
spin_lock(&fpq->lock);
clear_bit(FR_LOCKED, &req->flags);
if (!fpq->connected) {
err = -ENODEV;
err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
goto out_end;
}
if (err) {
Expand Down Expand Up @@ -2076,7 +2074,7 @@ static void end_polls(struct fuse_conn *fc)
* is OK, the request will in that case be removed from the list before we touch
* it.
*/
void fuse_abort_conn(struct fuse_conn *fc)
void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
{
struct fuse_iqueue *fiq = &fc->iq;

Expand All @@ -2089,6 +2087,7 @@ void fuse_abort_conn(struct fuse_conn *fc)

fc->connected = 0;
fc->blocked = 0;
fc->aborted = is_abort;
fuse_set_initialized(fc);
list_for_each_entry(fud, &fc->devices, entry) {
struct fuse_pqueue *fpq = &fud->pq;
Expand Down Expand Up @@ -2151,7 +2150,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
/* Are we the last open device? */
if (atomic_dec_and_test(&fc->dev_count)) {
WARN_ON(fc->iq.fasync != NULL);
fuse_abort_conn(fc);
fuse_abort_conn(fc, false);
}
fuse_dev_free(fud);
}
Expand Down
45 changes: 32 additions & 13 deletions fs/fuse/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -858,8 +858,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
stat->ino = attr->ino;
stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
stat->nlink = attr->nlink;
stat->uid = make_kuid(&init_user_ns, attr->uid);
stat->gid = make_kgid(&init_user_ns, attr->gid);
stat->uid = make_kuid(fc->user_ns, attr->uid);
stat->gid = make_kgid(fc->user_ns, attr->gid);
stat->rdev = inode->i_rdev;
stat->atime.tv_sec = attr->atime;
stat->atime.tv_nsec = attr->atimensec;
Expand Down Expand Up @@ -924,12 +924,20 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
}

static int fuse_update_get_attr(struct inode *inode, struct file *file,
struct kstat *stat)
struct kstat *stat, unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
int err = 0;
bool sync;

if (time_before64(fi->i_time, get_jiffies_64())) {
if (flags & AT_STATX_FORCE_SYNC)
sync = true;
else if (flags & AT_STATX_DONT_SYNC)
sync = false;
else
sync = time_before64(fi->i_time, get_jiffies_64());

if (sync) {
forget_all_cached_acls(inode);
err = fuse_do_getattr(inode, stat, file);
} else if (stat) {
Expand All @@ -943,7 +951,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,

int fuse_update_attributes(struct inode *inode, struct file *file)
{
return fuse_update_get_attr(inode, file, NULL);
return fuse_update_get_attr(inode, file, NULL, 0);
}

int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
Expand Down Expand Up @@ -1030,7 +1038,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
const struct cred *cred;

if (fc->allow_other)
return 1;
return current_in_userns(fc->user_ns);

cred = current_cred();
if (uid_eq(cred->euid, fc->user_id) &&
Expand Down Expand Up @@ -1475,17 +1483,17 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
return true;
}

static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
bool trust_local_cmtime)
static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
struct fuse_setattr_in *arg, bool trust_local_cmtime)
{
unsigned ivalid = iattr->ia_valid;

if (ivalid & ATTR_MODE)
arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
if (ivalid & ATTR_UID)
arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
if (ivalid & ATTR_GID)
arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
if (ivalid & ATTR_SIZE)
arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
if (ivalid & ATTR_ATIME) {
Expand Down Expand Up @@ -1629,8 +1637,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
return err;

if (attr->ia_valid & ATTR_OPEN) {
if (fc->atomic_o_trunc)
/* This is coming from open(..., ... | O_TRUNC); */
WARN_ON(!(attr->ia_valid & ATTR_SIZE));
WARN_ON(attr->ia_size != 0);
if (fc->atomic_o_trunc) {
/*
* No need to send request to userspace, since actual
* truncation has already been done by OPEN. But still
* need to truncate page cache.
*/
i_size_write(inode, 0);
truncate_pagecache(inode, 0);
return 0;
}
file = NULL;
}

Expand All @@ -1646,7 +1665,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,

memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
iattr_to_fattr(attr, &inarg, trust_local_cmtime);
iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
if (file) {
struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH;
Expand Down Expand Up @@ -1783,7 +1802,7 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
if (!fuse_allow_current_process(fc))
return -EACCES;

return fuse_update_get_attr(inode, NULL, stat);
return fuse_update_get_attr(inode, NULL, stat, flags);
}

static const struct inode_operations fuse_dir_inode_operations = {
Expand Down
Loading

0 comments on commit da315f6

Please sign in to comment.