Skip to content

Commit

Permalink
vfs: pull btrfs clone API to vfs layer
Browse files Browse the repository at this point in the history
The btrfs clone ioctls are now adopted by other file systems, with NFS
and CIFS already having support for them, and XFS being under active
development.  To avoid growth of various slightly incompatible
implementations, add one to the VFS.  Note that clones are different from
file copies in several ways:

 - they are atomic vs other writers
 - they support whole file clones
 - they support 64-bit length clones
 - they do not allow partial success (aka short writes)
 - clones are expected to be a fast metadata operation

Because of that it would be rather cumbersome to try to piggyback them on
top of the recent copy_file_range infrastructure.  The converse isn't
true and the copy_file_range system call could try clone file range as
a first attempt to copy, something that further patches will enable.

Based on earlier work from Peng Tao.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
  • Loading branch information
Christoph Hellwig authored and Al Viro committed Dec 8, 2015
1 parent acc1557 commit 04b38d6
Show file tree
Hide file tree
Showing 11 changed files with 254 additions and 193 deletions.
3 changes: 2 additions & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -4025,7 +4025,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
struct btrfs_ioctl_balance_args *bargs);


/* file.c */
int btrfs_auto_defrag_init(void);
void btrfs_auto_defrag_exit(void);
Expand Down Expand Up @@ -4058,6 +4057,8 @@ int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags);
int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);

/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2925,6 +2925,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_ioctl,
#endif
.copy_file_range = btrfs_copy_file_range,
.clone_file_range = btrfs_clone_file_range,
};

void btrfs_auto_defrag_exit(void)
Expand Down
49 changes: 3 additions & 46 deletions fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -3906,49 +3906,10 @@ ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
return ret;
}

static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
int btrfs_clone_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, u64 len)
{
struct fd src_file;
int ret;

/* the destination must be opened for writing */
if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;

ret = mnt_want_write_file(file);
if (ret)
return ret;

src_file = fdget(srcfd);
if (!src_file.file) {
ret = -EBADF;
goto out_drop_write;
}

/* the src must be open for reading */
if (!(src_file.file->f_mode & FMODE_READ)) {
ret = -EINVAL;
goto out_fput;
}

ret = btrfs_clone_files(file, src_file.file, off, olen, destoff);

out_fput:
fdput(src_file);
out_drop_write:
mnt_drop_write_file(file);
return ret;
}

static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
{
struct btrfs_ioctl_clone_range_args args;

if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
args.src_length, args.dest_offset);
return btrfs_clone_files(dst_file, src_file, off, len, destoff);
}

/*
Expand Down Expand Up @@ -5498,10 +5459,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
return btrfs_ioctl_clone_range(file, argp);
case BTRFS_IOC_TRANS_START:
return btrfs_ioctl_trans_start(file);
case BTRFS_IOC_TRANS_END:
Expand Down
63 changes: 63 additions & 0 deletions fs/cifs/cifsfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,61 @@ const struct inode_operations cifs_symlink_inode_ops = {
#endif
};

/*
 * cifs_clone_file_range - clone a byte range from one cifs file into another
 * @src_file: file to clone from
 * @off: byte offset in @src_file where the cloned range starts
 * @dst_file: file to clone into
 * @destoff: byte offset in @dst_file where the cloned range is placed
 * @len: number of bytes to clone; 0 means "from @off up to the source i_size"
 *
 * The actual work is delegated to the server's duplicate_extents operation.
 *
 * Returns the result of duplicate_extents, -EBADF if either file has no
 * private_data attached, or -EOPNOTSUPP if the server ops table does not
 * provide duplicate_extents.
 */
static int cifs_clone_file_range(struct file *src_file, loff_t off,
		struct file *dst_file, loff_t destoff, u64 len)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *target_inode = file_inode(dst_file);
	struct cifsFileInfo *smb_file_src = src_file->private_data;
	struct cifsFileInfo *smb_file_target = dst_file->private_data;
	struct cifs_tcon *target_tcon;
	unsigned int xid;
	int rc;

	cifs_dbg(FYI, "clone range\n");

	xid = get_xid();

	if (!smb_file_src || !smb_file_target) {
		rc = -EBADF;
		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
		goto out;
	}

	/*
	 * Only follow ->tlink once private_data is known to be non-NULL;
	 * resolving the tcon any earlier would dereference the very pointer
	 * the check above is guarding.
	 */
	target_tcon = tlink_tcon(smb_file_target->tlink);

	/*
	 * Note: cifs case is easier than btrfs since server responsible for
	 * checks for proper open modes and file type and if it wants
	 * server could even support copy of range where source = target
	 */
	lock_two_nondirectories(target_inode, src_inode);

	/* a zero length requests everything from @off to the end of source */
	if (len == 0)
		len = src_inode->i_size - off;

	cifs_dbg(FYI, "about to flush pages\n");
	/* should we flush first and last page first */
	truncate_inode_pages_range(&target_inode->i_data, destoff,
				   PAGE_CACHE_ALIGN(destoff + len)-1);

	if (target_tcon->ses->server->ops->duplicate_extents)
		rc = target_tcon->ses->server->ops->duplicate_extents(xid,
			smb_file_src, smb_file_target, off, len, destoff);
	else
		rc = -EOPNOTSUPP;

	/* force revalidate of size and timestamps of target file now
	   that target is updated on the server */
	CIFS_I(target_inode)->time = 0;

	/* although unlocking in the reverse order from locking is not
	   strictly necessary here it is a little cleaner to be consistent */
	unlock_two_nondirectories(src_inode, target_inode);
out:
	free_xid(xid);
	return rc;
}

const struct file_operations cifs_file_ops = {
.read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
Expand All @@ -926,6 +981,7 @@ const struct file_operations cifs_file_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -942,6 +998,8 @@ const struct file_operations cifs_file_strict_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
/* same clone handler as the other cifs file_operations tables; set once */
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -958,6 +1016,7 @@ const struct file_operations cifs_file_direct_ops = {
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
Expand All @@ -974,6 +1033,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -989,6 +1049,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
Expand All @@ -1004,6 +1065,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
Expand All @@ -1014,6 +1076,7 @@ const struct file_operations cifs_dir_ops = {
.release = cifs_closedir,
.read = generic_read_dir,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
.llseek = generic_file_llseek,
};

Expand Down
1 change: 0 additions & 1 deletion fs/cifs/cifsfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *,
extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);

#ifdef CONFIG_CIFS_NFSD_EXPORT
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
Expand Down
126 changes: 59 additions & 67 deletions fs/cifs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,108 +34,103 @@
#include "cifs_ioctl.h"
#include <linux/btrfs.h>

static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
unsigned long srcfd, u64 off, u64 len, u64 destoff,
bool dup_extents)
static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
struct file *dst_file)
{
int rc;
struct cifsFileInfo *smb_file_target = dst_file->private_data;
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
struct cifs_tcon *target_tcon;
struct fd src_file;
struct cifsFileInfo *smb_file_src;
struct inode *src_inode;
struct cifsFileInfo *smb_file_target;
struct cifs_tcon *src_tcon;
struct cifs_tcon *target_tcon;
int rc;

cifs_dbg(FYI, "ioctl clone range\n");
/* the destination must be opened for writing */
if (!(dst_file->f_mode & FMODE_WRITE)) {
cifs_dbg(FYI, "file target not open for write\n");
return -EINVAL;
}

/* check if target volume is readonly and take reference */
rc = mnt_want_write_file(dst_file);
if (rc) {
cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
return rc;
}

src_file = fdget(srcfd);
if (!src_file.file) {
rc = -EBADF;
goto out_drop_write;
}

if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
rc = -EBADF;
cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
goto out_fput;
}

if ((!src_file.file->private_data) || (!dst_file->private_data)) {
if (!src_file->private_data || !dst_file->private_data) {
rc = -EBADF;
cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
goto out_fput;
goto out;
}

rc = -EXDEV;
smb_file_target = dst_file->private_data;
smb_file_src = src_file.file->private_data;
smb_file_src = src_file->private_data;
src_tcon = tlink_tcon(smb_file_src->tlink);
target_tcon = tlink_tcon(smb_file_target->tlink);

/* check source and target on same server (or volume if dup_extents) */
if (dup_extents && (src_tcon != target_tcon)) {
cifs_dbg(VFS, "source and target of copy not on same share\n");
goto out_fput;
}

if (!dup_extents && (src_tcon->ses != target_tcon->ses)) {
if (src_tcon->ses != target_tcon->ses) {
cifs_dbg(VFS, "source and target of copy not on same server\n");
goto out_fput;
goto out;
}

src_inode = file_inode(src_file.file);
rc = -EINVAL;
if (S_ISDIR(src_inode->i_mode))
goto out_fput;

/*
* Note: cifs case is easier than btrfs since server responsible for
* checks for proper open modes and file type and if it wants
* server could even support copy of range where source = target
*/
lock_two_nondirectories(target_inode, src_inode);

/* determine range to clone */
rc = -EINVAL;
if (off + len > src_inode->i_size || off + len < off)
goto out_unlock;
if (len == 0)
len = src_inode->i_size - off;

cifs_dbg(FYI, "about to flush pages\n");
/* should we flush first and last page first */
truncate_inode_pages_range(&target_inode->i_data, destoff,
PAGE_CACHE_ALIGN(destoff + len)-1);
truncate_inode_pages(&target_inode->i_data, 0);

if (dup_extents && target_tcon->ses->server->ops->duplicate_extents)
rc = target_tcon->ses->server->ops->duplicate_extents(xid,
smb_file_src, smb_file_target, off, len, destoff);
else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
if (target_tcon->ses->server->ops->clone_range)
rc = target_tcon->ses->server->ops->clone_range(xid,
smb_file_src, smb_file_target, off, len, destoff);
smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
else
rc = -EOPNOTSUPP;

/* force revalidate of size and timestamps of target file now
that target is updated on the server */
CIFS_I(target_inode)->time = 0;
out_unlock:
/* although unlocking in the reverse order from locking is not
strictly necessary here it is a little cleaner to be consistent */
unlock_two_nondirectories(src_inode, target_inode);
out:
return rc;
}

static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
unsigned long srcfd)
{
int rc;
struct fd src_file;
struct inode *src_inode;

cifs_dbg(FYI, "ioctl clone range\n");
/* the destination must be opened for writing */
if (!(dst_file->f_mode & FMODE_WRITE)) {
cifs_dbg(FYI, "file target not open for write\n");
return -EINVAL;
}

/* check if target volume is readonly and take reference */
rc = mnt_want_write_file(dst_file);
if (rc) {
cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
return rc;
}

src_file = fdget(srcfd);
if (!src_file.file) {
rc = -EBADF;
goto out_drop_write;
}

if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
rc = -EBADF;
cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
goto out_fput;
}

src_inode = file_inode(src_file.file);
rc = -EINVAL;
if (S_ISDIR(src_inode->i_mode))
goto out_fput;

rc = cifs_file_clone_range(xid, src_file.file, dst_file);

out_fput:
fdput(src_file);
out_drop_write:
Expand Down Expand Up @@ -256,10 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
}
break;
case CIFS_IOC_COPYCHUNK_FILE:
rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false);
break;
case BTRFS_IOC_CLONE:
rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
rc = cifs_ioctl_clone(xid, filep, arg);
break;
case CIFS_IOC_SET_INTEGRITY:
if (pSMBFile == NULL)
Expand Down
Loading

0 comments on commit 04b38d6

Please sign in to comment.