Skip to content

Commit

Permalink
Merge branch 'iomap-4.9-dax' into for-next
Browse files Browse the repository at this point in the history
  • Loading branch information
Dave Chinner committed Oct 2, 2016
2 parents a89b3f9 + d5bfccd commit a1f45e6
Show file tree
Hide file tree
Showing 13 changed files with 464 additions and 122 deletions.
252 changes: 240 additions & 12 deletions fs/dax.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
#include <linux/sizes.h>
#include <linux/iomap.h>
#include "internal.h"

/*
* We use lowest available bit in exceptional entry for locking, other two
Expand Down Expand Up @@ -580,14 +582,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
return VM_FAULT_LOCKED;
}

static int copy_user_bh(struct page *to, struct inode *inode,
struct buffer_head *bh, unsigned long vaddr)
static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
struct page *to, unsigned long vaddr)
{
struct blk_dax_ctl dax = {
.sector = to_sector(bh, inode),
.size = bh->b_size,
.sector = sector,
.size = size,
};
struct block_device *bdev = bh->b_bdev;
void *vto;

if (dax_map_atomic(bdev, &dax) < 0)
Expand Down Expand Up @@ -790,14 +791,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

static int dax_insert_mapping(struct address_space *mapping,
struct buffer_head *bh, void **entryp,
struct vm_area_struct *vma, struct vm_fault *vmf)
struct block_device *bdev, sector_t sector, size_t size,
void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
{
unsigned long vaddr = (unsigned long)vmf->virtual_address;
struct block_device *bdev = bh->b_bdev;
struct blk_dax_ctl dax = {
.sector = to_sector(bh, mapping->host),
.size = bh->b_size,
.sector = sector,
.size = size,
};
void *ret;
void *entry = *entryp;
Expand Down Expand Up @@ -868,7 +868,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (vmf->cow_page) {
struct page *new_page = vmf->cow_page;
if (buffer_written(&bh))
error = copy_user_bh(new_page, inode, &bh, vaddr);
error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
bh.b_size, new_page, vaddr);
else
clear_user_highpage(new_page, vaddr);
if (error)
Expand Down Expand Up @@ -898,7 +899,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,

/* Filesystem should not return unwritten buffers to us! */
WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
bh.b_size, &entry, vma, vmf);
unlock_entry:
put_locked_mapping_entry(mapping, vmf->pgoff, entry);
out:
Expand Down Expand Up @@ -1241,3 +1243,229 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
return dax_zero_page_range(inode, from, length, get_block);
}
EXPORT_SYMBOL_GPL(dax_truncate_page);

#ifdef CONFIG_FS_IOMAP
/*
 * Per-extent worker for DAX reads and writes: transfers data between the
 * iov_iter passed in @data and the device range described by @iomap.
 *
 * For reads, the range is clamped to i_size, and holes / unwritten extents
 * are satisfied by zero-filling the iterator.  Every other request must hit
 * an IOMAP_MAPPED extent; any other type warns once and fails with -EIO.
 *
 * Returns the number of bytes transferred when any progress was made,
 * otherwise 0 or a negative errno.
 */
static loff_t
iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		struct iomap *iomap)
{
	struct iov_iter *iter = data;
	loff_t end = pos + length;
	loff_t copied = 0;
	ssize_t err = 0;

	if (iov_iter_rw(iter) == READ) {
		/* Never read past the current end of file. */
		end = min(end, i_size_read(inode));
		if (pos >= end)
			return 0;

		switch (iomap->type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
			/* Nothing on the device to read: hand back zeroes. */
			return iov_iter_zero(min(length, end - pos), iter);
		default:
			break;
		}
	}

	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
		return -EIO;

	while (pos < end) {
		/* Byte offset of pos inside its page. */
		unsigned in_page = pos & (PAGE_SIZE - 1);
		/*
		 * Map from the start of the page containing pos; the size is
		 * the remaining length plus the in-page offset, rounded up to
		 * a whole number of pages.
		 */
		struct blk_dax_ctl dax = {
			.sector = iomap->blkno +
				(((pos & PAGE_MASK) - iomap->offset) >> 9),
			.size = (length + in_page + PAGE_SIZE - 1) & PAGE_MASK,
		};
		ssize_t chunk = dax_map_atomic(iomap->bdev, &dax);

		if (chunk < 0) {
			err = chunk;
			break;
		}

		/* Step forward to pos itself and do not run past end. */
		dax.addr += in_page;
		chunk -= in_page;
		if (chunk > end - pos)
			chunk = end - pos;

		if (iov_iter_rw(iter) != WRITE)
			chunk = copy_to_iter(dax.addr, chunk, iter);
		else
			chunk = copy_from_iter_pmem(dax.addr, chunk, iter);
		dax_unmap_atomic(iomap->bdev, &dax);

		/* A copy that moved nothing is reported as -EFAULT. */
		if (chunk == 0) {
			err = -EFAULT;
			break;
		}
		if (chunk < 0) {
			err = chunk;
			break;
		}

		pos += chunk;
		length -= chunk;
		copied += chunk;
	}

	/* Partial progress wins over a later error. */
	if (copied)
		return copied;
	return err;
}

/**
 * iomap_dax_rw - Perform I/O to a DAX file
 * @iocb:	The control block for this I/O
 * @iter:	The addresses to do I/O from or to
 * @ops:	iomap ops passed from the file system
 *
 * Reads from or writes to directly mapped persistent memory, walking the
 * file range one iomap extent at a time.  The caller needs to take care of
 * read/write exclusion and of evicting any page cache pages in the region
 * under I/O.
 *
 * On return @iocb->ki_pos has been advanced by the number of bytes
 * transferred.  Returns that byte count if it is non-zero, otherwise the
 * last status recorded (0 or a negative errno).
 */
ssize_t
iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
		struct iomap_ops *ops)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	unsigned flags = (iov_iter_rw(iter) == WRITE) ? IOMAP_WRITE : 0;
	loff_t pos = iocb->ki_pos;
	loff_t copied = 0;
	loff_t status = 0;

	/*
	 * Yes, even DAX files can have page cache attached to them: a zeroed
	 * page is inserted into the pagecache when we have to serve a write
	 * fault on a hole.  It should never be dirtied and can simply be
	 * dropped from the pagecache once we get real data for the page.
	 *
	 * XXX: This is racy against mmap, and there's nothing we can do about
	 * it.  We'll eventually need to shift this down even further so that
	 * we can check if we allocated blocks over a hole first.
	 */
	if (mapping->nrpages) {
		status = invalidate_inode_pages2_range(mapping,
				pos >> PAGE_SHIFT,
				(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
		WARN_ON_ONCE(status);
	}

	/*
	 * One iomap_apply() call per extent until the iterator is drained or
	 * a call makes no progress.  If the iterator is already empty the
	 * loop body never runs and status keeps whatever the invalidation
	 * above stored in it.
	 */
	for (; iov_iter_count(iter); pos += status, copied += status) {
		status = iomap_apply(inode, pos, iov_iter_count(iter), flags,
				ops, iter, iomap_dax_actor);
		if (status <= 0)
			break;
	}

	iocb->ki_pos += copied;
	if (copied)
		return copied;
	return status;
}
EXPORT_SYMBOL_GPL(iomap_dax_rw);

/**
* iomap_dax_fault - handle a page fault on a DAX file
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
* @ops: iomap ops passed from the file system
*
* When a page fault occurs, filesystems may call this helper in their fault
* or mkwrite handler for DAX files. Assumes the caller has done all the
* necessary locking for the page fault to proceed successfully.
*
* Return: a VM_FAULT_* code.
*  - VM_FAULT_SIGBUS if the fault offset is at or beyond i_size, or on any
*    error other than -ENOMEM and -EBUSY;
*  - VM_FAULT_OOM on -ENOMEM;
*  - VM_FAULT_LOCKED or VM_FAULT_DAX_LOCKED for a COW fault (vmf->cow_page
*    set) once the COW page has been filled;
*  - whatever dax_load_hole() returns for a read fault on a hole or
*    unwritten extent;
*  - VM_FAULT_NOPAGE otherwise (including -EBUSY).
* VM_FAULT_MAJOR is ORed into the SIGBUS/OOM/NOPAGE results when the mapped
* extent was flagged IOMAP_F_NEW.
*/
int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
struct iomap_ops *ops)
{
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
unsigned long vaddr = (unsigned long)vmf->virtual_address;
/* Byte offset in the file of the faulting page. */
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
sector_t sector;
struct iomap iomap = { 0 };
unsigned flags = 0;
int error, major = 0;
void *entry;

/*
* Check whether offset isn't beyond end of file now. Caller is supposed
* to hold locks serializing us with truncate / punch hole so this is
* a reliable test.
*/
if (pos >= i_size_read(inode))
return VM_FAULT_SIGBUS;

entry = grab_mapping_entry(mapping, vmf->pgoff);
if (IS_ERR(entry)) {
error = PTR_ERR(entry);
goto out;
}

/*
* Only a write fault that is not a COW fault asks the filesystem for a
* writable mapping; a COW fault only reads the source data.
*/
if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
flags |= IOMAP_WRITE;

/*
* Note that we don't bother to use iomap_apply here: DAX requires
* the file system block size to be equal to the page size, which means
* that we never have to deal with more than a single extent here.
*
* NOTE(review): only ->iomap_begin is called in this function. If
* struct iomap_ops also provides a matching end/cleanup hook, confirm
* whether it needs to be invoked on each exit path.
*/
error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
if (error)
goto unlock_entry;
/* The returned extent must cover the whole faulting page. */
if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
error = -EIO; /* fs corruption? */
goto unlock_entry;
}

/*
* Byte offset of the page within the extent, shifted right by 9 (i.e.
* divided by 512) and added to the extent's start block. Computed
* before iomap.type is examined, but only consumed by the IOMAP_MAPPED
* cases below.
*/
sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);

/* COW fault: fill the caller-supplied cow_page instead of mapping. */
if (vmf->cow_page) {
switch (iomap.type) {
case IOMAP_HOLE:
case IOMAP_UNWRITTEN:
/* No data to copy from: the COW page starts out zeroed. */
clear_user_highpage(vmf->cow_page, vaddr);
break;
case IOMAP_MAPPED:
error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
vmf->cow_page, vaddr);
break;
default:
WARN_ON_ONCE(1);
error = -EIO;
break;
}

if (error)
goto unlock_entry;
/*
* Success: both returns below bypass unlock_entry, so the entry is
* handed back through vmf->page or vmf->entry without
* put_locked_mapping_entry() being called here.
*/
if (!radix_tree_exceptional_entry(entry)) {
vmf->page = entry;
return VM_FAULT_LOCKED;
}
vmf->entry = entry;
return VM_FAULT_DAX_LOCKED;
}

switch (iomap.type) {
case IOMAP_MAPPED:
/* An extent flagged IOMAP_F_NEW is accounted as a major fault. */
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
error = dax_insert_mapping(mapping, iomap.bdev, sector,
PAGE_SIZE, &entry, vma, vmf);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
/*
* Read fault on a hole: returns directly, bypassing unlock_entry.
* Presumably dax_load_hole() takes over the locked entry - verify
* against its definition.
*/
if (!(vmf->flags & FAULT_FLAG_WRITE))
return dax_load_hole(mapping, entry, vmf);
/*
* A write fault requested IOMAP_WRITE above, so still seeing a hole
* or unwritten extent here is treated as an error.
*/
/*FALLTHRU*/
default:
WARN_ON_ONCE(1);
error = -EIO;
break;
}

unlock_entry:
put_locked_mapping_entry(mapping, vmf->pgoff, entry);
out:
/* Translate the errno into a VM_FAULT_* code. */
if (error == -ENOMEM)
return VM_FAULT_OOM | major;
/* -EBUSY is fine, somebody else faulted on the same PTE */
if (error < 0 && error != -EBUSY)
return VM_FAULT_SIGBUS | major;
return VM_FAULT_NOPAGE | major;
}
EXPORT_SYMBOL_GPL(iomap_dax_fault);
#endif /* CONFIG_FS_IOMAP */
1 change: 1 addition & 0 deletions fs/ext2/Kconfig
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
config EXT2_FS
tristate "Second extended fs support"
select FS_IOMAP if FS_DAX
help
Ext2 is a standard Linux file system for hard disks.

Expand Down
1 change: 1 addition & 0 deletions fs/ext2/ext2.h
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
/* inode.c */
extern const struct address_space_operations ext2_aops;
extern const struct address_space_operations ext2_nobh_aops;
extern struct iomap_ops ext2_iomap_ops;

/* namei.c */
extern const struct inode_operations ext2_dir_inode_operations;
Expand Down
Loading

0 comments on commit a1f45e6

Please sign in to comment.