Skip to content

Commit

Permalink
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Browse files Browse the repository at this point in the history

Pull ext2 fix from Jan Kara:
 "Fix for DAX on ext2"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  ext2: Add locking for DAX faults
  • Loading branch information
Linus Torvalds committed Nov 10, 2015
2 parents 373ee21 + 5726b27 commit 123a28d
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 4 deletions.
11 changes: 11 additions & 0 deletions fs/ext2/ext2.h
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,9 @@ struct ext2_inode_info {
struct rw_semaphore xattr_sem;
#endif
rwlock_t i_meta_lock;
#ifdef CONFIG_FS_DAX
struct rw_semaphore dax_sem;
#endif

/*
* truncate_mutex is for serialising ext2_truncate() against
Expand All @@ -699,6 +702,14 @@ struct ext2_inode_info {
#endif
};

/*
 * Helpers for taking and releasing the per-inode dax_sem in write mode.
 *
 * With CONFIG_FS_DAX enabled they expand to down_write()/up_write() on the
 * dax_sem member of the given ext2_inode_info pointer. Without
 * CONFIG_FS_DAX they expand to nothing, so callers do not need their own
 * #ifdef around the lock/unlock pair.
 */
#ifdef CONFIG_FS_DAX
#define dax_sem_down_write(ext2_inode) down_write(&(ext2_inode)->dax_sem)
#define dax_sem_up_write(ext2_inode) up_write(&(ext2_inode)->dax_sem)
#else
#define dax_sem_down_write(ext2_inode)
#define dax_sem_up_write(ext2_inode)
#endif

/*
* Inode dynamic state flags
*/
Expand Down
84 changes: 80 additions & 4 deletions fs/ext2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,27 +27,103 @@
#include "acl.h"

#ifdef CONFIG_FS_DAX
/*
* The lock ordering for ext2 DAX fault paths is:
*
* mmap_sem (MM)
* sb_start_pagefault (vfs, freeze)
* ext2_inode_info->dax_sem
* address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
* ext2_inode_info->truncate_mutex
*
* The default page_lock and i_size verification done by non-DAX fault paths
* is sufficient because ext2 doesn't support hole punching.
*/
/*
 * DAX page-fault handler for ext2.
 *
 * For write faults (FAULT_FLAG_WRITE set in vmf->flags) the fault is
 * bracketed by sb_start_pagefault()/sb_end_pagefault() on the inode's
 * superblock and the file times are updated via file_update_time().
 * For all faults, dax_sem is held for read around the call to
 * __dax_fault(), whose return value is passed back to the caller.
 */
static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
/*
 * NOTE(review): this text is a diff rendered without +/- markers; the
 * next line looks like the pre-change body that the commit removes --
 * confirm against the actual commit.
 */
return dax_fault(vma, vmf, ext2_get_block, NULL);
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
int ret;

/* Only write faults take pagefault freeze protection and touch times. */
if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
}
/* Read side of dax_sem; the truncate paths take it for write. */
down_read(&ei->dax_sem);

ret = __dax_fault(vma, vmf, ext2_get_block, NULL);

up_read(&ei->dax_sem);
if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(inode->i_sb);
return ret;
}

/*
 * DAX PMD-fault handler for ext2.
 *
 * Same structure as the regular fault handler, but the fault flags arrive
 * in the separate @flags argument: write faults are wrapped in
 * sb_start_pagefault()/sb_end_pagefault() and update the file times, and
 * dax_sem is held for read around __dax_pmd_fault(), whose return value is
 * passed back to the caller.
 */
static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, unsigned int flags)
{
/*
 * NOTE(review): this text is a diff rendered without +/- markers; the
 * next line looks like the pre-change body that the commit removes --
 * confirm against the actual commit.
 */
return dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL);
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
int ret;

/* Only write faults take pagefault freeze protection and touch times. */
if (flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
}
/* Read side of dax_sem; the truncate paths take it for write. */
down_read(&ei->dax_sem);

ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL);

up_read(&ei->dax_sem);
if (flags & FAULT_FLAG_WRITE)
sb_end_pagefault(inode->i_sb);
return ret;
}

/*
 * DAX page_mkwrite handler for ext2.
 *
 * Unconditionally enters sb_start_pagefault() and updates the file times
 * (no FAULT_FLAG_WRITE test here, unlike the fault handlers), then holds
 * dax_sem for read around __dax_mkwrite() and returns its result.
 */
static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
/*
 * NOTE(review): this text is a diff rendered without +/- markers; the
 * next line looks like the pre-change body that the commit removes --
 * confirm against the actual commit.
 */
return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
int ret;

sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
/* Read side of dax_sem; the truncate paths take it for write. */
down_read(&ei->dax_sem);

ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);

up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}

/*
 * DAX pfn_mkwrite handler for ext2.
 *
 * Does no block mapping itself: under pagefault freeze protection and with
 * dax_sem held for read, it updates the file times and checks that the
 * faulting page offset still lies within the file.
 *
 * Returns VM_FAULT_NOPAGE when vmf->pgoff is inside i_size (rounded up to
 * whole pages), or VM_FAULT_SIGBUS when it is at or beyond that size.
 */
static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
int ret = VM_FAULT_NOPAGE;
loff_t size;

sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
down_read(&ei->dax_sem);

/* check that the faulting page hasn't raced with truncate */
/* size is the file length in pages, rounded up to a full page */
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;

up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}

/*
 * VM operations table wiring the ext2 DAX handlers defined in this file
 * to the fault, pmd_fault, page_mkwrite and pfn_mkwrite callbacks.
 */
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
.pmd_fault = ext2_dax_pmd_fault,
.page_mkwrite = ext2_dax_mkwrite,
/*
 * NOTE(review): this text is a diff rendered without +/- markers; the
 * first .pfn_mkwrite line below looks like the removed initializer and
 * the second like its replacement -- confirm against the actual commit.
 */
.pfn_mkwrite = dax_pfn_mkwrite,
.pfn_mkwrite = ext2_dax_pfn_mkwrite,
};

static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
Expand Down
10 changes: 10 additions & 0 deletions fs/ext2/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de
ext2_free_data(inode, p, q);
}

/* dax_sem must be held when calling this function */
static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
__le32 *i_data = EXT2_I(inode)->i_data;
Expand All @@ -1100,6 +1101,10 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
blocksize = inode->i_sb->s_blocksize;
iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);

#ifdef CONFIG_FS_DAX
WARN_ON(!rwsem_is_locked(&ei->dax_sem));
#endif

n = ext2_block_to_path(inode, iblock, offsets, NULL);
if (n == 0)
return;
Expand Down Expand Up @@ -1185,7 +1190,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
return;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;

dax_sem_down_write(EXT2_I(inode));
__ext2_truncate_blocks(inode, offset);
dax_sem_up_write(EXT2_I(inode));
}

static int ext2_setsize(struct inode *inode, loff_t newsize)
Expand Down Expand Up @@ -1213,8 +1221,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
if (error)
return error;

dax_sem_down_write(EXT2_I(inode));
truncate_setsize(inode, newsize);
__ext2_truncate_blocks(inode, newsize);
dax_sem_up_write(EXT2_I(inode));

inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
if (inode_needs_sync(inode)) {
Expand Down
3 changes: 3 additions & 0 deletions fs/ext2/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ static void init_once(void *foo)
init_rwsem(&ei->xattr_sem);
#endif
mutex_init(&ei->truncate_mutex);
#ifdef CONFIG_FS_DAX
init_rwsem(&ei->dax_sem);
#endif
inode_init_once(&ei->vfs_inode);
}

Expand Down

0 comments on commit 123a28d

Please sign in to comment.