Skip to content

Commit

Permalink
mm: remove enum page_entry_size
Browse files Browse the repository at this point in the history
Remove the unnecessary encoding of page order into an enum and pass the
page order directly.  That lets us get rid of pe_order().

The switch constructs have to be changed to if/else constructs to prevent
GCC from warning on builds with 3-level page tables where PMD_ORDER and
PUD_ORDER have the same value.

If you are looking at this commit because your driver stopped compiling,
look at the previous commit as well and audit your driver to be sure it
doesn't depend on mmap_lock being held in its ->huge_fault method.

[willy@infradead.org: use "order %u" to match the (non dev_t) style]
  Link: https://lkml.kernel.org/r/ZOUYekbtTv+n8hYf@casper.infradead.org
Link: https://lkml.kernel.org/r/20230818202335.2739663-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  • Loading branch information
Matthew Wilcox (Oracle) authored and Andrew Morton committed Aug 24, 2023
1 parent 40d49a3 commit 1d024e7
Show file tree
Hide file tree
Showing 11 changed files with 59 additions and 98 deletions.
22 changes: 8 additions & 14 deletions drivers/dax/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,32 +228,26 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
}
#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */

static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
{
struct file *filp = vmf->vma->vm_file;
vm_fault_t rc = VM_FAULT_SIGBUS;
int id;
struct dev_dax *dev_dax = filp->private_data;

dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) order:%d\n", current->comm,
(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
vmf->vma->vm_start, vmf->vma->vm_end, order);

id = dax_read_lock();
switch (pe_size) {
case PE_SIZE_PTE:
if (order == 0)
rc = __dev_dax_pte_fault(dev_dax, vmf);
break;
case PE_SIZE_PMD:
else if (order == PMD_ORDER)
rc = __dev_dax_pmd_fault(dev_dax, vmf);
break;
case PE_SIZE_PUD:
else if (order == PUD_ORDER)
rc = __dev_dax_pud_fault(dev_dax, vmf);
break;
default:
else
rc = VM_FAULT_SIGBUS;
}

dax_read_unlock(id);

Expand All @@ -262,7 +256,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,

static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
{
return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
return dev_dax_huge_fault(vmf, 0);
}

static int dev_dax_may_split(struct vm_area_struct *vma, unsigned long addr)
Expand Down
30 changes: 8 additions & 22 deletions fs/dax.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>

static inline unsigned int pe_order(enum page_entry_size pe_size)
{
if (pe_size == PE_SIZE_PTE)
return PAGE_SHIFT - PAGE_SHIFT;
if (pe_size == PE_SIZE_PMD)
return PMD_SHIFT - PAGE_SHIFT;
if (pe_size == PE_SIZE_PUD)
return PUD_SHIFT - PAGE_SHIFT;
return ~0;
}

/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
Expand Down Expand Up @@ -1905,7 +1894,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
/**
* dax_iomap_fault - handle a page fault on a DAX file
* @vmf: The description of the fault
* @pe_size: Size of the page to fault in
* @order: Order of the page to fault in
* @pfnp: PFN to insert for synchronous faults if fsync is required
* @iomap_errp: Storage for detailed error code in case of error
* @ops: Iomap ops passed from the file system
Expand All @@ -1915,17 +1904,15 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* has done all the necessary locking for page fault to proceed
* successfully.
*/
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
{
switch (pe_size) {
case PE_SIZE_PTE:
if (order == 0)
return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops);
case PE_SIZE_PMD:
else if (order == PMD_ORDER)
return dax_iomap_pmd_fault(vmf, pfnp, ops);
default:
else
return VM_FAULT_FALLBACK;
}
}
EXPORT_SYMBOL_GPL(dax_iomap_fault);

Expand Down Expand Up @@ -1976,19 +1963,18 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
/**
* dax_finish_sync_fault - finish synchronous page fault
* @vmf: The description of the fault
* @pe_size: Size of entry to be inserted
* @order: Order of entry to be inserted
* @pfn: PFN to insert
*
* This function ensures that the file range touched by the page fault is
* stored persistently on the media and handles inserting of appropriate page
* table entry.
*/
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
enum page_entry_size pe_size, pfn_t pfn)
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
pfn_t pfn)
{
int err;
loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
unsigned int order = pe_order(pe_size);
size_t len = PAGE_SIZE << order;

err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
Expand Down
6 changes: 3 additions & 3 deletions fs/erofs/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -413,14 +413,14 @@ const struct address_space_operations erofs_raw_access_aops = {

#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
unsigned int order)
{
return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
return erofs_dax_huge_fault(vmf, 0);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
Expand Down
2 changes: 1 addition & 1 deletion fs/ext2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
}
filemap_invalidate_lock_shared(inode->i_mapping);

ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);

filemap_invalidate_unlock_shared(inode->i_mapping);
if (write)
Expand Down
11 changes: 5 additions & 6 deletions fs/ext4/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,8 +723,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}

#ifdef CONFIG_FS_DAX
static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
{
int error = 0;
vm_fault_t result;
Expand All @@ -740,7 +739,7 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
* read-only.
*
* We check for VM_SHARED rather than vmf->cow_page since the latter is
* unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
* unset for order != 0 (i.e. it is only set in do_cow_fault); for
* other sizes, dax_iomap_fault will handle splitting / fallback so that
* we eventually come back with a COW page.
*/
Expand All @@ -764,7 +763,7 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
} else {
filemap_invalidate_lock_shared(mapping);
}
result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
if (write) {
ext4_journal_stop(handle);

Expand All @@ -773,7 +772,7 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
goto retry;
/* Handling synchronous page fault? */
if (result & VM_FAULT_NEEDDSYNC)
result = dax_finish_sync_fault(vmf, pe_size, pfn);
result = dax_finish_sync_fault(vmf, order, pfn);
filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(sb);
} else {
Expand All @@ -785,7 +784,7 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,

static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
{
return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
return ext4_dax_huge_fault(vmf, 0);
}

static const struct vm_operations_struct ext4_dax_vm_ops = {
Expand Down
20 changes: 9 additions & 11 deletions fs/fuse/dax.c
Original file line number Diff line number Diff line change
Expand Up @@ -784,8 +784,8 @@ static int fuse_dax_writepages(struct address_space *mapping,
return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
}

static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
enum page_entry_size pe_size, bool write)
static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order,
bool write)
{
vm_fault_t ret;
struct inode *inode = file_inode(vmf->vma->vm_file);
Expand All @@ -809,7 +809,7 @@ static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
* to populate page cache or access memory we are trying to free.
*/
filemap_invalidate_lock_shared(inode->i_mapping);
ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
ret = dax_iomap_fault(vmf, order, &pfn, &error, &fuse_iomap_ops);
if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
error = 0;
retry = true;
Expand All @@ -818,7 +818,7 @@ static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,
}

if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
ret = dax_finish_sync_fault(vmf, order, pfn);
filemap_invalidate_unlock_shared(inode->i_mapping);

if (write)
Expand All @@ -829,24 +829,22 @@ static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf,

static vm_fault_t fuse_dax_fault(struct vm_fault *vmf)
{
return __fuse_dax_fault(vmf, PE_SIZE_PTE,
vmf->flags & FAULT_FLAG_WRITE);
return __fuse_dax_fault(vmf, 0, vmf->flags & FAULT_FLAG_WRITE);
}

static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
{
return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE);
return __fuse_dax_fault(vmf, order, vmf->flags & FAULT_FLAG_WRITE);
}

static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf)
{
return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
return __fuse_dax_fault(vmf, 0, true);
}

static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf)
{
return __fuse_dax_fault(vmf, PE_SIZE_PTE, true);
return __fuse_dax_fault(vmf, 0, true);
}

static const struct vm_operations_struct fuse_dax_vm_ops = {
Expand Down
24 changes: 12 additions & 12 deletions fs/xfs/xfs_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1287,11 +1287,11 @@ xfs_file_llseek(
static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
unsigned int order,
bool write_fault,
pfn_t *pfn)
{
return dax_iomap_fault(vmf, pe_size, pfn, NULL,
return dax_iomap_fault(vmf, order, pfn, NULL,
(write_fault && !vmf->cow_page) ?
&xfs_dax_write_iomap_ops :
&xfs_read_iomap_ops);
Expand All @@ -1300,7 +1300,7 @@ xfs_dax_fault(
static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
unsigned int order,
bool write_fault,
pfn_t *pfn)
{
Expand All @@ -1322,14 +1322,14 @@ xfs_dax_fault(
static vm_fault_t
__xfs_filemap_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
unsigned int order,
bool write_fault)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
vm_fault_t ret;

trace_xfs_filemap_fault(ip, pe_size, write_fault);
trace_xfs_filemap_fault(ip, order, write_fault);

if (write_fault) {
sb_start_pagefault(inode->i_sb);
Expand All @@ -1340,9 +1340,9 @@ __xfs_filemap_fault(
pfn_t pfn;

xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn);
ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
ret = dax_finish_sync_fault(vmf, order, pfn);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
} else {
if (write_fault) {
Expand Down Expand Up @@ -1373,29 +1373,29 @@ xfs_filemap_fault(
struct vm_fault *vmf)
{
/* DAX can shortcut the normal fault path on write faults! */
return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
return __xfs_filemap_fault(vmf, 0,
IS_DAX(file_inode(vmf->vma->vm_file)) &&
xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_huge_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size)
unsigned int order)
{
if (!IS_DAX(file_inode(vmf->vma->vm_file)))
return VM_FAULT_FALLBACK;

/* DAX can shortcut the normal fault path on write faults! */
return __xfs_filemap_fault(vmf, pe_size,
return __xfs_filemap_fault(vmf, order,
xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_page_mkwrite(
struct vm_fault *vmf)
{
return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
return __xfs_filemap_fault(vmf, 0, true);
}

/*
Expand All @@ -1408,7 +1408,7 @@ xfs_filemap_pfn_mkwrite(
struct vm_fault *vmf)
{

return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
return __xfs_filemap_fault(vmf, 0, true);
}

static const struct vm_operations_struct xfs_file_vm_ops = {
Expand Down
20 changes: 6 additions & 14 deletions fs/xfs/xfs_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -802,36 +802,28 @@ DEFINE_INODE_EVENT(xfs_inode_inactivating);
* ring buffer. Somehow this was only worth mentioning in the ftrace sample
* code.
*/
TRACE_DEFINE_ENUM(PE_SIZE_PTE);
TRACE_DEFINE_ENUM(PE_SIZE_PMD);
TRACE_DEFINE_ENUM(PE_SIZE_PUD);

TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);

TRACE_EVENT(xfs_filemap_fault,
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
bool write_fault),
TP_ARGS(ip, pe_size, write_fault),
TP_PROTO(struct xfs_inode *ip, unsigned int order, bool write_fault),
TP_ARGS(ip, order, write_fault),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(enum page_entry_size, pe_size)
__field(unsigned int, order)
__field(bool, write_fault)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->pe_size = pe_size;
__entry->order = order;
__entry->write_fault = write_fault;
),
TP_printk("dev %d:%d ino 0x%llx %s write_fault %d",
TP_printk("dev %d:%d ino 0x%llx order %u write_fault %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_symbolic(__entry->pe_size,
{ PE_SIZE_PTE, "PTE" },
{ PE_SIZE_PMD, "PMD" },
{ PE_SIZE_PUD, "PUD" }),
__entry->order,
__entry->write_fault)
)

Expand Down
Loading

0 comments on commit 1d024e7

Please sign in to comment.