Skip to content

Commit

Permalink
Merge tag 'fsnotify_for_v6.14-rc7' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/jack/linux-fs

Pull fsnotify reverts from Jan Kara:
 "Syzbot has found out that fsnotify HSM events generated on page fault
  can be generated while we already hold freeze protection for the
  filesystem (when you do buffered write from a buffer which is mmapped
  file on the same filesystem) which violates expectations for HSM
  events and could lead to deadlocks of HSM clients with filesystem
  freezing.

  Since it's quite late in the cycle we've decided to revert changes
  implementing HSM events on page fault for now and instead just
  generate one event for the whole range on mmap(2) so that HSM client
  can fetch the data at that moment"

* tag 'fsnotify_for_v6.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  Revert "fanotify: disable readahead if we have pre-content watches"
  Revert "mm: don't allow huge faults for files with pre content watches"
  Revert "fsnotify: generate pre-content permission event on page fault"
  Revert "xfs: add pre-content fsnotify hook for DAX faults"
  Revert "ext4: add pre-content fsnotify hook for DAX faults"
  fsnotify: add pre-content hooks on mmap()
  • Loading branch information
Linus Torvalds committed Mar 15, 2025
2 parents 3571e8b + 252256e commit eb88e6b
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 143 deletions.
3 changes: 0 additions & 3 deletions fs/ext4/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -756,9 +756,6 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
return VM_FAULT_SIGBUS;
}
} else {
result = filemap_fsnotify_fault(vmf);
if (unlikely(result))
return result;
filemap_invalidate_lock_shared(mapping);
}
result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
Expand Down
13 changes: 0 additions & 13 deletions fs/xfs/xfs_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1451,9 +1451,6 @@ xfs_dax_read_fault(

trace_xfs_read_fault(ip, order);

ret = filemap_fsnotify_fault(vmf);
if (unlikely(ret))
return ret;
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
ret = xfs_dax_fault_locked(vmf, order, false);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
Expand Down Expand Up @@ -1482,16 +1479,6 @@ xfs_write_fault(
vm_fault_t ret;

trace_xfs_write_fault(ip, order);
/*
* Usually we get here from ->page_mkwrite callback but in case of DAX
* we will get here also for ordinary write fault. Handle HSM
* notifications for that case.
*/
if (IS_DAX(inode)) {
ret = filemap_fsnotify_fault(vmf);
if (unlikely(ret))
return ret;
}

sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
Expand Down
21 changes: 21 additions & 0 deletions include/linux/fsnotify.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
return fsnotify_path(&file->f_path, FS_ACCESS_PERM);
}

/*
 * fsnotify_mmap_perm - permission hook before mmap of file range
 *
 * Returns 0 when no event is needed (no file, or no pre-content watch),
 * otherwise the result of the pre-content permission event.
 */
static inline int fsnotify_mmap_perm(struct file *file, int prot,
				     const loff_t off, size_t len)
{
	/*
	 * mmap() generates only pre-content events, so files without an
	 * HSM (pre-content) watch need no notification at all.
	 */
	if (file && unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
		return fsnotify_pre_content(&file->f_path, &off, len);

	return 0;
}

/*
* fsnotify_truncate_perm - permission hook before file truncate
*/
Expand Down Expand Up @@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
return 0;
}

/*
 * No-op stub paired with fsnotify_truncate_perm() below; presumably the
 * branch where fsnotify permission events are compiled out (the #if guard
 * is outside this hunk — TODO confirm). Always grants permission.
 */
static inline int fsnotify_mmap_perm(struct file *file, int prot,
const loff_t off, size_t len)
{
return 0;
}

static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
{
return 0;
Expand Down
1 change: 0 additions & 1 deletion include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);

extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
Expand Down
86 changes: 0 additions & 86 deletions mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
#include <linux/splice.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
Expand Down Expand Up @@ -3198,14 +3197,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
unsigned long vm_flags = vmf->vma->vm_flags;
unsigned int mmap_miss;

/*
* If we have pre-content watches we need to disable readahead to make
* sure that we don't populate our mapping with 0 filled pages that we
* never emitted an event for.
*/
if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
return fpin;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
Expand Down Expand Up @@ -3274,10 +3265,6 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
struct file *fpin = NULL;
unsigned int mmap_miss;

/* See comment in do_sync_mmap_readahead. */
if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
return fpin;

/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
return fpin;
Expand Down Expand Up @@ -3336,48 +3323,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
return ret;
}

/**
* filemap_fsnotify_fault - maybe emit a pre-content event.
* @vmf: struct vm_fault containing details of the fault.
*
* If we have a pre-content watch on this file we will emit an event for this
* range. If we return anything the fault caller should return immediately, we
* will return VM_FAULT_RETRY if we had to emit an event, which will trigger the
* fault again and then the fault handler will run the second time through.
*
* Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened.
*/
vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
{
struct file *fpin = NULL;
int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS;
loff_t pos = vmf->pgoff >> PAGE_SHIFT;
size_t count = PAGE_SIZE;
int err;

/*
* We already did this and now we're retrying with everything locked,
* don't emit the event and continue.
*/
if (vmf->flags & FAULT_FLAG_TRIED)
return 0;

/* No watches, we're done. */
if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode)))
return 0;

fpin = maybe_unlock_mmap_for_io(vmf, fpin);
if (!fpin)
return VM_FAULT_SIGBUS;

err = fsnotify_file_area_perm(fpin, mask, &pos, count);
fput(fpin);
if (err)
return VM_FAULT_SIGBUS;
return VM_FAULT_RETRY;
}
EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);

/**
* filemap_fault - read in file data for page fault handling
* @vmf: struct vm_fault containing details of the fault
Expand Down Expand Up @@ -3481,37 +3426,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
* or because readahead was otherwise unable to retrieve it.
*/
if (unlikely(!folio_test_uptodate(folio))) {
/*
* If this is a precontent file we can now emit an event to
* try and populate the folio.
*/
if (!(vmf->flags & FAULT_FLAG_TRIED) &&
unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
loff_t pos = folio_pos(folio);
size_t count = folio_size(folio);

/* We're NOWAIT, we have to retry. */
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) {
folio_unlock(folio);
goto out_retry;
}

if (mapping_locked)
filemap_invalidate_unlock_shared(mapping);
mapping_locked = false;

folio_unlock(folio);
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
if (!fpin)
goto out_retry;

error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos,
count);
if (error)
ret = VM_FAULT_SIGBUS;
goto out_retry;
}

/*
* If the invalidate lock is not held, the folio was in cache
* and uptodate and now it is not. Strange but possible since we
Expand Down
19 changes: 0 additions & 19 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
#include <linux/fsnotify.h>

#include <trace/events/kmem.h>

Expand Down Expand Up @@ -5750,17 +5749,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;

if (vma_is_anonymous(vma))
return do_huge_pmd_anonymous_page(vmf);
/*
* Currently we just emit PAGE_SIZE for our fault events, so don't allow
* a huge fault if we have a pre content watch on this file. This would
* be trivial to support, but there would need to be tests to ensure
* this works properly and those don't exist currently.
*/
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
return VM_FAULT_FALLBACK;
if (vma->vm_ops->huge_fault)
return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
return VM_FAULT_FALLBACK;
Expand All @@ -5784,9 +5774,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
}

if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
/* See comment in create_huge_pmd. */
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
goto split;
if (vma->vm_ops->huge_fault) {
ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))
Expand All @@ -5809,9 +5796,6 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vma))
return VM_FAULT_FALLBACK;
/* See comment in create_huge_pmd. */
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
return VM_FAULT_FALLBACK;
if (vma->vm_ops->huge_fault)
return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
Expand All @@ -5829,9 +5813,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
if (vma_is_anonymous(vma))
goto split;
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
/* See comment in create_huge_pmd. */
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
goto split;
if (vma->vm_ops->huge_fault) {
ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))
Expand Down
7 changes: 0 additions & 7 deletions mm/nommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);

/*
 * nommu stub: reaching a filemap fault on a !MMU build is a kernel bug,
 * so trap immediately. The return only satisfies the prototype.
 */
vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
{
BUG();
return 0;
}
EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);

vm_fault_t filemap_fault(struct vm_fault *vmf)
{
BUG();
Expand Down
14 changes: 0 additions & 14 deletions mm/readahead.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>

#include "internal.h"

Expand Down Expand Up @@ -558,15 +557,6 @@ void page_cache_sync_ra(struct readahead_control *ractl,
unsigned long max_pages, contig_count;
pgoff_t prev_index, miss;

/*
* If we have pre-content watches we need to disable readahead to make
* sure that we don't find 0 filled pages in cache that we never emitted
* events for. Filesystems supporting HSM must make sure to not call
* this function with ractl->file unset for files handled by HSM.
*/
if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
return;

/*
* Even if readahead is disabled, issue this request as readahead
* as we'll need it to satisfy the requested range. The forced
Expand Down Expand Up @@ -645,10 +635,6 @@ void page_cache_async_ra(struct readahead_control *ractl,
if (!ra->ra_pages)
return;

/* See the comment in page_cache_sync_ra. */
if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
return;

/*
* Same bit is used for PG_readahead and PG_reclaim.
*/
Expand Down
3 changes: 3 additions & 0 deletions mm/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <linux/processor.h>
#include <linux/sizes.h>
#include <linux/compat.h>
#include <linux/fsnotify.h>

#include <linux/uaccess.h>

Expand Down Expand Up @@ -569,6 +570,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
LIST_HEAD(uf);

ret = security_mmap_file(file, prot, flag);
if (!ret)
ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
if (!ret) {
if (mmap_write_lock_killable(mm))
return -EINTR;
Expand Down

0 comments on commit eb88e6b

Please sign in to comment.