Skip to content

Commit

Permalink
Merge tag 'nfsd-5.19' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/cel/linux

Pull nfsd updates from Chuck Lever:
 "We introduce 'courteous server' in this release. Previously NFSD would
  purge open and lock state for an unresponsive client after one lease
  period (typically 90 seconds). Now, after one lease period, another
  client can open and lock those files and the unresponsive client's
  lease is purged; otherwise if the unresponsive client's open and lock
  state is uncontended, the server retains that open and lock state for
  up to 24 hours, allowing the client's workload to resume after a
  lengthy network partition.

  A longstanding issue with NFSv4 file creation is also addressed.
  Previously a file creation can fail internally, returning an error to
  the client, but leave the newly created file in place as an artifact.
  The file creation code path has been reorganized so that internal
  failures and race conditions are less likely to result in an unwanted
  file creation.

  A fault injector has been added to help exercise paths that are run
  during kernel metadata cache invalidation. These caches contain
  information maintained by user space about exported filesystems. Many
  of our test workloads do not trigger cache invalidation.

  There is one patch that is needed to support PREEMPT_RT and a fix for
  an ancient 'sleep while spin-locked' splat that seems to have become
  easier to hit since v5.18-rc3"

* tag 'nfsd-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (36 commits)
  NFSD: nfsd_file_put() can sleep
  NFSD: Add documenting comment for nfsd4_release_lockowner()
  NFSD: Modernize nfsd4_release_lockowner()
  NFSD: Fix possible sleep during nfsd4_release_lockowner()
  nfsd: destroy percpu stats counters after reply cache shutdown
  nfsd: Fix null-ptr-deref in nfsd_fill_super()
  nfsd: Unregister the cld notifier when laundry_wq create failed
  SUNRPC: Use RMW bitops in single-threaded hot paths
  NFSD: Clean up the show_nf_flags() macro
  NFSD: Trace filecache opens
  NFSD: Move documenting comment for nfsd4_process_open2()
  NFSD: Fix whitespace
  NFSD: Remove dprintk call sites from tail of nfsd4_open()
  NFSD: Instantiate a struct file when creating a regular NFSv4 file
  NFSD: Clean up nfsd_open_verified()
  NFSD: Remove do_nfsd_create()
  NFSD: Refactor NFSv4 OPEN(CREATE)
  NFSD: Refactor NFSv3 CREATE
  NFSD: Refactor nfsd_create_setattr()
  NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()
  ...
  • Loading branch information
Linus Torvalds committed May 27, 2022
2 parents 7f50d4d + 08af54b commit 6d29d7f
Show file tree
Hide file tree
Showing 30 changed files with 985 additions and 426 deletions.
4 changes: 4 additions & 0 deletions Documentation/filesystems/locking.rst
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ prototypes::
void (*lm_break)(struct file_lock *); /* break_lease callback */
int (*lm_change)(struct file_lock **, int);
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *);
void (*lm_expire_lock)(void);

locking rules:

Expand All @@ -445,6 +447,8 @@ lm_grant: no no no
lm_break: yes no no
lm_change yes no no
lm_breaker_owns_lease: yes no no
lm_lock_expirable yes no no
lm_expire_lock no no yes
====================== ============= ================= =========

buffer_head
Expand Down
61 changes: 58 additions & 3 deletions fs/locks.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,34 @@ void locks_release_private(struct file_lock *fl)
}
EXPORT_SYMBOL_GPL(locks_release_private);

/**
* locks_owner_has_blockers - Check for blocking lock requests
* @flctx: file lock context
* @owner: lock owner
*
* Return values:
* %true: @owner has at least one blocker
* %false: @owner has no blockers
*/
bool locks_owner_has_blockers(struct file_lock_context *flctx,
fl_owner_t owner)
{
struct file_lock *fl;

spin_lock(&flctx->flc_lock);
list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
if (fl->fl_owner != owner)
continue;
if (!list_empty(&fl->fl_blocked_requests)) {
spin_unlock(&flctx->flc_lock);
return true;
}
}
spin_unlock(&flctx->flc_lock);
return false;
}
EXPORT_SYMBOL_GPL(locks_owner_has_blockers);

/* Free a lock which is not in use. */
void locks_free_lock(struct file_lock *fl)
{
Expand Down Expand Up @@ -874,19 +902,32 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
struct file_lock *cfl;
struct file_lock_context *ctx;
struct inode *inode = locks_inode(filp);
void *owner;
void (*func)(void);

ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
fl->fl_type = F_UNLCK;
return;
}

retry:
spin_lock(&ctx->flc_lock);
list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
if (posix_locks_conflict(fl, cfl)) {
locks_copy_conflock(fl, cfl);
goto out;
if (!posix_locks_conflict(fl, cfl))
continue;
if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
owner = cfl->fl_lmops->lm_mod_owner;
func = cfl->fl_lmops->lm_expire_lock;
__module_get(owner);
spin_unlock(&ctx->flc_lock);
(*func)();
module_put(owner);
goto retry;
}
locks_copy_conflock(fl, cfl);
goto out;
}
fl->fl_type = F_UNLCK;
out:
Expand Down Expand Up @@ -1060,6 +1101,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
int error;
bool added = false;
LIST_HEAD(dispose);
void *owner;
void (*func)(void);

ctx = locks_get_lock_context(inode, request->fl_type);
if (!ctx)
Expand All @@ -1078,6 +1121,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
new_fl2 = locks_alloc_lock();
}

retry:
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
/*
Expand All @@ -1089,6 +1133,17 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
if (!posix_locks_conflict(request, fl))
continue;
if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
owner = fl->fl_lmops->lm_mod_owner;
func = fl->fl_lmops->lm_expire_lock;
__module_get(owner);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
(*func)();
module_put(owner);
goto retry;
}
if (conflock)
locks_copy_conflock(conflock, fl);
error = -EAGAIN;
Expand Down
54 changes: 47 additions & 7 deletions fs/nfsd/filecache.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
void
nfsd_file_put(struct nfsd_file *nf)
{
might_sleep();

set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
nfsd_file_flush(nf);
Expand Down Expand Up @@ -899,9 +901,9 @@ nfsd_file_is_cached(struct inode *inode)
return ret;
}

__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
__be32 status;
struct net *net = SVC_NET(rqstp);
Expand Down Expand Up @@ -996,10 +998,14 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_file_gc();

nf->nf_mark = nfsd_file_mark_find_or_create(nf);
if (nf->nf_mark)
status = nfsd_open_verified(rqstp, fhp, S_IFREG,
may_flags, &nf->nf_file);
else
if (nf->nf_mark) {
if (open) {
status = nfsd_open_verified(rqstp, fhp, may_flags,
&nf->nf_file);
trace_nfsd_file_open(nf, status);
} else
status = nfs_ok;
} else
status = nfserr_jukebox;
/*
* If construction failed, or we raced with a call to unlink()
Expand All @@ -1019,6 +1025,40 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}

/**
* nfsd_file_acquire - Get a struct nfsd_file with an open file
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file to be opened
* @may_flags: NFSD_MAY_ settings for the file
* @pnf: OUT: new or found "struct nfsd_file" object
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}

/**
* nfsd_file_create - Get a struct nfsd_file, do not open
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file just created
* @may_flags: NFSD_MAY_ settings for the file
* @pnf: OUT: new or found "struct nfsd_file" object
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
}

/*
* Note that fields may be added, removed or reordered in the future. Programs
* scraping this file for info should test the labels to ensure they're
Expand Down
2 changes: 2 additions & 0 deletions fs/nfsd/filecache.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,7 @@ void nfsd_file_close_inode_sync(struct inode *inode);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
int nfsd_file_cache_stats_open(struct inode *, struct file *);
#endif /* _FS_NFSD_FILECACHE_H */
141 changes: 122 additions & 19 deletions fs/nfsd/nfs3proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/magic.h>
#include <linux/namei.h>

#include "cache.h"
#include "xdr3.h"
Expand Down Expand Up @@ -220,17 +221,132 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
}

/*
* With NFSv3, CREATE processing is a lot easier than with NFSv2.
* At least in theory; we'll see how it fares in practice when the
* first reports about SunOS compatibility problems start to pour in...
* Implement NFSv3's unchecked, guarded, and exclusive CREATE
* semantics for regular files. Except for the created file,
* this operation is stateless on the server.
*
* Upon return, caller must release @fhp and @resfhp.
*/
static __be32
nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct svc_fh *resfhp, struct nfsd3_createargs *argp)
{
struct iattr *iap = &argp->attrs;
struct dentry *parent, *child;
__u32 v_mtime, v_atime;
struct inode *inode;
__be32 status;
int host_err;

if (isdotent(argp->name, argp->len))
return nfserr_exist;
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;

status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (status != nfs_ok)
return status;

parent = fhp->fh_dentry;
inode = d_inode(parent);

host_err = fh_want_write(fhp);
if (host_err)
return nfserrno(host_err);

fh_lock_nested(fhp, I_MUTEX_PARENT);

child = lookup_one_len(argp->name, parent, argp->len);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
}

if (d_really_is_negative(child)) {
status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (status != nfs_ok)
goto out;
}

status = fh_compose(resfhp, fhp->fh_export, child, fhp);
if (status != nfs_ok)
goto out;

v_mtime = 0;
v_atime = 0;
if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
u32 *verifier = (u32 *)argp->verf;

/*
* Solaris 7 gets confused (bugid 4218508) if these have
* the high bit set, as do xfs filesystems without the
* "bigtime" feature. So just clear the high bits.
*/
v_mtime = verifier[0] & 0x7fffffff;
v_atime = verifier[1] & 0x7fffffff;
}

if (d_really_is_positive(child)) {
status = nfs_ok;

switch (argp->createmode) {
case NFS3_CREATE_UNCHECKED:
if (!d_is_reg(child))
break;
iap->ia_valid &= ATTR_SIZE;
goto set_attr;
case NFS3_CREATE_GUARDED:
status = nfserr_exist;
break;
case NFS3_CREATE_EXCLUSIVE:
if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
d_inode(child)->i_atime.tv_sec == v_atime &&
d_inode(child)->i_size == 0) {
break;
}
status = nfserr_exist;
}
goto out;
}

if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();

host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
if (host_err < 0) {
status = nfserrno(host_err);
goto out;
}

/* A newly created file already has a file size of zero. */
if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
iap->ia_valid &= ~ATTR_SIZE;
if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
ATTR_MTIME_SET | ATTR_ATIME_SET;
iap->ia_mtime.tv_sec = v_mtime;
iap->ia_atime.tv_sec = v_atime;
iap->ia_mtime.tv_nsec = 0;
iap->ia_atime.tv_nsec = 0;
}

set_attr:
status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);

out:
fh_unlock(fhp);
if (child && !IS_ERR(child))
dput(child);
fh_drop_write(fhp);
return status;
}

static __be32
nfsd3_proc_create(struct svc_rqst *rqstp)
{
struct nfsd3_createargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
svc_fh *dirfhp, *newfhp = NULL;
struct iattr *attr;
svc_fh *dirfhp, *newfhp;

dprintk("nfsd: CREATE(3) %s %.*s\n",
SVCFH_fmt(&argp->fh),
Expand All @@ -239,21 +355,8 @@ nfsd3_proc_create(struct svc_rqst *rqstp)

dirfhp = fh_copy(&resp->dirfh, &argp->fh);
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
attr = &argp->attrs;

/* Unfudge the mode bits */
attr->ia_mode &= ~S_IFMT;
if (!(attr->ia_valid & ATTR_MODE)) {
attr->ia_valid |= ATTR_MODE;
attr->ia_mode = S_IFREG;
} else {
attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
}

/* Now create the file and set attributes */
resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
attr, newfhp, argp->createmode,
(u32 *)argp->verf, NULL, NULL);
resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
return rpc_success;
}

Expand Down
Loading

0 comments on commit 6d29d7f

Please sign in to comment.