From 12dd7ecf2323c572b1d302707eada4900848dced Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 1 May 2014 14:15:02 -0700 Subject: [PATCH 001/100] lockd: avoid warning when CONFIG_SYSCTL undefined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building without CONFIG_SYSCTL, the compiler saw an unused label. This moves the label into the #ifdef it is used under. fs/lockd/svc.c: In function ‘init_nlm’: fs/lockd/svc.c:626:1: warning: label ‘err_sysctl’ defined but not used [-Wunused-label] Signed-off-by: Kees Cook Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 6bf06a07f3e0..2b431f7266c3 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -622,8 +622,8 @@ static int __init init_nlm(void) err_pernet: #ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); -#endif err_sysctl: +#endif return err; } From 4dd86e150f63a6c360783f163a979ec563e6d570 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Apr 2014 14:43:58 -0400 Subject: [PATCH 002/100] NFSd: Remove 'inline' designation for free_client() It is large, it is used in more than one place, and it is not performance critical. Let gcc figure out whether it should be inlined... Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 32b699bebb9c..841495aa9170 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1093,7 +1093,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return clp; } -static inline void +static void free_client(struct nfs4_client *clp) { struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); From 9c69de4c94fcb11db919160d5fa0b48f13d1757a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:13 +0200 Subject: [PATCH 003/100] nfsd: remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only real user of this header is fs/nfsd/nfsfh.h, so merge the two. Various lockѕ source files used it to indirectly get other sunrpc or nfs headers, so fix those up. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/lockd/clnt4xdr.c | 2 ++ fs/lockd/clntxdr.c | 2 ++ fs/lockd/svcsubs.c | 2 +- fs/lockd/xdr.c | 2 ++ fs/nfsd/nfsd.h | 1 + fs/nfsd/nfsfh.h | 59 +++++++++++++++++++++++++++++++--- fs/nfsd/state.h | 1 - include/linux/lockd/lockd.h | 2 +- include/linux/nfsd/export.h | 6 +++- include/linux/nfsd/nfsfh.h | 63 ------------------------------------- 10 files changed, 68 insertions(+), 72 deletions(-) delete mode 100644 include/linux/nfsd/nfsfh.h diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 00ec0b9c94d1..d3e40db28930 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -14,6 +14,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 9a55797a1cd4..3e9f7874b975 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -15,6 +15,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index dc5c75930f0f..7ec6b1074d8c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -14,12 +14,12 @@ #include #include #include -#include #include #include #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_SVCSUBS diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 964666c68a86..9340e7e10ef6 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -16,6 +16,8 @@ #include #include +#include + #define NLMDBG_FACILITY NLMDBG_XDR diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 479eb681c27c..7d5c310678d0 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index ad67964d0bb1..2e89e70ac15c 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -1,9 +1,58 @@ -/* Copyright (C) 1995, 1996, 1997 Olaf Kirch */ +/* + * Copyright (C) 1995, 1996, 1997 Olaf Kirch + * + * This file describes the layout of the file handles as passed + * over the wire. + */ +#ifndef _LINUX_NFSD_NFSFH_H +#define _LINUX_NFSD_NFSFH_H + +#include +#include + +static inline __u32 ino_t_to_u32(ino_t ino) +{ + return (__u32) ino; +} + +static inline ino_t u32_to_ino_t(__u32 uino) +{ + return (ino_t) uino; +} -#ifndef _LINUX_NFSD_FH_INT_H -#define _LINUX_NFSD_FH_INT_H +/* + * This is the internal representation of an NFS handle used in knfsd. + * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. + */ +typedef struct svc_fh { + struct knfsd_fh fh_handle; /* FH data */ + struct dentry * fh_dentry; /* validated dentry */ + struct svc_export * fh_export; /* export pointer */ + int fh_maxsize; /* max size for fh_handle */ + + unsigned char fh_locked; /* inode locked by us */ + unsigned char fh_want_write; /* remount protection taken */ + +#ifdef CONFIG_NFSD_V3 + unsigned char fh_post_saved; /* post-op attrs saved */ + unsigned char fh_pre_saved; /* pre-op attrs saved */ + + /* Pre-op attributes saved during fh_lock */ + __u64 fh_pre_size; /* size before operation */ + struct timespec fh_pre_mtime; /* mtime before oper */ + struct timespec fh_pre_ctime; /* ctime before oper */ + /* + * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) + * to find out if it is valid. + */ + u64 fh_pre_change; + + /* Post-op attributes saved in fh_unlock */ + struct kstat fh_post_attr; /* full attrs after operation */ + u64 fh_post_change; /* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ -#include +} svc_fh; enum nfsd_fsid { FSID_DEV = 0, @@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp) } } -#endif /* _LINUX_NFSD_FH_INT_H */ +#endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 424d8f5f2317..5b3bbf24097c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -37,7 +37,6 @@ #include #include -#include #include "nfsfh.h" typedef struct { diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index dcaad79f54ed..219d79627c05 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -17,13 +17,13 @@ #include #include #include -#include #include #include #ifdef CONFIG_LOCKD_V4 #include #endif #include +#include /* * Version string diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 7898c997dfea..b12c4e526ef2 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -9,9 +9,13 @@ #ifndef NFSD_EXPORT_H #define NFSD_EXPORT_H -# include +#include #include +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + /* * FS Locations */ diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h deleted file mode 100644 index a93593f1fa4e..000000000000 --- a/include/linux/nfsd/nfsfh.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * include/linux/nfsd/nfsfh.h - * - * This file describes the layout of the file handles as passed - * over the wire. - * - * Earlier versions of knfsd used to sign file handles using keyed MD5 - * or SHA. I've removed this code, because it doesn't give you more - * security than blocking external access to port 2049 on your firewall. - * - * Copyright (C) 1995, 1996, 1997 Olaf Kirch - */ -#ifndef _LINUX_NFSD_FH_H -#define _LINUX_NFSD_FH_H - -# include -#include - -static inline __u32 ino_t_to_u32(ino_t ino) -{ - return (__u32) ino; -} - -static inline ino_t u32_to_ino_t(__u32 uino) -{ - return (ino_t) uino; -} - -/* - * This is the internal representation of an NFS handle used in knfsd. - * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. - */ -typedef struct svc_fh { - struct knfsd_fh fh_handle; /* FH data */ - struct dentry * fh_dentry; /* validated dentry */ - struct svc_export * fh_export; /* export pointer */ - int fh_maxsize; /* max size for fh_handle */ - - unsigned char fh_locked; /* inode locked by us */ - unsigned char fh_want_write; /* remount protection taken */ - -#ifdef CONFIG_NFSD_V3 - unsigned char fh_post_saved; /* post-op attrs saved */ - unsigned char fh_pre_saved; /* pre-op attrs saved */ - - /* Pre-op attributes saved during fh_lock */ - __u64 fh_pre_size; /* size before operation */ - struct timespec fh_pre_mtime; /* mtime before oper */ - struct timespec fh_pre_ctime; /* ctime before oper */ - /* - * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) - * to find out if it is valid. - */ - u64 fh_pre_change; - - /* Post-op attributes saved in fh_unlock */ - struct kstat fh_post_attr; /* full attrs after operation */ - u64 fh_post_change; /* nfsv4 change; see above */ -#endif /* CONFIG_NFSD_V3 */ - -} svc_fh; - -#endif /* _LINUX_NFSD_FH_H */ From d430e8d530e900c923bf77718d72478b1c280592 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:14 +0200 Subject: [PATCH 004/100] nfsd: move to fs/nfsd There are no legitimate users outside of fs/nfsd, so move it there. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/lockd/svcsubs.c | 1 - {include/linux => fs}/nfsd/export.h | 5 ----- fs/nfsd/nfsd.h | 3 ++- 3 files changed, 2 insertions(+), 7 deletions(-) rename {include/linux => fs}/nfsd/export.h (95%) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 7ec6b1074d8c..b6f3b84b6e99 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/nfsd/export.h b/fs/nfsd/export.h similarity index 95% rename from include/linux/nfsd/export.h rename to fs/nfsd/export.h index b12c4e526ef2..d7939a62a0ae 100644 --- a/include/linux/nfsd/export.h +++ b/fs/nfsd/export.h @@ -1,9 +1,4 @@ /* - * include/linux/nfsd/export.h - * - * Public declarations for NFS exports. The definitions for the - * syscall interface are in nfsctl.h - * * Copyright (C) 1995-1997 Olaf Kirch */ #ifndef NFSD_EXPORT_H diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7d5c310678d0..72004caad718 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,9 +19,10 @@ #include #include -#include #include +#include "export.h" + /* * nfsd version */ From 7f94423e8fcc1e0f3416be76d3da0982f586d565 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:15 +0200 Subject: [PATCH 005/100] nfsd: move to fs/nfsd There are no legitimate users outside of fs/nfsd, so move it there. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 2 +- fs/nfsd/stats.c | 1 - {include/linux => fs}/nfsd/stats.h | 8 +++----- 3 files changed, 4 insertions(+), 7 deletions(-) rename {include/linux => fs}/nfsd/stats.h (91%) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 72004caad718..7a07f9c6ee78 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,8 +19,8 @@ #include #include -#include +#include "stats.h" #include "export.h" /* diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 6d4521feb6e3..cd90878a76aa 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "nfsd.h" diff --git a/include/linux/nfsd/stats.h b/fs/nfsd/stats.h similarity index 91% rename from include/linux/nfsd/stats.h rename to fs/nfsd/stats.h index e75b2544ff12..a5c944b771c6 100644 --- a/include/linux/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -1,12 +1,10 @@ /* - * linux/include/linux/nfsd/stats.h - * * Statistics for NFS server. * * Copyright (C) 1995, 1996 Olaf Kirch */ -#ifndef LINUX_NFSD_STATS_H -#define LINUX_NFSD_STATS_H +#ifndef _NFSD_STATS_H +#define _NFSD_STATS_H #include @@ -42,4 +40,4 @@ extern struct svc_stat nfsd_svcstats; void nfsd_stat_init(void); void nfsd_stat_shutdown(void); -#endif /* LINUX_NFSD_STATS_H */ +#endif /* _NFSD_STATS_H */ From 6f226e2ab1b895c8685e868af0a5f797fcaaaf57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 19:37:16 +0200 Subject: [PATCH 006/100] nfsd: remove There is almost nothing left it in, just merge it into the only file that includes it. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 9 ++++++++- include/linux/nfsd/debug.h | 19 ------------------- 2 files changed, 8 insertions(+), 20 deletions(-) delete mode 100644 include/linux/nfsd/debug.h diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7a07f9c6ee78..e9f2fd42d184 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -18,11 +18,18 @@ #include #include -#include +#include #include "stats.h" #include "export.h" +#undef ifdebug +#ifdef NFSD_DEBUG +# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) +#else +# define ifdebug(flag) if (0) +#endif + /* * nfsd version */ diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h deleted file mode 100644 index 19ef8375b577..000000000000 --- a/include/linux/nfsd/debug.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * linux/include/linux/nfsd/debug.h - * - * Debugging-related stuff for nfsd - * - * Copyright (C) 1995 Olaf Kirch - */ -#ifndef LINUX_NFSD_DEBUG_H -#define LINUX_NFSD_DEBUG_H - -#include - -# undef ifdebug -# ifdef NFSD_DEBUG -# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) -# else -# define ifdebug(flag) if (0) -# endif -#endif /* LINUX_NFSD_DEBUG_H */ From 50cc62317dece5173b56aa8d3569930b065ce47d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Apr 2014 14:44:03 -0400 Subject: [PATCH 007/100] NFSd: Mark nfs4_free_lockowner and nfs4_free_openowner as static functions They do not need to be used outside fs/nfsd/nfs4state.c Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 24 ++++++++++++------------ fs/nfsd/state.h | 2 -- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 841495aa9170..fcab9091f094 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -645,6 +645,12 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) } } +static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +{ + kfree(lo->lo_owner.so_owner.data); + kmem_cache_free(lockowner_slab, lo); +} + static void release_lockowner(struct nfs4_lockowner *lo) { unhash_lockowner(lo); @@ -699,6 +705,12 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) } } +static void nfs4_free_openowner(struct nfs4_openowner *oo) +{ + kfree(oo->oo_owner.so_owner.data); + kmem_cache_free(openowner_slab, oo); +} + static void release_openowner(struct nfs4_openowner *oo) { unhash_openowner(oo); @@ -2553,18 +2565,6 @@ nfsd4_init_slabs(void) return -ENOMEM; } -void nfs4_free_openowner(struct nfs4_openowner *oo) -{ - kfree(oo->oo_owner.so_owner.data); - kmem_cache_free(openowner_slab, oo); -} - -void nfs4_free_lockowner(struct nfs4_lockowner *lo) -{ - kfree(lo->lo_owner.so_owner.data); - kmem_cache_free(lockowner_slab, lo); -} - static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 5b3bbf24097c..fda9ce28b1b2 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -463,8 +463,6 @@ extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); -extern void nfs4_free_openowner(struct nfs4_openowner *); -extern void nfs4_free_lockowner(struct nfs4_lockowner *); extern int set_callback_cred(void); extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); From 14bcab1a395b1b150e654100cce09a1b552ec5f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Apr 2014 14:44:07 -0400 Subject: [PATCH 008/100] NFSd: Clean up nfs4_preprocess_stateid_op Move the state locking and file descriptor reference out from the callers and into nfs4_preprocess_stateid_op() itself. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 11 ----------- fs/nfsd/nfs4state.c | 19 +++++++++++++------ 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d543222babf3..ac837783b37d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -786,7 +786,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!nfsd4_last_compound_op(rqstp)) rqstp->rq_splice_ok = false; - nfs4_lock_state(); /* check stateid */ if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &read->rd_stateid, @@ -794,11 +793,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } - if (read->rd_filp) - get_file(read->rd_filp); status = nfs_ok; out: - nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -937,10 +933,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { - nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &setattr->sa_stateid, WR_STATE, NULL); - nfs4_unlock_state(); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -1006,17 +1000,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, stateid, WR_STATE, &filp); if (status) { - nfs4_unlock_state(); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } - if (filp) - get_file(filp); - nfs4_unlock_state(); cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fcab9091f094..b5278941f3a1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3654,6 +3654,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct file *file = NULL; __be32 status; if (filpp) @@ -3665,10 +3666,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); + nfs4_lock_state(); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion, nn); if (status) - return status; + goto out; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -3679,8 +3682,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { - *filpp = dp->dl_file->fi_deleg_file; - if (!*filpp) { + file = dp->dl_file->fi_deleg_file; + if (!file) { WARN_ON_ONCE(1); status = nfserr_serverfault; goto out; @@ -3701,16 +3704,20 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, goto out; if (filpp) { if (flags & RD_STATE) - *filpp = find_readable_file(stp->st_file); + file = find_readable_file(stp->st_file); else - *filpp = find_writeable_file(stp->st_file); + file = find_writeable_file(stp->st_file); } break; default: - return nfserr_bad_stateid; + status = nfserr_bad_stateid; + goto out; } status = nfs_ok; + if (file) + *filpp = get_file(file); out: + nfs4_unlock_state(); return status; } From ecc7455d8eb1860f5aa6b9ad82a9a81f93eb11d1 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 7 May 2014 23:08:04 +0800 Subject: [PATCH 009/100] NFSD: cleanup unneeded including linux/export.h commit 4ac7249ea5a0ceef9f8269f63f33cc873c3fac61 have remove all EXPORT_SYMBOL, linux/export.h is not needed, just clean it. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4acl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6f3f392d48af..b206e6cfcada 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -36,7 +36,6 @@ #include #include -#include #include "nfsfh.h" #include "nfsd.h" #include "acl.h" From 5409e46f1bcf960c651f3fff35f2f25e539655cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2014 13:49:44 +0200 Subject: [PATCH 010/100] nfsd: clean up fh_auth usage Use fh_fsid when reffering to the fsid part of the filehandle. The variable length auth field envisioned in nfsfh wasn't ever implemented. Also clean up some lose ends around this and document the file handle format better. Btw, why do we even export nfsfh.h to userspace? The file handle very much is kernel private, and nothing in nfs-utils include the header either. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.c | 20 +++++++++----------- include/uapi/linux/nfsd/nfsfh.h | 32 +++++++------------------------- 2 files changed, 16 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3c37b160dcad..a337106d6875 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -169,8 +169,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) data_left -= len; if (data_left < 0) return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); } else { __u32 tfh[2]; dev_t xdev; @@ -385,7 +385,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, { if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) - (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); @@ -513,7 +513,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; - __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", @@ -557,17 +556,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); } else { - int len; + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; fhp->fh_handle.fh_auth_type = 0; - datap = fhp->fh_handle.fh_auth+0; - mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); - len = key_len(fhp->fh_handle.fh_fsid_type); - datap += len/4; - fhp->fh_handle.fh_size = 4 + len; - if (inode) _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h index 616e3b396476..20391235d088 100644 --- a/include/uapi/linux/nfsd/nfsfh.h +++ b/include/uapi/linux/nfsd/nfsfh.h @@ -1,13 +1,7 @@ /* - * include/linux/nfsd/nfsfh.h - * * This file describes the layout of the file handles as passed * over the wire. * - * Earlier versions of knfsd used to sign file handles using keyed MD5 - * or SHA. I've removed this code, because it doesn't give you more - * security than blocking external access to port 2049 on your firewall. - * * Copyright (C) 1995, 1996, 1997 Olaf Kirch */ @@ -37,7 +31,7 @@ struct nfs_fhbase_old { }; /* - * This is the new flexible, extensible style NFSv2/v3 file handle. + * This is the new flexible, extensible style NFSv2/v3/v4 file handle. * by Neil Brown - March 2000 * * The file handle starts with a sequence of four-byte words. @@ -47,14 +41,7 @@ struct nfs_fhbase_old { * * All four-byte values are in host-byte-order. * - * The auth_type field specifies how the filehandle can be authenticated - * This might allow a file to be confirmed to be in a writable part of a - * filetree without checking the path from it up to the root. - * Current values: - * 0 - No authentication. fb_auth is 0 bytes long - * Possible future values: - * 1 - 4 bytes taken from MD5 hash of the remainer of the file handle - * prefixed by a secret and with the important export flags. + * The auth_type field is deprecated and must be set to 0. * * The fsid_type identifies how the filesystem (or export point) is * encoded. @@ -71,14 +58,9 @@ struct nfs_fhbase_old { * 7 - 8 byte inode number and 16 byte uuid * * The fileid_type identified how the file within the filesystem is encoded. - * This is (will be) passed to, and set by, the underlying filesystem if it supports - * filehandle operations. The filesystem must not use the value '0' or '0xff' and may - * only use the values 1 and 2 as defined below: - * Current values: - * 0 - The root, or export point, of the filesystem. fb_fileid is 0 bytes. - * 1 - 32bit inode number, 32 bit generation number. - * 2 - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number. - * + * The values for this field are filesystem specific, exccept that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. */ struct nfs_fhbase_new { __u8 fb_version; /* == 1, even => nfs_fhbase_old */ @@ -114,9 +96,9 @@ struct knfsd_fh { #define fh_fsid_type fh_base.fh_new.fb_fsid_type #define fh_auth_type fh_base.fh_new.fb_auth_type #define fh_fileid_type fh_base.fh_new.fb_fileid_type -#define fh_auth fh_base.fh_new.fb_auth #define fh_fsid fh_base.fh_new.fb_auth - +/* Do not use, provided for userspace compatiblity. */ +#define fh_auth fh_base.fh_new.fb_auth #endif /* _UAPI_LINUX_NFSD_FH_H */ From ecca063b31b88d31ee79e9d958ea78023659554e Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 15 Apr 2014 17:13:56 +0800 Subject: [PATCH 011/100] SUNRPC: Fix printk that is not only for nfsd Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ae333c1845bb..066362141133 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) } return; out: - printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); + printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name); } EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); From f3e41ec5ef0f5d2e10b6bfd3a13dc29f6d260d79 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 8 Apr 2014 13:04:01 +0800 Subject: [PATCH 012/100] NFSD: Use simple_read_from_buffer for coping data to userspace Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/fault_inject.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index d620e7f81429..2ed05c3cd43d 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -97,25 +97,14 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, { static u64 val; char read_buf[25]; - size_t size, ret; + size_t size; loff_t pos = *ppos; if (!pos) nfsd_inject_get(file_inode(file)->i_private, &val); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); - if (pos < 0) - return -EINVAL; - if (pos >= size || !len) - return 0; - if (len > size - pos) - len = size - pos; - ret = copy_to_user(buf, read_buf + pos, len); - if (ret == len) - return -EFAULT; - len -= ret; - *ppos = pos + len; - return len; + return simple_read_from_buffer(buf, len, ppos, read_buf, size); } static ssize_t fault_inject_write(struct file *file, const char __user *buf, From 9fa1959e976f7a6ae84f616ca669359028070c61 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 8 Apr 2014 13:06:28 +0800 Subject: [PATCH 013/100] NFSD: Get rid of empty function nfs4_state_init Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 7 ------- fs/nfsd/nfsctl.c | 1 - fs/nfsd/nfsd.h | 2 -- 3 files changed, 10 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b5278941f3a1..fac26836e4ac 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4979,13 +4979,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_ #endif /* CONFIG_NFSD_FAULT_INJECTION */ -/* initialization to perform at module load time: */ - -void -nfs4_state_init(void) -{ -} - /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f34d9de802ab..51844048937f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1179,7 +1179,6 @@ static int __init init_nfsd(void) retval = nfsd4_init_slabs(); if (retval) goto out_unregister_pernet; - nfs4_state_init(); retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) goto out_free_slabs; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index e9f2fd42d184..847daf37e566 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -115,7 +115,6 @@ static inline int nfsd_v4client(struct svc_rqst *rq) */ #ifdef CONFIG_NFSD_V4 extern unsigned long max_delegations; -void nfs4_state_init(void); int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); @@ -126,7 +125,6 @@ void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); #else -static inline void nfs4_state_init(void) { } static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } From 368fe39b508486483eb2cbb03a21ebd1b2a204bf Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 19 Apr 2014 00:17:31 +0800 Subject: [PATCH 014/100] NFSD: Don't clear SUID/SGID after root writing data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're clearing the SUID/SGID bits on write by hand in nfsd_vfs_write, even though the subsequent vfs_writev() call will end up doing this for us (through file system write methods eventually calling file_remove_suid(), e.g., from __generic_file_aio_write). So, remove the redundant nfsd code. The only change in behavior is when the write is by root, in which case we previously cleared SUID/SGID, but will now leave it alone. The new behavior is the behavior of every filesystem we've checked. It seems better to be consistent with local filesystem behavior. And the security advantage seems limited as root could always restore these bits by hand if it wanted. SUID/SGID is not cleared after writing data with (root, local ext4), File: ‘test’ Size: 0 Blocks: 0 IO Block: 4096 regular empty file Device: 803h/2051d Inode: 1200137 Links: 1 Access: (4777/-rwsrwxrwx) Uid: ( 0/ root) Gid: ( 0/ root) Context: unconfined_u:object_r:admin_home_t:s0 Access: 2014-04-18 21:36:31.016029014 +0800 Modify: 2014-04-18 21:36:31.016029014 +0800 Change: 2014-04-18 21:36:31.026030285 +0800 Birth: - File: ‘test’ Size: 5 Blocks: 8 IO Block: 4096 regular file Device: 803h/2051d Inode: 1200137 Links: 1 Access: (4777/-rwsrwxrwx) Uid: ( 0/ root) Gid: ( 0/ root) Context: unconfined_u:object_r:admin_home_t:s0 Access: 2014-04-18 21:36:31.016029014 +0800 Modify: 2014-04-18 21:36:31.040032065 +0800 Change: 2014-04-18 21:36:31.040032065 +0800 Birth: - With no_root_squash, (root, remote ext4), SUID/SGID are cleared, File: ‘test’ Size: 0 Blocks: 0 IO Block: 262144 regular empty file Device: 24h/36d Inode: 786439 Links: 1 Access: (4777/-rwsrwxrwx) Uid: ( 1000/ test) Gid: ( 1000/ test) Context: system_u:object_r:nfs_t:s0 Access: 2014-04-18 21:45:32.155805097 +0800 Modify: 2014-04-18 21:45:32.155805097 +0800 Change: 2014-04-18 21:45:32.168806749 +0800 Birth: - File: ‘test’ Size: 5 Blocks: 8 IO Block: 262144 regular file Device: 24h/36d Inode: 786439 Links: 1 Access: (0777/-rwxrwxrwx) Uid: ( 1000/ test) Gid: ( 1000/ test) Context: system_u:object_r:nfs_t:s0 Access: 2014-04-18 21:45:32.155805097 +0800 Modify: 2014-04-18 21:45:32.184808783 +0800 Change: 2014-04-18 21:45:32.184808783 +0800 Birth: - Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 16f0673a423c..6aaa3057683a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -857,20 +857,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, return err; } -static void kill_suid(struct dentry *dentry) -{ - struct iattr ia; - ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; - - mutex_lock(&dentry->d_inode->i_mutex); - /* - * Note we call this on write, so notify_change will not - * encounter any conflicting delegations: - */ - notify_change(dentry, &ia, NULL); - mutex_unlock(&dentry->d_inode->i_mutex); -} - /* * Gathered writes: If another process is currently writing to the file, * there's a high chance this is another nfsd (triggered by a bulk write @@ -942,10 +928,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, nfsdstats.io_write += host_err; fsnotify_modify(file); - /* clear setuid/setgid flag after write */ - if (inode->i_mode & (S_ISUID | S_ISGID)) - kill_suid(dentry); - if (stable) { if (use_wgather) host_err = wait_for_concurrent_writes(file); From cbf7a75bc58a2458bd6e47476e47819ba3f40b00 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Mar 2014 12:19:18 -0500 Subject: [PATCH 015/100] nfsd4: fix delegation cleanup on error We're not cleaning up everything we need to on error. In particular, we're not removing our lease. Among other problems this can cause the struct nfs4_file used as fl_owner to be referenced after it has been destroyed. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fac26836e4ac..c6ded9feaef9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -418,6 +418,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp) static void nfs4_put_deleg_lease(struct nfs4_file *fp) { + if (!fp->fi_lease) + return; if (atomic_dec_and_test(&fp->fi_delegees)) { vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); fp->fi_lease = NULL; @@ -440,9 +442,11 @@ unhash_delegation(struct nfs4_delegation *dp) list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); - nfs4_put_deleg_lease(dp->dl_file); - put_nfs4_file(dp->dl_file); - dp->dl_file = NULL; + if (dp->dl_file) { + nfs4_put_deleg_lease(dp->dl_file); + put_nfs4_file(dp->dl_file); + dp->dl_file = NULL; + } } @@ -3060,33 +3064,22 @@ static int nfs4_setlease(struct nfs4_delegation *dp) static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) { - int status; - if (fp->fi_had_conflict) return -EAGAIN; get_nfs4_file(fp); dp->dl_file = fp; - if (!fp->fi_lease) { - status = nfs4_setlease(dp); - if (status) - goto out_free; - return 0; - } + if (!fp->fi_lease) + return nfs4_setlease(dp); spin_lock(&recall_lock); + atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { spin_unlock(&recall_lock); - status = -EAGAIN; - goto out_free; + return -EAGAIN; } - atomic_inc(&fp->fi_delegees); list_add(&dp->dl_perfile, &fp->fi_delegations); spin_unlock(&recall_lock); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); return 0; -out_free: - put_nfs4_file(fp); - dp->dl_file = fp; - return status; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3173,8 +3166,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: - remove_stid(&dp->dl_stid); - nfs4_put_delegation(dp); + destroy_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && From d40aa3372f90d478b6166df0321349b5aeb0aea8 Mon Sep 17 00:00:00 2001 From: Benoit Taine Date: Thu, 22 May 2014 16:32:30 +0200 Subject: [PATCH 016/100] nfsd: Remove assignments inside conditions Assignments should not happen inside an if conditional, but in the line before. This issue was reported by checkpatch. The semantic patch that makes this change is as follows (http://coccinelle.lip6.fr/): // @@ identifier i1; expression e1; statement S; @@ -if(!(i1 = e1)) S +i1 = e1; +if(!i1) +S // It has been tested by compilation. Signed-off-by: Benoit Taine Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs2acl.c | 12 ++++++++---- fs/nfsd/nfs3acl.c | 6 ++++-- fs/nfsd/nfs3xdr.c | 27 ++++++++++++++++++--------- fs/nfsd/nfsxdr.c | 15 ++++++++++----- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 11c1fba29312..12b023a7ab7d 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -182,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p); p++; @@ -197,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p++); if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -218,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -226,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->access = ntohl(*p++); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index adc5f1b1dc26..2a514e21dc74 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -128,7 +128,8 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *args) { - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p); p++; @@ -143,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p++); if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index de6e39e12cb3..e6c01e80325e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -278,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp) int nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -287,7 +288,8 @@ int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = decode_sattr3(p, &args->attrs); @@ -315,7 +317,8 @@ int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->access = ntohl(*p++); @@ -330,7 +333,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, int v; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -360,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -535,7 +540,8 @@ int nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -558,7 +564,8 @@ int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -580,7 +587,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, int len; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -605,7 +613,8 @@ int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 9c769a47ac5a..1ac306b769df 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -214,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) int nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -248,7 +249,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, { unsigned int len; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->offset = ntohl(*p++); @@ -281,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, hdr, dlen; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p++; /* beginoffset */ @@ -355,7 +358,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, int nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); @@ -391,7 +395,8 @@ int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); From abf1135b6ee31cc17f569f2a59f87c833ba0849c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 May 2014 07:43:03 -0700 Subject: [PATCH 017/100] nfsd: remove nfsd4_free_slab No need for a kmem_cache_destroy wrapper in nfsd, just do proper goto based unwinding. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 51 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a037627ce5c7..42f6c25ec8e8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -83,11 +83,11 @@ static DEFINE_MUTEX(client_mutex); */ static DEFINE_SPINLOCK(recall_lock); -static struct kmem_cache *openowner_slab = NULL; -static struct kmem_cache *lockowner_slab = NULL; -static struct kmem_cache *file_slab = NULL; -static struct kmem_cache *stateid_slab = NULL; -static struct kmem_cache *deleg_slab = NULL; +static struct kmem_cache *openowner_slab; +static struct kmem_cache *lockowner_slab; +static struct kmem_cache *file_slab; +static struct kmem_cache *stateid_slab; +static struct kmem_cache *deleg_slab; void nfs4_lock_state(void) @@ -2520,23 +2520,14 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) spin_unlock(&recall_lock); } -static void -nfsd4_free_slab(struct kmem_cache **slab) -{ - if (*slab == NULL) - return; - kmem_cache_destroy(*slab); - *slab = NULL; -} - void nfsd4_free_slabs(void) { - nfsd4_free_slab(&openowner_slab); - nfsd4_free_slab(&lockowner_slab); - nfsd4_free_slab(&file_slab); - nfsd4_free_slab(&stateid_slab); - nfsd4_free_slab(&deleg_slab); + kmem_cache_destroy(openowner_slab); + kmem_cache_destroy(lockowner_slab); + kmem_cache_destroy(file_slab); + kmem_cache_destroy(stateid_slab); + kmem_cache_destroy(deleg_slab); } int @@ -2545,26 +2536,34 @@ nfsd4_init_slabs(void) openowner_slab = kmem_cache_create("nfsd4_openowners", sizeof(struct nfs4_openowner), 0, 0, NULL); if (openowner_slab == NULL) - goto out_nomem; + goto out; lockowner_slab = kmem_cache_create("nfsd4_lockowners", sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) - goto out_nomem; + goto out_free_openowner_slab; file_slab = kmem_cache_create("nfsd4_files", sizeof(struct nfs4_file), 0, 0, NULL); if (file_slab == NULL) - goto out_nomem; + goto out_free_lockowner_slab; stateid_slab = kmem_cache_create("nfsd4_stateids", sizeof(struct nfs4_ol_stateid), 0, 0, NULL); if (stateid_slab == NULL) - goto out_nomem; + goto out_free_file_slab; deleg_slab = kmem_cache_create("nfsd4_delegations", sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) - goto out_nomem; + goto out_free_stateid_slab; return 0; -out_nomem: - nfsd4_free_slabs(); + +out_free_stateid_slab: + kmem_cache_destroy(stateid_slab); +out_free_file_slab: + kmem_cache_destroy(file_slab); +out_free_lockowner_slab: + kmem_cache_destroy(lockowner_slab); +out_free_openowner_slab: + kmem_cache_destroy(openowner_slab); +out: dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } From 16e4d93f6de7063800f3f5e68f064b0ff8fae9b7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 19 May 2014 13:40:22 -0400 Subject: [PATCH 018/100] NFSD: Ignore client's source port on RDMA transports An NFS/RDMA client's source port is meaningless for RDMA transports. The transport layer typically sets the source port value on the connection to a random ephemeral port. Currently, NFS server administrators must specify the "insecure" export option to enable clients to access exports via RDMA. But this means NFS clients can access such an export via IP using an ephemeral port, which may not be desirable. This patch eliminates the need to specify the "insecure" export option to allow NFS/RDMA clients access to an export. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=250 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 2 +- net/sunrpc/svcsock.c | 9 +++++++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 7 +++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b05963f09ebf..0cec1b94c670 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -24,6 +24,7 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); + int (*xpo_secure_port)(struct svc_rqst *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 06c6ff0cb911..614956f1777e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -793,7 +793,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) clear_bit(XPT_OLD, &xprt->xpt_flags); - rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); + rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; if (serv->sv_stats) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 43bcb4699d69..0cb34f5d58dc 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, release_sock(sock->sk); #endif } + +static int svc_sock_secure_port(struct svc_rqst *rqstp) +{ + return svc_port_is_privileged(svc_addr(rqstp)); +} + /* * INET callback when data has been received on the socket. */ @@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_udp_class = { @@ -1234,6 +1241,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = { .xpo_detach = svc_bc_tcp_sock_detach, .xpo_free = svc_bc_sock_free, .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_bc_class = { @@ -1272,6 +1280,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, + .xpo_secure_port = svc_sock_secure_port, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 25688fa2207f..02db8d9cc994 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -65,6 +65,7 @@ static void dto_tasklet_func(unsigned long data); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); +static int svc_rdma_secure_port(struct svc_rqst *); static void rq_cq_reap(struct svcxprt_rdma *xprt); static void sq_cq_reap(struct svcxprt_rdma *xprt); @@ -82,6 +83,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, + .xpo_secure_port = svc_rdma_secure_port, }; struct svc_xprt_class svc_rdma_class = { @@ -1207,6 +1209,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +static int svc_rdma_secure_port(struct svc_rqst *rqstp) +{ + return 1; +} + /* * Attempt to register the kvec representing the RPC memory with the * device. From c789102c20bbbdda6831a273e046715be9d6af79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 18 May 2014 14:05:22 -0400 Subject: [PATCH 019/100] SUNRPC: Fix a module reference leak in svc_handle_xprt If the accept() call fails, we need to put the module reference. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 614956f1777e..29772e01b179 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -730,6 +730,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) svc_add_new_temp_xprt(serv, newxpt); + else + module_put(xprt->xpt_class->xcl_owner); } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", From ef11ce24875a8a540adc185e7bce3d7d49c8296f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 May 2014 11:22:47 +1000 Subject: [PATCH 020/100] SUNRPC: track whether a request is coming from a loop-back interface. If an incoming NFS request is coming from the local host, then nfsd will need to perform some special handling. So detect that possibility and make the source visible in rq_local. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/sunrpc.h | 13 +++++++++++++ net/sunrpc/svcsock.c | 5 +++++ 4 files changed, 20 insertions(+) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 04e763221246..a0dbbd1e00e9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -254,6 +254,7 @@ struct svc_rqst { u32 rq_prot; /* IP protocol */ unsigned short rq_secure : 1; /* secure port */ + unsigned short rq_local : 1; /* local request */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0cec1b94c670..7235040a19b2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -64,6 +64,7 @@ struct svc_xprt { #define XPT_DETACHED 10 /* detached from tempsocks list */ #define XPT_LISTENER 11 /* listening endpoint */ #define XPT_CACHE_AUTH 12 /* cache auth info */ +#define XPT_LOCAL 13 /* connection from loopback interface */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 14c9f6d1c5ff..f2b7cb540e61 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +static inline int sock_is_loopback(struct sock *sk) +{ + struct dst_entry *dst; + int loopback = 0; + rcu_read_lock(); + dst = rcu_dereference(sk->sk_dst_cache); + if (dst && dst->dev && + (dst->dev->features & NETIF_F_LOOPBACK)) + loopback = 1; + rcu_read_unlock(); + return loopback; +} + int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page *headpage, unsigned long headoffset, struct page *tailpage, unsigned long tailoffset); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0cb34f5d58dc..f3b8eb309d01 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -874,6 +874,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); + if (sock_is_loopback(newsock->sk)) + set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); + else + clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); if (serv->sv_stats) serv->sv_stats->nettcpconn++; @@ -1119,6 +1123,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; + rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; From 8658452e4a588da603f6cb5ee2615deafcd82b71 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 May 2014 11:22:47 +1000 Subject: [PATCH 021/100] nfsd: Only set PF_LESS_THROTTLE when really needed. PF_LESS_THROTTLE has a very specific use case: to avoid deadlocks and live-locks while writing to the page cache in a loop-back NFS mount situation. It therefore makes sense to *only* set PF_LESS_THROTTLE in this situation. We now know when a request came from the local-host so it could be a loop-back mount. We already know when we are handling write requests, and when we are doing anything else. So combine those two to allow nfsd to still be throttled (like any other process) in every situation except when it is known to be problematic. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 6 ------ fs/nfsd/vfs.c | 12 ++++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9a4a5f9e7468..1879e43f2868 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -591,12 +591,6 @@ nfsd(void *vrqstp) nfsdstats.th_cnt++; mutex_unlock(&nfsd_mutex); - /* - * We want less throttling in balance_dirty_pages() so that nfs to - * localhost doesn't cause nfsd to lock up due to all the client's - * dirty pages. - */ - current->flags |= PF_LESS_THROTTLE; set_freezable(); /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6aaa3057683a..3aa38523b6d7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -908,6 +908,16 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int stable = *stablep; int use_wgather; loff_t pos = offset; + unsigned int pflags = current->flags; + + if (rqstp->rq_local) + /* + * We want less throttling in balance_dirty_pages() + * and shrink_inactive_list() so that nfs to + * localhost doesn't cause nfsd to lock up due to all + * the client's dirty pages or its congested queue. + */ + current->flags |= PF_LESS_THROTTLE; dentry = file->f_path.dentry; inode = dentry->d_inode; @@ -941,6 +951,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, err = 0; else err = nfserrno(host_err); + if (rqstp->rq_local) + tsk_restore_flags(current, pflags, PF_LESS_THROTTLE); return err; } From 5b648699afa00de0cfe679a4733bef593faaa3a4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Mar 2014 11:43:58 -0500 Subject: [PATCH 022/100] nfsd4: READ, READDIR, etc., are idempotent OP_MODIFIES_SOMETHING flags operations that we should be careful not to initiate without being sure we have the buffer space to encode a reply. None of these ops fall into that category. We could probably remove a few more, but this isn't a very important problem at least for ops whose reply size is easy to estimate. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ac837783b37d..2c1ee70b730f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1665,37 +1665,32 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTPUBFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTROOTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READ", .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READDIR", .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, }, From 24906f32378288d74289405231ddbb7120317691 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 12 Mar 2014 15:17:18 -0400 Subject: [PATCH 023/100] nfsd4: allow larger 4.1 session drc slots The client is actually asking for 2532 bytes. I suspect that's a mistake. But maybe we can allow some more. In theory lock needs more if it might return a maximum-length lockowner in the denied case. Signed-off-by: J. Bruce Fields --- fs/nfsd/state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index fda9ce28b1b2..374c66283ac5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -122,7 +122,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 /* Maximum session per slot cache size */ -#define NFSD_SLOT_CACHE_SIZE 1024 +#define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 #define NFSD_MAX_MEM_PER_SESSION \ From 622f560e6afbd54cdd9b9a416ddab8114baedd76 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 16 May 2014 14:38:20 -0400 Subject: [PATCH 024/100] nfsd4: read size estimate should include padding Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2c1ee70b730f..77c43697cd4f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1489,7 +1489,7 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) if (rlen > maxcount) rlen = maxcount; - return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; + return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) From f34e432b673781175c782fa8c5a619114340ba38 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 16 May 2014 21:24:56 -0400 Subject: [PATCH 025/100] nfsd4: fix write reply size estimate The write reply also includes count and stable_how. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 77c43697cd4f..9d60a1c08105 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1528,7 +1528,7 @@ static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_o static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); + return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) From e372ba60def1af33e1c0b9bbfa5c8f8559c1ad6b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 19 May 2014 12:27:11 -0400 Subject: [PATCH 026/100] nfsd4: decoding errors can still be cached and require space Currently a non-idempotent op reply may be cached if it fails in the proc code but not if it fails at xdr decoding. I doubt there are any xdr-decoding-time errors that would make this a problem in practice, so this probably isn't a serious bug. The space estimates should also take into account space required for encoding of error returns. Again, not a practical problem, though it would become one after future patches which will tighten the space estimates. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 ++ fs/nfsd/nfs4xdr.c | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9d60a1c08105..b0e075cd6c99 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1215,6 +1215,8 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) bool nfsd4_cache_this_op(struct nfsd4_op *op) { + if (op->opnum == OP_ILLEGAL) + return false; return OPDESC(op)->op_flags & OP_CACHEME; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 18881f34737a..e866a06b66bd 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1677,11 +1677,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } - - if (op->status) { - argp->opcnt = i+1; - break; - } /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: @@ -1689,6 +1684,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) cachethis |= nfsd4_cache_this_op(op); max_reply = max(max_reply, nfsd4_max_reply(op->opnum)); + + if (op->status) { + argp->opcnt = i+1; + break; + } } /* Sessions make the DRC unnecessary: */ if (argp->minorversion) From 4aea24b2ff7510932118ec9b06c35a11625194ea Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 15 Jan 2014 15:17:58 -0500 Subject: [PATCH 027/100] nfsd4: embed xdr_stream in nfsd4_compoundres This is a mechanical transformation with no change in behavior. Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 12 ++++---- fs/nfsd/nfs4state.c | 8 +++--- fs/nfsd/nfs4xdr.c | 67 +++++++++++++++++++++++---------------------- fs/nfsd/xdr4.h | 4 +-- 4 files changed, 46 insertions(+), 45 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b0e075cd6c99..6d94adf6c963 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1270,13 +1270,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, u32 plen = 0; __be32 status; - resp->xbuf = &rqstp->rq_res; - resp->p = rqstp->rq_res.head[0].iov_base + + resp->xdr.buf = &rqstp->rq_res; + resp->xdr.p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; - resp->tagp = resp->p; + resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->p += 2 + XDR_QUADLEN(args->taglen); - resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + resp->xdr.p += 2 + XDR_QUADLEN(args->taglen); + resp->xdr.end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; resp->taglen = args->taglen; resp->tag = args->tag; resp->opcnt = 0; @@ -1328,7 +1328,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * failed response to the next operation. If we don't * have enough room, fail with ERR_RESOURCE. */ - slack_bytes = (char *)resp->end - (char *)resp->p; + slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; if (slack_bytes < COMPOUND_SLACK_SPACE + COMPOUND_ERR_SLACK_SPACE) { BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 42f6c25ec8e8..14bfb5568715 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1573,10 +1573,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_datalen = 0; return; } - slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; + slot->sl_datalen = (char *)resp->xdr.p - (char *)resp->cstate.datap; base = (char *)resp->cstate.datap - - (char *)resp->xbuf->head[0].iov_base; - if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, + (char *)resp->xdr.buf->head[0].iov_base; + if (read_bytes_from_xdr_buf(resp->xdr.buf, base, slot->sl_data, slot->sl_datalen)) WARN("%s: sessions DRC could not cache compound\n", __func__); return; @@ -1630,7 +1630,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); resp->opcnt = slot->sl_opcnt; - resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); + resp->xdr.p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); status = slot->sl_status; return status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e866a06b66bd..911372590ae1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1747,10 +1747,10 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } #define RESERVE_SPACE(nbytes) do { \ - p = resp->p; \ - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \ + p = resp->xdr.p; \ + BUG_ON(p + XDR_QUADLEN(nbytes) > resp->xdr.end); \ } while (0) -#define ADJUST_ARGS() resp->p = p +#define ADJUST_ARGS() resp->xdr.p = p /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. @@ -2751,9 +2751,9 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (nfserr) return nfserr; - buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); + buflen = resp->xdr.end - resp->xdr.p - (COMPOUND_ERR_SLACK_SPACE >> 2); nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - &resp->p, buflen, getattr->ga_bmval, + &resp->xdr.p, buflen, getattr->ga_bmval, resp->rqstp, 0); return nfserr; } @@ -2953,7 +2953,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) return nfserr; - if (resp->xbuf->page_len) + if (resp->xdr.buf->page_len) return nfserr_resource; RESERVE_SPACE(8); /* eof flag and byte count */ @@ -2991,18 +2991,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(eof); WRITE32(maxcount); ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; + resp->xdr.buf->head[0].iov_len = (char *)p + - (char *)resp->xdr.buf->head[0].iov_base; + resp->xdr.buf->page_len = maxcount; /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; + resp->xdr.buf->tail[0].iov_base = p; + resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { RESERVE_SPACE(4); WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + resp->xdr.buf->tail[0].iov_base += maxcount&3; + resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); ADJUST_ARGS(); } return 0; @@ -3017,7 +3017,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr) return nfserr; - if (resp->xbuf->page_len) + if (resp->xdr.buf->page_len) return nfserr_resource; if (!*resp->rqstp->rq_next_page) return nfserr_resource; @@ -3041,18 +3041,18 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd WRITE32(maxcount); ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; + resp->xdr.buf->head[0].iov_len = (char *)p + - (char *)resp->xdr.buf->head[0].iov_base; + resp->xdr.buf->page_len = maxcount; /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; + resp->xdr.buf->tail[0].iov_base = p; + resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { RESERVE_SPACE(4); WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + resp->xdr.buf->tail[0].iov_base += maxcount&3; + resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); ADJUST_ARGS(); } return 0; @@ -3068,7 +3068,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (nfserr) return nfserr; - if (resp->xbuf->page_len) + if (resp->xdr.buf->page_len) return nfserr_resource; if (!*resp->rqstp->rq_next_page) return nfserr_resource; @@ -3080,7 +3080,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 WRITE32(0); WRITE32(0); ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; + resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; tailbase = p; maxcount = PAGE_SIZE; @@ -3121,14 +3122,15 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 p = readdir->buffer; *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - + resp->xdr.buf->page_len = ((char *)p) - (char*)page_address(*(resp->rqstp->rq_next_page-1)); /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = tailbase; - resp->xbuf->tail[0].iov_len = 0; - resp->p = resp->xbuf->tail[0].iov_base; - resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4; + resp->xdr.buf->tail[0].iov_base = tailbase; + resp->xdr.buf->tail[0].iov_len = 0; + resp->xdr.p = resp->xdr.buf->tail[0].iov_base; + resp->xdr.end = resp->xdr.p + + (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4; return 0; err_no_verf: @@ -3587,10 +3589,10 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) session = resp->cstate.session; if (xb->page_len == 0) { - length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; + length = (char *)resp->xdr.p - (char *)xb->head[0].iov_base + pad; } else { if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) - tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; + tlen = (char *)resp->xdr.p - (char *)xb->tail[0].iov_base; length = xb->head[0].iov_len + xb->page_len + tlen + pad; } @@ -3629,7 +3631,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) op->status = nfsd4_check_resp_size(resp, 0); if (so) { so->so_replay.rp_status = op->status; - so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); + so->so_replay.rp_buflen = (char *)resp->xdr.p + - (char *)(statp+1); memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen); } status: @@ -3731,7 +3734,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov = &rqstp->rq_res.tail[0]; else iov = &rqstp->rq_res.head[0]; - iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; + iov->iov_len = ((char *)resp->xdr.p) - (char *)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 5ea7df305083..6884d70363b5 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -506,9 +506,7 @@ struct nfsd4_compoundargs { struct nfsd4_compoundres { /* scratch variables for XDR encode */ - __be32 * p; - __be32 * end; - struct xdr_buf * xbuf; + struct xdr_stream xdr; struct svc_rqst * rqstp; u32 taglen; From d518465866bfeaa41fb685d7dfc9983e0312232e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 26 Aug 2013 16:04:46 -0400 Subject: [PATCH 028/100] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream Just change the nfsd4_encode_getattr api. Not changing any code or adding any new functionality yet. Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 6 +++--- fs/nfsd/nfs4xdr.c | 47 ++++++++++++++++++++++++++++++++++------------ fs/nfsd/xdr4.h | 7 ++++--- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6d94adf6c963..94ef528bb1dd 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1061,10 +1061,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_jukebox; p = buf; - status = nfsd4_encode_fattr(&cstate->current_fh, + status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, &p, - count, verify->ve_bmval, + cstate->current_fh.fh_dentry, + verify->ve_bmval, rqstp, 0); /* * If nfsd4_encode_fattr() ran out of space, assume that's because diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 911372590ae1..187925065d16 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2045,12 +2045,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. - * - * countp is the buffer size in _words_ */ __be32 -nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 **buffer, int count, u32 *bmval, +nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + struct svc_export *exp, + struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; @@ -2059,12 +2058,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - int buflen = count << 2; + __be32 *p = xdr->p; + int buflen = xdr->buf->buflen; __be32 *attrlenp; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = *buffer; __be32 status; int err; int aclsupport = 0; @@ -2491,7 +2490,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *buffer = p; + xdr->p = p; status = nfs_ok; out: @@ -2513,6 +2512,27 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out; } +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, u32 *bmval, + struct svc_rqst *rqstp, int ignore_crossmnt) +{ + struct xdr_buf dummy = { + .head[0] = { + .iov_base = *p, + }, + .buflen = words << 2, + }; + struct xdr_stream xdr; + __be32 ret; + + xdr_init_encode(&xdr, &dummy, NULL); + ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, + ignore_crossmnt); + *p = xdr.p; + return ret; +} + static inline int attributes_need_mount(u32 *bmval) { if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME)) @@ -2576,7 +2596,8 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, } out_encode: - nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, + nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, + cd->rd_bmval, cd->rd_rqstp, ignore_crossmnt); out_put: dput(dentry); @@ -2746,14 +2767,16 @@ static __be32 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - int buflen; + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = resp->xdr.buf; if (nfserr) return nfserr; - buflen = resp->xdr.end - resp->xdr.p - (COMPOUND_ERR_SLACK_SPACE >> 2); - nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - &resp->xdr.p, buflen, getattr->ga_bmval, + buf->buflen = (void *)resp->xdr.end - (void *)resp->xdr.p + - COMPOUND_ERR_SLACK_SPACE; + nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); return nfserr; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 6884d70363b5..f62a055bf63c 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -562,9 +562,10 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 **buffer, int countp, - u32 *bmval, struct svc_rqst *, int ignore_crossmnt); +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, + u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid *setclid); From 2d124dfaad08733077d2c46755716b2873af65a3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 15 Jan 2014 16:26:42 -0500 Subject: [PATCH 029/100] nfsd4: move proc_compound xdr encode init to helper Mechanical transformation with no change of behavior. Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 94ef528bb1dd..f809aa8d9213 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1253,6 +1253,18 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } +static void svcxdr_init_encode(struct svc_rqst *rqstp, + struct nfsd4_compoundres *resp) +{ + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *head = buf->head; + + xdr->buf = buf; + xdr->p = head->iov_base + head->iov_len; + xdr->end = head->iov_base + PAGE_SIZE; +} + /* * COMPOUND call. */ @@ -1270,13 +1282,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, u32 plen = 0; __be32 status; - resp->xdr.buf = &rqstp->rq_res; - resp->xdr.p = rqstp->rq_res.head[0].iov_base + - rqstp->rq_res.head[0].iov_len; + svcxdr_init_encode(rqstp, resp); resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ resp->xdr.p += 2 + XDR_QUADLEN(args->taglen); - resp->xdr.end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; resp->taglen = args->taglen; resp->tag = args->tag; resp->opcnt = 0; From 1802a67894fab3ff90a3ef4f484e97a5b4515426 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 21 Jan 2014 11:06:52 -0500 Subject: [PATCH 030/100] nfsd4: reserve head space for krb5 integ/priv info Currently if the nfs-level part of a reply would be too large, we'll return an error to the client. But if the nfs-level part fits and leaves no room for krb5p or krb5i stuff, then we just drop the request entirely. That's no good. Instead, reserve some slack space at the end of the buffer and make sure we fail outright if we'd come close. The slack space here is a massive overstimate of what's required, we should probably try for a tighter limit at some point. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f809aa8d9213..747d6a87a47d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1262,7 +1262,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->buf = buf; xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE; + xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; } /* From 07d1f8020738ba3180ea9992c4fa7dbc0685396a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Mar 2014 20:39:29 -0500 Subject: [PATCH 031/100] nfsd4: fix encoding of out-of-space replies If nfsd4_check_resp_size() returns an error then we should really be truncating the reply here, otherwise we may leave extra garbage at the end of the rpc reply. Also add a warning to catch any cases where our reply-size estimates may be wrong in the case of a non-idempotent operation. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 13 +++++++++---- fs/nfsd/nfs4xdr.c | 17 ++++++++++++++++- fs/nfsd/xdr4.h | 2 ++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 747d6a87a47d..bf8cddfdd3be 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1171,9 +1171,7 @@ struct nfsd4_operation { static struct nfsd4_operation nfsd4_ops[]; -#ifdef NFSD_DEBUG static const char *nfsd4_op_name(unsigned opnum); -#endif /* * Enforce NFSv4.1 COMPOUND ordering rules: @@ -1859,14 +1857,21 @@ static struct nfsd4_operation nfsd4_ops[] = { }, }; -#ifdef NFSD_DEBUG +void warn_on_nonidempotent_op(struct nfsd4_op *op) +{ + if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { + pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + op->opnum, nfsd4_op_name(op->opnum)); + WARN_ON_ONCE(1); + } +} + static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) return nfsd4_ops[opnum].op_name; return "unknown_operation"; } -#endif #define nfsd4_voidres nfsd4_voidargs struct nfsd4_voidargs { int dummy; }; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 187925065d16..24ba652008db 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3637,6 +3637,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { struct nfs4_stateowner *so = resp->cstate.replay_owner; __be32 *statp; + nfsd4_enc encoder; __be32 *p; RESERVE_SPACE(8); @@ -3648,10 +3649,24 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) goto status; BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); - op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); + encoder = nfsd4_enc_ops[op->opnum]; + op->status = encoder(resp, op->status, &op->u); /* nfsd4_check_resp_size guarantees enough room for error status */ if (!op->status) op->status = nfsd4_check_resp_size(resp, 0); + if (op->status == nfserr_resource || + op->status == nfserr_rep_too_big || + op->status == nfserr_rep_too_big_to_cache) { + /* + * The operation may have already been encoded or + * partially encoded. No op returns anything additional + * in the case of one of these three errors, so we can + * just truncate back to after the status. But it's a + * bug if we had to do this on a non-idempotent op: + */ + warn_on_nonidempotent_op(op); + resp->xdr.p = statp + 1; + } if (so) { so->so_replay.rp_status = op->status; so->so_replay.rp_buflen = (char *)resp->xdr.p diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f62a055bf63c..15ca47797a82 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -536,6 +536,8 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +void warn_on_nonidempotent_op(struct nfsd4_op *op); + #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) static inline void From 5f4ab9458755eddc66912a15319363bf311f7fc8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Mar 2014 11:01:37 -0500 Subject: [PATCH 032/100] nfsd4: allow space for final error return This post-encoding check should be taking into account the need to encode at least an out-of-space error to the following op (if any). Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 24ba652008db..2ed803662b09 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3636,6 +3636,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { struct nfs4_stateowner *so = resp->cstate.replay_owner; + struct svc_rqst *rqstp = resp->rqstp; __be32 *statp; nfsd4_enc encoder; __be32 *p; @@ -3652,8 +3653,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); /* nfsd4_check_resp_size guarantees enough room for error status */ - if (!op->status) - op->status = nfsd4_check_resp_size(resp, 0); + if (!op->status) { + int space_needed = 0; + if (!nfsd4_last_compound_op(rqstp)) + space_needed = COMPOUND_ERR_SLACK_SPACE; + op->status = nfsd4_check_resp_size(resp, space_needed); + } if (op->status == nfserr_resource || op->status == nfserr_rep_too_big || op->status == nfserr_rep_too_big_to_cache) { From ddd1ea56367202f6c99135cd59de7a97af4c4ffd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 Aug 2013 21:32:25 -0400 Subject: [PATCH 033/100] nfsd4: use xdr_reserve_space in attribute encoding This is a cosmetic change for now; no change in behavior. Note we're just depending on xdr_reserve_space to do the bounds checking for us, we're not really depending on its adjustment of iovec or xdr_buf lengths yet, as those are fixed up by as necessary after the fact by read-link operations and by nfs4svc_encode_compoundres. However we do have to update xdr->iov on read-like operations to prevent xdr_reserve_space from messing with the already-fixed-up length of the the head. When the attribute encoding fails partway through we have to undo the length adjustments made so far. We do it manually for now, but later patches will add an xdr_truncate_encode() helper to handle cases like this. Signed-off-by: J. Bruce Fields --- fs/nfsd/acl.h | 2 +- fs/nfsd/idmap.h | 4 +- fs/nfsd/nfs4acl.c | 11 +- fs/nfsd/nfs4idmap.c | 42 ++++--- fs/nfsd/nfs4proc.c | 1 + fs/nfsd/nfs4xdr.c | 288 ++++++++++++++++++++++++++------------------ 6 files changed, 203 insertions(+), 145 deletions(-) diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index b481e1f5eecc..a986ceb6fd0d 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -49,7 +49,7 @@ struct svc_rqst; struct nfs4_acl *nfs4_acl_new(int); int nfs4_acl_get_whotype(char *, u32); -__be32 nfs4_acl_write_who(int who, __be32 **p, int *len); +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl); diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index 66e58db01936..a3f34900091f 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -56,7 +56,7 @@ static inline void nfsd_idmap_shutdown(struct net *net) __be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *); __be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); -__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *); -__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *); +__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t); +__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t); #endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 7c7c02554a81..d714156a19fd 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -919,20 +919,19 @@ nfs4_acl_get_whotype(char *p, u32 len) return NFS4_ACL_WHO_NAMED; } -__be32 nfs4_acl_write_who(int who, __be32 **p, int *len) +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) { + __be32 *p; int i; - int bytes; for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { if (s2t_map[i].type != who) continue; - bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2); - if (bytes > *len) + p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4); + if (!p) return nfserr_resource; - *p = xdr_encode_opaque(*p, s2t_map[i].string, + p = xdr_encode_opaque(p, s2t_map[i].string, s2t_map[i].stringlen); - *len -= bytes; return 0; } WARN_ON_ONCE(1); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c0dfde68742e..a0ab0a847d69 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -551,44 +551,43 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return 0; } -static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen) +static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id) { char buf[11]; int len; - int bytes; + __be32 *p; len = sprintf(buf, "%u", id); - bytes = 4 + (XDR_QUADLEN(len) << 2); - if (bytes > *buflen) + p = xdr_reserve_space(xdr, len + 4); + if (!p) return nfserr_resource; - *p = xdr_encode_opaque(*p, buf, len); - *buflen -= bytes; + p = xdr_encode_opaque(p, buf, len); return 0; } -static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen) +static __be32 idmap_id_to_name(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { struct ent *item, key = { .id = id, .type = type, }; + __be32 *p; int ret; - int bytes; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) - return encode_ascii_id(id, p, buflen); + return encode_ascii_id(xdr, id); if (ret) return nfserrno(ret); ret = strlen(item->name); WARN_ON_ONCE(ret > IDMAP_NAMESZ); - bytes = 4 + (XDR_QUADLEN(ret) << 2); - if (bytes > *buflen) + p = xdr_reserve_space(xdr, ret + 4); + if (!p) return nfserr_resource; - *p = xdr_encode_opaque(*p, item->name, ret); - *buflen -= bytes; + p = xdr_encode_opaque(p, item->name, ret); cache_put(&item->h, nn->idtoname_cache); return 0; } @@ -622,11 +621,12 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u return idmap_name_to_id(rqstp, type, name, namelen, id); } -static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen) +static __be32 encode_name_from_id(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) - return encode_ascii_id(id, p, buflen); - return idmap_id_to_name(rqstp, type, id, p, buflen); + return encode_ascii_id(xdr, id); + return idmap_id_to_name(xdr, rqstp, type, id); } __be32 @@ -655,14 +655,16 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, return status; } -__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid, __be32 **p, int *buflen) +__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kuid_t uid) { u32 id = from_kuid(&init_user_ns, uid); - return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id); } -__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen) +__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kgid_t gid) { u32 id = from_kgid(&init_user_ns, gid); - return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bf8cddfdd3be..41c7c0a3ddd0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1259,6 +1259,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, struct kvec *head = buf->head; xdr->buf = buf; + xdr->iov = head; xdr->p = head->iov_base + head->iov_len; xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2ed803662b09..27d1e94f12fb 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1755,18 +1755,20 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. */ -static __be32 nfsd4_encode_components_esc(char sep, char *components, - __be32 **pp, int *buflen, - char esc_enter, char esc_exit) +static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, + char *components, char esc_enter, + char esc_exit) { - __be32 *p = *pp; - __be32 *countp = p; + __be32 *p; + __be32 *countp; int strlen, count=0; char *str, *end, *next; dprintk("nfsd4_encode_components(%s)\n", components); - if ((*buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; + countp = p; WRITE32(0); /* We will fill this in with @count later */ end = str = components; while (*end) { @@ -1789,7 +1791,8 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, strlen = end - str; if (strlen) { - if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) + p = xdr_reserve_space(xdr, strlen + 4); + if (!p) return nfserr_resource; WRITE32(strlen); WRITEMEM(str, strlen); @@ -1799,7 +1802,6 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, end++; str = end; } - *pp = p; p = countp; WRITE32(count); return 0; @@ -1808,40 +1810,39 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, /* Encode as an array of strings the string given with components * separated @sep. */ -static __be32 nfsd4_encode_components(char sep, char *components, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep, + char *components) { - return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0); + return nfsd4_encode_components_esc(xdr, sep, components, 0, 0); } /* * encode a location element of a fs_locations structure */ -static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, + struct nfsd4_fs_location *location) { __be32 status; - __be32 *p = *pp; - status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen, + status = nfsd4_encode_components_esc(xdr, ':', location->hosts, '[', ']'); if (status) return status; - status = nfsd4_encode_components('/', location->path, &p, buflen); + status = nfsd4_encode_components(xdr, '/', location->path); if (status) return status; - *pp = p; return 0; } /* * Encode a path in RFC3530 'pathname4' format */ -static __be32 nfsd4_encode_path(const struct path *root, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_path(struct xdr_stream *xdr, + const struct path *root, + const struct path *path) { struct path cur = *path; - __be32 *p = *pp; + __be32 *p; struct dentry **components = NULL; unsigned int ncomponents = 0; __be32 err = nfserr_jukebox; @@ -1872,9 +1873,9 @@ static __be32 nfsd4_encode_path(const struct path *root, components[ncomponents++] = cur.dentry; cur.dentry = dget_parent(cur.dentry); } - - *buflen -= 4; - if (*buflen < 0) + err = nfserr_resource; + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_free; WRITE32(ncomponents); @@ -1884,8 +1885,8 @@ static __be32 nfsd4_encode_path(const struct path *root, spin_lock(&dentry->d_lock); len = dentry->d_name.len; - *buflen -= 4 + (XDR_QUADLEN(len) << 2); - if (*buflen < 0) { + p = xdr_reserve_space(xdr, len + 4); + if (!p) { spin_unlock(&dentry->d_lock); goto out_free; } @@ -1897,7 +1898,6 @@ static __be32 nfsd4_encode_path(const struct path *root, ncomponents--; } - *pp = p; err = 0; out_free: dprintk(")\n"); @@ -1908,8 +1908,8 @@ static __be32 nfsd4_encode_path(const struct path *root, return err; } -static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr, + struct svc_rqst *rqstp, const struct path *path) { struct svc_export *exp_ps; __be32 res; @@ -1917,7 +1917,7 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, exp_ps = rqst_find_fsidzero_export(rqstp); if (IS_ERR(exp_ps)) return nfserrno(PTR_ERR(exp_ps)); - res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); + res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path); exp_put(exp_ps); return res; } @@ -1925,28 +1925,26 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, /* * encode a fs_locations structure */ -static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, - struct svc_export *exp, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, + struct svc_rqst *rqstp, struct svc_export *exp) { __be32 status; int i; - __be32 *p = *pp; + __be32 *p; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; - status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); + status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path); if (status) return status; - if ((*buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; WRITE32(fslocs->locations_count); for (i=0; ilocations_count; i++) { - status = nfsd4_encode_fs_location4(&fslocs->locations[i], - &p, buflen); + status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); if (status) return status; } - *pp = p; return 0; } @@ -1965,15 +1963,15 @@ static u32 nfs4_file_type(umode_t mode) } static inline __be32 -nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, - __be32 **p, int *buflen) +nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, + struct nfs4_ace *ace) { if (ace->whotype != NFS4_ACL_WHO_NAMED) - return nfs4_acl_write_who(ace->whotype, p, buflen); + return nfs4_acl_write_who(xdr, ace->whotype); else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) - return nfsd4_encode_group(rqstp, ace->who_gid, p, buflen); + return nfsd4_encode_group(xdr, rqstp, ace->who_gid); else - return nfsd4_encode_user(rqstp, ace->who_uid, p, buflen); + return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ @@ -1982,31 +1980,28 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 -nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) { - __be32 *p = *pp; + __be32 *p; - if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) + p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + if (!p) return nfserr_resource; /* * For now we use a 0 here to indicate the null translation; in * the future we may place a call to translation code here. */ - if ((*buflen -= 8) < 0) - return nfserr_resource; - WRITE32(0); /* lfs */ WRITE32(0); /* pi */ p = xdr_encode_opaque(p, context, len); - *buflen -= (XDR_QUADLEN(len) << 2) + 4; - - *pp = p; return 0; } #else static inline __be32 -nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) { return 0; } #endif @@ -2058,8 +2053,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - __be32 *p = xdr->p; - int buflen = xdr->buf->buflen; + __be32 *p; + __be32 *start = xdr->p; __be32 *attrlenp; u32 dummy; u64 dummy64; @@ -2144,24 +2139,30 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ if (bmval2) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; WRITE32(3); WRITE32(bmval0); WRITE32(bmval1); WRITE32(bmval2); } else if (bmval1) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE32(2); WRITE32(bmval0); WRITE32(bmval1); } else { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE32(1); WRITE32(bmval0); } + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; attrlenp = p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { @@ -2174,13 +2175,15 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!contextsupport) word2 &= ~FATTR4_WORD2_SECURITY_LABEL; if (!word2) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE32(2); WRITE32(word0); WRITE32(word1); } else { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; WRITE32(3); WRITE32(word0); @@ -2189,7 +2192,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } } if (bmval0 & FATTR4_WORD0_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; dummy = nfs4_file_type(stat.mode); if (dummy == NF4BAD) { @@ -2199,7 +2203,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(dummy); } if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) WRITE32(NFS4_FH_PERSISTENT); @@ -2207,32 +2212,38 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; write_change(&p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64(stat.size); } if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); } if (bmval0 & FATTR4_WORD0_FSID) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; if (exp->ex_fslocs.migrated) { WRITE64(NFS4_REFERRAL_FSID_MAJOR); @@ -2254,17 +2265,20 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } } if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); } if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(rdattr_err); } @@ -2272,198 +2286,229 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct nfs4_ace *ace; if (acl == NULL) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); goto out_acl; } - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(acl->naces); for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { - if ((buflen -= 4*3) < 0) + p = xdr_reserve_space(xdr, 4*3); + if (!p) goto out_resource; WRITE32(ace->type); WRITE32(ace->flag); WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); - status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen); + status = nfsd4_encode_aclname(xdr, rqstp, ace); if (status) goto out; } } out_acl: if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(aclsupport ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_FILEHANDLE) { - buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; - if (buflen < 0) + p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); + if (!p) goto out_resource; WRITE32(fhp->fh_handle.fh_size); WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64(stat.ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) statfs.f_files); } if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); + status = nfsd4_encode_fs_locations(xdr, rqstp, exp); if (status) goto out; } if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(255); } if (bmval0 & FATTR4_WORD0_MAXNAME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(statfs.f_namelen); } if (bmval0 & FATTR4_WORD0_MAXREAD) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE64((u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(stat.mode & S_IALLUGO); } if (bmval1 & FATTR4_WORD1_NO_TRUNC) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(1); } if (bmval1 & FATTR4_WORD1_NUMLINKS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + status = nfsd4_encode_user(xdr, rqstp, stat.uid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + status = nfsd4_encode_group(xdr, rqstp, stat.gid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_RAWDEV) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; WRITE32((u32) MAJOR(stat.rdev)); WRITE32((u32) MINOR(stat.rdev)); } if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)stat.blocks << 9; WRITE64(dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE64((s64)stat.atime.tv_sec); WRITE32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE32(0); WRITE32(1); WRITE32(0); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE64((s64)stat.ctime.tv_sec); WRITE32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; WRITE64((s64)stat.mtime.tv_sec); WRITE32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; /* * Get parent's attributes if not ignoring crossmount @@ -2475,13 +2520,14 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE64(stat.ino); } if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { - status = nfsd4_encode_security_label(rqstp, context, - contextlen, &p, &buflen); + status = nfsd4_encode_security_label(xdr, rqstp, context, + contextlen); if (status) goto out; } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); @@ -2489,8 +2535,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - xdr->p = p; + *attrlenp = htonl((char *)xdr->p - (char *)attrlenp - 4); status = nfs_ok; out: @@ -2503,6 +2548,13 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, fh_put(tempfh); kfree(tempfh); } + if (status) { + int nbytes = (char *)xdr->p - (char *)start; + /* open code what *should* be xdr_truncate(xdr, len); */ + xdr->iov->iov_len -= nbytes; + xdr->buf->len -= nbytes; + xdr->p = start; + } return status; out_nfserr: status = nfserrno(err); @@ -2768,13 +2820,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 { struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = &resp->xdr; - struct xdr_buf *buf = resp->xdr.buf; if (nfserr) return nfserr; - buf->buflen = (void *)resp->xdr.end - (void *)resp->xdr.p - - COMPOUND_ERR_SLACK_SPACE; nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, getattr->ga_bmval, resp->rqstp, 0); @@ -2971,6 +3020,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, int v; struct page *page; unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; long len; __be32 *p; @@ -3017,6 +3067,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; + xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = p; @@ -3035,6 +3086,7 @@ static __be32 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { int maxcount; + struct xdr_stream *xdr = &resp->xdr; char *page; __be32 *p; @@ -3067,6 +3119,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; + xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = p; @@ -3086,6 +3139,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 { int maxcount; loff_t offset; + struct xdr_stream *xdr = &resp->xdr; __be32 *page, *savep, *tailbase; __be32 *p; @@ -3148,6 +3202,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 resp->xdr.buf->page_len = ((char *)p) - (char*)page_address(*(resp->rqstp->rq_next_page-1)); + xdr->iov = xdr->buf->tail; + /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = tailbase; resp->xdr.buf->tail[0].iov_len = 0; From d3f627c815b6eb5f6be388100617c36823d661c5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 17:00:38 -0500 Subject: [PATCH 034/100] nfsd4: use xdr_stream throughout compound encoding Note this makes ADJUST_ARGS useless; we'll remove it in the following patch. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 41c7c0a3ddd0..109b5a84b548 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1284,7 +1284,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, svcxdr_init_encode(rqstp, resp); resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->xdr.p += 2 + XDR_QUADLEN(args->taglen); + xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; resp->opcnt = 0; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 27d1e94f12fb..5064cb5a0b8e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1747,10 +1747,10 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } #define RESERVE_SPACE(nbytes) do { \ - p = resp->xdr.p; \ - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->xdr.end); \ + p = xdr_reserve_space(&resp->xdr, nbytes); \ + BUG_ON(!p); \ } while (0) -#define ADJUST_ARGS() resp->xdr.p = p +#define ADJUST_ARGS() WARN_ON_ONCE(p != resp->xdr.p) \ /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. @@ -3056,8 +3056,11 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount); - if (nfserr) + if (nfserr) { + xdr->p -= 2; + xdr->iov->iov_len -= 8; return nfserr; + } eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); @@ -3110,9 +3113,12 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd */ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); if (nfserr == nfserr_isdir) - return nfserr_inval; - if (nfserr) + nfserr = nfserr_inval; + if (nfserr) { + xdr->p--; + xdr->iov->iov_len -= 4; return nfserr; + } WRITE32(maxcount); ADJUST_ARGS(); @@ -3213,8 +3219,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return 0; err_no_verf: - p = savep; - ADJUST_ARGS(); + xdr->p = savep; + xdr->iov->iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; return nfserr; } From f46d382a749874e1b29cfb34d4ccf283eae4fffa Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 31 Jan 2014 11:19:41 -0500 Subject: [PATCH 035/100] nfsd4: remove ADJUST_ARGS It's just uninteresting debugging code at this point. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5064cb5a0b8e..e9abf5f66e8b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1750,7 +1750,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) p = xdr_reserve_space(&resp->xdr, nbytes); \ BUG_ON(!p); \ } while (0) -#define ADJUST_ARGS() WARN_ON_ONCE(p != resp->xdr.p) \ /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. @@ -2744,7 +2743,6 @@ nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) RESERVE_SPACE(sizeof(stateid_t)); WRITE32(sid->si_generation); WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); } static __be32 @@ -2756,7 +2754,6 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ RESERVE_SPACE(8); WRITE32(access->ac_supported); WRITE32(access->ac_resp_access); - ADJUST_ARGS(); } return nfserr; } @@ -2771,7 +2768,6 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, WRITE32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ WRITE32(0); - ADJUST_ARGS(); } return nfserr; } @@ -2794,7 +2790,6 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!nfserr) { RESERVE_SPACE(NFS4_VERIFIER_SIZE); WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); } return nfserr; } @@ -2810,7 +2805,6 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ WRITE32(2); WRITE32(create->cr_bmval[0]); WRITE32(create->cr_bmval[1]); - ADJUST_ARGS(); } return nfserr; } @@ -2842,7 +2836,6 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh RESERVE_SPACE(len + 4); WRITE32(len); WRITEMEM(&fhp->fh_handle.fh_base, len); - ADJUST_ARGS(); } return nfserr; } @@ -2870,7 +2863,6 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie WRITE64((u64)0); /* clientid */ WRITE32(0); /* length of owner name */ } - ADJUST_ARGS(); } static __be32 @@ -2910,7 +2902,6 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li if (!nfserr) { RESERVE_SPACE(20); write_cinfo(&p, &link->li_cinfo); - ADJUST_ARGS(); } return nfserr; } @@ -2932,7 +2923,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(open->op_bmval[0]); WRITE32(open->op_bmval[1]); WRITE32(open->op_delegate_type); - ADJUST_ARGS(); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: @@ -2949,7 +2939,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(0); WRITE32(0); WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); break; case NFS4_OPEN_DELEGATE_WRITE: nfsd4_encode_stateid(resp, &open->op_delegate_stateid); @@ -2970,7 +2959,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(0); WRITE32(0); WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); break; case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ switch (open->op_why_no_deleg) { @@ -2984,7 +2972,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op RESERVE_SPACE(4); WRITE32(open->op_why_no_deleg); } - ADJUST_ARGS(); break; default: BUG(); @@ -3066,7 +3053,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(eof); WRITE32(maxcount); - ADJUST_ARGS(); resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; @@ -3080,7 +3066,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); } return 0; } @@ -3121,7 +3106,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd } WRITE32(maxcount); - ADJUST_ARGS(); resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; @@ -3135,7 +3119,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); } return 0; } @@ -3162,7 +3145,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); WRITE32(0); - ADJUST_ARGS(); resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; tailbase = p; @@ -3233,7 +3215,6 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!nfserr) { RESERVE_SPACE(20); write_cinfo(&p, &remove->rm_cinfo); - ADJUST_ARGS(); } return nfserr; } @@ -3247,7 +3228,6 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ RESERVE_SPACE(40); write_cinfo(&p, &rename->rn_sinfo); write_cinfo(&p, &rename->rn_tinfo); - ADJUST_ARGS(); } return nfserr; } @@ -3287,7 +3267,6 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, supported = 0; RESERVE_SPACE(4); flavorsp = p++; /* to be backfilled later */ - ADJUST_ARGS(); for (i = 0; i < nflavs; i++) { rpc_authflavor_t pf = flavs[i].pseudoflavor; @@ -3301,12 +3280,10 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, WRITEMEM(info.oid.data, info.oid.len); WRITE32(info.qop); WRITE32(info.service); - ADJUST_ARGS(); } else if (pf < RPC_AUTH_MAXFLAVOR) { supported++; RESERVE_SPACE(4); WRITE32(pf); - ADJUST_ARGS(); } else { if (report) pr_warn("NFS: SECINFO: security flavor %u " @@ -3360,7 +3337,6 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 WRITE32(setattr->sa_bmval[1]); WRITE32(setattr->sa_bmval[2]); } - ADJUST_ARGS(); return nfserr; } @@ -3373,13 +3349,11 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE); WRITEMEM(&scd->se_clientid, 8); WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); } else if (nfserr == nfserr_clid_inuse) { RESERVE_SPACE(8); WRITE32(0); WRITE32(0); - ADJUST_ARGS(); } return nfserr; } @@ -3394,7 +3368,6 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w WRITE32(write->wr_bytes_written); WRITE32(write->wr_how_written); WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); - ADJUST_ARGS(); } return nfserr; } @@ -3437,7 +3410,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(exid->flags); WRITE32(exid->spa_how); - ADJUST_ARGS(); switch (exid->spa_how) { case SP4_NONE: @@ -3453,7 +3425,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* empty spo_must_allow bitmap: */ WRITE32(0); - ADJUST_ARGS(); break; default: WARN_ON_ONCE(1); @@ -3479,7 +3450,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* Implementation id */ WRITE32(0); /* zero length nfs_impl_id4 array */ - ADJUST_ARGS(); return 0; } @@ -3496,7 +3466,6 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(sess->seqid); WRITE32(sess->flags); - ADJUST_ARGS(); RESERVE_SPACE(28); WRITE32(0); /* headerpadsz */ @@ -3506,12 +3475,10 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->fore_channel.maxops); WRITE32(sess->fore_channel.maxreqs); WRITE32(sess->fore_channel.nr_rdma_attrs); - ADJUST_ARGS(); if (sess->fore_channel.nr_rdma_attrs) { RESERVE_SPACE(4); WRITE32(sess->fore_channel.rdma_attrs); - ADJUST_ARGS(); } RESERVE_SPACE(28); @@ -3522,12 +3489,10 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->back_channel.maxops); WRITE32(sess->back_channel.maxreqs); WRITE32(sess->back_channel.nr_rdma_attrs); - ADJUST_ARGS(); if (sess->back_channel.nr_rdma_attrs) { RESERVE_SPACE(4); WRITE32(sess->back_channel.rdma_attrs); - ADJUST_ARGS(); } return 0; } @@ -3550,7 +3515,6 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ WRITE32(seq->status_flags); - ADJUST_ARGS(); resp->cstate.datap = p; /* DRC cache data pointer */ return 0; } @@ -3572,7 +3536,6 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = stateid->ts_id_status; } - ADJUST_ARGS(); return nfserr; } @@ -3707,7 +3670,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) RESERVE_SPACE(8); WRITE32(op->opnum); statp = p++; /* to be backfilled at the end */ - ADJUST_ARGS(); if (op->opnum == OP_ILLEGAL) goto status; @@ -3768,11 +3730,9 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) RESERVE_SPACE(8); WRITE32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - ADJUST_ARGS(); RESERVE_SPACE(rp->rp_buflen); WRITEMEM(rp->rp_buf, rp->rp_buflen); - ADJUST_ARGS(); } int From dd97fddedc251eb423408d89f2947eff9c4ea3c1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 17:16:27 -0500 Subject: [PATCH 036/100] nfsd4: no need for encode_compoundres to adjust lengths xdr_reserve_space should now be calculating the length correctly as we go, so there's no longer any need to fix it up here. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +++ fs/nfsd/nfs4xdr.c | 8 +------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 14bfb5568715..b06f9a00ff8d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1618,6 +1618,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; + struct kvec *head = resp->xdr.iov; __be32 status; dprintk("--> %s slot %p\n", __func__, slot); @@ -1631,6 +1632,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, resp->opcnt = slot->sl_opcnt; resp->xdr.p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); + head->iov_len = (void *)resp->xdr.p - head->iov_base; + resp->xdr.buf->len = head->iov_len; status = slot->sl_status; return status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e9abf5f66e8b..79b8e1ef0f39 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3789,19 +3789,13 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo * All that remains is to write the tag and operation count... */ struct nfsd4_compound_state *cs = &resp->cstate; - struct kvec *iov; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (rqstp->rq_res.page_len) - iov = &rqstp->rq_res.tail[0]; - else - iov = &rqstp->rq_res.head[0]; - iov->iov_len = ((char *)resp->xdr.p) - (char *)iov->iov_base; - BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct nfs4_client *clp = cs->session->se_client; From 6ac90391c6e36c536cfcedbe4801a77e304205b1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 17:39:35 -0500 Subject: [PATCH 037/100] nfsd4: keep xdr buf length updated Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 ++ fs/nfsd/nfs4xdr.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 109b5a84b548..ef3952ac65b9 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1262,6 +1262,8 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->iov = head; xdr->p = head->iov_base + head->iov_len; xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 79b8e1ef0f39..57f60810b745 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3046,6 +3046,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) { xdr->p -= 2; xdr->iov->iov_len -= 8; + xdr->buf->len -= 8; return nfserr; } eof = (read->rd_offset + maxcount >= @@ -3053,9 +3054,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(eof); WRITE32(maxcount); - resp->xdr.buf->head[0].iov_len = (char *)p - - (char *)resp->xdr.buf->head[0].iov_base; + WARN_ON_ONCE(resp->xdr.buf->head[0].iov_len != (char *)p + - (char *)resp->xdr.buf->head[0].iov_base); resp->xdr.buf->page_len = maxcount; + xdr->buf->len += maxcount; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3066,6 +3068,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); + xdr->buf->len -= (maxcount&3); } return 0; } @@ -3102,6 +3105,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr) { xdr->p--; xdr->iov->iov_len -= 4; + xdr->buf->len -= 4; return nfserr; } @@ -3109,6 +3113,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->head[0].iov_len = (char *)p - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; + xdr->buf->len += maxcount; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3189,6 +3194,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 *p++ = htonl(readdir->common.err == nfserr_eof); resp->xdr.buf->page_len = ((char *)p) - (char*)page_address(*(resp->rqstp->rq_next_page-1)); + xdr->buf->len += xdr->buf->page_len; xdr->iov = xdr->buf->tail; @@ -3204,6 +3210,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 xdr->p = savep; xdr->iov->iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; + xdr->buf->len = xdr->iov->iov_len; return nfserr; } @@ -3789,6 +3796,10 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo * All that remains is to write the tag and operation count... */ struct nfsd4_compound_state *cs = &resp->cstate; + struct xdr_buf *buf = resp->xdr.buf; + + WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + + buf->tail[0].iov_len); p = resp->tagp; *p++ = htonl(resp->taglen); From 3e19ce762b537dd9aeefdd0849ba5f2f01ff83cf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 25 Feb 2014 17:44:21 -0500 Subject: [PATCH 038/100] rpc: xdr_truncate_encode This will be used in the server side in a few cases: - when certain operations (read, readdir, readlink) fail after encoding a partial response. - when we run out of space after encoding a partial response. - in readlink, where we initially reserve PAGE_SIZE bytes for data, then truncate to the actual size. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 66 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 15f9204ee70b..e7bb2e3bd0fb 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index dd97ba3c4456..352f3b35bbe5 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -508,6 +508,72 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_reserve_space); +/** + * xdr_truncate_encode - truncate an encode buffer + * @xdr: pointer to xdr_stream + * @len: new length of buffer + * + * Truncates the xdr stream, so that xdr->buf->len == len, + * and xdr->p points at offset len from the start of the buffer, and + * head, tail, and page lengths are adjusted to correspond. + * + * If this means moving xdr->p to a different buffer, we assume that + * that the end pointer should be set to the end of the current page, + * except in the case of the head buffer when we assume the head + * buffer's current length represents the end of the available buffer. + * + * This is *not* safe to use on a buffer that already has inlined page + * cache pages (as in a zero-copy server read reply), except for the + * simple case of truncating from one position in the tail to another. + * + */ +void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *head = buf->head; + struct kvec *tail = buf->tail; + int fraglen; + int new, old; + + if (len > buf->len) { + WARN_ON_ONCE(1); + return; + } + + fraglen = min_t(int, buf->len - len, tail->iov_len); + tail->iov_len -= fraglen; + buf->len -= fraglen; + if (tail->iov_len && buf->len == len) { + xdr->p = tail->iov_base + tail->iov_len; + /* xdr->end, xdr->iov should be set already */ + return; + } + WARN_ON_ONCE(fraglen); + fraglen = min_t(int, buf->len - len, buf->page_len); + buf->page_len -= fraglen; + buf->len -= fraglen; + + new = buf->page_base + buf->page_len; + old = new + fraglen; + xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT); + + if (buf->page_len && buf->len == len) { + xdr->p = page_address(*xdr->page_ptr); + xdr->end = (void *)xdr->p + PAGE_SIZE; + xdr->p = (void *)xdr->p + (new % PAGE_SIZE); + /* xdr->iov should already be NULL */ + return; + } + if (fraglen) + xdr->end = head->iov_base + head->iov_len; + /* (otherwise assume xdr->end is already set) */ + head->iov_len = len; + buf->len = len; + xdr->p = head->iov_base + head->iov_len; + xdr->iov = buf->head; +} +EXPORT_SYMBOL(xdr_truncate_encode); + /** * xdr_write_pages - Insert a list of pages into an XDR buffer for sending * @xdr: pointer to xdr_stream From 1fcea5b20b74cb856f5cd27161fea5329079dbd7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Feb 2014 20:17:02 -0500 Subject: [PATCH 039/100] nfsd4: use xdr_truncate_encode Now that lengths are reliable, we can use xdr_truncate instead of open-coding it everywhere. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 57f60810b745..c2815f4849bf 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2053,7 +2053,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct svc_fh *tempfh = NULL; struct kstatfs statfs; __be32 *p; - __be32 *start = xdr->p; + int starting_len = xdr->buf->len; __be32 *attrlenp; u32 dummy; u64 dummy64; @@ -2547,13 +2547,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, fh_put(tempfh); kfree(tempfh); } - if (status) { - int nbytes = (char *)xdr->p - (char *)start; - /* open code what *should* be xdr_truncate(xdr, len); */ - xdr->iov->iov_len -= nbytes; - xdr->buf->len -= nbytes; - xdr->p = start; - } + if (status) + xdr_truncate_encode(xdr, starting_len); return status; out_nfserr: status = nfserrno(err); @@ -3008,6 +3003,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct page *page; unsigned long maxcount; struct xdr_stream *xdr = &resp->xdr; + int starting_len = xdr->buf->len; long len; __be32 *p; @@ -3044,9 +3040,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, &maxcount); if (nfserr) { - xdr->p -= 2; - xdr->iov->iov_len -= 8; - xdr->buf->len -= 8; + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + xdr->buf->page_len = 0; + xdr_truncate_encode(xdr, starting_len); return nfserr; } eof = (read->rd_offset + maxcount >= @@ -3079,6 +3079,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int maxcount; struct xdr_stream *xdr = &resp->xdr; char *page; + int length_offset = xdr->buf->len; __be32 *p; if (nfserr) @@ -3103,9 +3104,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr == nfserr_isdir) nfserr = nfserr_inval; if (nfserr) { - xdr->p--; - xdr->iov->iov_len -= 4; - xdr->buf->len -= 4; + xdr_truncate_encode(xdr, length_offset); return nfserr; } @@ -3134,7 +3133,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int maxcount; loff_t offset; struct xdr_stream *xdr = &resp->xdr; - __be32 *page, *savep, *tailbase; + int starting_len = xdr->buf->len; + __be32 *page, *tailbase; __be32 *p; if (nfserr) @@ -3145,7 +3145,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr_resource; RESERVE_SPACE(NFS4_VERIFIER_SIZE); - savep = p; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); @@ -3207,10 +3206,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return 0; err_no_verf: - xdr->p = savep; - xdr->iov->iov_len = ((char *)resp->xdr.p) - - (char *)resp->xdr.buf->head[0].iov_base; - xdr->buf->len = xdr->iov->iov_len; + xdr_truncate_encode(xdr, starting_len); return nfserr; } From 082d4bd72a4527c6568f53f4a5de74e804666fa7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 29 Aug 2013 15:42:52 -0400 Subject: [PATCH 040/100] nfsd4: "backfill" using write_bytes_to_xdr_buf Normally xdr encoding proceeds in a single pass from start of a buffer to end, but sometimes we have to write a few bytes to an earlier position. Use write_bytes_to_xdr_buf for these cases rather than saving a pointer to write to. We plan to rewrite xdr_reserve_space to handle encoding across page boundaries using a scratch buffer, and don't want to risk writing to a pointer that was contained in a scratch buffer. Also it will no longer be safe to calculate lengths by subtracting two pointers, so use xdr_buf offsets instead. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c2815f4849bf..37a73c8cda61 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1759,16 +1759,19 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, char esc_exit) { __be32 *p; - __be32 *countp; + __be32 pathlen; + int pathlen_offset; int strlen, count=0; char *str, *end, *next; dprintk("nfsd4_encode_components(%s)\n", components); + + pathlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - countp = p; - WRITE32(0); /* We will fill this in with @count later */ + p++; /* We will fill this in with @count later */ + end = str = components; while (*end) { bool found_esc = false; @@ -1801,8 +1804,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, end++; str = end; } - p = countp; - WRITE32(count); + pathlen = htonl(xdr->buf->len - pathlen_offset); + write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4); return 0; } @@ -2054,7 +2057,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstatfs statfs; __be32 *p; int starting_len = xdr->buf->len; - __be32 *attrlenp; + int attrlen_offset; + __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; @@ -2159,10 +2163,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(1); WRITE32(bmval0); } + + attrlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - attrlenp = p++; /* to be backfilled later */ + p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 word0 = nfsd_suppattrs0(minorversion); @@ -2534,7 +2540,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } - *attrlenp = htonl((char *)xdr->p - (char *)attrlenp - 4); + attrlen = htonl(xdr->buf->len - attrlen_offset - 4); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); status = nfs_ok; out: @@ -3664,15 +3671,16 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { + struct xdr_stream *xdr = &resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; - __be32 *statp; + int post_err_offset; nfsd4_enc encoder; __be32 *p; RESERVE_SPACE(8); WRITE32(op->opnum); - statp = p++; /* to be backfilled at the end */ + post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) goto status; @@ -3698,20 +3706,19 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) * bug if we had to do this on a non-idempotent op: */ warn_on_nonidempotent_op(op); - resp->xdr.p = statp + 1; + xdr_truncate_encode(xdr, post_err_offset); } if (so) { + int len = xdr->buf->len - post_err_offset; + so->so_replay.rp_status = op->status; - so->so_replay.rp_buflen = (char *)resp->xdr.p - - (char *)(statp+1); - memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen); + so->so_replay.rp_buflen = len; + read_bytes_from_xdr_buf(xdr->buf, post_err_offset, + so->so_replay.rp_buf, len); } status: - /* - * Note: We write the status directly, instead of using WRITE32(), - * since it is already in network byte order. - */ - *statp = op->status; + /* Note that op->status is already in network byte order: */ + write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); } /* From d0a381dd0eda1cc769a5762d0eed4d0d662219f2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 30 Jan 2014 17:18:38 -0500 Subject: [PATCH 041/100] nfsd4: teach encoders to handle reserve_space failures We've tried to prevent running out of space with COMPOUND_SLACK_SPACE and special checking in those operations (getattr) whose result can vary enormously. However: - COMPOUND_SLACK_SPACE may be difficult to maintain as we add more protocol. - BUG_ON or page faulting on failure seems overly fragile. - Especially in the 4.1 case, we prefer not to fail compounds just because the returned result came *close* to session limits. (Though perfect enforcement here may be difficult.) - I'd prefer encoding to be uniform for all encoders instead of having special exceptions for encoders containing, for example, attributes. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 247 +++++++++++++++++++++++++++++++++------------ fs/nfsd/xdr4.h | 2 +- 3 files changed, 185 insertions(+), 66 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ef3952ac65b9..6a5b701961a4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1400,7 +1400,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(resp, op); + nfsd4_encode_replay(&resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 37a73c8cda61..799a90479487 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1746,11 +1746,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } } -#define RESERVE_SPACE(nbytes) do { \ - p = xdr_reserve_space(&resp->xdr, nbytes); \ - BUG_ON(!p); \ -} while (0) - /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. */ @@ -2737,23 +2732,29 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, return -EINVAL; } -static void -nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) +static __be32 +nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) { __be32 *p; - RESERVE_SPACE(sizeof(stateid_t)); + p = xdr_reserve_space(xdr, sizeof(stateid_t)); + if (!p) + return nfserr_resource; WRITE32(sid->si_generation); WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + return 0; } static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; WRITE32(access->ac_supported); WRITE32(access->ac_resp_access); } @@ -2762,10 +2763,13 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ @@ -2777,8 +2781,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &close->cl_stateid); + nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); return nfserr; } @@ -2787,10 +2793,13 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); } return nfserr; @@ -2799,10 +2808,13 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(32); + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; write_cinfo(&p, &create->cr_cinfo); WRITE32(2); WRITE32(create->cr_bmval[0]); @@ -2829,13 +2841,16 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { + struct xdr_stream *xdr = &resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; if (!nfserr) { len = fhp->fh_handle.fh_size; - RESERVE_SPACE(len + 4); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; WRITE32(len); WRITEMEM(&fhp->fh_handle.fh_base, len); } @@ -2846,13 +2861,15 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh * Including all fields other than the name, a LOCK4denied structure requires * 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. */ -static void -nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) +static __be32 +nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) { struct xdr_netobj *conf = &ld->ld_owner; __be32 *p; - RESERVE_SPACE(32 + XDR_LEN(conf->len)); + p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); + if (!p) + return nfserr_resource; WRITE64(ld->ld_start); WRITE64(ld->ld_length); WRITE32(ld->ld_type); @@ -2865,15 +2882,18 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie WRITE64((u64)0); /* clientid */ WRITE32(0); /* length of owner name */ } + return nfserr_denied; } static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lock->lk_denied); + nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); return nfserr; } @@ -2881,16 +2901,20 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { + struct xdr_stream *xdr = &resp->xdr; + if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lockt->lt_denied); + nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); return nfserr; } static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &locku->lu_stateid); + nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); return nfserr; } @@ -2899,10 +2923,13 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; write_cinfo(&p, &link->li_cinfo); } return nfserr; @@ -2912,13 +2939,18 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) goto out; - nfsd4_encode_stateid(resp, &open->op_stateid); - RESERVE_SPACE(40); + nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); + if (nfserr) + goto out; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; write_cinfo(&p, &open->op_cinfo); WRITE32(open->op_rflags); WRITE32(2); @@ -2930,8 +2962,12 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op case NFS4_OPEN_DELEGATE_NONE: break; case NFS4_OPEN_DELEGATE_READ: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(20); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; WRITE32(open->op_recall); /* @@ -2943,8 +2979,12 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_WRITE: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(32); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; WRITE32(0); /* @@ -2966,12 +3006,16 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op switch (open->op_why_no_deleg) { case WND4_CONTENTION: case WND4_RESOURCE: - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; WRITE32(open->op_why_no_deleg); WRITE32(0); /* deleg signaling not supported yet */ break; default: - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(open->op_why_no_deleg); } break; @@ -2986,8 +3030,10 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); return nfserr; } @@ -2995,8 +3041,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { + struct xdr_stream *xdr = &resp->xdr; + if (!nfserr) - nfsd4_encode_stateid(resp, &od->od_stateid); + nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); return nfserr; } @@ -3019,7 +3067,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (resp->xdr.buf->page_len) return nfserr_resource; - RESERVE_SPACE(8); /* eof flag and byte count */ + p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ + if (!p) + return nfserr_resource; maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) @@ -3071,7 +3121,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->tail[0].iov_base = p; resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); @@ -3099,7 +3151,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd page = page_address(*(resp->rqstp->rq_next_page++)); maxcount = PAGE_SIZE; - RESERVE_SPACE(4); + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; /* * XXX: By default, the ->readlink() VFS op will truncate symlinks @@ -3126,7 +3181,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->tail[0].iov_base = p; resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); @@ -3151,7 +3208,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (!*resp->rqstp->rq_next_page) return nfserr_resource; - RESERVE_SPACE(NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); @@ -3220,10 +3279,13 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; write_cinfo(&p, &remove->rm_cinfo); } return nfserr; @@ -3232,10 +3294,13 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(40); + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; write_cinfo(&p, &rename->rn_sinfo); write_cinfo(&p, &rename->rn_tinfo); } @@ -3243,7 +3308,7 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, __be32 nfserr, struct svc_export *exp) { u32 i, nflavs, supported; @@ -3254,6 +3319,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, if (nfserr) goto out; + nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -3275,7 +3341,9 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, } supported = 0; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; flavorsp = p++; /* to be backfilled later */ for (i = 0; i < nflavs; i++) { @@ -3284,7 +3352,10 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, if (rpcauth_get_gssinfo(pf, &info) == 0) { supported++; - RESERVE_SPACE(4 + 4 + XDR_LEN(info.oid.len) + 4 + 4); + p = xdr_reserve_space(xdr, 4 + 4 + + XDR_LEN(info.oid.len) + 4 + 4); + if (!p) + goto out; WRITE32(RPC_AUTH_GSS); WRITE32(info.oid.len); WRITEMEM(info.oid.data, info.oid.len); @@ -3292,7 +3363,9 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, WRITE32(info.service); } else if (pf < RPC_AUTH_MAXFLAVOR) { supported++; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; WRITE32(pf); } else { if (report) @@ -3304,7 +3377,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, if (nflavs != supported) report = false; *flavorsp = htonl(supported); - + nfserr = 0; out: if (exp) exp_put(exp); @@ -3315,14 +3388,18 @@ static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); } static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); } /* @@ -3332,9 +3409,12 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; - RESERVE_SPACE(16); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; if (nfserr) { WRITE32(3); WRITE32(0); @@ -3353,15 +3433,20 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; WRITEMEM(&scd->se_clientid, 8); WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); } else if (nfserr == nfserr_clid_inuse) { - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; WRITE32(0); WRITE32(0); } @@ -3371,10 +3456,13 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(16); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; WRITE32(write->wr_bytes_written); WRITE32(write->wr_how_written); WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); @@ -3394,6 +3482,7 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; char *major_id; char *server_scope; @@ -3409,11 +3498,13 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, server_scope = utsname()->nodename; server_scope_sz = strlen(server_scope); - RESERVE_SPACE( + p = xdr_reserve_space(xdr, 8 /* eir_clientid */ + 4 /* eir_sequenceid */ + 4 /* eir_flags */ + 4 /* spr_how */); + if (!p) + return nfserr_resource; WRITEMEM(&exid->clientid, 8); WRITE32(exid->seqid); @@ -3426,7 +3517,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, break; case SP4_MACH_CRED: /* spo_must_enforce, spo_must_allow */ - RESERVE_SPACE(16); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; /* spo_must_enforce bitmap: */ WRITE32(2); @@ -3440,13 +3533,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, WARN_ON_ONCE(1); } - RESERVE_SPACE( + p = xdr_reserve_space(xdr, 8 /* so_minor_id */ + 4 /* so_major_id.len */ + (XDR_QUADLEN(major_id_sz) * 4) + 4 /* eir_server_scope.len */ + (XDR_QUADLEN(server_scope_sz) * 4) + 4 /* eir_server_impl_id.count (0) */); + if (!p) + return nfserr_resource; /* The server_owner struct */ WRITE64(minor_id); /* Minor id */ @@ -3467,17 +3562,22 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create_session *sess) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(24); + p = xdr_reserve_space(xdr, 24); + if (!p) + return nfserr_resource; WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(sess->seqid); WRITE32(sess->flags); - RESERVE_SPACE(28); + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; WRITE32(0); /* headerpadsz */ WRITE32(sess->fore_channel.maxreq_sz); WRITE32(sess->fore_channel.maxresp_sz); @@ -3487,11 +3587,15 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->fore_channel.nr_rdma_attrs); if (sess->fore_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(sess->fore_channel.rdma_attrs); } - RESERVE_SPACE(28); + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; WRITE32(0); /* headerpadsz */ WRITE32(sess->back_channel.maxreq_sz); WRITE32(sess->back_channel.maxresp_sz); @@ -3501,7 +3605,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(sess->back_channel.nr_rdma_attrs); if (sess->back_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; WRITE32(sess->back_channel.rdma_attrs); } return 0; @@ -3511,12 +3617,15 @@ static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_sequence *seq) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); + if (!p) + return nfserr_resource; WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(seq->seqid); WRITE32(seq->slotid); @@ -3533,13 +3642,16 @@ static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid *test_stateid) { + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); + p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); + if (!p) + return nfserr_resource; *p++ = htonl(test_stateid->ts_num_ids); list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { @@ -3678,7 +3790,11 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) nfsd4_enc encoder; __be32 *p; - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + return; + } WRITE32(op->opnum); post_err_offset = xdr->buf->len; @@ -3730,18 +3846,21 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) * called with nfs4_lock_state() held */ void -nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) { __be32 *p; struct nfs4_replay *rp = op->replay; BUG_ON(!rp); - RESERVE_SPACE(8); + p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); + if (!p) { + WARN_ON_ONCE(1); + return; + } WRITE32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - RESERVE_SPACE(rp->rp_buflen); WRITEMEM(rp->rp_buf, rp->rp_buflen); } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 15ca47797a82..ea5ad5db655b 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -563,7 +563,7 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); -void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); +void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, From 4e21ac4b6f1d09c56f7d10916eaa738361214ab7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 15:15:11 -0400 Subject: [PATCH 042/100] nfsd4: reserve space before inlining 0-copy pages Once we've included page-cache pages in the encoding it's difficult to remove them and restart encoding. (xdr_truncate_encode doesn't handle that case.) So, make sure we'll have adequate space to finish the operation first. For now COMPOUND_SLACK_SPACE checks should prevent this case happening, but we want to remove those checks. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 799a90479487..8728715cd6e8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3071,6 +3071,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; + /* Make sure there will be room for padding if needed: */ + if (xdr->end - xdr->p < 1) + return nfserr_resource; + maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) maxcount = read->rd_length; @@ -3122,8 +3126,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); @@ -3156,6 +3158,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (!p) return nfserr_resource; + if (xdr->end - xdr->p < 1) + return nfserr_resource; + /* * XXX: By default, the ->readlink() VFS op will truncate symlinks * if they would overflow the buffer. Is this kosher in NFSv4? If @@ -3182,8 +3187,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; WRITE32(0); resp->xdr.buf->tail[0].iov_base += maxcount&3; resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); From 67492c990300912c717bc95e9f705feb63de2df9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 8 Mar 2014 16:42:05 -0500 Subject: [PATCH 043/100] nfsd4: nfsd4_check_resp_size needn't recalculate length We're keeping the length updated as we go now, so there's no need for the extra calculation here. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8728715cd6e8..89c65a3e09e4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3752,32 +3752,20 @@ static nfsd4_enc nfsd4_enc_ops[] = { */ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) { - struct xdr_buf *xb = &resp->rqstp->rq_res; + struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_session *session = NULL; struct nfsd4_slot *slot = resp->cstate.slot; - u32 length, tlen = 0; if (!nfsd4_has_session(&resp->cstate)) return 0; session = resp->cstate.session; - if (xb->page_len == 0) { - length = (char *)resp->xdr.p - (char *)xb->head[0].iov_base + pad; - } else { - if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) - tlen = (char *)resp->xdr.p - (char *)xb->tail[0].iov_base; - - length = xb->head[0].iov_len + xb->page_len + tlen + pad; - } - dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, - length, xb->page_len, tlen, pad); - - if (length > session->se_fchannel.maxresp_sz) + if (buf->len + pad > session->se_fchannel.maxresp_sz) return nfserr_rep_too_big; if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - length > session->se_fchannel.maxresp_cached) + buf->len + pad > session->se_fchannel.maxresp_cached) return nfserr_rep_too_big_to_cache; return 0; From ea8d7720b274607f48fb524337254a9c43dbc2df Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 8 Mar 2014 17:19:55 -0500 Subject: [PATCH 044/100] nfsd4: remove redundant encode buffer size checking Now that all op encoders can handle running out of space, we no longer need to check the remaining size for every operation; only nonidempotent operations need that check, and that can be done by nfsd4_check_resp_size. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 14 -------------- fs/nfsd/nfs4xdr.c | 22 +++++++++++++--------- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6a5b701961a4..a5b666ca560d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1279,7 +1279,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; - int slack_bytes; u32 plen = 0; __be32 status; @@ -1333,19 +1332,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } - /* We must be able to encode a successful response to - * this operation, with enough room left over to encode a - * failed response to the next operation. If we don't - * have enough room, fail with ERR_RESOURCE. - */ - slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; - if (slack_bytes < COMPOUND_SLACK_SPACE - + COMPOUND_ERR_SLACK_SPACE) { - BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); - op->status = nfserr_resource; - goto encode_op; - } - opdesc = OPDESC(op); if (!current_fh->fh_dentry) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 89c65a3e09e4..df643e0fb6b3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3753,20 +3753,24 @@ static nfsd4_enc nfsd4_enc_ops[] = { __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) { struct xdr_buf *buf = &resp->rqstp->rq_res; - struct nfsd4_session *session = NULL; + struct nfsd4_session *session = resp->cstate.session; struct nfsd4_slot *slot = resp->cstate.slot; + int slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; - if (!nfsd4_has_session(&resp->cstate)) - return 0; + if (nfsd4_has_session(&resp->cstate)) { - session = resp->cstate.session; + if (buf->len + pad > session->se_fchannel.maxresp_sz) + return nfserr_rep_too_big; - if (buf->len + pad > session->se_fchannel.maxresp_sz) - return nfserr_rep_too_big; + if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && + buf->len + pad > session->se_fchannel.maxresp_cached) + return nfserr_rep_too_big_to_cache; + } - if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - buf->len + pad > session->se_fchannel.maxresp_cached) - return nfserr_rep_too_big_to_cache; + if (pad > slack_bytes) { + WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); + return nfserr_resource; + } return 0; } From a8095f7e80fbf3e0efe4ee5cd3f509113c56290f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 17:54:34 -0400 Subject: [PATCH 045/100] nfsd4: size-checking cleanup Better variable name, some comments, etc. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 9 ++++++--- fs/nfsd/nfs4xdr.c | 29 +++++++++++++++-------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a5b666ca560d..3ce431b9b577 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1279,7 +1279,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; - u32 plen = 0; __be32 status; svcxdr_init_encode(rqstp, resp); @@ -1349,9 +1348,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, /* If op is non-idempotent */ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { - plen = opdesc->op_rsize_bop(rqstp, op); /* - * If there's still another operation, make sure + * Don't execute this op if we couldn't encode a + * succesful reply: + */ + u32 plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Plus if there's another operation, make sure * we'll have space to at least encode an error: */ if (resp->opcnt < args->opcnt) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index df643e0fb6b3..bd529e523087 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3739,35 +3739,36 @@ static nfsd4_enc nfsd4_enc_ops[] = { }; /* - * Calculate the total amount of memory that the compound response has taken - * after encoding the current operation with pad. + * Calculate whether we still have space to encode repsize bytes. + * There are two considerations: + * - For NFS versions >=4.1, the size of the reply must stay within + * session limits + * - For all NFS versions, we must stay within limited preallocated + * buffer space. * - * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop() - * which was specified at nfsd4_operation, else pad is zero. - * - * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. - * - * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so - * will be at least a page and will therefore hold the xdr_buf head. + * This is called before the operation is processed, so can only provide + * an upper estimate. For some nonidempotent operations (such as + * getattr), it's not necessarily a problem if that estimate is wrong, + * as we can fail it after processing without significant side effects. */ -__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_session *session = resp->cstate.session; - struct nfsd4_slot *slot = resp->cstate.slot; int slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; if (nfsd4_has_session(&resp->cstate)) { + struct nfsd4_slot *slot = resp->cstate.slot; - if (buf->len + pad > session->se_fchannel.maxresp_sz) + if (buf->len + respsize > session->se_fchannel.maxresp_sz) return nfserr_rep_too_big; if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - buf->len + pad > session->se_fchannel.maxresp_cached) + buf->len + respsize > session->se_fchannel.maxresp_cached) return nfserr_rep_too_big_to_cache; } - if (pad > slack_bytes) { + if (respsize > slack_bytes) { WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); return nfserr_resource; } From 2825a7f90753012babe7ee292f4a1eadd3706f92 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 26 Aug 2013 16:04:46 -0400 Subject: [PATCH 046/100] nfsd4: allow encoding across page boundaries After this we can handle for example getattr of very large ACLs. Read, readdir, readlink are still special cases with their own limits. Also we can't handle a new operation starting close to the end of a page. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++ fs/nfsd/nfs4xdr.c | 60 ++++++++++++++++++++--------- include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/xdr.h | 1 + net/sunrpc/svc_xprt.c | 1 + net/sunrpc/xdr.c | 78 +++++++++++++++++++++++++++++++++++++- 6 files changed, 126 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3ce431b9b577..5d8f9158701d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1264,6 +1264,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - 2 * RPC_MAX_AUTH_SIZE; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bd529e523087..d3a576dbd99b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1624,6 +1624,7 @@ static int nfsd4_max_reply(u32 opnum) * the head and tail in another page: */ return 2 * PAGE_SIZE; + case OP_GETATTR: case OP_READ: return INT_MAX; default: @@ -2560,21 +2561,31 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; } +static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, + struct xdr_buf *buf, __be32 *p, int bytes) +{ + xdr->scratch.iov_len = 0; + memset(buf, 0, sizeof(struct xdr_buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = 0; + buf->len = 0; + xdr->buf = buf; + xdr->iov = buf->head; + xdr->p = p; + xdr->end = (void *)p + bytes; + buf->buflen = bytes; +} + __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { - struct xdr_buf dummy = { - .head[0] = { - .iov_base = *p, - }, - .buflen = words << 2, - }; + struct xdr_buf dummy; struct xdr_stream xdr; __be32 ret; - xdr_init_encode(&xdr, &dummy, NULL); + svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, ignore_crossmnt); *p = xdr.p; @@ -3064,8 +3075,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) return nfserr; - if (resp->xdr.buf->page_len) - return nfserr_resource; p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) @@ -3075,6 +3084,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (xdr->end - xdr->p < 1) return nfserr_resource; + if (resp->xdr.buf->page_len) + return nfserr_resource; + maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) maxcount = read->rd_length; @@ -3119,6 +3131,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - (char *)resp->xdr.buf->head[0].iov_base); resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; + xdr->page_ptr += v; + xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3145,6 +3159,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd if (nfserr) return nfserr; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + if (resp->xdr.buf->page_len) return nfserr_resource; if (!*resp->rqstp->rq_next_page) @@ -3154,10 +3173,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd maxcount = PAGE_SIZE; - p = xdr_reserve_space(xdr, 4); - if (!p) - return nfserr_resource; - if (xdr->end - xdr->p < 1) return nfserr_resource; @@ -3180,6 +3195,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd - (char *)resp->xdr.buf->head[0].iov_base; resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; + xdr->page_ptr += 1; + xdr->buf->buflen -= PAGE_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3206,15 +3223,16 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (nfserr) return nfserr; - if (resp->xdr.buf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; + if (resp->xdr.buf->page_len) + return nfserr_resource; + if (!*resp->rqstp->rq_next_page) + return nfserr_resource; + /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); WRITE32(0); @@ -3266,6 +3284,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 xdr->iov = xdr->buf->tail; + xdr->page_ptr++; + xdr->buf->buflen -= PAGE_SIZE; + xdr->iov = xdr->buf->tail; + /* Use rest of head for padding and remaining ops: */ resp->xdr.buf->tail[0].iov_base = tailbase; resp->xdr.buf->tail[0].iov_len = 0; @@ -3800,6 +3822,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + xdr_commit_encode(xdr); + /* nfsd4_check_resp_size guarantees enough room for error status */ if (!op->status) { int space_needed = 0; @@ -3919,6 +3943,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a0dbbd1e00e9..85cb6472a423 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -244,6 +244,7 @@ struct svc_rqst { struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ + struct page * *rq_page_end; /* one past the last page */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index e7bb2e3bd0fb..b23d69ffd5ec 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 29772e01b179..b4737fbdec13 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) } rqstp->rq_pages[i] = p; } + rqstp->rq_page_end = &rqstp->rq_pages[i]; rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ /* Make arg->head point to first page and arg->pages point to rest */ diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 352f3b35bbe5..2b546e8ce43d 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; + xdr_set_scratch_buffer(xdr, NULL, 0); BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; @@ -481,6 +482,74 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_encode); +/** + * xdr_commit_encode - Ensure all data is written to buffer + * @xdr: pointer to xdr_stream + * + * We handle encoding across page boundaries by giving the caller a + * temporary location to write to, then later copying the data into + * place; xdr_commit_encode does that copying. + * + * Normally the caller doesn't need to call this directly, as the + * following xdr_reserve_space will do it. But an explicit call may be + * required at the end of encoding, or any other time when the xdr_buf + * data might be read. + */ +void xdr_commit_encode(struct xdr_stream *xdr) +{ + int shift = xdr->scratch.iov_len; + void *page; + + if (shift == 0) + return; + page = page_address(*xdr->page_ptr); + memcpy(xdr->scratch.iov_base, page, shift); + memmove(page, page + shift, (void *)xdr->p - page); + xdr->scratch.iov_len = 0; +} +EXPORT_SYMBOL_GPL(xdr_commit_encode); + +__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) +{ + static __be32 *p; + int space_left; + int frag1bytes, frag2bytes; + + if (nbytes > PAGE_SIZE) + return NULL; /* Bigger buffers require special handling */ + if (xdr->buf->len + nbytes > xdr->buf->buflen) + return NULL; /* Sorry, we're totally out of space */ + frag1bytes = (xdr->end - xdr->p) << 2; + frag2bytes = nbytes - frag1bytes; + if (xdr->iov) + xdr->iov->iov_len += frag1bytes; + else { + xdr->buf->page_len += frag1bytes; + xdr->page_ptr++; + } + xdr->iov = NULL; + /* + * If the last encode didn't end exactly on a page boundary, the + * next one will straddle boundaries. Encode into the next + * page, then copy it back later in xdr_commit_encode. We use + * the "scratch" iov to track any temporarily unused fragment of + * space at the end of the previous buffer: + */ + xdr->scratch.iov_base = xdr->p; + xdr->scratch.iov_len = frag1bytes; + p = page_address(*xdr->page_ptr); + /* + * Note this is where the next encode will start after we've + * shifted this one back: + */ + xdr->p = (void *)p + frag2bytes; + space_left = xdr->buf->buflen - xdr->buf->len; + xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); + xdr->buf->page_len += frag2bytes; + xdr->buf->len += nbytes; + return p; +} + /** * xdr_reserve_space - Reserve buffer space for sending * @xdr: pointer to xdr_stream @@ -495,14 +564,18 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) __be32 *p = xdr->p; __be32 *q; + xdr_commit_encode(xdr); /* align nbytes on the next 32-bit boundary */ nbytes += 3; nbytes &= ~3; q = p + (nbytes >> 2); if (unlikely(q > xdr->end || q < p)) - return NULL; + return xdr_get_next_encode_buffer(xdr, nbytes); xdr->p = q; - xdr->iov->iov_len += nbytes; + if (xdr->iov) + xdr->iov->iov_len += nbytes; + else + xdr->buf->page_len += nbytes; xdr->buf->len += nbytes; return p; } @@ -539,6 +612,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(1); return; } + xdr_commit_encode(xdr); fraglen = min_t(int, buf->len - len, tail->iov_len); tail->iov_len -= fraglen; From f5236013a21c118e9d317e90c7a152dfe51fab93 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Mar 2014 17:57:57 -0400 Subject: [PATCH 047/100] nfsd4: convert 4.1 replay encoding Limits on maxresp_sz mean that we only ever need to replay rpc's that are contained entirely in the head. The one exception is very small zero-copy reads. That's an odd corner case as clients wouldn't normally ask those to be cached. in any case, this seems a little more robust. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 ++++++++++++++-------------- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/xdr4.h | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b06f9a00ff8d..8e22ea485f99 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1560,6 +1560,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { + struct xdr_buf *buf = resp->xdr.buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -1573,11 +1574,9 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_datalen = 0; return; } - slot->sl_datalen = (char *)resp->xdr.p - (char *)resp->cstate.datap; - base = (char *)resp->cstate.datap - - (char *)resp->xdr.buf->head[0].iov_base; - if (read_bytes_from_xdr_buf(resp->xdr.buf, base, slot->sl_data, - slot->sl_datalen)) + base = resp->cstate.data_offset; + slot->sl_datalen = buf->len - base; + if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) WARN("%s: sessions DRC could not cache compound\n", __func__); return; } @@ -1618,7 +1617,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; - struct kvec *head = resp->xdr.iov; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; __be32 status; dprintk("--> %s slot %p\n", __func__, slot); @@ -1627,16 +1627,16 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, if (status) return status; - /* The sequence operation has been encoded, cstate->datap set. */ - memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); + p = xdr_reserve_space(xdr, slot->sl_datalen); + if (!p) { + WARN_ON_ONCE(1); + return nfserr_serverfault; + } + xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen); + xdr_commit_encode(xdr); resp->opcnt = slot->sl_opcnt; - resp->xdr.p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); - head->iov_len = (void *)resp->xdr.p - head->iov_base; - resp->xdr.buf->len = head->iov_len; - status = slot->sl_status; - - return status; + return slot->sl_status; } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d3a576dbd99b..a90a1e86614a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3659,7 +3659,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ WRITE32(seq->status_flags); - resp->cstate.datap = p; /* DRC cache data pointer */ + resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ return 0; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index ea5ad5db655b..ee9ffdc8a0cb 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -58,7 +58,7 @@ struct nfsd4_compound_state { /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; - __be32 *datap; + int data_offset; size_t iovlen; u32 minorversion; __be32 status; From 8c7424cff6bd33459945646cfcbf6dc6c899ab24 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Mar 2014 12:19:10 -0400 Subject: [PATCH 048/100] nfsd4: don't try to encode conflicting owner if low on space I ran into this corner case in testing: in theory clients can provide state owners up to 1024 bytes long. In the sessions case there might be a risk of this pushing us over the DRC slot size. The conflicting owner isn't really that important, so let's humor a client that provides a small maxresponsize_cached by allowing ourselves to return without the conflicting owner instead of outright failing the operation. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- fs/nfsd/nfs4xdr.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5d8f9158701d..20aad5a3005f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1432,7 +1432,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, #define op_encode_change_info_maxsz (5) #define nfs4_fattr_bitmap_maxsz (4) -#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We'll fall back on returning no lockowner if run out of space: */ +#define op_encode_lockowner_maxsz (0) #define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a90a1e86614a..2f0ea20edb23 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2878,9 +2878,20 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) struct xdr_netobj *conf = &ld->ld_owner; __be32 *p; +again: p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); - if (!p) + if (!p) { + /* + * Don't fail to return the result just because we can't + * return the conflicting open: + */ + if (conf->len) { + conf->len = 0; + conf->data = NULL; + goto again; + } return nfserr_resource; + } WRITE64(ld->ld_start); WRITE64(ld->ld_length); WRITE32(ld->ld_type); @@ -2888,7 +2899,6 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) WRITEMEM(&ld->ld_clientid, 8); WRITE32(conf->len); WRITEMEM(conf->data, conf->len); - kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ WRITE64((u64)0); /* clientid */ WRITE32(0); /* length of owner name */ @@ -2905,7 +2915,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - + kfree(lock->lk_denied.ld_owner.data); return nfserr; } From 4f0cefbf389c28b0a2be34960797adb0c84ee43d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 15:39:13 -0400 Subject: [PATCH 049/100] nfsd4: more precise nfsd4_max_reply It will turn out to be useful to have a more accurate estimate of reply size; so, piggyback on the existing op reply-size estimators. Also move nfsd4_max_reply to nfs4proc.c to get easier access to struct nfsd4_operation and friends. (Thanks to Christoph Hellwig for pointing out that simplification.) Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 12 ++++++++++++ fs/nfsd/nfs4xdr.c | 35 ++++------------------------------- fs/nfsd/xdr4.h | 1 + 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 20aad5a3005f..59c319528cf9 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1854,6 +1854,18 @@ static struct nfsd4_operation nfsd4_ops[] = { }, }; +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + struct nfsd4_operation *opdesc; + nfsd4op_rsize estimator; + + if (op->opnum == OP_ILLEGAL) + return op_encode_hdr_size * sizeof(__be32); + opdesc = OPDESC(op); + estimator = opdesc->op_rsize_bop; + return estimator ? estimator(rqstp, op) : PAGE_SIZE; +} + void warn_on_nonidempotent_op(struct nfsd4_op *op) { if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2f0ea20edb23..8b3d24de9cac 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1605,40 +1605,13 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -/* - * Return a rough estimate of the maximum possible reply size. Note the - * estimate includes rpc headers so is meant to be passed to - * svc_reserve, not svc_reserve_auth. - * - * Also note the current compound encoding permits only one operation to - * use pages beyond the first one, so the maximum possible length is the - * maximum over these values, not the sum. - */ -static int nfsd4_max_reply(u32 opnum) -{ - switch (opnum) { - case OP_READLINK: - case OP_READDIR: - /* - * Both of these ops take a single page for data and put - * the head and tail in another page: - */ - return 2 * PAGE_SIZE; - case OP_GETATTR: - case OP_READ: - return INT_MAX; - default: - return PAGE_SIZE; - } -} - static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; bool cachethis = false; - int max_reply = PAGE_SIZE; + int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */ int i; READ_BUF(4); @@ -1647,6 +1620,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) SAVEMEM(argp->tag, argp->taglen); READ32(argp->minorversion); READ32(argp->opcnt); + max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); if (argp->taglen > NFSD4_MAX_TAGLEN) goto xdr_error; @@ -1684,7 +1658,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) */ cachethis |= nfsd4_cache_this_op(op); - max_reply = max(max_reply, nfsd4_max_reply(op->opnum)); + max_reply += nfsd4_max_reply(argp->rqstp, op); if (op->status) { argp->opcnt = i+1; @@ -1694,8 +1668,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; - if (max_reply != INT_MAX) - svc_reserve(argp->rqstp, max_reply); + svc_reserve(argp->rqstp, max_reply); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; DECODE_TAIL; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index ee9ffdc8a0cb..41e522993d94 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -536,6 +536,7 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); void warn_on_nonidempotent_op(struct nfsd4_op *op); #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) From 6ff9897d2bcf4036dfd139caeddd6f0a51c9ca06 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 16:51:23 -0400 Subject: [PATCH 050/100] nfsd4: minor encode_read cleanup Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8b3d24de9cac..8ce6c8d5ee8a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3076,18 +3076,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, len = maxcount; v = 0; - while (len > 0) { + while (len) { + int thislen; + page = *(resp->rqstp->rq_next_page); if (!page) { /* ran out of pages */ maxcount -= len; break; } + thislen = min_t(long, len, PAGE_SIZE); resp->rqstp->rq_vec[v].iov_base = page_address(page); - resp->rqstp->rq_vec[v].iov_len = - len < PAGE_SIZE ? len : PAGE_SIZE; + resp->rqstp->rq_vec[v].iov_len = thislen; resp->rqstp->rq_next_page++; v++; - len -= PAGE_SIZE; + len -= thislen; } read->rd_vlen = v; From 89ff884ebbd0a667253dd61ade8a0e70b787c84a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 11 Mar 2014 17:58:57 -0400 Subject: [PATCH 051/100] nfsd4: nfsd4_check_resp_size should check against whole buffer Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8ce6c8d5ee8a..0eeba2199c8c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3762,7 +3762,6 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_session *session = resp->cstate.session; - int slack_bytes = (char *)resp->xdr.end - (char *)resp->xdr.p; if (nfsd4_has_session(&resp->cstate)) { struct nfsd4_slot *slot = resp->cstate.slot; @@ -3775,7 +3774,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) return nfserr_rep_too_big_to_cache; } - if (respsize > slack_bytes) { + if (buf->len + respsize > buf->buflen) { WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); return nfserr_resource; } From 30596768b31069a3ae08fc305f394efb8c42b473 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 19 May 2014 16:18:23 -0400 Subject: [PATCH 052/100] nfsd4: fix buflen calculation after read encoding We don't necessarily want to assume that the buflen is the same as the number of bytes available in the pages. We may have some reason to set it to something less (for example, later patches will use a smaller buflen to enforce session limits). So, calculate the buflen relative to the previous buflen instead of recalculating it from scratch. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0eeba2199c8c..1278d98a923c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3053,6 +3053,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, unsigned long maxcount; struct xdr_stream *xdr = &resp->xdr; int starting_len = xdr->buf->len; + int space_left; long len; __be32 *p; @@ -3117,7 +3118,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; xdr->page_ptr += v; - xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ @@ -3130,6 +3130,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); xdr->buf->len -= (maxcount&3); } + + space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, + xdr->buf->buflen - xdr->buf->len); + xdr->buf->buflen = xdr->buf->len + space_left; + xdr->end = (__be32 *)((void *)xdr->end + space_left); + return 0; } From db3f58a95beea6752d90fed03f9f198d282a3913 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Mar 2014 13:22:18 -0500 Subject: [PATCH 053/100] rpc: define xdr_restrict_buflen With this xdr_reserve_space can help us enforce various limits. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b23d69ffd5ec..70c6b92e15a7 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -217,6 +217,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2b546e8ce43d..39928444c7fb 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -648,6 +648,35 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) } EXPORT_SYMBOL(xdr_truncate_encode); +/** + * xdr_restrict_buflen - decrease available buffer space + * @xdr: pointer to xdr_stream + * @newbuflen: new maximum number of bytes available + * + * Adjust our idea of how much space is available in the buffer. + * If we've already used too much space in the buffer, returns -1. + * If the available space is already smaller than newbuflen, returns 0 + * and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen + * and ensures xdr->end is set at most offset newbuflen from the start + * of the buffer. + */ +int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen) +{ + struct xdr_buf *buf = xdr->buf; + int left_in_this_buf = (void *)xdr->end - (void *)xdr->p; + int end_offset = buf->len + left_in_this_buf; + + if (newbuflen < 0 || newbuflen < buf->len) + return -1; + if (newbuflen > buf->buflen) + return 0; + if (newbuflen < end_offset) + xdr->end = (void *)xdr->end + newbuflen - end_offset; + buf->buflen = newbuflen; + return 0; +} +EXPORT_SYMBOL(xdr_restrict_buflen); + /** * xdr_write_pages - Insert a list of pages into an XDR buffer for sending * @xdr: pointer to xdr_stream From 47ee52986472dba068e8223cbaf1b65d74238781 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 12 Mar 2014 21:39:35 -0400 Subject: [PATCH 054/100] nfsd4: adjust buflen to session channel limit We can simplify session limit enforcement by restricting the xdr buflen to the session size. Also fix a preexisting bug: we should really have been taking into account the auth-required space when comparing against session limits, which are limits on the size of the entire rpc reply, including any krb5 overhead. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 11 +++++++++++ fs/nfsd/nfs4xdr.c | 24 ++++++++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8e22ea485f99..612b85ac414b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2208,11 +2208,13 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_sequence *seq) { struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_session *session; struct nfs4_client *clp; struct nfsd4_slot *slot; struct nfsd4_conn *conn; __be32 status; + int buflen; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (resp->opcnt != 1) @@ -2281,6 +2283,15 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out_put_session; + buflen = (seq->cachethis) ? + session->se_fchannel.maxresp_cached : + session->se_fchannel.maxresp_sz; + status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : + nfserr_rep_too_big; + if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE)) + goto out_put_session; + + status = nfs_ok; /* Success! bump slot seqid */ slot->sl_seqid = seq->seqid; slot->sl_flags |= NFSD4_SLOT_INUSE; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1278d98a923c..5e7bac4a7e71 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3767,25 +3767,17 @@ static nfsd4_enc nfsd4_enc_ops[] = { __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { struct xdr_buf *buf = &resp->rqstp->rq_res; - struct nfsd4_session *session = resp->cstate.session; + struct nfsd4_slot *slot = resp->cstate.slot; - if (nfsd4_has_session(&resp->cstate)) { - struct nfsd4_slot *slot = resp->cstate.slot; - - if (buf->len + respsize > session->se_fchannel.maxresp_sz) - return nfserr_rep_too_big; - - if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && - buf->len + respsize > session->se_fchannel.maxresp_cached) - return nfserr_rep_too_big_to_cache; - } - - if (buf->len + respsize > buf->buflen) { - WARN_ON_ONCE(nfsd4_has_session(&resp->cstate)); + if (buf->len + respsize <= buf->buflen) + return nfs_ok; + if (!nfsd4_has_session(&resp->cstate)) return nfserr_resource; + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) { + WARN_ON_ONCE(1); + return nfserr_rep_too_big_to_cache; } - - return 0; + return nfserr_rep_too_big; } void From 32aaa62ede574ff99b020b4ee3ff6f9cfc9f0099 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Mar 2014 20:47:41 -0400 Subject: [PATCH 055/100] nfsd4: use session limits to release send buffer reservation Once we know the limits the session places on the size of the rpc, we can also use that information to release any unnecessary reserved reply buffer space. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 612b85ac414b..62b882dc48ec 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2290,6 +2290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, nfserr_rep_too_big; if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE)) goto out_put_session; + svc_reserve(rqstp, buflen); status = nfs_ok; /* Success! bump slot seqid */ From 561f0ed498ca4342573a870779cc645d3fd7dfe7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Jan 2014 16:37:11 -0500 Subject: [PATCH 056/100] nfsd4: allow large readdirs Currently we limit readdir results to a single page. This can result in a performance regression compared to NFSv3 when reading large directories. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 9 +-- fs/nfsd/nfs4xdr.c | 137 +++++++++++++++++++++++++-------------------- fs/nfsd/xdr4.h | 5 +- 3 files changed, 82 insertions(+), 69 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 59c319528cf9..d95d9015e50a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { + u32 maxcount = svc_max_payload(rqstp); u32 rlen = op->u.readdir.rd_maxcount; - if (rlen > PAGE_SIZE) - rlen = PAGE_SIZE; + if (rlen > maxcount) + rlen = maxcount; - return (op_encode_hdr_size + op_encode_verifier_maxsz) - * sizeof(__be32) + rlen; + return (op_encode_hdr_size + op_encode_verifier_maxsz + + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5e7bac4a7e71..4d79e5366a82 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval) } static __be32 -nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 **p, int buflen) +nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, + const char *name, int namlen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, } out_encode: - nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, - cd->rd_bmval, + nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval, cd->rd_rqstp, ignore_crossmnt); out_put: dput(dentry); @@ -2638,9 +2637,12 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, } static __be32 * -nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) +nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) { - if (buflen < 6) + __be32 *p; + + p = xdr_reserve_space(xdr, 6); + if (!p) return NULL; *p++ = htonl(2); *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ @@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, { struct readdir_cd *ccd = ccdv; struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); - int buflen; - __be32 *p = cd->buffer; - __be32 *cookiep; + struct xdr_stream *xdr = cd->xdr; + int start_offset = xdr->buf->len; + int cookie_offset; + int entry_bytes; __be32 nfserr = nfserr_toosmall; + __be64 wire_offset; + __be32 *p; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, return 0; } - if (cd->offset) - xdr_encode_hyper(cd->offset, (u64) offset); + if (cd->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, + &wire_offset, 8); + } - buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); - if (buflen < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto fail; - *p++ = xdr_one; /* mark entry present */ - cookiep = p; + cookie_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 3*4 + namlen); + if (!p) + goto fail; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); + nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); switch (nfserr) { case nfs_ok: break; @@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, */ if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) goto fail; - p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + p = nfsd4_encode_rdattr_error(xdr, nfserr); if (p == NULL) { nfserr = nfserr_toosmall; goto fail; } } - cd->buflen -= (p - cd->buffer); - cd->buffer = p; - cd->offset = cookiep; + nfserr = nfserr_toosmall; + entry_bytes = xdr->buf->len - start_offset; + if (entry_bytes > cd->rd_maxcount) + goto fail; + cd->rd_maxcount -= entry_bytes; + cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; return 0; fail: + xdr_truncate_encode(xdr, start_offset); cd->common.err = nfserr; return -EINVAL; } @@ -3206,10 +3220,11 @@ static __be32 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) { int maxcount; + int bytes_left; loff_t offset; + __be64 wire_offset; struct xdr_stream *xdr = &resp->xdr; int starting_len = xdr->buf->len; - __be32 *page, *tailbase; __be32 *p; if (nfserr) @@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (!p) return nfserr_resource; - if (resp->xdr.buf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; - /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); WRITE32(0); resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; - tailbase = p; - - maxcount = PAGE_SIZE; - if (maxcount > readdir->rd_maxcount) - maxcount = readdir->rd_maxcount; /* - * Convert from bytes to words, account for the two words already - * written, make sure to leave two words at the end for the next - * pointer and eof field. + * Number of bytes left for directory entries allowing for the + * final 8 bytes of the readdir and a following failed op: + */ + bytes_left = xdr->buf->buflen - xdr->buf->len + - COMPOUND_ERR_SLACK_SPACE - 8; + if (bytes_left < 0) { + nfserr = nfserr_resource; + goto err_no_verf; + } + maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); + /* + * Note the rfc defines rd_maxcount as the size of the + * READDIR4resok structure, which includes the verifier above + * and the 8 bytes encoded at the end of this function: */ - maxcount = (maxcount >> 2) - 4; - if (maxcount < 0) { - nfserr = nfserr_toosmall; + if (maxcount < 16) { + nfserr = nfserr_toosmall; goto err_no_verf; } + maxcount = min_t(int, maxcount-16, bytes_left); - page = page_address(*(resp->rqstp->rq_next_page++)); + readdir->xdr = xdr; + readdir->rd_maxcount = maxcount; readdir->common.err = 0; - readdir->buflen = maxcount; - readdir->buffer = page; - readdir->offset = NULL; + readdir->cookie_offset = 0; offset = readdir->rd_cookie; nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, @@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 &readdir->common, nfsd4_encode_dirent); if (nfserr == nfs_ok && readdir->common.err == nfserr_toosmall && - readdir->buffer == page) - nfserr = nfserr_toosmall; + xdr->buf->len == starting_len + 8) { + /* nothing encoded; which limit did we hit?: */ + if (maxcount - 16 < bytes_left) + /* It was the fault of rd_maxcount: */ + nfserr = nfserr_toosmall; + else + /* We ran out of buffer space: */ + nfserr = nfserr_resource; + } if (nfserr) goto err_no_verf; - if (readdir->offset) - xdr_encode_hyper(readdir->offset, offset); + if (readdir->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, + &wire_offset, 8); + } - p = readdir->buffer; + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + goto err_no_verf; + } *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xdr.buf->page_len = ((char *)p) - - (char*)page_address(*(resp->rqstp->rq_next_page-1)); - xdr->buf->len += xdr->buf->page_len; - - xdr->iov = xdr->buf->tail; - - xdr->page_ptr++; - xdr->buf->buflen -= PAGE_SIZE; - xdr->iov = xdr->buf->tail; - - /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = tailbase; - resp->xdr.buf->tail[0].iov_len = 0; - resp->xdr.p = resp->xdr.buf->tail[0].iov_base; - resp->xdr.end = resp->xdr.p + - (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4; return 0; err_no_verf: diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 41e522993d94..18cbb6d9c8a9 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -287,9 +287,8 @@ struct nfsd4_readdir { struct svc_fh * rd_fhp; /* response */ struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; + struct xdr_stream *xdr; + int cookie_offset; }; struct nfsd4_release_lockowner { From 3b299709091befc0e02aa33d55ddd5baef006853 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Mar 2014 21:20:26 -0400 Subject: [PATCH 057/100] nfsd4: enforce rd_dircount As long as we're here, let's enforce the protocol's limit on the number of directory entries to return in a readdir. I don't think anyone's ever noticed our lack of enforcement, but maybe there's more of a chance they will now that we allow larger readdirs. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 4d79e5366a82..3f2a52ccb9d1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1033,7 +1033,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read READ_BUF(24); READ64(readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - READ32(readdir->rd_dircount); /* just in case you needed a useless field... */ + READ32(readdir->rd_dircount); READ32(readdir->rd_maxcount); if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) goto out; @@ -2720,6 +2720,9 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, if (entry_bytes > cd->rd_maxcount) goto fail; cd->rd_maxcount -= entry_bytes; + if (!cd->rd_dircount) + goto fail; + cd->rd_dircount--; cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; From 476a7b1f4b2c9c38255653fa55157565be8b14be Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Jan 2014 17:08:27 -0500 Subject: [PATCH 058/100] nfsd4: don't treat readlink like a zero-copy operation There's no advantage to this zero-copy-style readlink encoding, and it unnecessarily limits the kinds of compounds we can handle. (In practice I can't see why a client would want e.g. multiple readlink calls in a comound, but it's probably a spec violation for us not to handle it.) Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 42 ++++++++++++------------------------------ 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3f2a52ccb9d1..cda6226bda91 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3160,8 +3160,9 @@ static __be32 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { int maxcount; + __be32 wire_count; + int zero = 0; struct xdr_stream *xdr = &resp->xdr; - char *page; int length_offset = xdr->buf->len; __be32 *p; @@ -3171,26 +3172,19 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - - if (resp->xdr.buf->page_len) - return nfserr_resource; - if (!*resp->rqstp->rq_next_page) - return nfserr_resource; - - page = page_address(*(resp->rqstp->rq_next_page++)); - maxcount = PAGE_SIZE; - if (xdr->end - xdr->p < 1) + p = xdr_reserve_space(xdr, maxcount); + if (!p) return nfserr_resource; - /* * XXX: By default, the ->readlink() VFS op will truncate symlinks * if they would overflow the buffer. Is this kosher in NFSv4? If * not, one easy fix is: if ->readlink() precisely fills the buffer, * assume that truncation occurred, and return NFS4ERR_RESOURCE. */ - nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, + (char *)p, &maxcount); if (nfserr == nfserr_isdir) nfserr = nfserr_inval; if (nfserr) { @@ -3198,24 +3192,12 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd return nfserr; } - WRITE32(maxcount); - resp->xdr.buf->head[0].iov_len = (char *)p - - (char *)resp->xdr.buf->head[0].iov_base; - resp->xdr.buf->page_len = maxcount; - xdr->buf->len += maxcount; - xdr->page_ptr += 1; - xdr->buf->buflen -= PAGE_SIZE; - xdr->iov = xdr->buf->tail; - - /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = p; - resp->xdr.buf->tail[0].iov_len = 0; - if (maxcount&3) { - p = xdr_reserve_space(xdr, 4); - WRITE32(0); - resp->xdr.buf->tail[0].iov_base += maxcount&3; - resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - } + wire_count = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); + xdr_truncate_encode(xdr, length_offset + 4 + maxcount); + if (maxcount & 3) + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, + &zero, 4 - (maxcount&3)); return 0; } From b86cef60dafcbdf5a19adfa990c2c1672222e677 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 23 Mar 2014 12:01:48 -0400 Subject: [PATCH 059/100] nfsd4: better estimate of getattr response size We plan to use this estimate to decide whether or not to allow zero-copy reads. Currently we're assuming all getattr's are a page, which can be both too small (ACLs e.g. may be arbitrarily long) and too large (after an upcoming read patch this will unnecessarily prevent zero copy reads in any read compound also containing a getattr). Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d95d9015e50a..7507effc7c42 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1465,6 +1465,49 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } +/* + * Note since this is an idempotent operation we won't insist on failing + * the op prematurely if the estimate is too large. We may turn off splice + * reads unnecessarily. + */ +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 *bmap = op->u.getattr.ga_bmval; + u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; + u32 ret = 0; + + if (bmap0 & FATTR4_WORD0_ACL) + return svc_max_payload(rqstp); + if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) + return svc_max_payload(rqstp); + + if (bmap1 & FATTR4_WORD1_OWNER) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER; + } + if (bmap1 & FATTR4_WORD1_OWNER_GROUP) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER_GROUP; + } + if (bmap0 & FATTR4_WORD0_FILEHANDLE) { + ret += NFS4_FHSIZE + 4; + bmap0 &= ~FATTR4_WORD0_FILEHANDLE; + } + if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { + ret += NFSD4_MAX_SEC_LABEL_LEN + 12; + bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; + } + /* + * Largest of remaining attributes are 16 bytes (e.g., + * supported_attributes) + */ + ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2)); + /* bitmask, length */ + ret += 20; + return ret; +} + static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1603,6 +1646,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, .op_flags = ALLOWED_ON_ABSENT_FS, + .op_rsize_bop = nfsd4_getattr_rsize, .op_name = "OP_GETATTR", }, [OP_GETFH] = { From ccae70a9ee415a89b70e97a36886ab55191ebaea Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 23 Mar 2014 09:34:22 -0700 Subject: [PATCH 060/100] nfsd4: estimate sequence response size Otherwise a following patch would turn off all 4.1 zero-copy reads. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7507effc7c42..2d786b813c7e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1565,6 +1565,12 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + op_encode_change_info_maxsz) * sizeof(__be32); } +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return NFS4_MAX_SESSIONID_LEN + 20; +} + static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); From b0e35fda827e72cf4b065b52c4c472c28c004fca Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Feb 2014 10:36:59 -0500 Subject: [PATCH 061/100] nfsd4: turn off zero-copy-read in exotic cases We currently allow only one read per compound, with operations before and after whose responses will require no more than about a page to encode. While we don't expect clients to violate those limits any time soon, this limitation isn't really condoned by the spec, so to future proof the server we should lift the limitation. At the same time we'd like to continue to support zero-copy reads. Supporting multiple zero-copy-reads per compound would require a new data structure to replace struct xdr_buf, which can represent only one set of included pages. So for now we plan to modify encode_read() to support either zero-copy or non-zero-copy reads, and use some heuristics at the start of the compound processing to decide whether a zero-copy read will work. This will allow us to support more exotic compounds without introducing a performance regression in the normal case. Later patches handle those "exotic compounds", this one just makes sure zero-copy is turned off in those cases. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index cda6226bda91..f6a5cb722697 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1612,6 +1612,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) struct nfsd4_op *op; bool cachethis = false; int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */ + int readcount = 0; + int readbytes = 0; int i; READ_BUF(4); @@ -1658,7 +1660,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) */ cachethis |= nfsd4_cache_this_op(op); - max_reply += nfsd4_max_reply(argp->rqstp, op); + if (op->opnum == OP_READ) { + readcount++; + readbytes += nfsd4_max_reply(argp->rqstp, op); + } else + max_reply += nfsd4_max_reply(argp->rqstp, op); if (op->status) { argp->opcnt = i+1; @@ -1668,9 +1674,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; - svc_reserve(argp->rqstp, max_reply); + svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; + if (readcount > 1 || max_reply > PAGE_SIZE - 2*RPC_MAX_AUTH_SIZE) + argp->rqstp->rq_splice_ok = false; + DECODE_TAIL; } @@ -3078,15 +3087,19 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ - if (!p) + if (!p) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; + } /* Make sure there will be room for padding if needed: */ if (xdr->end - xdr->p < 1) return nfserr_resource; - if (resp->xdr.buf->page_len) + if (resp->xdr.buf->page_len) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; + } maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) From 02fe4707740e7c8a3d0ec34ffbf4630d7d41ca68 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Mar 2014 21:32:04 -0400 Subject: [PATCH 062/100] nfsd4: nfsd_vfs_read doesn't use file handle parameter Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 3aa38523b6d7..f3a0f6cc4298 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -821,7 +821,7 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, } static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { mm_segment_t oldfs; @@ -981,7 +981,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, if (ra && ra->p_set) file->f_ra = ra->p_ra; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); /* Write back readahead params */ if (ra) { @@ -1010,7 +1010,7 @@ nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); } else /* Note file may still be NULL in NFSv4 special stateid case: */ err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); out: From dc97618ddda9a23e5211e800f0614e9612178200 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Mar 2014 17:01:51 -0400 Subject: [PATCH 063/100] nfsd4: separate splice and readv cases The splice and readv cases are actually quite different--for example the former case ignores the array of vectors we build up for the latter. It is probably clearer to separate the two cases entirely. There's some code duplication between the split out encoders, but this is only temporary and will be fixed by a later patch. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 173 +++++++++++++++++++++++++++++++++++----------- fs/nfsd/vfs.c | 121 ++++++++++++++++++++------------ fs/nfsd/vfs.h | 8 +++ 3 files changed, 214 insertions(+), 88 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f6a5cb722697..bad762b6cf10 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3069,41 +3069,84 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc return nfserr; } -static __be32 -nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) +static __be32 nfsd4_encode_splice_read( + struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) { - u32 eof; - int v; - struct page *page; - unsigned long maxcount; struct xdr_stream *xdr = &resp->xdr; - int starting_len = xdr->buf->len; + u32 eof; + int starting_len = xdr->buf->len - 8; int space_left; - long len; + __be32 nfserr; + __be32 tmp; __be32 *p; - if (nfserr) - return nfserr; - - p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ - if (!p) { - WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + /* + * Don't inline pages unless we know there's room for eof, + * count, and possible padding: + */ + if (xdr->end - xdr->p < 3) return nfserr_resource; + + nfserr = nfsd_splice_read(read->rd_rqstp, file, + read->rd_offset, &maxcount); + if (nfserr) { + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + xdr->buf->page_len = 0; + return nfserr; } - /* Make sure there will be room for padding if needed: */ - if (xdr->end - xdr->p < 1) - return nfserr_resource; + eof = (read->rd_offset + maxcount >= + read->rd_fhp->fh_dentry->d_inode->i_size); - if (resp->xdr.buf->page_len) { - WARN_ON_ONCE(resp->rqstp->rq_splice_ok); - return nfserr_resource; + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + + resp->xdr.buf->page_len = maxcount; + xdr->buf->len += maxcount; + xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + xdr->iov = xdr->buf->tail; + + /* Use rest of head for padding and remaining ops: */ + resp->xdr.buf->tail[0].iov_base = xdr->p; + resp->xdr.buf->tail[0].iov_len = 0; + if (maxcount&3) { + p = xdr_reserve_space(xdr, 4); + WRITE32(0); + resp->xdr.buf->tail[0].iov_base += maxcount&3; + resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); + xdr->buf->len -= (maxcount&3); } - maxcount = svc_max_payload(resp->rqstp); - if (maxcount > read->rd_length) - maxcount = read->rd_length; + space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, + xdr->buf->buflen - xdr->buf->len); + xdr->buf->buflen = xdr->buf->len + space_left; + xdr->end = (__be32 *)((void *)xdr->end + space_left); + + return 0; +} + +static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) +{ + struct xdr_stream *xdr = &resp->xdr; + u32 eof; + int v; + struct page *page; + int starting_len = xdr->buf->len - 8; + int space_left; + long len; + __be32 nfserr; + __be32 tmp; + __be32 *p; len = maxcount; v = 0; @@ -3124,34 +3167,26 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } read->rd_vlen = v; - nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, - read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, - &maxcount); - - if (nfserr) { - /* - * nfsd_splice_actor may have already messed with the - * page length; reset it so as not to confuse - * xdr_truncate_encode: - */ - xdr->buf->page_len = 0; - xdr_truncate_encode(xdr, starting_len); + nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, + read->rd_vlen, &maxcount); + if (nfserr) return nfserr; - } + eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); - WRITE32(eof); - WRITE32(maxcount); - WARN_ON_ONCE(resp->xdr.buf->head[0].iov_len != (char *)p - - (char *)resp->xdr.buf->head[0].iov_base); + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + resp->xdr.buf->page_len = maxcount; xdr->buf->len += maxcount; xdr->page_ptr += v; xdr->iov = xdr->buf->tail; /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = p; + resp->xdr.buf->tail[0].iov_base = xdr->p; resp->xdr.buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); @@ -3167,6 +3202,60 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, xdr->end = (__be32 *)((void *)xdr->end + space_left); return 0; + +} + +static __be32 +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) +{ + unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; + struct file *file = read->rd_filp; + int starting_len = xdr->buf->len; + struct raparms *ra; + __be32 *p; + __be32 err; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ + if (!p) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + return nfserr_resource; + } + + if (resp->xdr.buf->page_len) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + return nfserr_resource; + } + + xdr_commit_encode(xdr); + + maxcount = svc_max_payload(resp->rqstp); + if (maxcount > read->rd_length) + maxcount = read->rd_length; + + if (!read->rd_filp) { + err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, + &file, &ra); + if (err) + goto err_truncate; + } + + if (file->f_op->splice_read && resp->rqstp->rq_splice_ok) + err = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + err = nfsd4_encode_readv(resp, read, file, maxcount); + + if (!read->rd_filp) + nfsd_put_tmp_read_open(file, ra); + +err_truncate: + if (err) + xdr_truncate_encode(xdr, starting_len); + return err; } static __be32 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f3a0f6cc4298..b7d35a4afefb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -820,41 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { - mm_segment_t oldfs; - __be32 err; - int host_err; - - err = nfserr_perm; - - if (file->f_op->splice_read && rqstp->rq_splice_ok) { - struct splice_desc sd = { - .len = 0, - .total_len = *count, - .pos = offset, - .u.data = rqstp, - }; - - rqstp->rq_next_page = rqstp->rq_respages + 1; - host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - } else { - oldfs = get_fs(); - set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); - set_fs(oldfs); - } - if (host_err >= 0) { nfsdstats.io_read += host_err; *count = host_err; - err = 0; fsnotify_access(file); + return 0; } else - err = nfserrno(host_err); - return err; + return nfserrno(host_err); +} + +int nfsd_splice_read(struct svc_rqst *rqstp, + struct file *file, loff_t offset, unsigned long *count) +{ + struct splice_desc sd = { + .len = 0, + .total_len = *count, + .pos = offset, + .u.data = rqstp, + }; + int host_err; + + rqstp->rq_next_page = rqstp->rq_respages + 1; + host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); + return nfsd_finish_read(file, count, host_err); +} + +int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, + unsigned long *count) +{ + mm_segment_t oldfs; + int host_err; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + set_fs(oldfs); + return nfsd_finish_read(file, count, host_err); +} + +static __be32 +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + if (file->f_op->splice_read && rqstp->rq_splice_ok) + return nfsd_splice_read(rqstp, file, offset, count); + else + return nfsd_readv(file, offset, vec, vlen, count); } /* @@ -956,33 +969,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, return err; } -/* - * Read data from a file. count must contain the requested read count - * on entry. On return, *count contains the number of bytes actually read. - * N.B. After this call fhp needs an fh_put - */ -__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file **file, struct raparms **ra) { - struct file *file; struct inode *inode; - struct raparms *ra; __be32 err; - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file); if (err) return err; - inode = file_inode(file); + inode = file_inode(*file); /* Get readahead parameters */ - ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - - if (ra && ra->p_set) - file->f_ra = ra->p_ra; + *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + if (*ra && (*ra)->p_set) + (*file)->f_ra = (*ra)->p_ra; + return nfs_ok; +} +void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) +{ /* Write back readahead params */ if (ra) { struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; @@ -992,8 +1000,29 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ra->p_count--; spin_unlock(&rab->pb_lock); } - nfsd_close(file); +} + +/* + * Read data from a file. count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. After this call fhp needs an fh_put + */ +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + struct file *file; + struct raparms *ra; + __be32 err; + + err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); + if (err) + return err; + + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + + nfsd_put_tmp_read_open(file, ra); + return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index fbe90bdb2214..7441e9655eb7 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -70,6 +70,14 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); void nfsd_close(struct file *); +struct raparms; +__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, + struct file **, struct raparms **); +void nfsd_put_tmp_read_open(struct file *, struct raparms *); +int nfsd_splice_read(struct svc_rqst *, + struct file *, loff_t, unsigned long *); +int nfsd_readv(struct file *, loff_t, struct kvec *, int, + unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, From 34a78b488f144e011493fa51f10c01d034d47c8e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 13 May 2014 16:32:04 -0400 Subject: [PATCH 064/100] nfsd4: read encoding cleanup Trivial cleanup, no change in functionality. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bad762b6cf10..addb93bf03b7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3075,6 +3075,7 @@ static __be32 nfsd4_encode_splice_read( struct file *file, unsigned long maxcount) { struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = xdr->buf; u32 eof; int starting_len = xdr->buf->len - 8; int space_left; @@ -3097,7 +3098,7 @@ static __be32 nfsd4_encode_splice_read( * page length; reset it so as not to confuse * xdr_truncate_encode: */ - xdr->buf->page_len = 0; + buf->page_len = 0; return nfserr; } @@ -3105,29 +3106,29 @@ static __be32 nfsd4_encode_splice_read( read->rd_fhp->fh_dentry->d_inode->i_size); tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + write_bytes_to_xdr_buf(buf, starting_len , &tmp, 4); tmp = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + write_bytes_to_xdr_buf(buf, starting_len + 4, &tmp, 4); - resp->xdr.buf->page_len = maxcount; - xdr->buf->len += maxcount; + buf->page_len = maxcount; + buf->len += maxcount; xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; - xdr->iov = xdr->buf->tail; + xdr->iov = buf->tail; /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = xdr->p; - resp->xdr.buf->tail[0].iov_len = 0; + buf->tail[0].iov_base = xdr->p; + buf->tail[0].iov_len = 0; if (maxcount&3) { p = xdr_reserve_space(xdr, 4); WRITE32(0); - resp->xdr.buf->tail[0].iov_base += maxcount&3; - resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - xdr->buf->len -= (maxcount&3); + buf->tail[0].iov_base += maxcount&3; + buf->tail[0].iov_len = 4 - (maxcount&3); + buf->len -= (maxcount&3); } space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, - xdr->buf->buflen - xdr->buf->len); - xdr->buf->buflen = xdr->buf->len + space_left; + buf->buflen - buf->len); + buf->buflen = buf->len + space_left; xdr->end = (__be32 *)((void *)xdr->end + space_left); return 0; From fec25fa4ad728dd9b063313f2a61ff65eae0d571 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 13 May 2014 17:27:03 -0400 Subject: [PATCH 065/100] nfsd4: more read encoding cleanup More cleanup, no change in functionality. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index addb93bf03b7..92f071b44f75 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3077,11 +3077,9 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = &resp->xdr; struct xdr_buf *buf = xdr->buf; u32 eof; - int starting_len = xdr->buf->len - 8; int space_left; __be32 nfserr; - __be32 tmp; - __be32 *p; + __be32 *p = xdr->p - 2; /* * Don't inline pages unless we know there's room for eof, @@ -3105,25 +3103,25 @@ static __be32 nfsd4_encode_splice_read( eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); - tmp = htonl(eof); - write_bytes_to_xdr_buf(buf, starting_len , &tmp, 4); - tmp = htonl(maxcount); - write_bytes_to_xdr_buf(buf, starting_len + 4, &tmp, 4); + *(p++) = htonl(eof); + *(p++) = htonl(maxcount); buf->page_len = maxcount; buf->len += maxcount; xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; - xdr->iov = buf->tail; /* Use rest of head for padding and remaining ops: */ buf->tail[0].iov_base = xdr->p; buf->tail[0].iov_len = 0; + xdr->iov = buf->tail; if (maxcount&3) { - p = xdr_reserve_space(xdr, 4); - WRITE32(0); + int pad = 4 - (maxcount&3); + + *(xdr->p++) = 0; + buf->tail[0].iov_base += maxcount&3; - buf->tail[0].iov_len = 4 - (maxcount&3); - buf->len -= (maxcount&3); + buf->tail[0].iov_len = pad; + buf->len += pad; } space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, From b042098063849794d69b5322fcc6cf9fb5f2586e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Mar 2014 17:44:10 -0400 Subject: [PATCH 066/100] nfsd4: allow exotic read compounds I'm not sure why a client would want to stuff multiple reads in a single compound rpc, but it's legal for them to do it, and we should really support it. Signed-off-by: J. Bruce Fields --- .../filesystems/nfs/nfs41-server.txt | 2 - fs/nfsd/nfs4xdr.c | 61 ++++++++----------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index b930ad087780..c49cd7e796e7 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -176,7 +176,5 @@ Nonstandard compound limitations: ca_maxrequestsize request and a ca_maxresponsesize reply, so we may fail to live up to the promise we made in CREATE_SESSION fore channel negotiation. -* No more than one read-like operation allowed per compound; encoding - replies that cross page boundaries (except for read data) not handled. See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 92f071b44f75..480f12c4e590 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3139,28 +3139,34 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct xdr_stream *xdr = &resp->xdr; u32 eof; int v; - struct page *page; int starting_len = xdr->buf->len - 8; - int space_left; long len; + int thislen; __be32 nfserr; __be32 tmp; __be32 *p; + u32 zzz = 0; + int pad; len = maxcount; v = 0; - while (len) { - int thislen; - page = *(resp->rqstp->rq_next_page); - if (!page) { /* ran out of pages */ - maxcount -= len; - break; - } + thislen = (void *)xdr->end - (void *)xdr->p; + if (len < thislen) + thislen = len; + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; + v++; + len -= thislen; + + while (len) { thislen = min_t(long, len, PAGE_SIZE); - resp->rqstp->rq_vec[v].iov_base = page_address(page); + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; resp->rqstp->rq_vec[v].iov_len = thislen; - resp->rqstp->rq_next_page++; v++; len -= thislen; } @@ -3170,6 +3176,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, read->rd_vlen, &maxcount); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); @@ -3179,27 +3186,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, tmp = htonl(maxcount); write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - resp->xdr.buf->page_len = maxcount; - xdr->buf->len += maxcount; - xdr->page_ptr += v; - xdr->iov = xdr->buf->tail; - - /* Use rest of head for padding and remaining ops: */ - resp->xdr.buf->tail[0].iov_base = xdr->p; - resp->xdr.buf->tail[0].iov_len = 0; - if (maxcount&3) { - p = xdr_reserve_space(xdr, 4); - WRITE32(0); - resp->xdr.buf->tail[0].iov_base += maxcount&3; - resp->xdr.buf->tail[0].iov_len = 4 - (maxcount&3); - xdr->buf->len -= (maxcount&3); - } - - space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, - xdr->buf->buflen - xdr->buf->len); - xdr->buf->buflen = xdr->buf->len + space_left; - xdr->end = (__be32 *)((void *)xdr->end + space_left); - + pad = (maxcount&3) ? 4 - (maxcount&3) : 0; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, + &zzz, pad); return 0; } @@ -3224,15 +3213,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; } - - if (resp->xdr.buf->page_len) { - WARN_ON_ONCE(resp->rqstp->rq_splice_ok); + if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) { + WARN_ON_ONCE(1); return nfserr_resource; } - xdr_commit_encode(xdr); maxcount = svc_max_payload(resp->rqstp); + if (maxcount > xdr->buf->buflen - xdr->buf->len) + maxcount = xdr->buf->buflen - xdr->buf->len; if (maxcount > read->rd_length) maxcount = read->rd_length; From c8f13d977518e588ac89dcf8e841821569108109 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 8 May 2014 17:38:18 -0400 Subject: [PATCH 067/100] nfsd4: really fix nfs4err_resource in 4.1 case encode_getattr, for example, can return nfserr_resource to indicate it ran out of buffer space. That's not a legal error in the 4.1 case. And in the 4.1 case, if we ran out of buffer space, we should have exceeded a session limit too. (Note in 1bc49d83c37cfaf46be357757e592711e67f9809 "nfsd4: fix nfs4err_resource in 4.1 case" we originally tried fixing this error return before fixing the problem that we could error out while we still had lots of available space. The result was to trade one illegal error for another in those cases. We decided that was helpful, so reverted the change in fc208d026be0c7d60db9118583fc62f6ca97743d, and are only reinstating it now that we've elimited almost all of those cases.) Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 480f12c4e590..7f346d8ef267 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3901,6 +3901,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) space_needed = COMPOUND_ERR_SLACK_SPACE; op->status = nfsd4_check_resp_size(resp, space_needed); } + if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { + struct nfsd4_slot *slot = resp->cstate.slot; + + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) + op->status = nfserr_rep_too_big_to_cache; + else + op->status = nfserr_rep_too_big; + } if (op->status == nfserr_resource || op->status == nfserr_rep_too_big || op->status == nfserr_rep_too_big_to_cache) { From c373b0a4289ebf1ca6fbf4614d8b457b5f1b489f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 17:09:18 -0400 Subject: [PATCH 068/100] nfsd4: kill WRITE32 These macros just obscure what's going on. Adopt the convention of the client-side code. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 306 +++++++++++++++++++++++----------------------- 1 file changed, 154 insertions(+), 152 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7f346d8ef267..1dca513a1e46 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,7 +1683,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -#define WRITE32(n) *p++ = htonl(n) #define WRITE64(n) do { \ *p++ = htonl((u32)((n) >> 32)); \ *p++ = htonl((u32)(n)); \ @@ -1774,7 +1773,7 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, p = xdr_reserve_space(xdr, strlen + 4); if (!p) return nfserr_resource; - WRITE32(strlen); + *p++ = cpu_to_be32(strlen); WRITEMEM(str, strlen); count++; } @@ -1857,7 +1856,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); if (!p) goto out_free; - WRITE32(ncomponents); + *p++ = cpu_to_be32(ncomponents); while (ncomponents) { struct dentry *dentry = components[ncomponents - 1]; @@ -1870,7 +1869,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, spin_unlock(&dentry->d_lock); goto out_free; } - WRITE32(len); + *p++ = cpu_to_be32(len); WRITEMEM(dentry->d_name.name, len); dprintk("/%s", dentry->d_name.name); spin_unlock(&dentry->d_lock); @@ -1919,7 +1918,7 @@ static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(fslocs->locations_count); + *p++ = cpu_to_be32(fslocs->locations_count); for (i=0; ilocations_count; i++) { status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); if (status) @@ -1973,8 +1972,8 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, * For now we use a 0 here to indicate the null translation; in * the future we may place a call to translation code here. */ - WRITE32(0); /* lfs */ - WRITE32(0); /* pi */ + *p++ = cpu_to_be32(0); /* lfs */ + *p++ = cpu_to_be32(0); /* pi */ p = xdr_encode_opaque(p, context, len); return 0; } @@ -2123,23 +2122,23 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 16); if (!p) goto out_resource; - WRITE32(3); - WRITE32(bmval0); - WRITE32(bmval1); - WRITE32(bmval2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + *p++ = cpu_to_be32(bmval2); } else if (bmval1) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE32(2); - WRITE32(bmval0); - WRITE32(bmval1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); } else { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE32(1); - WRITE32(bmval0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(bmval0); } attrlen_offset = xdr->buf->len; @@ -2161,17 +2160,17 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE32(2); - WRITE32(word0); - WRITE32(word1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); } else { p = xdr_reserve_space(xdr, 16); if (!p) goto out_resource; - WRITE32(3); - WRITE32(word0); - WRITE32(word1); - WRITE32(word2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); + *p++ = cpu_to_be32(word2); } } if (bmval0 & FATTR4_WORD0_TYPE) { @@ -2183,16 +2182,17 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, status = nfserr_serverfault; goto out; } - WRITE32(dummy); + *p++ = cpu_to_be32(dummy); } if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) - WRITE32(NFS4_FH_PERSISTENT); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT); else - WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT| + NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { p = xdr_reserve_space(xdr, 8); @@ -2210,19 +2210,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_FSID) { p = xdr_reserve_space(xdr, 16); @@ -2237,10 +2237,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, WRITE64((u64)0); break; case FSIDSOURCE_DEV: - WRITE32(0); - WRITE32(MAJOR(stat.dev)); - WRITE32(0); - WRITE32(MINOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MAJOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: WRITEMEM(exp->ex_uuid, 16); @@ -2251,19 +2251,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_LEASE_TIME) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(nn->nfsd4_lease); + *p++ = cpu_to_be32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(rdattr_err); + *p++ = cpu_to_be32(rdattr_err); } if (bmval0 & FATTR4_WORD0_ACL) { struct nfs4_ace *ace; @@ -2273,21 +2273,22 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); goto out_acl; } p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(acl->naces); + *p++ = cpu_to_be32(acl->naces); for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { p = xdr_reserve_space(xdr, 4*3); if (!p) goto out_resource; - WRITE32(ace->type); - WRITE32(ace->flag); - WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); + *p++ = cpu_to_be32(ace->type); + *p++ = cpu_to_be32(ace->flag); + *p++ = cpu_to_be32(ace->access_mask & + NFS4_ACE_MASK_ALL); status = nfsd4_encode_aclname(xdr, rqstp, ace); if (status) goto out; @@ -2298,38 +2299,38 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(aclsupport ? + *p++ = cpu_to_be32(aclsupport ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_FILEHANDLE) { p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - WRITE32(fhp->fh_handle.fh_size); + *p++ = cpu_to_be32(fhp->fh_handle.fh_size); WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { @@ -2365,7 +2366,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { p = xdr_reserve_space(xdr, 8); @@ -2377,13 +2378,13 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(255); + *p++ = cpu_to_be32(255); } if (bmval0 & FATTR4_WORD0_MAXNAME) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(statfs.f_namelen); + *p++ = cpu_to_be32(statfs.f_namelen); } if (bmval0 & FATTR4_WORD0_MAXREAD) { p = xdr_reserve_space(xdr, 8); @@ -2401,19 +2402,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(stat.mode & S_IALLUGO); + *p++ = cpu_to_be32(stat.mode & S_IALLUGO); } if (bmval1 & FATTR4_WORD1_NO_TRUNC) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval1 & FATTR4_WORD1_NUMLINKS) { p = xdr_reserve_space(xdr, 4); if (!p) goto out_resource; - WRITE32(stat.nlink); + *p++ = cpu_to_be32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { status = nfsd4_encode_user(xdr, rqstp, stat.uid); @@ -2429,8 +2430,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE32((u32) MAJOR(stat.rdev)); - WRITE32((u32) MINOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MAJOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MINOR(stat.rdev)); } if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { p = xdr_reserve_space(xdr, 8); @@ -2465,29 +2466,29 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!p) goto out_resource; WRITE64((s64)stat.atime.tv_sec); - WRITE32(stat.atime.tv_nsec); + *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE32(0); - WRITE32(1); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(0); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; WRITE64((s64)stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; WRITE64((s64)stat.mtime.tv_sec); - WRITE32(stat.mtime.tv_nsec); + *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { p = xdr_reserve_space(xdr, 8); @@ -2512,10 +2513,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 16); if (!p) goto out_resource; - WRITE32(3); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } attrlen = htonl(xdr->buf->len - attrlen_offset - 4); @@ -2750,7 +2751,7 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) p = xdr_reserve_space(xdr, sizeof(stateid_t)); if (!p) return nfserr_resource; - WRITE32(sid->si_generation); + *p++ = cpu_to_be32(sid->si_generation); WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); return 0; } @@ -2765,8 +2766,8 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 8); if (!p) return nfserr_resource; - WRITE32(access->ac_supported); - WRITE32(access->ac_resp_access); + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); } return nfserr; } @@ -2781,9 +2782,9 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, if (!p) return nfserr_resource; WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(bcts->dir); + *p++ = cpu_to_be32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ - WRITE32(0); + *p++ = cpu_to_be32(0); } return nfserr; } @@ -2826,9 +2827,9 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ if (!p) return nfserr_resource; write_cinfo(&p, &create->cr_cinfo); - WRITE32(2); - WRITE32(create->cr_bmval[0]); - WRITE32(create->cr_bmval[1]); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(create->cr_bmval[0]); + *p++ = cpu_to_be32(create->cr_bmval[1]); } return nfserr; } @@ -2861,7 +2862,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - WRITE32(len); + *p++ = cpu_to_be32(len); WRITEMEM(&fhp->fh_handle.fh_base, len); } return nfserr; @@ -2893,14 +2894,14 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) } WRITE64(ld->ld_start); WRITE64(ld->ld_length); - WRITE32(ld->ld_type); + *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { WRITEMEM(&ld->ld_clientid, 8); - WRITE32(conf->len); + *p++ = cpu_to_be32(conf->len); WRITEMEM(conf->data, conf->len); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ WRITE64((u64)0); /* clientid */ - WRITE32(0); /* length of owner name */ + *p++ = cpu_to_be32(0); /* length of owner name */ } return nfserr_denied; } @@ -2972,11 +2973,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op if (!p) return nfserr_resource; write_cinfo(&p, &open->op_cinfo); - WRITE32(open->op_rflags); - WRITE32(2); - WRITE32(open->op_bmval[0]); - WRITE32(open->op_bmval[1]); - WRITE32(open->op_delegate_type); + *p++ = cpu_to_be32(open->op_rflags); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(open->op_bmval[0]); + *p++ = cpu_to_be32(open->op_bmval[1]); + *p++ = cpu_to_be32(open->op_delegate_type); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: @@ -2988,15 +2989,15 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 20); if (!p) return nfserr_resource; - WRITE32(open->op_recall); + *p++ = cpu_to_be32(open->op_recall); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_WRITE: nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); @@ -3005,22 +3006,22 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 32); if (!p) return nfserr_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); /* * TODO: space_limit's in delegations */ - WRITE32(NFS4_LIMIT_SIZE); - WRITE32(~(u32)0); - WRITE32(~(u32)0); + *p++ = cpu_to_be32(NFS4_LIMIT_SIZE); + *p++ = cpu_to_be32(~(u32)0); + *p++ = cpu_to_be32(~(u32)0); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ switch (open->op_why_no_deleg) { @@ -3029,14 +3030,15 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 8); if (!p) return nfserr_resource; - WRITE32(open->op_why_no_deleg); - WRITE32(0); /* deleg signaling not supported yet */ + *p++ = cpu_to_be32(open->op_why_no_deleg); + /* deleg signaling not supported yet: */ + *p++ = cpu_to_be32(0); break; default: p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(open->op_why_no_deleg); + *p++ = cpu_to_be32(open->op_why_no_deleg); } break; default: @@ -3310,8 +3312,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr_resource; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - (char *)resp->xdr.buf->head[0].iov_base; @@ -3460,17 +3462,17 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, XDR_LEN(info.oid.len) + 4 + 4); if (!p) goto out; - WRITE32(RPC_AUTH_GSS); - WRITE32(info.oid.len); + *p++ = cpu_to_be32(RPC_AUTH_GSS); + *p++ = cpu_to_be32(info.oid.len); WRITEMEM(info.oid.data, info.oid.len); - WRITE32(info.qop); - WRITE32(info.service); + *p++ = cpu_to_be32(info.qop); + *p++ = cpu_to_be32(info.service); } else if (pf < RPC_AUTH_MAXFLAVOR) { supported++; p = xdr_reserve_space(xdr, 4); if (!p) goto out; - WRITE32(pf); + *p++ = cpu_to_be32(pf); } else { if (report) pr_warn("NFS: SECINFO: security flavor %u " @@ -3520,16 +3522,16 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (!p) return nfserr_resource; if (nfserr) { - WRITE32(3); - WRITE32(0); - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } else { - WRITE32(3); - WRITE32(setattr->sa_bmval[0]); - WRITE32(setattr->sa_bmval[1]); - WRITE32(setattr->sa_bmval[2]); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(setattr->sa_bmval[0]); + *p++ = cpu_to_be32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(setattr->sa_bmval[2]); } return nfserr; } @@ -3551,8 +3553,8 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n p = xdr_reserve_space(xdr, 8); if (!p) return nfserr_resource; - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } return nfserr; } @@ -3567,8 +3569,8 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w p = xdr_reserve_space(xdr, 16); if (!p) return nfserr_resource; - WRITE32(write->wr_bytes_written); - WRITE32(write->wr_how_written); + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); } return nfserr; @@ -3611,10 +3613,10 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; WRITEMEM(&exid->clientid, 8); - WRITE32(exid->seqid); - WRITE32(exid->flags); + *p++ = cpu_to_be32(exid->seqid); + *p++ = cpu_to_be32(exid->flags); - WRITE32(exid->spa_how); + *p++ = cpu_to_be32(exid->spa_how); switch (exid->spa_how) { case SP4_NONE: @@ -3626,11 +3628,11 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; /* spo_must_enforce bitmap: */ - WRITE32(2); - WRITE32(nfs4_minimal_spo_must_enforce[0]); - WRITE32(nfs4_minimal_spo_must_enforce[1]); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]); /* empty spo_must_allow bitmap: */ - WRITE32(0); + *p++ = cpu_to_be32(0); break; default: @@ -3650,15 +3652,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* The server_owner struct */ WRITE64(minor_id); /* Minor id */ /* major id */ - WRITE32(major_id_sz); + *p++ = cpu_to_be32(major_id_sz); WRITEMEM(major_id, major_id_sz); /* Server scope */ - WRITE32(server_scope_sz); + *p++ = cpu_to_be32(server_scope_sz); WRITEMEM(server_scope, server_scope_sz); /* Implementation id */ - WRITE32(0); /* zero length nfs_impl_id4 array */ + *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ return 0; } @@ -3676,43 +3678,43 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(sess->seqid); - WRITE32(sess->flags); + *p++ = cpu_to_be32(sess->seqid); + *p++ = cpu_to_be32(sess->flags); p = xdr_reserve_space(xdr, 28); if (!p) return nfserr_resource; - WRITE32(0); /* headerpadsz */ - WRITE32(sess->fore_channel.maxreq_sz); - WRITE32(sess->fore_channel.maxresp_sz); - WRITE32(sess->fore_channel.maxresp_cached); - WRITE32(sess->fore_channel.maxops); - WRITE32(sess->fore_channel.maxreqs); - WRITE32(sess->fore_channel.nr_rdma_attrs); + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->fore_channel.maxops); + *p++ = cpu_to_be32(sess->fore_channel.maxreqs); + *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs); if (sess->fore_channel.nr_rdma_attrs) { p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(sess->fore_channel.rdma_attrs); + *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs); } p = xdr_reserve_space(xdr, 28); if (!p) return nfserr_resource; - WRITE32(0); /* headerpadsz */ - WRITE32(sess->back_channel.maxreq_sz); - WRITE32(sess->back_channel.maxresp_sz); - WRITE32(sess->back_channel.maxresp_cached); - WRITE32(sess->back_channel.maxops); - WRITE32(sess->back_channel.maxreqs); - WRITE32(sess->back_channel.nr_rdma_attrs); + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->back_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->back_channel.maxops); + *p++ = cpu_to_be32(sess->back_channel.maxreqs); + *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs); if (sess->back_channel.nr_rdma_attrs) { p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; - WRITE32(sess->back_channel.rdma_attrs); + *p++ = cpu_to_be32(sess->back_channel.rdma_attrs); } return 0; } @@ -3731,12 +3733,12 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(seq->seqid); - WRITE32(seq->slotid); + *p++ = cpu_to_be32(seq->seqid); + *p++ = cpu_to_be32(seq->slotid); /* Note slotid's are numbered from zero: */ - WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ - WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ - WRITE32(seq->status_flags); + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */ + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */ + *p++ = cpu_to_be32(seq->status_flags); resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ return 0; @@ -3883,7 +3885,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) WARN_ON_ONCE(1); return; } - WRITE32(op->opnum); + *p++ = cpu_to_be32(op->opnum); post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) @@ -3956,7 +3958,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) WARN_ON_ONCE(1); return; } - WRITE32(op->opnum); + *p++ = cpu_to_be32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ WRITEMEM(rp->rp_buf, rp->rp_buflen); From b64c7f3bdfbb468d9026ca91d55c57675724f516 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 17:11:35 -0400 Subject: [PATCH 069/100] nfsd4: kill WRITE64 Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 52 ++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1dca513a1e46..094a7c585f1e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,10 +1683,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -#define WRITE64(n) do { \ - *p++ = htonl((u32)((n) >> 32)); \ - *p++ = htonl((u32)(n)); \ -} while (0) #define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ *(p + XDR_QUADLEN(nbytes) -1) = 0; \ memcpy(p, ptr, nbytes); \ @@ -2204,7 +2200,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64(stat.size); + p = xdr_encode_hyper(p, stat.size); } if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { p = xdr_reserve_space(xdr, 4); @@ -2229,12 +2225,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!p) goto out_resource; if (exp->ex_fslocs.migrated) { - WRITE64(NFS4_REFERRAL_FSID_MAJOR); - WRITE64(NFS4_REFERRAL_FSID_MINOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR); } else switch(fsid_source(fhp)) { case FSIDSOURCE_FSID: - WRITE64((u64)exp->ex_fsid); - WRITE64((u64)0); + p = xdr_encode_hyper(p, (u64)exp->ex_fsid); + p = xdr_encode_hyper(p, (u64)0); break; case FSIDSOURCE_DEV: *p++ = cpu_to_be32(0); @@ -2337,25 +2333,25 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_FREE) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) statfs.f_files); + p = xdr_encode_hyper(p, (u64) statfs.f_files); } if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { status = nfsd4_encode_fs_locations(xdr, rqstp, exp); @@ -2372,7 +2368,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes); + p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { p = xdr_reserve_space(xdr, 4); @@ -2390,13 +2386,13 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { p = xdr_reserve_space(xdr, 4); @@ -2438,34 +2434,34 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (!p) goto out_resource; dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_FREE) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; dummy64 = (u64)stat.blocks << 9; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE64((s64)stat.atime.tv_sec); + p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { @@ -2480,14 +2476,14 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE64((s64)stat.ctime.tv_sec); + p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { p = xdr_reserve_space(xdr, 12); if (!p) goto out_resource; - WRITE64((s64)stat.mtime.tv_sec); + p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { @@ -2501,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, if (ignore_crossmnt == 0 && dentry == exp->ex_path.mnt->mnt_root) get_parent_attributes(exp, &stat); - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); } if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, @@ -2892,15 +2888,15 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) } return nfserr_resource; } - WRITE64(ld->ld_start); - WRITE64(ld->ld_length); + p = xdr_encode_hyper(p, ld->ld_start); + p = xdr_encode_hyper(p, ld->ld_length); *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { WRITEMEM(&ld->ld_clientid, 8); *p++ = cpu_to_be32(conf->len); WRITEMEM(conf->data, conf->len); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ - WRITE64((u64)0); /* clientid */ + p = xdr_encode_hyper(p, (u64)0); /* clientid */ *p++ = cpu_to_be32(0); /* length of owner name */ } return nfserr_denied; @@ -3650,7 +3646,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; /* The server_owner struct */ - WRITE64(minor_id); /* Minor id */ + p = xdr_encode_hyper(p, minor_id); /* Minor id */ /* major id */ *p++ = cpu_to_be32(major_id_sz); WRITEMEM(major_id, major_id_sz); From 0c0c267ba96f606b541ab8e4bcde54e6b3f0198f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 18:43:16 -0400 Subject: [PATCH 070/100] nfsd4: kill WRITEMEM Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 62 +++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 094a7c585f1e..3844dbe0cb6e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,12 +1683,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ - *(p + XDR_QUADLEN(nbytes) -1) = 0; \ - memcpy(p, ptr, nbytes); \ - p += XDR_QUADLEN(nbytes); \ -}} while (0) - static void write32(__be32 **p, u32 n) { *(*p)++ = htonl(n); @@ -1769,8 +1763,7 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, p = xdr_reserve_space(xdr, strlen + 4); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(strlen); - WRITEMEM(str, strlen); + p = xdr_encode_opaque(p, str, strlen); count++; } else @@ -1865,8 +1858,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, spin_unlock(&dentry->d_lock); goto out_free; } - *p++ = cpu_to_be32(len); - WRITEMEM(dentry->d_name.name, len); + p = xdr_encode_opaque(p, dentry->d_name.name, len); dprintk("/%s", dentry->d_name.name); spin_unlock(&dentry->d_lock); dput(dentry); @@ -2239,7 +2231,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: - WRITEMEM(exp->ex_uuid, 16); + p = xdr_encode_opaque_fixed(p, exp->ex_uuid, 16); break; } } @@ -2326,8 +2318,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - *p++ = cpu_to_be32(fhp->fh_handle.fh_size); - WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { p = xdr_reserve_space(xdr, 8); @@ -2748,7 +2740,8 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) if (!p) return nfserr_resource; *p++ = cpu_to_be32(sid->si_generation); - WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + p = xdr_encode_opaque_fixed(p, &sid->si_opaque, + sizeof(stateid_opaque_t)); return 0; } @@ -2777,7 +2770,8 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); if (!p) return nfserr_resource; - WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(bcts->dir); /* Sorry, we do not yet support RDMA over 4.1: */ *p++ = cpu_to_be32(0); @@ -2807,7 +2801,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; - WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + NFS4_VERIFIER_SIZE); } return nfserr; } @@ -2858,8 +2853,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(len); - WRITEMEM(&fhp->fh_handle.fh_base, len); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); } return nfserr; } @@ -2892,9 +2886,8 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) p = xdr_encode_hyper(p, ld->ld_length); *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { - WRITEMEM(&ld->ld_clientid, 8); - *p++ = cpu_to_be32(conf->len); - WRITEMEM(conf->data, conf->len); + p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); + p = xdr_encode_opaque(p, conf->data, conf->len); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ p = xdr_encode_hyper(p, (u64)0); /* clientid */ *p++ = cpu_to_be32(0); /* length of owner name */ @@ -3459,8 +3452,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, if (!p) goto out; *p++ = cpu_to_be32(RPC_AUTH_GSS); - *p++ = cpu_to_be32(info.oid.len); - WRITEMEM(info.oid.data, info.oid.len); + p = xdr_encode_opaque(p, info.oid.data, info.oid.len); *p++ = cpu_to_be32(info.qop); *p++ = cpu_to_be32(info.service); } else if (pf < RPC_AUTH_MAXFLAVOR) { @@ -3542,8 +3534,9 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; - WRITEMEM(&scd->se_clientid, 8); - WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); + p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); + p = xdr_encode_opaque_fixed(p, &scd->se_confirm, + NFS4_VERIFIER_SIZE); } else if (nfserr == nfserr_clid_inuse) { p = xdr_reserve_space(xdr, 8); @@ -3567,7 +3560,8 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w return nfserr_resource; *p++ = cpu_to_be32(write->wr_bytes_written); *p++ = cpu_to_be32(write->wr_how_written); - WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); } return nfserr; } @@ -3608,7 +3602,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, if (!p) return nfserr_resource; - WRITEMEM(&exid->clientid, 8); + p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); *p++ = cpu_to_be32(exid->seqid); *p++ = cpu_to_be32(exid->flags); @@ -3648,12 +3642,10 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* The server_owner struct */ p = xdr_encode_hyper(p, minor_id); /* Minor id */ /* major id */ - *p++ = cpu_to_be32(major_id_sz); - WRITEMEM(major_id, major_id_sz); + p = xdr_encode_opaque(p, major_id, major_id_sz); /* Server scope */ - *p++ = cpu_to_be32(server_scope_sz); - WRITEMEM(server_scope, server_scope_sz); + p = xdr_encode_opaque(p, server_scope, server_scope_sz); /* Implementation id */ *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ @@ -3673,7 +3665,8 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; - WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); + p = xdr_encode_opaque_fixed(p, sess->sessionid.data, + NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(sess->seqid); *p++ = cpu_to_be32(sess->flags); @@ -3728,7 +3721,8 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); if (!p) return nfserr_resource; - WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + p = xdr_encode_opaque_fixed(p, seq->sessionid.data, + NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(seq->seqid); *p++ = cpu_to_be32(seq->slotid); /* Note slotid's are numbered from zero: */ @@ -3957,7 +3951,7 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) *p++ = cpu_to_be32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - WRITEMEM(rp->rp_buf, rp->rp_buflen); + p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } int From d05d5744ef67879877dbe2e3d0fb9fcc27ee44e5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 22 Mar 2014 18:48:39 -0400 Subject: [PATCH 071/100] nfsd4: kill write32, write64 And switch a couple other functions from the encode(&p,...) convention to the p = encode(p,...) convention mostly used elsewhere. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 51 +++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3844dbe0cb6e..3e347a1caec4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1683,39 +1683,30 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -static void write32(__be32 **p, u32 n) -{ - *(*p)++ = htonl(n); -} - -static void write64(__be32 **p, u64 n) -{ - write32(p, (n >> 32)); - write32(p, (u32)n); -} - -static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode) { if (IS_I_VERSION(inode)) { - write64(p, inode->i_version); + p = xdr_encode_hyper(p, inode->i_version); } else { - write32(p, stat->ctime.tv_sec); - write32(p, stat->ctime.tv_nsec); + *p++ = cpu_to_be32(stat->ctime.tv_sec); + *p++ = cpu_to_be32(stat->ctime.tv_nsec); } + return p; } -static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { - write32(p, c->atomic); + *p++ = cpu_to_be32(c->atomic); if (c->change_supported) { - write64(p, c->before_change); - write64(p, c->after_change); + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); } else { - write32(p, c->before_ctime_sec); - write32(p, c->before_ctime_nsec); - write32(p, c->after_ctime_sec); - write32(p, c->after_ctime_nsec); + *p++ = cpu_to_be32(c->before_ctime_sec); + *p++ = cpu_to_be32(c->before_ctime_nsec); + *p++ = cpu_to_be32(c->after_ctime_sec); + *p++ = cpu_to_be32(c->after_ctime_nsec); } + return p; } /* Encode as an array of strings the string given with components @@ -2186,7 +2177,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - write_change(&p, &stat, dentry->d_inode); + p = encode_change(p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { p = xdr_reserve_space(xdr, 8); @@ -2817,7 +2808,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 32); if (!p) return nfserr_resource; - write_cinfo(&p, &create->cr_cinfo); + p = encode_cinfo(p, &create->cr_cinfo); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(create->cr_bmval[0]); *p++ = cpu_to_be32(create->cr_bmval[1]); @@ -2940,7 +2931,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li p = xdr_reserve_space(xdr, 20); if (!p) return nfserr_resource; - write_cinfo(&p, &link->li_cinfo); + p = encode_cinfo(p, &link->li_cinfo); } return nfserr; } @@ -2961,7 +2952,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 40); if (!p) return nfserr_resource; - write_cinfo(&p, &open->op_cinfo); + p = encode_cinfo(p, &open->op_cinfo); *p++ = cpu_to_be32(open->op_rflags); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(open->op_bmval[0]); @@ -3381,7 +3372,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 20); if (!p) return nfserr_resource; - write_cinfo(&p, &remove->rm_cinfo); + p = encode_cinfo(p, &remove->rm_cinfo); } return nfserr; } @@ -3396,8 +3387,8 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ p = xdr_reserve_space(xdr, 40); if (!p) return nfserr_resource; - write_cinfo(&p, &rename->rn_sinfo); - write_cinfo(&p, &rename->rn_tinfo); + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); } return nfserr; } From a5cddc885b99458df963a75abbe0b40cbef56c48 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 12 May 2014 18:10:58 -0400 Subject: [PATCH 072/100] nfsd4: better reservation of head space for krb5 RPC_MAX_AUTH_SIZE is scattered around several places. Better to set it once in the auth code, where this kind of estimate should be made. And while we're at it we can leave it zero when we're not using krb5i or krb5p. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/nfs4xdr.c | 5 +++-- include/linux/sunrpc/svc.h | 11 +++++------ net/sunrpc/auth_gss/svcauth_gss.c | 2 ++ net/sunrpc/svcauth.c | 2 ++ 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2d786b813c7e..16e71d033ea5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1261,13 +1261,13 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, xdr->buf = buf; xdr->iov = head; xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; xdr->scratch.iov_len = 0; xdr->page_ptr = buf->pages; buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - - 2 * RPC_MAX_AUTH_SIZE; + - rqstp->rq_auth_slack; } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 62b882dc48ec..d0a016a502be 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2288,7 +2288,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, session->se_fchannel.maxresp_sz; status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : nfserr_rep_too_big; - if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE)) + if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) goto out_put_session; svc_reserve(rqstp, buflen); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3e347a1caec4..470fe8998c9b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1611,7 +1611,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_HEAD; struct nfsd4_op *op; bool cachethis = false; - int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */ + int auth_slack= argp->rqstp->rq_auth_slack; + int max_reply = auth_slack + 8; /* opcnt, status */ int readcount = 0; int readbytes = 0; int i; @@ -1677,7 +1678,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; - if (readcount > 1 || max_reply > PAGE_SIZE - 2*RPC_MAX_AUTH_SIZE) + if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) argp->rqstp->rq_splice_ok = false; DECODE_TAIL; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 85cb6472a423..1bc7cd05b22e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -260,7 +260,10 @@ struct svc_rqst { void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ void * rq_auth_data; /* flavor-specific data */ - + int rq_auth_slack; /* extra space xdr code + * should leave in head + * for krb5i, krb5p. + */ int rq_reserved; /* space on socket outq * reserved for this request */ @@ -456,11 +459,7 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); */ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) { - int added_space = 0; - - if (rqstp->rq_authop->flavour) - added_space = RPC_MAX_AUTH_SIZE; - svc_reserve(rqstp, space + added_space); + svc_reserve(rqstp, space + rqstp->rq_auth_slack); } #endif /* SUNRPC_SVC_H */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 0f73f4507746..4ce5eccec1f6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE; break; case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ @@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2; break; default: goto auth_err; diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 2af7b0cba43a..79c0f3459b5c 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) } spin_unlock(&authtab_lock); + rqstp->rq_auth_slack = 0; + rqstp->rq_authop = aops; return aops->accept(rqstp, authp); } From a1f05514b016aeaed638dbf677f443af7e7bde4f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 19:57:49 +0800 Subject: [PATCH 073/100] NFS4: Avoid NULL reference or double free in nfsd4_fslocs_free() If fsloc_parse() failed at kzalloc(), fs/nfsd/export.c 411 412 fsloc->locations = kzalloc(fsloc->locations_count 413 * sizeof(struct nfsd4_fs_location), GFP_KERNEL); 414 if (!fsloc->locations) 415 return -ENOMEM; svc_export_parse() will call nfsd4_fslocs_free() with fsloc->locations = NULL, so that, "kfree(fsloc->locations[i].path);" will cause a crash. If fsloc_parse() failed after that, fsloc_parse() will call nfsd4_fslocs_free(), and svc_export_parse() will call it again, so that, a double free is caused. This patch checks the fsloc->locations, and set to NULL after it be freed. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8513c598fabf..9a41d3ddd8df 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -295,13 +295,19 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) { + struct nfsd4_fs_location *locations = fsloc->locations; int i; + if (!locations) + return; + for (i = 0; i < fsloc->locations_count; i++) { - kfree(fsloc->locations[i].path); - kfree(fsloc->locations[i].hosts); + kfree(locations[i].path); + kfree(locations[i].hosts); } - kfree(fsloc->locations); + + kfree(locations); + fsloc->locations = NULL; } static void svc_export_put(struct kref *ref) From 0d63790c365852a6ce2913632b933633343ae479 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 19:59:06 +0800 Subject: [PATCH 074/100] NFSD: Helper function for parsing uuid Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9a41d3ddd8df..8771f417efa6 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -480,6 +480,23 @@ static inline int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif +static inline int +uuid_parse(char **mesg, char *buf, unsigned char **puuid) +{ + int len; + + /* expect a 16 byte uuid encoded as \xXXXX... */ + len = qword_get(mesg, buf, PAGE_SIZE); + if (len != 16) + return -EINVAL; + + *puuid = kmemdup(buf, 16, GFP_KERNEL); + if (*puuid == NULL) + return -ENOMEM; + + return 0; +} + static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ @@ -558,18 +575,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); - else if (strcmp(buf, "uuid") == 0) { - /* expect a 16 byte uuid encoded as \xXXXX... */ - len = qword_get(&mesg, buf, PAGE_SIZE); - if (len != 16) - err = -EINVAL; - else { - exp.ex_uuid = - kmemdup(buf, 16, GFP_KERNEL); - if (exp.ex_uuid == NULL) - err = -ENOMEM; - } - } else if (strcmp(buf, "secinfo") == 0) + else if (strcmp(buf, "uuid") == 0) + err = uuid_parse(&mesg, buf, &exp.ex_uuid); + else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); else /* quietly ignore unknown words and anything From 94eb36892d727145794b80dceffc435d1d68edbb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:00:19 +0800 Subject: [PATCH 075/100] NFSD: Adds macro EX_UUID_LEN for exports uuid's length Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 6 +++--- fs/nfsd/export.h | 1 + fs/nfsd/nfs4xdr.c | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8771f417efa6..90d37b6ef163 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -487,10 +487,10 @@ uuid_parse(char **mesg, char *buf, unsigned char **puuid) /* expect a 16 byte uuid encoded as \xXXXX... */ len = qword_get(mesg, buf, PAGE_SIZE); - if (len != 16) + if (len != EX_UUID_LEN) return -EINVAL; - *puuid = kmemdup(buf, 16, GFP_KERNEL); + *puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL); if (*puuid == NULL) return -ENOMEM; @@ -663,7 +663,7 @@ static int svc_export_show(struct seq_file *m, if (exp->ex_uuid) { int i; seq_puts(m, ",uuid="); - for (i=0; i<16; i++) { + for (i = 0; i < EX_UUID_LEN; i++) { if ((i&3) == 0 && i) seq_putc(m, ':'); seq_printf(m, "%02x", exp->ex_uuid[i]); diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index d7939a62a0ae..c7d4ed05e470 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -37,6 +37,7 @@ struct nfsd4_fs_locations { * spkm3i, and spkm3p (and using all 8 at once should be rare). */ #define MAX_SECINFO_LIST 8 +#define EX_UUID_LEN 16 struct exp_flavor_info { u32 pseudoflavor; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 470fe8998c9b..70d147244891 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2223,7 +2223,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: - p = xdr_encode_opaque_fixed(p, exp->ex_uuid, 16); + p = xdr_encode_opaque_fixed(p, exp->ex_uuid, + EX_UUID_LEN); break; } } From 0faed901c680b153ca090a766ceda699b55993eb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:53:05 +0800 Subject: [PATCH 076/100] NFSD: remove unneeded linux/user_namespace.h include After commit 4c1e1b34d5c8 ("nfsd: Store ex_anon_uid and ex_anon_gid as kuids and kgids") using kuid/kgid for ex_anon_uid/ex_anon_gid, user_namespace.h is not needed. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/auth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 2645be435e75..104232571491 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -1,7 +1,6 @@ /* Copyright (C) 1995, 1996 Olaf Kirch */ #include -#include #include "nfsd.h" #include "auth.h" From 61a27f08a63ee9460653633d8a9cc5a09dcb9aa5 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:53:44 +0800 Subject: [PATCH 077/100] NFSD: Cleanup unused variable in nfsd_setuser() Commit 8f6c5ffc8987 ("kernel/groups.c: remove return value of set_groups") removed the last use of "ret". Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/auth.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 104232571491..72f44823adbb 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -24,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); - int ret; validate_process_creds(); @@ -85,8 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) return 0; oom: - ret = -ENOMEM; abort_creds(new); - return ret; + return -ENOMEM; } From a30ae94c0797f9de47626eecc43359989447d7a3 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:54:27 +0800 Subject: [PATCH 078/100] NFSD: Cleanup unneeded including net/ipv6.h Commit 49b28684fdba ("nfsd: Remove deprecated nfsctl system call and related code") removed the only use of ipv6_addr_set_v4mapped(), so net/ipv6.h is unneeded now. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 90d37b6ef163..1d6d7bd0c211 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -17,8 +17,6 @@ #include #include -#include - #include "nfsd.h" #include "nfsfh.h" #include "netns.h" From e6d615f7428bb9a202f7fab563e917e89169d349 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:55:43 +0800 Subject: [PATCH 079/100] NFSD: Remove typedef of svc_client and svc_export in export.c No need for a typedef wrapper for svc_export or svc_client, remove them. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 1d6d7bd0c211..858c536c4c1d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -23,9 +23,6 @@ #define NFSDDBG_FACILITY NFSDDBG_EXPORT -typedef struct auth_domain svc_client; -typedef struct svc_export svc_export; - /* * We have two caches. * One maps client+vfsmnt+dentry to export options - the export map @@ -783,7 +780,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old) static struct svc_expkey * -exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, +exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; @@ -805,9 +802,9 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type, return ek; } - -static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, - const struct path *path, struct cache_req *reqp) +static struct svc_export * +exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, + const struct path *path, struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -831,11 +828,11 @@ static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp, /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, - struct path *path) +static struct svc_export * +exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path) { struct dentry *saved = dget(path->dentry); - svc_export *exp = exp_get_by_name(cd, clp, path, NULL); + struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL); while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { struct dentry *parent = dget_parent(path->dentry); @@ -856,7 +853,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp, * since its harder to fool a kernel module than a user space program. */ int -exp_rootfh(struct net *net, svc_client *clp, char *name, +exp_rootfh(struct net *net, struct auth_domain *clp, char *name, struct knfsd_fh *f, int maxsize) { struct svc_export *exp; From f0db79d54b6c9f612fb2ef4f71ca8340edaf89f1 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 23 May 2014 20:56:16 +0800 Subject: [PATCH 080/100] NFSD: Add missing comment of "expiry" in expkey_parse() Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 858c536c4c1d..263d30e7972e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -68,7 +68,7 @@ static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { - /* client fsidtype fsid [path] */ + /* client fsidtype fsid expiry [path] */ char *buf; int len; struct auth_domain *dom = NULL; From 1f53146da9cb2c941a3928320a6824d3b035455f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 24 May 2014 11:26:49 +0800 Subject: [PATCH 081/100] NFSD: Using type of uint32_t for ex_nflavors instead of int ex_nflavors can't be negative number, just defined by uint32_t. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 7 ++++--- fs/nfsd/export.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 263d30e7972e..788405107006 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -438,13 +438,14 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { - int listsize, err; struct exp_flavor_info *f; + u32 listsize; + int err; - err = get_int(mesg, &listsize); + err = get_uint(mesg, &listsize); if (err) return err; - if (listsize < 0 || listsize > MAX_SECINFO_LIST) + if (listsize > MAX_SECINFO_LIST) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index c7d4ed05e470..cfeea85c5bed 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -54,7 +54,7 @@ struct svc_export { int ex_fsid; unsigned char * ex_uuid; /* 16 byte fsid */ struct nfsd4_fs_locations ex_fslocs; - int ex_nflavors; + uint32_t ex_nflavors; struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; struct cache_detail *cd; }; From be69da8052af38a9d72ed32fb765fd4446e4091c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 24 May 2014 11:19:57 +0800 Subject: [PATCH 082/100] NFSD: Error out when getting more than one fsloc/secinfo/uuid Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 788405107006..13b85f94d9e2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -389,6 +389,10 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int len; int migrated, i, err; + /* more than one fsloc */ + if (fsloc->locations) + return -EINVAL; + /* listsize */ err = get_uint(mesg, &fsloc->locations_count); if (err) @@ -442,6 +446,10 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) u32 listsize; int err; + /* more than one secinfo */ + if (exp->ex_nflavors) + return -EINVAL; + err = get_uint(mesg, &listsize); if (err) return err; @@ -481,6 +489,10 @@ uuid_parse(char **mesg, char *buf, unsigned char **puuid) { int len; + /* more than one uuid */ + if (*puuid) + return -EINVAL; + /* expect a 16 byte uuid encoded as \xXXXX... */ len = qword_get(mesg, buf, PAGE_SIZE); if (len != EX_UUID_LEN) From 12337901d654415d9f764b5f5ba50052e9700f37 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 May 2014 10:46:13 +0200 Subject: [PATCH 083/100] nfsd: getattr for FATTR4_WORD0_FILES_AVAIL needs the statfs buffer Note nobody's ever noticed because the typical client probably never requests FILES_AVAIL without also requesting something else on the list. Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 70d147244891..3976dc699731 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2049,8 +2049,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, err = vfs_getattr(&path, &stat); if (err) goto out_nfserr; - if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | - FATTR4_WORD0_MAXNAME)) || + if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &statfs); From b52bd7bccca57b2bbe837d14a0bf3e45279459a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 May 2014 10:46:32 +0200 Subject: [PATCH 084/100] nfsd: remove unused function nfsd_read_file Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 20 -------------------- fs/nfsd/vfs.h | 2 -- 2 files changed, 22 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b7d35a4afefb..140c496f612c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1026,26 +1026,6 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } -/* As above, but use the provided file descriptor. */ -__be32 -nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *count) -{ - __be32 err; - - if (file) { - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); - } else /* Note file may still be NULL in NFSv4 special stateid case: */ - err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); -out: - return err; -} - /* * Write data to a file. * The stable flag requests synchronous writes. diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 7441e9655eb7..91b6ae3f658b 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -80,8 +80,6 @@ int nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); -__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, - loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, loff_t, struct kvec *,int, unsigned long *, int *); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, From a48fd0f9f77b6e144813230c26621c00aac92ce8 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 29 May 2014 12:18:52 +0800 Subject: [PATCH 085/100] SUNRPC/NFSD: Remove using of dprintk with KERN_WARNING When debugging, rpc prints messages from dprintk(KERN_WARNING ...) with "^A4" prefixed, [ 2780.339988] ^A4nfsd: connect from unprivileged port: 127.0.0.1, port=35316 Trond tells, > dprintk != printk. We have NEVER supported dprintk(KERN_WARNING...) This patch removes using of dprintk with KERN_WARNING. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.c | 5 ++--- net/sunrpc/svcsock.c | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index a337106d6875..ec8393418154 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -88,9 +88,8 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, /* Check if the request originated from a secure port. */ if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk(KERN_WARNING - "nfsd: request from insecure port %s!\n", - svc_print_addr(rqstp, buf, sizeof(buf))); + dprintk("nfsd: request from insecure port %s!\n", + svc_print_addr(rqstp, buf, sizeof(buf))); return nfserr_perm; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f3b8eb309d01..b507cd327d9b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -849,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) * tell us anything. For now just warn about unpriv connections. */ if (!svc_port_is_privileged(sin)) { - dprintk(KERN_WARNING - "%s: connect from unprivileged port: %s\n", + dprintk("%s: connect from unprivileged port: %s\n", serv->sv_name, __svc_print_addr(sin, buf, sizeof(buf))); } From da2ebce6a0f64cc01bd00aba998c0a4fa7c09843 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:25 -0400 Subject: [PATCH 086/100] nfsd: make nfsd4_encode_fattr static sparse says: CHECK fs/nfsd/nfs4xdr.c fs/nfsd/nfs4xdr.c:2043:1: warning: symbol 'nfsd4_encode_fattr' was not declared. Should it be static? Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3976dc699731..a7268b4a4954 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2000,7 +2000,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. */ -__be32 +static __be32 nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, u32 *bmval, From a832e7ae8b4f71fc522089d83fd445693aa7dbc5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:26 -0400 Subject: [PATCH 087/100] nfsd: fix laundromat next-run-time calculation The laundromat uses two variables to calculate when it should next run, but one is completely ignored at the end of the run. Merge the two and rename the variable to be more descriptive of what it does. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d0a016a502be..01a0e4384500 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3397,8 +3397,7 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; - time_t t, clientid_val = nn->nfsd4_lease; - time_t u, test_val = nn->nfsd4_lease; + time_t t, new_timeo = nn->nfsd4_lease; nfs4_lock_state(); @@ -3410,8 +3409,7 @@ nfs4_laundromat(struct nfsd_net *nn) clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; - if (clientid_val > t) - clientid_val = t; + new_timeo = min(new_timeo, t); break; } if (mark_client_expired_locked(clp)) { @@ -3434,9 +3432,8 @@ nfs4_laundromat(struct nfsd_net *nn) if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { - u = dp->dl_time - cutoff; - if (test_val > u) - test_val = u; + t = dp->dl_time - cutoff; + new_timeo = min(new_timeo, t); break; } list_move(&dp->dl_recall_lru, &reaplist); @@ -3446,21 +3443,18 @@ nfs4_laundromat(struct nfsd_net *nn) dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); revoke_delegation(dp); } - test_val = nn->nfsd4_lease; list_for_each_safe(pos, next, &nn->close_lru) { oo = container_of(pos, struct nfs4_openowner, oo_close_lru); if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { - u = oo->oo_time - cutoff; - if (test_val > u) - test_val = u; + t = oo->oo_time - cutoff; + new_timeo = min(new_timeo, t); break; } release_openowner(oo); } - if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) - clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); nfs4_unlock_state(); - return clientid_val; + return new_timeo; } static struct workqueue_struct *laundry_wq; From 931ee56c67573eb4e51c8a4e78598d965b8b059e Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 30 May 2014 09:09:27 -0400 Subject: [PATCH 088/100] nfsd4: use recall_lock for delegation hashing This fixes a bug in the handling of the fi_delegations list. nfs4_setlease does not hold the recall_lock when adding to it. The client_mutex is held, which prevents against concurrent list changes, but nfsd_break_deleg_cb does not hold while walking it. New delegations could theoretically creep onto the list while we're walking it there. Signed-off-by: Benny Halevy Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 01a0e4384500..05ca64c4599d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -433,12 +433,21 @@ static void unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } +static void +hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) +{ + lockdep_assert_held(&recall_lock); + + list_add(&dp->dl_perfile, &fp->fi_delegations); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); +} + /* Called under the state lock. */ static void unhash_delegation(struct nfs4_delegation *dp) { - list_del_init(&dp->dl_perclnt); spin_lock(&recall_lock); + list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); @@ -3065,11 +3074,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp) status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); if (status) goto out_free; - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); fp->fi_lease = fl; fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); - list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_lock(&recall_lock); + hash_delegation_locked(dp, fp); + spin_unlock(&recall_lock); return 0; out_free: locks_free_lock(fl); @@ -3090,9 +3100,8 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) spin_unlock(&recall_lock); return -EAGAIN; } - list_add(&dp->dl_perfile, &fp->fi_delegations); + hash_delegation_locked(dp, fp); spin_unlock(&recall_lock); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); return 0; } @@ -4903,6 +4912,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, struct nfs4_delegation *dp, *next; u64 count = 0; + lockdep_assert_held(&recall_lock); list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { if (victims) list_move(&dp->dl_recall_lru, victims); From ba5378b66f5eb20e464796c5b088c0961fb5f618 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:28 -0400 Subject: [PATCH 089/100] nfsd: fix setting of NFS4_OO_CONFIRMED in nfsd4_open In the NFS4_OPEN_CLAIM_PREVIOUS case, we should only mark it confirmed if the nfs4_check_open_reclaim check succeeds. In the NFS4_OPEN_CLAIM_DELEG_PREV_FH and NFS4_OPEN_CLAIM_DELEGATE_PREV cases, I see no point in declaring the openowner confirmed when the operation is going to fail anyway, and doing so might allow the client to game things such that it wouldn't need to confirm a subsequent open with the same owner. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 16e71d033ea5..8fc901acf1d3 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -430,12 +430,12 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; break; case NFS4_OPEN_CLAIM_PREVIOUS: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion, nn); if (status) goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, cstate, open); @@ -445,7 +445,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; dprintk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); status = nfserr_notsupp; From 7025005d5e7715efa24c115c45f4a6b4f1a545de Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 30 May 2014 09:09:29 -0400 Subject: [PATCH 090/100] nfsd: remove unneeded zeroing of fields in nfsd4_proc_compound The memset of resp in svc_process_common should ensure that these are already zeroed by the time they get here. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8fc901acf1d3..f053c5a9ebf1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1290,11 +1290,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; - resp->opcnt = 0; resp->rqstp = rqstp; cstate->minorversion = args->minorversion; - cstate->replay_owner = NULL; - cstate->session = NULL; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); /* From cdc975050077d707d5315e51c2b05a763d4895e5 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 30 May 2014 09:09:30 -0400 Subject: [PATCH 091/100] nfsd4: rename recall_lock to state_lock ...as the name is a bit more descriptive and we've started using it for other purposes. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 63 +++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 05ca64c4599d..63e764506bd2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -81,7 +81,7 @@ static DEFINE_MUTEX(client_mutex); * effort to decrease the scope of the client_mutex, this spinlock may * eventually cover more: */ -static DEFINE_SPINLOCK(recall_lock); +static DEFINE_SPINLOCK(state_lock); static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; @@ -235,9 +235,9 @@ static void nfsd4_free_file(struct nfs4_file *f) static inline void put_nfs4_file(struct nfs4_file *fi) { - if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { + if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del(&fi->fi_hash); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); iput(fi->fi_inode); nfsd4_free_file(fi); } @@ -436,7 +436,7 @@ static void unhash_stid(struct nfs4_stid *s) static void hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { - lockdep_assert_held(&recall_lock); + lockdep_assert_held(&state_lock); list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); @@ -446,11 +446,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) static void unhash_delegation(struct nfs4_delegation *dp) { - spin_lock(&recall_lock); + spin_lock(&state_lock); list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); if (dp->dl_file) { nfs4_put_deleg_lease(dp->dl_file); put_nfs4_file(dp->dl_file); @@ -1161,13 +1161,13 @@ destroy_client(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); + spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); list_del_init(&dp->dl_perclnt); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); destroy_delegation(dp); @@ -2539,9 +2539,9 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) fp->fi_lease = NULL; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&recall_lock); + spin_lock(&state_lock); hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); } void @@ -2712,15 +2712,15 @@ find_file(struct inode *ino) unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; - spin_lock(&recall_lock); + spin_lock(&state_lock); hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return fp; } } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return NULL; } @@ -2757,6 +2757,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&state_lock); /* We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel @@ -2793,11 +2794,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&recall_lock); + spin_lock(&state_lock); fp->fi_had_conflict = true; list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) nfsd_break_one_deleg(dp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); } static @@ -3077,9 +3078,9 @@ static int nfs4_setlease(struct nfs4_delegation *dp) fp->fi_lease = fl; fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); - spin_lock(&recall_lock); + spin_lock(&state_lock); hash_delegation_locked(dp, fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return 0; out_free: locks_free_lock(fl); @@ -3094,14 +3095,14 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) dp->dl_file = fp; if (!fp->fi_lease) return nfs4_setlease(dp); - spin_lock(&recall_lock); + spin_lock(&state_lock); atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return -EAGAIN; } hash_delegation_locked(dp, fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return 0; } @@ -3435,7 +3436,7 @@ nfs4_laundromat(struct nfsd_net *nn) clp->cl_clientid.cl_id); expire_client(clp); } - spin_lock(&recall_lock); + spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) @@ -3447,7 +3448,7 @@ nfs4_laundromat(struct nfsd_net *nn) } list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); revoke_delegation(dp); @@ -4912,7 +4913,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, struct nfs4_delegation *dp, *next; u64 count = 0; - lockdep_assert_held(&recall_lock); + lockdep_assert_held(&state_lock); list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { if (victims) list_move(&dp->dl_recall_lru, victims); @@ -4928,9 +4929,9 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) LIST_HEAD(victims); u64 count; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, &victims); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) revoke_delegation(dp); @@ -4944,11 +4945,11 @@ u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) LIST_HEAD(victims); u64 count; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, &victims); list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) nfsd_break_one_deleg(dp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return count; } @@ -4957,9 +4958,9 @@ u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) { u64 count = 0; - spin_lock(&recall_lock); + spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, NULL); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); nfsd_print_count(clp, count, "delegations"); return count; @@ -5170,12 +5171,12 @@ nfs4_state_shutdown_net(struct net *net) nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); + spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); destroy_delegation(dp); From 3fb87d13ce29637e0ae005fa66a4f917cce6fede Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 30 May 2014 09:09:31 -0400 Subject: [PATCH 092/100] nfsd4: hash deleg stateid only on successful nfs4_set_delegation We don't want the stateid to be found in the hash table before the delegation is granted. Currently this is protected by the client_mutex, but we want to break that up and this is a necessary step toward that goal. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 63e764506bd2..e5197d947b9e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -375,7 +375,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - dp->dl_stid.sc_type = NFS4_DELEG_STID; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -438,6 +437,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { lockdep_assert_held(&state_lock); + dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } From 05638dc73af2586517468b1159d4b76e90607359 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jun 2014 12:05:47 -0400 Subject: [PATCH 093/100] nfsd4: simplify server xdr->next_page use The rpc code makes available to the NFS server an array of pages to encod into. The server represents its reply as an xdr buf, with the head pointing into the first page in that array, the pages ** array starting just after that, and the tail (if any) sharing any leftover space in the page used by the head. While encoding, we use xdr_stream->page_ptr to keep track of which page we're currently using. Currently we set xdr_stream->page_ptr to buf->pages, which makes the head a weird exception to the rule that page_ptr always points to the page we're currently encoding into. So, instead set it to buf->pages - 1 (the page actually containing the head), and remove the need for a little unintuitive logic in xdr_get_next_encode_buffer() and xdr_truncate_encode. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- net/sunrpc/xdr.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f053c5a9ebf1..6851b003f2a4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1264,7 +1264,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, /* Tail and page_len should be zero at this point: */ buf->len = buf->head[0].iov_len; xdr->scratch.iov_len = 0; - xdr->page_ptr = buf->pages; + xdr->page_ptr = buf->pages - 1; buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - rqstp->rq_auth_slack; } diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 39928444c7fb..23fb4e75e245 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -523,10 +523,9 @@ __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) frag2bytes = nbytes - frag1bytes; if (xdr->iov) xdr->iov->iov_len += frag1bytes; - else { + else xdr->buf->page_len += frag1bytes; - xdr->page_ptr++; - } + xdr->page_ptr++; xdr->iov = NULL; /* * If the last encode didn't end exactly on a page boundary, the @@ -638,8 +637,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) /* xdr->iov should already be NULL */ return; } - if (fraglen) + if (fraglen) { xdr->end = head->iov_base + head->iov_len; + xdr->page_ptr--; + } /* (otherwise assume xdr->end is already set) */ head->iov_len = len; buf->len = len; From 06553991e7757c668efb3bce9dcc740f31aead60 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jun 2014 12:32:51 -0400 Subject: [PATCH 094/100] nfsd4: kill READ32 While we're here, let's kill off a couple of the read-side macros. Leaving the more complicated ones alone for now. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 255 +++++++++++++++++++++++----------------------- 1 file changed, 127 insertions(+), 128 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a7268b4a4954..a23fa002e0d0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -98,7 +98,6 @@ xdr_error: \ status = nfserr_bad_xdr; \ goto out -#define READ32(x) (x) = ntohl(*p++) #define READ64(x) do { \ (x) = (u64)ntohl(*p++) << 32; \ (x) |= ntohl(*p++); \ @@ -248,17 +247,17 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) bmval[2] = 0; READ_BUF(4); - READ32(bmlen); + bmlen = be32_to_cpup(p++); if (bmlen > 1000) goto xdr_error; READ_BUF(bmlen << 2); if (bmlen > 0) - READ32(bmval[0]); + bmval[0] = be32_to_cpup(p++); if (bmlen > 1) - READ32(bmval[1]); + bmval[1] = be32_to_cpup(p++); if (bmlen > 2) - READ32(bmval[2]); + bmval[2] = be32_to_cpup(p++); DECODE_TAIL; } @@ -278,7 +277,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return status; READ_BUF(4); - READ32(expected_len); + expected_len = be32_to_cpup(p++); if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); @@ -291,7 +290,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct nfs4_ace *ace; READ_BUF(4); len += 4; - READ32(nace); + nace = be32_to_cpup(p++); if (nace > NFS4_ACL_MAX) return nfserr_fbig; @@ -305,10 +304,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; - READ32(ace->type); - READ32(ace->flag); - READ32(ace->access_mask); - READ32(dummy32); + ace->type = be32_to_cpup(p++); + ace->flag = be32_to_cpup(p++); + ace->access_mask = be32_to_cpup(p++); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); @@ -330,14 +329,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; - READ32(iattr->ia_mode); + iattr->ia_mode = be32_to_cpup(p++); iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -348,7 +347,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -359,7 +358,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore @@ -367,7 +366,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, READ_BUF(12); len += 12; READ64(iattr->ia_atime.tv_sec); - READ32(iattr->ia_atime.tv_nsec); + iattr->ia_atime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -382,7 +381,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore @@ -390,7 +389,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, READ_BUF(12); len += 12; READ64(iattr->ia_mtime.tv_sec); - READ32(iattr->ia_mtime.tv_nsec); + iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -408,13 +407,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { READ_BUF(4); len += 4; - READ32(dummy32); /* lfs: we don't use it */ + dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ READ_BUF(4); len += 4; - READ32(dummy32); /* pi: we don't use it either */ + dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) return nfserr_badlabel; @@ -445,7 +444,7 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(sid->si_generation); + sid->si_generation = be32_to_cpup(p++); COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -457,7 +456,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_HEAD; READ_BUF(4); - READ32(access->ac_req_access); + access->ac_req_access = be32_to_cpup(p++); DECODE_TAIL; } @@ -472,7 +471,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ /* callback_sec_params4 */ READ_BUF(4); - READ32(nr_secflavs); + nr_secflavs = be32_to_cpup(p++); if (nr_secflavs) cbs->flavor = (u32)(-1); else @@ -480,7 +479,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ cbs->flavor = 0; for (i = 0; i < nr_secflavs; ++i) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); switch (dummy) { case RPC_AUTH_NULL: /* Nothing to read */ @@ -490,21 +489,21 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ case RPC_AUTH_UNIX: READ_BUF(8); /* stamp */ - READ32(dummy); + dummy = be32_to_cpup(p++); /* machine name */ - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); SAVEMEM(machine_name, dummy); /* uid, gid */ READ_BUF(8); - READ32(uid); - READ32(gid); + uid = be32_to_cpup(p++); + gid = be32_to_cpup(p++); /* more gids */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); if (cbs->flavor == (u32)(-1)) { kuid_t kuid = make_kuid(&init_user_ns, uid); @@ -524,14 +523,14 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ "not supported!\n"); READ_BUF(8); /* gcbp_service */ - READ32(dummy); + dummy = be32_to_cpup(p++); /* gcbp_handle_from_server */ - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); /* gcbp_handle_from_client */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); break; default: @@ -547,7 +546,7 @@ static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, stru DECODE_HEAD; READ_BUF(4); - READ32(bc->bc_cb_program); + bc->bc_cb_program = be32_to_cpup(p++); nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); DECODE_TAIL; @@ -559,7 +558,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(bcts->dir); + bcts->dir = be32_to_cpup(p++); /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker * could help us figure out we should be using it. */ DECODE_TAIL; @@ -571,7 +570,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) DECODE_HEAD; READ_BUF(4); - READ32(close->cl_seqid); + close->cl_seqid = be32_to_cpup(p++); return nfsd4_decode_stateid(argp, &close->cl_stateid); DECODE_TAIL; @@ -585,7 +584,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit READ_BUF(12); READ64(commit->co_offset); - READ32(commit->co_count); + commit->co_count = be32_to_cpup(p++); DECODE_TAIL; } @@ -596,19 +595,19 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create DECODE_HEAD; READ_BUF(4); - READ32(create->cr_type); + create->cr_type = be32_to_cpup(p++); switch (create->cr_type) { case NF4LNK: READ_BUF(4); - READ32(create->cr_linklen); + create->cr_linklen = be32_to_cpup(p++); READ_BUF(create->cr_linklen); SAVEMEM(create->cr_linkname, create->cr_linklen); break; case NF4BLK: case NF4CHR: READ_BUF(8); - READ32(create->cr_specdata1); - READ32(create->cr_specdata2); + create->cr_specdata1 = be32_to_cpup(p++); + create->cr_specdata2 = be32_to_cpup(p++); break; case NF4SOCK: case NF4FIFO: @@ -618,7 +617,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } READ_BUF(4); - READ32(create->cr_namelen); + create->cr_namelen = be32_to_cpup(p++); READ_BUF(create->cr_namelen); SAVEMEM(create->cr_name, create->cr_namelen); if ((status = check_filename(create->cr_name, create->cr_namelen))) @@ -650,7 +649,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) DECODE_HEAD; READ_BUF(4); - READ32(link->li_namelen); + link->li_namelen = be32_to_cpup(p++); READ_BUF(link->li_namelen); SAVEMEM(link->li_name, link->li_namelen); if ((status = check_filename(link->li_name, link->li_namelen))) @@ -668,24 +667,24 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ READ_BUF(28); - READ32(lock->lk_type); + lock->lk_type = be32_to_cpup(p++); if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(lock->lk_reclaim); + lock->lk_reclaim = be32_to_cpup(p++); READ64(lock->lk_offset); READ64(lock->lk_length); - READ32(lock->lk_is_new); + lock->lk_is_new = be32_to_cpup(p++); if (lock->lk_is_new) { READ_BUF(4); - READ32(lock->lk_new_open_seqid); + lock->lk_new_open_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); if (status) return status; READ_BUF(8 + sizeof(clientid_t)); - READ32(lock->lk_new_lock_seqid); + lock->lk_new_lock_seqid = be32_to_cpup(p++); COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); - READ32(lock->lk_new_owner.len); + lock->lk_new_owner.len = be32_to_cpup(p++); READ_BUF(lock->lk_new_owner.len); READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); } else { @@ -693,7 +692,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) if (status) return status; READ_BUF(4); - READ32(lock->lk_old_lock_seqid); + lock->lk_old_lock_seqid = be32_to_cpup(p++); } DECODE_TAIL; @@ -705,13 +704,13 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) DECODE_HEAD; READ_BUF(32); - READ32(lockt->lt_type); + lockt->lt_type = be32_to_cpup(p++); if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) goto xdr_error; READ64(lockt->lt_offset); READ64(lockt->lt_length); COPYMEM(&lockt->lt_clientid, 8); - READ32(lockt->lt_owner.len); + lockt->lt_owner.len = be32_to_cpup(p++); READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); @@ -724,10 +723,10 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) DECODE_HEAD; READ_BUF(8); - READ32(locku->lu_type); + locku->lu_type = be32_to_cpup(p++); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(locku->lu_seqid); + locku->lu_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &locku->lu_stateid); if (status) return status; @@ -744,7 +743,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_HEAD; READ_BUF(4); - READ32(lookup->lo_len); + lookup->lo_len = be32_to_cpup(p++); READ_BUF(lookup->lo_len); SAVEMEM(lookup->lo_name, lookup->lo_len); if ((status = check_filename(lookup->lo_name, lookup->lo_len))) @@ -759,7 +758,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh u32 w; READ_BUF(4); - READ32(w); + w = be32_to_cpup(p++); *share_access = w & NFS4_SHARE_ACCESS_MASK; *deleg_want = w & NFS4_SHARE_WANT_MASK; if (deleg_when) @@ -811,7 +810,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) __be32 *p; READ_BUF(4); - READ32(*x); + *x = be32_to_cpup(p++); /* Note: unlinke access bits, deny bits may be zero. */ if (*x & ~NFS4_SHARE_DENY_BOTH) return nfserr_bad_xdr; @@ -825,7 +824,7 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne __be32 *p; READ_BUF(4); - READ32(o->len); + o->len = be32_to_cpup(p++); if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) return nfserr_bad_xdr; @@ -850,7 +849,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) open->op_xdr_error = 0; /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(4); - READ32(open->op_seqid); + open->op_seqid = be32_to_cpup(p++); /* decode, yet ignore deleg_when until supported */ status = nfsd4_decode_share_access(argp, &open->op_share_access, &open->op_deleg_want, &dummy); @@ -865,13 +864,13 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) if (status) goto xdr_error; READ_BUF(4); - READ32(open->op_create); + open->op_create = be32_to_cpup(p++); switch (open->op_create) { case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: READ_BUF(4); - READ32(open->op_createmode); + open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: @@ -904,12 +903,12 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) /* open_claim */ READ_BUF(4); - READ32(open->op_claim_type); + open->op_claim_type = be32_to_cpup(p++); switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_PREV: READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -917,14 +916,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) break; case NFS4_OPEN_CLAIM_PREVIOUS: READ_BUF(4); - READ32(open->op_delegate_type); + open->op_delegate_type = be32_to_cpup(p++); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); if (status) return status; READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); if ((status = check_filename(open->op_fname.data, open->op_fname.len))) @@ -962,7 +961,7 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con if (status) return status; READ_BUF(4); - READ32(open_conf->oc_seqid); + open_conf->oc_seqid = be32_to_cpup(p++); DECODE_TAIL; } @@ -976,7 +975,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d if (status) return status; READ_BUF(4); - READ32(open_down->od_seqid); + open_down->od_seqid = be32_to_cpup(p++); status = nfsd4_decode_share_access(argp, &open_down->od_share_access, &open_down->od_deleg_want, NULL); if (status) @@ -993,7 +992,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) DECODE_HEAD; READ_BUF(4); - READ32(putfh->pf_fhlen); + putfh->pf_fhlen = be32_to_cpup(p++); if (putfh->pf_fhlen > NFS4_FHSIZE) goto xdr_error; READ_BUF(putfh->pf_fhlen); @@ -1020,7 +1019,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) return status; READ_BUF(12); READ64(read->rd_offset); - READ32(read->rd_length); + read->rd_length = be32_to_cpup(p++); DECODE_TAIL; } @@ -1033,8 +1032,8 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read READ_BUF(24); READ64(readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - READ32(readdir->rd_dircount); - READ32(readdir->rd_maxcount); + readdir->rd_dircount = be32_to_cpup(p++); + readdir->rd_maxcount = be32_to_cpup(p++); if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) goto out; @@ -1047,7 +1046,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove DECODE_HEAD; READ_BUF(4); - READ32(remove->rm_namelen); + remove->rm_namelen = be32_to_cpup(p++); READ_BUF(remove->rm_namelen); SAVEMEM(remove->rm_name, remove->rm_namelen); if ((status = check_filename(remove->rm_name, remove->rm_namelen))) @@ -1062,10 +1061,10 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename DECODE_HEAD; READ_BUF(4); - READ32(rename->rn_snamelen); + rename->rn_snamelen = be32_to_cpup(p++); READ_BUF(rename->rn_snamelen + 4); SAVEMEM(rename->rn_sname, rename->rn_snamelen); - READ32(rename->rn_tnamelen); + rename->rn_tnamelen = be32_to_cpup(p++); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) @@ -1097,7 +1096,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(secinfo->si_namelen); + secinfo->si_namelen = be32_to_cpup(p++); READ_BUF(secinfo->si_namelen); SAVEMEM(secinfo->si_name, secinfo->si_namelen); status = check_filename(secinfo->si_name, secinfo->si_namelen); @@ -1113,7 +1112,7 @@ nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(sin->sin_style); + sin->sin_style = be32_to_cpup(p++); DECODE_TAIL; } @@ -1144,16 +1143,16 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient if (status) return nfserr_bad_xdr; READ_BUF(8); - READ32(setclientid->se_callback_prog); - READ32(setclientid->se_callback_netid_len); + setclientid->se_callback_prog = be32_to_cpup(p++); + setclientid->se_callback_netid_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_netid_len + 4); SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); - READ32(setclientid->se_callback_addr_len); + setclientid->se_callback_addr_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_addr_len + 4); SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); - READ32(setclientid->se_callback_ident); + setclientid->se_callback_ident = be32_to_cpup(p++); DECODE_TAIL; } @@ -1186,7 +1185,7 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify * nfsd4_proc_verify */ READ_BUF(4); - READ32(verify->ve_attrlen); + verify->ve_attrlen = be32_to_cpup(p++); READ_BUF(verify->ve_attrlen); SAVEMEM(verify->ve_attrval, verify->ve_attrlen); @@ -1205,10 +1204,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) return status; READ_BUF(16); READ64(write->wr_offset); - READ32(write->wr_stable_how); + write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > 2) goto xdr_error; - READ32(write->wr_buflen); + write->wr_buflen = be32_to_cpup(p++); /* Sorry .. no magic macros for this.. * * READ_BUF(write->wr_buflen); @@ -1254,7 +1253,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel READ_BUF(12); COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); - READ32(rlockowner->rl_owner.len); + rlockowner->rl_owner.len = be32_to_cpup(p++); READ_BUF(rlockowner->rl_owner.len); READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); @@ -1278,63 +1277,63 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, return nfserr_bad_xdr; READ_BUF(4); - READ32(exid->flags); + exid->flags = be32_to_cpup(p++); /* Ignore state_protect4_a */ READ_BUF(4); - READ32(exid->spa_how); + exid->spa_how = be32_to_cpup(p++); switch (exid->spa_how) { case SP4_NONE: break; case SP4_MACH_CRED: /* spo_must_enforce */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* spo_must_allow */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; break; case SP4_SSV: /* ssp_ops */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* ssp_hash_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_encr_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_window and ssp_num_gss_handles */ READ_BUF(8); - READ32(dummy); - READ32(dummy); + dummy = be32_to_cpup(p++); + dummy = be32_to_cpup(p++); break; default: goto xdr_error; @@ -1342,7 +1341,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, /* Ignore Implementation ID */ READ_BUF(4); /* nfs_impl_id4 array length */ - READ32(dummy); + dummy = be32_to_cpup(p++); if (dummy > 1) goto xdr_error; @@ -1350,13 +1349,13 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, if (dummy == 1) { /* nii_domain */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); /* nii_name */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); @@ -1376,21 +1375,21 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, READ_BUF(16); COPYMEM(&sess->clientid, 8); - READ32(sess->seqid); - READ32(sess->flags); + sess->seqid = be32_to_cpup(p++); + sess->flags = be32_to_cpup(p++); /* Fore channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->fore_channel.maxreq_sz); - READ32(sess->fore_channel.maxresp_sz); - READ32(sess->fore_channel.maxresp_cached); - READ32(sess->fore_channel.maxops); - READ32(sess->fore_channel.maxreqs); - READ32(sess->fore_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->fore_channel.maxreq_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_cached = be32_to_cpup(p++); + sess->fore_channel.maxops = be32_to_cpup(p++); + sess->fore_channel.maxreqs = be32_to_cpup(p++); + sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->fore_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->fore_channel.rdma_attrs); + sess->fore_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->fore_channel.nr_rdma_attrs > 1) { dprintk("Too many fore channel attr bitmaps!\n"); goto xdr_error; @@ -1398,23 +1397,23 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, /* Back channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->back_channel.maxreq_sz); - READ32(sess->back_channel.maxresp_sz); - READ32(sess->back_channel.maxresp_cached); - READ32(sess->back_channel.maxops); - READ32(sess->back_channel.maxreqs); - READ32(sess->back_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->back_channel.maxreq_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_cached = be32_to_cpup(p++); + sess->back_channel.maxops = be32_to_cpup(p++); + sess->back_channel.maxreqs = be32_to_cpup(p++); + sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->back_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->back_channel.rdma_attrs); + sess->back_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->back_channel.nr_rdma_attrs > 1) { dprintk("Too many back channel attr bitmaps!\n"); goto xdr_error; } READ_BUF(4); - READ32(sess->callback_prog); + sess->callback_prog = be32_to_cpup(p++); nfsd4_decode_cb_sec(argp, &sess->cb_sec); DECODE_TAIL; } @@ -1437,7 +1436,7 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(free_stateid->fr_stateid.si_generation); + free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -1451,10 +1450,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(seq->seqid); - READ32(seq->slotid); - READ32(seq->maxslots); - READ32(seq->cachethis); + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p++); DECODE_TAIL; } @@ -1511,7 +1510,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str DECODE_HEAD; READ_BUF(4); - READ32(rc->rca_one_fs); + rc->rca_one_fs = be32_to_cpup(p++); DECODE_TAIL; } @@ -1618,11 +1617,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) int i; READ_BUF(4); - READ32(argp->taglen); + argp->taglen = be32_to_cpup(p++); READ_BUF(argp->taglen + 8); SAVEMEM(argp->tag, argp->taglen); - READ32(argp->minorversion); - READ32(argp->opcnt); + argp->minorversion = be32_to_cpup(p++); + argp->opcnt = be32_to_cpup(p++); max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); if (argp->taglen > NFSD4_MAX_TAGLEN) @@ -1647,7 +1646,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->replay = NULL; READ_BUF(4); - READ32(op->opnum); + op->opnum = be32_to_cpup(p++); if (nfsd4_opnum_in_range(argp, op)) op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); From 542d1ab3c7ce53be7d7122a83d016304af4e6345 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jun 2014 12:45:31 -0400 Subject: [PATCH 095/100] nfsd4: kill READ64 Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a23fa002e0d0..2d305a121f37 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -98,10 +98,6 @@ xdr_error: \ status = nfserr_bad_xdr; \ goto out -#define READ64(x) do { \ - (x) = (u64)ntohl(*p++) << 32; \ - (x) |= ntohl(*p++); \ -} while (0) #define READMEM(x,nbytes) do { \ x = (char *)p; \ p += XDR_QUADLEN(nbytes); \ @@ -269,6 +265,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, { int expected_len, len = 0; u32 dummy32; + u64 sec; char *buf; DECODE_HEAD; @@ -282,7 +279,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); len += 8; - READ64(iattr->ia_size); + p = xdr_decode_hyper(p, &iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { @@ -365,7 +362,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ64(iattr->ia_atime.tv_sec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_atime.tv_sec = (time_t)sec; iattr->ia_atime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -388,7 +386,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ64(iattr->ia_mtime.tv_sec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_mtime.tv_sec = sec; iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -583,7 +582,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit DECODE_HEAD; READ_BUF(12); - READ64(commit->co_offset); + p = xdr_decode_hyper(p, &commit->co_offset); commit->co_count = be32_to_cpup(p++); DECODE_TAIL; @@ -671,8 +670,8 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) goto xdr_error; lock->lk_reclaim = be32_to_cpup(p++); - READ64(lock->lk_offset); - READ64(lock->lk_length); + p = xdr_decode_hyper(p, &lock->lk_offset); + p = xdr_decode_hyper(p, &lock->lk_length); lock->lk_is_new = be32_to_cpup(p++); if (lock->lk_is_new) { @@ -707,8 +706,8 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) lockt->lt_type = be32_to_cpup(p++); if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) goto xdr_error; - READ64(lockt->lt_offset); - READ64(lockt->lt_length); + p = xdr_decode_hyper(p, &lockt->lt_offset); + p = xdr_decode_hyper(p, &lockt->lt_length); COPYMEM(&lockt->lt_clientid, 8); lockt->lt_owner.len = be32_to_cpup(p++); READ_BUF(lockt->lt_owner.len); @@ -731,8 +730,8 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) if (status) return status; READ_BUF(16); - READ64(locku->lu_offset); - READ64(locku->lu_length); + p = xdr_decode_hyper(p, &locku->lu_offset); + p = xdr_decode_hyper(p, &locku->lu_length); DECODE_TAIL; } @@ -1018,7 +1017,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) if (status) return status; READ_BUF(12); - READ64(read->rd_offset); + p = xdr_decode_hyper(p, &read->rd_offset); read->rd_length = be32_to_cpup(p++); DECODE_TAIL; @@ -1030,7 +1029,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read DECODE_HEAD; READ_BUF(24); - READ64(readdir->rd_cookie); + p = xdr_decode_hyper(p, &readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); readdir->rd_dircount = be32_to_cpup(p++); readdir->rd_maxcount = be32_to_cpup(p++); @@ -1203,7 +1202,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) if (status) return status; READ_BUF(16); - READ64(write->wr_offset); + p = xdr_decode_hyper(p, &write->wr_offset); write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > 2) goto xdr_error; From 999e568354b8c797f344a2154761dd94cc84e4ac Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 3 Jun 2014 17:33:35 -0400 Subject: [PATCH 096/100] nfs4: remove unused CHANGE_SECURITY_LABEL This constant has the wrong value. And we don't use it. And it's been removed from the 4.2 spec anyway. Signed-off-by: J. Bruce Fields --- fs/nfs/nfs4proc.c | 2 +- include/linux/nfs4.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 397be39c6dc8..7f55fed8dc64 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 12c2cb947df5..a1e3064a8d99 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -399,8 +399,6 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) -#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \ - (1UL << 17) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) From 1b19453d1c6abcfa7c312ba6c9f11a277568fc94 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jun 2014 09:45:00 -0400 Subject: [PATCH 097/100] nfsd: don't halt scanning the DRC LRU list when there's an RC_INPROG entry Currently, the DRC cache pruner will stop scanning the list when it hits an entry that is RC_INPROG. It's possible however for a call to take a *very* long time. In that case, we don't want it to block other entries from being pruned if they are expired or we need to trim the cache to get back under the limit. Fix the DRC cache pruner to just ignore RC_INPROG entries. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index f8f060ffbf4f..6040da8830ff 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -224,13 +224,6 @@ hash_refile(struct svc_cacherep *rp) hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); } -static inline bool -nfsd_cache_entry_expired(struct svc_cacherep *rp) -{ - return rp->c_state != RC_INPROG && - time_after(jiffies, rp->c_timestamp + RC_EXPIRE); -} - /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. @@ -242,8 +235,14 @@ prune_cache_entries(void) long freed = 0; list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { - if (!nfsd_cache_entry_expired(rp) && - num_drc_entries <= max_drc_entries) + /* + * Don't free entries attached to calls that are still + * in-progress, but do keep scanning the list. + */ + if (rp->c_state == RC_INPROG) + continue; + if (num_drc_entries <= max_drc_entries && + time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(rp); freed++; From 0bf4828983dff062cd502f27ab8644b32774e72e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 28 May 2014 15:12:01 -0500 Subject: [PATCH 098/100] svcrdma: refactor marshalling logic This patch refactors the NFSRDMA server marshalling logic to remove the intermediary map structures. It also fixes an existing bug where the NFSRDMA server was not minding the device fast register page list length limitations. Signed-off-by: Tom Tucker Signed-off-by: Steve Wise --- include/linux/sunrpc/svc_rdma.h | 3 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 643 ++++++++++------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 230 +------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 62 ++- 4 files changed, 332 insertions(+), 606 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 0b8e3e6bdacf..5cf99a016368 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr { struct list_head frmr_list; }; struct svc_rdma_req_map { - struct svc_rdma_fastreg_mr *frmr; unsigned long count; union { struct kvec sge[RPCSVC_MAXPAGES]; struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; + unsigned long lkey[RPCSVC_MAXPAGES]; }; }; -#define RDMACTXT_F_FAST_UNREG 1 #define RDMACTXT_F_LAST_CTXT 2 #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 8d904e4eef15..52d9f2ce20b0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* Set up the XDR head */ rqstp->rq_arg.head[0].iov_base = page_address(page); - rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length); + rqstp->rq_arg.head[0].iov_len = + min_t(size_t, byte_count, ctxt->sge[0].length); rqstp->rq_arg.len = byte_count; rqstp->rq_arg.buflen = byte_count; @@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, page = ctxt->pages[sge_no]; put_page(rqstp->rq_pages[sge_no]); rqstp->rq_pages[sge_no] = page; - bc -= min(bc, ctxt->sge[sge_no].length); + bc -= min_t(u32, bc, ctxt->sge[sge_no].length); rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; sge_no++; } @@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_arg.tail[0].iov_len = 0; } -/* Encode a read-chunk-list as an array of IB SGE - * - * Assumptions: - * - chunk[0]->position points to pages[0] at an offset of 0 - * - pages[] is not physically or virtually contiguous and consists of - * PAGE_SIZE elements. - * - * Output: - * - sge array pointing into pages[] array. - * - chunk_sge array specifying sge index and count for each - * chunk in the read list - * - */ -static int map_read_chunks(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - struct rpcrdma_msg *rmsgp, - struct svc_rdma_req_map *rpl_map, - struct svc_rdma_req_map *chl_map, - int ch_count, - int byte_count) +static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { - int sge_no; - int sge_bytes; - int page_off; - int page_no; - int ch_bytes; - int ch_no; - struct rpcrdma_read_chunk *ch; + if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == + RDMA_TRANSPORT_IWARP) + return 1; + else + return min_t(int, sge_count, xprt->sc_max_sge); +} - sge_no = 0; - page_no = 0; - page_off = 0; - ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch_no = 0; - ch_bytes = ntohl(ch->rc_target.rs_length); - head->arg.head[0] = rqstp->rq_arg.head[0]; - head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; - head->hdr_count = head->count; /* save count of hdr pages */ - head->arg.page_base = 0; - head->arg.page_len = ch_bytes; - head->arg.len = rqstp->rq_arg.len + ch_bytes; - head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; - head->count++; - chl_map->ch[0].start = 0; - while (byte_count) { - rpl_map->sge[sge_no].iov_base = - page_address(rqstp->rq_arg.pages[page_no]) + page_off; - sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); - rpl_map->sge[sge_no].iov_len = sge_bytes; - /* - * Don't bump head->count here because the same page - * may be used by multiple SGE. - */ - head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; +typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last); + +/* Issue an RDMA_READ using the local lkey to map the data sink */ +static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last) +{ + struct ib_send_wr read_wr; + int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); + int ret, read, pno; + u32 pg_off = *page_offset; + u32 pg_no = *page_no; + + ctxt->direction = DMA_FROM_DEVICE; + ctxt->read_hdr = head; + pages_needed = + min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); + read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + + for (pno = 0; pno < pages_needed; pno++) { + int len = min_t(int, rs_length, PAGE_SIZE - pg_off); + + head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; + head->arg.page_len += len; + head->arg.len += len; + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; + ctxt->sge[pno].addr = + ib_dma_map_page(xprt->sc_cm_id->device, + head->arg.pages[pg_no], pg_off, + PAGE_SIZE - pg_off, + DMA_FROM_DEVICE); + ret = ib_dma_mapping_error(xprt->sc_cm_id->device, + ctxt->sge[pno].addr); + if (ret) + goto err; + atomic_inc(&xprt->sc_dma_used); - byte_count -= sge_bytes; - ch_bytes -= sge_bytes; - sge_no++; - /* - * If all bytes for this chunk have been mapped to an - * SGE, move to the next SGE - */ - if (ch_bytes == 0) { - chl_map->ch[ch_no].count = - sge_no - chl_map->ch[ch_no].start; - ch_no++; - ch++; - chl_map->ch[ch_no].start = sge_no; - ch_bytes = ntohl(ch->rc_target.rs_length); - /* If bytes remaining account for next chunk */ - if (byte_count) { - head->arg.page_len += ch_bytes; - head->arg.len += ch_bytes; - head->arg.buflen += ch_bytes; - } + /* The lkey here is either a local dma lkey or a dma_mr lkey */ + ctxt->sge[pno].lkey = xprt->sc_dma_lkey; + ctxt->sge[pno].length = len; + ctxt->count++; + + /* adjust offset and wrap to next page if needed */ + pg_off += len; + if (pg_off == PAGE_SIZE) { + pg_off = 0; + pg_no++; } - /* - * If this SGE consumed all of the page, move to the - * next page - */ - if ((sge_bytes + page_off) == PAGE_SIZE) { - page_no++; - page_off = 0; - /* - * If there are still bytes left to map, bump - * the page count - */ - if (byte_count) - head->count++; - } else - page_off += sge_bytes; + rs_length -= len; } - BUG_ON(byte_count != 0); - return sge_no; + + if (last && rs_length == 0) + set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + else + clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + + memset(&read_wr, 0, sizeof(read_wr)); + read_wr.wr_id = (unsigned long)ctxt; + read_wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.opcode; + read_wr.send_flags = IB_SEND_SIGNALED; + read_wr.wr.rdma.rkey = rs_handle; + read_wr.wr.rdma.remote_addr = rs_offset; + read_wr.sg_list = ctxt->sge; + read_wr.num_sge = pages_needed; + + ret = svc_rdma_send(xprt, &read_wr); + if (ret) { + pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + goto err; + } + + /* return current location in page array */ + *page_no = pg_no; + *page_offset = pg_off; + ret = read; + atomic_inc(&rdma_stat_read); + return ret; + err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + return ret; } -/* Map a read-chunk-list to an XDR and fast register the page-list. - * - * Assumptions: - * - chunk[0] position points to pages[0] at an offset of 0 - * - pages[] will be made physically contiguous by creating a one-off memory - * region using the fastreg verb. - * - byte_count is # of bytes in read-chunk-list - * - ch_count is # of chunks in read-chunk-list - * - * Output: - * - sge array pointing into pages[] array. - * - chunk_sge array specifying sge index and count for each - * chunk in the read list - */ -static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, +/* Issue an RDMA_READ using an FRMR to map the data sink */ +static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, - struct rpcrdma_msg *rmsgp, - struct svc_rdma_req_map *rpl_map, - struct svc_rdma_req_map *chl_map, - int ch_count, - int byte_count) + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + int last) { - int page_no; - int ch_no; - u32 offset; - struct rpcrdma_read_chunk *ch; - struct svc_rdma_fastreg_mr *frmr; - int ret = 0; + struct ib_send_wr read_wr; + struct ib_send_wr inv_wr; + struct ib_send_wr fastreg_wr; + u8 key; + int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); + struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); + int ret, read, pno; + u32 pg_off = *page_offset; + u32 pg_no = *page_no; - frmr = svc_rdma_get_frmr(xprt); if (IS_ERR(frmr)) return -ENOMEM; - head->frmr = frmr; - head->arg.head[0] = rqstp->rq_arg.head[0]; - head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; - head->hdr_count = head->count; /* save count of hdr pages */ - head->arg.page_base = 0; - head->arg.page_len = byte_count; - head->arg.len = rqstp->rq_arg.len + byte_count; - head->arg.buflen = rqstp->rq_arg.buflen + byte_count; + ctxt->direction = DMA_FROM_DEVICE; + ctxt->frmr = frmr; + pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); + read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); - /* Fast register the page list */ - frmr->kva = page_address(rqstp->rq_arg.pages[0]); + frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = byte_count; - frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - frmr->page_list->page_list[page_no] = + frmr->map_len = pages_needed << PAGE_SHIFT; + frmr->page_list_len = pages_needed; + + for (pno = 0; pno < pages_needed; pno++) { + int len = min_t(int, rs_length, PAGE_SIZE - pg_off); + + head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; + head->arg.page_len += len; + head->arg.len += len; + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + frmr->page_list->page_list[pno] = ib_dma_map_page(xprt->sc_cm_id->device, - rqstp->rq_arg.pages[page_no], 0, + head->arg.pages[pg_no], 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; + ret = ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[pno]); + if (ret) + goto err; atomic_inc(&xprt->sc_dma_used); - head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; - } - head->count += page_no; - - /* rq_respages points one past arg pages */ - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - /* Create the reply and chunk maps */ - offset = 0; - ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - for (ch_no = 0; ch_no < ch_count; ch_no++) { - int len = ntohl(ch->rc_target.rs_length); - rpl_map->sge[ch_no].iov_base = frmr->kva + offset; - rpl_map->sge[ch_no].iov_len = len; - chl_map->ch[ch_no].count = 1; - chl_map->ch[ch_no].start = ch_no; - offset += len; - ch++; + /* adjust offset and wrap to next page if needed */ + pg_off += len; + if (pg_off == PAGE_SIZE) { + pg_off = 0; + pg_no++; + } + rs_length -= len; } - ret = svc_rdma_fastreg(xprt, frmr); - if (ret) - goto fatal_err; - - return ch_no; - - fatal_err: - printk("svcrdma: error fast registering xdr for xprt %p", xprt); - svc_rdma_put_frmr(xprt, frmr); - return -EIO; -} - -static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, - struct svc_rdma_op_ctxt *ctxt, - struct svc_rdma_fastreg_mr *frmr, - struct kvec *vec, - u64 *sgl_offset, - int count) -{ - int i; - unsigned long off; + if (last && rs_length == 0) + set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + else + clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - ctxt->count = count; - ctxt->direction = DMA_FROM_DEVICE; - for (i = 0; i < count; i++) { - ctxt->sge[i].length = 0; /* in case map fails */ - if (!frmr) { - BUG_ON(!virt_to_page(vec[i].iov_base)); - off = (unsigned long)vec[i].iov_base & ~PAGE_MASK; - ctxt->sge[i].addr = - ib_dma_map_page(xprt->sc_cm_id->device, - virt_to_page(vec[i].iov_base), - off, - vec[i].iov_len, - DMA_FROM_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - ctxt->sge[i].addr)) - return -EINVAL; - ctxt->sge[i].lkey = xprt->sc_dma_lkey; - atomic_inc(&xprt->sc_dma_used); - } else { - ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; - ctxt->sge[i].lkey = frmr->mr->lkey; - } - ctxt->sge[i].length = vec[i].iov_len; - *sgl_offset = *sgl_offset + vec[i].iov_len; + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].lkey = frmr->mr->lkey; + ctxt->sge[0].length = read; + ctxt->count = 1; + ctxt->read_hdr = head; + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + fastreg_wr.next = &read_wr; + + /* Prepare RDMA_READ */ + memset(&read_wr, 0, sizeof(read_wr)); + read_wr.send_flags = IB_SEND_SIGNALED; + read_wr.wr.rdma.rkey = rs_handle; + read_wr.wr.rdma.remote_addr = rs_offset; + read_wr.sg_list = ctxt->sge; + read_wr.num_sge = 1; + if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { + read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + read_wr.wr_id = (unsigned long)ctxt; + read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; + } else { + read_wr.opcode = IB_WR_RDMA_READ; + read_wr.next = &inv_wr; + /* Prepare invalidate */ + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = (unsigned long)ctxt; + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = frmr->mr->lkey; + } + ctxt->wr_op = read_wr.opcode; + + /* Post the chain */ + ret = svc_rdma_send(xprt, &fastreg_wr); + if (ret) { + pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + goto err; } - return 0; -} -static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) -{ - if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == - RDMA_TRANSPORT_IWARP) && - sge_count > 1) - return 1; - else - return min_t(int, sge_count, xprt->sc_max_sge); + /* return current location in page array */ + *page_no = pg_no; + *page_offset = pg_off; + ret = read; + atomic_inc(&rdma_stat_read); + return ret; + err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + svc_rdma_put_frmr(xprt, frmr); + return ret; } -/* - * Use RDMA_READ to read data from the advertised client buffer into the - * XDR stream starting at rq_arg.head[0].iov_base. - * Each chunk in the array - * contains the following fields: - * discrim - '1', This isn't used for data placement - * position - The xdr stream offset (the same for every chunk) - * handle - RMR for client memory region - * length - data transfer length - * offset - 64 bit tagged offset in remote memory region - * - * On our side, we need to read into a pagelist. The first page immediately - * follows the RPC header. - * - * This function returns: - * 0 - No error and no read-list found. - * - * 1 - Successful read-list processing. The data is not yet in - * the pagelist and therefore the RPC request must be deferred. The - * I/O completion will enqueue the transport again and - * svc_rdma_recvfrom will complete the request. - * - * <0 - Error processing/posting read-list. - * - * NOTE: The ctxt must not be touched after the last WR has been posted - * because the I/O completion processing may occur on another - * processor and free / modify the context. Ne touche pas! - */ -static int rdma_read_xdr(struct svcxprt_rdma *xprt, - struct rpcrdma_msg *rmsgp, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *hdr_ctxt) +static int rdma_read_chunks(struct svcxprt_rdma *xprt, + struct rpcrdma_msg *rmsgp, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head) { - struct ib_send_wr read_wr; - struct ib_send_wr inv_wr; - int err = 0; - int ch_no; - int ch_count; - int byte_count; - int sge_count; - u64 sgl_offset; + int page_no, ch_count, ret; struct rpcrdma_read_chunk *ch; - struct svc_rdma_op_ctxt *ctxt = NULL; - struct svc_rdma_req_map *rpl_map; - struct svc_rdma_req_map *chl_map; + u32 page_offset, byte_count; + u64 rs_offset; + rdma_reader_fn reader; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); @@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; - /* Allocate temporary reply and chunk maps */ - rpl_map = svc_rdma_get_req_map(); - chl_map = svc_rdma_get_req_map(); + /* The request is completed when the RDMA_READs complete. The + * head context keeps all the pages that comprise the + * request. + */ + head->arg.head[0] = rqstp->rq_arg.head[0]; + head->arg.tail[0] = rqstp->rq_arg.tail[0]; + head->arg.pages = &head->pages[head->count]; + head->hdr_count = head->count; + head->arg.page_base = 0; + head->arg.page_len = 0; + head->arg.len = rqstp->rq_arg.len; + head->arg.buflen = rqstp->rq_arg.buflen; - if (!xprt->sc_frmr_pg_list_len) - sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, ch_count, - byte_count); + /* Use FRMR if supported */ + if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) + reader = rdma_read_chunk_frmr; else - sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, ch_count, - byte_count); - if (sge_count < 0) { - err = -EIO; - goto out; - } - - sgl_offset = 0; - ch_no = 0; + reader = rdma_read_chunk_lcl; + page_no = 0; page_offset = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch->rc_discrim != 0; ch++, ch_no++) { - u64 rs_offset; -next_sge: - ctxt = svc_rdma_get_context(xprt); - ctxt->direction = DMA_FROM_DEVICE; - ctxt->frmr = hdr_ctxt->frmr; - ctxt->read_hdr = NULL; - clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + ch->rc_discrim != 0; ch++) { - /* Prepare READ WR */ - memset(&read_wr, 0, sizeof read_wr); - read_wr.wr_id = (unsigned long)ctxt; - read_wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.opcode; - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = - rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); - err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, - &rpl_map->sge[chl_map->ch[ch_no].start], - &sgl_offset, - read_wr.num_sge); - if (err) { - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); - goto out; - } - if (((ch+1)->rc_discrim == 0) && - (read_wr.num_sge == chl_map->ch[ch_no].count)) { - /* - * Mark the last RDMA_READ with a bit to - * indicate all RPC data has been fetched from - * the client and the RPC needs to be enqueued. - */ - set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - if (hdr_ctxt->frmr) { - set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); - /* - * Invalidate the local MR used to map the data - * sink. - */ - if (xprt->sc_dev_caps & - SVCRDMA_DEVCAP_READ_W_INV) { - read_wr.opcode = - IB_WR_RDMA_READ_WITH_INV; - ctxt->wr_op = read_wr.opcode; - read_wr.ex.invalidate_rkey = - ctxt->frmr->mr->lkey; - } else { - /* Prepare INVALIDATE WR */ - memset(&inv_wr, 0, sizeof inv_wr); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; - inv_wr.ex.invalidate_rkey = - hdr_ctxt->frmr->mr->lkey; - read_wr.next = &inv_wr; - } - } - ctxt->read_hdr = hdr_ctxt; - } - /* Post the read */ - err = svc_rdma_send(xprt, &read_wr); - if (err) { - printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", - err); - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); - goto out; + byte_count = ntohl(ch->rc_target.rs_length); + + while (byte_count > 0) { + ret = reader(xprt, rqstp, head, + &page_no, &page_offset, + ntohl(ch->rc_target.rs_handle), + byte_count, rs_offset, + ((ch+1)->rc_discrim == 0) /* last */ + ); + if (ret < 0) + goto err; + byte_count -= ret; + rs_offset += ret; + head->arg.buflen += ret; } - atomic_inc(&rdma_stat_read); - - if (read_wr.num_sge < chl_map->ch[ch_no].count) { - chl_map->ch[ch_no].count -= read_wr.num_sge; - chl_map->ch[ch_no].start += read_wr.num_sge; - goto next_sge; - } - sgl_offset = 0; - err = 1; } - - out: - svc_rdma_put_req_map(rpl_map); - svc_rdma_put_req_map(chl_map); - + ret = 1; + err: /* Detach arg pages. svc_recv will replenish them */ - for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) - rqstp->rq_pages[ch_no] = NULL; + for (page_no = 0; + &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) + rqstp->rq_pages[page_no] = NULL; - return err; + return ret; } static int rdma_read_complete(struct svc_rqst *rqstp, @@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_op_ctxt, dto_q); list_del_init(&ctxt->dto_q); - } - if (ctxt) { spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); return rdma_read_complete(rqstp, ctxt); - } - - if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { + } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, struct svc_rdma_op_ctxt, dto_q); @@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) goto close_out; - BUG_ON(ret); goto out; } dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", @@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } /* Read read-list data. */ - ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); + ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); if (ret > 0) { /* read-list posted, defer until data received from client. */ goto defer; - } - if (ret < 0) { + } else if (ret < 0) { /* Post of read-list failed, free context. */ svc_rdma_put_context(ctxt, 1); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7e024a51617e..49fd21a5c215 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,152 +50,6 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -/* Encode an XDR as an array of IB SGE - * - * Assumptions: - * - head[0] is physically contiguous. - * - tail[0] is physically contiguous. - * - pages[] is not physically or virtually contiguous and consists of - * PAGE_SIZE elements. - * - * Output: - * SGE[0] reserved for RCPRDMA header - * SGE[1] data from xdr->head[] - * SGE[2..sge_count-2] data from xdr->pages[] - * SGE[sge_count-1] data from xdr->tail. - * - * The max SGE we need is the length of the XDR / pagesize + one for - * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES - * reserves a page for both the request and the reply header, and this - * array is only concerned with the reply we are assured that we have - * on extra page for the RPCRMDA header. - */ -static int fast_reg_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) -{ - int sge_no; - u32 sge_bytes; - u32 page_bytes; - u32 page_off; - int page_no = 0; - u8 *frva; - struct svc_rdma_fastreg_mr *frmr; - - frmr = svc_rdma_get_frmr(xprt); - if (IS_ERR(frmr)) - return -ENOMEM; - vec->frmr = frmr; - - /* Skip the RPCRDMA header */ - sge_no = 1; - - /* Map the head. */ - frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); - vec->sge[sge_no].iov_base = xdr->head[0].iov_base; - vec->sge[sge_no].iov_len = xdr->head[0].iov_len; - vec->count = 2; - sge_no++; - - /* Map the XDR head */ - frmr->kva = frva; - frmr->direction = DMA_TO_DEVICE; - frmr->access_flags = 0; - frmr->map_len = PAGE_SIZE; - frmr->page_list_len = 1; - page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, - virt_to_page(xdr->head[0].iov_base), - page_off, - PAGE_SIZE - page_off, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - atomic_inc(&xprt->sc_dma_used); - - /* Map the XDR page list */ - page_off = xdr->page_base; - page_bytes = xdr->page_len + page_off; - if (!page_bytes) - goto encode_tail; - - /* Map the pages */ - vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; - vec->sge[sge_no].iov_len = page_bytes; - sge_no++; - while (page_bytes) { - struct page *page; - - page = xdr->pages[page_no++]; - sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); - page_bytes -= sge_bytes; - - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, - page, page_off, - sge_bytes, DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - - atomic_inc(&xprt->sc_dma_used); - page_off = 0; /* reset for next time through loop */ - frmr->map_len += PAGE_SIZE; - frmr->page_list_len++; - } - vec->count++; - - encode_tail: - /* Map tail */ - if (0 == xdr->tail[0].iov_len) - goto done; - - vec->count++; - vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; - - if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == - ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { - /* - * If head and tail use the same page, we don't need - * to map it again. - */ - vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; - } else { - void *va; - - /* Map another page for the tail */ - page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; - va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); - vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; - - frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va), - page_off, - PAGE_SIZE, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[page_no])) - goto fatal_err; - atomic_inc(&xprt->sc_dma_used); - frmr->map_len += PAGE_SIZE; - frmr->page_list_len++; - } - - done: - if (svc_rdma_fastreg(xprt, frmr)) - goto fatal_err; - - return 0; - - fatal_err: - printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); - vec->frmr = NULL; - svc_rdma_put_frmr(xprt, frmr); - return -EIO; -} - static int map_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) @@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt, BUG_ON(xdr->len != (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); - if (xprt->sc_frmr_pg_list_len) - return fast_reg_xdr(xprt, xdr, vec); - /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, } /* Assumptions: - * - We are using FRMR - * - or - * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, @@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_bytes = min_t(size_t, bc, vec->sge[xdr_sge_no].iov_len-sge_off); sge[sge_no].length = sge_bytes; - if (!vec->frmr) { - sge[sge_no].addr = - dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - sge[sge_no].addr)) - goto err; - atomic_inc(&xprt->sc_dma_used); - sge[sge_no].lkey = xprt->sc_dma_lkey; - } else { - sge[sge_no].addr = (unsigned long) - vec->sge[xdr_sge_no].iov_base + sge_off; - sge[sge_no].lkey = vec->frmr->mr->lkey; - } + sge[sge_no].addr = + dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + sge[sge_no].addr)) + goto err; + atomic_inc(&xprt->sc_dma_used); + sge[sge_no].lkey = xprt->sc_dma_lkey; ctxt->count++; - ctxt->frmr = vec->frmr; sge_off = 0; sge_no++; xdr_sge_no++; @@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, return 0; err: svc_rdma_unmap_dma(ctxt); - svc_rdma_put_frmr(xprt, vec->frmr); svc_rdma_put_context(ctxt, 0); /* Fatal error, close transport */ return -EIO; @@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[1]; - if (vec->frmr) - max_write = vec->frmr->map_len; - else - max_write = xprt->sc_max_sge * PAGE_SIZE; + max_write = xprt->sc_max_sge * PAGE_SIZE; /* Write chunks start at the pagelist */ for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; @@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[2]; - if (vec->frmr) - max_write = vec->frmr->map_len; - else - max_write = xprt->sc_max_sge * PAGE_SIZE; + max_write = xprt->sc_max_sge * PAGE_SIZE; /* xdr offset starts at RPC message */ nchunks = ntohl(arg_ary->wc_nchunks); @@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr send_wr; - struct ib_send_wr inv_wr; int sge_no; int sge_bytes; int page_no; @@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma, "svcrdma: could not post a receive buffer, err=%d." "Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); - svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } @@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma, /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; - ctxt->frmr = vec->frmr; - if (vec->frmr) - set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); - else - clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ ctxt->sge[0].lkey = rdma->sc_dma_lkey; @@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma, int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; - if (!vec->frmr) { - ctxt->sge[sge_no].addr = - dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(rdma->sc_cm_id->device, - ctxt->sge[sge_no].addr)) - goto err; - atomic_inc(&rdma->sc_dma_used); - ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; - } else { - ctxt->sge[sge_no].addr = (unsigned long) - vec->sge[sge_no].iov_base; - ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; - } + ctxt->sge[sge_no].addr = + dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, + sge_bytes, DMA_TO_DEVICE); + xdr_off += sge_bytes; + if (ib_dma_mapping_error(rdma->sc_cm_id->device, + ctxt->sge[sge_no].addr)) + goto err; + atomic_inc(&rdma->sc_dma_used); + ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } BUG_ON(byte_count != 0); @@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[page_no+1].length = 0; } rqstp->rq_next_page = rqstp->rq_respages + 1; + BUG_ON(sge_no > rdma->sc_max_sge); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; @@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma, send_wr.num_sge = sge_no; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - if (vec->frmr) { - /* Prepare INVALIDATE WR */ - memset(&inv_wr, 0, sizeof inv_wr); - inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; - inv_wr.ex.invalidate_rkey = - vec->frmr->mr->lkey; - send_wr.next = &inv_wr; - } ret = svc_rdma_send(rdma, &send_wr); if (ret) @@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma, err: svc_rdma_unmap_dma(ctxt); - svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 02db8d9cc994..e7323fbbd348 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -162,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } map->count = 0; - map->frmr = NULL; return map; } @@ -338,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt, switch (ctxt->wr_op) { case IB_WR_SEND: - if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) - svc_rdma_put_frmr(xprt, ctxt->frmr); + BUG_ON(ctxt->frmr); svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: + BUG_ON(ctxt->frmr); svc_rdma_put_context(ctxt, 0); break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: + svc_rdma_put_frmr(xprt, ctxt->frmr); if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; BUG_ON(!read_hdr); - if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) - svc_rdma_put_frmr(xprt, ctxt->frmr); spin_lock_bh(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); list_add_tail(&read_hdr->dto_q, @@ -365,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt, break; default: + BUG_ON(1); printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d\n", ctxt->wr_op); @@ -380,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt, static void sq_cq_reap(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; - struct ib_wc wc; + struct ib_wc wc_a[6]; + struct ib_wc *wc; struct ib_cq *cq = xprt->sc_sq_cq; int ret; + memset(wc_a, 0, sizeof(wc_a)); + if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); - while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - if (wc.status != IB_WC_SUCCESS) - /* Close the transport */ - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) { + int i; - /* Decrement used SQ WR count */ - atomic_dec(&xprt->sc_sq_count); - wake_up(&xprt->sc_send_wait); + for (i = 0; i < ret; i++) { + wc = &wc_a[i]; + if (wc->status != IB_WC_SUCCESS) { + dprintk("svcrdma: sq wc err status %d\n", + wc->status); - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - if (ctxt) - process_context(xprt, ctxt); + /* Close the transport */ + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + } - svc_xprt_put(&xprt->sc_xprt); + /* Decrement used SQ WR count */ + atomic_dec(&xprt->sc_sq_count); + wake_up(&xprt->sc_send_wait); + + ctxt = (struct svc_rdma_op_ctxt *) + (unsigned long)wc->wr_id; + if (ctxt) + process_context(xprt, ctxt); + + svc_xprt_put(&xprt->sc_xprt); + } } if (ctxt) @@ -995,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) need_dma_mr = 0; break; case RDMA_TRANSPORT_IB: - if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else if (!(devattr.device_cap_flags & + IB_DEVICE_LOCAL_DMA_LKEY)) { need_dma_mr = 1; dma_mr_acc = IB_ACCESS_LOCAL_WRITE; } else @@ -1192,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) container_of(xprt, struct svcxprt_rdma, sc_xprt); /* - * If there are fewer SQ WR available than required to send a - * simple response, return false. - */ - if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3)) - return 0; - - /* - * ...or there are already waiters on the SQ, + * If there are already waiters on the SQ, * return false. */ if (waitqueue_active(&rdma->sc_send_wait)) From 83710fc753d2ae158aa3cb7a7966d9c1bd05b792 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 5 Jun 2014 09:54:31 -0500 Subject: [PATCH 099/100] svcrdma: Fence LOCAL_INV work requests Fencing forces the invalidate to only happen after all prior send work requests have been completed. Signed-off-by: Steve Wise Reported by : Devesh Sharma Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 52d9f2ce20b0..8f92a61ee2df 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -338,7 +338,7 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } ctxt->wr_op = read_wr.opcode; From 48385408b45523d9a432c66292d47ef43efcbb94 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 27 May 2014 11:14:26 -0400 Subject: [PATCH 100/100] nfsd4: fix FREE_STATEID lockowner leak 27b11428b7de ("nfsd4: remove lockowner when removing lock stateid") introduced a memory leak. Cc: stable@vger.kernel.org Reported-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e5197d947b9e..c0d45cec9958 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3743,7 +3743,7 @@ nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) * correspondance, and we have to delete the lockowner when we * delete the lock stateid: */ - unhash_lockowner(lo); + release_lockowner(lo); return nfs_ok; }