From bc1f12c71434aca808a98f715a8c3679110a8ea3 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Wed, 6 Dec 2006 11:35:16 +1000 Subject: [PATCH] --- yaml --- r: 42556 b: refs/heads/master c: 5340be59093d59826f4477a5f9991c762e4417e8 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/fs/lockd/clntproc.c | 2 +- trunk/fs/lockd/svc4proc.c | 2 +- trunk/fs/lockd/svcproc.c | 2 +- trunk/fs/nfs/direct.c | 6 + trunk/fs/nfs/file.c | 28 +- trunk/fs/nfs/inode.c | 2 +- trunk/fs/nfs/internal.h | 18 - trunk/fs/nfs/nfs3proc.c | 52 +- trunk/fs/nfs/nfs4proc.c | 99 ++- trunk/fs/nfs/pagelist.c | 67 +- trunk/fs/nfs/proc.c | 31 + trunk/fs/nfs/read.c | 179 ++--- trunk/fs/nfs/symlink.c | 2 + trunk/fs/nfs/write.c | 598 ++++++++-------- trunk/include/asm-m68knommu/rtc.h | 1 + trunk/include/linux/nfs_fs.h | 37 +- trunk/include/linux/nfs_page.h | 7 +- trunk/include/linux/nfs_xdr.h | 2 + trunk/include/linux/sunrpc/auth_gss.h | 2 + trunk/include/linux/sunrpc/clnt.h | 1 - trunk/include/linux/sunrpc/debug.h | 6 + trunk/include/linux/sunrpc/gss_krb5.h | 6 +- trunk/include/linux/sunrpc/gss_spkm3.h | 34 +- trunk/include/linux/sunrpc/sched.h | 11 +- trunk/include/linux/sunrpc/xdr.h | 23 +- trunk/include/linux/sunrpc/xprt.h | 37 +- trunk/net/sunrpc/auth_gss/auth_gss.c | 42 +- trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c | 101 ++- trunk/net/sunrpc/auth_gss/gss_krb5_mech.c | 18 +- trunk/net/sunrpc/auth_gss/gss_krb5_seal.c | 55 +- trunk/net/sunrpc/auth_gss/gss_krb5_unseal.c | 87 ++- trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c | 153 ++-- trunk/net/sunrpc/auth_gss/gss_spkm3_mech.c | 131 +++- trunk/net/sunrpc/auth_gss/gss_spkm3_seal.c | 101 +-- trunk/net/sunrpc/auth_gss/gss_spkm3_token.c | 6 +- trunk/net/sunrpc/auth_gss/gss_spkm3_unseal.c | 92 +-- trunk/net/sunrpc/clnt.c | 70 +- trunk/net/sunrpc/pmap_clnt.c | 13 +- trunk/net/sunrpc/sched.c | 137 ++-- trunk/net/sunrpc/socklib.c | 18 +- trunk/net/sunrpc/sunrpc_syms.c | 5 +- trunk/net/sunrpc/sysctl.c | 50 ++ trunk/net/sunrpc/xdr.c | 255 +++---- trunk/net/sunrpc/xprt.c | 33 +- trunk/net/sunrpc/xprtsock.c | 712 +++++++------------ 46 files changed, 1780 insertions(+), 1556 deletions(-) create mode 100644 trunk/include/asm-m68knommu/rtc.h diff --git a/[refs] b/[refs] index 8fd7b2893bef..201191d43b60 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 5847e1f4d058677c5e46dc6c3e3c70e8855ea3ba +refs/heads/master: 5340be59093d59826f4477a5f9991c762e4417e8 diff --git a/trunk/fs/lockd/clntproc.c b/trunk/fs/lockd/clntproc.c index 41c983a05294..3d84f600b633 100644 --- a/trunk/fs/lockd/clntproc.c +++ b/trunk/fs/lockd/clntproc.c @@ -729,7 +729,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) goto retry_cancel; } - dprintk("lockd: cancel status %u (task %u)\n", + dprintk("lockd: cancel status %d (task %d)\n", req->a_res.status, task->tk_pid); switch (req->a_res.status) { diff --git a/trunk/fs/lockd/svc4proc.c b/trunk/fs/lockd/svc4proc.c index f67146a8199a..0ce5c81ff507 100644 --- a/trunk/fs/lockd/svc4proc.c +++ b/trunk/fs/lockd/svc4proc.c @@ -234,7 +234,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, */ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) { - dprintk("lockd: %5u callback returned %d\n", task->tk_pid, + dprintk("lockd: %4d callback returned %d\n", task->tk_pid, -task->tk_status); } diff --git a/trunk/fs/lockd/svcproc.c b/trunk/fs/lockd/svcproc.c index 3707c3a23e93..32e99a6e8dca 100644 --- a/trunk/fs/lockd/svcproc.c +++ b/trunk/fs/lockd/svcproc.c @@ -263,7 +263,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, */ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) { - dprintk("lockd: %5u callback returned %d\n", task->tk_pid, + dprintk("lockd: %4d callback returned %d\n", task->tk_pid, -task->tk_status); } diff --git a/trunk/fs/nfs/direct.c b/trunk/fs/nfs/direct.c index 784bbb54e6c1..bdfabf854a51 100644 --- a/trunk/fs/nfs/direct.c +++ b/trunk/fs/nfs/direct.c @@ -307,7 +307,9 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo data->task.tk_cookie = (unsigned long) inode; + lock_kernel(); rpc_execute(&data->task); + unlock_kernel(); dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, @@ -473,7 +475,9 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + lock_kernel(); rpc_execute(&data->task); + unlock_kernel(); } static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) @@ -637,7 +641,9 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l data->task.tk_priority = RPC_PRIORITY_NORMAL; data->task.tk_cookie = (unsigned long) inode; + lock_kernel(); rpc_execute(&data->task); + unlock_kernel(); dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n", data->task.tk_pid, diff --git a/trunk/fs/nfs/file.c b/trunk/fs/nfs/file.c index 8e28bffc35a0..cc93865cea93 100644 --- a/trunk/fs/nfs/file.c +++ b/trunk/fs/nfs/file.c @@ -307,28 +307,28 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse static void nfs_invalidate_page(struct page *page, unsigned long offset) { - if (offset != 0) - return; + struct inode *inode = page->mapping->host; + /* Cancel any unstarted writes on this page */ - nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE); + if (offset == 0) + nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE); } static int nfs_release_page(struct page *page, gfp_t gfp) { - /* - * Avoid deadlock on nfs_wait_on_request(). - */ - if (!(gfp & __GFP_FS)) + if (gfp & __GFP_FS) + return !nfs_wb_page(page->mapping->host, page); + else + /* + * Avoid deadlock on nfs_wait_on_request(). + */ return 0; - /* Hack... Force nfs_wb_page() to write out the page */ - SetPageDirty(page); - return !nfs_wb_page(page->mapping->host, page); } const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, - .set_page_dirty = nfs_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .writepage = nfs_writepage, .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, @@ -375,12 +375,6 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); - /* Return error values for O_SYNC and IS_SYNC() */ - if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { - int err = nfs_fsync(iocb->ki_filp, dentry, 1); - if (err < 0) - result = err; - } out: return result; diff --git a/trunk/fs/nfs/inode.c b/trunk/fs/nfs/inode.c index 7c32187f953e..08cc4c5919ab 100644 --- a/trunk/fs/nfs/inode.c +++ b/trunk/fs/nfs/inode.c @@ -422,7 +422,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int err; /* Flush out writes to the server in order to update c/mtime */ - nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); + nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. diff --git a/trunk/fs/nfs/internal.h b/trunk/fs/nfs/internal.h index a28f6ce2e131..d205466233f6 100644 --- a/trunk/fs/nfs/internal.h +++ b/trunk/fs/nfs/internal.h @@ -217,21 +217,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) sb->s_maxbytes = MAX_LFS_FILESIZE; } - -/* - * Determine the number of bytes of data the page contains - */ -static inline -unsigned int nfs_page_length(struct page *page) -{ - loff_t i_size = i_size_read(page->mapping->host); - - if (i_size > 0) { - pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (page->index < end_index) - return PAGE_CACHE_SIZE; - if (page->index == end_index) - return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1; - } - return 0; -} diff --git a/trunk/fs/nfs/nfs3proc.c b/trunk/fs/nfs/nfs3proc.c index 510ae524f3fd..e5f128ffc32d 100644 --- a/trunk/fs/nfs/nfs3proc.c +++ b/trunk/fs/nfs/nfs3proc.c @@ -276,6 +276,51 @@ static int nfs3_proc_read(struct nfs_read_data *rdata) return status; } +static int nfs3_proc_write(struct nfs_write_data *wdata) +{ + int rpcflags = wdata->flags; + struct inode * inode = wdata->inode; + struct nfs_fattr * fattr = wdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], + .rpc_argp = &wdata->args, + .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, + }; + int status; + + dprintk("NFS call write %d @ %Ld\n", wdata->args.count, + (long long) wdata->args.offset); + nfs_fattr_init(fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); + if (status >= 0) + nfs_post_op_update_inode(inode, fattr); + dprintk("NFS reply write: %d\n", status); + return status < 0? status : wdata->res.count; +} + +static int nfs3_proc_commit(struct nfs_write_data *cdata) +{ + struct inode * inode = cdata->inode; + struct nfs_fattr * fattr = cdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], + .rpc_argp = &cdata->args, + .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, + }; + int status; + + dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, + (long long) cdata->args.offset); + nfs_fattr_init(fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (status >= 0) + nfs_post_op_update_inode(inode, fattr); + dprintk("NFS reply commit: %d\n", status); + return status; +} + /* * Create a regular file. * For now, we don't implement O_EXCL. @@ -324,7 +369,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, /* If the server doesn't support the exclusive creation semantics, * try again with simple 'guarded' mode. */ - if (status == -ENOTSUPP) { + if (status == NFSERR_NOTSUPP) { switch (arg.createmode) { case NFS3_CREATE_EXCLUSIVE: arg.createmode = NFS3_CREATE_GUARDED; @@ -645,6 +690,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; + lock_kernel(); + if (plus) msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS]; @@ -655,6 +702,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply readdir: %d\n", status); + unlock_kernel(); return status; } @@ -856,6 +904,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .access = nfs3_proc_access, .readlink = nfs3_proc_readlink, .read = nfs3_proc_read, + .write = nfs3_proc_write, + .commit = nfs3_proc_commit, .create = nfs3_proc_create, .remove = nfs3_proc_remove, .unlink_setup = nfs3_proc_unlink_setup, diff --git a/trunk/fs/nfs/nfs4proc.c b/trunk/fs/nfs/nfs4proc.c index ee458aeab24a..8118036cc449 100644 --- a/trunk/fs/nfs/nfs4proc.c +++ b/trunk/fs/nfs/nfs4proc.c @@ -636,7 +636,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) smp_wmb(); } else status = data->rpc_status; - rpc_put_task(task); + rpc_release_task(task); return status; } @@ -742,7 +742,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) smp_wmb(); } else status = data->rpc_status; - rpc_put_task(task); + rpc_release_task(task); if (status != 0) return status; @@ -1775,6 +1775,89 @@ static int nfs4_proc_read(struct nfs_read_data *rdata) return err; } +static int _nfs4_proc_write(struct nfs_write_data *wdata) +{ + int rpcflags = wdata->flags; + struct inode *inode = wdata->inode; + struct nfs_fattr *fattr = wdata->res.fattr; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], + .rpc_argp = &wdata->args, + .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, + }; + int status; + + dprintk("NFS call write %d @ %Ld\n", wdata->args.count, + (long long) wdata->args.offset); + + wdata->args.bitmask = server->attr_bitmask; + wdata->res.server = server; + wdata->timestamp = jiffies; + nfs_fattr_init(fattr); + status = rpc_call_sync(server->client, &msg, rpcflags); + dprintk("NFS reply write: %d\n", status); + if (status < 0) + return status; + renew_lease(server, wdata->timestamp); + nfs_post_op_update_inode(inode, fattr); + return wdata->res.count; +} + +static int nfs4_proc_write(struct nfs_write_data *wdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(wdata->inode), + _nfs4_proc_write(wdata), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_commit(struct nfs_write_data *cdata) +{ + struct inode *inode = cdata->inode; + struct nfs_fattr *fattr = cdata->res.fattr; + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], + .rpc_argp = &cdata->args, + .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, + }; + int status; + + dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, + (long long) cdata->args.offset); + + cdata->args.bitmask = server->attr_bitmask; + cdata->res.server = server; + cdata->timestamp = jiffies; + nfs_fattr_init(fattr); + status = rpc_call_sync(server->client, &msg, 0); + if (status >= 0) + renew_lease(server, cdata->timestamp); + dprintk("NFS reply commit: %d\n", status); + if (status >= 0) + nfs_post_op_update_inode(inode, fattr); + return status; +} + +static int nfs4_proc_commit(struct nfs_write_data *cdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(cdata->inode), + _nfs4_proc_commit(cdata), + &exception); + } while (exception.retry); + return err; +} + /* * Got race? * We will need to arrange for the VFS layer to provide an atomic open. @@ -2140,11 +2223,13 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); + lock_kernel(); nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + unlock_kernel(); dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } @@ -2982,7 +3067,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status == 0) nfs_post_op_update_inode(inode, &data->fattr); } - rpc_put_task(task); + rpc_release_task(task); return status; } @@ -3229,7 +3314,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * if (IS_ERR(task)) goto out; status = nfs4_wait_for_completion_rpc_task(task); - rpc_put_task(task); + rpc_release_task(task); out: return status; } @@ -3345,7 +3430,7 @@ static void nfs4_lock_release(void *calldata) task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); if (!IS_ERR(task)) - rpc_put_task(task); + rpc_release_task(task); dprintk("%s: cancelling lock!\n", __FUNCTION__); } else nfs_free_seqid(data->arg.lock_seqid); @@ -3387,7 +3472,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f ret = -EAGAIN; } else data->cancelled = 1; - rpc_put_task(task); + rpc_release_task(task); dprintk("%s: done, ret = %d!\n", __FUNCTION__, ret); return ret; } @@ -3647,6 +3732,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, .read = nfs4_proc_read, + .write = nfs4_proc_write, + .commit = nfs4_proc_commit, .create = nfs4_proc_create, .remove = nfs4_proc_remove, .unlink_setup = nfs4_proc_unlink_setup, diff --git a/trunk/fs/nfs/pagelist.c b/trunk/fs/nfs/pagelist.c index bc9fab68b29c..829af323f288 100644 --- a/trunk/fs/nfs/pagelist.c +++ b/trunk/fs/nfs/pagelist.c @@ -17,7 +17,6 @@ #include #include #include -#include #define NFS_PARANOIA 1 @@ -269,10 +268,11 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, #define NFS_SCAN_MAXENTRIES 16 /** - * nfs_scan_dirty - Scan the radix tree for dirty requests - * @mapping: pointer to address space - * @wbc: writeback_control structure + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests + * @nfsi: NFS inode * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. * * Moves elements from one of the inode request lists. * If the number of requests is set to 0, the entire address_space @@ -280,63 +280,46 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, * The requests are *not* checked to ensure that they form a contiguous set. * You must be holding the inode's req_lock when calling this function */ -long nfs_scan_dirty(struct address_space *mapping, - struct writeback_control *wbc, - struct list_head *dst) +int +nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages) { - struct nfs_inode *nfsi = NFS_I(mapping->host); struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; struct nfs_page *req; - pgoff_t idx_start, idx_end; - long res = 0; + unsigned long idx_end; int found, i; + int res; - if (nfsi->ndirty == 0) - return 0; - if (wbc->range_cyclic) { - idx_start = 0; - idx_end = ULONG_MAX; - } else if (wbc->range_end == 0) { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = ULONG_MAX; - } else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - } + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; for (;;) { - unsigned int toscan = NFS_SCAN_MAXENTRIES; - found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, - (void **)&pgvec[0], idx_start, toscan, + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, NFS_PAGE_TAG_DIRTY); - - /* Did we make progress? */ if (found <= 0) break; - for (i = 0; i < found; i++) { req = pgvec[i]; - if (!wbc->range_cyclic && req->wb_index > idx_end) + if (req->wb_index > idx_end) goto out; - /* Try to lock request and mark it for writeback */ - if (!nfs_set_page_writeback_locked(req)) - goto next; - radix_tree_tag_clear(&nfsi->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_DIRTY); - nfsi->ndirty--; - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - res++; - if (res == LONG_MAX) - goto out; -next: idx_start = req->wb_index + 1; + + if (nfs_set_page_writeback_locked(req)) { + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + dec_zone_page_state(req->wb_page, NR_FILE_DIRTY); + res++; + } } } out: - WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)); return res; } diff --git a/trunk/fs/nfs/proc.c b/trunk/fs/nfs/proc.c index 10f5e80ca157..4529cc4f3f8f 100644 --- a/trunk/fs/nfs/proc.c +++ b/trunk/fs/nfs/proc.c @@ -215,6 +215,32 @@ static int nfs_proc_read(struct nfs_read_data *rdata) return status; } +static int nfs_proc_write(struct nfs_write_data *wdata) +{ + int flags = wdata->flags; + struct inode * inode = wdata->inode; + struct nfs_fattr * fattr = wdata->res.fattr; + struct rpc_message msg = { + .rpc_proc = &nfs_procedures[NFSPROC_WRITE], + .rpc_argp = &wdata->args, + .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, + }; + int status; + + dprintk("NFS call write %d @ %Ld\n", wdata->args.count, + (long long) wdata->args.offset); + nfs_fattr_init(fattr); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); + if (status >= 0) { + nfs_post_op_update_inode(inode, fattr); + wdata->res.count = wdata->args.count; + wdata->verf.committed = NFS_FILE_SYNC; + } + dprintk("NFS reply write: %d\n", status); + return status < 0? status : wdata->res.count; +} + static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nameidata *nd) @@ -519,10 +545,13 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; + lock_kernel(); + dprintk("NFS call readdir %d\n", (unsigned int)cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); dprintk("NFS reply readdir: %d\n", status); + unlock_kernel(); return status; } @@ -667,6 +696,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .access = NULL, /* access */ .readlink = nfs_proc_readlink, .read = nfs_proc_read, + .write = nfs_proc_write, + .commit = NULL, /* commit */ .create = nfs_proc_create, .remove = nfs_proc_remove, .unlink_setup = nfs_proc_unlink_setup, diff --git a/trunk/fs/nfs/read.c b/trunk/fs/nfs/read.c index 05cca6609977..c2e49c397a27 100644 --- a/trunk/fs/nfs/read.c +++ b/trunk/fs/nfs/read.c @@ -30,7 +30,6 @@ #include -#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -66,22 +65,32 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len) return p; } -static void nfs_readdata_rcu_free(struct rcu_head *head) +static void nfs_readdata_free(struct nfs_read_data *p) { - struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } -static void nfs_readdata_free(struct nfs_read_data *rdata) +void nfs_readdata_release(void *data) { - call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free); + nfs_readdata_free(data); } -void nfs_readdata_release(void *data) +static +unsigned int nfs_page_length(struct inode *inode, struct page *page) { - nfs_readdata_free(data); + loff_t i_size = i_size_read(inode); + unsigned long idx; + + if (i_size <= 0) + return 0; + idx = (i_size - 1) >> PAGE_CACHE_SHIFT; + if (page->index > idx) + return 0; + if (page->index != idx) + return PAGE_CACHE_SIZE; + return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1)); } static @@ -130,12 +139,12 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, { unsigned int rsize = NFS_SERVER(inode)->rsize; unsigned int count = PAGE_CACHE_SIZE; - int result = -ENOMEM; + int result; struct nfs_read_data *rdata; rdata = nfs_readdata_alloc(count); if (!rdata) - goto out_unlock; + return -ENOMEM; memset(rdata, 0, sizeof(*rdata)); rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); @@ -203,9 +212,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, result = 0; io_error: - nfs_readdata_free(rdata); -out_unlock: unlock_page(page); + nfs_readdata_free(rdata); return result; } @@ -216,7 +224,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *new; unsigned int len; - len = nfs_page_length(page); + len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); new = nfs_create_request(ctx, inode, page, 0, len); @@ -308,7 +316,9 @@ static void nfs_execute_read(struct nfs_read_data *data) sigset_t oldset; rpc_clnt_sigmask(clnt, &oldset); + lock_kernel(); rpc_execute(&data->task); + unlock_kernel(); rpc_clnt_sigunmask(clnt, &oldset); } @@ -444,55 +454,6 @@ nfs_pagein_list(struct list_head *head, int rpages) return error; } -/* - * This is the callback from RPC telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). - */ -int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) -{ - int status; - - dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid, - task->tk_status); - - status = NFS_PROTO(data->inode)->read_done(task, data); - if (status != 0) - return status; - - nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); - - if (task->tk_status == -ESTALE) { - set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); - nfs_mark_for_revalidate(data->inode); - } - spin_lock(&data->inode->i_lock); - NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&data->inode->i_lock); - return 0; -} - -static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) -{ - struct nfs_readargs *argp = &data->args; - struct nfs_readres *resp = &data->res; - - if (resp->eof || resp->count == argp->count) - return 0; - - /* This is a short read! */ - nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); - /* Has the server at least made some progress? */ - if (resp->count == 0) - return 0; - - /* Yes, so retry the read at the end of the data */ - argp->offset += resp->count; - argp->pgbase += resp->count; - argp->count -= resp->count; - rpc_restart_call(task); - return -EAGAIN; -} - /* * Handle a read reply that fills part of a page. */ @@ -502,16 +463,12 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) struct nfs_page *req = data->req; struct page *page = req->wb_page; - if (nfs_readpage_result(task, data) != 0) - return; - - if (likely(task->tk_status >= 0)) { + if (likely(task->tk_status >= 0)) nfs_readpage_truncate_uninitialised_page(data); - if (nfs_readpage_retry(task, data) != 0) - return; - } - if (unlikely(task->tk_status < 0)) + else SetPageError(page); + if (nfs_readpage_result(task, data) != 0) + return; if (atomic_dec_and_test(&req->wb_complete)) { if (!PageError(page)) SetPageUptodate(page); @@ -539,13 +496,25 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageUptodate(*pages); - if (count == 0) - return; - /* Was this a short read? */ - if (data->res.eof || data->res.count == data->args.count) + if (count != 0) SetPageUptodate(*pages); } +static void nfs_readpage_set_pages_error(struct nfs_read_data *data) +{ + unsigned int count = data->args.count; + unsigned int base = data->args.pgbase; + struct page **pages; + + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + count += base; + for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) + SetPageError(*pages); + if (count != 0) + SetPageError(*pages); +} + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -554,20 +523,19 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - if (nfs_readpage_result(task, data) != 0) - return; /* - * Note: nfs_readpage_retry may change the values of + * Note: nfs_readpage_result may change the values of * data->args. In the multi-page case, we therefore need - * to ensure that we call nfs_readpage_set_pages_uptodate() - * first. + * to ensure that we call the next nfs_readpage_set_page_uptodate() + * first in the multi-page case. */ if (likely(task->tk_status >= 0)) { nfs_readpage_truncate_uninitialised_page(data); nfs_readpage_set_pages_uptodate(data); - if (nfs_readpage_retry(task, data) != 0) - return; - } + } else + nfs_readpage_set_pages_error(data); + if (nfs_readpage_result(task, data) != 0) + return; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); @@ -581,6 +549,50 @@ static const struct rpc_call_ops nfs_read_full_ops = { .rpc_release = nfs_readdata_release, }; +/* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) +{ + struct nfs_readargs *argp = &data->args; + struct nfs_readres *resp = &data->res; + int status; + + dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", + task->tk_pid, task->tk_status); + + status = NFS_PROTO(data->inode)->read_done(task, data); + if (status != 0) + return status; + + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); + + if (task->tk_status < 0) { + if (task->tk_status == -ESTALE) { + set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); + nfs_mark_for_revalidate(data->inode); + } + } else if (resp->count < argp->count && !resp->eof) { + /* This is a short read! */ + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); + /* Has the server at least made some progress? */ + if (resp->count != 0) { + /* Yes, so retry the read at the end of the data */ + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + rpc_restart_call(task); + return -EAGAIN; + } + task->tk_status = -EIO; + } + spin_lock(&data->inode->i_lock); + NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&data->inode->i_lock); + return 0; +} + /* * Read a page over NFS. * We read the page synchronously in the following case: @@ -614,10 +626,9 @@ int nfs_readpage(struct file *file, struct page *page) goto out_error; if (file == NULL) { - error = -EBADF; ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (ctx == NULL) - goto out_error; + return -EBADF; } else ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); @@ -652,7 +663,7 @@ readpage_async_filler(void *data, struct page *page) unsigned int len; nfs_wb_page(inode, page); - len = nfs_page_length(page); + len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); new = nfs_create_request(desc->ctx, inode, page, 0, len); diff --git a/trunk/fs/nfs/symlink.c b/trunk/fs/nfs/symlink.c index 6c686112cc03..600bbe630abd 100644 --- a/trunk/fs/nfs/symlink.c +++ b/trunk/fs/nfs/symlink.c @@ -33,7 +33,9 @@ static int nfs_symlink_filler(struct inode *inode, struct page *page) { int error; + lock_kernel(); error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); + unlock_kernel(); if (error < 0) goto error; SetPageUptodate(page); diff --git a/trunk/fs/nfs/write.c b/trunk/fs/nfs/write.c index 7f3844d2bf36..883dd4a1c157 100644 --- a/trunk/fs/nfs/write.c +++ b/trunk/fs/nfs/write.c @@ -63,7 +63,6 @@ #include #include "delegation.h" -#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -75,12 +74,13 @@ * Local function declarations */ static struct nfs_page * nfs_update_request(struct nfs_open_context*, + struct inode *, struct page *, unsigned int, unsigned int); -static void nfs_mark_request_dirty(struct nfs_page *req); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); -static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); +static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; @@ -102,19 +102,13 @@ struct nfs_write_data *nfs_commit_alloc(void) return p; } -void nfs_commit_rcu_free(struct rcu_head *head) +void nfs_commit_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } -void nfs_commit_free(struct nfs_write_data *wdata) -{ - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); -} - struct nfs_write_data *nfs_writedata_alloc(size_t len) { unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -137,47 +131,18 @@ struct nfs_write_data *nfs_writedata_alloc(size_t len) return p; } -static void nfs_writedata_rcu_free(struct rcu_head *head) +static void nfs_writedata_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_free(struct nfs_write_data *wdata) -{ - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free); -} - void nfs_writedata_release(void *wdata) { nfs_writedata_free(wdata); } -static struct nfs_page *nfs_page_find_request_locked(struct page *page) -{ - struct nfs_page *req = NULL; - - if (PagePrivate(page)) { - req = (struct nfs_page *)page_private(page); - if (req != NULL) - atomic_inc(&req->wb_count); - } - return req; -} - -static struct nfs_page *nfs_page_find_request(struct page *page) -{ - struct nfs_page *req = NULL; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; - - spin_lock(req_lock); - req = nfs_page_find_request_locked(page); - spin_unlock(req_lock); - return req; -} - /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { @@ -199,34 +164,113 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c */ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) { + loff_t end_offs; + if (PageUptodate(page)) return; if (base != 0) return; - if (count != nfs_page_length(page)) + if (count == PAGE_CACHE_SIZE) { + SetPageUptodate(page); + return; + } + + end_offs = i_size_read(page->mapping->host) - 1; + if (end_offs < 0) return; - if (count != PAGE_CACHE_SIZE) + /* Is this the last page? */ + if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT)) + return; + /* This is the last page: set PG_uptodate if we cover the entire + * extent of the data, then zero the rest of the page. + */ + if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) { memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); - SetPageUptodate(page); + SetPageUptodate(page); + } } -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, +/* + * Write a page synchronously. + * Offset is the data offset within the page. + */ +static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, + struct page *page, unsigned int offset, unsigned int count, + int how) +{ + unsigned int wsize = NFS_SERVER(inode)->wsize; + int result, written = 0; + struct nfs_write_data *wdata; + + wdata = nfs_writedata_alloc(wsize); + if (!wdata) + return -ENOMEM; + + wdata->flags = how; + wdata->cred = ctx->cred; + wdata->inode = inode; + wdata->args.fh = NFS_FH(inode); + wdata->args.context = ctx; + wdata->args.pages = &page; + wdata->args.stable = NFS_FILE_SYNC; + wdata->args.pgbase = offset; + wdata->args.count = wsize; + wdata->res.fattr = &wdata->fattr; + wdata->res.verf = &wdata->verf; + + dprintk("NFS: nfs_writepage_sync(%s/%Ld %d@%Ld)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + count, (long long)(page_offset(page) + offset)); + + set_page_writeback(page); + nfs_begin_data_update(inode); + do { + if (count < wsize) + wdata->args.count = count; + wdata->args.offset = page_offset(page) + wdata->args.pgbase; + + result = NFS_PROTO(inode)->write(wdata); + + if (result < 0) { + /* Must mark the page invalid after I/O error */ + ClearPageUptodate(page); + goto io_error; + } + if (result < wdata->args.count) + printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n", + wdata->args.count, result); + + wdata->args.offset += result; + wdata->args.pgbase += result; + written += result; + count -= result; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); + } while (count); + /* Update file length */ + nfs_grow_file(page, offset, written); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, offset, written); + + if (PageError(page)) + ClearPageError(page); + +io_error: + nfs_end_data_update(inode); + end_page_writeback(page); + nfs_writedata_free(wdata); + return written ? written : result; +} + +static int nfs_writepage_async(struct nfs_open_context *ctx, + struct inode *inode, struct page *page, unsigned int offset, unsigned int count) { struct nfs_page *req; - int ret; - for (;;) { - req = nfs_update_request(ctx, page, offset, count); - if (!IS_ERR(req)) - break; - ret = PTR_ERR(req); - if (ret != -EBUSY) - return ret; - ret = nfs_wb_page(page->mapping->host, page); - if (ret != 0) - return ret; - } + req = nfs_update_request(ctx, inode, page, offset, count); + if (IS_ERR(req)) + return PTR_ERR(req); /* Update file length */ nfs_grow_file(page, offset, count); /* Set the PG_uptodate flag? */ @@ -244,95 +288,74 @@ static int wb_priority(struct writeback_control *wbc) return 0; } -/* - * Find an associated nfs write request, and prepare to flush it out - * Returns 1 if there was no write request, or if the request was - * already tagged by nfs_set_page_dirty.Returns 0 if the request - * was not tagged. - * May also return an error if the user signalled nfs_wait_on_request(). - */ -static int nfs_page_mark_flush(struct page *page) -{ - struct nfs_page *req; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; - int ret; - - spin_lock(req_lock); - for(;;) { - req = nfs_page_find_request_locked(page); - if (req == NULL) { - spin_unlock(req_lock); - return 1; - } - if (nfs_lock_request_dontget(req)) - break; - /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request_dontget() will always - * succeed provided that someone hasn't already marked the - * request as dirty (in which case we don't care). - */ - spin_unlock(req_lock); - ret = nfs_wait_on_request(req); - nfs_release_request(req); - if (ret != 0) - return ret; - spin_lock(req_lock); - } - spin_unlock(req_lock); - if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { - nfs_mark_request_dirty(req); - set_page_writeback(page); - } - ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); - nfs_unlock_request(req); - return ret; -} - /* * Write an mmapped page to the server. */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +int nfs_writepage(struct page *page, struct writeback_control *wbc) { struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; - unsigned offset; + unsigned long end_index; + unsigned offset = PAGE_CACHE_SIZE; + loff_t i_size = i_size_read(inode); + int inode_referenced = 0; + int priority = wb_priority(wbc); int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - err = nfs_page_mark_flush(page); - if (err <= 0) - goto out; - err = 0; - offset = nfs_page_length(page); - if (!offset) + /* + * Note: We need to ensure that we have a reference to the inode + * if we are to do asynchronous writes. If not, waiting + * in nfs_wait_on_request() may deadlock with clear_inode(). + * + * If igrab() fails here, then it is in any case safe to + * call nfs_wb_page(), since there will be no pending writes. + */ + if (igrab(inode) != 0) + inode_referenced = 1; + end_index = i_size >> PAGE_CACHE_SHIFT; + + /* Ensure we've flushed out any previous writes */ + nfs_wb_page_priority(inode, page, priority); + + /* easy case */ + if (page->index < end_index) + goto do_it; + /* things got complicated... */ + offset = i_size & (PAGE_CACHE_SIZE-1); + + /* OK, are we completely out? */ + err = 0; /* potential race with truncate - ignore */ + if (page->index >= end_index+1 || !offset) goto out; - +do_it: ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); if (ctx == NULL) { err = -EBADF; goto out; } - err = nfs_writepage_setup(ctx, page, 0, offset); + lock_kernel(); + if (!IS_SYNC(inode) && inode_referenced) { + err = nfs_writepage_async(ctx, inode, page, 0, offset); + if (!wbc->for_writepages) + nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); + } else { + err = nfs_writepage_sync(ctx, inode, page, 0, + offset, priority); + if (err >= 0) { + if (err != offset) + redirty_page_for_writepage(wbc, page); + err = 0; + } + } + unlock_kernel(); put_nfs_open_context(ctx); - if (err != 0) - goto out; - err = nfs_page_mark_flush(page); - if (err > 0) - err = 0; out: - if (!wbc->for_writepages) - nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc)); - return err; -} - -int nfs_writepage(struct page *page, struct writeback_control *wbc) -{ - int err; - - err = nfs_writepage_locked(page, wbc); unlock_page(page); + if (inode_referenced) + iput(inode); return err; } @@ -356,18 +379,21 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) return 0; nfs_wait_on_write_congestion(mapping, 0); } - err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); + err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); if (err < 0) goto out; nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); + wbc->nr_to_write -= err; if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wait_on_requests(inode, 0, 0); if (err < 0) goto out; } err = nfs_commit_inode(inode, wb_priority(wbc)); - if (err > 0) + if (err > 0) { + wbc->nr_to_write -= err; err = 0; + } out: clear_bit(BDI_write_congested, &bdi->state); wake_up_all(&nfs_write_congestion); @@ -394,7 +420,6 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfsi->change_attr++; } SetPagePrivate(req->wb_page); - set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; atomic_inc(&req->wb_count); return 0; @@ -411,7 +436,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfsi->req_lock); - set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; @@ -425,6 +449,33 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfs_release_request(req); } +/* + * Find a request + */ +static inline struct nfs_page * +_nfs_find_request(struct inode *inode, unsigned long index) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *req; + + req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index); + if (req) + atomic_inc(&req->wb_count); + return req; +} + +static struct nfs_page * +nfs_find_request(struct inode *inode, unsigned long index) +{ + struct nfs_page *req; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&nfsi->req_lock); + req = _nfs_find_request(inode, index); + spin_unlock(&nfsi->req_lock); + return req; +} + /* * Add a request to the inode's dirty list. */ @@ -440,14 +491,8 @@ nfs_mark_request_dirty(struct nfs_page *req) nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); - __mark_inode_dirty(inode, I_DIRTY_PAGES); -} - -static void -nfs_redirty_request(struct nfs_page *req) -{ - clear_bit(PG_FLUSHING, &req->wb_flags); - __set_page_dirty_nobuffers(req->wb_page); + inc_zone_page_state(req->wb_page, NR_FILE_DIRTY); + mark_inode_dirty(inode); } /* @@ -456,7 +501,8 @@ nfs_redirty_request(struct nfs_page *req) static inline int nfs_dirty_request(struct nfs_page *req) { - return test_bit(PG_FLUSHING, &req->wb_flags) == 0; + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty; } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -474,7 +520,7 @@ nfs_mark_request_commit(struct nfs_page *req) nfsi->ncommit++; spin_unlock(&nfsi->req_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + mark_inode_dirty(inode); } #endif @@ -551,6 +597,31 @@ static void nfs_cancel_commit_list(struct list_head *head) } } +/* + * nfs_scan_dirty - Scan an inode for dirty requests + * @inode: NFS inode to scan + * @dst: destination list + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves requests from the inode's dirty page list. + * The requests are *not* checked to ensure that they form a contiguous set. + */ +static int +nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int res = 0; + + if (nfsi->ndirty != 0) { + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); + nfsi->ndirty -= res; + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + } + return res; +} + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) /* * nfs_scan_commit - Scan an inode for commit requests @@ -627,27 +698,27 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) * Note: Should always be called with the Page Lock held! */ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, - struct page *page, unsigned int offset, unsigned int bytes) + struct inode *inode, struct page *page, + unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; unsigned long rqend, end; end = offset + bytes; - if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR)) + if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR)) return ERR_PTR(-ERESTARTSYS); for (;;) { /* Loop over all inode entries and see if we find * A request for the page we wish to update */ spin_lock(&nfsi->req_lock); - req = nfs_page_find_request_locked(page); + req = _nfs_find_request(inode, page->index); if (req) { if (!nfs_lock_request_dontget(req)) { int error; - spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); @@ -674,6 +745,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, return ERR_PTR(error); } spin_unlock(&nfsi->req_lock); + nfs_mark_request_dirty(new); return new; } spin_unlock(&nfsi->req_lock); @@ -714,8 +786,9 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct inode *inode = page->mapping->host; struct nfs_page *req; - int do_flush, status; + int status = 0; /* * Look for a request corresponding to this page. If there * is one, and it belongs to another file, we flush it out @@ -724,18 +797,13 @@ int nfs_flush_incompatible(struct file *file, struct page *page) * Also do the same if we find a request from an existing * dropped page. */ - do { - req = nfs_page_find_request(page); - if (req == NULL) - return 0; - do_flush = req->wb_page != page || req->wb_context != ctx - || !nfs_dirty_request(req); + req = nfs_find_request(inode, page->index); + if (req) { + if (req->wb_page != page || ctx != req->wb_context) + status = nfs_wb_page(inode, page); nfs_release_request(req); - if (!do_flush) - return 0; - status = nfs_wb_page(page->mapping->host, page); - } while (status == 0); - return status; + } + return (status < 0) ? status : 0; } /* @@ -749,6 +817,7 @@ int nfs_updatepage(struct file *file, struct page *page, { struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = page->mapping->host; + struct nfs_page *req; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); @@ -758,18 +827,62 @@ int nfs_updatepage(struct file *file, struct page *page, file->f_dentry->d_name.name, count, (long long)(page_offset(page) +offset)); + if (IS_SYNC(inode)) { + status = nfs_writepage_sync(ctx, inode, page, offset, count, 0); + if (status > 0) { + if (offset == 0 && status == PAGE_CACHE_SIZE) + SetPageUptodate(page); + return 0; + } + return status; + } + /* If we're not using byte range locks, and we know the page * is entirely in cache, it may be more efficient to avoid * fragmenting write requests. */ if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { - count = max(count + offset, nfs_page_length(page)); + loff_t end_offs = i_size_read(inode) - 1; + unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; + + count += offset; offset = 0; + if (unlikely(end_offs < 0)) { + /* Do nothing */ + } else if (page->index == end_index) { + unsigned int pglen; + pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1; + if (count < pglen) + count = pglen; + } else if (page->index < end_index) + count = PAGE_CACHE_SIZE; } - status = nfs_writepage_setup(ctx, page, offset, count); - __set_page_dirty_nobuffers(page); + /* + * Try to find an NFS request corresponding to this page + * and update it. + * If the existing request cannot be updated, we must flush + * it out now. + */ + do { + req = nfs_update_request(ctx, inode, page, offset, count); + status = (IS_ERR(req)) ? PTR_ERR(req) : 0; + if (status != -EBUSY) + break; + /* Request could not be updated. Flush it out and try again */ + status = nfs_wb_page(inode, page); + } while (status >= 0); + if (status < 0) + goto done; + + status = 0; + /* Update file length */ + nfs_grow_file(page, offset, count); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_unlock_request(req); +done: dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); if (status < 0) @@ -784,7 +897,7 @@ static void nfs_writepage_release(struct nfs_page *req) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (!PageError(req->wb_page)) { if (NFS_NEED_RESCHED(req)) { - nfs_redirty_request(req); + nfs_mark_request_dirty(req); goto out; } else if (NFS_NEED_COMMIT(req)) { nfs_mark_request_commit(req); @@ -866,7 +979,9 @@ static void nfs_execute_write(struct nfs_write_data *data) sigset_t oldset; rpc_clnt_sigmask(clnt, &oldset); + lock_kernel(); rpc_execute(&data->task); + unlock_kernel(); rpc_clnt_sigunmask(clnt, &oldset); } @@ -900,6 +1015,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) atomic_set(&req->wb_complete, requests); ClearPageError(page); + set_page_writeback(page); offset = 0; nbytes = req->wb_bytes; do { @@ -927,9 +1043,9 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) while (!list_empty(&list)) { data = list_entry(list.next, struct nfs_write_data, pages); list_del(&data->pages); - nfs_writedata_release(data); + nfs_writedata_free(data); } - nfs_redirty_request(req); + nfs_mark_request_dirty(req); nfs_clear_page_writeback(req); return -ENOMEM; } @@ -960,6 +1076,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); ClearPageError(req->wb_page); + set_page_writeback(req->wb_page); *pages++ = req->wb_page; count += req->wb_bytes; } @@ -974,7 +1091,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_redirty_request(req); + nfs_mark_request_dirty(req); nfs_clear_page_writeback(req); } return -ENOMEM; @@ -1009,7 +1126,7 @@ static int nfs_flush_list(struct inode *inode, struct list_head *head, int npage while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_redirty_request(req); + nfs_mark_request_dirty(req); nfs_clear_page_writeback(req); } return error; @@ -1325,7 +1442,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) } /* We have a mismatch. Write the page again */ dprintk(" mismatch\n"); - nfs_redirty_request(req); + nfs_mark_request_dirty(req); next: nfs_clear_page_writeback(req); } @@ -1342,17 +1459,18 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i } #endif -static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) +static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { - struct nfs_inode *nfsi = NFS_I(mapping->host); + struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - long res; + int res; spin_lock(&nfsi->req_lock); - res = nfs_scan_dirty(mapping, wbc, &head); + res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); if (res) { - int error = nfs_flush_list(mapping->host, &head, res, how); + int error = nfs_flush_list(inode, &head, res, how); if (error < 0) return error; } @@ -1378,62 +1496,38 @@ int nfs_commit_inode(struct inode *inode, int how) } #endif -long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) +int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { - struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); - unsigned long idx_start, idx_end; - unsigned int npages = 0; LIST_HEAD(head); int nocommit = how & FLUSH_NOCOMMIT; - long pages, ret; - - /* FIXME */ - if (wbc->range_cyclic) - idx_start = 0; - else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (idx_end > idx_start) { - unsigned long l_npages = 1 + idx_end - idx_start; - npages = l_npages; - if (sizeof(npages) != sizeof(l_npages) && - (unsigned long)npages != l_npages) - npages = 0; - } - } + int pages, ret; + how &= ~FLUSH_NOCOMMIT; spin_lock(&nfsi->req_lock); do { - wbc->pages_skipped = 0; ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) continue; - pages = nfs_scan_dirty(mapping, wbc, &head); + pages = nfs_scan_dirty(inode, &head, idx_start, npages); if (pages != 0) { spin_unlock(&nfsi->req_lock); - if (how & FLUSH_INVALIDATE) { + if (how & FLUSH_INVALIDATE) nfs_cancel_dirty_list(&head); - ret = pages; - } else + else ret = nfs_flush_list(inode, &head, pages, how); spin_lock(&nfsi->req_lock); continue; } - if (wbc->pages_skipped != 0) - continue; if (nocommit) break; pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) { - if (wbc->pages_skipped != 0) - continue; + if (pages == 0) break; - } if (how & FLUSH_INVALIDATE) { spin_unlock(&nfsi->req_lock); nfs_cancel_commit_list(&head); - ret = pages; spin_lock(&nfsi->req_lock); continue; } @@ -1446,106 +1540,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) -{ - struct address_space *mapping = inode->i_mapping; - struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_writepages = 1, - .range_cyclic = 1, - }; - int ret; - - ret = generic_writepages(mapping, &wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, 0); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; -} - -int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) -{ - struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - .for_writepages = 1, - }; - int ret; - - if (!(how & FLUSH_NOWRITEPAGE)) { - ret = generic_writepages(mapping, &wbc); - if (ret < 0) - goto out; - } - ret = nfs_sync_mapping_wait(mapping, &wbc, how); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; -} - -int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) -{ - loff_t range_start = page_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); - struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - }; - int ret; - - BUG_ON(!PageLocked(page)); - if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc); - if (ret < 0) - goto out; - } - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(inode, I_DIRTY_PAGES); - return ret; -} - -/* - * Write back all requests on one page - we do this before reading it. - */ -int nfs_wb_page(struct inode *inode, struct page* page) -{ - return nfs_wb_page_priority(inode, page, FLUSH_STABLE); -} - -int nfs_set_page_dirty(struct page *page) -{ - struct nfs_page *req; - - req = nfs_page_find_request(page); - if (req != NULL) { - /* Mark any existing write requests for flushing */ - set_bit(PG_NEED_FLUSH, &req->wb_flags); - nfs_release_request(req); - } - return __set_page_dirty_nobuffers(page); -} - - int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", diff --git a/trunk/include/asm-m68knommu/rtc.h b/trunk/include/asm-m68knommu/rtc.h new file mode 100644 index 000000000000..eaf18ec83c8e --- /dev/null +++ b/trunk/include/asm-m68knommu/rtc.h @@ -0,0 +1 @@ +#include diff --git a/trunk/include/linux/nfs_fs.h b/trunk/include/linux/nfs_fs.h index 04963063e620..625ffea98561 100644 --- a/trunk/include/linux/nfs_fs.h +++ b/trunk/include/linux/nfs_fs.h @@ -33,7 +33,6 @@ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ #define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ #define FLUSH_INVALIDATE 64 /* Invalidate the page cache */ -#define FLUSH_NOWRITEPAGE 128 /* Don't call writepage() */ #ifdef __KERNEL__ @@ -428,21 +427,19 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); -extern int nfs_set_page_dirty(struct page *); + +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +struct nfs_write_data *nfs_commit_alloc(void); +void nfs_commit_free(struct nfs_write_data *p); +#endif /* * Try to write back everything synchronously (but check the * return value!) */ -extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); -extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); -extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_page(struct inode *inode, struct page* page); -extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); +extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_write_data *nfs_commit_alloc(void); -extern void nfs_commit_free(struct nfs_write_data *wdata); extern void nfs_commit_release(void *wdata); #else static inline int @@ -458,6 +455,28 @@ nfs_have_writebacks(struct inode *inode) return NFS_I(inode)->npages != 0; } +static inline int +nfs_wb_all(struct inode *inode) +{ + int error = nfs_sync_inode_wait(inode, 0, 0, 0); + return (error < 0) ? error : 0; +} + +/* + * Write back all requests on one page - we do this before reading it. + */ +static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how) +{ + int error = nfs_sync_inode_wait(inode, page->index, 1, + how | FLUSH_STABLE); + return (error < 0) ? error : 0; +} + +static inline int nfs_wb_page(struct inode *inode, struct page* page) +{ + return nfs_wb_page_priority(inode, page, 0); +} + /* * Allocate nfs_write_data structures */ diff --git a/trunk/include/linux/nfs_page.h b/trunk/include/linux/nfs_page.h index 2e555d49c9b7..1f7bd287c230 100644 --- a/trunk/include/linux/nfs_page.h +++ b/trunk/include/linux/nfs_page.h @@ -30,8 +30,6 @@ #define PG_BUSY 0 #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 -#define PG_NEED_FLUSH 3 -#define PG_FLUSHING 4 struct nfs_inode; struct nfs_page { @@ -62,9 +60,8 @@ extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -extern long nfs_scan_dirty(struct address_space *mapping, - struct writeback_control *wbc, - struct list_head *dst); +extern int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages); extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, unsigned long idx_start, unsigned int npages); extern int nfs_coalesce_requests(struct list_head *, struct list_head *, diff --git a/trunk/include/linux/nfs_xdr.h b/trunk/include/linux/nfs_xdr.h index 9ee9da5e1cc9..768c1ad5ff6f 100644 --- a/trunk/include/linux/nfs_xdr.h +++ b/trunk/include/linux/nfs_xdr.h @@ -785,6 +785,8 @@ struct nfs_rpc_ops { int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); int (*read) (struct nfs_read_data *); + int (*write) (struct nfs_write_data *); + int (*commit) (struct nfs_write_data *); int (*create) (struct inode *, struct dentry *, struct iattr *, int, struct nameidata *); int (*remove) (struct inode *, struct qstr *); diff --git a/trunk/include/linux/sunrpc/auth_gss.h b/trunk/include/linux/sunrpc/auth_gss.h index 2db2fbf34947..97b62e97dd8d 100644 --- a/trunk/include/linux/sunrpc/auth_gss.h +++ b/trunk/include/linux/sunrpc/auth_gss.h @@ -90,6 +90,8 @@ struct gss_cred { #define gc_flags gc_base.cr_flags #define gc_expire gc_base.cr_expire +void print_hexl(u32 *p, u_int length, u_int offset); + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ diff --git a/trunk/include/linux/sunrpc/clnt.h b/trunk/include/linux/sunrpc/clnt.h index a1be89deb3af..f6d1d646ce05 100644 --- a/trunk/include/linux/sunrpc/clnt.h +++ b/trunk/include/linux/sunrpc/clnt.h @@ -53,7 +53,6 @@ struct rpc_clnt { struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; - struct rpc_program * cl_program; char cl_inline_name[32]; }; diff --git a/trunk/include/linux/sunrpc/debug.h b/trunk/include/linux/sunrpc/debug.h index 60fce3c92857..e4729aa67654 100644 --- a/trunk/include/linux/sunrpc/debug.h +++ b/trunk/include/linux/sunrpc/debug.h @@ -62,6 +62,12 @@ extern unsigned int nlm_debug; # define RPC_IFDEBUG(x) #endif +#ifdef RPC_PROFILE +# define pprintk(args...) printk(## args) +#else +# define pprintk(args...) do ; while (0) +#endif + /* * Sysctl interface for RPC debugging */ diff --git a/trunk/include/linux/sunrpc/gss_krb5.h b/trunk/include/linux/sunrpc/gss_krb5.h index 5a4b1e0206e3..e30ba201910a 100644 --- a/trunk/include/linux/sunrpc/gss_krb5.h +++ b/trunk/include/linux/sunrpc/gss_krb5.h @@ -42,6 +42,10 @@ struct krb5_ctx { int initiate; /* 1 = initiating, 0 = accepting */ + int seed_init; + unsigned char seed[16]; + int signalg; + int sealalg; struct crypto_blkcipher *enc; struct crypto_blkcipher *seq; s32 endtime; @@ -113,7 +117,7 @@ enum seal_alg { #define ENCTYPE_UNKNOWN 0x01ff s32 -make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body, +make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum); u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, diff --git a/trunk/include/linux/sunrpc/gss_spkm3.h b/trunk/include/linux/sunrpc/gss_spkm3.h index e3e6a3437f8b..2cf3fbb40b4f 100644 --- a/trunk/include/linux/sunrpc/gss_spkm3.h +++ b/trunk/include/linux/sunrpc/gss_spkm3.h @@ -12,19 +12,27 @@ #include struct spkm3_ctx { - struct xdr_netobj ctx_id; /* per message context id */ - int endtime; /* endtime of the context */ + struct xdr_netobj ctx_id; /* per message context id */ + int qop; /* negotiated qop */ struct xdr_netobj mech_used; unsigned int ret_flags ; - struct xdr_netobj conf_alg; - struct xdr_netobj derived_conf_key; - struct xdr_netobj intg_alg; - struct xdr_netobj derived_integ_key; + unsigned int req_flags ; + struct xdr_netobj share_key; + int conf_alg; + struct crypto_blkcipher *derived_conf_key; + int intg_alg; + struct crypto_blkcipher *derived_integ_key; + int keyestb_alg; /* alg used to get share_key */ + int owf_alg; /* one way function */ }; -/* OIDs declarations for K-ALG, I-ALG, C-ALG, and OWF-ALG */ -extern const struct xdr_netobj hmac_md5_oid; -extern const struct xdr_netobj cast5_cbc_oid; +/* from openssl/objects.h */ +/* XXX need SEAL_ALG_NONE */ +#define NID_md5 4 +#define NID_dhKeyAgreement 28 +#define NID_des_cbc 31 +#define NID_sha1 64 +#define NID_cast5_cbc 108 /* SPKM InnerContext Token types */ @@ -38,13 +46,11 @@ u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_ne u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype); #define CKSUMTYPE_RSA_MD5 0x0007 -#define CKSUMTYPE_HMAC_MD5 0x0008 -s32 make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, - unsigned int hdrlen, struct xdr_buf *body, - unsigned int body_offset, struct xdr_netobj *cksum); +s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, + int body_offset, struct xdr_netobj *cksum); void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits); -int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, +int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen); void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxhdr, int elen, int zbit); diff --git a/trunk/include/linux/sunrpc/sched.h b/trunk/include/linux/sunrpc/sched.h index b6b6ad6253b4..f399c138f79d 100644 --- a/trunk/include/linux/sunrpc/sched.h +++ b/trunk/include/linux/sunrpc/sched.h @@ -11,7 +11,6 @@ #include #include -#include #include #include #include @@ -86,7 +85,6 @@ struct rpc_task { union { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ - struct rcu_head tk_rcu; /* for task deletion */ } u; unsigned short tk_timeouts; /* maj timeouts */ @@ -180,6 +178,13 @@ struct rpc_call_ops { } while (0) #define RPC_IS_ACTIVATED(t) (test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)) +#define rpc_set_active(t) (set_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)) +#define rpc_clear_active(t) \ + do { \ + smp_mb__before_clear_bit(); \ + clear_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate); \ + smp_mb__after_clear_bit(); \ + } while(0) /* * Task priorities. @@ -249,10 +254,8 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, void *data); -void rpc_put_task(struct rpc_task *); void rpc_release_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); -void rpc_release_calldata(const struct rpc_call_ops *, void *); void rpc_killall_tasks(struct rpc_clnt *); int rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); diff --git a/trunk/include/linux/sunrpc/xdr.h b/trunk/include/linux/sunrpc/xdr.h index 9e340fa23c06..9a527c364394 100644 --- a/trunk/include/linux/sunrpc/xdr.h +++ b/trunk/include/linux/sunrpc/xdr.h @@ -11,7 +11,6 @@ #include #include -#include /* * Buffer adjustment @@ -140,30 +139,29 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) */ extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); -extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); +extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); +extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); +extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int); +extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int); /* * Helper structure for copying from an sk_buff. */ -struct xdr_skb_reader { +typedef struct { struct sk_buff *skb; unsigned int offset; size_t count; __wsum csum; -}; +} skb_reader_t; -typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); +typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); -size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len); extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, - struct xdr_skb_reader *, xdr_skb_read_actor); + skb_reader_t *, skb_read_actor_t); -extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); -extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); +extern int xdr_encode_word(struct xdr_buf *, int, u32); +extern int xdr_decode_word(struct xdr_buf *, int, u32 *); struct xdr_array2_desc; typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); @@ -198,7 +196,6 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); -extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); #endif /* __KERNEL__ */ diff --git a/trunk/include/linux/sunrpc/xprt.h b/trunk/include/linux/sunrpc/xprt.h index f780e72fc417..60394fbc4c70 100644 --- a/trunk/include/linux/sunrpc/xprt.h +++ b/trunk/include/linux/sunrpc/xprt.h @@ -106,6 +106,7 @@ struct rpc_rqst { struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); + char * (*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*rpcbind)(struct rpc_task *task); @@ -125,6 +126,8 @@ struct rpc_xprt_ops { struct rpc_xprt { struct kref kref; /* Reference count */ struct rpc_xprt_ops * ops; /* transport methods */ + struct socket * sock; /* BSD socket layer */ + struct sock * inet; /* INET layer */ struct rpc_timeout timeout; /* timeout parms */ struct sockaddr_storage addr; /* server address */ @@ -134,6 +137,9 @@ struct rpc_xprt { unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ + size_t rcvsize, /* transport rcv buffer size */ + sndsize; /* transport send buffer size */ + size_t max_payload; /* largest RPC payload size, in bytes */ unsigned int tsh_size; /* size of transport specific @@ -151,12 +157,28 @@ struct rpc_xprt { unsigned char shutdown : 1, /* being shut down */ resvport : 1; /* use a reserved port */ + /* + * XID + */ + __u32 xid; /* Next XID value to use */ + + /* + * State of TCP reply receive stuff + */ + __be32 tcp_recm, /* Fragment header */ + tcp_xid; /* Current XID */ + u32 tcp_reclen, /* fragment length */ + tcp_offset; /* fragment offset */ + unsigned long tcp_copied, /* copied to request */ + tcp_flags; /* * Connection of transports */ unsigned long connect_timeout, bind_timeout, reestablish_timeout; + struct work_struct connect_worker; + unsigned short port; /* * Disconnection of idle transports @@ -171,8 +193,8 @@ struct rpc_xprt { */ spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ - u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ + struct list_head recv; struct { @@ -188,9 +210,18 @@ struct rpc_xprt { bklog_u; /* backlog queue utilization */ } stat; + void (*old_data_ready)(struct sock *, int); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); + char * address_strings[RPC_DISPLAY_MAX]; }; +#define XPRT_LAST_FRAG (1 << 0) +#define XPRT_COPY_RECM (1 << 1) +#define XPRT_COPY_XID (1 << 2) +#define XPRT_COPY_DATA (1 << 3) + #ifdef __KERNEL__ /* @@ -239,8 +270,8 @@ void xprt_disconnect(struct rpc_xprt *xprt); /* * Socket transport setup operations */ -struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); -struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); +int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to); +int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); /* * Reserved bit positions in xprt->state diff --git a/trunk/net/sunrpc/auth_gss/auth_gss.c b/trunk/net/sunrpc/auth_gss/auth_gss.c index a02ecc1f230d..e5a84a482e57 100644 --- a/trunk/net/sunrpc/auth_gss/auth_gss.c +++ b/trunk/net/sunrpc/auth_gss/auth_gss.c @@ -68,7 +68,7 @@ static struct rpc_credops gss_credops; #define GSS_CRED_SLACK 1024 /* XXX: unused */ /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ -#define GSS_VERF_SLACK 100 +#define GSS_VERF_SLACK 56 /* XXX this define must match the gssd define * as it is passed to gssd to signal the use of @@ -94,6 +94,46 @@ struct gss_auth { static void gss_destroy_ctx(struct gss_cl_ctx *); static struct rpc_pipe_ops gss_upcall_ops; +void +print_hexl(u32 *p, u_int length, u_int offset) +{ + u_int i, j, jm; + u8 c, *cp; + + dprintk("RPC: print_hexl: length %d\n",length); + dprintk("\n"); + cp = (u8 *) p; + + for (i = 0; i < length; i += 0x10) { + dprintk(" %04x: ", (u_int)(i + offset)); + jm = length - i; + jm = jm > 16 ? 16 : jm; + + for (j = 0; j < jm; j++) { + if ((j % 2) == 1) + dprintk("%02x ", (u_int)cp[i+j]); + else + dprintk("%02x", (u_int)cp[i+j]); + } + for (; j < 16; j++) { + if ((j % 2) == 1) + dprintk(" "); + else + dprintk(" "); + } + dprintk(" "); + + for (j = 0; j < jm; j++) { + c = cp[i+j]; + c = isprint(c) ? c : '.'; + dprintk("%c", c); + } + dprintk("\n"); + } +} + +EXPORT_SYMBOL(print_hexl); + static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) { diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c b/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c index d926cda88623..e11a40b25cce 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -43,7 +43,6 @@ #include #include #include -#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -62,6 +61,9 @@ krb5_encrypt( u8 local_iv[16] = {0}; struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; + dprintk("RPC: krb5_encrypt: input data:\n"); + print_hexl((u32 *)in, length, 0); + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; @@ -78,9 +80,12 @@ krb5_encrypt( sg_set_buf(sg, out, length); ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); + + dprintk("RPC: krb5_encrypt: output data:\n"); + print_hexl((u32 *)out, length, 0); out: dprintk("RPC: krb5_encrypt returns %d\n",ret); - return ret; + return(ret); } EXPORT_SYMBOL(krb5_encrypt); @@ -98,6 +103,9 @@ krb5_decrypt( u8 local_iv[16] = {0}; struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; + dprintk("RPC: krb5_decrypt: input data:\n"); + print_hexl((u32 *)in, length, 0); + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; @@ -113,13 +121,82 @@ krb5_decrypt( sg_set_buf(sg, out, length); ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); + + dprintk("RPC: krb5_decrypt: output_data:\n"); + print_hexl((u32 *)out, length, 0); out: dprintk("RPC: gss_k5decrypt returns %d\n",ret); - return ret; + return(ret); } EXPORT_SYMBOL(krb5_decrypt); +static int +process_xdr_buf(struct xdr_buf *buf, int offset, int len, + int (*actor)(struct scatterlist *, void *), void *data) +{ + int i, page_len, thislen, page_offset, ret = 0; + struct scatterlist sg[1]; + + if (offset >= buf->head[0].iov_len) { + offset -= buf->head[0].iov_len; + } else { + thislen = buf->head[0].iov_len - offset; + if (thislen > len) + thislen = len; + sg_set_buf(sg, buf->head[0].iov_base + offset, thislen); + ret = actor(sg, data); + if (ret) + goto out; + offset = 0; + len -= thislen; + } + if (len == 0) + goto out; + + if (offset >= buf->page_len) { + offset -= buf->page_len; + } else { + page_len = buf->page_len - offset; + if (page_len > len) + page_len = len; + len -= page_len; + page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); + i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; + thislen = PAGE_CACHE_SIZE - page_offset; + do { + if (thislen > page_len) + thislen = page_len; + sg->page = buf->pages[i]; + sg->offset = page_offset; + sg->length = thislen; + ret = actor(sg, data); + if (ret) + goto out; + page_len -= thislen; + i++; + page_offset = 0; + thislen = PAGE_CACHE_SIZE; + } while (page_len != 0); + offset = 0; + } + if (len == 0) + goto out; + + if (offset < buf->tail[0].iov_len) { + thislen = buf->tail[0].iov_len - offset; + if (thislen > len) + thislen = len; + sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen); + ret = actor(sg, data); + len -= thislen; + } + if (len != 0) + ret = -EINVAL; +out: + return ret; +} + static int checksummer(struct scatterlist *sg, void *data) { @@ -130,13 +207,23 @@ checksummer(struct scatterlist *sg, void *data) /* checksum the plaintext data and hdrlen bytes of the token header */ s32 -make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body, +make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum) { + char *cksumname; struct hash_desc desc; /* XXX add to ctx? */ struct scatterlist sg[1]; int err; + switch (cksumtype) { + case CKSUMTYPE_RSA_MD5: + cksumname = "md5"; + break; + default: + dprintk("RPC: krb5_make_checksum:" + " unsupported checksum %d", cksumtype); + return GSS_S_FAILURE; + } desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(desc.tfm)) return GSS_S_FAILURE; @@ -150,7 +237,7 @@ make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body, err = crypto_hash_update(&desc, sg, hdrlen); if (err) goto out; - err = xdr_process_buf(body, body_offset, body->len - body_offset, + err = process_xdr_buf(body, body_offset, body->len - body_offset, checksummer, &desc); if (err) goto out; @@ -248,7 +335,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, desc.fragno = 0; desc.fraglen = 0; - ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc); + ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); return ret; } @@ -314,7 +401,7 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, desc.desc.flags = 0; desc.fragno = 0; desc.fraglen = 0; - return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); + return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); } EXPORT_SYMBOL(gss_decrypt_xdr_buf); diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c b/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c index 05d4bee86fc0..754b8cd6439f 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -129,7 +129,6 @@ gss_import_sec_context_kerberos(const void *p, { const void *end = (const void *)((const char *)p + len); struct krb5_ctx *ctx; - int tmp; if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; @@ -137,22 +136,17 @@ gss_import_sec_context_kerberos(const void *p, p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); if (IS_ERR(p)) goto out_err_free_ctx; - /* The downcall format was designed before we completely understood - * the uses of the context fields; so it includes some stuff we - * just give some minimal sanity-checking, and some we ignore - * completely (like the next twenty bytes): */ - if (unlikely(p + 20 > end || p + 20 < p)) - goto out_err_free_ctx; - p += 20; - p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); + p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init)); if (IS_ERR(p)) goto out_err_free_ctx; - if (tmp != SGN_ALG_DES_MAC_MD5) + p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed)); + if (IS_ERR(p)) goto out_err_free_ctx; - p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); + p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg)); if (IS_ERR(p)) goto out_err_free_ctx; - if (tmp != SEAL_ALG_DES) + p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg)); + if (IS_ERR(p)) goto out_err_free_ctx; p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); if (IS_ERR(p)) diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c b/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c index d0bb5064f8c5..08601ee4cd73 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -77,6 +77,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, struct xdr_netobj *token) { struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; + s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; unsigned char *ptr, *krb5_hdr, *msg_start; @@ -87,6 +88,21 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, now = get_seconds(); + switch (ctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" + " supported\n", ctx->signalg); + goto out_err; + } + if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) { + dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", + ctx->sealalg); + goto out_err; + } + token->len = g_token_size(&ctx->mech_used, 22); ptr = token->data; @@ -99,26 +115,37 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, krb5_hdr = ptr - 2; msg_start = krb5_hdr + 24; - *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); + *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum)) - return GSS_S_FAILURE; - - if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) - return GSS_S_FAILURE; - - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) + goto out_err; + + switch (ctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + goto out_err; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); + + dprintk("RPC: make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); + break; + default: + BUG(); + } spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; spin_unlock(&krb5_seq_lock); - if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)) - return GSS_S_FAILURE; + if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, + seq_send, krb5_hdr + 16, krb5_hdr + 8))) + goto out_err; - return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; + return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); +out_err: + return GSS_S_FAILURE; } diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_unseal.c b/trunk/net/sunrpc/auth_gss/gss_krb5_unseal.c index 87f8977ccece..0828cf64100f 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -78,6 +78,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; int signalg; int sealalg; + s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; s32 now; @@ -85,54 +86,96 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, s32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; int bodysize; + u32 ret = GSS_S_DEFECTIVE_TOKEN; dprintk("RPC: krb5_read_token\n"); if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, read_token->len)) - return GSS_S_DEFECTIVE_TOKEN; + goto out; if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) - return GSS_S_DEFECTIVE_TOKEN; + goto out; /* XXX sanity-check bodysize?? */ - signalg = ptr[0] + (ptr[1] << 8); - if (signalg != SGN_ALG_DES_MAC_MD5) - return GSS_S_DEFECTIVE_TOKEN; + /* get the sign and seal algorithms */ + signalg = ptr[0] + (ptr[1] << 8); sealalg = ptr[2] + (ptr[3] << 8); - if (sealalg != SEAL_ALG_NONE) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) - return GSS_S_DEFECTIVE_TOKEN; - - if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum)) - return GSS_S_FAILURE; - if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16)) - return GSS_S_FAILURE; + /* Sanity checks */ - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) - return GSS_S_BAD_SIG; + if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + goto out; + + if (sealalg != 0xffff) + goto out; + + /* there are several mappings of seal algorithms to sign algorithms, + but few enough that we can try them all. */ + + if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) || + (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || + (ctx->sealalg == SEAL_ALG_DES3KD && + signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) + goto out; + + /* compute the checksum of the message */ + + /* initialize the the cksum */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + ret = make_checksum(checksum_type, ptr - 2, 8, + message_buffer, 0, &md5cksum); + if (ret) + goto out; + + ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data, + md5cksum.data, 16); + if (ret) + goto out; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; + goto out; + } + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } /* it got through unscathed. Make sure the context is unexpired */ now = get_seconds(); + ret = GSS_S_CONTEXT_EXPIRED; if (now > ctx->endtime) - return GSS_S_CONTEXT_EXPIRED; + goto out; /* do sequencing checks */ - if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum)) - return GSS_S_FAILURE; + ret = GSS_S_BAD_SIG; + if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) + goto out; if ((ctx->initiate && direction != 0xff) || (!ctx->initiate && direction != 0)) - return GSS_S_BAD_SIG; + goto out; - return GSS_S_COMPLETE; + ret = GSS_S_COMPLETE; +out: + return ret; } diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c b/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c index fe25b3d898dc..cc45c1605f80 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -57,9 +57,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) >>PAGE_CACHE_SHIFT; int offset = (buf->page_base + len - 1) & (PAGE_CACHE_SIZE - 1); - ptr = kmap_atomic(buf->pages[last], KM_USER0); + ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); pad = *(ptr + offset); - kunmap_atomic(ptr, KM_USER0); + kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); goto out; } else len -= buf->page_len; @@ -120,6 +120,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf, struct page **pages) { struct krb5_ctx *kctx = ctx->internal_ctx_id; + s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; int blocksize = 0, plainlen; @@ -133,6 +134,21 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, now = get_seconds(); + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" + " supported\n", kctx->signalg); + goto out_err; + } + if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { + dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", + kctx->sealalg); + goto out_err; + } + blocksize = crypto_blkcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); @@ -159,27 +175,37 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ krb5_hdr = ptr - 2; msg_start = krb5_hdr + 24; + /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); - *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5); + *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES); + *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg); make_confounder(msg_start, blocksize); /* XXXJBF: UGH!: */ tmp_pages = buf->pages; buf->pages = pages; - if (make_checksum("md5", krb5_hdr, 8, buf, + if (make_checksum(checksum_type, krb5_hdr, 8, buf, offset + headlen - blocksize, &md5cksum)) - return GSS_S_FAILURE; + goto out_err; buf->pages = tmp_pages; - if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) - return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + goto out_err; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); + + dprintk("RPC: make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); + break; + default: + BUG(); + } spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; @@ -189,13 +215,15 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, * and encrypt at the same time: */ if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, seq_send, krb5_hdr + 16, krb5_hdr + 8))) - return GSS_S_FAILURE; + goto out_err; if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, pages)) - return GSS_S_FAILURE; + goto out_err; - return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; + return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); +out_err: + return GSS_S_FAILURE; } u32 @@ -204,6 +232,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) struct krb5_ctx *kctx = ctx->internal_ctx_id; int signalg; int sealalg; + s32 checksum_type; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; s32 now; @@ -211,6 +240,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) s32 seqnum; unsigned char *ptr; int bodysize; + u32 ret = GSS_S_DEFECTIVE_TOKEN; void *data_start, *orig_start; int data_len; int blocksize; @@ -220,58 +250,98 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) ptr = (u8 *)buf->head[0].iov_base + offset; if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, buf->len - offset)) - return GSS_S_DEFECTIVE_TOKEN; + goto out; if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) - return GSS_S_DEFECTIVE_TOKEN; + goto out; /* XXX sanity-check bodysize?? */ /* get the sign and seal algorithms */ signalg = ptr[0] + (ptr[1] << 8); - if (signalg != SGN_ALG_DES_MAC_MD5) - return GSS_S_DEFECTIVE_TOKEN; - sealalg = ptr[2] + (ptr[3] << 8); - if (sealalg != SEAL_ALG_DES) - return GSS_S_DEFECTIVE_TOKEN; + + /* Sanity checks */ if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) - return GSS_S_DEFECTIVE_TOKEN; + goto out; + + if (sealalg == 0xffff) + goto out; + + /* in the current spec, there is only one valid seal algorithm per + key type, so a simple comparison is ok */ + + if (sealalg != kctx->sealalg) + goto out; + + /* there are several mappings of seal algorithms to sign algorithms, + but few enough that we can try them all. */ + + if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || + (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || + (kctx->sealalg == SEAL_ALG_DES3KD && + signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) + goto out; if (gss_decrypt_xdr_buf(kctx->enc, buf, ptr + 22 - (unsigned char *)buf->head[0].iov_base)) - return GSS_S_DEFECTIVE_TOKEN; - - if (make_checksum("md5", ptr - 2, 8, buf, - ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum)) - return GSS_S_FAILURE; + goto out; - if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, - md5cksum.data, md5cksum.len)) - return GSS_S_FAILURE; + /* compute the checksum of the message */ - if (memcmp(md5cksum.data + 8, ptr + 14, 8)) - return GSS_S_BAD_SIG; + /* initialize the the cksum */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + ret = make_checksum(checksum_type, ptr - 2, 8, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); + if (ret) + goto out; + + ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len); + if (ret) + goto out; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; + goto out; + } + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } /* it got through unscathed. Make sure the context is unexpired */ now = get_seconds(); + ret = GSS_S_CONTEXT_EXPIRED; if (now > kctx->endtime) - return GSS_S_CONTEXT_EXPIRED; + goto out; /* do sequencing checks */ - if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, - &seqnum)) - return GSS_S_BAD_SIG; + ret = GSS_S_BAD_SIG; + if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) + goto out; if ((kctx->initiate && direction != 0xff) || (!kctx->initiate && direction != 0)) - return GSS_S_BAD_SIG; + goto out; /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ @@ -284,8 +354,11 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) buf->head[0].iov_len -= (data_start - orig_start); buf->len -= (data_start - orig_start); + ret = GSS_S_DEFECTIVE_TOKEN; if (gss_krb5_remove_padding(buf, blocksize)) - return GSS_S_DEFECTIVE_TOKEN; + goto out; - return GSS_S_COMPLETE; + ret = GSS_S_COMPLETE; +out: + return ret; } diff --git a/trunk/net/sunrpc/auth_gss/gss_spkm3_mech.c b/trunk/net/sunrpc/auth_gss/gss_spkm3_mech.c index 41465072d0b5..d57f60838895 100644 --- a/trunk/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/trunk/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -82,73 +82,133 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) return q; } +static inline const void * +get_key(const void *p, const void *end, struct crypto_blkcipher **res, + int *resalg) +{ + struct xdr_netobj key = { 0 }; + int setkey = 0; + char *alg_name; + + p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); + if (IS_ERR(p)) + goto out_err; + p = simple_get_netobj(p, end, &key); + if (IS_ERR(p)) + goto out_err; + + switch (*resalg) { + case NID_des_cbc: + alg_name = "cbc(des)"; + setkey = 1; + break; + case NID_cast5_cbc: + /* XXXX here in name only, not used */ + alg_name = "cbc(cast5)"; + setkey = 0; /* XXX will need to set to 1 */ + break; + case NID_md5: + if (key.len == 0) { + dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); + } + alg_name = "md5"; + setkey = 0; + break; + default: + dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); + goto out_err_free_key; + } + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { + printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name); + *res = NULL; + goto out_err_free_key; + } + if (setkey) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { + printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name); + goto out_err_free_tfm; + } + } + + if(key.len > 0) + kfree(key.data); + return p; + +out_err_free_tfm: + crypto_free_blkcipher(*res); +out_err_free_key: + if(key.len > 0) + kfree(key.data); + p = ERR_PTR(-EINVAL); +out_err: + return p; +} + static int gss_import_sec_context_spkm3(const void *p, size_t len, struct gss_ctx *ctx_id) { const void *end = (const void *)((const char *)p + len); struct spkm3_ctx *ctx; - int version; if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; - p = simple_get_bytes(p, end, &version, sizeof(version)); - if (IS_ERR(p)) - goto out_err_free_ctx; - if (version != 1) { - dprintk("RPC: unknown spkm3 token format: obsolete nfs-utils?\n"); - goto out_err_free_ctx; - } - p = simple_get_netobj(p, end, &ctx->ctx_id); if (IS_ERR(p)) goto out_err_free_ctx; - p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); + p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop)); if (IS_ERR(p)) goto out_err_free_ctx_id; p = simple_get_netobj(p, end, &ctx->mech_used); if (IS_ERR(p)) - goto out_err_free_ctx_id; + goto out_err_free_mech; p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags)); if (IS_ERR(p)) goto out_err_free_mech; - p = simple_get_netobj(p, end, &ctx->conf_alg); + p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags)); if (IS_ERR(p)) goto out_err_free_mech; - p = simple_get_netobj(p, end, &ctx->derived_conf_key); + p = simple_get_netobj(p, end, &ctx->share_key); + if (IS_ERR(p)) + goto out_err_free_s_key; + + p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg); if (IS_ERR(p)) - goto out_err_free_conf_alg; + goto out_err_free_s_key; - p = simple_get_netobj(p, end, &ctx->intg_alg); + p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg); if (IS_ERR(p)) - goto out_err_free_conf_key; + goto out_err_free_key1; - p = simple_get_netobj(p, end, &ctx->derived_integ_key); + p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg)); if (IS_ERR(p)) - goto out_err_free_intg_alg; + goto out_err_free_key2; + + p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg)); + if (IS_ERR(p)) + goto out_err_free_key2; if (p != end) - goto out_err_free_intg_key; + goto out_err_free_key2; ctx_id->internal_ctx_id = ctx; dprintk("Successfully imported new spkm context.\n"); return 0; -out_err_free_intg_key: - kfree(ctx->derived_integ_key.data); -out_err_free_intg_alg: - kfree(ctx->intg_alg.data); -out_err_free_conf_key: - kfree(ctx->derived_conf_key.data); -out_err_free_conf_alg: - kfree(ctx->conf_alg.data); +out_err_free_key2: + crypto_free_blkcipher(ctx->derived_integ_key); +out_err_free_key1: + crypto_free_blkcipher(ctx->derived_conf_key); +out_err_free_s_key: + kfree(ctx->share_key.data); out_err_free_mech: kfree(ctx->mech_used.data); out_err_free_ctx_id: @@ -160,16 +220,13 @@ gss_import_sec_context_spkm3(const void *p, size_t len, } static void -gss_delete_sec_context_spkm3(void *internal_ctx) -{ +gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; - kfree(sctx->derived_integ_key.data); - kfree(sctx->intg_alg.data); - kfree(sctx->derived_conf_key.data); - kfree(sctx->conf_alg.data); + crypto_free_blkcipher(sctx->derived_integ_key); + crypto_free_blkcipher(sctx->derived_conf_key); + kfree(sctx->share_key.data); kfree(sctx->mech_used.data); - kfree(sctx->ctx_id.data); kfree(sctx); } @@ -181,6 +238,7 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, u32 maj_stat = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; + dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); @@ -195,9 +253,10 @@ gss_get_mic_spkm3(struct gss_ctx *ctx, u32 err = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; + dprintk("RPC: gss_get_mic_spkm3\n"); + err = spkm3_make_token(sctx, message_buffer, - message_token, SPKM_MIC_TOK); - dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err); + message_token, SPKM_MIC_TOK); return err; } diff --git a/trunk/net/sunrpc/auth_gss/gss_spkm3_seal.c b/trunk/net/sunrpc/auth_gss/gss_spkm3_seal.c index b179d58c6249..18c7862bc234 100644 --- a/trunk/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/trunk/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -39,17 +39,11 @@ #include #include #include -#include -#include -#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif -const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"}; -const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"}; - /* * spkm3_make_token() * @@ -72,23 +66,29 @@ spkm3_make_token(struct spkm3_ctx *ctx, int ctxelen = 0, ctxzbit = 0; int md5elen = 0, md5zbit = 0; + dprintk("RPC: spkm3_make_token\n"); + now = jiffies; if (ctx->ctx_id.len != 16) { dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", - ctx->ctx_id.len); + ctx->ctx_id.len); goto out_err; } - - if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm." - "only support hmac-md5 I-ALG.\n"); - goto out_err; - } else - checksum_type = CKSUMTYPE_HMAC_MD5; - - if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported C-ALG algorithm\n"); + + switch (ctx->intg_alg) { + case NID_md5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not" + " supported\n", ctx->intg_alg); + goto out_err; + } + /* XXX since we don't support WRAP, perhaps we don't care... */ + if (ctx->conf_alg != NID_cast5_cbc) { + dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n", + ctx->conf_alg); goto out_err; } @@ -96,10 +96,10 @@ spkm3_make_token(struct spkm3_ctx *ctx, /* Calculate checksum over the mic-header */ asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, - ctxelen, ctxzbit); - if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key, - (char *)mic_hdr.data, mic_hdr.len, - text, 0, &md5cksum)) + ctxelen, ctxzbit); + + if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, + text, 0, &md5cksum)) goto out_err; asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); @@ -121,66 +121,7 @@ spkm3_make_token(struct spkm3_ctx *ctx, return GSS_S_COMPLETE; out_err: - if (md5cksum.data) - kfree(md5cksum.data); - token->data = NULL; token->len = 0; return GSS_S_FAILURE; } - -static int -spkm3_checksummer(struct scatterlist *sg, void *data) -{ - struct hash_desc *desc = data; - - return crypto_hash_update(desc, sg, sg->length); -} - -/* checksum the plaintext data and hdrlen bytes of the token header */ -s32 -make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, - unsigned int hdrlen, struct xdr_buf *body, - unsigned int body_offset, struct xdr_netobj *cksum) -{ - char *cksumname; - struct hash_desc desc; /* XXX add to ctx? */ - struct scatterlist sg[1]; - int err; - - switch (cksumtype) { - case CKSUMTYPE_HMAC_MD5: - cksumname = "md5"; - break; - default: - dprintk("RPC: spkm3_make_checksum:" - " unsupported checksum %d", cksumtype); - return GSS_S_FAILURE; - } - - if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE; - - desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) - return GSS_S_FAILURE; - cksum->len = crypto_hash_digestsize(desc.tfm); - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - err = crypto_hash_setkey(desc.tfm, key->data, key->len); - if (err) - goto out; - - sg_set_buf(sg, header, hdrlen); - crypto_hash_update(&desc, sg, 1); - - xdr_process_buf(body, body_offset, body->len - body_offset, - spkm3_checksummer, &desc); - crypto_hash_final(&desc, cksum->data); - -out: - crypto_free_hash(desc.tfm); - - return err ? GSS_S_FAILURE : 0; -} - -EXPORT_SYMBOL(make_spkm3_checksum); diff --git a/trunk/net/sunrpc/auth_gss/gss_spkm3_token.c b/trunk/net/sunrpc/auth_gss/gss_spkm3_token.c index 35188b6ea8f7..854a983ccf26 100644 --- a/trunk/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/trunk/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -172,10 +172,10 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct *(u8 *)hptr++ = zbit; memcpy(hptr, ctxdata, elen); hptr += elen; - *hdrlen = hptr - top; + *hdrlen = hptr - top; } - -/* + +/* * spkm3_mic_innercontext_token() * * *tokp points to the beginning of the SPKM_MIC token described diff --git a/trunk/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/trunk/net/sunrpc/auth_gss/gss_spkm3_unseal.c index e54581ca7570..8537f581ef9b 100644 --- a/trunk/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/trunk/net/sunrpc/auth_gss/gss_spkm3_unseal.c @@ -54,70 +54,70 @@ spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_buf *message_buffer, /* signbuf */ int toktype) { - s32 checksum_type; s32 code; struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; char cksumdata[16]; struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; unsigned char *ptr = (unsigned char *)read_token->data; - unsigned char *cksum; + unsigned char *cksum; int bodysize, md5elen; int mic_hdrlen; u32 ret = GSS_S_DEFECTIVE_TOKEN; + dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len); + if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, &bodysize, &ptr, read_token->len)) goto out; /* decode the token */ - if (toktype != SPKM_MIC_TOK) { - dprintk("RPC: BAD SPKM3 token type: %d\n", toktype); - goto out; - } - - if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) - goto out; - - if (*cksum++ != 0x03) { - dprintk("RPC: spkm3_read_token BAD checksum type\n"); - goto out; - } - md5elen = *cksum++; - cksum++; /* move past the zbit */ - - if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) - goto out; - - /* HARD CODED FOR MD5 */ - - /* compute the checksum of the message. - * ptr + 2 = start of header piece of checksum - * mic_hdrlen + 2 = length of header piece of checksum - */ - ret = GSS_S_DEFECTIVE_TOKEN; - if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) { - dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm\n"); - goto out; - } - - checksum_type = CKSUMTYPE_HMAC_MD5; - - code = make_spkm3_checksum(checksum_type, - &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2, - message_buffer, 0, &md5cksum); - - if (code) - goto out; - - ret = GSS_S_BAD_SIG; - code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); - if (code) { - dprintk("RPC: bad MIC checksum\n"); + if (toktype == SPKM_MIC_TOK) { + + if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) + goto out; + + if (*cksum++ != 0x03) { + dprintk("RPC: spkm3_read_token BAD checksum type\n"); + goto out; + } + md5elen = *cksum++; + cksum++; /* move past the zbit */ + + if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) + goto out; + + /* HARD CODED FOR MD5 */ + + /* compute the checksum of the message. + * ptr + 2 = start of header piece of checksum + * mic_hdrlen + 2 = length of header piece of checksum + */ + ret = GSS_S_DEFECTIVE_TOKEN; + code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, + mic_hdrlen + 2, + message_buffer, 0, &md5cksum); + + if (code) + goto out; + + dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n", + wire_cksum.len); + dprintk(" md5cksum.data\n"); + print_hexl((u32 *) md5cksum.data, 16, 0); + dprintk(" cksum.data:\n"); + print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0); + + ret = GSS_S_BAD_SIG; + code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); + if (code) + goto out; + + } else { + dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype); goto out; } - /* XXX: need to add expiration and sequencing */ ret = GSS_S_COMPLETE; out: diff --git a/trunk/net/sunrpc/clnt.c b/trunk/net/sunrpc/clnt.c index aba528b9ae76..dfeea4fea95a 100644 --- a/trunk/net/sunrpc/clnt.c +++ b/trunk/net/sunrpc/clnt.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -142,10 +141,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_vers = version->number; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); - err = -ENOMEM; - if (clnt->cl_metrics == NULL) - goto out_no_stats; - clnt->cl_program = program; if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -178,8 +173,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s rpc_put_mount(); } out_no_path: - rpc_free_iostats(clnt->cl_metrics); -out_no_stats: if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); kfree(clnt); @@ -259,19 +252,12 @@ struct rpc_clnt * rpc_clone_client(struct rpc_clnt *clnt) { struct rpc_clnt *new; - int err = -ENOMEM; new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); if (!new) goto out_no_clnt; atomic_set(&new->cl_count, 1); atomic_set(&new->cl_users, 0); - new->cl_metrics = rpc_alloc_iostats(clnt); - if (new->cl_metrics == NULL) - goto out_no_stats; - err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); - if (err != 0) - goto out_no_path; new->cl_parent = clnt; atomic_inc(&clnt->cl_count); new->cl_xprt = xprt_get(clnt->cl_xprt); @@ -279,17 +265,16 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; + if (!IS_ERR(new->cl_dentry)) + dget(new->cl_dentry); rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + new->cl_metrics = rpc_alloc_iostats(clnt); return new; -out_no_path: - rpc_free_iostats(new->cl_metrics); -out_no_stats: - kfree(new); out_no_clnt: - dprintk("RPC: %s returned error %d\n", __FUNCTION__, err); - return ERR_PTR(err); + printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); } /* @@ -342,14 +327,16 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = NULL; } - if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); - rpc_put_mount(); - } if (clnt->cl_parent != clnt) { + if (!IS_ERR(clnt->cl_dentry)) + dput(clnt->cl_dentry); rpc_destroy_client(clnt->cl_parent); goto out_free; } + if (!IS_ERR(clnt->cl_dentry)) { + rpc_rmdir(clnt->cl_dentry); + rpc_put_mount(); + } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: @@ -479,9 +466,10 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) BUG_ON(flags & RPC_TASK_ASYNC); + status = -ENOMEM; task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); if (task == NULL) - return -ENOMEM; + goto out; /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ rpc_task_sigmask(task, &oldset); @@ -490,17 +478,15 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) /* Set up the call info struct and execute the task */ status = task->tk_status; - if (status != 0) { - rpc_release_task(task); - goto out; + if (status == 0) { + atomic_inc(&task->tk_count); + status = rpc_execute(task); + if (status == 0) + status = task->tk_status; } - atomic_inc(&task->tk_count); - status = rpc_execute(task); - if (status == 0) - status = task->tk_status; - rpc_put_task(task); -out: rpc_restore_sigmask(&oldset); + rpc_release_task(task); +out: return status; } @@ -542,7 +528,8 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, rpc_restore_sigmask(&oldset); return status; out_release: - rpc_release_calldata(tk_ops, data); + if (tk_ops->rpc_release != NULL) + tk_ops->rpc_release(data); return status; } @@ -594,11 +581,7 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr); char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) { struct rpc_xprt *xprt = clnt->cl_xprt; - - if (xprt->address_strings[format] != NULL) - return xprt->address_strings[format]; - else - return "unprintable"; + return xprt->ops->print_addr(xprt, format); } EXPORT_SYMBOL_GPL(rpc_peeraddr2str); @@ -828,10 +811,8 @@ call_encode(struct rpc_task *task) if (encode == NULL) return; - lock_kernel(); task->tk_status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); - unlock_kernel(); if (task->tk_status == -ENOMEM) { /* XXX: Is this sane? */ rpc_delay(task, 3*HZ); @@ -1162,12 +1143,9 @@ call_decode(struct rpc_task *task) task->tk_action = rpc_exit_task; - if (decode) { - lock_kernel(); + if (decode) task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, task->tk_msg.rpc_resp); - unlock_kernel(); - } dprintk("RPC: %4d call_decode result %d\n", task->tk_pid, task->tk_status); return; diff --git a/trunk/net/sunrpc/pmap_clnt.c b/trunk/net/sunrpc/pmap_clnt.c index 3946ec3eb517..e52afab413de 100644 --- a/trunk/net/sunrpc/pmap_clnt.c +++ b/trunk/net/sunrpc/pmap_clnt.c @@ -101,14 +101,14 @@ void rpc_getport(struct rpc_task *task) /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); - status = -EACCES; /* tell caller to check again */ - if (xprt_test_and_set_binding(xprt)) - goto bailout_nowake; - /* Put self on queue before sending rpcbind request, in case * pmap_getport_done completes before we return from rpc_run_task */ rpc_sleep_on(&xprt->binding, task, NULL, NULL); + status = -EACCES; /* tell caller to check again */ + if (xprt_test_and_set_binding(xprt)) + goto bailout_nofree; + /* Someone else may have bound if we slept */ status = 0; if (xprt_bound(xprt)) @@ -134,7 +134,7 @@ void rpc_getport(struct rpc_task *task) child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); if (IS_ERR(child)) goto bailout; - rpc_put_task(child); + rpc_release_task(child); task->tk_xprt->stat.bind_count++; return; @@ -143,9 +143,8 @@ void rpc_getport(struct rpc_task *task) pmap_map_free(map); xprt_put(xprt); bailout_nofree: - pmap_wake_portmap_waiters(xprt, status); -bailout_nowake: task->tk_status = status; + pmap_wake_portmap_waiters(xprt, status); } #ifdef CONFIG_ROOT_NFS diff --git a/trunk/net/sunrpc/sched.c b/trunk/net/sunrpc/sched.c index f9fd66b1d48b..a1ab4eed41f4 100644 --- a/trunk/net/sunrpc/sched.c +++ b/trunk/net/sunrpc/sched.c @@ -266,28 +266,12 @@ static int rpc_wait_bit_interruptible(void *word) return 0; } -static void rpc_set_active(struct rpc_task *task) -{ - if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) - return; - spin_lock(&rpc_sched_lock); -#ifdef RPC_DEBUG - task->tk_magic = RPC_TASK_MAGIC_ID; - task->tk_pid = rpc_task_id++; -#endif - /* Add to global list of all tasks */ - list_add_tail(&task->tk_task, &all_tasks); - spin_unlock(&rpc_sched_lock); -} - /* * Mark an RPC call as having completed by clearing the 'active' bit */ -static void rpc_mark_complete_task(struct rpc_task *task) +static inline void rpc_mark_complete_task(struct rpc_task *task) { - smp_mb__before_clear_bit(); - clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); - smp_mb__after_clear_bit(); + rpc_clear_active(task); wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); } @@ -311,15 +295,13 @@ EXPORT_SYMBOL(__rpc_wait_for_completion_task); */ static void rpc_make_runnable(struct rpc_task *task) { + int do_ret; + BUG_ON(task->tk_timeout_fn); + do_ret = rpc_test_and_set_running(task); rpc_clear_queued(task); - if (rpc_test_and_set_running(task)) + if (do_ret) return; - /* We might have raced */ - if (RPC_IS_QUEUED(task)) { - rpc_clear_running(task); - return; - } if (RPC_IS_ASYNC(task)) { int status; @@ -351,6 +333,9 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, return; } + /* Mark the task as being activated if so needed */ + rpc_set_active(task); + __rpc_add_wait_queue(q, task); BUG_ON(task->tk_callback != NULL); @@ -361,9 +346,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { - /* Mark the task as being activated if so needed */ - rpc_set_active(task); - /* * Protect the queue operations. */ @@ -427,19 +409,16 @@ __rpc_default_timer(struct rpc_task *task) */ void rpc_wake_up_task(struct rpc_task *task) { - rcu_read_lock_bh(); if (rpc_start_wakeup(task)) { if (RPC_IS_QUEUED(task)) { struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; - /* Note: we're already in a bh-safe context */ - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); __rpc_do_wake_up_task(task); - spin_unlock(&queue->lock); + spin_unlock_bh(&queue->lock); } rpc_finish_wakeup(task); } - rcu_read_unlock_bh(); } /* @@ -502,16 +481,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) struct rpc_task *task = NULL; dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) __rpc_wake_up_task(task); } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); return task; } @@ -527,8 +504,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) @@ -537,8 +513,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } /** @@ -553,8 +528,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) { @@ -565,8 +539,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } static void __rpc_atrun(struct rpc_task *task) @@ -588,9 +561,7 @@ void rpc_delay(struct rpc_task *task, unsigned long delay) */ static void rpc_prepare_task(struct rpc_task *task) { - lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); - unlock_kernel(); } /* @@ -600,9 +571,7 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { - lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); - unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -612,15 +581,6 @@ void rpc_exit_task(struct rpc_task *task) } EXPORT_SYMBOL(rpc_exit_task); -void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) -{ - if (ops->rpc_release != NULL) { - lock_kernel(); - ops->rpc_release(calldata); - unlock_kernel(); - } -} - /* * This is the RPC `scheduler' (or rather, the finite state machine). */ @@ -655,7 +615,9 @@ static int __rpc_execute(struct rpc_task *task) */ save_callback=task->tk_callback; task->tk_callback=NULL; + lock_kernel(); save_callback(task); + unlock_kernel(); } /* @@ -666,7 +628,9 @@ static int __rpc_execute(struct rpc_task *task) if (!RPC_IS_QUEUED(task)) { if (task->tk_action == NULL) break; + lock_kernel(); task->tk_action(task); + unlock_kernel(); } /* @@ -707,6 +671,8 @@ static int __rpc_execute(struct rpc_task *task) } dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status); + /* Wake up anyone who is waiting for task completion */ + rpc_mark_complete_task(task); /* Release all resources associated with the task */ rpc_release_task(task); return status; @@ -820,6 +786,15 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_flags |= RPC_TASK_NOINTR; } +#ifdef RPC_DEBUG + task->tk_magic = RPC_TASK_MAGIC_ID; + task->tk_pid = rpc_task_id++; +#endif + /* Add to global list of all tasks */ + spin_lock(&rpc_sched_lock); + list_add_tail(&task->tk_task, &all_tasks); + spin_unlock(&rpc_sched_lock); + BUG_ON(task->tk_ops == NULL); /* starting timestamp */ @@ -835,9 +810,8 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void rpc_free_task(struct rcu_head *rcu) +static void rpc_free_task(struct rpc_task *task) { - struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); dprintk("RPC: %4d freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); } @@ -873,34 +847,16 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc goto out; } - -void rpc_put_task(struct rpc_task *task) +void rpc_release_task(struct rpc_task *task) { const struct rpc_call_ops *tk_ops = task->tk_ops; void *calldata = task->tk_calldata; - if (!atomic_dec_and_test(&task->tk_count)) - return; - /* Release resources */ - if (task->tk_rqstp) - xprt_release(task); - if (task->tk_msg.rpc_cred) - rpcauth_unbindcred(task); - if (task->tk_client) { - rpc_release_client(task->tk_client); - task->tk_client = NULL; - } - if (task->tk_flags & RPC_TASK_DYNAMIC) - call_rcu_bh(&task->u.tk_rcu, rpc_free_task); - rpc_release_calldata(tk_ops, calldata); -} -EXPORT_SYMBOL(rpc_put_task); - -void rpc_release_task(struct rpc_task *task) -{ #ifdef RPC_DEBUG BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); #endif + if (!atomic_dec_and_test(&task->tk_count)) + return; dprintk("RPC: %4d release task\n", task->tk_pid); /* Remove from global task list */ @@ -913,13 +869,23 @@ void rpc_release_task(struct rpc_task *task) /* Synchronously delete any running timer */ rpc_delete_timer(task); + /* Release resources */ + if (task->tk_rqstp) + xprt_release(task); + if (task->tk_msg.rpc_cred) + rpcauth_unbindcred(task); + if (task->tk_client) { + rpc_release_client(task->tk_client); + task->tk_client = NULL; + } + #ifdef RPC_DEBUG task->tk_magic = 0; #endif - /* Wake up anyone who is waiting for task completion */ - rpc_mark_complete_task(task); - - rpc_put_task(task); + if (task->tk_flags & RPC_TASK_DYNAMIC) + rpc_free_task(task); + if (tk_ops->rpc_release) + tk_ops->rpc_release(calldata); } /** @@ -936,7 +902,8 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, struct rpc_task *task; task = rpc_new_task(clnt, flags, ops, data); if (task == NULL) { - rpc_release_calldata(ops, data); + if (ops->rpc_release != NULL) + ops->rpc_release(data); return ERR_PTR(-ENOMEM); } atomic_inc(&task->tk_count); diff --git a/trunk/net/sunrpc/socklib.c b/trunk/net/sunrpc/socklib.c index 634885b0c04d..2635c543ba06 100644 --- a/trunk/net/sunrpc/socklib.c +++ b/trunk/net/sunrpc/socklib.c @@ -16,7 +16,7 @@ /** - * xdr_skb_read_bits - copy some data bits from skb to internal buffer + * skb_read_bits - copy some data bits from skb to internal buffer * @desc: sk_buff copy helper * @to: copy destination * @len: number of bytes to copy @@ -24,11 +24,11 @@ * Possibly called several times to iterate over an sk_buff and copy * data out of it. */ -size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) +static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) { if (len > desc->count) len = desc->count; - if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len))) + if (skb_copy_bits(desc->skb, desc->offset, to, len)) return 0; desc->count -= len; desc->offset += len; @@ -36,14 +36,14 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) } /** - * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer + * skb_read_and_csum_bits - copy and checksum from skb to buffer * @desc: sk_buff copy helper * @to: copy destination * @len: number of bytes to copy * * Same as skb_read_bits, but calculate a checksum at the same time. */ -static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len) +static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) { unsigned int pos; __wsum csum2; @@ -66,7 +66,7 @@ static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, * @copy_actor: virtual method for copying data * */ -ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor) +ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) { struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; @@ -148,7 +148,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct */ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) { - struct xdr_skb_reader desc; + skb_reader_t desc; desc.skb = skb; desc.offset = sizeof(struct udphdr); @@ -158,7 +158,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) goto no_checksum; desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0) + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) return -1; if (desc.offset != skb->len) { __wsum csum2; @@ -173,7 +173,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: - if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) return -1; if (desc.count) return -1; diff --git a/trunk/net/sunrpc/sunrpc_syms.c b/trunk/net/sunrpc/sunrpc_syms.c index d85fddeb6388..192dff5dabcb 100644 --- a/trunk/net/sunrpc/sunrpc_syms.c +++ b/trunk/net/sunrpc/sunrpc_syms.c @@ -33,6 +33,7 @@ EXPORT_SYMBOL(rpciod_down); EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); +EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_clone_client); @@ -138,8 +139,6 @@ EXPORT_SYMBOL(nlm_debug); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); extern struct cache_detail ip_map_cache; -extern int init_socket_xprt(void); -extern void cleanup_socket_xprt(void); static int __init init_sunrpc(void) @@ -157,7 +156,6 @@ init_sunrpc(void) rpc_proc_init(); #endif cache_register(&ip_map_cache); - init_socket_xprt(); out: return err; } @@ -165,7 +163,6 @@ init_sunrpc(void) static void __exit cleanup_sunrpc(void) { - cleanup_socket_xprt(); unregister_rpc_pipefs(); rpc_destroy_mempool(); if (cache_unregister(&ip_map_cache)) diff --git a/trunk/net/sunrpc/sysctl.c b/trunk/net/sunrpc/sysctl.c index 82b27528d0c4..d89b048ad6bb 100644 --- a/trunk/net/sunrpc/sysctl.c +++ b/trunk/net/sunrpc/sysctl.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * Declare the debug flags here @@ -118,6 +119,11 @@ proc_dodebug(ctl_table *table, int write, struct file *file, } +static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; +static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; +static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; +static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; + static ctl_table debug_table[] = { { .ctl_name = CTL_RPCDEBUG, @@ -151,6 +157,50 @@ static ctl_table debug_table[] = { .mode = 0644, .proc_handler = &proc_dodebug }, + { + .ctl_name = CTL_SLOTTABLE_UDP, + .procname = "udp_slot_table_entries", + .data = &xprt_udp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_SLOTTABLE_TCP, + .procname = "tcp_slot_table_entries", + .data = &xprt_tcp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_MIN_RESVPORT, + .procname = "min_resvport", + .data = &xprt_min_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, + { + .ctl_name = CTL_MAX_RESVPORT, + .procname = "max_resvport", + .data = &xprt_max_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, { .ctl_name = 0 } }; diff --git a/trunk/net/sunrpc/xdr.c b/trunk/net/sunrpc/xdr.c index a0af250ca319..9022eb8b37ed 100644 --- a/trunk/net/sunrpc/xdr.c +++ b/trunk/net/sunrpc/xdr.c @@ -640,30 +640,41 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) buf->buflen = buf->len = iov->iov_len; } +/* Sets subiov to the intersection of iov with the buffer of length len + * starting base bytes after iov. Indicates empty intersection by setting + * length of subiov to zero. Decrements len by length of subiov, sets base + * to zero (or decrements it by length of iov if subiov is empty). */ +static void +iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len) +{ + if (*base > iov->iov_len) { + subiov->iov_base = NULL; + subiov->iov_len = 0; + *base -= iov->iov_len; + } else { + subiov->iov_base = iov->iov_base + *base; + subiov->iov_len = min(*len, (int)iov->iov_len - *base); + *base = 0; + } + *len -= subiov->iov_len; +} + /* Sets subbuf to the portion of buf of length len beginning base bytes * from the start of buf. Returns -1 if base of length are out of bounds. */ int xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, - unsigned int base, unsigned int len) + int base, int len) { + int i; + subbuf->buflen = subbuf->len = len; - if (base < buf->head[0].iov_len) { - subbuf->head[0].iov_base = buf->head[0].iov_base + base; - subbuf->head[0].iov_len = min_t(unsigned int, len, - buf->head[0].iov_len - base); - len -= subbuf->head[0].iov_len; - base = 0; - } else { - subbuf->head[0].iov_base = NULL; - subbuf->head[0].iov_len = 0; - base -= buf->head[0].iov_len; - } + iov_subsegment(buf->head, subbuf->head, &base, &len); if (base < buf->page_len) { - subbuf->page_len = min(buf->page_len - base, len); - base += buf->page_base; - subbuf->page_base = base & ~PAGE_CACHE_MASK; - subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT]; + i = (base + buf->page_base) >> PAGE_CACHE_SHIFT; + subbuf->pages = &buf->pages[i]; + subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK; + subbuf->page_len = min((int)buf->page_len - base, len); len -= subbuf->page_len; base = 0; } else { @@ -671,85 +682,66 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, subbuf->page_len = 0; } - if (base < buf->tail[0].iov_len) { - subbuf->tail[0].iov_base = buf->tail[0].iov_base + base; - subbuf->tail[0].iov_len = min_t(unsigned int, len, - buf->tail[0].iov_len - base); - len -= subbuf->tail[0].iov_len; - base = 0; - } else { - subbuf->tail[0].iov_base = NULL; - subbuf->tail[0].iov_len = 0; - base -= buf->tail[0].iov_len; - } - + iov_subsegment(buf->tail, subbuf->tail, &base, &len); if (base || len) return -1; return 0; } -static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) +/* obj is assumed to point to allocated memory of size at least len: */ +int +read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) { - unsigned int this_len; + struct xdr_buf subbuf; + int this_len; + int status; - this_len = min_t(unsigned int, len, subbuf->head[0].iov_len); - memcpy(obj, subbuf->head[0].iov_base, this_len); + status = xdr_buf_subsegment(buf, &subbuf, base, len); + if (status) + goto out; + this_len = min(len, (int)subbuf.head[0].iov_len); + memcpy(obj, subbuf.head[0].iov_base, this_len); len -= this_len; obj += this_len; - this_len = min_t(unsigned int, len, subbuf->page_len); + this_len = min(len, (int)subbuf.page_len); if (this_len) - _copy_from_pages(obj, subbuf->pages, subbuf->page_base, this_len); + _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len); len -= this_len; obj += this_len; - this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len); - memcpy(obj, subbuf->tail[0].iov_base, this_len); + this_len = min(len, (int)subbuf.tail[0].iov_len); + memcpy(obj, subbuf.tail[0].iov_base, this_len); +out: + return status; } /* obj is assumed to point to allocated memory of size at least len: */ -int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len) +int +write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) { struct xdr_buf subbuf; + int this_len; int status; status = xdr_buf_subsegment(buf, &subbuf, base, len); - if (status != 0) - return status; - __read_bytes_from_xdr_buf(&subbuf, obj, len); - return 0; -} - -static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) -{ - unsigned int this_len; - - this_len = min_t(unsigned int, len, subbuf->head[0].iov_len); - memcpy(subbuf->head[0].iov_base, obj, this_len); + if (status) + goto out; + this_len = min(len, (int)subbuf.head[0].iov_len); + memcpy(subbuf.head[0].iov_base, obj, this_len); len -= this_len; obj += this_len; - this_len = min_t(unsigned int, len, subbuf->page_len); + this_len = min(len, (int)subbuf.page_len); if (this_len) - _copy_to_pages(subbuf->pages, subbuf->page_base, obj, this_len); + _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len); len -= this_len; obj += this_len; - this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len); - memcpy(subbuf->tail[0].iov_base, obj, this_len); -} - -/* obj is assumed to point to allocated memory of size at least len: */ -int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len) -{ - struct xdr_buf subbuf; - int status; - - status = xdr_buf_subsegment(buf, &subbuf, base, len); - if (status != 0) - return status; - __write_bytes_to_xdr_buf(&subbuf, obj, len); - return 0; + this_len = min(len, (int)subbuf.tail[0].iov_len); + memcpy(subbuf.tail[0].iov_base, obj, this_len); +out: + return status; } int -xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) +xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) { __be32 raw; int status; @@ -762,7 +754,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) } int -xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) +xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) { __be32 raw = htonl(obj); @@ -773,37 +765,44 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) * entirely in the head or the tail, set object to point to it; otherwise * try to find space for it at the end of the tail, copy it there, and * set obj to point to it. */ -int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset) +int +xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset) { - struct xdr_buf subbuf; + u32 tail_offset = buf->head[0].iov_len + buf->page_len; + u32 obj_end_offset; if (xdr_decode_word(buf, offset, &obj->len)) - return -EFAULT; - if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len)) - return -EFAULT; - - /* Is the obj contained entirely in the head? */ - obj->data = subbuf.head[0].iov_base; - if (subbuf.head[0].iov_len == obj->len) - return 0; - /* ..or is the obj contained entirely in the tail? */ - obj->data = subbuf.tail[0].iov_base; - if (subbuf.tail[0].iov_len == obj->len) - return 0; + goto out; + obj_end_offset = offset + 4 + obj->len; + + if (obj_end_offset <= buf->head[0].iov_len) { + /* The obj is contained entirely in the head: */ + obj->data = buf->head[0].iov_base + offset + 4; + } else if (offset + 4 >= tail_offset) { + if (obj_end_offset - tail_offset + > buf->tail[0].iov_len) + goto out; + /* The obj is contained entirely in the tail: */ + obj->data = buf->tail[0].iov_base + + offset - tail_offset + 4; + } else { + /* use end of tail as storage for obj: + * (We don't copy to the beginning because then we'd have + * to worry about doing a potentially overlapping copy. + * This assumes the object is at most half the length of the + * tail.) */ + if (obj->len > buf->tail[0].iov_len) + goto out; + obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len - + obj->len; + if (read_bytes_from_xdr_buf(buf, offset + 4, + obj->data, obj->len)) + goto out; - /* use end of tail as storage for obj: - * (We don't copy to the beginning because then we'd have - * to worry about doing a potentially overlapping copy. - * This assumes the object is at most half the length of the - * tail.) */ - if (obj->len > buf->buflen - buf->len) - return -ENOMEM; - if (buf->tail[0].iov_len != 0) - obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len; - else - obj->data = buf->head[0].iov_base + buf->head[0].iov_len; - __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len); + } return 0; +out: + return -1; } /* Returns 0 on success, or else a negative error code. */ @@ -1021,71 +1020,3 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base, return xdr_xcode_array2(buf, base, desc, 1); } - -int -xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, - int (*actor)(struct scatterlist *, void *), void *data) -{ - int i, ret = 0; - unsigned page_len, thislen, page_offset; - struct scatterlist sg[1]; - - if (offset >= buf->head[0].iov_len) { - offset -= buf->head[0].iov_len; - } else { - thislen = buf->head[0].iov_len - offset; - if (thislen > len) - thislen = len; - sg_set_buf(sg, buf->head[0].iov_base + offset, thislen); - ret = actor(sg, data); - if (ret) - goto out; - offset = 0; - len -= thislen; - } - if (len == 0) - goto out; - - if (offset >= buf->page_len) { - offset -= buf->page_len; - } else { - page_len = buf->page_len - offset; - if (page_len > len) - page_len = len; - len -= page_len; - page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); - i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; - thislen = PAGE_CACHE_SIZE - page_offset; - do { - if (thislen > page_len) - thislen = page_len; - sg->page = buf->pages[i]; - sg->offset = page_offset; - sg->length = thislen; - ret = actor(sg, data); - if (ret) - goto out; - page_len -= thislen; - i++; - page_offset = 0; - thislen = PAGE_CACHE_SIZE; - } while (page_len != 0); - offset = 0; - } - if (len == 0) - goto out; - if (offset < buf->tail[0].iov_len) { - thislen = buf->tail[0].iov_len - offset; - if (thislen > len) - thislen = len; - sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen); - ret = actor(sg, data); - len -= thislen; - } - if (len != 0) - ret = -EINVAL; -out: - return ret; -} -EXPORT_SYMBOL(xdr_process_buf); - diff --git a/trunk/net/sunrpc/xprt.c b/trunk/net/sunrpc/xprt.c index f8ca0a93454c..80857470dc11 100644 --- a/trunk/net/sunrpc/xprt.c +++ b/trunk/net/sunrpc/xprt.c @@ -459,6 +459,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) if (to->to_maxval && req->rq_timeout >= to->to_maxval) req->rq_timeout = to->to_maxval; req->rq_retries++; + pprintk("RPC: %lu retrans\n", jiffies); } else { req->rq_timeout = to->to_initval; req->rq_retries = 0; @@ -467,6 +468,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) spin_lock_bh(&xprt->transport_lock); rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); spin_unlock_bh(&xprt->transport_lock); + pprintk("RPC: %lu timeout\n", jiffies); status = -ETIMEDOUT; } @@ -889,25 +891,39 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i */ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) { + int result; struct rpc_xprt *xprt; struct rpc_rqst *req; + if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) { + dprintk("RPC: xprt_create_transport: no memory\n"); + return ERR_PTR(-ENOMEM); + } + if (size <= sizeof(xprt->addr)) { + memcpy(&xprt->addr, ap, size); + xprt->addrlen = size; + } else { + kfree(xprt); + dprintk("RPC: xprt_create_transport: address too large\n"); + return ERR_PTR(-EBADF); + } + switch (proto) { case IPPROTO_UDP: - xprt = xs_setup_udp(ap, size, to); + result = xs_setup_udp(xprt, to); break; case IPPROTO_TCP: - xprt = xs_setup_tcp(ap, size, to); + result = xs_setup_tcp(xprt, to); break; default: printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", proto); return ERR_PTR(-EIO); } - if (IS_ERR(xprt)) { - dprintk("RPC: xprt_create_transport: failed, %ld\n", - -PTR_ERR(xprt)); - return xprt; + if (result) { + kfree(xprt); + dprintk("RPC: xprt_create_transport: failed, %d\n", result); + return ERR_PTR(result); } kref_init(&xprt->kref); @@ -953,11 +969,8 @@ static void xprt_destroy(struct kref *kref) dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); - - /* - * Tear down transport state and free the rpc_xprt - */ xprt->ops->destroy(xprt); + kfree(xprt); } /** diff --git a/trunk/net/sunrpc/xprtsock.c b/trunk/net/sunrpc/xprtsock.c index 21438d7dc47b..757fc91ef25d 100644 --- a/trunk/net/sunrpc/xprtsock.c +++ b/trunk/net/sunrpc/xprtsock.c @@ -45,92 +45,6 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; -/* - * We can register our own files under /proc/sys/sunrpc by - * calling register_sysctl_table() again. The files in that - * directory become the union of all files registered there. - * - * We simply need to make sure that we don't collide with - * someone else's file names! - */ - -#ifdef RPC_DEBUG - -static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; -static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; -static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; -static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; - -static struct ctl_table_header *sunrpc_table_header; - -/* - * FIXME: changing the UDP slot table size should also resize the UDP - * socket buffers for existing UDP transports - */ -static ctl_table xs_tunables_table[] = { - { - .ctl_name = CTL_SLOTTABLE_UDP, - .procname = "udp_slot_table_entries", - .data = &xprt_udp_slot_table_entries, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &min_slot_table_size, - .extra2 = &max_slot_table_size - }, - { - .ctl_name = CTL_SLOTTABLE_TCP, - .procname = "tcp_slot_table_entries", - .data = &xprt_tcp_slot_table_entries, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &min_slot_table_size, - .extra2 = &max_slot_table_size - }, - { - .ctl_name = CTL_MIN_RESVPORT, - .procname = "min_resvport", - .data = &xprt_min_resvport, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xprt_min_resvport_limit, - .extra2 = &xprt_max_resvport_limit - }, - { - .ctl_name = CTL_MAX_RESVPORT, - .procname = "max_resvport", - .data = &xprt_max_resvport, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xprt_min_resvport_limit, - .extra2 = &xprt_max_resvport_limit - }, - { - .ctl_name = 0, - }, -}; - -static ctl_table sunrpc_table[] = { - { - .ctl_name = CTL_SUNRPC, - .procname = "sunrpc", - .mode = 0555, - .child = xs_tunables_table - }, - { - .ctl_name = 0, - }, -}; - -#endif - /* * How many times to try sending a request on a socket before waiting * for the socket buffer to clear. @@ -211,55 +125,6 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif -struct sock_xprt { - struct rpc_xprt xprt; - - /* - * Network layer - */ - struct socket * sock; - struct sock * inet; - - /* - * State of TCP reply receive - */ - __be32 tcp_fraghdr, - tcp_xid; - - u32 tcp_offset, - tcp_reclen; - - unsigned long tcp_copied, - tcp_flags; - - /* - * Connection of transports - */ - struct work_struct connect_worker; - unsigned short port; - - /* - * UDP socket buffer size parameters - */ - size_t rcvsize, - sndsize; - - /* - * Saved socket callback addresses - */ - void (*old_data_ready)(struct sock *, int); - void (*old_state_change)(struct sock *); - void (*old_write_space)(struct sock *); -}; - -/* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) - static void xs_format_peer_addresses(struct rpc_xprt *xprt) { struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; @@ -303,52 +168,37 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt) #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) -static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) +static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) { + struct kvec iov = { + .iov_base = xdr->head[0].iov_base + base, + .iov_len = len - base, + }; struct msghdr msg = { .msg_name = addr, .msg_namelen = addrlen, - .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0), - }; - struct kvec iov = { - .iov_base = vec->iov_base + base, - .iov_len = vec->iov_len - base, + .msg_flags = XS_SENDMSG_FLAGS, }; - if (iov.iov_len != 0) + if (xdr->len > len) + msg.msg_flags |= MSG_MORE; + + if (likely(iov.iov_len)) return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); return kernel_sendmsg(sock, &msg, NULL, 0, 0); } -static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more) +static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) { - struct page **ppage; - unsigned int remainder; - int err, sent = 0; - - remainder = xdr->page_len - base; - base += xdr->page_base; - ppage = xdr->pages + (base >> PAGE_SHIFT); - base &= ~PAGE_MASK; - for(;;) { - unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); - int flags = XS_SENDMSG_FLAGS; + struct kvec iov = { + .iov_base = xdr->tail[0].iov_base + base, + .iov_len = len - base, + }; + struct msghdr msg = { + .msg_flags = XS_SENDMSG_FLAGS, + }; - remainder -= len; - if (remainder != 0 || more) - flags |= MSG_MORE; - err = sock->ops->sendpage(sock, *ppage, base, len, flags); - if (remainder == 0 || err != len) - break; - sent += err; - ppage++; - base = 0; - } - if (sent == 0) - return err; - if (err > 0) - sent += err; - return sent; + return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); } /** @@ -360,51 +210,76 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i * @base: starting position in the buffer * */ -static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) +static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) { - unsigned int remainder = xdr->len - base; - int err, sent = 0; + struct page **ppage = xdr->pages; + unsigned int len, pglen = xdr->page_len; + int err, ret = 0; if (unlikely(!sock)) return -ENOTCONN; clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); - if (base != 0) { - addr = NULL; - addrlen = 0; - } - if (base < xdr->head[0].iov_len || addr != NULL) { - unsigned int len = xdr->head[0].iov_len - base; - remainder -= len; - err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); - if (remainder == 0 || err != len) + len = xdr->head[0].iov_len; + if (base < len || (addr != NULL && base == 0)) { + err = xs_send_head(sock, addr, addrlen, xdr, base, len); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + if (err != (len - base)) goto out; - sent += err; base = 0; } else - base -= xdr->head[0].iov_len; + base -= len; + + if (unlikely(pglen == 0)) + goto copy_tail; + if (unlikely(base >= pglen)) { + base -= pglen; + goto copy_tail; + } + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + base &= ~PAGE_CACHE_MASK; + } + + do { + int flags = XS_SENDMSG_FLAGS; + + len = PAGE_CACHE_SIZE; + if (base) + len -= base; + if (pglen < len) + len = pglen; + + if (pglen != len || xdr->tail[0].iov_len != 0) + flags |= MSG_MORE; - if (base < xdr->page_len) { - unsigned int len = xdr->page_len - base; - remainder -= len; - err = xs_send_pagedata(sock, xdr, base, remainder != 0); - if (remainder == 0 || err != len) + err = kernel_sendpage(sock, *ppage, base, len, flags); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + if (err != len) goto out; - sent += err; base = 0; - } else - base -= xdr->page_len; - - if (base >= xdr->tail[0].iov_len) - return sent; - err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); + ppage++; + } while ((pglen -= len) != 0); +copy_tail: + len = xdr->tail[0].iov_len; + if (base < len) { + err = xs_send_tail(sock, xdr, base, len); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + } out: - if (sent == 0) - return err; - if (err > 0) - sent += err; - return sent; + return ret; } /** @@ -416,20 +291,19 @@ static void xs_nospace(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, req->rq_slen); - if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { /* Protect against races with write_space */ spin_lock_bh(&xprt->transport_lock); /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) + else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) xprt_wait_for_buffer_space(task); spin_unlock_bh(&xprt->transport_lock); @@ -453,7 +327,6 @@ static int xs_udp_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; int status; @@ -462,10 +335,8 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_len); req->rq_xtime = jiffies; - status = xs_sendpages(transport->sock, - (struct sockaddr *) &xprt->addr, - xprt->addrlen, xdr, - req->rq_bytes_sent); + status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, + xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -521,7 +392,6 @@ static int xs_tcp_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; int status, retry = 0; @@ -536,8 +406,8 @@ static int xs_tcp_send_request(struct rpc_task *task) * called sendmsg(). */ while (1) { req->rq_xtime = jiffies; - status = xs_sendpages(transport->sock, - NULL, 0, xdr, req->rq_bytes_sent); + status = xs_sendpages(xprt->sock, NULL, 0, xdr, + req->rq_bytes_sent); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -615,9 +485,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) */ static void xs_close(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct socket *sock = transport->sock; - struct sock *sk = transport->inet; + struct socket *sock = xprt->sock; + struct sock *sk = xprt->inet; if (!sk) goto clear_close_wait; @@ -625,13 +494,13 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); write_lock_bh(&sk->sk_callback_lock); - transport->inet = NULL; - transport->sock = NULL; + xprt->inet = NULL; + xprt->sock = NULL; sk->sk_user_data = NULL; - sk->sk_data_ready = transport->old_data_ready; - sk->sk_state_change = transport->old_state_change; - sk->sk_write_space = transport->old_write_space; + sk->sk_data_ready = xprt->old_data_ready; + sk->sk_state_change = xprt->old_state_change; + sk->sk_write_space = xprt->old_write_space; write_unlock_bh(&sk->sk_callback_lock); sk->sk_no_check = 0; @@ -650,18 +519,15 @@ static void xs_close(struct rpc_xprt *xprt) */ static void xs_destroy(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work(&transport->connect_worker); + cancel_delayed_work(&xprt->connect_worker); flush_scheduled_work(); xprt_disconnect(xprt); xs_close(xprt); xs_free_peer_addresses(xprt); kfree(xprt->slot); - kfree(xprt); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -737,75 +603,91 @@ static void xs_udp_data_ready(struct sock *sk, int len) read_unlock(&sk->sk_callback_lock); } -static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) +static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) +{ + if (len > desc->count) + len = desc->count; + if (skb_copy_bits(desc->skb, desc->offset, p, len)) { + dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", + len, desc->count); + return 0; + } + desc->offset += len; + desc->count -= len; + dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", + len, desc->count); + return len; +} + +static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); size_t len, used; char *p; - p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset; - len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset; - used = xdr_skb_read_bits(desc, p, len); - transport->tcp_offset += used; + p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; + len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; + used = xs_tcp_copy_data(desc, p, len); + xprt->tcp_offset += used; if (used != len) return; - transport->tcp_reclen = ntohl(transport->tcp_fraghdr); - if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) - transport->tcp_flags |= TCP_RCV_LAST_FRAG; + xprt->tcp_reclen = ntohl(xprt->tcp_recm); + if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) + xprt->tcp_flags |= XPRT_LAST_FRAG; else - transport->tcp_flags &= ~TCP_RCV_LAST_FRAG; - transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; + xprt->tcp_flags &= ~XPRT_LAST_FRAG; + xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; - transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR; - transport->tcp_offset = 0; + xprt->tcp_flags &= ~XPRT_COPY_RECM; + xprt->tcp_offset = 0; /* Sanity check of the record length */ - if (unlikely(transport->tcp_reclen < 4)) { + if (unlikely(xprt->tcp_reclen < 4)) { dprintk("RPC: invalid TCP record fragment length\n"); xprt_disconnect(xprt); return; } dprintk("RPC: reading TCP record fragment of length %d\n", - transport->tcp_reclen); + xprt->tcp_reclen); } -static void xs_tcp_check_fraghdr(struct sock_xprt *transport) +static void xs_tcp_check_recm(struct rpc_xprt *xprt) { - if (transport->tcp_offset == transport->tcp_reclen) { - transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR; - transport->tcp_offset = 0; - if (transport->tcp_flags & TCP_RCV_LAST_FRAG) { - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - transport->tcp_flags |= TCP_RCV_COPY_XID; - transport->tcp_copied = 0; + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); + if (xprt->tcp_offset == xprt->tcp_reclen) { + xprt->tcp_flags |= XPRT_COPY_RECM; + xprt->tcp_offset = 0; + if (xprt->tcp_flags & XPRT_LAST_FRAG) { + xprt->tcp_flags &= ~XPRT_COPY_DATA; + xprt->tcp_flags |= XPRT_COPY_XID; + xprt->tcp_copied = 0; } } } -static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc) +static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) { size_t len, used; char *p; - len = sizeof(transport->tcp_xid) - transport->tcp_offset; + len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; dprintk("RPC: reading XID (%Zu bytes)\n", len); - p = ((char *) &transport->tcp_xid) + transport->tcp_offset; - used = xdr_skb_read_bits(desc, p, len); - transport->tcp_offset += used; + p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; + used = xs_tcp_copy_data(desc, p, len); + xprt->tcp_offset += used; if (used != len) return; - transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_COPY_DATA; - transport->tcp_copied = 4; + xprt->tcp_flags &= ~XPRT_COPY_XID; + xprt->tcp_flags |= XPRT_COPY_DATA; + xprt->tcp_copied = 4; dprintk("RPC: reading reply for XID %08x\n", - ntohl(transport->tcp_xid)); - xs_tcp_check_fraghdr(transport); + ntohl(xprt->tcp_xid)); + xs_tcp_check_recm(xprt); } -static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) +static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct rpc_rqst *req; struct xdr_buf *rcvbuf; size_t len; @@ -813,118 +695,116 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea /* Find and lock the request corresponding to this xid */ spin_lock(&xprt->transport_lock); - req = xprt_lookup_rqst(xprt, transport->tcp_xid); + req = xprt_lookup_rqst(xprt, xprt->tcp_xid); if (!req) { - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + xprt->tcp_flags &= ~XPRT_COPY_DATA; dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->tcp_xid)); + ntohl(xprt->tcp_xid)); spin_unlock(&xprt->transport_lock); return; } rcvbuf = &req->rq_private_buf; len = desc->count; - if (len > transport->tcp_reclen - transport->tcp_offset) { - struct xdr_skb_reader my_desc; + if (len > xprt->tcp_reclen - xprt->tcp_offset) { + skb_reader_t my_desc; - len = transport->tcp_reclen - transport->tcp_offset; + len = xprt->tcp_reclen - xprt->tcp_offset; memcpy(&my_desc, desc, sizeof(my_desc)); my_desc.count = len; - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, - &my_desc, xdr_skb_read_bits); + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + &my_desc, xs_tcp_copy_data); desc->count -= r; desc->offset += r; } else - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, - desc, xdr_skb_read_bits); + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + desc, xs_tcp_copy_data); if (r > 0) { - transport->tcp_copied += r; - transport->tcp_offset += r; + xprt->tcp_copied += r; + xprt->tcp_offset += r; } if (r != len) { /* Error when copying to the receive buffer, * usually because we weren't able to allocate * additional buffer pages. All we can do now - * is turn off TCP_RCV_COPY_DATA, so the request + * is turn off XPRT_COPY_DATA, so the request * will not receive any additional updates, * and time out. * Any remaining data from this record will * be discarded. */ - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + xprt->tcp_flags &= ~XPRT_COPY_DATA; dprintk("RPC: XID %08x truncated request\n", - ntohl(transport->tcp_xid)); + ntohl(xprt->tcp_xid)); dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, transport->tcp_copied, transport->tcp_offset, - transport->tcp_reclen); + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); goto out; } dprintk("RPC: XID %08x read %Zd bytes\n", - ntohl(transport->tcp_xid), r); + ntohl(xprt->tcp_xid), r); dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, transport->tcp_copied, transport->tcp_offset, - transport->tcp_reclen); - - if (transport->tcp_copied == req->rq_private_buf.buflen) - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - else if (transport->tcp_offset == transport->tcp_reclen) { - if (transport->tcp_flags & TCP_RCV_LAST_FRAG) - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); + + if (xprt->tcp_copied == req->rq_private_buf.buflen) + xprt->tcp_flags &= ~XPRT_COPY_DATA; + else if (xprt->tcp_offset == xprt->tcp_reclen) { + if (xprt->tcp_flags & XPRT_LAST_FRAG) + xprt->tcp_flags &= ~XPRT_COPY_DATA; } out: - if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) - xprt_complete_rqst(req->rq_task, transport->tcp_copied); + if (!(xprt->tcp_flags & XPRT_COPY_DATA)) + xprt_complete_rqst(req->rq_task, xprt->tcp_copied); spin_unlock(&xprt->transport_lock); - xs_tcp_check_fraghdr(transport); + xs_tcp_check_recm(xprt); } -static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) +static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) { size_t len; - len = transport->tcp_reclen - transport->tcp_offset; + len = xprt->tcp_reclen - xprt->tcp_offset; if (len > desc->count) len = desc->count; desc->count -= len; desc->offset += len; - transport->tcp_offset += len; + xprt->tcp_offset += len; dprintk("RPC: discarded %Zu bytes\n", len); - xs_tcp_check_fraghdr(transport); + xs_tcp_check_recm(xprt); } static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct rpc_xprt *xprt = rd_desc->arg.data; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct xdr_skb_reader desc = { + skb_reader_t desc = { .skb = skb, .offset = offset, .count = len, + .csum = 0 }; dprintk("RPC: xs_tcp_data_recv started\n"); do { /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ - if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) { + if (xprt->tcp_flags & XPRT_COPY_RECM) { xs_tcp_read_fraghdr(xprt, &desc); continue; } /* Read in the xid if necessary */ - if (transport->tcp_flags & TCP_RCV_COPY_XID) { - xs_tcp_read_xid(transport, &desc); + if (xprt->tcp_flags & XPRT_COPY_XID) { + xs_tcp_read_xid(xprt, &desc); continue; } /* Read in the request data */ - if (transport->tcp_flags & TCP_RCV_COPY_DATA) { + if (xprt->tcp_flags & XPRT_COPY_DATA) { xs_tcp_read_request(xprt, &desc); continue; } /* Skip over any trailing bytes on short reads */ - xs_tcp_read_discard(transport, &desc); + xs_tcp_read_discard(xprt, &desc); } while (desc.count); dprintk("RPC: xs_tcp_data_recv done\n"); return len - desc.count; @@ -978,16 +858,11 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_ESTABLISHED: spin_lock_bh(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { - struct sock_xprt *transport = container_of(xprt, - struct sock_xprt, xprt); - /* Reset TCP record info */ - transport->tcp_offset = 0; - transport->tcp_reclen = 0; - transport->tcp_copied = 0; - transport->tcp_flags = - TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; - + xprt->tcp_offset = 0; + xprt->tcp_reclen = 0; + xprt->tcp_copied = 0; + xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; xprt_wake_pending_tasks(xprt, 0); } @@ -1076,16 +951,15 @@ static void xs_tcp_write_space(struct sock *sk) static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct sock *sk = transport->inet; + struct sock *sk = xprt->inet; - if (transport->rcvsize) { + if (xprt->rcvsize) { sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2; + sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; } - if (transport->sndsize) { + if (xprt->sndsize) { sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2; + sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; sk->sk_write_space(sk); } } @@ -1100,14 +974,12 @@ static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) */ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - - transport->sndsize = 0; + xprt->sndsize = 0; if (sndsize) - transport->sndsize = sndsize + 1024; - transport->rcvsize = 0; + xprt->sndsize = sndsize + 1024; + xprt->rcvsize = 0; if (rcvsize) - transport->rcvsize = rcvsize + 1024; + xprt->rcvsize = rcvsize + 1024; xs_udp_do_set_buffer_size(xprt); } @@ -1130,6 +1002,19 @@ static unsigned short xs_get_random_port(void) return rand + xprt_min_resvport; } +/** + * xs_print_peer_address - format an IPv4 address for printing + * @xprt: generic transport + * @format: flags field indicating which parts of the address to render + */ +static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format) +{ + if (xprt->address_strings[format] != NULL) + return xprt->address_strings[format]; + else + return "unprintable"; +} + /** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport @@ -1145,20 +1030,20 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) sap->sin_port = htons(port); } -static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) +static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, }; int err; - unsigned short port = transport->port; + unsigned short port = xprt->port; do { myaddr.sin_port = htons(port); err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); if (err == 0) { - transport->port = port; + xprt->port = port; dprintk("RPC: xs_bindresvport bound to port %u\n", port); return 0; @@ -1167,7 +1052,7 @@ static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) port = xprt_max_resvport; else port--; - } while (err == -EADDRINUSE && port != transport->port); + } while (err == -EADDRINUSE && port != xprt->port); dprintk("RPC: can't bind to reserved port (%d).\n", -err); return err; @@ -1181,9 +1066,8 @@ static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock) */ static void xs_udp_connect_worker(void *args) { - struct sock_xprt *transport = (struct sock_xprt *)args; - struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; + struct rpc_xprt *xprt = (struct rpc_xprt *) args; + struct socket *sock = xprt->sock; int err, status = -EIO; if (xprt->shutdown || !xprt_bound(xprt)) @@ -1197,23 +1081,23 @@ static void xs_udp_connect_worker(void *args) goto out; } - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { sock_release(sock); goto out; } dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); - if (!transport->inet) { + if (!xprt->inet) { struct sock *sk = sock->sk; write_lock_bh(&sk->sk_callback_lock); sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; + xprt->old_data_ready = sk->sk_data_ready; + xprt->old_state_change = sk->sk_state_change; + xprt->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_no_check = UDP_CSUM_NORCV; @@ -1222,8 +1106,8 @@ static void xs_udp_connect_worker(void *args) xprt_set_connected(xprt); /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + xprt->sock = sock; + xprt->inet = sk; write_unlock_bh(&sk->sk_callback_lock); } @@ -1241,7 +1125,7 @@ static void xs_udp_connect_worker(void *args) static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) { int result; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = xprt->sock; struct sockaddr any; dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); @@ -1252,7 +1136,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) */ memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; - result = kernel_connect(transport->sock, &any, sizeof(any), 0); + result = kernel_connect(sock, &any, sizeof(any), 0); if (result) dprintk("RPC: AF_UNSPEC connect return code %d\n", result); @@ -1266,22 +1150,21 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) */ static void xs_tcp_connect_worker(void *args) { - struct sock_xprt *transport = (struct sock_xprt *)args; - struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct socket *sock = xprt->sock; int err, status = -EIO; if (xprt->shutdown || !xprt_bound(xprt)) goto out; - if (!sock) { + if (!xprt->sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } - if (xprt->resvport && xs_bindresvport(transport, sock) < 0) { + if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { sock_release(sock); goto out; } @@ -1290,17 +1173,17 @@ static void xs_tcp_connect_worker(void *args) xs_tcp_reuse_connection(xprt); dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); - if (!transport->inet) { + if (!xprt->inet) { struct sock *sk = sock->sk; write_lock_bh(&sk->sk_callback_lock); sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; + xprt->old_data_ready = sk->sk_data_ready; + xprt->old_state_change = sk->sk_state_change; + xprt->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; @@ -1315,8 +1198,8 @@ static void xs_tcp_connect_worker(void *args) xprt_clear_connected(xprt); /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + xprt->sock = sock; + xprt->inet = sk; write_unlock_bh(&sk->sk_callback_lock); } @@ -1365,22 +1248,21 @@ static void xs_tcp_connect_worker(void *args) static void xs_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); if (xprt_test_and_set_connecting(xprt)) return; - if (transport->sock != NULL) { + if (xprt->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", xprt, xprt->reestablish_timeout / HZ); - schedule_delayed_work(&transport->connect_worker, + schedule_delayed_work(&xprt->connect_worker, xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_work(&transport->connect_worker); + schedule_work(&xprt->connect_worker); /* flush_scheduled_work can sleep... */ if (!RPC_IS_ASYNC(task)) @@ -1396,10 +1278,8 @@ static void xs_connect(struct rpc_task *task) */ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", - transport->port, + xprt->port, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, @@ -1416,14 +1296,13 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) */ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); long idle_time = 0; if (xprt_connected(xprt)) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", - transport->port, + xprt->port, xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -1437,6 +1316,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .rpcbind = rpc_getport, @@ -1454,6 +1334,7 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, .rpcbind = rpc_getport, @@ -1468,64 +1349,33 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size) -{ - struct rpc_xprt *xprt; - struct sock_xprt *new; - - if (addrlen > sizeof(xprt->addr)) { - dprintk("RPC: xs_setup_xprt: address too large\n"); - return ERR_PTR(-EBADF); - } - - new = kzalloc(sizeof(*new), GFP_KERNEL); - if (new == NULL) { - dprintk("RPC: xs_setup_xprt: couldn't allocate rpc_xprt\n"); - return ERR_PTR(-ENOMEM); - } - xprt = &new->xprt; - - xprt->max_reqs = slot_table_size; - xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - kfree(xprt); - dprintk("RPC: xs_setup_xprt: couldn't allocate slot table\n"); - return ERR_PTR(-ENOMEM); - } - - memcpy(&xprt->addr, addr, addrlen); - xprt->addrlen = addrlen; - new->port = xs_get_random_port(); - - return xprt; -} - /** * xs_setup_udp - Set up transport to use a UDP socket - * @addr: address of remote server - * @addrlen: length of address in bytes + * @xprt: transport to set up * @to: timeout parameters * */ -struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { - struct rpc_xprt *xprt; - struct sock_xprt *transport; + size_t slot_table_size; + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; - xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries); - if (IS_ERR(xprt)) - return xprt; - transport = container_of(xprt, struct sock_xprt, xprt); + xprt->max_reqs = xprt_udp_slot_table_entries; + slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); + xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); + if (xprt->slot == NULL) + return -ENOMEM; - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(addr->sin_port) != 0) xprt_set_bound(xprt); + xprt->port = xs_get_random_port(); xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_WORK(&transport->connect_worker, xs_udp_connect_worker, transport); + INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1540,36 +1390,37 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_ xs_format_peer_addresses(xprt); dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); - return xprt; + return 0; } /** * xs_setup_tcp - Set up transport to use a TCP socket - * @addr: address of remote server - * @addrlen: length of address in bytes + * @xprt: transport to set up * @to: timeout parameters * */ -struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to) +int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { - struct rpc_xprt *xprt; - struct sock_xprt *transport; + size_t slot_table_size; + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; - xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries); - if (IS_ERR(xprt)) - return xprt; - transport = container_of(xprt, struct sock_xprt, xprt); + xprt->max_reqs = xprt_tcp_slot_table_entries; + slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); + xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); + if (xprt->slot == NULL) + return -ENOMEM; - if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0) + if (ntohs(addr->sin_port) != 0) xprt_set_bound(xprt); + xprt->port = xs_get_random_port(); xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_WORK(&transport->connect_worker, xs_tcp_connect_worker, transport); + INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1584,40 +1435,7 @@ struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_ xs_format_peer_addresses(xprt); dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); - - return xprt; -} - -/** - * init_socket_xprt - set up xprtsock's sysctls - * - */ -int init_socket_xprt(void) -{ -#ifdef RPC_DEBUG - if (!sunrpc_table_header) { - sunrpc_table_header = register_sysctl_table(sunrpc_table, 1); -#ifdef CONFIG_PROC_FS - if (sunrpc_table[0].de) - sunrpc_table[0].de->owner = THIS_MODULE; -#endif - } -#endif + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); return 0; } - -/** - * cleanup_socket_xprt - remove xprtsock's sysctls - * - */ -void cleanup_socket_xprt(void) -{ -#ifdef RPC_DEBUG - if (sunrpc_table_header) { - unregister_sysctl_table(sunrpc_table_header); - sunrpc_table_header = NULL; - } -#endif -}