From 607f31e80b6f982d7c0dd7a5045377fc368fe507 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 28 Jun 2006 16:52:45 -0400 Subject: [PATCH 01/40] Revert "Merge branch 'odirect'" This reverts ccf01ef7aa9c6c293a1c64c27331a2ce227916ec commit. No idea how git managed this one: when I asked it to merge the odirect topic branch it actually generated a patch which reverted the change. Reverting the 'merge' will once again reveal Chuck's recent NFS/O_DIRECT work to the world. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 435 +++++++++++++++++++--------------------- include/linux/nfs_xdr.h | 2 + 2 files changed, 203 insertions(+), 234 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8ca9707be6c9..9ae7b6f6bf30 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -68,25 +68,19 @@ struct nfs_direct_req { struct kref kref; /* release manager */ /* I/O parameters */ - struct list_head list, /* nfs_read/write_data structs */ - rewrite_list; /* saved nfs_write_data structs */ struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ - unsigned long user_addr; /* location of user's buffer */ - size_t user_count; /* total bytes to move */ - loff_t pos; /* starting offset in file */ - struct page ** pages; /* pages in our buffer */ - unsigned int npages; /* count of pages */ /* completion state */ + atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ - int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ /* commit state */ + struct list_head rewrite_list; /* saved nfs_write_data structs */ struct nfs_write_data * commit_data; /* special write_data for commits */ int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ @@ -94,8 +88,37 @@ struct nfs_direct_req { struct nfs_writeverf verf; /* unstable write verifier */ }; -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); +static const struct rpc_call_ops nfs_write_direct_ops; + +static inline void get_dreq(struct nfs_direct_req *dreq) +{ + atomic_inc(&dreq->io_count); +} + +static inline int put_dreq(struct nfs_direct_req *dreq) +{ + return atomic_dec_and_test(&dreq->io_count); +} + +/* + * "size" is never larger than rsize or wsize. + */ +static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size) +{ + int page_count; + + page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + page_count -= user_addr >> PAGE_SHIFT; + BUG_ON(page_count < 0); + + return page_count; +} + +static inline unsigned int nfs_max_pages(unsigned int size) +{ + return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +} /** * nfs_direct_IO - NFS address space operation for direct I/O @@ -119,50 +142,21 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +static void nfs_direct_dirty_pages(struct page **pages, int npages) { int i; for (i = 0; i < npages; i++) { struct page *page = pages[i]; - if (do_dirty && !PageCompound(page)) + if (!PageCompound(page)) set_page_dirty_lock(page); - page_cache_release(page); } - kfree(pages); } -static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) +static void nfs_direct_release_pages(struct page **pages, int npages) { - int result = -ENOMEM; - unsigned long page_count; - size_t array_size; - - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; - page_count -= user_addr >> PAGE_SHIFT; - - array_size = (page_count * sizeof(struct page *)); - *pages = kmalloc(array_size, GFP_KERNEL); - if (*pages) { - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - page_count, (rw == READ), 0, - *pages, NULL); - up_read(¤t->mm->mmap_sem); - if (result != page_count) { - /* - * If we got fewer pages than expected from - * get_user_pages(), the user buffer runs off the - * end of a mapping; return EFAULT. - */ - if (result >= 0) { - nfs_free_user_pages(*pages, result, 0); - result = -EFAULT; - } else - kfree(*pages); - *pages = NULL; - } - } - return result; + int i; + for (i = 0; i < npages; i++) + page_cache_release(pages[i]); } static inline struct nfs_direct_req *nfs_direct_req_alloc(void) @@ -174,13 +168,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) return NULL; kref_init(&dreq->kref); + kref_get(&dreq->kref); init_completion(&dreq->completion); - INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; dreq->ctx = NULL; spin_lock_init(&dreq->lock); - dreq->outstanding = 0; + atomic_set(&dreq->io_count, 0); dreq->count = 0; dreq->error = 0; dreq->flags = 0; @@ -221,18 +215,11 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) } /* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - * - * In addition, synchronous I/O uses a stack-allocated iocb. Thus we - * can't trust the iocb is still valid here if this is a synchronous - * request. If the waiter is woken prematurely, the iocb is long gone. + * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust + * the iocb is still valid here if this is a synchronous request. */ static void nfs_direct_complete(struct nfs_direct_req *dreq) { - nfs_free_user_pages(dreq->pages, dreq->npages, 1); - if (dreq->iocb) { long res = (long) dreq->error; if (!res) @@ -245,48 +232,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } /* - * Note we also set the number of requests we have in the dreq when we are - * done. This prevents races with I/O completion so we will always wait - * until all requests have been dispatched and completed. + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). */ -static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) -{ - struct list_head *list; - struct nfs_direct_req *dreq; - unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - return NULL; - - list = &dreq->list; - for(;;) { - struct nfs_read_data *data = nfs_readdata_alloc(rpages); - - if (unlikely(!data)) { - while (!list_empty(list)) { - data = list_entry(list->next, - struct nfs_read_data, pages); - list_del(&data->pages); - nfs_readdata_free(data); - } - kref_put(&dreq->kref, nfs_direct_req_release); - return NULL; - } - - INIT_LIST_HEAD(&data->pages); - list_add(&data->pages, list); - - data->req = (struct nfs_page *) dreq; - dreq->outstanding++; - if (nbytes <= rsize) - break; - nbytes -= rsize; - } - kref_get(&dreq->kref); - return dreq; -} - static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; @@ -295,6 +244,9 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; + nfs_direct_dirty_pages(data->pagevec, data->npages); + nfs_direct_release_pages(data->pagevec, data->npages); + spin_lock(&dreq->lock); if (likely(task->tk_status >= 0)) @@ -302,13 +254,10 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) else dreq->error = task->tk_status; - if (--dreq->outstanding) { - spin_unlock(&dreq->lock); - return; - } - spin_unlock(&dreq->lock); - nfs_direct_complete(dreq); + + if (put_dreq(dreq)) + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -317,41 +266,60 @@ static const struct rpc_call_ops nfs_read_direct_ops = { }; /* - * For each nfs_read_data struct that was allocated on the list, dispatch - * an NFS READ operation + * For each rsize'd chunk of the user's buffer, dispatch an NFS READ + * operation. If nfs_readdata_alloc() or get_user_pages() fails, + * bail and stop sending more reads. Read length accounting is + * handled automatically by nfs_direct_read_result(). Otherwise, if + * no requests have been sent, just return an error. */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) +static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; - struct list_head *list = &dreq->list; - struct page **pages = dreq->pages; - size_t count = dreq->user_count; - loff_t pos = dreq->pos; size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int curpage, pgbase; + unsigned int rpages = nfs_max_pages(rsize); + unsigned int pgbase; + int result; + ssize_t started = 0; + + get_dreq(dreq); - curpage = 0; - pgbase = dreq->user_addr & ~PAGE_MASK; + pgbase = user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; + result = -ENOMEM; + data = nfs_readdata_alloc(rpages); + if (unlikely(!data)) + break; + bytes = rsize; if (count < rsize) bytes = count; - BUG_ON(list_empty(list)); - data = list_entry(list->next, struct nfs_read_data, pages); - list_del_init(&data->pages); + data->npages = nfs_direct_count_pages(user_addr, bytes); + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 1, 0, data->pagevec, NULL); + up_read(¤t->mm->mmap_sem); + if (unlikely(result < data->npages)) { + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_release(data); + break; + } + + get_dreq(dreq); + data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = &pages[curpage]; + data->args.pages = data->pagevec; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.eof = 0; @@ -374,33 +342,35 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) bytes, (unsigned long long)data->args.offset); + started += bytes; + user_addr += bytes; pos += bytes; pgbase += bytes; - curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); - BUG_ON(!list_empty(list)); + + if (put_dreq(dreq)) + nfs_direct_complete(dreq); + + if (started) + return 0; + return result < 0 ? (ssize_t) result : -EFAULT; } -static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result; + ssize_t result = 0; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; - dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); + dreq = nfs_direct_req_alloc(); if (!dreq) return -ENOMEM; - dreq->user_addr = user_addr; - dreq->user_count = count; - dreq->pos = pos; - dreq->pages = pages; - dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -408,8 +378,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq); - result = nfs_direct_wait(dreq); + result = nfs_direct_read_schedule(dreq, user_addr, count, pos); + if (!result) + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -417,10 +388,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) { - list_splice_init(&dreq->rewrite_list, &dreq->list); - while (!list_empty(&dreq->list)) { - struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); + while (!list_empty(&dreq->rewrite_list)) { + struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); list_del(&data->pages); + nfs_direct_release_pages(data->pagevec, data->npages); nfs_writedata_release(data); } } @@ -428,14 +399,51 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { - struct list_head *pos; + struct inode *inode = dreq->inode; + struct list_head *p; + struct nfs_write_data *data; - list_splice_init(&dreq->rewrite_list, &dreq->list); - list_for_each(pos, &dreq->list) - dreq->outstanding++; dreq->count = 0; + get_dreq(dreq); + + list_for_each(p, &dreq->rewrite_list) { + data = list_entry(p, struct nfs_write_data, pages); + + get_dreq(dreq); + + /* + * Reset data->res. + */ + nfs_fattr_init(&data->fattr); + data->res.count = data->args.count; + memset(&data->verf, 0, sizeof(data->verf)); + + /* + * Reuse data->task; data->args should not have changed + * since the original request was sent. + */ + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_write_direct_ops, data); + NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); + + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long) inode; + + /* + * We're called via an RPC callback, so BKL is already held. + */ + rpc_execute(&data->task); + + dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + } - nfs_direct_write_schedule(dreq, FLUSH_STABLE); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); } static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) @@ -472,8 +480,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->cred = dreq->ctx->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = dreq->pos; - data->args.count = dreq->user_count; + data->args.offset = 0; + data->args.count = 0; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -535,47 +543,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #endif -static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) -{ - struct list_head *list; - struct nfs_direct_req *dreq; - unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - return NULL; - - list = &dreq->list; - for(;;) { - struct nfs_write_data *data = nfs_writedata_alloc(wpages); - - if (unlikely(!data)) { - while (!list_empty(list)) { - data = list_entry(list->next, - struct nfs_write_data, pages); - list_del(&data->pages); - nfs_writedata_free(data); - } - kref_put(&dreq->kref, nfs_direct_req_release); - return NULL; - } - - INIT_LIST_HEAD(&data->pages); - list_add(&data->pages, list); - - data->req = (struct nfs_page *) dreq; - dreq->outstanding++; - if (nbytes <= wsize) - break; - nbytes -= wsize; - } - - nfs_alloc_commit_data(dreq); - - kref_get(&dreq->kref); - return dreq; -} - static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -605,8 +572,6 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) } } } - /* In case we have to resend */ - data->args.stable = NFS_FILE_SYNC; spin_unlock(&dreq->lock); } @@ -620,14 +585,8 @@ static void nfs_direct_write_release(void *calldata) struct nfs_write_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - spin_lock(&dreq->lock); - if (--dreq->outstanding) { - spin_unlock(&dreq->lock); - return; - } - spin_unlock(&dreq->lock); - - nfs_direct_write_complete(dreq, data->inode); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, data->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { @@ -636,41 +595,62 @@ static const struct rpc_call_ops nfs_write_direct_ops = { }; /* - * For each nfs_write_data struct that was allocated on the list, dispatch - * an NFS WRITE operation + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation. If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes. Write length accounting is + * handled automatically by nfs_direct_write_result(). Otherwise, if + * no requests have been sent, just return an error. */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) +static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; - struct list_head *list = &dreq->list; - struct page **pages = dreq->pages; - size_t count = dreq->user_count; - loff_t pos = dreq->pos; size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int curpage, pgbase; + unsigned int wpages = nfs_max_pages(wsize); + unsigned int pgbase; + int result; + ssize_t started = 0; - curpage = 0; - pgbase = dreq->user_addr & ~PAGE_MASK; + get_dreq(dreq); + + pgbase = user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; + result = -ENOMEM; + data = nfs_writedata_alloc(wpages); + if (unlikely(!data)) + break; + bytes = wsize; if (count < wsize) bytes = count; - BUG_ON(list_empty(list)); - data = list_entry(list->next, struct nfs_write_data, pages); + data->npages = nfs_direct_count_pages(user_addr, bytes); + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 0, 0, data->pagevec, NULL); + up_read(¤t->mm->mmap_sem); + if (unlikely(result < data->npages)) { + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_release(data); + break; + } + + get_dreq(dreq); + list_move_tail(&data->pages, &dreq->rewrite_list); + data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = &pages[curpage]; + data->args.pages = data->pagevec; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.count = bytes; @@ -694,19 +674,26 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) bytes, (unsigned long long)data->args.offset); + started += bytes; + user_addr += bytes; pos += bytes; pgbase += bytes; - curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); - BUG_ON(!list_empty(list)); + + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); + + if (started) + return 0; + return result < 0 ? (ssize_t) result : -EFAULT; } -static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result; + ssize_t result = 0; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); @@ -714,17 +701,14 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz size_t wsize = NFS_SERVER(inode)->wsize; int sync = 0; - dreq = nfs_direct_write_alloc(count, wsize); + dreq = nfs_direct_req_alloc(); if (!dreq) return -ENOMEM; + nfs_alloc_commit_data(dreq); + if (dreq->commit_data == NULL || count < wsize) sync = FLUSH_STABLE; - dreq->user_addr = user_addr; - dreq->user_count = count; - dreq->pos = pos; - dreq->pages = pages; - dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -735,8 +719,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, sync); - result = nfs_direct_wait(dreq); + result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); + if (!result) + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -766,8 +751,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; - int page_count; - struct page **pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -789,14 +772,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (retval) goto out; - retval = nfs_get_user_pages(READ, (unsigned long) buf, - count, &pages); - if (retval < 0) - goto out; - page_count = retval; - - retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, - pages, page_count); + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos); if (retval > 0) iocb->ki_pos = pos + retval; @@ -832,8 +808,6 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; - int page_count; - struct page **pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -861,14 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t if (retval) goto out; - retval = nfs_get_user_pages(WRITE, (unsigned long) buf, - count, &pages); - if (retval < 0) - goto out; - page_count = retval; - - retval = nfs_direct_write(iocb, (unsigned long) buf, count, - pos, pages, page_count); + retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos); /* * XXX: nfs_end_data_update() already ensures this file's diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7c7320fa51aa..2d3fb6416d91 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -729,6 +729,7 @@ struct nfs_read_data { struct list_head pages; /* Coalesced read requests */ struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; + unsigned int npages; /* active pages in pagevec */ struct nfs_readargs args; struct nfs_readres res; #ifdef CONFIG_NFS_V4 @@ -747,6 +748,7 @@ struct nfs_write_data { struct list_head pages; /* Coalesced requests we wish to flush */ struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; + unsigned int npages; /* active pages in pagevec */ struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ #ifdef CONFIG_NFS_V4 From 20e652761cbf6983fd067aef2f0242c262057737 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 5 Jul 2006 14:31:13 +0100 Subject: [PATCH 02/40] [ARM] 3710/1: AT91 Serial: Use GPIO API Patch from Andrew Victor The AT91RM9200 errata work-around should be using the GPIO API and not accessing the PIO registers directly. Signed-off-by: Andrew Victor Signed-off-by: Russell King --- drivers/serial/at91_serial.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/serial/at91_serial.c b/drivers/serial/at91_serial.c index a7d664383dae..54c6b2adf7b7 100644 --- a/drivers/serial/at91_serial.c +++ b/drivers/serial/at91_serial.c @@ -41,6 +41,7 @@ #include #include #include +#include #if defined(CONFIG_SERIAL_AT91_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) #define SUPPORT_SYSRQ @@ -140,9 +141,9 @@ static void at91_set_mctrl(struct uart_port *port, u_int mctrl) */ if (port->mapbase == AT91_BASE_US0) { if (mctrl & TIOCM_RTS) - at91_sys_write(AT91_PIOA + PIO_CODR, AT91_PA21_RTS0); + at91_set_gpio_value(AT91_PIN_PA21, 0); else - at91_sys_write(AT91_PIOA + PIO_SODR, AT91_PA21_RTS0); + at91_set_gpio_value(AT91_PIN_PA21, 1); } } From 5904a7f9167cdeb95569799e0be652c2ce6d3298 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Wed, 5 Jul 2006 14:47:20 +0100 Subject: [PATCH 03/40] [ARM] 3709/1: pnx4008: convert to generic irq subsystem Patch from Vitaly Wool Convert pnx4008 chip support to use generic irq subsystem Signed-off-by: Vitaly Wool Signed-off-by: Russell King --- arch/arm/mach-pnx4008/core.c | 2 -- arch/arm/mach-pnx4008/dma.c | 1 - arch/arm/mach-pnx4008/irq.c | 22 ++++++++++------------ arch/arm/mach-pnx4008/time.c | 8 +++----- 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/arch/arm/mach-pnx4008/core.c b/arch/arm/mach-pnx4008/core.c index ba91daad64fb..3d73c1e93752 100644 --- a/arch/arm/mach-pnx4008/core.c +++ b/arch/arm/mach-pnx4008/core.c @@ -27,7 +27,6 @@ #include #include -#include #include #include #include @@ -36,7 +35,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-pnx4008/dma.c b/arch/arm/mach-pnx4008/dma.c index 981aa9dcdede..ec01574f88ac 100644 --- a/arch/arm/mach-pnx4008/dma.c +++ b/arch/arm/mach-pnx4008/dma.c @@ -23,7 +23,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm/mach-pnx4008/irq.c b/arch/arm/mach-pnx4008/irq.c index 9b0a8e084e99..3a4bcf3d91fa 100644 --- a/arch/arm/mach-pnx4008/irq.c +++ b/arch/arm/mach-pnx4008/irq.c @@ -22,8 +22,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -96,26 +96,24 @@ void __init pnx4008_init_irq(void) { unsigned int i; - /* configure and enable IRQ 0,1,30,31 (cascade interrupts) mask all others */ + /* configure IRQ's */ + for (i = 0; i < NR_IRQS; i++) { + set_irq_flags(i, IRQF_VALID); + set_irq_chip(i, &pnx4008_irq_chip); + pnx4008_set_irq_type(i, pnx4008_irq_type[i]); + } + + /* configure and enable IRQ 0,1,30,31 (cascade interrupts) */ pnx4008_set_irq_type(SUB1_IRQ_N, pnx4008_irq_type[SUB1_IRQ_N]); pnx4008_set_irq_type(SUB2_IRQ_N, pnx4008_irq_type[SUB2_IRQ_N]); pnx4008_set_irq_type(SUB1_FIQ_N, pnx4008_irq_type[SUB1_FIQ_N]); pnx4008_set_irq_type(SUB2_FIQ_N, pnx4008_irq_type[SUB2_FIQ_N]); + /* mask all others */ __raw_writel((1 << SUB2_FIQ_N) | (1 << SUB1_FIQ_N) | (1 << SUB2_IRQ_N) | (1 << SUB1_IRQ_N), INTC_ER(MAIN_BASE_INT)); __raw_writel(0, INTC_ER(SIC1_BASE_INT)); __raw_writel(0, INTC_ER(SIC2_BASE_INT)); - - /* configure all other IRQ's */ - for (i = 0; i < NR_IRQS; i++) { - if (i == SUB2_FIQ_N || i == SUB1_FIQ_N || - i == SUB2_IRQ_N || i == SUB1_IRQ_N) - continue; - set_irq_flags(i, IRQF_VALID); - set_irq_chip(i, &pnx4008_irq_chip); - pnx4008_set_irq_type(i, pnx4008_irq_type[i]); - } } diff --git a/arch/arm/mach-pnx4008/time.c b/arch/arm/mach-pnx4008/time.c index 888bf6cfba8a..756228ddd035 100644 --- a/arch/arm/mach-pnx4008/time.c +++ b/arch/arm/mach-pnx4008/time.c @@ -20,17 +20,15 @@ #include #include #include +#include +#include +#include #include #include #include #include -#include -#include #include - -#include -#include #include /*! Note: all timers are UPCOUNTING */ From ba854e18413d2d827f050984edeb8286c3335895 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 5 Jul 2006 17:22:52 +0100 Subject: [PATCH 04/40] [ARM] 3711/1: AT91 timer update Patch from Andrew Victor The AIC interrupt controller is the same on the Atmel AT91RM9200, AT91SAM9261 and AT91SAM9260 processors. This patch removes any RM9200-specific naming from the IRQ driver, and moves the AT91RM9200's default IRQ priority table into at91rm9200.c. Signed-off-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91rm9200/at91rm9200.c | 45 +++++++++++++++++ arch/arm/mach-at91rm9200/generic.h | 8 ++- arch/arm/mach-at91rm9200/irq.c | 70 ++++++-------------------- include/asm-arm/arch-at91rm9200/irqs.h | 8 --- 4 files changed, 66 insertions(+), 65 deletions(-) diff --git a/arch/arm/mach-at91rm9200/at91rm9200.c b/arch/arm/mach-at91rm9200/at91rm9200.c index 7e1d072bdd80..0985b1c42c7c 100644 --- a/arch/arm/mach-at91rm9200/at91rm9200.c +++ b/arch/arm/mach-at91rm9200/at91rm9200.c @@ -107,3 +107,48 @@ void __init at91rm9200_map_io(void) iotable_init(at91rm9200_io_desc, ARRAY_SIZE(at91rm9200_io_desc)); } +/* + * The default interrupt priority levels (0 = lowest, 7 = highest). + */ +static unsigned int at91rm9200_default_irq_priority[NR_AIC_IRQS] __initdata = { + 7, /* Advanced Interrupt Controller (FIQ) */ + 7, /* System Peripherals */ + 0, /* Parallel IO Controller A */ + 0, /* Parallel IO Controller B */ + 0, /* Parallel IO Controller C */ + 0, /* Parallel IO Controller D */ + 6, /* USART 0 */ + 6, /* USART 1 */ + 6, /* USART 2 */ + 6, /* USART 3 */ + 0, /* Multimedia Card Interface */ + 4, /* USB Device Port */ + 0, /* Two-Wire Interface */ + 6, /* Serial Peripheral Interface */ + 5, /* Serial Synchronous Controller 0 */ + 5, /* Serial Synchronous Controller 1 */ + 5, /* Serial Synchronous Controller 2 */ + 0, /* Timer Counter 0 */ + 0, /* Timer Counter 1 */ + 0, /* Timer Counter 2 */ + 0, /* Timer Counter 3 */ + 0, /* Timer Counter 4 */ + 0, /* Timer Counter 5 */ + 3, /* USB Host port */ + 3, /* Ethernet MAC */ + 0, /* Advanced Interrupt Controller (IRQ0) */ + 0, /* Advanced Interrupt Controller (IRQ1) */ + 0, /* Advanced Interrupt Controller (IRQ2) */ + 0, /* Advanced Interrupt Controller (IRQ3) */ + 0, /* Advanced Interrupt Controller (IRQ4) */ + 0, /* Advanced Interrupt Controller (IRQ5) */ + 0 /* Advanced Interrupt Controller (IRQ6) */ +}; + +void __init at91rm9200_init_irq(unsigned int priority[NR_AIC_IRQS]) +{ + if (!priority) + priority = at91rm9200_default_irq_priority; + + at91_aic_init(priority); +} diff --git a/arch/arm/mach-at91rm9200/generic.h b/arch/arm/mach-at91rm9200/generic.h index f0d969d7d874..7979d8ab7e07 100644 --- a/arch/arm/mach-at91rm9200/generic.h +++ b/arch/arm/mach-at91rm9200/generic.h @@ -8,13 +8,19 @@ * published by the Free Software Foundation. */ -void at91_gpio_irq_setup(unsigned banks); + /* Interrupts */ +extern void __init at91rm9200_init_irq(unsigned int priority[]); +extern void __init at91_aic_init(unsigned int priority[]); +extern void __init at91_gpio_irq_setup(unsigned banks); + /* Timer */ struct sys_timer; extern struct sys_timer at91rm9200_timer; + /* Memory Map */ extern void __init at91rm9200_map_io(void); + /* Clocks */ extern int __init at91_clock_init(unsigned long main_clock); struct device; extern void __init at91_clock_associate(const char *id, struct device *dev, const char *func); diff --git a/arch/arm/mach-at91rm9200/irq.c b/arch/arm/mach-at91rm9200/irq.c index dcd560dbcfb7..9b0911320417 100644 --- a/arch/arm/mach-at91rm9200/irq.c +++ b/arch/arm/mach-at91rm9200/irq.c @@ -36,58 +36,20 @@ #include "generic.h" -/* - * The default interrupt priority levels (0 = lowest, 7 = highest). - */ -static unsigned int at91rm9200_default_irq_priority[NR_AIC_IRQS] __initdata = { - 7, /* Advanced Interrupt Controller */ - 7, /* System Peripheral */ - 0, /* Parallel IO Controller A */ - 0, /* Parallel IO Controller B */ - 0, /* Parallel IO Controller C */ - 0, /* Parallel IO Controller D */ - 6, /* USART 0 */ - 6, /* USART 1 */ - 6, /* USART 2 */ - 6, /* USART 3 */ - 0, /* Multimedia Card Interface */ - 4, /* USB Device Port */ - 0, /* Two-Wire Interface */ - 6, /* Serial Peripheral Interface */ - 5, /* Serial Synchronous Controller */ - 5, /* Serial Synchronous Controller */ - 5, /* Serial Synchronous Controller */ - 0, /* Timer Counter 0 */ - 0, /* Timer Counter 1 */ - 0, /* Timer Counter 2 */ - 0, /* Timer Counter 3 */ - 0, /* Timer Counter 4 */ - 0, /* Timer Counter 5 */ - 3, /* USB Host port */ - 3, /* Ethernet MAC */ - 0, /* Advanced Interrupt Controller */ - 0, /* Advanced Interrupt Controller */ - 0, /* Advanced Interrupt Controller */ - 0, /* Advanced Interrupt Controller */ - 0, /* Advanced Interrupt Controller */ - 0, /* Advanced Interrupt Controller */ - 0 /* Advanced Interrupt Controller */ -}; - -static void at91rm9200_mask_irq(unsigned int irq) +static void at91_aic_mask_irq(unsigned int irq) { /* Disable interrupt on AIC */ at91_sys_write(AT91_AIC_IDCR, 1 << irq); } -static void at91rm9200_unmask_irq(unsigned int irq) +static void at91_aic_unmask_irq(unsigned int irq) { /* Enable interrupt on AIC */ at91_sys_write(AT91_AIC_IECR, 1 << irq); } -static int at91rm9200_irq_type(unsigned irq, unsigned type) +static int at91_aic_set_type(unsigned irq, unsigned type) { unsigned int smr, srctype; @@ -122,7 +84,7 @@ static int at91rm9200_irq_type(unsigned irq, unsigned type) static u32 wakeups; static u32 backups; -static int at91rm9200_irq_set_wake(unsigned irq, unsigned value) +static int at91_aic_set_wake(unsigned irq, unsigned value) { if (unlikely(irq >= 32)) return -EINVAL; @@ -149,28 +111,24 @@ void at91_irq_resume(void) } #else -#define at91rm9200_irq_set_wake NULL +#define at91_aic_set_wake NULL #endif -static struct irqchip at91rm9200_irq_chip = { - .ack = at91rm9200_mask_irq, - .mask = at91rm9200_mask_irq, - .unmask = at91rm9200_unmask_irq, - .set_type = at91rm9200_irq_type, - .set_wake = at91rm9200_irq_set_wake, +static struct irqchip at91_aic_chip = { + .ack = at91_aic_mask_irq, + .mask = at91_aic_mask_irq, + .unmask = at91_aic_unmask_irq, + .set_type = at91_aic_set_type, + .set_wake = at91_aic_set_wake, }; /* * Initialize the AIC interrupt controller. */ -void __init at91rm9200_init_irq(unsigned int priority[NR_AIC_IRQS]) +void __init at91_aic_init(unsigned int priority[NR_AIC_IRQS]) { unsigned int i; - /* No priority list specified for this board -> use defaults */ - if (priority == NULL) - priority = at91rm9200_default_irq_priority; - /* * The IVR is used by macro get_irqnr_and_base to read and verify. * The irq number is NR_AIC_IRQS when a spurious interrupt has occurred. @@ -178,10 +136,10 @@ void __init at91rm9200_init_irq(unsigned int priority[NR_AIC_IRQS]) for (i = 0; i < NR_AIC_IRQS; i++) { /* Put irq number in Source Vector Register: */ at91_sys_write(AT91_AIC_SVR(i), i); - /* Store the Source Mode Register as defined in table above */ + /* Active Low interrupt, with the specified priority */ at91_sys_write(AT91_AIC_SMR(i), AT91_AIC_SRCTYPE_LOW | priority[i]); - set_irq_chip(i, &at91rm9200_irq_chip); + set_irq_chip(i, &at91_aic_chip); set_irq_handler(i, do_level_IRQ); set_irq_flags(i, IRQF_VALID | IRQF_PROBE); diff --git a/include/asm-arm/arch-at91rm9200/irqs.h b/include/asm-arm/arch-at91rm9200/irqs.h index 2dc93b174a8f..f63842c2c093 100644 --- a/include/asm-arm/arch-at91rm9200/irqs.h +++ b/include/asm-arm/arch-at91rm9200/irqs.h @@ -39,12 +39,4 @@ */ #define NR_IRQS (NR_AIC_IRQS + (4 * 32)) - -#ifndef __ASSEMBLY__ -/* - * Initialize the IRQ controller. - */ -extern void at91rm9200_init_irq(unsigned int priority[]); -#endif - #endif From ba1826e5eced176cc9ec0033ad8ee0f1cd5ad2e4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 5 Jul 2006 15:36:15 +1000 Subject: [PATCH 05/40] [PATCH] powerpc: Fix loss of interrupts with MPIC With the new interrupt rework, an interrupt "host" map() callback can be called after the interrupt is already active. It's called again for an already mapped interrupt to allow changing the trigger setup, and currently this is not guarded with a test of wether the interrupt is requested or not. I plan to change some of this logic to be a bit less lenient against random reconfiguring of live interrupts but just not yet. The ported MPIC driver has a bug where when that happens, it will mask the interrupt. This changes it to preserve the previous masking of the interrupt instead. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/mpic.c | 39 ++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 7d31d7cc392d..9cecebaa0360 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -405,20 +405,22 @@ static void mpic_unmask_irq(unsigned int irq) unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); + unsigned long flags; DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src); + spin_lock_irqsave(&mpic_lock, flags); mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_MASK); - /* make sure mask gets to controller before we return to user */ do { if (!loops--) { printk(KERN_ERR "mpic_enable_irq timeout\n"); break; } - } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); + } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); + spin_unlock_irqrestore(&mpic_lock, flags); } static void mpic_mask_irq(unsigned int irq) @@ -426,9 +428,11 @@ static void mpic_mask_irq(unsigned int irq) unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); + unsigned long flags; DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); + spin_lock_irqsave(&mpic_lock, flags); mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | MPIC_VECPRI_MASK); @@ -440,6 +444,7 @@ static void mpic_mask_irq(unsigned int irq) break; } } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK)); + spin_unlock_irqrestore(&mpic_lock, flags); } static void mpic_end_irq(unsigned int irq) @@ -624,9 +629,10 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq, struct irq_desc *desc = get_irq_desc(virq); struct irq_chip *chip; struct mpic *mpic = h->host_data; - unsigned int vecpri = MPIC_VECPRI_SENSE_LEVEL | + u32 v, vecpri = MPIC_VECPRI_SENSE_LEVEL | MPIC_VECPRI_POLARITY_NEGATIVE; int level; + unsigned long iflags; pr_debug("mpic: map virq %d, hwirq 0x%lx, flags: 0x%x\n", virq, hw, flags); @@ -668,11 +674,21 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq, } #endif - /* Reconfigure irq */ - vecpri |= MPIC_VECPRI_MASK | hw | (8 << MPIC_VECPRI_PRIORITY_SHIFT); - mpic_irq_write(hw, MPIC_IRQ_VECTOR_PRI, vecpri); - - pr_debug("mpic: mapping as IRQ\n"); + /* Reconfigure irq. We must preserve the mask bit as we can be called + * while the interrupt is still active (This may change in the future + * but for now, it is the case). + */ + spin_lock_irqsave(&mpic_lock, iflags); + v = mpic_irq_read(hw, MPIC_IRQ_VECTOR_PRI); + vecpri = (v & + ~(MPIC_VECPRI_POLARITY_MASK | MPIC_VECPRI_SENSE_MASK)) | + vecpri; + if (vecpri != v) + mpic_irq_write(hw, MPIC_IRQ_VECTOR_PRI, vecpri); + spin_unlock_irqrestore(&mpic_lock, iflags); + + pr_debug("mpic: mapping as IRQ, vecpri = 0x%08x (was 0x%08x)\n", + vecpri, v); set_irq_chip_data(virq, mpic); set_irq_chip_and_handler(virq, chip, handle_fasteoi_irq); @@ -904,8 +920,8 @@ void __init mpic_init(struct mpic *mpic) /* do senses munging */ if (mpic->senses && i < mpic->senses_count) - vecpri = mpic_flags_to_vecpri(mpic->senses[i], - &level); + vecpri |= mpic_flags_to_vecpri(mpic->senses[i], + &level); else vecpri |= MPIC_VECPRI_SENSE_LEVEL; @@ -955,14 +971,17 @@ void __init mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio) void __init mpic_set_serial_int(struct mpic *mpic, int enable) { + unsigned long flags; u32 v; + spin_lock_irqsave(&mpic_lock, flags); v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1); if (enable) v |= MPIC_GREG_GLOBAL_CONF_1_SIE; else v &= ~MPIC_GREG_GLOBAL_CONF_1_SIE; mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v); + spin_unlock_irqrestore(&mpic_lock, flags); } void mpic_irq_set_priority(unsigned int irq, unsigned int pri) From a8e0c51c71fc973b400f6502382063553b82af5a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 5 Jul 2006 11:24:26 +0100 Subject: [PATCH 06/40] [PATCH] powerpc: implement missing jiffies64_to_cputime64() asm-powerpc/cputime.h doesn't declare jiffies64_to_cputime64() or cputime64_sub(), and due to CONFIG_VIRT_CPU_ACCOUNTING it's not picking up the definition from asm-generic like x86-64 & friends do. Cc: Dave Jones Cc: Andrew Morton Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- include/asm-powerpc/cputime.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/asm-powerpc/cputime.h b/include/asm-powerpc/cputime.h index a21185d47883..310804485208 100644 --- a/include/asm-powerpc/cputime.h +++ b/include/asm-powerpc/cputime.h @@ -43,6 +43,7 @@ typedef u64 cputime64_t; #define cputime64_zero ((cputime64_t)0) #define cputime64_add(__a, __b) ((__a) + (__b)) +#define cputime64_sub(__a, __b) ((__a) - (__b)) #define cputime_to_cputime64(__ct) (__ct) #ifdef __KERNEL__ @@ -74,6 +75,23 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif) return ct; } +static inline cputime64_t jiffies64_to_cputime64(const u64 jif) +{ + cputime_t ct; + u64 sec; + + /* have to be a little careful about overflow */ + ct = jif % HZ; + sec = jif / HZ; + if (ct) { + ct *= tb_ticks_per_sec; + do_div(ct, HZ); + } + if (sec) + ct += (cputime_t) sec * tb_ticks_per_sec; + return ct; +} + static inline u64 cputime64_to_jiffies64(const cputime_t ct) { return mulhdu(ct, __cputime_jiffies_factor); From e340221acda6bc0bf05a0ff6e6114902c4307670 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 5 Jul 2006 18:57:48 +0200 Subject: [PATCH 07/40] [PATCH] Makefile typo Fix a typo in the toplevel makefile. Signed-off-by: Andreas Schwab Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 11a850cffd3d..82f76a96cfe6 100644 --- a/Makefile +++ b/Makefile @@ -528,7 +528,7 @@ export MODLIB ifdef INSTALL_MOD_STRIP ifeq ($(INSTALL_MOD_STRIP),1) -mod_strip_cmd = $STRIP) --strip-debug +mod_strip_cmd = $(STRIP) --strip-debug else mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP) endif # INSTALL_MOD_STRIP=1 From f475ae957db66650db66916c62604ac27409d884 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Jun 2006 16:38:32 -0400 Subject: [PATCH 08/40] VFS: Allow caller to determine if BSD or posix locks were actually freed Change posix_lock_file_conf(), and flock_lock_file() so that if called with an F_UNLCK argument, and the FL_EXISTS flag they will indicate whether or not any locks were actually freed by returning 0 or -ENOENT. Signed-off-by: Trond Myklebust --- fs/locks.c | 18 ++++++++++++++++-- include/linux/fs.h | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 1ad29c9b6252..50cb0a2b74d9 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -725,6 +725,10 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks * at the head of the list, but that's secret knowledge known only to * flock_lock_file and posix_lock_file. + * + * Note that if called with an FL_EXISTS argument, the caller may determine + * whether or not a lock was successfully freed by testing the return + * value for -ENOENT. */ static int flock_lock_file(struct file *filp, struct file_lock *request) { @@ -750,8 +754,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) break; } - if (request->fl_type == F_UNLCK) + if (request->fl_type == F_UNLCK) { + if ((request->fl_flags & FL_EXISTS) && !found) + error = -ENOENT; goto out; + } error = -ENOMEM; new_fl = locks_alloc_lock(); @@ -948,8 +955,11 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request error = 0; if (!added) { - if (request->fl_type == F_UNLCK) + if (request->fl_type == F_UNLCK) { + if (request->fl_flags & FL_EXISTS) + error = -ENOENT; goto out; + } if (!new_fl) { error = -ENOLCK; @@ -996,6 +1006,10 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request * Add a POSIX style lock to a file. * We merge adjacent & overlapping locks whenever possible. * POSIX locks are sorted by owner task, then by starting address + * + * Note that if called with an FL_EXISTS argument, the caller may determine + * whether or not a lock was successfully freed by testing the return + * value for -ENOENT. */ int posix_lock_file(struct file *filp, struct file_lock *fl) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 134b32068246..43aef9b230fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -716,6 +716,7 @@ extern spinlock_t files_lock; #define FL_POSIX 1 #define FL_FLOCK 2 #define FL_ACCESS 8 /* not trying to lock, just looking */ +#define FL_EXISTS 16 /* when unlocking, test for existence */ #define FL_LEASE 32 /* lease held on this file */ #define FL_CLOSE 64 /* unlock on close */ #define FL_SLEEP 128 /* A blocking lock */ From 9b07357490e5c7a1c3c2b6f4679d7ee4b4185ecd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Jun 2006 16:38:34 -0400 Subject: [PATCH 09/40] NLM,NFSv4: Don't put UNLOCK requests on the wire unless we hold a lock Use the new behaviour of {flock,posix}_file_lock(F_UNLCK) to determine if we held a lock, and only send the RPC request to the server if this was the case. Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 18 ++++++++++-------- fs/nfs/nfs4proc.c | 31 ++++++++++++------------------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 5980c45998cc..24c691f5480e 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -454,7 +454,7 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho fl->fl_ops = &nlmclnt_lock_ops; } -static void do_vfs_lock(struct file_lock *fl) +static int do_vfs_lock(struct file_lock *fl) { int res = 0; switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { @@ -467,9 +467,7 @@ static void do_vfs_lock(struct file_lock *fl) default: BUG(); } - if (res < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", - __FUNCTION__); + return res; } /* @@ -541,7 +539,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } fl->fl_flags |= FL_SLEEP; /* Ensure the resulting lock will get added to granted list */ - do_vfs_lock(fl); + if (do_vfs_lock(fl) < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); up_read(&host->h_rwsem); } status = nlm_stat_to_errno(resp->status); @@ -606,15 +605,19 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - int status; + int status = 0; /* * Note: the server is supposed to either grant us the unlock * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either * case, we want to unlock. */ + fl->fl_flags |= FL_EXISTS; down_read(&host->h_rwsem); - do_vfs_lock(fl); + if (do_vfs_lock(fl) == -ENOENT) { + up_read(&host->h_rwsem); + goto out; + } up_read(&host->h_rwsem); if (req->a_flags & RPC_TASK_ASYNC) @@ -624,7 +627,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) if (status < 0) goto out; - status = 0; if (resp->status == NLM_LCK_GRANTED) goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b4916b092194..b8c63757f039 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3144,9 +3144,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) default: BUG(); } - if (res < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", - __FUNCTION__); return res; } @@ -3258,8 +3255,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - /* Unlock _before_ we do the RPC call */ - do_vfs_lock(fl->fl_file, fl); return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); } @@ -3270,30 +3265,28 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * struct rpc_task *task; int status = 0; - /* Is this a delegated lock? */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - goto out_unlock; - /* Is this open_owner holding any locks on the server? */ - if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) - goto out_unlock; - status = nfs4_set_lock_state(state, request); + /* Unlock _before_ we do the RPC call */ + request->fl_flags |= FL_EXISTS; + if (do_vfs_lock(request->fl_file, request) == -ENOENT) + goto out; if (status != 0) - goto out_unlock; + goto out; + /* Is this a delegated lock? */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; lsp = request->fl_u.nfs4_fl.owner; - status = -ENOMEM; seqid = nfs_alloc_seqid(&lsp->ls_seqid); + status = -ENOMEM; if (seqid == NULL) - goto out_unlock; + goto out; task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) - goto out_unlock; + goto out; status = nfs4_wait_for_completion_rpc_task(task); rpc_release_task(task); - return status; -out_unlock: - do_vfs_lock(request->fl_file, request); +out: return status; } From 42a2d13eee3c895d22e9d1a52b96d15ca49adabc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Jun 2006 16:38:36 -0400 Subject: [PATCH 10/40] NFSv4: Ensure nfs4_lock_expired() caches delegated locks Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b8c63757f039..8bdfe3ff7925 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3454,10 +3454,10 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request struct nfs4_exception exception = { }; int err; - /* Cache the lock if possible... */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - return 0; do { + /* Cache the lock if possible... */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) + return 0; err = _nfs4_do_setlk(state, F_SETLK, request, 1); if (err != -NFS4ERR_DELAY) break; @@ -3476,6 +3476,8 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request if (err != 0) return err; do { + if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) + return 0; err = _nfs4_do_setlk(state, F_SETLK, request, 0); if (err != -NFS4ERR_DELAY) break; From f07f18dd6f29f11887b8d9cf7ecb736bf2f7dc62 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Jun 2006 16:38:37 -0400 Subject: [PATCH 11/40] VFS: Add support for the FL_ACCESS flag to flock_lock_file() Signed-off-by: Trond Myklebust --- fs/locks.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index 50cb0a2b74d9..b0b41a64e10b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -739,6 +739,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) int found = 0; lock_kernel(); + if (request->fl_flags & FL_ACCESS) + goto find_conflict; for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) @@ -771,6 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) if (found) cond_resched(); +find_conflict: for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) @@ -784,6 +787,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) locks_insert_block(fl, request); goto out; } + if (request->fl_flags & FL_ACCESS) + goto out; locks_copy_lock(new_fl, request); locks_insert_lock(&inode->i_flock, new_fl); new_fl = NULL; From 01c3b861cd77b28565a2d18c7caa3ce7f938e35c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Jun 2006 16:38:39 -0400 Subject: [PATCH 12/40] NLM,NFSv4: Wait on local locks before we put RPC calls on the wire Use FL_ACCESS flag to test and/or wait for local locks before we try requesting a lock from the server Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 8 +++++++- fs/nfs/nfs4proc.c | 35 ++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 24c691f5480e..89ba0df14c22 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -496,6 +496,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; struct nlm_wait *block = NULL; + unsigned char fl_flags = fl->fl_flags; int status = -ENOLCK; if (!host->h_monitored && nsm_monitor(host) < 0) { @@ -503,6 +504,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) host->h_name); goto out; } + fl->fl_flags |= FL_ACCESS; + status = do_vfs_lock(fl); + if (status < 0) + goto out; block = nlmclnt_prepare_block(host, fl); again: @@ -537,8 +542,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) up_read(&host->h_rwsem); goto again; } - fl->fl_flags |= FL_SLEEP; /* Ensure the resulting lock will get added to granted list */ + fl->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(fl) < 0) printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); up_read(&host->h_rwsem); @@ -551,6 +556,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) nlmclnt_cancel(host, req->a_args.block, fl); out: nlm_release_call(req); + fl->fl_flags = fl_flags; return status; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8bdfe3ff7925..e6ee97f19d81 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3489,29 +3489,42 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs4_client *clp = state->owner->so_client; + unsigned char fl_flags = request->fl_flags; int status; /* Is this a delegated open? */ - if (NFS_I(state->inode)->delegation_state != 0) { - /* Yes: cache locks! */ - status = do_vfs_lock(request->fl_file, request); - /* ...but avoid races with delegation recall... */ - if (status < 0 || test_bit(NFS_DELEGATED_STATE, &state->flags)) - return status; - } - down_read(&clp->cl_sem); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; + request->fl_flags |= FL_ACCESS; + status = do_vfs_lock(request->fl_file, request); + if (status < 0) + goto out; + down_read(&clp->cl_sem); + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + struct nfs_inode *nfsi = NFS_I(state->inode); + /* Yes: cache locks! */ + down_read(&nfsi->rwsem); + /* ...but avoid races with delegation recall... */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + request->fl_flags = fl_flags & ~FL_SLEEP; + status = do_vfs_lock(request->fl_file, request); + up_read(&nfsi->rwsem); + goto out_unlock; + } + up_read(&nfsi->rwsem); + } status = _nfs4_do_setlk(state, cmd, request, 0); if (status != 0) - goto out; + goto out_unlock; /* Note: we always want to sleep here! */ - request->fl_flags |= FL_SLEEP; + request->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(request->fl_file, request) < 0) printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); -out: +out_unlock: up_read(&clp->cl_sem); +out: + request->fl_flags = fl_flags; return status; } From 83715ad54fad5a7ed330110f83e31ae92630e9d9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Jul 2006 13:17:12 -0400 Subject: [PATCH 13/40] NFS: Fix NFS page_state usage The introduction of the FLUSH_INVALIDATE argument to nfs_sync_inode_wait() does not clear the nr_unstable page state counter for pages that are being released. Also fix a longstanding similar bug when nfs_commit_list() fails. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bca5734ca9fb..86bac6a5008e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -578,7 +578,7 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un return ret; } -static void nfs_cancel_requests(struct list_head *head) +static void nfs_cancel_dirty_list(struct list_head *head) { struct nfs_page *req; while(!list_empty(head)) { @@ -589,6 +589,19 @@ static void nfs_cancel_requests(struct list_head *head) } } +static void nfs_cancel_commit_list(struct list_head *head) +{ + struct nfs_page *req; + + while(!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_inode_remove_request(req); + nfs_clear_page_writeback(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + } +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -1381,6 +1394,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_list_remove_request(req); nfs_mark_request_commit(req); nfs_clear_page_writeback(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); } return -ENOMEM; } @@ -1499,7 +1513,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, if (pages != 0) { spin_unlock(&nfsi->req_lock); if (how & FLUSH_INVALIDATE) - nfs_cancel_requests(&head); + nfs_cancel_dirty_list(&head); else ret = nfs_flush_list(inode, &head, pages, how); spin_lock(&nfsi->req_lock); @@ -1512,7 +1526,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, break; if (how & FLUSH_INVALIDATE) { spin_unlock(&nfsi->req_lock); - nfs_cancel_requests(&head); + nfs_cancel_commit_list(&head); spin_lock(&nfsi->req_lock); continue; } From bce3481c91801665e17f8daf59ede946129f3d3f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Jul 2006 13:17:12 -0400 Subject: [PATCH 14/40] This fixes a panic doing the first READDIR or READDIRPLUS call when: * the client is ia64 or any platform that actually implements flush_dcache_page(), and * the server returns fsinfo.dtpref >= client's PAGE_SIZE, and * the server does *not* return post-op attributes for the directory in the READDIR reply. Problem diagnosed by Greg Banks Signed-off-by: Trond Myklebust --- net/sunrpc/xdr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 49174f0d0a3e..6ac45103a272 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -191,7 +191,6 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, do { /* Are any pointers crossing a page boundary? */ if (pgto_base == 0) { - flush_dcache_page(*pgto); pgto_base = PAGE_CACHE_SIZE; pgto--; } @@ -211,11 +210,11 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, vto = kmap_atomic(*pgto, KM_USER0); vfrom = kmap_atomic(*pgfrom, KM_USER1); memmove(vto + pgto_base, vfrom + pgfrom_base, copy); + flush_dcache_page(*pgto); kunmap_atomic(vfrom, KM_USER1); kunmap_atomic(vto, KM_USER0); } while ((len -= copy) != 0); - flush_dcache_page(*pgto); } /* From 4e0641a7ad98fca5646a6be17605bc80f6c4ebde Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Jul 2006 13:05:13 -0400 Subject: [PATCH 15/40] NFS: Optimise away an excessive GETATTR call when a file is symlinked In the case when compiling via a symlink tree, we want to ensure that the close-to-open GETATTR call is applied only to the final file, and not to the symlink. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3ddda6f7ecc2..e7ffb4deb3e5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -690,7 +690,9 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) goto out_force; /* This is an open(2) */ if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO)) + !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || + S_ISDIR(inode->i_mode))) goto out_force; } return nfs_revalidate_inode(server, inode); From a93620b860434a19820072e656f4933e101ea6d4 Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Wed, 28 Jun 2006 12:17:47 -0400 Subject: [PATCH 16/40] [PATCH] sata_vsc: data_xfer should use mmio Hi, sata_vsc is an MMIO device, and should use the correct data_xfer function. This problem was introduced by: commit a6b2c5d4754dc539a560fdf0d3fb78a14174394a Author: Alan Cox Date: Mon May 22 16:59:59 2006 +0100 [PATCH] PATCH: libata. Add ->data_xfer method Signed-off-by: Martin Hicks Signed-off-by: Jeff Garzik --- drivers/scsi/sata_vsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index 916fe6fba756..ad37871594f5 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -297,7 +297,7 @@ static const struct ata_port_operations vsc_sata_ops = { .bmdma_status = ata_bmdma_status, .qc_prep = ata_qc_prep, .qc_issue = ata_qc_issue_prot, - .data_xfer = ata_pio_data_xfer, + .data_xfer = ata_mmio_data_xfer, .freeze = ata_bmdma_freeze, .thaw = ata_bmdma_thaw, .error_handler = ata_bmdma_error_handler, From e6d902a3bfd53da375588e498251f4f4f6cd9650 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 28 Jun 2006 08:30:31 -0500 Subject: [PATCH 17/40] [PATCH] libata: Conditionally set host->max_cmd_len In preparation for SAS attached SATA devices, which will not have a libata scsi_host, only setup host->max_cmd_len if ap->host exists. Signed-off-by: Brian King Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 1c960ac1617f..fd0e6cc0619f 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1325,6 +1325,19 @@ static void ata_dev_config_ncq(struct ata_device *dev, snprintf(desc, desc_sz, "NCQ (depth %d/%d)", hdepth, ddepth); } +static void ata_set_port_max_cmd_len(struct ata_port *ap) +{ + int i; + + if (ap->host) { + ap->host->max_cmd_len = 0; + for (i = 0; i < ATA_MAX_DEVICES; i++) + ap->host->max_cmd_len = max_t(unsigned int, + ap->host->max_cmd_len, + ap->device[i].cdb_len); + } +} + /** * ata_dev_configure - Configure the specified ATA/ATAPI device * @dev: Target device to configure @@ -1344,7 +1357,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) struct ata_port *ap = dev->ap; const u16 *id = dev->id; unsigned int xfer_mask; - int i, rc; + int rc; if (!ata_dev_enabled(dev) && ata_msg_info(ap)) { ata_dev_printk(dev, KERN_INFO, @@ -1474,11 +1487,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) cdb_intr_string); } - ap->host->max_cmd_len = 0; - for (i = 0; i < ATA_MAX_DEVICES; i++) - ap->host->max_cmd_len = max_t(unsigned int, - ap->host->max_cmd_len, - ap->device[i].cdb_len); + ata_set_port_max_cmd_len(ap); /* limit bridge transfers to udma5, 200 sectors */ if (ata_dev_knobble(dev)) { From b51e9e5db0e36239f786692f1cac6e435ed30c66 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Jun 2006 01:29:30 +0900 Subject: [PATCH 18/40] [PATCH] libata: add ap->pflags and move core dynamic flags to it ap->flags is way too clamped. Separate out core dynamic flags to ap->pflags. ATA_FLAG_DISABLED is a dynamic flag but left alone as it's referenced by a lot of LLDs and it's gonna be removed once all LLDs are converted to new EH. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/ahci.c | 2 +- drivers/scsi/libata-core.c | 24 +++++++++--------- drivers/scsi/libata-eh.c | 50 +++++++++++++++++++------------------- drivers/scsi/libata-scsi.c | 2 +- drivers/scsi/sata_sil.c | 2 +- include/linux/libata.h | 30 ++++++++++++++--------- 6 files changed, 58 insertions(+), 52 deletions(-) diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 15f6cd4279b7..7447ba0374e8 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -1052,7 +1052,7 @@ static void ahci_thaw(struct ata_port *ap) static void ahci_error_handler(struct ata_port *ap) { - if (!(ap->flags & ATA_FLAG_FROZEN)) { + if (!(ap->pflags & ATA_PFLAG_FROZEN)) { /* restart engine */ ahci_stop_engine(ap); ahci_start_engine(ap); diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index fd0e6cc0619f..ccab1d7ceada 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -907,7 +907,7 @@ void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data, { int rc; - if (ap->flags & ATA_FLAG_FLUSH_PORT_TASK) + if (ap->pflags & ATA_PFLAG_FLUSH_PORT_TASK) return; PREPARE_WORK(&ap->port_task, fn, data); @@ -938,7 +938,7 @@ void ata_port_flush_task(struct ata_port *ap) DPRINTK("ENTER\n"); spin_lock_irqsave(ap->lock, flags); - ap->flags |= ATA_FLAG_FLUSH_PORT_TASK; + ap->pflags |= ATA_PFLAG_FLUSH_PORT_TASK; spin_unlock_irqrestore(ap->lock, flags); DPRINTK("flush #1\n"); @@ -957,7 +957,7 @@ void ata_port_flush_task(struct ata_port *ap) } spin_lock_irqsave(ap->lock, flags); - ap->flags &= ~ATA_FLAG_FLUSH_PORT_TASK; + ap->pflags &= ~ATA_PFLAG_FLUSH_PORT_TASK; spin_unlock_irqrestore(ap->lock, flags); if (ata_msg_ctl(ap)) @@ -1009,7 +1009,7 @@ unsigned ata_exec_internal(struct ata_device *dev, spin_lock_irqsave(ap->lock, flags); /* no internal command while frozen */ - if (ap->flags & ATA_FLAG_FROZEN) { + if (ap->pflags & ATA_PFLAG_FROZEN) { spin_unlock_irqrestore(ap->lock, flags); return AC_ERR_SYSTEM; } @@ -2641,7 +2641,7 @@ int ata_std_prereset(struct ata_port *ap) /* if SATA, resume phy */ if (ap->cbl == ATA_CBL_SATA) { - if (ap->flags & ATA_FLAG_LOADING) + if (ap->pflags & ATA_PFLAG_LOADING) timing = sata_deb_timing_boot; else timing = sata_deb_timing_eh; @@ -4294,7 +4294,7 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) unsigned int i; /* no command while frozen */ - if (unlikely(ap->flags & ATA_FLAG_FROZEN)) + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) return NULL; /* the last tag is reserved for internal command. */ @@ -4416,7 +4416,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc) * taken care of. */ if (ap->ops->error_handler) { - WARN_ON(ap->flags & ATA_FLAG_FROZEN); + WARN_ON(ap->pflags & ATA_PFLAG_FROZEN); if (unlikely(qc->err_mask)) qc->flags |= ATA_QCFLAG_FAILED; @@ -5051,13 +5051,13 @@ int ata_device_resume(struct ata_device *dev) { struct ata_port *ap = dev->ap; - if (ap->flags & ATA_FLAG_SUSPENDED) { + if (ap->pflags & ATA_PFLAG_SUSPENDED) { struct ata_device *failed_dev; ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT); ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 200000); - ap->flags &= ~ATA_FLAG_SUSPENDED; + ap->pflags &= ~ATA_PFLAG_SUSPENDED; while (ata_set_mode(ap, &failed_dev)) ata_dev_disable(failed_dev); } @@ -5088,7 +5088,7 @@ int ata_device_suspend(struct ata_device *dev, pm_message_t state) if (state.event != PM_EVENT_FREEZE) ata_standby_drive(dev); - ap->flags |= ATA_FLAG_SUSPENDED; + ap->pflags |= ATA_PFLAG_SUSPENDED; return 0; } @@ -5459,7 +5459,7 @@ int ata_device_add(const struct ata_probe_ent *ent) ap->eh_info.probe_mask = (1 << ATA_MAX_DEVICES) - 1; ap->eh_info.action |= ATA_EH_SOFTRESET; - ap->flags |= ATA_FLAG_LOADING; + ap->pflags |= ATA_PFLAG_LOADING; ata_port_schedule_eh(ap); spin_unlock_irqrestore(ap->lock, flags); @@ -5527,7 +5527,7 @@ void ata_port_detach(struct ata_port *ap) /* tell EH we're leaving & flush EH */ spin_lock_irqsave(ap->lock, flags); - ap->flags |= ATA_FLAG_UNLOADING; + ap->pflags |= ATA_PFLAG_UNLOADING; spin_unlock_irqrestore(ap->lock, flags); ata_port_wait_eh(ap); diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index bf5a72aca8a4..f2f29a8bc38e 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -270,13 +270,13 @@ void ata_scsi_error(struct Scsi_Host *host) ap->eh_context.i = ap->eh_info; memset(&ap->eh_info, 0, sizeof(ap->eh_info)); - ap->flags |= ATA_FLAG_EH_IN_PROGRESS; - ap->flags &= ~ATA_FLAG_EH_PENDING; + ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; + ap->pflags &= ~ATA_PFLAG_EH_PENDING; spin_unlock_irqrestore(ap_lock, flags); /* invoke EH. if unloading, just finish failed qcs */ - if (!(ap->flags & ATA_FLAG_UNLOADING)) + if (!(ap->pflags & ATA_PFLAG_UNLOADING)) ap->ops->error_handler(ap); else ata_eh_finish(ap); @@ -287,7 +287,7 @@ void ata_scsi_error(struct Scsi_Host *host) */ spin_lock_irqsave(ap_lock, flags); - if (ap->flags & ATA_FLAG_EH_PENDING) { + if (ap->pflags & ATA_PFLAG_EH_PENDING) { if (--repeat_cnt) { ata_port_printk(ap, KERN_INFO, "EH pending after completion, " @@ -323,19 +323,19 @@ void ata_scsi_error(struct Scsi_Host *host) /* clean up */ spin_lock_irqsave(ap_lock, flags); - if (ap->flags & ATA_FLAG_LOADING) { - ap->flags &= ~ATA_FLAG_LOADING; + if (ap->pflags & ATA_PFLAG_LOADING) { + ap->pflags &= ~ATA_PFLAG_LOADING; } else { - if (ap->flags & ATA_FLAG_SCSI_HOTPLUG) + if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) queue_work(ata_aux_wq, &ap->hotplug_task); - if (ap->flags & ATA_FLAG_RECOVERED) + if (ap->pflags & ATA_PFLAG_RECOVERED) ata_port_printk(ap, KERN_INFO, "EH complete\n"); } - ap->flags &= ~(ATA_FLAG_SCSI_HOTPLUG | ATA_FLAG_RECOVERED); + ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); /* tell wait_eh that we're done */ - ap->flags &= ~ATA_FLAG_EH_IN_PROGRESS; + ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; wake_up_all(&ap->eh_wait_q); spin_unlock_irqrestore(ap_lock, flags); @@ -360,7 +360,7 @@ void ata_port_wait_eh(struct ata_port *ap) retry: spin_lock_irqsave(ap->lock, flags); - while (ap->flags & (ATA_FLAG_EH_PENDING | ATA_FLAG_EH_IN_PROGRESS)) { + while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(ap->lock, flags); schedule(); @@ -489,7 +489,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) WARN_ON(!ap->ops->error_handler); qc->flags |= ATA_QCFLAG_FAILED; - qc->ap->flags |= ATA_FLAG_EH_PENDING; + qc->ap->pflags |= ATA_PFLAG_EH_PENDING; /* The following will fail if timeout has already expired. * ata_scsi_error() takes care of such scmds on EH entry. @@ -513,7 +513,7 @@ void ata_port_schedule_eh(struct ata_port *ap) { WARN_ON(!ap->ops->error_handler); - ap->flags |= ATA_FLAG_EH_PENDING; + ap->pflags |= ATA_PFLAG_EH_PENDING; scsi_schedule_eh(ap->host); DPRINTK("port EH scheduled\n"); @@ -578,7 +578,7 @@ static void __ata_port_freeze(struct ata_port *ap) if (ap->ops->freeze) ap->ops->freeze(ap); - ap->flags |= ATA_FLAG_FROZEN; + ap->pflags |= ATA_PFLAG_FROZEN; DPRINTK("ata%u port frozen\n", ap->id); } @@ -646,7 +646,7 @@ void ata_eh_thaw_port(struct ata_port *ap) spin_lock_irqsave(ap->lock, flags); - ap->flags &= ~ATA_FLAG_FROZEN; + ap->pflags &= ~ATA_PFLAG_FROZEN; if (ap->ops->thaw) ap->ops->thaw(ap); @@ -731,7 +731,7 @@ static void ata_eh_detach_dev(struct ata_device *dev) if (ata_scsi_offline_dev(dev)) { dev->flags |= ATA_DFLAG_DETACHED; - ap->flags |= ATA_FLAG_SCSI_HOTPLUG; + ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; } /* clear per-dev EH actions */ @@ -761,7 +761,7 @@ static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev, spin_lock_irqsave(ap->lock, flags); ata_eh_clear_action(dev, &ap->eh_info, action); - ap->flags |= ATA_FLAG_RECOVERED; + ap->pflags |= ATA_PFLAG_RECOVERED; spin_unlock_irqrestore(ap->lock, flags); } @@ -1027,7 +1027,7 @@ static void ata_eh_analyze_ncq_error(struct ata_port *ap) int tag, rc; /* if frozen, we can't do much */ - if (ap->flags & ATA_FLAG_FROZEN) + if (ap->pflags & ATA_PFLAG_FROZEN) return; /* is it NCQ device error? */ @@ -1327,7 +1327,7 @@ static void ata_eh_autopsy(struct ata_port *ap) } /* enforce default EH actions */ - if (ap->flags & ATA_FLAG_FROZEN || + if (ap->pflags & ATA_PFLAG_FROZEN || all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) action |= ATA_EH_SOFTRESET; else if (all_err_mask) @@ -1385,7 +1385,7 @@ static void ata_eh_report(struct ata_port *ap) return; frozen = ""; - if (ap->flags & ATA_FLAG_FROZEN) + if (ap->pflags & ATA_PFLAG_FROZEN) frozen = " frozen"; if (ehc->i.dev) { @@ -1465,7 +1465,7 @@ static int ata_eh_reset(struct ata_port *ap, int classify, struct ata_eh_context *ehc = &ap->eh_context; unsigned int *classes = ehc->classes; int tries = ATA_EH_RESET_TRIES; - int verbose = !(ap->flags & ATA_FLAG_LOADING); + int verbose = !(ap->pflags & ATA_PFLAG_LOADING); unsigned int action; ata_reset_fn_t reset; int i, did_followup_srst, rc; @@ -1636,7 +1636,7 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap, } spin_lock_irqsave(ap->lock, flags); - ap->flags |= ATA_FLAG_SCSI_HOTPLUG; + ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; spin_unlock_irqrestore(ap->lock, flags); } } @@ -1673,7 +1673,7 @@ static int ata_eh_skip_recovery(struct ata_port *ap) struct ata_eh_context *ehc = &ap->eh_context; int i; - if (ap->flags & ATA_FLAG_FROZEN || ata_port_nr_enabled(ap)) + if (ap->pflags & ATA_PFLAG_FROZEN || ata_port_nr_enabled(ap)) return 0; /* skip if class codes for all vacant slots are ATA_DEV_NONE */ @@ -1744,7 +1744,7 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, rc = 0; /* if UNLOADING, finish immediately */ - if (ap->flags & ATA_FLAG_UNLOADING) + if (ap->pflags & ATA_PFLAG_UNLOADING) goto out; /* skip EH if possible. */ @@ -1908,7 +1908,7 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) { - if (!(ap->flags & ATA_FLAG_LOADING)) { + if (!(ap->pflags & ATA_PFLAG_LOADING)) { ata_eh_autopsy(ap); ata_eh_report(ap); } diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 2915bca691e8..153452e77264 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -2930,7 +2930,7 @@ void ata_scsi_hotplug(void *data) struct ata_port *ap = data; int i; - if (ap->flags & ATA_FLAG_UNLOADING) { + if (ap->pflags & ATA_PFLAG_UNLOADING) { DPRINTK("ENTER/EXIT - unloading\n"); return; } diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 7aabb45c35e5..70dbfb8a7d51 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -370,7 +370,7 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2) * during hardreset makes controllers with broken SIEN * repeat probing needlessly. */ - if (!(ap->flags & ATA_FLAG_FROZEN)) { + if (!(ap->pflags & ATA_PFLAG_FROZEN)) { ata_ehi_hotplugged(&ap->eh_info); ap->eh_info.serror |= serror; } diff --git a/include/linux/libata.h b/include/linux/libata.h index f4284bf89758..b5d247d780f8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -160,22 +160,27 @@ enum { ATA_FLAG_HRST_TO_RESUME = (1 << 11), /* hardreset to resume phy */ ATA_FLAG_SKIP_D2H_BSY = (1 << 12), /* can't wait for the first D2H * Register FIS clearing BSY */ - ATA_FLAG_DEBUGMSG = (1 << 13), - ATA_FLAG_FLUSH_PORT_TASK = (1 << 14), /* flush port task */ - ATA_FLAG_EH_PENDING = (1 << 15), /* EH pending */ - ATA_FLAG_EH_IN_PROGRESS = (1 << 16), /* EH in progress */ - ATA_FLAG_FROZEN = (1 << 17), /* port is frozen */ - ATA_FLAG_RECOVERED = (1 << 18), /* recovery action performed */ - ATA_FLAG_LOADING = (1 << 19), /* boot/loading probe */ - ATA_FLAG_UNLOADING = (1 << 20), /* module is unloading */ - ATA_FLAG_SCSI_HOTPLUG = (1 << 21), /* SCSI hotplug scheduled */ + /* The following flag belongs to ap->pflags but is kept in + * ap->flags because it's referenced in many LLDs and will be + * removed in not-too-distant future. + */ + ATA_FLAG_DISABLED = (1 << 23), /* port is disabled, ignore it */ + + /* bits 24:31 of ap->flags are reserved for LLD specific flags */ - ATA_FLAG_DISABLED = (1 << 22), /* port is disabled, ignore it */ - ATA_FLAG_SUSPENDED = (1 << 23), /* port is suspended (power) */ + /* struct ata_port pflags */ + ATA_PFLAG_EH_PENDING = (1 << 0), /* EH pending */ + ATA_PFLAG_EH_IN_PROGRESS = (1 << 1), /* EH in progress */ + ATA_PFLAG_FROZEN = (1 << 2), /* port is frozen */ + ATA_PFLAG_RECOVERED = (1 << 3), /* recovery action performed */ + ATA_PFLAG_LOADING = (1 << 4), /* boot/loading probe */ + ATA_PFLAG_UNLOADING = (1 << 5), /* module is unloading */ + ATA_PFLAG_SCSI_HOTPLUG = (1 << 6), /* SCSI hotplug scheduled */ - /* bits 24:31 of ap->flags are reserved for LLDD specific flags */ + ATA_PFLAG_FLUSH_PORT_TASK = (1 << 16), /* flush port task */ + ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ @@ -486,6 +491,7 @@ struct ata_port { const struct ata_port_operations *ops; spinlock_t *lock; unsigned long flags; /* ATA_FLAG_xxx */ + unsigned int pflags; /* ATA_PFLAG_xxx */ unsigned int id; /* unique id req'd by scsi midlyr */ unsigned int port_no; /* unique port #; from zero */ unsigned int hard_port_no; /* hardware port #; from zero */ From 0662c58b3265f52f708a6d59476bc7862b01f9c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 02:54:58 +0900 Subject: [PATCH 19/40] [PATCH] libata: fix ehc->i.action setting in ata_eh_autopsy() ata_eh_autopsy() used to directly assign determined action mask to ehc->i.action thus overriding actions set by some of nested analyze functions. This patch makes ata_eh_autopsy() add action masks just as it's done in other places. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-eh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index f2f29a8bc38e..4a670db9aa05 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -1346,7 +1346,7 @@ static void ata_eh_autopsy(struct ata_port *ap) /* record autopsy result */ ehc->i.dev = failed_dev; - ehc->i.action = action; + ehc->i.action |= action; DPRINTK("EXIT\n"); } From e30349d27e093f32ef517b5416d9dce1998d4676 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 03:02:15 +0900 Subject: [PATCH 20/40] [PATCH] libata: replace ap_lock w/ ap->lock in ata_scsi_error() ap_lock was used because &ap->host_set->lock was too long and used a lot. Now that &ap->host_set->lock is replaced with ap->lock, there's no reason to keep ap_lock. [ed. note: that's not entirely true. ap_lock is a local variable, caching the results of a de-ref. In theory, if the compiler is smart enough, this patch is cosmetic. However, since this is not a fast path (it is the error path), this patch is nonetheless acceptable, even though it _may_ introduce a performance regression.] Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-eh.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index 4a670db9aa05..d19666c376ad 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -190,7 +190,6 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) void ata_scsi_error(struct Scsi_Host *host) { struct ata_port *ap = ata_shost_to_port(host); - spinlock_t *ap_lock = ap->lock; int i, repeat_cnt = ATA_EH_MAX_REPEAT; unsigned long flags; @@ -217,7 +216,7 @@ void ata_scsi_error(struct Scsi_Host *host) struct scsi_cmnd *scmd, *tmp; int nr_timedout = 0; - spin_lock_irqsave(ap_lock, flags); + spin_lock_irqsave(ap->lock, flags); list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { struct ata_queued_cmd *qc; @@ -256,15 +255,15 @@ void ata_scsi_error(struct Scsi_Host *host) if (nr_timedout) __ata_port_freeze(ap); - spin_unlock_irqrestore(ap_lock, flags); + spin_unlock_irqrestore(ap->lock, flags); } else - spin_unlock_wait(ap_lock); + spin_unlock_wait(ap->lock); repeat: /* invoke error handler */ if (ap->ops->error_handler) { /* fetch & clear EH info */ - spin_lock_irqsave(ap_lock, flags); + spin_lock_irqsave(ap->lock, flags); memset(&ap->eh_context, 0, sizeof(ap->eh_context)); ap->eh_context.i = ap->eh_info; @@ -273,7 +272,7 @@ void ata_scsi_error(struct Scsi_Host *host) ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; ap->pflags &= ~ATA_PFLAG_EH_PENDING; - spin_unlock_irqrestore(ap_lock, flags); + spin_unlock_irqrestore(ap->lock, flags); /* invoke EH. if unloading, just finish failed qcs */ if (!(ap->pflags & ATA_PFLAG_UNLOADING)) @@ -285,14 +284,14 @@ void ata_scsi_error(struct Scsi_Host *host) * recovered the port but before this point. Repeat * EH in such case. */ - spin_lock_irqsave(ap_lock, flags); + spin_lock_irqsave(ap->lock, flags); if (ap->pflags & ATA_PFLAG_EH_PENDING) { if (--repeat_cnt) { ata_port_printk(ap, KERN_INFO, "EH pending after completion, " "repeating EH (cnt=%d)\n", repeat_cnt); - spin_unlock_irqrestore(ap_lock, flags); + spin_unlock_irqrestore(ap->lock, flags); goto repeat; } ata_port_printk(ap, KERN_ERR, "EH pending after %d " @@ -302,14 +301,14 @@ void ata_scsi_error(struct Scsi_Host *host) /* this run is complete, make sure EH info is clear */ memset(&ap->eh_info, 0, sizeof(ap->eh_info)); - /* Clear host_eh_scheduled while holding ap_lock such + /* Clear host_eh_scheduled while holding ap->lock such * that if exception occurs after this point but * before EH completion, SCSI midlayer will * re-initiate EH. */ host->host_eh_scheduled = 0; - spin_unlock_irqrestore(ap_lock, flags); + spin_unlock_irqrestore(ap->lock, flags); } else { WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL); ap->ops->eng_timeout(ap); @@ -321,7 +320,7 @@ void ata_scsi_error(struct Scsi_Host *host) scsi_eh_flush_done_q(&ap->eh_done_q); /* clean up */ - spin_lock_irqsave(ap_lock, flags); + spin_lock_irqsave(ap->lock, flags); if (ap->pflags & ATA_PFLAG_LOADING) { ap->pflags &= ~ATA_PFLAG_LOADING; @@ -338,7 +337,7 @@ void ata_scsi_error(struct Scsi_Host *host) ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; wake_up_all(&ap->eh_wait_q); - spin_unlock_irqrestore(ap_lock, flags); + spin_unlock_irqrestore(ap->lock, flags); DPRINTK("EXIT\n"); } From 28324304350e23db24d679c55de3f06a5b1e40aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:26 +0900 Subject: [PATCH 21/40] [PATCH] libata: implement ATA_EHI_RESUME_LINK Implement ATA_EHI_RESUME_LINK, which indicates that the link needs to be resumed. This used to be implied by ATA_EHI_HOTPLUGGED. However, hotplug isn't the only event which requires link resume and separating this out allows other places to request link resume. This differentiation also allows better debounce timing selection. This patch converts user scan to use ATA_EHI_RESUME_LINK. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 15 ++++++++------- drivers/scsi/libata-scsi.c | 1 + include/linux/libata.h | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index ccab1d7ceada..90db054fa994 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2627,13 +2627,14 @@ int ata_std_prereset(struct ata_port *ap) const unsigned long *timing; int rc; - /* hotplug? */ - if (ehc->i.flags & ATA_EHI_HOTPLUGGED) { - if (ap->flags & ATA_FLAG_HRST_TO_RESUME) - ehc->i.action |= ATA_EH_HARDRESET; - if (ap->flags & ATA_FLAG_SKIP_D2H_BSY) - ata_wait_spinup(ap); - } + /* handle link resume & hotplug spinup */ + if ((ehc->i.flags & ATA_EHI_RESUME_LINK) && + (ap->flags & ATA_FLAG_HRST_TO_RESUME)) + ehc->i.action |= ATA_EH_HARDRESET; + + if ((ehc->i.flags & ATA_EHI_HOTPLUGGED) && + (ap->flags & ATA_FLAG_SKIP_D2H_BSY)) + ata_wait_spinup(ap); /* if we're about to do hardreset, nothing more to do */ if (ehc->i.action & ATA_EH_HARDRESET) diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 153452e77264..ba3e515f2b9d 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -3011,6 +3011,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, if (dev) { ap->eh_info.probe_mask |= 1 << dev->devno; ap->eh_info.action |= ATA_EH_SOFTRESET; + ap->eh_info.flags |= ATA_EHI_RESUME_LINK; } else rc = -EINVAL; } diff --git a/include/linux/libata.h b/include/linux/libata.h index b5d247d780f8..4d4ed2c8fec7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -259,6 +259,7 @@ enum { /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ + ATA_EHI_RESUME_LINK = (1 << 1), /* need to resume link */ ATA_EHI_DID_RESET = (1 << 16), /* already reset this port */ @@ -836,7 +837,7 @@ static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) if (ehi->flags & ATA_EHI_HOTPLUGGED) return; - ehi->flags |= ATA_EHI_HOTPLUGGED; + ehi->flags |= ATA_EHI_HOTPLUGGED | ATA_EHI_RESUME_LINK; ehi->hotplug_timestamp = jiffies; ehi->err_mask |= AC_ERR_ATA_BUS; From e9c839142d698086d3fe33a0daafde55ddd00c4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:26 +0900 Subject: [PATCH 22/40] [PATCH] libata: clean up debounce parameters and improve parameter selection The names of predefined debounce timing parameters didn't exactly match their usages. Rename to more generic names and implement param selection helper sata_ehc_deb_timing() which uses EHI_HOTPLUGGED to select params. Combined with the previous EHI_RESUME_LINK differentiation, this makes parameter selection accurate. e.g. user scan resumes link but normal deb param is used instead of hotplug param. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 25 +++++++++++-------------- drivers/scsi/sata_sil24.c | 2 +- include/linux/libata.h | 15 ++++++++++++--- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 90db054fa994..73174452d1c1 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -61,9 +61,9 @@ #include "libata.h" /* debounce timing parameters in msecs { interval, duration, timeout } */ -const unsigned long sata_deb_timing_boot[] = { 5, 100, 2000 }; -const unsigned long sata_deb_timing_eh[] = { 25, 500, 2000 }; -const unsigned long sata_deb_timing_before_fsrst[] = { 100, 2000, 5000 }; +const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 }; +const unsigned long sata_deb_timing_hotplug[] = { 25, 500, 2000 }; +const unsigned long sata_deb_timing_long[] = { 100, 2000, 5000 }; static unsigned int ata_dev_init_params(struct ata_device *dev, u16 heads, u16 sectors); @@ -2588,7 +2588,7 @@ static void ata_wait_spinup(struct ata_port *ap) /* first, debounce phy if SATA */ if (ap->cbl == ATA_CBL_SATA) { - rc = sata_phy_debounce(ap, sata_deb_timing_eh); + rc = sata_phy_debounce(ap, sata_deb_timing_hotplug); /* if debounced successfully and offline, no need to wait */ if ((rc == 0 || rc == -EOPNOTSUPP) && ata_port_offline(ap)) @@ -2624,7 +2624,7 @@ static void ata_wait_spinup(struct ata_port *ap) int ata_std_prereset(struct ata_port *ap) { struct ata_eh_context *ehc = &ap->eh_context; - const unsigned long *timing; + const unsigned long *timing = sata_ehc_deb_timing(ehc); int rc; /* handle link resume & hotplug spinup */ @@ -2642,11 +2642,6 @@ int ata_std_prereset(struct ata_port *ap) /* if SATA, resume phy */ if (ap->cbl == ATA_CBL_SATA) { - if (ap->pflags & ATA_PFLAG_LOADING) - timing = sata_deb_timing_boot; - else - timing = sata_deb_timing_eh; - rc = sata_phy_resume(ap, timing); if (rc && rc != -EOPNOTSUPP) { /* phy resume failed */ @@ -2734,6 +2729,8 @@ int ata_std_softreset(struct ata_port *ap, unsigned int *classes) */ int sata_std_hardreset(struct ata_port *ap, unsigned int *class) { + struct ata_eh_context *ehc = &ap->eh_context; + const unsigned long *timing = sata_ehc_deb_timing(ehc); u32 scontrol; int rc; @@ -2771,7 +2768,7 @@ int sata_std_hardreset(struct ata_port *ap, unsigned int *class) msleep(1); /* bring phy back */ - sata_phy_resume(ap, sata_deb_timing_eh); + sata_phy_resume(ap, timing); /* TODO: phy layer with polling, timeouts, etc. */ if (ata_port_offline(ap)) { @@ -5852,9 +5849,9 @@ u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, * Do not depend on ABI/API stability. */ -EXPORT_SYMBOL_GPL(sata_deb_timing_boot); -EXPORT_SYMBOL_GPL(sata_deb_timing_eh); -EXPORT_SYMBOL_GPL(sata_deb_timing_before_fsrst); +EXPORT_SYMBOL_GPL(sata_deb_timing_normal); +EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug); +EXPORT_SYMBOL_GPL(sata_deb_timing_long); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_std_ports); EXPORT_SYMBOL_GPL(ata_device_add); diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c index 07a1c6a8a414..04ae1ef25484 100644 --- a/drivers/scsi/sata_sil24.c +++ b/drivers/scsi/sata_sil24.c @@ -607,7 +607,7 @@ static int sil24_hardreset(struct ata_port *ap, unsigned int *class) /* SStatus oscillates between zero and valid status after * DEV_RST, debounce it. */ - rc = sata_phy_debounce(ap, sata_deb_timing_before_fsrst); + rc = sata_phy_debounce(ap, sata_deb_timing_long); if (rc) { reason = "PHY debouncing failed"; goto err; diff --git a/include/linux/libata.h b/include/linux/libata.h index 4d4ed2c8fec7..2f7bbfc0c41b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -629,9 +629,18 @@ struct ata_timing { #define FIT(v,vmin,vmax) max_t(short,min_t(short,v,vmax),vmin) -extern const unsigned long sata_deb_timing_boot[]; -extern const unsigned long sata_deb_timing_eh[]; -extern const unsigned long sata_deb_timing_before_fsrst[]; +extern const unsigned long sata_deb_timing_normal[]; +extern const unsigned long sata_deb_timing_hotplug[]; +extern const unsigned long sata_deb_timing_long[]; + +static inline const unsigned long * +sata_ehc_deb_timing(struct ata_eh_context *ehc) +{ + if (ehc->i.flags & ATA_EHI_HOTPLUGGED) + return sata_deb_timing_hotplug; + else + return sata_deb_timing_normal; +} extern void ata_port_probe(struct ata_port *); extern void __sata_phy_reset(struct ata_port *ap); From 1cdaf534f829b8759ba30f97d5e8dceb2ab77ba4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:26 +0900 Subject: [PATCH 23/40] [PATCH] libata: implement ATA_EHI_NO_AUTOPSY and QUIET Implement ATA_EHI_NO_AUTOPSY and QUIET. These used to be implied by ATA_PFLAG_LOADING, but new power management and PMP support need to use these separately. e.g. Suspend/resume operations shouldn't print full EH messages and resume shouldn't be recorded as an error. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 6 ++++-- drivers/scsi/libata-eh.c | 31 +++++++++++++++++-------------- include/linux/libata.h | 2 ++ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 73174452d1c1..f368536f8e91 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5447,6 +5447,7 @@ int ata_device_add(const struct ata_probe_ent *ent) } if (ap->ops->error_handler) { + struct ata_eh_info *ehi = &ap->eh_info; unsigned long flags; ata_port_probe(ap); @@ -5454,8 +5455,9 @@ int ata_device_add(const struct ata_probe_ent *ent) /* kick EH for boot probing */ spin_lock_irqsave(ap->lock, flags); - ap->eh_info.probe_mask = (1 << ATA_MAX_DEVICES) - 1; - ap->eh_info.action |= ATA_EH_SOFTRESET; + ehi->probe_mask = (1 << ATA_MAX_DEVICES) - 1; + ehi->action |= ATA_EH_SOFTRESET; + ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET; ap->pflags |= ATA_PFLAG_LOADING; ata_port_schedule_eh(ap); diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index d19666c376ad..1e9e73d13485 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -322,14 +322,13 @@ void ata_scsi_error(struct Scsi_Host *host) /* clean up */ spin_lock_irqsave(ap->lock, flags); - if (ap->pflags & ATA_PFLAG_LOADING) { + if (ap->pflags & ATA_PFLAG_LOADING) ap->pflags &= ~ATA_PFLAG_LOADING; - } else { - if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) - queue_work(ata_aux_wq, &ap->hotplug_task); - if (ap->pflags & ATA_PFLAG_RECOVERED) - ata_port_printk(ap, KERN_INFO, "EH complete\n"); - } + else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) + queue_work(ata_aux_wq, &ap->hotplug_task); + + if (ap->pflags & ATA_PFLAG_RECOVERED) + ata_port_printk(ap, KERN_INFO, "EH complete\n"); ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); @@ -759,8 +758,12 @@ static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev, unsigned long flags; spin_lock_irqsave(ap->lock, flags); + ata_eh_clear_action(dev, &ap->eh_info, action); - ap->pflags |= ATA_PFLAG_RECOVERED; + + if (!(ap->eh_context.i.flags & ATA_EHI_QUIET)) + ap->pflags |= ATA_PFLAG_RECOVERED; + spin_unlock_irqrestore(ap->lock, flags); } @@ -1274,6 +1277,9 @@ static void ata_eh_autopsy(struct ata_port *ap) DPRINTK("ENTER\n"); + if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) + return; + /* obtain and analyze SError */ rc = sata_scr_read(ap, SCR_ERROR, &serror); if (rc == 0) { @@ -1464,7 +1470,7 @@ static int ata_eh_reset(struct ata_port *ap, int classify, struct ata_eh_context *ehc = &ap->eh_context; unsigned int *classes = ehc->classes; int tries = ATA_EH_RESET_TRIES; - int verbose = !(ap->pflags & ATA_PFLAG_LOADING); + int verbose = !(ehc->i.flags & ATA_EHI_QUIET); unsigned int action; ata_reset_fn_t reset; int i, did_followup_srst, rc; @@ -1907,11 +1913,8 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) { - if (!(ap->pflags & ATA_PFLAG_LOADING)) { - ata_eh_autopsy(ap); - ata_eh_report(ap); - } - + ata_eh_autopsy(ap); + ata_eh_report(ap); ata_eh_recover(ap, prereset, softreset, hardreset, postreset); ata_eh_finish(ap); } diff --git a/include/linux/libata.h b/include/linux/libata.h index 2f7bbfc0c41b..36938ae1254d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -260,6 +260,8 @@ enum { /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ ATA_EHI_RESUME_LINK = (1 << 1), /* need to resume link */ + ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ + ATA_EHI_QUIET = (1 << 3), /* be quiet */ ATA_EHI_DID_RESET = (1 << 16), /* already reset this port */ From c0b6c0377c32fe3f6a2cf1e018db6da8a3b78379 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:26 +0900 Subject: [PATCH 24/40] [PATCH] libata: separate out __ata_ehi_hotplugged() Separate out __ata_ehi_hotplugged() from ata_ehi_hotplugged(). The underscored version doesn't set AC_ERR_ATA_BUS. This will be used for resume which is a hotplug event but not an ATA bus error. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/libata.h b/include/linux/libata.h index 36938ae1254d..2aa1398bbd52 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -843,7 +843,7 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, (ehi)->desc_len = 0; \ } while (0) -static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) +static inline void __ata_ehi_hotplugged(struct ata_eh_info *ehi) { if (ehi->flags & ATA_EHI_HOTPLUGGED) return; @@ -851,11 +851,16 @@ static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) ehi->flags |= ATA_EHI_HOTPLUGGED | ATA_EHI_RESUME_LINK; ehi->hotplug_timestamp = jiffies; - ehi->err_mask |= AC_ERR_ATA_BUS; ehi->action |= ATA_EH_SOFTRESET; ehi->probe_mask |= (1 << ATA_MAX_DEVICES) - 1; } +static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) +{ + __ata_ehi_hotplugged(ehi); + ehi->err_mask |= AC_ERR_ATA_BUS; +} + /* * qc helpers */ From 02670bf379267f55a43aa57f6895689697e90eb3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:26 +0900 Subject: [PATCH 25/40] [PATCH] libata: implement PM EH actions Implement two PM per-dev EH actions - ATA_EH_SUSPEND and ATA_EH_RESUME. Each action puts the target device into suspended mode and resumes from it respectively. Once a device is put to suspended mode, no EH operations other than RESUME is allowed on the device. The device will stay suspended till it gets resumed and thus reset and revalidated. To implement this, a new device state helper - ata_dev_ready() - is implemented and used in EH action implementations to make them operate only on attached & running devices. If all possible devices on a port are suspended, reset is skipped too. This prevents spurious events including hotplug events from disrupting suspended devices. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 5 +- drivers/scsi/libata-eh.c | 185 ++++++++++++++++++++++++++++++++++++- include/linux/libata.h | 12 ++- 3 files changed, 198 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index f368536f8e91..ad5cac79627c 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -2146,7 +2146,7 @@ int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev) * return error code and failing device on failure. */ for (i = 0; i < ATA_MAX_DEVICES; i++) { - if (ata_dev_enabled(&ap->device[i])) { + if (ata_dev_ready(&ap->device[i])) { ap->ops->set_mode(ap); break; } @@ -2212,7 +2212,8 @@ int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev) for (i = 0; i < ATA_MAX_DEVICES; i++) { dev = &ap->device[i]; - if (!ata_dev_enabled(dev)) + /* don't udpate suspended devices' xfer mode */ + if (!ata_dev_ready(dev)) continue; rc = ata_dev_set_mode(dev); diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index 1e9e73d13485..b9df49a36214 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -1610,7 +1610,7 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap, dev = &ap->device[i]; action = ata_eh_dev_action(dev); - if (action & ATA_EH_REVALIDATE && ata_dev_enabled(dev)) { + if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) { if (ata_port_offline(ap)) { rc = -EIO; break; @@ -1653,6 +1653,164 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap, return rc; } +/** + * ata_eh_suspend - handle suspend EH action + * @ap: target host port + * @r_failed_dev: result parameter to indicate failing device + * + * Handle suspend EH action. Disk devices are spinned down and + * other types of devices are just marked suspended. Once + * suspended, no EH action to the device is allowed until it is + * resumed. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * 0 on success, -errno otherwise + */ +static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev) +{ + struct ata_device *dev; + int i, rc = 0; + + DPRINTK("ENTER\n"); + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + unsigned long flags; + unsigned int action, err_mask; + + dev = &ap->device[i]; + action = ata_eh_dev_action(dev); + + if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND)) + continue; + + WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED); + + ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND); + + if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) { + /* flush cache */ + rc = ata_flush_cache(dev); + if (rc) + break; + + /* spin down */ + err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1); + if (err_mask) { + ata_dev_printk(dev, KERN_ERR, "failed to " + "spin down (err_mask=0x%x)\n", + err_mask); + rc = -EIO; + break; + } + } + + spin_lock_irqsave(ap->lock, flags); + dev->flags |= ATA_DFLAG_SUSPENDED; + spin_unlock_irqrestore(ap->lock, flags); + + ata_eh_done(ap, dev, ATA_EH_SUSPEND); + } + + if (rc) + *r_failed_dev = dev; + + DPRINTK("EXIT\n"); + return 0; +} + +/** + * ata_eh_prep_resume - prep for resume EH action + * @ap: target host port + * + * Clear SUSPENDED in preparation for scheduled resume actions. + * This allows other parts of EH to access the devices being + * resumed. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +static void ata_eh_prep_resume(struct ata_port *ap) +{ + struct ata_device *dev; + unsigned long flags; + int i; + + DPRINTK("ENTER\n"); + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + unsigned int action; + + dev = &ap->device[i]; + action = ata_eh_dev_action(dev); + + if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME)) + continue; + + spin_lock_irqsave(ap->lock, flags); + dev->flags &= ~ATA_DFLAG_SUSPENDED; + spin_unlock_irqrestore(ap->lock, flags); + } + + DPRINTK("EXIT\n"); +} + +/** + * ata_eh_resume - handle resume EH action + * @ap: target host port + * @r_failed_dev: result parameter to indicate failing device + * + * Handle resume EH action. Target devices are already reset and + * revalidated. Spinning up is the only operation left. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * 0 on success, -errno otherwise + */ +static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev) +{ + struct ata_device *dev; + int i, rc = 0; + + DPRINTK("ENTER\n"); + + for (i = 0; i < ATA_MAX_DEVICES; i++) { + unsigned int action, err_mask; + + dev = &ap->device[i]; + action = ata_eh_dev_action(dev); + + if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME)) + continue; + + ata_eh_about_to_do(ap, dev, ATA_EH_RESUME); + + if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) { + err_mask = ata_do_simple_cmd(dev, + ATA_CMD_IDLEIMMEDIATE); + if (err_mask) { + ata_dev_printk(dev, KERN_ERR, "failed to " + "spin up (err_mask=0x%x)\n", + err_mask); + rc = -EIO; + break; + } + } + + ata_eh_done(ap, dev, ATA_EH_RESUME); + } + + if (rc) + *r_failed_dev = dev; + + DPRINTK("EXIT\n"); + return 0; +} + static int ata_port_nr_enabled(struct ata_port *ap) { int i, cnt = 0; @@ -1678,6 +1836,18 @@ static int ata_eh_skip_recovery(struct ata_port *ap) struct ata_eh_context *ehc = &ap->eh_context; int i; + /* skip if all possible devices are suspended */ + for (i = 0; i < ata_port_max_devices(ap); i++) { + struct ata_device *dev = &ap->device[i]; + + if (ata_dev_absent(dev) || ata_dev_ready(dev)) + break; + } + + if (i == ata_port_max_devices(ap)) + return 1; + + /* always thaw frozen port and recover failed devices */ if (ap->pflags & ATA_PFLAG_FROZEN || ata_port_nr_enabled(ap)) return 0; @@ -1752,6 +1922,9 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, if (ap->pflags & ATA_PFLAG_UNLOADING) goto out; + /* prep for resume */ + ata_eh_prep_resume(ap); + /* skip EH if possible. */ if (ata_eh_skip_recovery(ap)) ehc->i.action = 0; @@ -1779,6 +1952,11 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, if (rc) goto dev_fail; + /* resume devices */ + rc = ata_eh_resume(ap, &dev); + if (rc) + goto dev_fail; + /* configure transfer mode if the port has been reset */ if (ehc->i.flags & ATA_EHI_DID_RESET) { rc = ata_set_mode(ap, &dev); @@ -1788,6 +1966,11 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, } } + /* suspend devices */ + rc = ata_eh_suspend(ap, &dev); + if (rc) + goto dev_fail; + goto out; dev_fail: diff --git a/include/linux/libata.h b/include/linux/libata.h index 2aa1398bbd52..363c7501843a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -131,6 +131,7 @@ enum { ATA_DFLAG_CFG_MASK = (1 << 8) - 1, ATA_DFLAG_PIO = (1 << 8), /* device currently in PIO mode */ + ATA_DFLAG_SUSPENDED = (1 << 9), /* device suspended */ ATA_DFLAG_INIT_MASK = (1 << 16) - 1, ATA_DFLAG_DETACH = (1 << 16), @@ -253,9 +254,13 @@ enum { ATA_EH_REVALIDATE = (1 << 0), ATA_EH_SOFTRESET = (1 << 1), ATA_EH_HARDRESET = (1 << 2), + ATA_EH_SUSPEND = (1 << 3), + ATA_EH_RESUME = (1 << 4), + ATA_EH_PM_FREEZE = (1 << 5), ATA_EH_RESET_MASK = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, - ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE, + ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_SUSPEND | + ATA_EH_RESUME | ATA_EH_PM_FREEZE, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ @@ -944,6 +949,11 @@ static inline unsigned int ata_dev_absent(const struct ata_device *dev) return ata_class_absent(dev->class); } +static inline unsigned int ata_dev_ready(const struct ata_device *dev) +{ + return ata_dev_enabled(dev) && !(dev->flags & ATA_DFLAG_SUSPENDED); +} + /* * port helpers */ From d6f26d1f1f1128a896f38a7f8426daed0a1205a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:26 +0900 Subject: [PATCH 26/40] [PATCH] libata: reimplement per-dev PM Reimplement per-dev PM. The original implementation directly put the device into suspended mode and didn't synchronize w/ EH operations including hotplug. This patch reimplements ata_scsi_device_suspend() and ata_scsi_device_resume() such that they request EH to perform the respective operations. Both functions synchronize with hotplug such that it doesn't operate on detached devices. Suspend waits for completion but resume just issues request and returns. This allows parallel wake up of devices and thus speeds up system resume. Due to sdev detach synchronization, it's not feasible to separate out EH requesting from sdev handling; thus, ata_device_suspend/resume() are removed and everything is implemented in the respective libata-scsi functions. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 84 ------------------------- drivers/scsi/libata-scsi.c | 121 +++++++++++++++++++++++++++++++++++-- include/linux/libata.h | 2 - 3 files changed, 115 insertions(+), 92 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index ad5cac79627c..51dbc5221934 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5009,88 +5009,6 @@ int ata_flush_cache(struct ata_device *dev) return 0; } -static int ata_standby_drive(struct ata_device *dev) -{ - unsigned int err_mask; - - err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1); - if (err_mask) { - ata_dev_printk(dev, KERN_ERR, "failed to standby drive " - "(err_mask=0x%x)\n", err_mask); - return -EIO; - } - - return 0; -} - -static int ata_start_drive(struct ata_device *dev) -{ - unsigned int err_mask; - - err_mask = ata_do_simple_cmd(dev, ATA_CMD_IDLEIMMEDIATE); - if (err_mask) { - ata_dev_printk(dev, KERN_ERR, "failed to start drive " - "(err_mask=0x%x)\n", err_mask); - return -EIO; - } - - return 0; -} - -/** - * ata_device_resume - wakeup a previously suspended devices - * @dev: the device to resume - * - * Kick the drive back into action, by sending it an idle immediate - * command and making sure its transfer mode matches between drive - * and host. - * - */ -int ata_device_resume(struct ata_device *dev) -{ - struct ata_port *ap = dev->ap; - - if (ap->pflags & ATA_PFLAG_SUSPENDED) { - struct ata_device *failed_dev; - - ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT); - ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 200000); - - ap->pflags &= ~ATA_PFLAG_SUSPENDED; - while (ata_set_mode(ap, &failed_dev)) - ata_dev_disable(failed_dev); - } - if (!ata_dev_enabled(dev)) - return 0; - if (dev->class == ATA_DEV_ATA) - ata_start_drive(dev); - - return 0; -} - -/** - * ata_device_suspend - prepare a device for suspend - * @dev: the device to suspend - * @state: target power management state - * - * Flush the cache on the drive, if appropriate, then issue a - * standbynow command. - */ -int ata_device_suspend(struct ata_device *dev, pm_message_t state) -{ - struct ata_port *ap = dev->ap; - - if (!ata_dev_enabled(dev)) - return 0; - if (dev->class == ATA_DEV_ATA) - ata_flush_cache(dev); - - if (state.event != PM_EVENT_FREEZE) - ata_standby_drive(dev); - ap->pflags |= ATA_PFLAG_SUSPENDED; - return 0; -} - /** * ata_port_start - Set port up for dma. * @ap: Port to initialize @@ -5946,8 +5864,6 @@ EXPORT_SYMBOL_GPL(ata_pci_default_filter); EXPORT_SYMBOL_GPL(ata_pci_clear_simplex); #endif /* CONFIG_PCI */ -EXPORT_SYMBOL_GPL(ata_device_suspend); -EXPORT_SYMBOL_GPL(ata_device_resume); EXPORT_SYMBOL_GPL(ata_scsi_device_suspend); EXPORT_SYMBOL_GPL(ata_scsi_device_resume); diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index ba3e515f2b9d..7ced41ecde86 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -397,20 +397,129 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf) } } -int ata_scsi_device_resume(struct scsi_device *sdev) +/** + * ata_scsi_device_suspend - suspend ATA device associated with sdev + * @sdev: the SCSI device to suspend + * @state: target power management state + * + * Request suspend EH action on the ATA device associated with + * @sdev and wait for the operation to complete. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * 0 on success, -errno otherwise. + */ +int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t state) { struct ata_port *ap = ata_shost_to_port(sdev->host); - struct ata_device *dev = __ata_scsi_find_dev(ap, sdev); + struct ata_device *dev = ata_scsi_find_dev(ap, sdev); + unsigned long flags; + unsigned int action; + int rc = 0; + + if (!dev) + goto out; + + spin_lock_irqsave(ap->lock, flags); + + /* wait for the previous resume to complete */ + while (dev->flags & ATA_DFLAG_SUSPENDED) { + spin_unlock_irqrestore(ap->lock, flags); + ata_port_wait_eh(ap); + spin_lock_irqsave(ap->lock, flags); + } + + /* if @sdev is already detached, nothing to do */ + if (sdev->sdev_state == SDEV_OFFLINE || + sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL) + goto out_unlock; + + /* request suspend */ + action = ATA_EH_SUSPEND; + if (state.event != PM_EVENT_SUSPEND) + action |= ATA_EH_PM_FREEZE; + ap->eh_info.dev_action[dev->devno] |= action; + ap->eh_info.flags |= ATA_EHI_QUIET; + ata_port_schedule_eh(ap); + + spin_unlock_irqrestore(ap->lock, flags); + + /* wait for EH to do the job */ + ata_port_wait_eh(ap); + + spin_lock_irqsave(ap->lock, flags); + + /* If @sdev is still attached but the associated ATA device + * isn't suspended, the operation failed. + */ + if (sdev->sdev_state != SDEV_OFFLINE && + sdev->sdev_state != SDEV_CANCEL && sdev->sdev_state != SDEV_DEL && + !(dev->flags & ATA_DFLAG_SUSPENDED)) + rc = -EIO; - return ata_device_resume(dev); + out_unlock: + spin_unlock_irqrestore(ap->lock, flags); + out: + if (rc == 0) + sdev->sdev_gendev.power.power_state = state; + return rc; } -int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t state) +/** + * ata_scsi_device_resume - resume ATA device associated with sdev + * @sdev: the SCSI device to resume + * + * Request resume EH action on the ATA device associated with + * @sdev and return immediately. This enables parallel + * wakeup/spinup of devices. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * 0. + */ +int ata_scsi_device_resume(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); - struct ata_device *dev = __ata_scsi_find_dev(ap, sdev); + struct ata_device *dev = ata_scsi_find_dev(ap, sdev); + struct ata_eh_info *ehi = &ap->eh_info; + unsigned long flags; + unsigned int action; + + if (!dev) + goto out; + + spin_lock_irqsave(ap->lock, flags); + + /* if @sdev is already detached, nothing to do */ + if (sdev->sdev_state == SDEV_OFFLINE || + sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL) + goto out_unlock; - return ata_device_suspend(dev, state); + /* request resume */ + action = ATA_EH_RESUME; + if (sdev->sdev_gendev.power.power_state.event == PM_EVENT_SUSPEND) + __ata_ehi_hotplugged(ehi); + else + action |= ATA_EH_PM_FREEZE | ATA_EH_SOFTRESET; + ehi->dev_action[dev->devno] |= action; + + /* We don't want autopsy and verbose EH messages. Disable + * those if we're the only device on this link. + */ + if (ata_port_max_devices(ap) == 1) + ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET; + + ata_port_schedule_eh(ap); + + out_unlock: + spin_unlock_irqrestore(ap->lock, flags); + out: + sdev->sdev_gendev.power.power_state = PMSG_ON; + return 0; } /** diff --git a/include/linux/libata.h b/include/linux/libata.h index 363c7501843a..5ac262608199 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -687,8 +687,6 @@ extern int ata_port_online(struct ata_port *ap); extern int ata_port_offline(struct ata_port *ap); extern int ata_scsi_device_resume(struct scsi_device *); extern int ata_scsi_device_suspend(struct scsi_device *, pm_message_t state); -extern int ata_device_resume(struct ata_device *); -extern int ata_device_suspend(struct ata_device *, pm_message_t state); extern int ata_ratelimit(void); extern unsigned int ata_busy_sleep(struct ata_port *ap, unsigned long timeout_pat, From 500530f652f9e5dabe7571b018dec47742ce0f16 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:27 +0900 Subject: [PATCH 27/40] [PATCH] libata: reimplement controller-wide PM Reimplement controller-wide PM. ata_host_set_suspend/resume() are defined to suspend and resume a host_set. While suspended, EHs for all ports in the host_set are pegged using ATA_FLAG_SUSPENDED and frozen. Because SCSI device hotplug is done asynchronously against the rest of libata EH and the same mutex is used when adding new device, suspend cannot wait for hotplug to complete. So, if SCSI device hotplug is in progress, suspend fails with -EBUSY. In most cases, host_set resume is followed by device resume. As each resume operation requires a reset, a single host_set-wide resume operation may result in multiple resets. To avoid this, resume waits upto 1 second giving PM to request resume for devices. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 165 +++++++++++++++++++++++++++++++++++-- drivers/scsi/libata-eh.c | 128 +++++++++++++++++++++++++++- include/linux/libata.h | 12 +++ 3 files changed, 298 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 51dbc5221934..fa65b990f8b0 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -5009,6 +5009,122 @@ int ata_flush_cache(struct ata_device *dev) return 0; } +static int ata_host_set_request_pm(struct ata_host_set *host_set, + pm_message_t mesg, unsigned int action, + unsigned int ehi_flags, int wait) +{ + unsigned long flags; + int i, rc; + + for (i = 0; i < host_set->n_ports; i++) { + struct ata_port *ap = host_set->ports[i]; + + /* Previous resume operation might still be in + * progress. Wait for PM_PENDING to clear. + */ + if (ap->pflags & ATA_PFLAG_PM_PENDING) { + ata_port_wait_eh(ap); + WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); + } + + /* request PM ops to EH */ + spin_lock_irqsave(ap->lock, flags); + + ap->pm_mesg = mesg; + if (wait) { + rc = 0; + ap->pm_result = &rc; + } + + ap->pflags |= ATA_PFLAG_PM_PENDING; + ap->eh_info.action |= action; + ap->eh_info.flags |= ehi_flags; + + ata_port_schedule_eh(ap); + + spin_unlock_irqrestore(ap->lock, flags); + + /* wait and check result */ + if (wait) { + ata_port_wait_eh(ap); + WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); + if (rc) + return rc; + } + } + + return 0; +} + +/** + * ata_host_set_suspend - suspend host_set + * @host_set: host_set to suspend + * @mesg: PM message + * + * Suspend @host_set. Actual operation is performed by EH. This + * function requests EH to perform PM operations and waits for EH + * to finish. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int ata_host_set_suspend(struct ata_host_set *host_set, pm_message_t mesg) +{ + int i, j, rc; + + rc = ata_host_set_request_pm(host_set, mesg, 0, ATA_EHI_QUIET, 1); + if (rc) + goto fail; + + /* EH is quiescent now. Fail if we have any ready device. + * This happens if hotplug occurs between completion of device + * suspension and here. + */ + for (i = 0; i < host_set->n_ports; i++) { + struct ata_port *ap = host_set->ports[i]; + + for (j = 0; j < ATA_MAX_DEVICES; j++) { + struct ata_device *dev = &ap->device[j]; + + if (ata_dev_ready(dev)) { + ata_port_printk(ap, KERN_WARNING, + "suspend failed, device %d " + "still active\n", dev->devno); + rc = -EBUSY; + goto fail; + } + } + } + + host_set->dev->power.power_state = mesg; + return 0; + + fail: + ata_host_set_resume(host_set); + return rc; +} + +/** + * ata_host_set_resume - resume host_set + * @host_set: host_set to resume + * + * Resume @host_set. Actual operation is performed by EH. This + * function requests EH to perform PM operations and returns. + * Note that all resume operations are performed parallely. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +void ata_host_set_resume(struct ata_host_set *host_set) +{ + ata_host_set_request_pm(host_set, PMSG_ON, ATA_EH_SOFTRESET, + ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 0); + host_set->dev->power.power_state = PMSG_ON; +} + /** * ata_port_start - Set port up for dma. * @ap: Port to initialize @@ -5651,20 +5767,55 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) return (tmp == bits->val) ? 1 : 0; } -int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state) +void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t state) { pci_save_state(pdev); - pci_disable_device(pdev); - pci_set_power_state(pdev, PCI_D3hot); - return 0; + + if (state.event == PM_EVENT_SUSPEND) { + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); + } } -int ata_pci_device_resume(struct pci_dev *pdev) +void ata_pci_device_do_resume(struct pci_dev *pdev) { pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); pci_enable_device(pdev); pci_set_master(pdev); +} + +int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); + int rc = 0; + + rc = ata_host_set_suspend(host_set, state); + if (rc) + return rc; + + if (host_set->next) { + rc = ata_host_set_suspend(host_set->next, state); + if (rc) { + ata_host_set_resume(host_set); + return rc; + } + } + + ata_pci_device_do_suspend(pdev, state); + + return 0; +} + +int ata_pci_device_resume(struct pci_dev *pdev) +{ + struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); + + ata_pci_device_do_resume(pdev); + ata_host_set_resume(host_set); + if (host_set->next) + ata_host_set_resume(host_set->next); + return 0; } #endif /* CONFIG_PCI */ @@ -5844,6 +5995,8 @@ EXPORT_SYMBOL_GPL(sata_scr_write); EXPORT_SYMBOL_GPL(sata_scr_write_flush); EXPORT_SYMBOL_GPL(ata_port_online); EXPORT_SYMBOL_GPL(ata_port_offline); +EXPORT_SYMBOL_GPL(ata_host_set_suspend); +EXPORT_SYMBOL_GPL(ata_host_set_resume); EXPORT_SYMBOL_GPL(ata_id_string); EXPORT_SYMBOL_GPL(ata_id_c_string); EXPORT_SYMBOL_GPL(ata_scsi_simulate); @@ -5858,6 +6011,8 @@ EXPORT_SYMBOL_GPL(ata_pci_host_stop); EXPORT_SYMBOL_GPL(ata_pci_init_native_mode); EXPORT_SYMBOL_GPL(ata_pci_init_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); +EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend); +EXPORT_SYMBOL_GPL(ata_pci_device_do_resume); EXPORT_SYMBOL_GPL(ata_pci_device_suspend); EXPORT_SYMBOL_GPL(ata_pci_device_resume); EXPORT_SYMBOL_GPL(ata_pci_default_filter); diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index b9df49a36214..4b6aa30f4d68 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -47,6 +47,8 @@ static void __ata_port_freeze(struct ata_port *ap); static void ata_eh_finish(struct ata_port *ap); +static void ata_eh_handle_port_suspend(struct ata_port *ap); +static void ata_eh_handle_port_resume(struct ata_port *ap); static void ata_ering_record(struct ata_ering *ering, int is_io, unsigned int err_mask) @@ -262,6 +264,9 @@ void ata_scsi_error(struct Scsi_Host *host) repeat: /* invoke error handler */ if (ap->ops->error_handler) { + /* process port resume request */ + ata_eh_handle_port_resume(ap); + /* fetch & clear EH info */ spin_lock_irqsave(ap->lock, flags); @@ -274,12 +279,15 @@ void ata_scsi_error(struct Scsi_Host *host) spin_unlock_irqrestore(ap->lock, flags); - /* invoke EH. if unloading, just finish failed qcs */ - if (!(ap->pflags & ATA_PFLAG_UNLOADING)) + /* invoke EH, skip if unloading or suspended */ + if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) ap->ops->error_handler(ap); else ata_eh_finish(ap); + /* process port suspend request */ + ata_eh_handle_port_suspend(ap); + /* Exception might have happend after ->error_handler * recovered the port but before this point. Repeat * EH in such case. @@ -2101,3 +2109,119 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_eh_recover(ap, prereset, softreset, hardreset, postreset); ata_eh_finish(ap); } + +/** + * ata_eh_handle_port_suspend - perform port suspend operation + * @ap: port to suspend + * + * Suspend @ap. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +static void ata_eh_handle_port_suspend(struct ata_port *ap) +{ + unsigned long flags; + int rc = 0; + + /* are we suspending? */ + spin_lock_irqsave(ap->lock, flags); + if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || + ap->pm_mesg.event == PM_EVENT_ON) { + spin_unlock_irqrestore(ap->lock, flags); + return; + } + spin_unlock_irqrestore(ap->lock, flags); + + WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); + + /* suspend */ + ata_eh_freeze_port(ap); + + if (ap->ops->port_suspend) + rc = ap->ops->port_suspend(ap, ap->pm_mesg); + + /* report result */ + spin_lock_irqsave(ap->lock, flags); + + ap->pflags &= ~ATA_PFLAG_PM_PENDING; + if (rc == 0) + ap->pflags |= ATA_PFLAG_SUSPENDED; + else + ata_port_schedule_eh(ap); + + if (ap->pm_result) { + *ap->pm_result = rc; + ap->pm_result = NULL; + } + + spin_unlock_irqrestore(ap->lock, flags); + + return; +} + +/** + * ata_eh_handle_port_resume - perform port resume operation + * @ap: port to resume + * + * Resume @ap. + * + * This function also waits upto one second until all devices + * hanging off this port requests resume EH action. This is to + * prevent invoking EH and thus reset multiple times on resume. + * + * On DPM resume, where some of devices might not be resumed + * together, this may delay port resume upto one second, but such + * DPM resumes are rare and 1 sec delay isn't too bad. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +static void ata_eh_handle_port_resume(struct ata_port *ap) +{ + unsigned long timeout; + unsigned long flags; + int i, rc = 0; + + /* are we resuming? */ + spin_lock_irqsave(ap->lock, flags); + if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || + ap->pm_mesg.event != PM_EVENT_ON) { + spin_unlock_irqrestore(ap->lock, flags); + return; + } + spin_unlock_irqrestore(ap->lock, flags); + + /* spurious? */ + if (!(ap->pflags & ATA_PFLAG_SUSPENDED)) + goto done; + + if (ap->ops->port_resume) + rc = ap->ops->port_resume(ap); + + /* give devices time to request EH */ + timeout = jiffies + HZ; /* 1s max */ + while (1) { + for (i = 0; i < ATA_MAX_DEVICES; i++) { + struct ata_device *dev = &ap->device[i]; + unsigned int action = ata_eh_dev_action(dev); + + if ((dev->flags & ATA_DFLAG_SUSPENDED) && + !(action & ATA_EH_RESUME)) + break; + } + + if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout)) + break; + msleep(10); + } + + done: + spin_lock_irqsave(ap->lock, flags); + ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); + if (ap->pm_result) { + *ap->pm_result = rc; + ap->pm_result = NULL; + } + spin_unlock_irqrestore(ap->lock, flags); +} diff --git a/include/linux/libata.h b/include/linux/libata.h index 5ac262608199..6cc497a2b6da 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -182,6 +182,7 @@ enum { ATA_PFLAG_FLUSH_PORT_TASK = (1 << 16), /* flush port task */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ + ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ @@ -549,6 +550,9 @@ struct ata_port { struct list_head eh_done_q; wait_queue_head_t eh_wait_q; + pm_message_t pm_mesg; + int *pm_result; + void *private_data; u8 sector_buf[ATA_SECT_SIZE]; /* owned by EH */ @@ -603,6 +607,9 @@ struct ata_port_operations { void (*scr_write) (struct ata_port *ap, unsigned int sc_reg, u32 val); + int (*port_suspend) (struct ata_port *ap, pm_message_t mesg); + int (*port_resume) (struct ata_port *ap); + int (*port_start) (struct ata_port *ap); void (*port_stop) (struct ata_port *ap); @@ -667,6 +674,8 @@ extern void ata_std_ports(struct ata_ioports *ioaddr); extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, unsigned int n_ports); extern void ata_pci_remove_one (struct pci_dev *pdev); +extern void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t state); +extern void ata_pci_device_do_resume(struct pci_dev *pdev); extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state); extern int ata_pci_device_resume(struct pci_dev *pdev); extern int ata_pci_clear_simplex(struct pci_dev *pdev); @@ -687,6 +696,9 @@ extern int ata_port_online(struct ata_port *ap); extern int ata_port_offline(struct ata_port *ap); extern int ata_scsi_device_resume(struct scsi_device *); extern int ata_scsi_device_suspend(struct scsi_device *, pm_message_t state); +extern int ata_host_set_suspend(struct ata_host_set *host_set, + pm_message_t mesg); +extern void ata_host_set_resume(struct ata_host_set *host_set); extern int ata_ratelimit(void); extern unsigned int ata_busy_sleep(struct ata_port *ap, unsigned long timeout_pat, From 3d8ec91352099b32a400f1952112dc076da28106 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:27 +0900 Subject: [PATCH 28/40] [PATCH] sata_sil: separate out sil_init_controller() Separate out controller initialization from sil_init_one() into sil_init_controller(). This will be used by resume. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/sata_sil.c | 86 +++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 70dbfb8a7d51..6033f796397f 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -561,6 +561,52 @@ static void sil_dev_config(struct ata_port *ap, struct ata_device *dev) } } +static void sil_init_controller(struct pci_dev *pdev, + int n_ports, unsigned long host_flags, + void __iomem *mmio_base) +{ + u8 cls; + u32 tmp; + int i; + + /* Initialize FIFO PCI bus arbitration */ + cls = sil_get_device_cache_line(pdev); + if (cls) { + cls >>= 3; + cls++; /* cls = (line_size/8)+1 */ + for (i = 0; i < n_ports; i++) + writew(cls << 8 | cls, + mmio_base + sil_port[i].fifo_cfg); + } else + dev_printk(KERN_WARNING, &pdev->dev, + "cache line size not set. Driver may not function\n"); + + /* Apply R_ERR on DMA activate FIS errata workaround */ + if (host_flags & SIL_FLAG_RERR_ON_DMA_ACT) { + int cnt; + + for (i = 0, cnt = 0; i < n_ports; i++) { + tmp = readl(mmio_base + sil_port[i].sfis_cfg); + if ((tmp & 0x3) != 0x01) + continue; + if (!cnt) + dev_printk(KERN_INFO, &pdev->dev, + "Applying R_ERR on DMA activate " + "FIS errata fix\n"); + writel(tmp & ~0x3, mmio_base + sil_port[i].sfis_cfg); + cnt++; + } + } + + if (n_ports == 4) { + /* flip the magic "make 4 ports work" bit */ + tmp = readl(mmio_base + sil_port[2].bmdma); + if ((tmp & SIL_INTR_STEERING) == 0) + writel(tmp | SIL_INTR_STEERING, + mmio_base + sil_port[2].bmdma); + } +} + static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version; @@ -570,8 +616,6 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) int rc; unsigned int i; int pci_dev_busy = 0; - u32 tmp; - u8 cls; if (!printed_version++) dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); @@ -630,42 +674,8 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) ata_std_ports(&probe_ent->port[i]); } - /* Initialize FIFO PCI bus arbitration */ - cls = sil_get_device_cache_line(pdev); - if (cls) { - cls >>= 3; - cls++; /* cls = (line_size/8)+1 */ - for (i = 0; i < probe_ent->n_ports; i++) - writew(cls << 8 | cls, - mmio_base + sil_port[i].fifo_cfg); - } else - dev_printk(KERN_WARNING, &pdev->dev, - "cache line size not set. Driver may not function\n"); - - /* Apply R_ERR on DMA activate FIS errata workaround */ - if (probe_ent->host_flags & SIL_FLAG_RERR_ON_DMA_ACT) { - int cnt; - - for (i = 0, cnt = 0; i < probe_ent->n_ports; i++) { - tmp = readl(mmio_base + sil_port[i].sfis_cfg); - if ((tmp & 0x3) != 0x01) - continue; - if (!cnt) - dev_printk(KERN_INFO, &pdev->dev, - "Applying R_ERR on DMA activate " - "FIS errata fix\n"); - writel(tmp & ~0x3, mmio_base + sil_port[i].sfis_cfg); - cnt++; - } - } - - if (ent->driver_data == sil_3114) { - /* flip the magic "make 4 ports work" bit */ - tmp = readl(mmio_base + sil_port[2].bmdma); - if ((tmp & SIL_INTR_STEERING) == 0) - writel(tmp | SIL_INTR_STEERING, - mmio_base + sil_port[2].bmdma); - } + sil_init_controller(pdev, probe_ent->n_ports, probe_ent->host_flags, + mmio_base); pci_set_master(pdev); From afb5a7cb84b1ea8b6045945e3d288303e6b71336 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:27 +0900 Subject: [PATCH 29/40] [PATCH] sata_sil: add suspend/sleep support Add suspend/sleep support. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/sata_sil.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 6033f796397f..d0a85073ebf7 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -109,6 +109,7 @@ enum { }; static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); +static int sil_pci_device_resume(struct pci_dev *pdev); static void sil_dev_config(struct ata_port *ap, struct ata_device *dev); static u32 sil_scr_read (struct ata_port *ap, unsigned int sc_reg); static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val); @@ -160,6 +161,8 @@ static struct pci_driver sil_pci_driver = { .id_table = sil_pci_tbl, .probe = sil_init_one, .remove = ata_pci_remove_one, + .suspend = ata_pci_device_suspend, + .resume = sil_pci_device_resume, }; static struct scsi_host_template sil_sht = { @@ -178,6 +181,8 @@ static struct scsi_host_template sil_sht = { .slave_configure = ata_scsi_slave_config, .slave_destroy = ata_scsi_slave_destroy, .bios_param = ata_std_bios_param, + .suspend = ata_scsi_device_suspend, + .resume = ata_scsi_device_resume, }; static const struct ata_port_operations sil_ops = { @@ -695,6 +700,18 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } +static int sil_pci_device_resume(struct pci_dev *pdev) +{ + struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); + + ata_pci_device_do_resume(pdev); + sil_init_controller(pdev, host_set->n_ports, host_set->ports[0]->flags, + host_set->mmio_base); + ata_host_set_resume(host_set); + + return 0; +} + static int __init sil_init(void) { return pci_module_init(&sil_pci_driver); From 2a41a6108d93a62910d1c36913d83a79b550b40a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:27 +0900 Subject: [PATCH 30/40] [PATCH] sata_sil24: separate out sil24_init_controller() Separate out controller initialization from sil24_init_one() into sil24_init_controller(). This will be used by resume. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/sata_sil24.c | 107 ++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 45 deletions(-) diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c index 04ae1ef25484..1850c2e3aa21 100644 --- a/drivers/scsi/sata_sil24.c +++ b/drivers/scsi/sata_sil24.c @@ -988,6 +988,64 @@ static void sil24_host_stop(struct ata_host_set *host_set) kfree(hpriv); } +static void sil24_init_controller(struct pci_dev *pdev, int n_ports, + unsigned long host_flags, + void __iomem *host_base, + void __iomem *port_base) +{ + u32 tmp; + int i; + + /* GPIO off */ + writel(0, host_base + HOST_FLASH_CMD); + + /* clear global reset & mask interrupts during initialization */ + writel(0, host_base + HOST_CTRL); + + /* init ports */ + for (i = 0; i < n_ports; i++) { + void __iomem *port = port_base + i * PORT_REGS_SIZE; + + /* Initial PHY setting */ + writel(0x20c, port + PORT_PHY_CFG); + + /* Clear port RST */ + tmp = readl(port + PORT_CTRL_STAT); + if (tmp & PORT_CS_PORT_RST) { + writel(PORT_CS_PORT_RST, port + PORT_CTRL_CLR); + tmp = ata_wait_register(port + PORT_CTRL_STAT, + PORT_CS_PORT_RST, + PORT_CS_PORT_RST, 10, 100); + if (tmp & PORT_CS_PORT_RST) + dev_printk(KERN_ERR, &pdev->dev, + "failed to clear port RST\n"); + } + + /* Configure IRQ WoC */ + if (host_flags & SIL24_FLAG_PCIX_IRQ_WOC) + writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_STAT); + else + writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_CLR); + + /* Zero error counters. */ + writel(0x8000, port + PORT_DECODE_ERR_THRESH); + writel(0x8000, port + PORT_CRC_ERR_THRESH); + writel(0x8000, port + PORT_HSHK_ERR_THRESH); + writel(0x0000, port + PORT_DECODE_ERR_CNT); + writel(0x0000, port + PORT_CRC_ERR_CNT); + writel(0x0000, port + PORT_HSHK_ERR_CNT); + + /* Always use 64bit activation */ + writel(PORT_CS_32BIT_ACTV, port + PORT_CTRL_CLR); + + /* Clear port multiplier enable and resume bits */ + writel(PORT_CS_PM_EN | PORT_CS_RESUME, port + PORT_CTRL_CLR); + } + + /* Turn on interrupts */ + writel(IRQ_STAT_4PORTS, host_base + HOST_CTRL); +} + static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version = 0; @@ -1076,9 +1134,6 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } } - /* GPIO off */ - writel(0, host_base + HOST_FLASH_CMD); - /* Apply workaround for completion IRQ loss on PCI-X errata */ if (probe_ent->host_flags & SIL24_FLAG_PCIX_IRQ_WOC) { tmp = readl(host_base + HOST_CTRL); @@ -1090,56 +1145,18 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) probe_ent->host_flags &= ~SIL24_FLAG_PCIX_IRQ_WOC; } - /* clear global reset & mask interrupts during initialization */ - writel(0, host_base + HOST_CTRL); - for (i = 0; i < probe_ent->n_ports; i++) { - void __iomem *port = port_base + i * PORT_REGS_SIZE; - unsigned long portu = (unsigned long)port; + unsigned long portu = + (unsigned long)port_base + i * PORT_REGS_SIZE; probe_ent->port[i].cmd_addr = portu; probe_ent->port[i].scr_addr = portu + PORT_SCONTROL; ata_std_ports(&probe_ent->port[i]); - - /* Initial PHY setting */ - writel(0x20c, port + PORT_PHY_CFG); - - /* Clear port RST */ - tmp = readl(port + PORT_CTRL_STAT); - if (tmp & PORT_CS_PORT_RST) { - writel(PORT_CS_PORT_RST, port + PORT_CTRL_CLR); - tmp = ata_wait_register(port + PORT_CTRL_STAT, - PORT_CS_PORT_RST, - PORT_CS_PORT_RST, 10, 100); - if (tmp & PORT_CS_PORT_RST) - dev_printk(KERN_ERR, &pdev->dev, - "failed to clear port RST\n"); - } - - /* Configure IRQ WoC */ - if (probe_ent->host_flags & SIL24_FLAG_PCIX_IRQ_WOC) - writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_STAT); - else - writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_CLR); - - /* Zero error counters. */ - writel(0x8000, port + PORT_DECODE_ERR_THRESH); - writel(0x8000, port + PORT_CRC_ERR_THRESH); - writel(0x8000, port + PORT_HSHK_ERR_THRESH); - writel(0x0000, port + PORT_DECODE_ERR_CNT); - writel(0x0000, port + PORT_CRC_ERR_CNT); - writel(0x0000, port + PORT_HSHK_ERR_CNT); - - /* Always use 64bit activation */ - writel(PORT_CS_32BIT_ACTV, port + PORT_CTRL_CLR); - - /* Clear port multiplier enable and resume bits */ - writel(PORT_CS_PM_EN | PORT_CS_RESUME, port + PORT_CTRL_CLR); } - /* Turn on interrupts */ - writel(IRQ_STAT_4PORTS, host_base + HOST_CTRL); + sil24_init_controller(pdev, probe_ent->n_ports, probe_ent->host_flags, + host_base, port_base); pci_set_master(pdev); From d2298dca9a2ec8a8ff23e251d8aaab00e9d229f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Jul 2006 16:07:27 +0900 Subject: [PATCH 31/40] [PATCH] sata_sil24: add suspend/sleep support Add suspend/sleep support. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/sata_sil24.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c index 1850c2e3aa21..2e0f4a4076af 100644 --- a/drivers/scsi/sata_sil24.c +++ b/drivers/scsi/sata_sil24.c @@ -92,6 +92,7 @@ enum { HOST_CTRL_STOP = (1 << 18), /* latched PCI STOP */ HOST_CTRL_DEVSEL = (1 << 19), /* latched PCI DEVSEL */ HOST_CTRL_REQ64 = (1 << 20), /* latched PCI REQ64 */ + HOST_CTRL_GLOBAL_RST = (1 << 31), /* global reset */ /* * Port registers @@ -338,6 +339,7 @@ static int sil24_port_start(struct ata_port *ap); static void sil24_port_stop(struct ata_port *ap); static void sil24_host_stop(struct ata_host_set *host_set); static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); +static int sil24_pci_device_resume(struct pci_dev *pdev); static const struct pci_device_id sil24_pci_tbl[] = { { 0x1095, 0x3124, PCI_ANY_ID, PCI_ANY_ID, 0, 0, BID_SIL3124 }, @@ -353,6 +355,8 @@ static struct pci_driver sil24_pci_driver = { .id_table = sil24_pci_tbl, .probe = sil24_init_one, .remove = ata_pci_remove_one, /* safe? */ + .suspend = ata_pci_device_suspend, + .resume = sil24_pci_device_resume, }; static struct scsi_host_template sil24_sht = { @@ -372,6 +376,8 @@ static struct scsi_host_template sil24_sht = { .slave_configure = ata_scsi_slave_config, .slave_destroy = ata_scsi_slave_destroy, .bios_param = ata_std_bios_param, + .suspend = ata_scsi_device_suspend, + .resume = ata_scsi_device_resume, }; static const struct ata_port_operations sil24_ops = { @@ -1179,6 +1185,25 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } +static int sil24_pci_device_resume(struct pci_dev *pdev) +{ + struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev); + struct sil24_host_priv *hpriv = host_set->private_data; + + ata_pci_device_do_resume(pdev); + + if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) + writel(HOST_CTRL_GLOBAL_RST, hpriv->host_base + HOST_CTRL); + + sil24_init_controller(pdev, host_set->n_ports, + host_set->ports[0]->flags, + hpriv->host_base, hpriv->port_base); + + ata_host_set_resume(host_set); + + return 0; +} + static int __init sil24_init(void) { return pci_module_init(&sil24_pci_driver); From 5afc81427f79193ed55edd7184ba2b6ce119a649 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 27 Jun 2006 14:51:25 +0200 Subject: [PATCH 32/40] [PATCH] libata-core.c: restore configuration boot messages in ata_dev_configure(), v2 This one looks better, IMHO. This restores the default libata configuration messages printed during booting. Signed-off-by: Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index fa65b990f8b0..386e5f21e191 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1417,7 +1417,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc)); /* print device info to dmesg */ - if (ata_msg_info(ap)) + if (ata_msg_drv(ap) && print_info) ata_dev_printk(dev, KERN_INFO, "ATA-%d, " "max %s, %Lu sectors: %s %s\n", ata_id_major_version(id), @@ -1440,7 +1440,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) } /* print device info to dmesg */ - if (ata_msg_info(ap)) + if (ata_msg_drv(ap) && print_info) ata_dev_printk(dev, KERN_INFO, "ATA-%d, " "max %s, %Lu sectors: CHS %u/%u/%u\n", ata_id_major_version(id), @@ -1452,7 +1452,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) if (dev->id[59] & 0x100) { dev->multi_count = dev->id[59] & 0xff; - if (ata_msg_info(ap)) + if (ata_msg_drv(ap) && print_info) ata_dev_printk(dev, KERN_INFO, "ata%u: dev %u multi count %u\n", ap->id, dev->devno, dev->multi_count); @@ -1481,7 +1481,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) } /* print device info to dmesg */ - if (ata_msg_info(ap)) + if (ata_msg_drv(ap) && print_info) ata_dev_printk(dev, KERN_INFO, "ATAPI, max %s%s\n", ata_mode_string(xfer_mask), cdb_intr_string); @@ -1491,7 +1491,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info) /* limit bridge transfers to udma5, 200 sectors */ if (ata_dev_knobble(dev)) { - if (ata_msg_info(ap)) + if (ata_msg_drv(ap) && print_info) ata_dev_printk(dev, KERN_INFO, "applying bridge limits\n"); dev->udma_mask &= ATA_UDMA5; From 9545b5781cfa822ea1528bc65965dbace0c39a5d Mon Sep 17 00:00:00 2001 From: root Date: Wed, 5 Jul 2006 22:58:20 -0400 Subject: [PATCH 33/40] [PATCH] ahci: Ensure that we don't grab both functions When we force the chip into dual fn mode so we get PATA and AHCI we must be sure we don't then do anything dumb like try and grab both with the AHCI driver. Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/scsi/ahci.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 7447ba0374e8..77e7202a0eba 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -1323,6 +1323,17 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (!printed_version++) dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); + /* JMicron-specific fixup: make sure we're in AHCI mode */ + /* This is protected from races with ata_jmicron by the pci probe + locking */ + if (pdev->vendor == PCI_VENDOR_ID_JMICRON) { + /* AHCI enable, AHCI on function 0 */ + pci_write_config_byte(pdev, 0x41, 0xa1); + /* Function 1 is the PATA controller */ + if (PCI_FUNC(pdev->devfn)) + return -ENODEV; + } + rc = pci_enable_device(pdev); if (rc) return rc; @@ -1378,10 +1389,6 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (have_msi) hpriv->flags |= AHCI_FLAG_MSI; - /* JMicron-specific fixup: make sure we're in AHCI mode */ - if (pdev->vendor == 0x197b) - pci_write_config_byte(pdev, 0x41, 0xa1); - /* initialize adapter */ rc = ahci_host_init(probe_ent); if (rc) From 309bade002e9226781c2d7a015340d0089e399b5 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 5 Jul 2006 23:02:48 -0400 Subject: [PATCH 34/40] [PCI] Add JMicron PCI ID constants They will be used in several IDE/libata files. Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 685081c01342..c09396d2c77b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2019,6 +2019,13 @@ #define PCI_VENDOR_ID_TDI 0x192E #define PCI_DEVICE_ID_TDI_EHCI 0x0101 +#define PCI_VENDOR_ID_JMICRON 0x197B +#define PCI_DEVICE_ID_JMICRON_JMB360 0x2360 +#define PCI_DEVICE_ID_JMICRON_JMB361 0x2361 +#define PCI_DEVICE_ID_JMICRON_JMB363 0x2363 +#define PCI_DEVICE_ID_JMICRON_JMB365 0x2365 +#define PCI_DEVICE_ID_JMICRON_JMB366 0x2366 +#define PCI_DEVICE_ID_JMICRON_JMB368 0x2368 #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 From 7233589d77fdb593b482a8b7ee867e901f54b593 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 5 Jul 2006 20:18:39 -0700 Subject: [PATCH 35/40] [SPARC64]: Fix sparc64 build errors when CONFIG_PCI=n. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- arch/sparc64/kernel/prom.c | 2 ++ arch/sparc64/kernel/time.c | 5 ++++- include/asm-sparc64/dma-mapping.h | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c index fa484d4f241e..99daeee4209d 100644 --- a/arch/sparc64/kernel/prom.c +++ b/arch/sparc64/kernel/prom.c @@ -1032,7 +1032,9 @@ static void sun4v_vdev_irq_trans_init(struct device_node *dp) static void irq_trans_init(struct device_node *dp) { const char *model; +#ifdef CONFIG_PCI int i; +#endif model = of_get_property(dp, "model", NULL); if (!model) diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index 8dcbfbffacc9..b43de647ba73 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -788,12 +788,15 @@ static int __devinit clock_probe(struct of_device *op, const struct of_device_id if (!regs) return -ENOMEM; +#ifdef CONFIG_PCI if (!strcmp(model, "ds1287") || !strcmp(model, "m5819") || !strcmp(model, "m5819p") || !strcmp(model, "m5823")) { ds1287_regs = (unsigned long) regs; - } else if (model[5] == '0' && model[6] == '2') { + } else +#endif + if (model[5] == '0' && model[6] == '2') { mstk48t02_regs = regs; } else if(model[5] == '0' && model[6] == '8') { mstk48t08_regs = regs; diff --git a/include/asm-sparc64/dma-mapping.h b/include/asm-sparc64/dma-mapping.h index 0f5b89c9323b..27c46fbeebd6 100644 --- a/include/asm-sparc64/dma-mapping.h +++ b/include/asm-sparc64/dma-mapping.h @@ -160,6 +160,20 @@ static inline void dma_free_coherent(struct device *dev, size_t size, BUG(); } +static inline void +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + BUG(); +} + +static inline void +dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + BUG(); +} + #endif /* PCI */ From 37e64e5ae1a9554762b6ec494871adcf48be20cf Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Wed, 5 Jul 2006 20:42:58 -0700 Subject: [PATCH 36/40] [SPARC64]: Fix stack overflow checking in modular non-SMP kernels. The sparc64 kernel's EXPORT_SYMBOL(_mcount) is inside an #ifdef CONFIG_SMP. This breaks modules in non-SMP kernels built with stack overflow checking (CONFIG_STACK_DEBUG=y), as modules_install reports: WARNING: /lib/modules/2.6.17/kernel/drivers/ide/ide-cd.ko needs unknown symbol _mcount Trivially fixed by moving EXPORT_SYMBOL(_mcount) outside of the #ifdef CONFIG_SMP. Signed-off-by: Mikael Pettersson Signed-off-by: David S. Miller --- arch/sparc64/kernel/sparc64_ksyms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 4173de425f09..237524d87cab 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -124,11 +124,6 @@ EXPORT_SYMBOL(__write_lock); EXPORT_SYMBOL(__write_unlock); EXPORT_SYMBOL(__write_trylock); -#if defined(CONFIG_MCOUNT) -extern void _mcount(void); -EXPORT_SYMBOL(_mcount); -#endif - /* CPU online map and active count. */ EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(phys_cpu_present_map); @@ -136,6 +131,11 @@ EXPORT_SYMBOL(phys_cpu_present_map); EXPORT_SYMBOL(smp_call_function); #endif /* CONFIG_SMP */ +#if defined(CONFIG_MCOUNT) +extern void _mcount(void); +EXPORT_SYMBOL(_mcount); +#endif + EXPORT_SYMBOL(sparc64_get_clock_tick); /* semaphores */ From 26dab8930b408d5e5eb9ef496d68364dc955e249 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 5 Jul 2006 20:45:06 -0700 Subject: [PATCH 37/40] [PKT_SCHED]: Fix illegal memory dereferences when dumping actions The TCA_ACT_KIND attribute is used without checking its availability when dumping actions therefore leading to a value of 0x4 being dereferenced. The use of strcmp() in tc_lookup_action_n() isn't safe when fed with string from an attribute without enforcing proper NUL termination. Both bugs can be triggered with malformed netlink message and don't require any privileges. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/act_api.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5b9397b33238..f9d1d78e17f8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -776,7 +776,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return ret; } -static char * +static struct rtattr * find_dump_kind(struct nlmsghdr *n) { struct rtattr *tb1, *tb2[TCA_ACT_MAX+1]; @@ -804,7 +804,7 @@ find_dump_kind(struct nlmsghdr *n) return NULL; kind = tb2[TCA_ACT_KIND-1]; - return (char *) RTA_DATA(kind); + return kind; } static int @@ -817,16 +817,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action a; int ret = 0; struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); - char *kind = find_dump_kind(cb->nlh); + struct rtattr *kind = find_dump_kind(cb->nlh); if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; } - a_o = tc_lookup_action_n(kind); + a_o = tc_lookup_action(kind); if (a_o == NULL) { - printk("failed to find %s\n", kind); return 0; } @@ -834,7 +833,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) a.ops = a_o; if (a_o->walk == NULL) { - printk("tc_dump_action: %s !capable of dumping table\n", kind); + printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); goto rtattr_failure; } From d152b4e1e9a18f332ecd9e66492d706edc083345 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 5 Jul 2006 20:45:57 -0700 Subject: [PATCH 38/40] [PKT_SCHED]: Return ENOENT if action module is unavailable Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/act_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f9d1d78e17f8..9b2e3975be0b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -305,6 +305,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, goto err_mod; } #endif + *err = -ENOENT; goto err_out; } From 4fe683f50d3fc8e36d4749277631dfc711393aa0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 5 Jul 2006 20:47:28 -0700 Subject: [PATCH 39/40] [PKT_SCHED]: Fix error handling while dumping actions "return -err" and blindly inheriting the error code in the netlink failure exception handler causes errors codes to be returned as positive value therefore making them being ignored by the caller. May lead to sending out incomplete netlink messages. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/act_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9b2e3975be0b..599423cc9d0d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -250,15 +250,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) RTA_PUT(skb, a->order, 0, NULL); err = tcf_action_dump_1(skb, a, bind, ref); if (err < 0) - goto rtattr_failure; + goto errout; r->rta_len = skb->tail - (u8*)r; } return 0; rtattr_failure: + err = -EINVAL; +errout: skb_trim(skb, b - skb->data); - return -err; + return err; } struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, From 120bda20c6f64b32e8bfbdd7b34feafaa5f5332e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Jul 2006 21:09:49 -0700 Subject: [PATCH 40/40] Linux 2.6.18-rc1 It's all good. --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 82f76a96cfe6..7c010f3325a9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 -SUBLEVEL = 17 -EXTRAVERSION = +SUBLEVEL = 18 +EXTRAVERSION = -rc1 NAME=Crazed Snow-Weasel # *DOCUMENTATION*