diff --git a/[refs] b/[refs] index 7130a88c20cb..3df147df6d57 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 5869619cb5b26754574375472fe54a390edf34c7 +refs/heads/master: 150e6c67f4bf6ab51e62defc41bd19a2eefe5709 diff --git a/trunk/block/cfq-iosched.c b/trunk/block/cfq-iosched.c index aa00d8f2d0b0..8b5ba184cf91 100644 --- a/trunk/block/cfq-iosched.c +++ b/trunk/block/cfq-iosched.c @@ -2625,12 +2625,10 @@ static void cfq_prio_boost(struct cfq_queue *cfqq) cfqq->ioprio = IOPRIO_NORM; } else { /* - * check if we need to unboost the queue + * unboost the queue (if needed) */ - if (cfqq->ioprio_class != cfqq->org_ioprio_class) - cfqq->ioprio_class = cfqq->org_ioprio_class; - if (cfqq->ioprio != cfqq->org_ioprio) - cfqq->ioprio = cfqq->org_ioprio; + cfqq->ioprio_class = cfqq->org_ioprio_class; + cfqq->ioprio = cfqq->org_ioprio; } } diff --git a/trunk/drivers/block/drbd/drbd_req.c b/trunk/drivers/block/drbd/drbd_req.c index d3426ff405b3..3678d3d66c6c 100644 --- a/trunk/drivers/block/drbd/drbd_req.c +++ b/trunk/drivers/block/drbd/drbd_req.c @@ -40,7 +40,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); part_stat_unlock(); - mdev->vdisk->part0.in_flight++; + mdev->vdisk->part0.in_flight[rw]++; } /* Update disk stats when completing request upwards */ @@ -53,7 +53,7 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); part_round_stats(cpu, &mdev->vdisk->part0); part_stat_unlock(); - mdev->vdisk->part0.in_flight--; + mdev->vdisk->part0.in_flight[rw]--; } static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) diff --git a/trunk/fs/aio.c b/trunk/fs/aio.c index 02a2c9340573..c30dfc006108 100644 --- a/trunk/fs/aio.c +++ b/trunk/fs/aio.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #define DEBUG 0 @@ -32,6 +33,9 @@ #include #include #include +#include +#include +#include #include #include @@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine); static DEFINE_SPINLOCK(fput_lock); static LIST_HEAD(fput_head); +#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */ +#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS) +struct aio_batch_entry { + struct hlist_node list; + struct address_space *mapping; +}; +mempool_t *abe_pool; + static void aio_kick_handler(struct work_struct *); static void aio_queue_work(struct kioctx *); @@ -73,6 +85,8 @@ static int __init aio_setup(void) kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); aio_wq = create_workqueue("aio"); + abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); + BUG_ON(!abe_pool); pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); @@ -1531,8 +1545,44 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode, return 1; } +static void aio_batch_add(struct address_space *mapping, + struct hlist_head *batch_hash) +{ + struct aio_batch_entry *abe; + struct hlist_node *pos; + unsigned bucket; + + bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS); + hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) { + if (abe->mapping == mapping) + return; + } + + abe = mempool_alloc(abe_pool, GFP_KERNEL); + BUG_ON(!igrab(mapping->host)); + abe->mapping = mapping; + hlist_add_head(&abe->list, &batch_hash[bucket]); + return; +} + +static void aio_batch_free(struct hlist_head *batch_hash) +{ + struct aio_batch_entry *abe; + struct hlist_node *pos, *n; + int i; + + for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) { + hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) { + blk_run_address_space(abe->mapping); + iput(abe->mapping->host); + hlist_del(&abe->list); + mempool_free(abe, abe_pool); + } + } +} + static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb) + struct iocb *iocb, struct hlist_head *batch_hash) { struct kiocb *req; struct file *file; @@ -1608,6 +1658,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, ; } spin_unlock_irq(&ctx->ctx_lock); + if (req->ki_opcode == IOCB_CMD_PREAD || + req->ki_opcode == IOCB_CMD_PREADV || + req->ki_opcode == IOCB_CMD_PWRITE || + req->ki_opcode == IOCB_CMD_PWRITEV) + aio_batch_add(file->f_mapping, batch_hash); + aio_put_req(req); /* drop extra ref to req */ return 0; @@ -1635,6 +1691,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, struct kioctx *ctx; long ret = 0; int i; + struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, }; if (unlikely(nr < 0)) return -EINVAL; @@ -1666,10 +1723,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, break; } - ret = io_submit_one(ctx, user_iocb, &tmp); + ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash); if (ret) break; } + aio_batch_free(batch_hash); put_ioctx(ctx); return i ? i : ret; diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c index 9cf4b926f8e4..dde91e7e1c3a 100644 --- a/trunk/fs/block_dev.c +++ b/trunk/fs/block_dev.c @@ -405,7 +405,17 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) { - return sync_blockdev(I_BDEV(filp->f_mapping->host)); + struct block_device *bdev = I_BDEV(filp->f_mapping->host); + int error; + + error = sync_blockdev(bdev); + if (error) + return error; + + error = blkdev_issue_flush(bdev, NULL); + if (error == -EOPNOTSUPP) + error = 0; + return error; } /* diff --git a/trunk/fs/direct-io.c b/trunk/fs/direct-io.c index 8b10b87dc01a..3af761c8c5cc 100644 --- a/trunk/fs/direct-io.c +++ b/trunk/fs/direct-io.c @@ -1028,9 +1028,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, if (dio->bio) dio_bio_submit(dio); - /* All IO is now issued, send it on its way */ - blk_run_address_space(inode->i_mapping); - /* * It is possible that, we return short IO due to end of file. * In that case, we need to release all the pages we got hold on. @@ -1057,8 +1054,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ((rw & READ) || (dio->result == dio->size))) ret = -EIOCBQUEUED; - if (ret != -EIOCBQUEUED) + if (ret != -EIOCBQUEUED) { + /* All IO is now issued, send it on its way */ + blk_run_address_space(inode->i_mapping); dio_await_completion(dio); + } /* * Sync will always be dropping the final ref and completing the @@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int acquire_i_mutex = 0; if (rw & WRITE) - rw = WRITE_ODIRECT; + rw = WRITE_SYNC_PLUG; if (bdev) bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); diff --git a/trunk/include/linux/backing-dev.h b/trunk/include/linux/backing-dev.h index b449e738533a..fcbc26af00e4 100644 --- a/trunk/include/linux/backing-dev.h +++ b/trunk/include/linux/backing-dev.h @@ -331,4 +331,17 @@ static inline int bdi_sched_wait(void *word) return 0; } +static inline void blk_run_backing_dev(struct backing_dev_info *bdi, + struct page *page) +{ + if (bdi && bdi->unplug_io_fn) + bdi->unplug_io_fn(bdi, page); +} + +static inline void blk_run_address_space(struct address_space *mapping) +{ + if (mapping) + blk_run_backing_dev(mapping->backing_dev_info, NULL); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/trunk/include/linux/bio.h b/trunk/include/linux/bio.h index 5be93f18d842..474792b825d0 100644 --- a/trunk/include/linux/bio.h +++ b/trunk/include/linux/bio.h @@ -450,11 +450,8 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly; /* * remember never ever reenable interrupts between a bvec_kmap_irq and * bvec_kunmap_irq! - * - * This function MUST be inlined - it plays with the CPU interrupt flags. */ -static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec, - unsigned long *flags) +static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) { unsigned long addr; @@ -470,8 +467,7 @@ static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec, return (char *) addr + bvec->bv_offset; } -static __always_inline void bvec_kunmap_irq(char *buffer, - unsigned long *flags) +static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) { unsigned long ptr = (unsigned long) buffer & PAGE_MASK; diff --git a/trunk/include/linux/blkdev.h b/trunk/include/linux/blkdev.h index 221cecd86bd3..39c601f783a0 100644 --- a/trunk/include/linux/blkdev.h +++ b/trunk/include/linux/blkdev.h @@ -823,19 +823,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_disk->queue; } -static inline void blk_run_backing_dev(struct backing_dev_info *bdi, - struct page *page) -{ - if (bdi && bdi->unplug_io_fn) - bdi->unplug_io_fn(bdi, page); -} - -static inline void blk_run_address_space(struct address_space *mapping) -{ - if (mapping) - blk_run_backing_dev(mapping->backing_dev_info, NULL); -} - /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index 2620a8c63571..2f5fca4147c2 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -129,7 +129,6 @@ struct inodes_stat_t { * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device * immediately after submission. The write equivalent * of READ_SYNC. - * WRITE_ODIRECT Special case write for O_DIRECT only. * SWRITE_SYNC * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. * See SWRITE. @@ -151,7 +150,6 @@ struct inodes_stat_t { #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define SWRITE_SYNC_PLUG \ (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))