From 0492b05e8843d26d3075940590642c72a6aa781d Mon Sep 17 00:00:00 2001 From: Corrado Zoccolo Date: Mon, 26 Oct 2009 22:44:04 +0100 Subject: [PATCH] --- yaml --- r: 172923 b: refs/heads/master c: 5db5d64277bf390056b1a87d0bb288c8b8553f96 h: refs/heads/master i: 172921: b38f037d49d93616523ab7ab41c01e22f0194c77 172919: a98259fc22eb95eebafb5022fc0ecfd28e58c3a6 v: v3 --- [refs] | 2 +- trunk/block/cfq-iosched.c | 61 +++++++++++++++++++++++++--- trunk/drivers/block/drbd/drbd_req.c | 4 +- trunk/fs/aio.c | 62 +---------------------------- trunk/fs/block_dev.c | 12 +----- trunk/fs/direct-io.c | 10 ++--- trunk/include/linux/backing-dev.h | 13 ------ trunk/include/linux/bio.h | 8 +++- trunk/include/linux/blkdev.h | 13 ++++++ trunk/include/linux/fs.h | 2 + 10 files changed, 88 insertions(+), 99 deletions(-) diff --git a/[refs] b/[refs] index 4d9cb98dd0a8..20cbfec0e81d 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 4f570f995f68ef77aae7e5a441222f59232f2d0e +refs/heads/master: 5db5d64277bf390056b1a87d0bb288c8b8553f96 diff --git a/trunk/block/cfq-iosched.c b/trunk/block/cfq-iosched.c index 757010d8fb7a..97d946585bc3 100644 --- a/trunk/block/cfq-iosched.c +++ b/trunk/block/cfq-iosched.c @@ -27,6 +27,8 @@ static const int cfq_slice_sync = HZ / 10; static int cfq_slice_async = HZ / 25; static const int cfq_slice_async_rq = 2; static int cfq_slice_idle = HZ / 125; +static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ +static const int cfq_hist_divisor = 4; /* * offset from end of service tree @@ -148,6 +150,8 @@ struct cfq_data { struct rb_root prio_trees[CFQ_PRIO_LISTS]; unsigned int busy_queues; + unsigned int busy_rt_queues; + unsigned int busy_queues_avg[2]; int rq_in_driver[2]; int sync_flight; @@ -315,10 +319,52 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); } +/* + * get averaged number of queues of RT/BE priority. 
+ * average is updated, with a formula that gives more weight to higher numbers,
+ * to quickly follow sudden increases and decrease slowly
+ */
+
+static inline unsigned
+cfq_get_avg_queues(struct cfq_data *cfqd, bool rt) {
+	unsigned min_q, max_q;
+	unsigned mult = cfq_hist_divisor - 1;
+	unsigned round = cfq_hist_divisor / 2;
+	unsigned busy = cfqd->busy_rt_queues;
+
+	if (!rt)
+		busy = cfqd->busy_queues - cfqd->busy_rt_queues;
+
+	min_q = min(cfqd->busy_queues_avg[rt], busy);
+	max_q = max(cfqd->busy_queues_avg[rt], busy);
+	cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
+		cfq_hist_divisor;
+	return cfqd->busy_queues_avg[rt];
+}
+
 static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
+	if (cfqd->cfq_latency) {
+		/* interested queues (we consider only the ones with the same
+		 * priority class) */
+		unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq));
+		unsigned sync_slice = cfqd->cfq_slice[1];
+		unsigned expect_latency = sync_slice * iq;
+		if (expect_latency > cfq_target_latency) {
+			unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
+			/* scale low_slice according to IO priority
+			 * and sync vs async */
+			unsigned low_slice =
+				min(slice, base_low_slice * slice / sync_slice);
+			/* the adapted slice value is scaled to fit all iqs
+			 * into the target latency */
+			slice = max(slice * cfq_target_latency / expect_latency,
+				    low_slice);
+		}
+	}
+	cfqq->slice_end = jiffies + slice;
 	cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
 }
 
@@ -669,7 +715,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
-
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues++;
 	cfq_resort_rr_list(cfqd, cfqq);
 }
 
@@ -692,6 +739,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues--;
 }
 
 /*
@@ -2359,10 +2408,12 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 		cfqq->ioprio = IOPRIO_NORM;
 	} else {
 		/*
-		 * unboost the queue (if needed)
+		 * check if we need to unboost the queue
 		 */
-		cfqq->ioprio_class = cfqq->org_ioprio_class;
-		cfqq->ioprio = cfqq->org_ioprio;
+		if (cfqq->ioprio_class != cfqq->org_ioprio_class)
+			cfqq->ioprio_class = cfqq->org_ioprio_class;
+		if (cfqq->ioprio != cfqq->org_ioprio)
+			cfqq->ioprio = cfqq->org_ioprio;
 	}
 }
 
diff --git a/trunk/drivers/block/drbd/drbd_req.c b/trunk/drivers/block/drbd/drbd_req.c
index 3678d3d66c6c..d3426ff405b3 100644
--- a/trunk/drivers/block/drbd/drbd_req.c
+++ b/trunk/drivers/block/drbd/drbd_req.c
@@ -40,7 +40,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req
 	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
 	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
 	part_stat_unlock();
-	mdev->vdisk->part0.in_flight[rw]++;
+	mdev->vdisk->part0.in_flight++;
 }
 
 /* Update disk stats when completing request upwards */
@@ -53,7 +53,7 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
 	part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
 	part_round_stats(cpu, &mdev->vdisk->part0);
 	part_stat_unlock();
-	mdev->vdisk->part0.in_flight[rw]--;
+	mdev->vdisk->part0.in_flight--;
 }
 
 static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
diff --git a/trunk/fs/aio.c b/trunk/fs/aio.c
index c30dfc006108..02a2c9340573 100644
--- a/trunk/fs/aio.c
+++ b/trunk/fs/aio.c
@@ -15,7 +15,6 @@
 #include <linux/aio_abi.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
-#include <linux/backing-dev.h>
 #include <linux/uio.h>
 
 #define DEBUG 0
@@ -33,9 +32,6 @@
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/eventfd.h>
-#include <linux/blkdev.h>
-#include <linux/mempool.h>
-#include <linux/hash.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -64,14 +60,6 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-#define AIO_BATCH_HASH_BITS	3 /* allocated on-stack, so don't go crazy */
-#define AIO_BATCH_HASH_SIZE	(1 << AIO_BATCH_HASH_BITS)
-struct aio_batch_entry {
-	struct hlist_node list;
-	struct address_space *mapping;
-};
-mempool_t *abe_pool;
-
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
@@ -85,8 +73,6 @@ static int __init aio_setup(void)
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	aio_wq = create_workqueue("aio");
-	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
-	BUG_ON(!abe_pool);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
@@ -1545,44 +1531,8 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
 	return 1;
 }
 
-static void aio_batch_add(struct address_space *mapping,
-			  struct hlist_head *batch_hash)
-{
-	struct aio_batch_entry *abe;
-	struct hlist_node *pos;
-	unsigned bucket;
-
-	bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
-	hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
-		if (abe->mapping == mapping)
-			return;
-	}
-
-	abe = mempool_alloc(abe_pool, GFP_KERNEL);
-	BUG_ON(!igrab(mapping->host));
-	abe->mapping = mapping;
-	hlist_add_head(&abe->list, &batch_hash[bucket]);
-	return;
-}
-
-static void aio_batch_free(struct hlist_head *batch_hash)
-{
-	struct aio_batch_entry *abe;
-	struct hlist_node *pos, *n;
-	int i;
-
-	for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
-		hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
-			blk_run_address_space(abe->mapping);
-			iput(abe->mapping->host);
-			hlist_del(&abe->list);
-			mempool_free(abe, abe_pool);
-		}
-	}
-}
-
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb, struct hlist_head *batch_hash)
+			 struct iocb *iocb)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1658,12 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		;
 	}
 	spin_unlock_irq(&ctx->ctx_lock);
-	if (req->ki_opcode == IOCB_CMD_PREAD ||
-	    req->ki_opcode == IOCB_CMD_PREADV ||
-	    req->ki_opcode == IOCB_CMD_PWRITE ||
-	    req->ki_opcode == IOCB_CMD_PWRITEV)
-		aio_batch_add(file->f_mapping, batch_hash);
-
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
@@ -1691,7 +1635,6 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
-	struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1723,11 +1666,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
+		ret = io_submit_one(ctx, user_iocb, &tmp);
 		if (ret)
 			break;
 	}
-	aio_batch_free(batch_hash);
 
 	put_ioctx(ctx);
 	return i ?
i : ret; diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c index dde91e7e1c3a..9cf4b926f8e4 100644 --- a/trunk/fs/block_dev.c +++ b/trunk/fs/block_dev.c @@ -405,17 +405,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) { - struct block_device *bdev = I_BDEV(filp->f_mapping->host); - int error; - - error = sync_blockdev(bdev); - if (error) - return error; - - error = blkdev_issue_flush(bdev, NULL); - if (error == -EOPNOTSUPP) - error = 0; - return error; + return sync_blockdev(I_BDEV(filp->f_mapping->host)); } /* diff --git a/trunk/fs/direct-io.c b/trunk/fs/direct-io.c index 3af761c8c5cc..8b10b87dc01a 100644 --- a/trunk/fs/direct-io.c +++ b/trunk/fs/direct-io.c @@ -1028,6 +1028,9 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, if (dio->bio) dio_bio_submit(dio); + /* All IO is now issued, send it on its way */ + blk_run_address_space(inode->i_mapping); + /* * It is possible that, we return short IO due to end of file. * In that case, we need to release all the pages we got hold on. @@ -1054,11 +1057,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ((rw & READ) || (dio->result == dio->size))) ret = -EIOCBQUEUED; - if (ret != -EIOCBQUEUED) { - /* All IO is now issued, send it on its way */ - blk_run_address_space(inode->i_mapping); + if (ret != -EIOCBQUEUED) dio_await_completion(dio); - } /* * Sync will always be dropping the final ref and completing the @@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int acquire_i_mutex = 0; if (rw & WRITE) - rw = WRITE_SYNC_PLUG; + rw = WRITE_ODIRECT; if (bdev) bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); diff --git a/trunk/include/linux/backing-dev.h b/trunk/include/linux/backing-dev.h index fcbc26af00e4..b449e738533a 100644 --- a/trunk/include/linux/backing-dev.h +++ b/trunk/include/linux/backing-dev.h @@ -331,17 +331,4 @@ static inline int bdi_sched_wait(void *word) return 0; } -static inline void blk_run_backing_dev(struct backing_dev_info *bdi, - struct page *page) -{ - if (bdi && bdi->unplug_io_fn) - bdi->unplug_io_fn(bdi, page); -} - -static inline void blk_run_address_space(struct address_space *mapping) -{ - if (mapping) - blk_run_backing_dev(mapping->backing_dev_info, NULL); -} - #endif /* _LINUX_BACKING_DEV_H */ diff --git a/trunk/include/linux/bio.h b/trunk/include/linux/bio.h index 474792b825d0..5be93f18d842 100644 --- a/trunk/include/linux/bio.h +++ b/trunk/include/linux/bio.h @@ -450,8 +450,11 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly; /* * remember never ever reenable interrupts between a bvec_kmap_irq and * bvec_kunmap_irq! + * + * This function MUST be inlined - it plays with the CPU interrupt flags. 
*/ -static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) +static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec, + unsigned long *flags) { unsigned long addr; @@ -467,7 +470,8 @@ static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) return (char *) addr + bvec->bv_offset; } -static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) +static __always_inline void bvec_kunmap_irq(char *buffer, + unsigned long *flags) { unsigned long ptr = (unsigned long) buffer & PAGE_MASK; diff --git a/trunk/include/linux/blkdev.h b/trunk/include/linux/blkdev.h index 39c601f783a0..221cecd86bd3 100644 --- a/trunk/include/linux/blkdev.h +++ b/trunk/include/linux/blkdev.h @@ -823,6 +823,19 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_disk->queue; } +static inline void blk_run_backing_dev(struct backing_dev_info *bdi, + struct page *page) +{ + if (bdi && bdi->unplug_io_fn) + bdi->unplug_io_fn(bdi, page); +} + +static inline void blk_run_address_space(struct address_space *mapping) +{ + if (mapping) + blk_run_backing_dev(mapping->backing_dev_info, NULL); +} + /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index 2f5fca4147c2..2620a8c63571 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -129,6 +129,7 @@ struct inodes_stat_t { * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device * immediately after submission. The write equivalent * of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. * SWRITE_SYNC * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. * See SWRITE. @@ -150,6 +151,7 @@ struct inodes_stat_t { #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) +#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define SWRITE_SYNC_PLUG \ (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
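
Note: the following standalone userspace sketch (not part of the patch) walks through
the slice-adaptation arithmetic from the cfq-iosched hunks above. It assumes HZ = 1000,
so cfq_slice_sync = 100 jiffies, cfq_slice_idle = 8 jiffies and cfq_target_latency =
300 jiffies; update_avg(), adapt_slice() and the main() driver are illustrative names,
not kernel interfaces.

#include <stdio.h>

#define HZ			1000
#define CFQ_SLICE_SYNC		(HZ / 10)	/* 100 jiffies */
#define CFQ_SLICE_IDLE		(HZ / 125)	/*   8 jiffies */
#define CFQ_TARGET_LATENCY	(HZ * 3 / 10)	/* 300 jiffies */
#define CFQ_HIST_DIVISOR	4

static unsigned avg;	/* mirrors cfqd->busy_queues_avg[] for one class */

/* same formula as cfq_get_avg_queues(): (3 * max + min + 2) / 4 */
static unsigned update_avg(unsigned busy)
{
	unsigned min_q = busy < avg ? busy : avg;
	unsigned max_q = busy < avg ? avg : busy;

	avg = ((CFQ_HIST_DIVISOR - 1) * max_q + min_q +
	       CFQ_HIST_DIVISOR / 2) / CFQ_HIST_DIVISOR;
	return avg;
}

/* same scaling as the cfq_set_prio_slice() hunk above */
static unsigned adapt_slice(unsigned slice, unsigned iq)
{
	unsigned expect_latency = CFQ_SLICE_SYNC * iq;

	if (expect_latency > CFQ_TARGET_LATENCY) {
		unsigned base_low_slice = 2 * CFQ_SLICE_IDLE;
		unsigned low_slice = base_low_slice * slice / CFQ_SLICE_SYNC;

		if (low_slice > slice)
			low_slice = slice;
		/* shrink the slice so all iqs fit the target latency ... */
		slice = slice * CFQ_TARGET_LATENCY / expect_latency;
		/* ... but never below the scaled lower bound */
		if (slice < low_slice)
			slice = low_slice;
	}
	return slice;
}

int main(void)
{
	/* e.g. 8 busy sync queues: the average jumps to 6 on the first update */
	unsigned iq = update_avg(8);

	/* 100 * 6 = 600 > 300, so a 100-jiffy slice is halved to 50 */
	printf("iq=%u slice=%u\n", iq, adapt_slice(CFQ_SLICE_SYNC, iq));
	return 0;
}

With eight busy sync queues, the history-weighted average jumps to 6 after one update
(following the increase quickly), and the 100-jiffy base slice is cut to 50 jiffies so
that 6 queues together fit the 300 ms latency target.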
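Similarly, a small sketch of how the fs.h write-flag macros in the last hunk compose.
The BIO_RW_* bit positions below are made up for illustration (the real values come
from the enum in include/linux/bio.h); the point is only that WRITE_ODIRECT sets
BIO_RW_UNPLUG like WRITE_SYNC but leaves BIO_RW_NOIDLE clear.

#include <stdio.h>

/* hypothetical bit positions, for illustration only */
enum { BIO_RW = 0, BIO_RW_SYNCIO = 3, BIO_RW_UNPLUG = 4, BIO_RW_NOIDLE = 5 };

#define WRITE			(1 << BIO_RW)
#define WRITE_SYNC_PLUG		(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
#define WRITE_SYNC		(WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
#define WRITE_ODIRECT		(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))

int main(void)
{
	/* WRITE_ODIRECT unplugs the queue immediately but, unlike WRITE_SYNC,
	 * does not set BIO_RW_NOIDLE, so CFQ may still idle for O_DIRECT writers */
	printf("WRITE_SYNC=%#x WRITE_ODIRECT=%#x\n", WRITE_SYNC, WRITE_ODIRECT);
	return 0;
}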