diff --git a/[refs] b/[refs] index a89cdad6c8d1..773437c1ad6e 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 9562ad9ab36df7ccef920d119f3b5100025db95f +refs/heads/master: 456be1484ffc72a24bdb4200b5847c4fa90139d9 diff --git a/trunk/Makefile b/trunk/Makefile index 2652089bf541..31f967c31e7f 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc10 +EXTRAVERSION = -rc9 NAME = "Divemaster Edition" # *DOCUMENTATION* diff --git a/trunk/arch/m68k/emu/nfblock.c b/trunk/arch/m68k/emu/nfblock.c index e3011338ab40..48e50f8c1c7e 100644 --- a/trunk/arch/m68k/emu/nfblock.c +++ b/trunk/arch/m68k/emu/nfblock.c @@ -59,7 +59,7 @@ struct nfhd_device { struct gendisk *disk; }; -static void nfhd_make_request(struct request_queue *queue, struct bio *bio) +static int nfhd_make_request(struct request_queue *queue, struct bio *bio) { struct nfhd_device *dev = queue->queuedata; struct bio_vec *bvec; @@ -76,6 +76,7 @@ static void nfhd_make_request(struct request_queue *queue, struct bio *bio) sec += len; } bio_endio(bio, 0); + return 0; } static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/trunk/arch/powerpc/sysdev/axonram.c b/trunk/arch/powerpc/sysdev/axonram.c index ba4271919062..265f0f09395a 100644 --- a/trunk/arch/powerpc/sysdev/axonram.c +++ b/trunk/arch/powerpc/sysdev/axonram.c @@ -104,7 +104,7 @@ axon_ram_irq_handler(int irq, void *dev) * axon_ram_make_request - make_request() method for block device * @queue, @bio: see blk_queue_make_request() */ -static void +static int axon_ram_make_request(struct request_queue *queue, struct bio *bio) { struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; @@ -113,6 +113,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) struct bio_vec *vec; unsigned int transfered; unsigned short idx; + int rc = 0; phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT); phys_end = bank->io_addr + bank->size; @@ -120,7 +121,8 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) bio_for_each_segment(vec, bio, idx) { if (unlikely(phys_mem + vec->bv_len > phys_end)) { bio_io_error(bio); - return; + rc = -ERANGE; + break; } user_mem = page_address(vec->bv_page) + vec->bv_offset; @@ -133,6 +135,8 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) transfered += vec->bv_len; } bio_endio(bio, 0); + + return rc; } /** diff --git a/trunk/block/blk-cgroup.c b/trunk/block/blk-cgroup.c index d61ec5636ce0..b596e54ddd71 100644 --- a/trunk/block/blk-cgroup.c +++ b/trunk/block/blk-cgroup.c @@ -768,14 +768,25 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, return disk_total; } +static int blkio_check_dev_num(dev_t dev) +{ + int part = 0; + struct gendisk *disk; + + disk = get_gendisk(dev, &part); + if (!disk || part) + return -ENODEV; + + return 0; +} + static int blkio_policy_parse_and_set(char *buf, struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) { - struct gendisk *disk = NULL; char *s[4], *p, *major_s = NULL, *minor_s = NULL; + int ret; unsigned long major, minor; - int i = 0, ret = -EINVAL; - int part; + int i = 0; dev_t dev; u64 temp; @@ -793,36 +804,37 @@ static int blkio_policy_parse_and_set(char *buf, } if (i != 2) - goto out; + return -EINVAL; p = strsep(&s[0], ":"); if (p != NULL) major_s = p; else - goto out; + return -EINVAL; minor_s = s[0]; if (!minor_s) - goto out; + return -EINVAL; - if (strict_strtoul(major_s, 10, &major)) - goto out; + ret = 
strict_strtoul(major_s, 10, &major); + if (ret) + return -EINVAL; - if (strict_strtoul(minor_s, 10, &minor)) - goto out; + ret = strict_strtoul(minor_s, 10, &minor); + if (ret) + return -EINVAL; dev = MKDEV(major, minor); - if (strict_strtoull(s[1], 10, &temp)) - goto out; + ret = strict_strtoull(s[1], 10, &temp); + if (ret) + return -EINVAL; /* For rule removal, do not check for device presence. */ if (temp) { - disk = get_gendisk(dev, &part); - if (!disk || part) { - ret = -ENODEV; - goto out; - } + ret = blkio_check_dev_num(dev); + if (ret) + return ret; } newpn->dev = dev; @@ -831,7 +843,7 @@ static int blkio_policy_parse_and_set(char *buf, case BLKIO_POLICY_PROP: if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || temp > BLKIO_WEIGHT_MAX) - goto out; + return -EINVAL; newpn->plid = plid; newpn->fileid = fileid; @@ -848,7 +860,7 @@ static int blkio_policy_parse_and_set(char *buf, case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_write_iops_device: if (temp > THROTL_IOPS_MAX) - goto out; + return -EINVAL; newpn->plid = plid; newpn->fileid = fileid; @@ -859,10 +871,8 @@ static int blkio_policy_parse_and_set(char *buf, default: BUG(); } - ret = 0; -out: - put_disk(disk); - return ret; + + return 0; } unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, diff --git a/trunk/block/blk-cgroup.h b/trunk/block/blk-cgroup.h index 6f3ace7e792f..a71d2904ffb9 100644 --- a/trunk/block/blk-cgroup.h +++ b/trunk/block/blk-cgroup.h @@ -188,7 +188,7 @@ struct blkio_policy_node { union { unsigned int weight; /* - * Rate read/write in terms of bytes per second + * Rate read/write in terms of byptes per second * Whether this rate represents read or write is determined * by file type "fileid". */ diff --git a/trunk/block/blk-core.c b/trunk/block/blk-core.c index da697936d220..d34433ae7917 100644 --- a/trunk/block/blk-core.c +++ b/trunk/block/blk-core.c @@ -28,7 +28,6 @@ #include #include #include -#include #define CREATE_TRACE_POINTS #include @@ -39,6 +38,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); +static int __make_request(struct request_queue *q, struct bio *bio); + /* * For the allocated request tables */ @@ -346,75 +347,30 @@ void blk_put_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_put_queue); -/** - * blk_drain_queue - drain requests from request_queue - * @q: queue to drain - * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV - * - * Drain requests from @q. If @drain_all is set, all requests are drained. - * If not, only ELVPRIV requests are drained. The caller is responsible - * for ensuring that no new requests which need to be drained are queued. - */ -void blk_drain_queue(struct request_queue *q, bool drain_all) -{ - while (true) { - int nr_rqs; - - spin_lock_irq(q->queue_lock); - - elv_drain_elevator(q); - if (drain_all) - blk_throtl_drain(q); - - __blk_run_queue(q); - - if (drain_all) - nr_rqs = q->rq.count[0] + q->rq.count[1]; - else - nr_rqs = q->rq.elvpriv; - - spin_unlock_irq(q->queue_lock); - - if (!nr_rqs) - break; - msleep(10); - } -} - -/** - * blk_cleanup_queue - shutdown a request queue - * @q: request queue to shutdown - * - * Mark @q DEAD, drain all pending requests, destroy and put it. All - * future requests will be failed immediately with -ENODEV. +/* + * Note: If a driver supplied the queue lock, it is disconnected + * by this function. 
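The blk-cgroup hunk earlier in this diff rewrites blkio_policy_parse_and_set() to use early returns around strict_strtoul()/MKDEV(). A minimal sketch of that parsing pattern, with a hypothetical helper name and none of the cgroup specifics, assuming the 3.1-era strict_strtoul():

#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/kdev_t.h>

/* Hypothetical helper: parse "major:minor" into a dev_t, -EINVAL on bad input. */
static int example_parse_dev(char *buf, dev_t *dev)
{
	char *major_s, *minor_s;
	unsigned long major, minor;
	int ret;

	major_s = strsep(&buf, ":");
	minor_s = buf;
	if (!major_s || !minor_s)
		return -EINVAL;

	ret = strict_strtoul(major_s, 10, &major);
	if (ret)
		return -EINVAL;

	ret = strict_strtoul(minor_s, 10, &minor);
	if (ret)
		return -EINVAL;

	*dev = MKDEV(major, minor);
	return 0;
}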
The actual state of the lock doesn't matter + * here as the request_queue isn't accessible after this point + * (QUEUE_FLAG_DEAD is set) and no other requests will be queued. */ void blk_cleanup_queue(struct request_queue *q) { - spinlock_t *lock = q->queue_lock; + /* + * We know we have process context here, so we can be a little + * cautious and ensure that pending block actions on this device + * are done before moving on. Going into this function, we should + * not have processes doing IO to this device. + */ + blk_sync_queue(q); - /* mark @q DEAD, no new request or merges will be allowed afterwards */ + del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); mutex_lock(&q->sysfs_lock); queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); - - spin_lock_irq(lock); - queue_flag_set(QUEUE_FLAG_NOMERGES, q); - queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - queue_flag_set(QUEUE_FLAG_DEAD, q); + mutex_unlock(&q->sysfs_lock); if (q->queue_lock != &q->__queue_lock) q->queue_lock = &q->__queue_lock; - spin_unlock_irq(lock); - mutex_unlock(&q->sysfs_lock); - - /* drain all requests queued before DEAD marking */ - blk_drain_queue(q, true); - - /* @q won't process any more request, flush async actions */ - del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); - blk_sync_queue(q); - - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } EXPORT_SYMBOL(blk_cleanup_queue); @@ -585,7 +541,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, /* * This also sets hw/phys segments, boundary and size */ - blk_queue_make_request(q, blk_queue_bio); + blk_queue_make_request(q, __make_request); q->sg_reserved_size = INT_MAX; @@ -620,7 +576,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -631,10 +587,12 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) rq->cmd_flags = flags | REQ_ALLOCED; - if ((flags & REQ_ELVPRIV) && - unlikely(elv_set_request(q, rq, gfp_mask))) { - mempool_free(rq, q->rq.rq_pool); - return NULL; + if (priv) { + if (unlikely(elv_set_request(q, rq, gfp_mask))) { + mempool_free(rq, q->rq.rq_pool); + return NULL; + } + rq->cmd_flags |= REQ_ELVPRIV; } return rq; @@ -693,13 +651,12 @@ static void __freed_request(struct request_queue *q, int sync) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ -static void freed_request(struct request_queue *q, unsigned int flags) +static void freed_request(struct request_queue *q, int sync, int priv) { struct request_list *rl = &q->rq; - int sync = rw_is_sync(flags); rl->count[sync]--; - if (flags & REQ_ELVPRIV) + if (priv) rl->elvpriv--; __freed_request(q, sync); @@ -727,19 +684,10 @@ static bool blk_rq_should_init_elevator(struct bio *bio) return true; } -/** - * get_request - get a free request - * @q: request_queue to allocate request from - * @rw_flags: RW and SYNC flags - * @bio: bio to allocate request for (can be %NULL) - * @gfp_mask: allocation mask - * - * Get a free request from @q. This function may fail under memory - * pressure or if @q is dead. - * - * Must be callled with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. 
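blk_init_allocated_queue_node() above re-installs the internal __make_request handler via blk_queue_make_request(); the bio-based drivers later in this diff (nfblock, axonram, brd, aoe, ...) use the same call for their own handlers. A hedged sketch of a trivial bio-based driver setup, using the int-returning make_request prototype this diff restores (all names invented):

#include <linux/blkdev.h>
#include <linux/bio.h>

static int example_make_request(struct request_queue *q, struct bio *bio)
{
	/* a real driver would transfer data here before completing the bio */
	bio_endio(bio, 0);
	return 0;	/* 0: handled here, do not resubmit */
}

static struct request_queue *example_create_queue(void)
{
	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

	if (q)
		blk_queue_make_request(q, example_make_request);
	return q;
}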
- * Returns !%NULL on success, with @q->queue_lock *not held*. +/* + * Get a free request, queue_lock must be held. + * Returns NULL on failure, with queue_lock held. + * Returns !NULL on success, with queue_lock *not held*. */ static struct request *get_request(struct request_queue *q, int rw_flags, struct bio *bio, gfp_t gfp_mask) @@ -748,10 +696,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, struct request_list *rl = &q->rq; struct io_context *ioc = NULL; const bool is_sync = rw_is_sync(rw_flags) != 0; - int may_queue; - - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - return NULL; + int may_queue, priv = 0; may_queue = elv_may_queue(q, rw_flags); if (may_queue == ELV_MQUEUE_NO) @@ -795,17 +740,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags, rl->count[is_sync]++; rl->starved[is_sync] = 0; - if (blk_rq_should_init_elevator(bio) && - !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { - rw_flags |= REQ_ELVPRIV; - rl->elvpriv++; + if (blk_rq_should_init_elevator(bio)) { + priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + if (priv) + rl->elvpriv++; } if (blk_queue_io_stat(q)) rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw_flags, gfp_mask); + rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. Undo anything @@ -815,7 +760,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, * wait queue, but this is pretty rare. */ spin_lock_irq(q->queue_lock); - freed_request(q, rw_flags); + freed_request(q, is_sync, priv); /* * in the very unlikely event that allocation failed and no @@ -845,18 +790,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags, return rq; } -/** - * get_request_wait - get a free request with retry - * @q: request_queue to allocate request from - * @rw_flags: RW and SYNC flags - * @bio: bio to allocate request for (can be %NULL) - * - * Get a free request from @q. This function keeps retrying under memory - * pressure and fails iff @q is dead. +/* + * No available requests for this queue, wait for some requests to become + * available. * - * Must be callled with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. - * Returns !%NULL on success, with @q->queue_lock *not held*. + * Called with q->queue_lock held, and returns with it unlocked. 
*/ static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct bio *bio) @@ -870,9 +808,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct io_context *ioc; struct request_list *rl = &q->rq; - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - return NULL; - prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, TASK_UNINTERRUPTIBLE); @@ -903,15 +838,19 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { struct request *rq; + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + return NULL; + BUG_ON(rw != READ && rw != WRITE); spin_lock_irq(q->queue_lock); - if (gfp_mask & __GFP_WAIT) + if (gfp_mask & __GFP_WAIT) { rq = get_request_wait(q, rw, NULL); - else + } else { rq = get_request(q, rw, NULL, gfp_mask); - if (!rq) - spin_unlock_irq(q->queue_lock); + if (!rq) + spin_unlock_irq(q->queue_lock); + } /* q->queue_lock is unlocked at this point */ return rq; @@ -1113,13 +1052,14 @@ void __blk_put_request(struct request_queue *q, struct request *req) * it didn't come out of our reserved rq pools */ if (req->cmd_flags & REQ_ALLOCED) { - unsigned int flags = req->cmd_flags; + int is_sync = rq_is_sync(req) != 0; + int priv = req->cmd_flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); BUG_ON(!hlist_unhashed(&req->hash)); blk_free_request(q, req); - freed_request(q, flags); + freed_request(q, is_sync, priv); } } EXPORT_SYMBOL_GPL(__blk_put_request); @@ -1221,32 +1161,18 @@ static bool bio_attempt_front_merge(struct request_queue *q, return true; } -/** - * attempt_plug_merge - try to merge with %current's plugged list - * @q: request_queue new bio is being queued at - * @bio: new bio being queued - * @request_count: out parameter for number of traversed plugged requests - * - * Determine whether @bio being queued on @q can be merged with a request - * on %current's plugged list. Returns %true if merge was successful, - * otherwise %false. - * - * This function is called without @q->queue_lock; however, elevator is - * accessed iff there already are requests on the plugged list which in - * turn guarantees validity of the elevator. - * - * Note that, on successful merge, elevator operation - * elevator_bio_merged_fn() will be called without queue lock. Elevator - * must be ready for this. +/* + * Attempts to merge with the plugged list in the current process. Returns + * true if merge was successful, otherwise false. 
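Both before and after this change, blk_get_request() can return NULL (dead queue, or a failed non-blocking allocation), so callers must check the result. A small usage sketch under that assumption, with an invented caller:

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/gfp.h>

/* Illustrative only: allocate a request, use it, and put it back. */
static int example_use_request(struct request_queue *q)
{
	struct request *rq;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENODEV;	/* queue dead or allocation failed */

	/* ... fill in rq fields and issue it here ... */

	blk_put_request(rq);
	return 0;
}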
*/ -static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count) +static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, + struct bio *bio, unsigned int *request_count) { struct blk_plug *plug; struct request *rq; bool ret = false; - plug = current->plug; + plug = tsk->plug; if (!plug) goto out; *request_count = 0; @@ -1276,6 +1202,7 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, void init_request_from_bio(struct request *req, struct bio *bio) { + req->cpu = bio->bi_comp_cpu; req->cmd_type = REQ_TYPE_FS; req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; @@ -1288,7 +1215,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -void blk_queue_bio(struct request_queue *q, struct bio *bio) +static int __make_request(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1313,8 +1240,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (attempt_plug_merge(q, bio, &request_count)) - return; + if (attempt_plug_merge(current, q, bio, &request_count)) + goto out; spin_lock_irq(q->queue_lock); @@ -1348,10 +1275,6 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Returns with the queue unlocked. */ req = get_request_wait(q, rw_flags, bio); - if (unlikely(!req)) { - bio_endio(bio, -ENODEV); /* @q is dead */ - goto out_unlock; - } /* * After dropping the lock and possibly sleeping here, our request @@ -1361,7 +1284,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) */ init_request_from_bio(req, bio); - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || + bio_flagged(bio, BIO_CPU_AFFINE)) req->cpu = raw_smp_processor_id(); plug = current->plug; @@ -1392,8 +1316,9 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) out_unlock: spin_unlock_irq(q->queue_lock); } +out: + return 0; } -EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ /* * If bio->bi_dev is a partition, remap the location @@ -1492,135 +1417,165 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) return 0; } -static noinline_for_stack bool -generic_make_request_checks(struct bio *bio) +/** + * generic_make_request - hand a buffer to its device driver for I/O + * @bio: The bio describing the location in memory and on the device. + * + * generic_make_request() is used to make I/O requests of block + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. + * + * generic_make_request() does not return any status. The + * success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the bio->bi_end_io + * function described (one day) else where. + * + * The caller of generic_make_request must make sure that bi_io_vec + * are set to describe the memory buffer, and that bi_dev and bi_sector are + * set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may change bi_dev and + * bi_sector for remaps as it sees fit. 
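The restored generic_make_request() kernel-doc above lists what a submitter must fill in: bi_bdev, bi_sector, bi_end_io and optionally bi_private. A minimal, hypothetical submitter following that contract; the synchronous completion via struct completion is an assumption, and error handling is trimmed:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/completion.h>

static void example_end_io(struct bio *bio, int err)
{
	/* completion is signalled asynchronously through bi_end_io */
	complete(bio->bi_private);
	bio_put(bio);
}

/* Illustrative only: read one page at 'sector' from 'bdev'. */
static int example_submit(struct block_device *bdev, sector_t sector,
			  struct page *page, struct completion *done)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;		/* set before bio_add_page() */
	bio->bi_sector = sector;
	bio->bi_end_io = example_end_io;
	bio->bi_private = done;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	generic_make_request(bio);	/* caller waits on 'done' elsewhere */
	return 0;
}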
So the values of these fields + * should NOT be depended on after the call to generic_make_request. + */ +static inline void __generic_make_request(struct bio *bio) { struct request_queue *q; - int nr_sectors = bio_sectors(bio); + sector_t old_sector; + int ret, nr_sectors = bio_sectors(bio); + dev_t old_dev; int err = -EIO; - char b[BDEVNAME_SIZE]; - struct hd_struct *part; might_sleep(); if (bio_check_eod(bio, nr_sectors)) goto end_io; - q = bdev_get_queue(bio->bi_bdev); - if (unlikely(!q)) { - printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%Lu)\n", - bdevname(bio->bi_bdev, b), - (long long) bio->bi_sector); - goto end_io; - } + /* + * Resolve the mapping until finished. (drivers are + * still free to implement/resolve their own stacking + * by explicitly returning 0) + * + * NOTE: we don't repeat the blk_size check for each new device. + * Stacking drivers are expected to know what they are doing. + */ + old_sector = -1; + old_dev = 0; + do { + char b[BDEVNAME_SIZE]; + struct hd_struct *part; - if (unlikely(!(bio->bi_rw & REQ_DISCARD) && - nr_sectors > queue_max_hw_sectors(q))) { - printk(KERN_ERR "bio too big device %s (%u > %u)\n", - bdevname(bio->bi_bdev, b), - bio_sectors(bio), - queue_max_hw_sectors(q)); - goto end_io; - } + q = bdev_get_queue(bio->bi_bdev); + if (unlikely(!q)) { + printk(KERN_ERR + "generic_make_request: Trying to access " + "nonexistent block-device %s (%Lu)\n", + bdevname(bio->bi_bdev, b), + (long long) bio->bi_sector); + goto end_io; + } - part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_size) || - should_fail_request(&part_to_disk(part)->part0, - bio->bi_size)) - goto end_io; + if (unlikely(!(bio->bi_rw & REQ_DISCARD) && + nr_sectors > queue_max_hw_sectors(q))) { + printk(KERN_ERR "bio too big device %s (%u > %u)\n", + bdevname(bio->bi_bdev, b), + bio_sectors(bio), + queue_max_hw_sectors(q)); + goto end_io; + } - /* - * If this device has partitions, remap block n - * of partition p to block n+start(p) of the disk. - */ - blk_partition_remap(bio); + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + goto end_io; - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) - goto end_io; + part = bio->bi_bdev->bd_part; + if (should_fail_request(part, bio->bi_size) || + should_fail_request(&part_to_disk(part)->part0, + bio->bi_size)) + goto end_io; - if (bio_check_eod(bio, nr_sectors)) - goto end_io; + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); - /* - * Filter flush bio's early so that make_request based - * drivers without flush support don't have to worry - * about them. - */ - if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { - bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); - if (!nr_sectors) { - err = 0; + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) goto end_io; + + if (old_sector != -1) + trace_block_bio_remap(q, bio, old_dev, old_sector); + + old_sector = bio->bi_sector; + old_dev = bio->bi_bdev->bd_dev; + + if (bio_check_eod(bio, nr_sectors)) + goto end_io; + + /* + * Filter flush bio's early so that make_request based + * drivers without flush support don't have to worry + * about them. 
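The loop shown here records the previous device and sector so a remap can be traced via trace_block_bio_remap(). Stacking drivers doing their own remapping follow the same pattern; a hedged sketch with an invented driver function:

#include <linux/blkdev.h>
#include <trace/events/block.h>

/* Illustrative only: remap a bio onto a lower device and trace the remap. */
static void example_remap(struct request_queue *q, struct bio *bio,
			  struct block_device *lower, sector_t offset)
{
	dev_t old_dev = bio->bi_bdev->bd_dev;
	sector_t old_sector = bio->bi_sector;

	bio->bi_bdev = lower;
	bio->bi_sector += offset;

	trace_block_bio_remap(q, bio, old_dev, old_sector);
}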
+ */ + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + if (!nr_sectors) { + err = 0; + goto end_io; + } } - } - if ((bio->bi_rw & REQ_DISCARD) && - (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && - !blk_queue_secdiscard(q)))) { - err = -EOPNOTSUPP; - goto end_io; - } + if ((bio->bi_rw & REQ_DISCARD) && + (!blk_queue_discard(q) || + ((bio->bi_rw & REQ_SECURE) && + !blk_queue_secdiscard(q)))) { + err = -EOPNOTSUPP; + goto end_io; + } - if (blk_throtl_bio(q, bio)) - return false; /* throttled, will be resubmitted later */ + if (blk_throtl_bio(q, &bio)) + goto end_io; - trace_block_bio_queue(q, bio); - return true; + /* + * If bio = NULL, bio has been throttled and will be submitted + * later. + */ + if (!bio) + break; + + trace_block_bio_queue(q, bio); + + ret = q->make_request_fn(q, bio); + } while (ret); + + return; end_io: bio_endio(bio, err); - return false; } -/** - * generic_make_request - hand a buffer to its device driver for I/O - * @bio: The bio describing the location in memory and on the device. - * - * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct bio, which describes the I/O that needs - * to be done. - * - * generic_make_request() does not return any status. The - * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bio->bi_end_io - * function described (one day) else where. - * - * The caller of generic_make_request must make sure that bi_io_vec - * are set to describe the memory buffer, and that bi_dev and bi_sector are - * set to describe the device address, and the - * bi_end_io and optionally bi_private are set to describe how - * completion notification should be signaled. - * - * generic_make_request and the drivers it calls may use bi_next if this - * bio happens to be merged with someone else, and may resubmit the bio to - * a lower device by calling into generic_make_request recursively, which - * means the bio should NOT be touched after the call to ->make_request_fn. +/* + * We only want one ->make_request_fn to be active at a time, + * else stack usage with stacked devices could be a problem. + * So use current->bio_list to keep a list of requests + * submited by a make_request_fn function. + * current->bio_list is also used as a flag to say if + * generic_make_request is currently active in this task or not. + * If it is NULL, then no make_request is active. If it is non-NULL, + * then a make_request is active, and new requests should be added + * at the tail */ void generic_make_request(struct bio *bio) { struct bio_list bio_list_on_stack; - if (!generic_make_request_checks(bio)) - return; - - /* - * We only want one ->make_request_fn to be active at a time, else - * stack usage with stacked devices could be a problem. So use - * current->bio_list to keep a list of requests submited by a - * make_request_fn function. current->bio_list is also used as a - * flag to say if generic_make_request is currently active in this - * task or not. If it is NULL, then no make_request is active. If - * it is non-NULL, then a make_request is active, and new requests - * should be added at the tail - */ if (current->bio_list) { + /* make_request is active */ bio_list_add(current->bio_list, bio); return; } - /* following loop may be a bit non-obvious, and so deserves some * explanation. 
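The FLUSH/FUA and DISCARD filtering restored above is compact enough to restate as a standalone helper. This is only a re-expression of the logic in the hunk, with an invented return convention, not new behaviour:

#include <linux/blkdev.h>
#include <linux/bio.h>

/*
 * Sketch: strip FLUSH/FUA on queues without flush support, reject
 * (secure) discards the queue cannot do.  Returns 0 to continue,
 * 1 if the bio was completed here, or a -errno to fail it with.
 */
static int example_filter_bio(struct request_queue *q, struct bio *bio)
{
	if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
		bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
		if (!bio_sectors(bio)) {
			bio_endio(bio, 0);	/* empty flush: nothing to do */
			return 1;
		}
	}

	if ((bio->bi_rw & REQ_DISCARD) &&
	    (!blk_queue_discard(q) ||
	     ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q))))
		return -EOPNOTSUPP;

	return 0;
}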
* Before entering the loop, bio->bi_next is NULL (as all callers @@ -1628,21 +1583,22 @@ void generic_make_request(struct bio *bio) * We pretend that we have just taken it off a longer list, so * we assign bio_list to a pointer to the bio_list_on_stack, * thus initialising the bio_list of new bios to be - * added. ->make_request() may indeed add some more bios + * added. __generic_make_request may indeed add some more bios * through a recursive call to generic_make_request. If it * did, we find a non-NULL value in bio_list and re-enter the loop * from the top. In this case we really did just take the bio * of the top of the list (no pretending) and so remove it from - * bio_list, and call into ->make_request() again. + * bio_list, and call into __generic_make_request again. + * + * The loop was structured like this to make only one call to + * __generic_make_request (which is important as it is large and + * inlined) and to keep the structure simple. */ BUG_ON(bio->bi_next); bio_list_init(&bio_list_on_stack); current->bio_list = &bio_list_on_stack; do { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - - q->make_request_fn(q, bio); - + __generic_make_request(bio); bio = bio_list_pop(current->bio_list); } while (bio); current->bio_list = NULL; /* deactivate */ @@ -2672,20 +2628,6 @@ EXPORT_SYMBOL(kblockd_schedule_delayed_work); #define PLUG_MAGIC 0x91827364 -/** - * blk_start_plug - initialize blk_plug and track it inside the task_struct - * @plug: The &struct blk_plug that needs to be initialized - * - * Description: - * Tracking blk_plug inside the task_struct will help with auto-flushing the - * pending I/O should the task end up blocking between blk_start_plug() and - * blk_finish_plug(). This is important from a performance perspective, but - * also ensures that we don't deadlock. For instance, if the task is blocking - * for a memory allocation, memory reclaim could end up wanting to free a - * page belonging to that request that is currently residing in our private - * plug. By flushing the pending I/O when the process goes to sleep, we avoid - * this kind of deadlock. - */ void blk_start_plug(struct blk_plug *plug) { struct task_struct *tsk = current; diff --git a/trunk/block/blk-sysfs.c b/trunk/block/blk-sysfs.c index e7f9f657f105..60fda88c57f0 100644 --- a/trunk/block/blk-sysfs.c +++ b/trunk/block/blk-sysfs.c @@ -457,11 +457,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, } /** - * blk_release_queue: - release a &struct request_queue when it is no longer needed - * @kobj: the kobj belonging to the request queue to be released + * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed + * @kobj: the kobj belonging of the request queue to be released * * Description: - * blk_release_queue is the pair to blk_init_queue() or + * blk_cleanup_queue is the pair to blk_init_queue() or * blk_queue_make_request(). It should be called when a request queue is * being released; typically when a block device is being de-registered. 
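The blk_start_plug() kernel-doc removed just above the blk-sysfs.c hunk describes why the plug lives in task_struct. Caller-side usage is unaffected by this diff; a minimal sketch assuming the 3.1 plugging API:

#include <linux/blkdev.h>

/* Illustrative only: batch a set of already-prepared bios under one plug. */
static void example_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* queue the following submissions */
	for (i = 0; i < nr; i++)
		generic_make_request(bios[i]);
	blk_finish_plug(&plug);		/* flush the plugged requests */
}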
* Currently, its primary task it to free all the &struct request @@ -490,7 +490,6 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); - blk_throtl_release(q); blk_trace_shutdown(q); bdi_destroy(&q->backing_dev_info); diff --git a/trunk/block/blk-throttle.c b/trunk/block/blk-throttle.c index 8edb9499b509..a19f58c6fc3a 100644 --- a/trunk/block/blk-throttle.c +++ b/trunk/block/blk-throttle.c @@ -10,7 +10,6 @@ #include #include #include "blk-cgroup.h" -#include "blk.h" /* Max dispatch from a group in 1 round */ static int throtl_grp_quantum = 8; @@ -303,16 +302,16 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) return tg; } +/* + * This function returns with queue lock unlocked in case of error, like + * request queue is no more + */ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) { struct throtl_grp *tg = NULL, *__tg = NULL; struct blkio_cgroup *blkcg; struct request_queue *q = td->queue; - /* no throttling for dead queue */ - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - return NULL; - rcu_read_lock(); blkcg = task_blkio_cgroup(current); tg = throtl_find_tg(td, blkcg); @@ -324,22 +323,32 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) /* * Need to allocate a group. Allocation of group also needs allocation * of per cpu stats which in-turn takes a mutex() and can block. Hence - * we need to drop rcu lock and queue_lock before we call alloc. + * we need to drop rcu lock and queue_lock before we call alloc + * + * Take the request queue reference to make sure queue does not + * go away once we return from allocation. */ + blk_get_queue(q); rcu_read_unlock(); spin_unlock_irq(q->queue_lock); tg = throtl_alloc_tg(td); + /* + * We might have slept in group allocation. Make sure queue is not + * dead + */ + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { + blk_put_queue(q); + if (tg) + kfree(tg); + + return ERR_PTR(-ENODEV); + } + blk_put_queue(q); /* Group allocated and queue is still alive. take the lock */ spin_lock_irq(q->queue_lock); - /* Make sure @q is still alive */ - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { - kfree(tg); - return NULL; - } - /* * Initialize the new group. After sleeping, read the blkcg again. */ @@ -1005,6 +1014,11 @@ static void throtl_release_tgs(struct throtl_data *td) } } +static void throtl_td_free(struct throtl_data *td) +{ + kfree(td); +} + /* * Blk cgroup controller notification saying that blkio_group object is being * delinked as associated cgroup object is going away. 
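throtl_get_tg() above reinstates the take-a-reference, drop-the-lock, allocate, re-check-QUEUE_FLAG_DEAD pattern. Stripped of the throttling specifics, the shape of that pattern looks roughly like this (struct and function names are invented):

#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/err.h>

struct example_group { int dummy; };	/* stand-in for a per-queue object */

static struct example_group *example_alloc_under_queue(struct request_queue *q)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct example_group *grp;

	blk_get_queue(q);			/* pin q across the sleep */
	spin_unlock_irq(q->queue_lock);

	grp = kzalloc(sizeof(*grp), GFP_KERNEL);	/* may sleep */

	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
		blk_put_queue(q);
		kfree(grp);
		return ERR_PTR(-ENODEV);	/* queue died while unlocked */
	}
	blk_put_queue(q);

	spin_lock_irq(q->queue_lock);
	return grp;
}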
That also means that @@ -1109,17 +1123,17 @@ static struct blkio_policy_type blkio_policy_throtl = { .plid = BLKIO_POLICY_THROTL, }; -bool blk_throtl_bio(struct request_queue *q, struct bio *bio) +int blk_throtl_bio(struct request_queue *q, struct bio **biop) { struct throtl_data *td = q->td; struct throtl_grp *tg; + struct bio *bio = *biop; bool rw = bio_data_dir(bio), update_disptime = true; struct blkio_cgroup *blkcg; - bool throttled = false; if (bio->bi_rw & REQ_THROTTLED) { bio->bi_rw &= ~REQ_THROTTLED; - goto out; + return 0; } /* @@ -1138,7 +1152,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); - goto out; + return 0; } } rcu_read_unlock(); @@ -1147,10 +1161,18 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * Either group has not been allocated yet or it is not an unlimited * IO group */ + spin_lock_irq(q->queue_lock); tg = throtl_get_tg(td); - if (unlikely(!tg)) - goto out_unlock; + + if (IS_ERR(tg)) { + if (PTR_ERR(tg) == -ENODEV) { + /* + * Queue is gone. No queue lock held here. + */ + return -ENODEV; + } + } if (tg->nr_queued[rw]) { /* @@ -1178,7 +1200,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * So keep on trimming slice even if bio is not queued. */ throtl_trim_slice(td, tg, rw); - goto out_unlock; + goto out; } queue_bio: @@ -1190,52 +1212,16 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) tg->nr_queued[READ], tg->nr_queued[WRITE]); throtl_add_bio_tg(q->td, tg, bio); - throttled = true; + *biop = NULL; if (update_disptime) { tg_update_disptime(td, tg); throtl_schedule_next_dispatch(td); } -out_unlock: - spin_unlock_irq(q->queue_lock); out: - return throttled; -} - -/** - * blk_throtl_drain - drain throttled bios - * @q: request_queue to drain throttled bios for - * - * Dispatch all currently throttled bios on @q through ->make_request_fn(). - */ -void blk_throtl_drain(struct request_queue *q) - __releases(q->queue_lock) __acquires(q->queue_lock) -{ - struct throtl_data *td = q->td; - struct throtl_rb_root *st = &td->tg_service_tree; - struct throtl_grp *tg; - struct bio_list bl; - struct bio *bio; - - lockdep_is_held(q->queue_lock); - - bio_list_init(&bl); - - while ((tg = throtl_rb_first(st))) { - throtl_dequeue_tg(td, tg); - - while ((bio = bio_list_peek(&tg->bio_lists[READ]))) - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); - while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); - } spin_unlock_irq(q->queue_lock); - - while ((bio = bio_list_pop(&bl))) - generic_make_request(bio); - - spin_lock_irq(q->queue_lock); + return 0; } int blk_throtl_init(struct request_queue *q) @@ -1310,11 +1296,7 @@ void blk_throtl_exit(struct request_queue *q) * it. 
*/ throtl_shutdown_wq(q); -} - -void blk_throtl_release(struct request_queue *q) -{ - kfree(q->td); + throtl_td_free(td); } static int __init throtl_init(void) diff --git a/trunk/block/blk.h b/trunk/block/blk.h index 3f6551b3c92d..20b900a377c9 100644 --- a/trunk/block/blk.h +++ b/trunk/block/blk.h @@ -15,7 +15,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); -void blk_drain_queue(struct request_queue *q, bool drain_all); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, @@ -189,21 +188,4 @@ static inline int blk_do_io_stat(struct request *rq) (rq->cmd_flags & REQ_DISCARD)); } -#ifdef CONFIG_BLK_DEV_THROTTLING -extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); -extern void blk_throtl_drain(struct request_queue *q); -extern int blk_throtl_init(struct request_queue *q); -extern void blk_throtl_exit(struct request_queue *q); -extern void blk_throtl_release(struct request_queue *q); -#else /* CONFIG_BLK_DEV_THROTTLING */ -static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) -{ - return false; -} -static inline void blk_throtl_drain(struct request_queue *q) { } -static inline int blk_throtl_init(struct request_queue *q) { return 0; } -static inline void blk_throtl_exit(struct request_queue *q) { } -static inline void blk_throtl_release(struct request_queue *q) { } -#endif /* CONFIG_BLK_DEV_THROTTLING */ - -#endif /* BLK_INTERNAL_H */ +#endif diff --git a/trunk/block/elevator.c b/trunk/block/elevator.c index 66343d6917d0..a3b64bc71d88 100644 --- a/trunk/block/elevator.c +++ b/trunk/block/elevator.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -181,7 +182,7 @@ static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, eq->elevator_data = data; } -static char chosen_elevator[ELV_NAME_MAX]; +static char chosen_elevator[16]; static int __init elevator_setup(char *str) { @@ -605,35 +606,43 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) void elv_drain_elevator(struct request_queue *q) { static int printed; - - lockdep_assert_held(q->queue_lock); - while (q->elevator->ops->elevator_dispatch_fn(q, 1)) ; - if (q->nr_sorted && printed++ < 10) { + if (q->nr_sorted == 0) + return; + if (printed++ < 10) { printk(KERN_ERR "%s: forced dispatching is broken " "(nr_sorted=%u), please report this\n", q->elevator->elevator_type->elevator_name, q->nr_sorted); } } +/* + * Call with queue lock held, interrupts disabled + */ void elv_quiesce_start(struct request_queue *q) { if (!q->elevator) return; - spin_lock_irq(q->queue_lock); queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); - spin_unlock_irq(q->queue_lock); - blk_drain_queue(q, false); + /* + * make sure we don't have any requests in flight + */ + elv_drain_elevator(q); + while (q->rq.elvpriv) { + __blk_run_queue(q); + spin_unlock_irq(q->queue_lock); + msleep(10); + spin_lock_irq(q->queue_lock); + elv_drain_elevator(q); + } } void elv_quiesce_end(struct request_queue *q) { - spin_lock_irq(q->queue_lock); queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); - spin_unlock_irq(q->queue_lock); } void __elv_add_request(struct request_queue *q, struct request *rq, int where) @@ -963,6 +972,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) /* * Turn on BYPASS and drain all requests w/ elevator 
private data */ + spin_lock_irq(q->queue_lock); elv_quiesce_start(q); /* @@ -973,8 +983,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) /* * attach and start new elevator */ - spin_lock_irq(q->queue_lock); elevator_attach(q, e, data); + spin_unlock_irq(q->queue_lock); if (old_elevator->registered) { @@ -989,7 +999,9 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); + spin_lock_irq(q->queue_lock); elv_quiesce_end(q); + spin_unlock_irq(q->queue_lock); blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); @@ -1003,7 +1015,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) elevator_exit(e); q->elevator = old_elevator; elv_register_queue(q); - elv_quiesce_end(q); + + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); return err; } diff --git a/trunk/block/genhd.c b/trunk/block/genhd.c index d261b73b9744..e2f67902dd02 100644 --- a/trunk/block/genhd.c +++ b/trunk/block/genhd.c @@ -611,12 +611,6 @@ void add_disk(struct gendisk *disk) register_disk(disk); blk_register_queue(disk); - /* - * Take an extra ref on queue which will be put on disk_release() - * so that it sticks around as long as @disk is there. - */ - WARN_ON_ONCE(blk_get_queue(disk->queue)); - retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, "bdi"); WARN_ON(retval); @@ -1101,8 +1095,6 @@ static void disk_release(struct device *dev) disk_replace_part_tbl(disk, NULL); free_part_stats(&disk->part0); free_part_info(&disk->part0); - if (disk->queue) - blk_put_queue(disk->queue); kfree(disk); } struct class block_class = { diff --git a/trunk/block/scsi_ioctl.c b/trunk/block/scsi_ioctl.c index fbdf0d802ec4..4f4230b79bb6 100644 --- a/trunk/block/scsi_ioctl.c +++ b/trunk/block/scsi_ioctl.c @@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod { int err; - if (!q) + if (!q || blk_get_queue(q)) return -ENXIO; switch (cmd) { @@ -686,6 +686,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod err = -ENOTTY; } + blk_put_queue(q); return err; } EXPORT_SYMBOL(scsi_cmd_ioctl); diff --git a/trunk/drivers/block/aoe/aoeblk.c b/trunk/drivers/block/aoe/aoeblk.c index 167ba0af47f5..528f6318ded1 100644 --- a/trunk/drivers/block/aoe/aoeblk.c +++ b/trunk/drivers/block/aoe/aoeblk.c @@ -159,7 +159,7 @@ aoeblk_release(struct gendisk *disk, fmode_t mode) return 0; } -static void +static int aoeblk_make_request(struct request_queue *q, struct bio *bio) { struct sk_buff_head queue; @@ -172,25 +172,25 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) if (bio == NULL) { printk(KERN_ERR "aoe: bio is NULL\n"); BUG(); - return; + return 0; } d = bio->bi_bdev->bd_disk->private_data; if (d == NULL) { printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n"); BUG(); bio_endio(bio, -ENXIO); - return; + return 0; } else if (bio->bi_io_vec == NULL) { printk(KERN_ERR "aoe: bi_io_vec is NULL\n"); BUG(); bio_endio(bio, -ENXIO); - return; + return 0; } buf = mempool_alloc(d->bufpool, GFP_NOIO); if (buf == NULL) { printk(KERN_INFO "aoe: buf allocation failure\n"); bio_endio(bio, -ENOMEM); - return; + return 0; } memset(buf, 0, sizeof(*buf)); INIT_LIST_HEAD(&buf->bufs); @@ -211,7 +211,7 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irqrestore(&d->lock, flags); mempool_free(buf, 
d->bufpool); bio_endio(bio, -ENXIO); - return; + return 0; } list_add_tail(&buf->bufs, &d->bufq); @@ -222,6 +222,8 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irqrestore(&d->lock, flags); aoenet_xmit(&queue); + + return 0; } static int diff --git a/trunk/drivers/block/brd.c b/trunk/drivers/block/brd.c index d22119d49e53..dba1c32e1ddf 100644 --- a/trunk/drivers/block/brd.c +++ b/trunk/drivers/block/brd.c @@ -323,7 +323,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, return err; } -static void brd_make_request(struct request_queue *q, struct bio *bio) +static int brd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; @@ -359,6 +359,8 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio, err); + + return 0; } #ifdef CONFIG_BLK_DEV_XIP diff --git a/trunk/drivers/block/drbd/drbd_int.h b/trunk/drivers/block/drbd/drbd_int.h index 36eee3969a98..ef2ceed3be4b 100644 --- a/trunk/drivers/block/drbd/drbd_int.h +++ b/trunk/drivers/block/drbd/drbd_int.h @@ -1507,7 +1507,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); extern int proc_details; /* drbd_req */ -extern void drbd_make_request(struct request_queue *q, struct bio *bio); +extern int drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); extern int is_valid_ar_handle(struct drbd_request *, sector_t); diff --git a/trunk/drivers/block/drbd/drbd_req.c b/trunk/drivers/block/drbd/drbd_req.c index 4a0f314086e5..3424d675b769 100644 --- a/trunk/drivers/block/drbd/drbd_req.c +++ b/trunk/drivers/block/drbd/drbd_req.c @@ -1073,7 +1073,7 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) return 0; } -void drbd_make_request(struct request_queue *q, struct bio *bio) +int drbd_make_request(struct request_queue *q, struct bio *bio) { unsigned int s_enr, e_enr; struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; @@ -1081,7 +1081,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { bio_endio(bio, -EPERM); - return; + return 0; } start_time = jiffies; @@ -1100,8 +1100,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) if (likely(s_enr == e_enr)) { inc_ap_bio(mdev, 1); - drbd_make_request_common(mdev, bio, start_time); - return; + return drbd_make_request_common(mdev, bio, start_time); } /* can this bio be split generically? @@ -1149,6 +1148,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) bio_pair_release(bp); } + return 0; } /* This is called by bio_add_page(). With this function we reduce diff --git a/trunk/drivers/block/loop.c b/trunk/drivers/block/loop.c index 157ddcb9d0a5..46cdd6945557 100644 --- a/trunk/drivers/block/loop.c +++ b/trunk/drivers/block/loop.c @@ -202,74 +202,6 @@ lo_do_transfer(struct loop_device *lo, int cmd, return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); } -/** - * do_lo_send_aops - helper for writing data to a loop device - * - * This is the fast version for backing filesystems which implement the address - * space operations write_begin and write_end. 
- */ -static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, - loff_t pos, struct page *unused) -{ - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ - struct address_space *mapping = file->f_mapping; - pgoff_t index; - unsigned offset, bv_offs; - int len, ret; - - mutex_lock(&mapping->host->i_mutex); - index = pos >> PAGE_CACHE_SHIFT; - offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); - bv_offs = bvec->bv_offset; - len = bvec->bv_len; - while (len > 0) { - sector_t IV; - unsigned size, copied; - int transfer_result; - struct page *page; - void *fsdata; - - IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); - size = PAGE_CACHE_SIZE - offset; - if (size > len) - size = len; - - ret = pagecache_write_begin(file, mapping, pos, size, 0, - &page, &fsdata); - if (ret) - goto fail; - - file_update_time(file); - - transfer_result = lo_do_transfer(lo, WRITE, page, offset, - bvec->bv_page, bv_offs, size, IV); - copied = size; - if (unlikely(transfer_result)) - copied = 0; - - ret = pagecache_write_end(file, mapping, pos, size, copied, - page, fsdata); - if (ret < 0 || ret != copied) - goto fail; - - if (unlikely(transfer_result)) - goto fail; - - bv_offs += copied; - len -= copied; - offset = 0; - index++; - pos += copied; - } - ret = 0; -out: - mutex_unlock(&mapping->host->i_mutex); - return ret; -fail: - ret = -1; - goto out; -} - /** * __do_lo_send_write - helper for writing data to a loop device * @@ -297,10 +229,8 @@ static int __do_lo_send_write(struct file *file, /** * do_lo_send_direct_write - helper for writing data to a loop device * - * This is the fast, non-transforming version for backing filesystems which do - * not implement the address space operations write_begin and write_end. - * It uses the write file operation which should be present on all writeable - * filesystems. + * This is the fast, non-transforming version that does not need double + * buffering. */ static int do_lo_send_direct_write(struct loop_device *lo, struct bio_vec *bvec, loff_t pos, struct page *page) @@ -316,15 +246,9 @@ static int do_lo_send_direct_write(struct loop_device *lo, /** * do_lo_send_write - helper for writing data to a loop device * - * This is the slow, transforming version for filesystems which do not - * implement the address space operations write_begin and write_end. It - * uses the write file operation which should be present on all writeable - * filesystems. - * - * Using fops->write is slower than using aops->{prepare,commit}_write in the - * transforming case because we need to double buffer the data as we cannot do - * the transformations in place as we do not have direct access to the - * destination pages of the backing file. + * This is the slow, transforming version that needs to double buffer the + * data as it cannot do the transformations in place without having direct + * access to the destination pages of the backing file. 
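The restored do_lo_send_write() comment explains why the transforming path has to double buffer. The bounce page it relies on is set up with plain alloc_page()/kmap(), as in the lo_send() hunk below; a tiny sketch of just that setup, with invented helper names:

#include <linux/gfp.h>
#include <linux/highmem.h>

/* Illustrative only: bounce page for a transforming transfer. */
static struct page *example_get_bounce_page(void)
{
	struct page *page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);

	if (page)
		kmap(page);	/* keep it mapped for repeated copies */
	return page;
}

static void example_put_bounce_page(struct page *page)
{
	kunmap(page);
	__free_page(page);
}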
*/ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, loff_t pos, struct page *page) @@ -350,17 +274,16 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) struct page *page = NULL; int i, ret = 0; - do_lo_send = do_lo_send_aops; - if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { + if (lo->transfer != transfer_none) { + page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); + if (unlikely(!page)) + goto fail; + kmap(page); + do_lo_send = do_lo_send_write; + } else { do_lo_send = do_lo_send_direct_write; - if (lo->transfer != transfer_none) { - page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); - if (unlikely(!page)) - goto fail; - kmap(page); - do_lo_send = do_lo_send_write; - } } + bio_for_each_segment(bvec, bio, i) { ret = do_lo_send(lo, bvec, pos, page); if (ret < 0) @@ -514,7 +437,7 @@ static struct bio *loop_get_bio(struct loop_device *lo) return bio_list_pop(&lo->lo_bio_list); } -static void loop_make_request(struct request_queue *q, struct bio *old_bio) +static int loop_make_request(struct request_queue *q, struct bio *old_bio) { struct loop_device *lo = q->queuedata; int rw = bio_rw(old_bio); @@ -532,11 +455,12 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio) loop_add_bio(lo, old_bio); wake_up(&lo->lo_event); spin_unlock_irq(&lo->lo_lock); - return; + return 0; out: spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio); + return 0; } struct switch_request { @@ -848,35 +772,23 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, mapping = file->f_mapping; inode = mapping->host; - if (!(file->f_mode & FMODE_WRITE)) - lo_flags |= LO_FLAGS_READ_ONLY; - error = -EINVAL; - if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { - const struct address_space_operations *aops = mapping->a_ops; - - if (aops->write_begin) - lo_flags |= LO_FLAGS_USE_AOPS; - if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) - lo_flags |= LO_FLAGS_READ_ONLY; + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) + goto out_putf; - lo_blocksize = S_ISBLK(inode->i_mode) ? - inode->i_bdev->bd_block_size : PAGE_SIZE; + if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || + !file->f_op->write) + lo_flags |= LO_FLAGS_READ_ONLY; - error = 0; - } else { - goto out_putf; - } + lo_blocksize = S_ISBLK(inode->i_mode) ? 
+ inode->i_bdev->bd_block_size : PAGE_SIZE; + error = -EFBIG; size = get_loop_size(lo, file); - - if ((loff_t)(sector_t)size != size) { - error = -EFBIG; + if ((loff_t)(sector_t)size != size) goto out_putf; - } - if (!(mode & FMODE_WRITE)) - lo_flags |= LO_FLAGS_READ_ONLY; + error = 0; set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); diff --git a/trunk/drivers/block/pktcdvd.c b/trunk/drivers/block/pktcdvd.c index a63b0a2b7805..e133f094ab08 100644 --- a/trunk/drivers/block/pktcdvd.c +++ b/trunk/drivers/block/pktcdvd.c @@ -2444,7 +2444,7 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err) pkt_bio_finished(pd); } -static void pkt_make_request(struct request_queue *q, struct bio *bio) +static int pkt_make_request(struct request_queue *q, struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2473,7 +2473,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) cloned_bio->bi_end_io = pkt_end_io_read_cloned; pd->stats.secs_r += bio->bi_size >> 9; pkt_queue_bio(pd, cloned_bio); - return; + return 0; } if (!test_bit(PACKET_WRITABLE, &pd->flags)) { @@ -2509,7 +2509,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); bio_pair_release(bp); - return; + return 0; } } @@ -2533,7 +2533,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) } spin_unlock(&pkt->lock); spin_unlock(&pd->cdrw.active_list_lock); - return; + return 0; } else { blocked_bio = 1; } @@ -2584,9 +2584,10 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) */ wake_up(&pd->wqueue); } - return; + return 0; end_io: bio_io_error(bio); + return 0; } diff --git a/trunk/drivers/block/ps3vram.c b/trunk/drivers/block/ps3vram.c index 7fad7af87eb2..b3bdb8af89cf 100644 --- a/trunk/drivers/block/ps3vram.c +++ b/trunk/drivers/block/ps3vram.c @@ -596,7 +596,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev, return next; } -static void ps3vram_make_request(struct request_queue *q, struct bio *bio) +static int ps3vram_make_request(struct request_queue *q, struct bio *bio) { struct ps3_system_bus_device *dev = q->queuedata; struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -610,11 +610,13 @@ static void ps3vram_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irq(&priv->lock); if (busy) - return; + return 0; do { bio = ps3vram_do_bio(dev, bio); } while (bio); + + return 0; } static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev) diff --git a/trunk/drivers/block/umem.c b/trunk/drivers/block/umem.c index aa2712060bfb..031ca720d926 100644 --- a/trunk/drivers/block/umem.c +++ b/trunk/drivers/block/umem.c @@ -513,7 +513,7 @@ static void process_page(unsigned long data) } } -static void mm_make_request(struct request_queue *q, struct bio *bio) +static int mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; pr_debug("mm_make_request %llu %u\n", @@ -525,7 +525,7 @@ static void mm_make_request(struct request_queue *q, struct bio *bio) card->biotail = &bio->bi_next; spin_unlock_irq(&card->lock); - return; + return 0; } static irqreturn_t mm_interrupt(int irq, void *__card) diff --git a/trunk/drivers/md/dm.c b/trunk/drivers/md/dm.c index 7b986e77b75e..52b39f335bb3 100644 --- a/trunk/drivers/md/dm.c +++ b/trunk/drivers/md/dm.c @@ -180,6 +180,9 @@ struct mapped_device { /* forced geometry settings */ struct hd_geometry geometry; + /* For saving the address of 
__make_request for request based dm */
+ make_request_fn *saved_make_request_fn;
+
 /* sysfs handle */
 struct kobject kobj;
@@ -1388,7 +1391,7 @@ static int dm_merge_bvec(struct request_queue *q,
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
-static void _dm_request(struct request_queue *q, struct bio *bio)
+static int _dm_request(struct request_queue *q, struct bio *bio)
 {
 int rw = bio_data_dir(bio);
 struct mapped_device *md = q->queuedata;
@@ -1409,12 +1412,19 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
 queue_io(md, bio);
 else
 bio_io_error(bio);
- return;
+ return 0;
 }
 __split_and_process_bio(md, bio);
 up_read(&md->io_lock);
- return;
+ return 0;
+}
+
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct mapped_device *md = q->queuedata;
+
+ return md->saved_make_request_fn(q, bio); /* call __make_request() */
 }
 static int dm_request_based(struct mapped_device *md)
@@ -1422,14 +1432,14 @@ static int dm_request_based(struct mapped_device *md)
 return blk_queue_stackable(md->queue);
 }
-static void dm_request(struct request_queue *q, struct bio *bio)
+static int dm_request(struct request_queue *q, struct bio *bio)
 {
 struct mapped_device *md = q->queuedata;
 if (dm_request_based(md))
- blk_queue_bio(q, bio);
- else
- _dm_request(q, bio);
+ return dm_make_request(q, bio);
+
+ return _dm_request(q, bio);
 }
 void dm_dispatch_request(struct request *rq)
@@ -2162,6 +2172,7 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 return 0;
 md->queue = q;
+ md->saved_make_request_fn = md->queue->make_request_fn;
 dm_init_md_queue(md);
 blk_queue_softirq_done(md->queue, dm_softirq_done);
 blk_queue_prep_rq(md->queue, dm_prep_fn);
diff --git a/trunk/drivers/md/faulty.c b/trunk/drivers/md/faulty.c
index 5ef304d4341c..23078dabb6df 100644
--- a/trunk/drivers/md/faulty.c
+++ b/trunk/drivers/md/faulty.c
@@ -169,7 +169,7 @@ static void add_sector(conf_t *conf, sector_t start, int mode)
 conf->nfaults = n+1;
 }
-static void make_request(mddev_t *mddev, struct bio *bio)
+static int make_request(mddev_t *mddev, struct bio *bio)
 {
 conf_t *conf = mddev->private;
 int failit = 0;
@@ -181,7 +181,7 @@ static void make_request(mddev_t *mddev, struct bio *bio)
 * just fail immediately
 */
 bio_endio(bio, -EIO);
- return;
+ return 0;
 }
 if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9),
@@ -211,15 +211,15 @@ static void make_request(mddev_t *mddev, struct bio *bio)
 }
 if (failit) {
 struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev);
-
 b->bi_bdev = conf->rdev->bdev;
 b->bi_private = bio;
 b->bi_end_io = faulty_fail;
- bio = b;
- } else
+ generic_make_request(b);
+ return 0;
+ } else {
 bio->bi_bdev = conf->rdev->bdev;
-
- generic_make_request(bio);
+ return 1;
+ }
 }
 static void status(struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/linear.c b/trunk/drivers/md/linear.c
index c6ee491d98e7..6cd2c313e800 100644
--- a/trunk/drivers/md/linear.c
+++ b/trunk/drivers/md/linear.c
@@ -264,14 +264,14 @@ static int linear_stop (mddev_t *mddev)
 return 0;
 }
-static void linear_make_request (mddev_t *mddev, struct bio *bio)
+static int linear_make_request (mddev_t *mddev, struct bio *bio)
 {
 dev_info_t *tmp_dev;
 sector_t start_sector;
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 rcu_read_lock();
@@ -293,7 +293,7 @@ static void linear_make_request (mddev_t *mddev, struct bio *bio)
 (unsigned long long)start_sector);
 rcu_read_unlock();
 bio_io_error(bio);
- return;
+ return 0;
 }
 if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > tmp_dev->end_sector)) {
@@ -307,17 +307,20 @@ static void linear_make_request (mddev_t *mddev, struct bio *bio)
 bp = bio_split(bio, end_sector - bio->bi_sector);
- linear_make_request(mddev, &bp->bio1);
- linear_make_request(mddev, &bp->bio2);
+ if (linear_make_request(mddev, &bp->bio1))
+ generic_make_request(&bp->bio1);
+ if (linear_make_request(mddev, &bp->bio2))
+ generic_make_request(&bp->bio2);
 bio_pair_release(bp);
- return;
+ return 0;
 }
 bio->bi_bdev = tmp_dev->rdev->bdev;
 bio->bi_sector = bio->bi_sector - start_sector + tmp_dev->rdev->data_offset;
 rcu_read_unlock();
- generic_make_request(bio);
+
+ return 1;
 }
 static void linear_status (struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/md.c b/trunk/drivers/md/md.c
index 8f52d4eb78a0..5c95ccb59500 100644
--- a/trunk/drivers/md/md.c
+++ b/trunk/drivers/md/md.c
@@ -335,17 +335,18 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
-static void md_make_request(struct request_queue *q, struct bio *bio)
+static int md_make_request(struct request_queue *q, struct bio *bio)
 {
 const int rw = bio_data_dir(bio);
 mddev_t *mddev = q->queuedata;
+ int rv;
 int cpu;
 unsigned int sectors;
 if (mddev == NULL || mddev->pers == NULL || !mddev->ready) {
 bio_io_error(bio);
- return;
+ return 0;
 }
 smp_rmb(); /* Ensure implications of 'active' are visible */
 rcu_read_lock();
@@ -370,7 +371,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
 * go away inside make_request
 */
 sectors = bio_sectors(bio);
- mddev->pers->make_request(mddev, bio);
+ rv = mddev->pers->make_request(mddev, bio);
 cpu = part_stat_lock();
 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -379,6 +380,8 @@
 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
 wake_up(&mddev->sb_wait);
+
+ return rv;
 }
 /* mddev_suspend makes sure no new requests are submitted
@@ -477,7 +480,8 @@ static void md_submit_flush_data(struct work_struct *ws)
 bio_endio(bio, 0);
 else {
 bio->bi_rw &= ~REQ_FLUSH;
- mddev->pers->make_request(mddev, bio);
+ if (mddev->pers->make_request(mddev, bio))
+ generic_make_request(bio);
 }
 mddev->flush_bio = NULL;
diff --git a/trunk/drivers/md/md.h b/trunk/drivers/md/md.h
index 1509a3eb9ae1..0a309dc29b45 100644
--- a/trunk/drivers/md/md.h
+++ b/trunk/drivers/md/md.h
@@ -424,7 +424,7 @@ struct mdk_personality
 int level;
 struct list_head list;
 struct module *owner;
- void (*make_request)(mddev_t *mddev, struct bio *bio);
+ int (*make_request)(mddev_t *mddev, struct bio *bio);
 int (*run)(mddev_t *mddev);
 int (*stop)(mddev_t *mddev);
 void (*status)(struct seq_file *seq, mddev_t *mddev);
diff --git a/trunk/drivers/md/multipath.c b/trunk/drivers/md/multipath.c
index 618dd9e22513..d5b5fb300171 100644
--- a/trunk/drivers/md/multipath.c
+++ b/trunk/drivers/md/multipath.c
@@ -106,7 +106,7 @@ static void multipath_end_request(struct bio *bio, int error)
 rdev_dec_pending(rdev, conf->mddev);
 }
-static void multipath_make_request(mddev_t *mddev, struct bio * bio)
+static int multipath_make_request(mddev_t *mddev, struct bio * bio)
 {
 multipath_conf_t *conf = mddev->private;
 struct multipath_bh * mp_bh;
@@ -114,7 +114,7 @@ static void multipath_make_request(mddev_t *mddev, struct bio * bio)
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
@@ -126,7 +126,7 @@ static void multipath_make_request(mddev_t *mddev, struct bio * bio)
 if (mp_bh->path < 0) {
 bio_endio(bio, -EIO);
 mempool_free(mp_bh, conf->pool);
- return;
+ return 0;
 }
 multipath = conf->multipaths + mp_bh->path;
@@ -137,7 +137,7 @@
 mp_bh->bio.bi_end_io = multipath_end_request;
 mp_bh->bio.bi_private = mp_bh;
 generic_make_request(&mp_bh->bio);
- return;
+ return 0;
 }
 static void multipath_status (struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/raid0.c b/trunk/drivers/md/raid0.c
index 4066615d61af..e86bf3682e1e 100644
--- a/trunk/drivers/md/raid0.c
+++ b/trunk/drivers/md/raid0.c
@@ -466,7 +466,7 @@ static inline int is_io_in_chunk_boundary(mddev_t *mddev,
 }
 }
-static void raid0_make_request(mddev_t *mddev, struct bio *bio)
+static int raid0_make_request(mddev_t *mddev, struct bio *bio)
 {
 unsigned int chunk_sects;
 sector_t sector_offset;
@@ -475,7 +475,7 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 chunk_sects = mddev->chunk_sectors;
@@ -495,10 +495,13 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 else
 bp = bio_split(bio, chunk_sects - sector_div(sector, chunk_sects));
- raid0_make_request(mddev, &bp->bio1);
- raid0_make_request(mddev, &bp->bio2);
+ if (raid0_make_request(mddev, &bp->bio1))
+ generic_make_request(&bp->bio1);
+ if (raid0_make_request(mddev, &bp->bio2))
+ generic_make_request(&bp->bio2);
+
 bio_pair_release(bp);
- return;
+ return 0;
 }
 sector_offset = bio->bi_sector;
@@ -508,9 +511,10 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 bio->bi_bdev = tmp_dev->bdev;
 bio->bi_sector = sector_offset + zone->dev_start + tmp_dev->data_offset;
-
- generic_make_request(bio);
- return;
+ /*
+ * Let the main block layer submit the IO and resolve recursion:
+ */
+ return 1;
 bad_map:
 printk("md/raid0:%s: make_request bug: can't convert block across chunks"
@@ -519,7 +523,7 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 bio_io_error(bio);
- return;
+ return 0;
 }
 static void raid0_status(struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/raid1.c b/trunk/drivers/md/raid1.c
index 2948a520f7ba..d9587dffe533 100644
--- a/trunk/drivers/md/raid1.c
+++ b/trunk/drivers/md/raid1.c
@@ -785,7 +785,7 @@ static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
 PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
-static void make_request(mddev_t *mddev, struct bio * bio)
+static int make_request(mddev_t *mddev, struct bio * bio)
 {
 conf_t *conf = mddev->private;
 mirror_info_t *mirror;
@@ -870,7 +870,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (rdisk < 0) {
 /* couldn't find anywhere to read from */
 raid_end_bio_io(r1_bio);
- return;
+ return 0;
 }
 mirror = conf->mirrors + rdisk;
@@ -928,7 +928,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 goto read_again;
 } else
 generic_make_request(read_bio);
- return;
+ return 0;
 }
 /*
@@ -1123,6 +1123,8 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (do_sync || !bitmap || !plugged)
 md_wakeup_thread(mddev->thread);
+
+ return 0;
 }
 static void status(struct seq_file *seq, mddev_t *mddev)
@@ -2172,6 +2174,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 bio->bi_next = NULL;
 bio->bi_flags &= ~(BIO_POOL_MASK-1);
 bio->bi_flags |= 1 << BIO_UPTODATE;
+ bio->bi_comp_cpu = -1;
 bio->bi_rw = READ;
 bio->bi_vcnt = 0;
 bio->bi_idx = 0;
diff --git a/trunk/drivers/md/raid10.c b/trunk/drivers/md/raid10.c
index ea5fc0b6a84c..0cd9672cf9cb 100644
--- a/trunk/drivers/md/raid10.c
+++ b/trunk/drivers/md/raid10.c
@@ -830,7 +830,7 @@ static void unfreeze_array(conf_t *conf)
 spin_unlock_irq(&conf->resync_lock);
 }
-static void make_request(mddev_t *mddev, struct bio * bio)
+static int make_request(mddev_t *mddev, struct bio * bio)
 {
 conf_t *conf = mddev->private;
 mirror_info_t *mirror;
@@ -849,7 +849,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 /* If this request crosses a chunk boundary, we need to
@@ -881,8 +881,10 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 conf->nr_waiting++;
 spin_unlock_irq(&conf->resync_lock);
- make_request(mddev, &bp->bio1);
- make_request(mddev, &bp->bio2);
+ if (make_request(mddev, &bp->bio1))
+ generic_make_request(&bp->bio1);
+ if (make_request(mddev, &bp->bio2))
+ generic_make_request(&bp->bio2);
 spin_lock_irq(&conf->resync_lock);
 conf->nr_waiting--;
@@ -890,14 +892,14 @@
 spin_unlock_irq(&conf->resync_lock);
 bio_pair_release(bp);
- return;
+ return 0;
 bad_map:
 printk("md/raid10:%s: make_request bug: can't convert block across chunks"
 " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
 (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 bio_io_error(bio);
- return;
+ return 0;
 }
 md_write_start(mddev, bio);
@@ -940,7 +942,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 slot = r10_bio->read_slot;
 if (disk < 0) {
 raid_end_bio_io(r10_bio);
- return;
+ return 0;
 }
 mirror = conf->mirrors + disk;
@@ -988,7 +990,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 goto read_again;
 } else
 generic_make_request(read_bio);
- return;
+ return 0;
 }
 /*
@@ -1156,6 +1158,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (do_sync || !mddev->bitmap || !plugged)
 md_wakeup_thread(mddev->thread);
+ return 0;
 }
 static void status(struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/raid5.c b/trunk/drivers/md/raid5.c
index 83f2c44e170f..ac5e8b57e50f 100644
--- a/trunk/drivers/md/raid5.c
+++ b/trunk/drivers/md/raid5.c
@@ -3695,7 +3695,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf)
 return sh;
 }
-static void make_request(mddev_t *mddev, struct bio * bi)
+static int make_request(mddev_t *mddev, struct bio * bi)
 {
 raid5_conf_t *conf = mddev->private;
 int dd_idx;
@@ -3708,7 +3708,7 @@ static void make_request(mddev_t *mddev, struct bio * bi)
 if (unlikely(bi->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bi);
- return;
+ return 0;
 }
 md_write_start(mddev, bi);
@@ -3716,7 +3716,7 @@ static void make_request(mddev_t *mddev, struct bio * bi)
 if (rw == READ &&
 mddev->reshape_position == MaxSector &&
 chunk_aligned_read(mddev,bi))
- return;
+ return 0;
 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -3851,6 +3851,8 @@ static void make_request(mddev_t *mddev, struct bio * bi)
 bio_endio(bi, 0);
 }
+
+ return 0;
 }
 static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
diff --git a/trunk/drivers/s390/block/dcssblk.c b/trunk/drivers/s390/block/dcssblk.c
index a5a55da2a1ac..9b43ae94beba 100644
--- a/trunk/drivers/s390/block/dcssblk.c
+++ b/trunk/drivers/s390/block/dcssblk.c
@@ -27,7 +27,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static int dcssblk_release(struct gendisk *disk, fmode_t mode);
-static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
+static int dcssblk_make_request(struct request_queue *q, struct bio *bio);
 static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
 void **kaddr, unsigned long *pfn);
@@ -814,7 +814,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode)
 return rc;
 }
-static void
+static int
 dcssblk_make_request(struct request_queue *q, struct bio *bio)
 {
 struct dcssblk_dev_info *dev_info;
@@ -871,9 +871,10 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 bytes_done += bvec->bv_len;
 }
 bio_endio(bio, 0);
- return;
+ return 0;
 fail:
 bio_io_error(bio);
+ return 0;
 }
 static int
diff --git a/trunk/drivers/s390/block/xpram.c b/trunk/drivers/s390/block/xpram.c
index 98f3e4ade924..1f6a4d894e73 100644
--- a/trunk/drivers/s390/block/xpram.c
+++ b/trunk/drivers/s390/block/xpram.c
@@ -181,7 +181,7 @@ static unsigned long xpram_highest_page_index(void)
 /*
 * Block device make request function.
 */
-static void xpram_make_request(struct request_queue *q, struct bio *bio)
+static int xpram_make_request(struct request_queue *q, struct bio *bio)
 {
 xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
 struct bio_vec *bvec;
@@ -221,9 +221,10 @@ static void xpram_make_request(struct request_queue *q, struct bio *bio)
 }
 set_bit(BIO_UPTODATE, &bio->bi_flags);
 bio_endio(bio, 0);
- return;
+ return 0;
 fail:
 bio_io_error(bio);
+ return 0;
 }
 static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/trunk/drivers/staging/zram/zram_drv.c b/trunk/drivers/staging/zram/zram_drv.c
index 02589cab6710..d70ec1ad10de 100644
--- a/trunk/drivers/staging/zram/zram_drv.c
+++ b/trunk/drivers/staging/zram/zram_drv.c
@@ -556,22 +556,24 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
 /*
 * Handler function for all zram I/O requests.
 */
-static void zram_make_request(struct request_queue *queue, struct bio *bio)
+static int zram_make_request(struct request_queue *queue, struct bio *bio)
 {
 struct zram *zram = queue->queuedata;
 if (!valid_io_request(zram, bio)) {
 zram_stat64_inc(zram, &zram->stats.invalid_io);
 bio_io_error(bio);
- return;
+ return 0;
 }
 if (unlikely(!zram->init_done) && zram_init_device(zram)) {
 bio_io_error(bio);
- return;
+ return 0;
 }
 __zram_make_request(zram, bio, bio_data_dir(bio));
+
+ return 0;
 }
 void zram_reset_device(struct zram *zram)
diff --git a/trunk/fs/bio.c b/trunk/fs/bio.c
index 41c93c722244..9bfade8a609b 100644
--- a/trunk/fs/bio.c
+++ b/trunk/fs/bio.c
@@ -255,6 +255,7 @@ void bio_init(struct bio *bio)
 {
 memset(bio, 0, sizeof(*bio));
 bio->bi_flags = 1 << BIO_UPTODATE;
+ bio->bi_comp_cpu = -1;
 atomic_set(&bio->bi_cnt, 1);
 }
 EXPORT_SYMBOL(bio_init);
diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c
index 1c44b8d54504..95f786ec7f08 100644
--- a/trunk/fs/block_dev.c
+++ b/trunk/fs/block_dev.c
@@ -1085,7 +1085,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 {
 struct gendisk *disk;
- struct module *owner;
 int ret;
 int partno;
 int perm = 0;
@@ -1111,7 +1110,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 disk = get_gendisk(bdev->bd_dev, &partno);
 if (!disk)
 goto out;
- owner = disk->fops->owner;
 disk_block_events(disk);
 mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1139,8 +1137,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 bdev->bd_disk = NULL;
 mutex_unlock(&bdev->bd_mutex);
 disk_unblock_events(disk);
+ module_put(disk->fops->owner);
 put_disk(disk);
- module_put(owner);
 goto restart;
 }
 }
@@ -1196,8 +1194,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 goto out_unlock_bdev;
 }
 /* only one opener holds refs to the module and disk */
+ module_put(disk->fops->owner);
 put_disk(disk);
- module_put(owner);
 }
 bdev->bd_openers++;
 if (for_part)
@@ -1217,8 +1215,8 @@
 out_unlock_bdev:
 mutex_unlock(&bdev->bd_mutex);
 disk_unblock_events(disk);
+ module_put(disk->fops->owner);
 put_disk(disk);
- module_put(owner);
 out:
 bdput(bdev);
@@ -1444,15 +1442,14 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 if (!bdev->bd_openers) {
 struct module *owner = disk->fops->owner;
+ put_disk(disk);
+ module_put(owner);
 disk_put_part(bdev->bd_part);
 bdev->bd_part = NULL;
 bdev->bd_disk = NULL;
 if (bdev != bdev->bd_contains)
 victim = bdev->bd_contains;
 bdev->bd_contains = NULL;
-
- put_disk(disk);
- module_put(owner);
 }
 mutex_unlock(&bdev->bd_mutex);
 bdput(bdev);
diff --git a/trunk/include/linux/bio.h b/trunk/include/linux/bio.h
index a3c071c9e189..ce33e6868a2f 100644
--- a/trunk/include/linux/bio.h
+++ b/trunk/include/linux/bio.h
@@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
 extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
+/*
+ * Allow queuer to specify a completion CPU for this bio
+ */
+static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
+{
+ bio->bi_comp_cpu = cpu;
+}
+
 /*
 * bio_set is used to allow other portions of the IO system to
 * allocate their own private memory pools for bio and iovec structures.
diff --git a/trunk/include/linux/blk_types.h b/trunk/include/linux/blk_types.h
index 4053cbd4490e..71fc53bb8f1c 100644
--- a/trunk/include/linux/blk_types.h
+++ b/trunk/include/linux/blk_types.h
@@ -59,6 +59,8 @@ struct bio {
 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
+ unsigned int bi_comp_cpu; /* completion CPU */
+
 atomic_t bi_cnt; /* pin count */
 struct bio_vec *bi_io_vec; /* the actual vec list */
@@ -91,10 +93,11 @@ struct bio {
 #define BIO_BOUNCED 5 /* bio is a bounce bio */
 #define BIO_USER_MAPPED 6 /* contains user pages */
 #define BIO_EOPNOTSUPP 7 /* not supported */
-#define BIO_NULL_MAPPED 8 /* contains invalid user pages */
-#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */
-#define BIO_QUIET 10 /* Make BIO Quiet */
-#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
+#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
+#define BIO_NULL_MAPPED 9 /* contains invalid user pages */
+#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
+#define BIO_QUIET 11 /* Make BIO Quiet */
+#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */
 #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
 /*
diff --git a/trunk/include/linux/blkdev.h b/trunk/include/linux/blkdev.h
index 5267cd2f20dc..7fbaa9103344 100644
--- a/trunk/include/linux/blkdev.h
+++ b/trunk/include/linux/blkdev.h
@@ -195,7 +195,7 @@ struct request_pm_state
 #include
 typedef void (request_fn_proc) (struct request_queue *q);
-typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
@@ -680,8 +680,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 struct scsi_ioctl_command __user *);
-extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
-
 /*
 * A queue has just exitted congestion. Note this in the global counter of
 * congested queues, and wake up anyone who was waiting for requests to be
@@ -865,22 +863,16 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 /*
- * blk_plug permits building a queue of related requests by holding the I/O
- * fragments for a short period. This allows merging of sequential requests
- * into single larger request. As the requests are moved from a per-task list to
- * the device's request_queue in a batch, this results in improved scalability
- * as the lock contention for request_queue lock is reduced.
- *
- * It is ok not to disable preemption when adding the request to the plug list
- * or when attempting a merge, because blk_schedule_flush_list() will only flush
- * the plug list when the task sleeps by itself. For details, please see
- * schedule() where blk_schedule_flush_plug() is called.
+ * Note: Code in between changing the blk_plug list/cb_list or element of such
+ * lists is preemptable, but such code can't do sleep (or be very careful),
+ * otherwise data is corrupted. For details, please check schedule() where
+ * blk_schedule_flush_plug() is called.
 */
 struct blk_plug {
- unsigned long magic; /* detect uninitialized use-cases */
- struct list_head list; /* requests */
- struct list_head cb_list; /* md requires an unplug callback */
- unsigned int should_sort; /* list to be sorted before flushing? */
+ unsigned long magic;
+ struct list_head list;
+ struct list_head cb_list;
+ unsigned int should_sort;
 };
 #define BLK_MAX_REQUEST_COUNT 16
@@ -1197,6 +1189,20 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 }
 #endif
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern int blk_throtl_init(struct request_queue *q);
+extern void blk_throtl_exit(struct request_queue *q);
+extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
+#else /* CONFIG_BLK_DEV_THROTTLING */
+static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
+{
+ return 0;
+}
+
+static inline int blk_throtl_init(struct request_queue *q) { return 0; }
+static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
+#endif /* CONFIG_BLK_DEV_THROTTLING */
+
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
diff --git a/trunk/include/linux/elevator.h b/trunk/include/linux/elevator.h
index 1d0f7a2ff73b..d800d5142184 100644
--- a/trunk/include/linux/elevator.h
+++ b/trunk/include/linux/elevator.h
@@ -38,12 +38,6 @@ struct elevator_ops
 elevator_merged_fn *elevator_merged_fn;
 elevator_merge_req_fn *elevator_merge_req_fn;
 elevator_allow_merge_fn *elevator_allow_merge_fn;
-
- /*
- * Used for both plugged list and elevator merging and in the
- * former case called without queue_lock. Read comment on top of
- * attempt_plug_merge() for details.
- */
 elevator_bio_merged_fn *elevator_bio_merged_fn;
 elevator_dispatch_fn *elevator_dispatch_fn;
diff --git a/trunk/include/linux/loop.h b/trunk/include/linux/loop.h
index 683d69890119..a06880689115 100644
--- a/trunk/include/linux/loop.h
+++ b/trunk/include/linux/loop.h
@@ -73,7 +73,6 @@ struct loop_device {
 */
 enum {
 LO_FLAGS_READ_ONLY = 1,
- LO_FLAGS_USE_AOPS = 2,
 LO_FLAGS_AUTOCLEAR = 4,
 };
diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c
index 1dbbe695a5ef..18ee1d2f6474 100644
--- a/trunk/kernel/sys.c
+++ b/trunk/kernel/sys.c
@@ -1172,7 +1172,7 @@ DECLARE_RWSEM(uts_sem);
 static int override_release(char __user *release, int len)
 {
 int ret = 0;
- char buf[65];
+ char buf[len];
 if (current->personality & UNAME26) {
 char *rest = UTS_RELEASE;
diff --git a/trunk/mm/bounce.c b/trunk/mm/bounce.c
index 434fb4f0c5e4..1481de68184b 100644
--- a/trunk/mm/bounce.c
+++ b/trunk/mm/bounce.c
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -27,10 +26,12 @@ static mempool_t *page_pool, *isa_page_pool;
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
-#ifndef CONFIG_MEMORY_HOTPLUG
- if (max_pfn <= max_low_pfn)
+ struct sysinfo i;
+ si_meminfo(&i);
+ si_swapinfo(&i);
+
+ if (!i.totalhigh)
 return 0;
-#endif
 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
 BUG_ON(!page_pool);
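
The md and dm hunks above all restore the older make_request_fn contract: the hook returns an int, where 0 means the driver has consumed the bio (completed it, queued it internally, or submitted it itself) and a non-zero return asks generic_make_request() to resubmit the now-remapped bio, as the raid0 comment "Let the main block layer submit the IO and resolve recursion" spells out. The sketch below illustrates that convention for a hypothetical remapping driver; example_dev and its fields are invented names for illustration and are not part of this patch.

/* Hypothetical illustration of the int-returning make_request_fn contract. */
struct example_dev {
        struct block_device *backing_bdev;      /* device we remap onto */
        sector_t start_sector;                  /* offset of our window */
        sector_t capacity_sectors;              /* size of our window */
};

static int example_make_request(struct request_queue *q, struct bio *bio)
{
        struct example_dev *dev = q->queuedata;

        if (bio->bi_sector + bio_sectors(bio) > dev->capacity_sectors) {
                bio_io_error(bio);      /* we finished the bio ourselves... */
                return 0;               /* ...so nothing gets resubmitted */
        }

        /* Remap and let generic_make_request() resubmit the bio for us. */
        bio->bi_bdev = dev->backing_bdev;
        bio->bi_sector += dev->start_sector;
        return 1;
}

Returning 1 here is what keeps stacked drivers such as linear and raid0 from recursing: instead of calling generic_make_request() from inside their own make_request hook, they hand the remapped bio back to the block core.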
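The bio.h, blk_types.h, fs/bio.c and raid1 hunks likewise bring back the per-bio completion-CPU hint: a bi_comp_cpu field initialised to -1, the bio_set_completion_cpu() helper, and a BIO_CPU_AFFINE flag (which shifts the later BIO_* flag values up by one). A minimal sketch of how a submitter might use these pieces follows; only bio_set_completion_cpu() and the flag bit appear in this patch, so the way the flag is set here is an assumption, not code taken from it.

/* Sketch only: request completion of a bio on the submitting CPU.
 * bio_set_completion_cpu() and BIO_CPU_AFFINE come from the hunks above;
 * setting bi_flags directly is an assumed usage, not shown in this patch. */
static void example_submit_cpu_affine(struct bio *bio)
{
        unsigned int cpu = get_cpu();           /* stay on this CPU while reading its id */

        bio_set_completion_cpu(bio, cpu);
        bio->bi_flags |= 1 << BIO_CPU_AFFINE;   /* assumed way to ask for CPU-affine completion */
        put_cpu();

        generic_make_request(bio);              /* submit as usual */
}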