diff --git a/[refs] b/[refs] index a89cdad6c8d1..773437c1ad6e 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 9562ad9ab36df7ccef920d119f3b5100025db95f +refs/heads/master: 456be1484ffc72a24bdb4200b5847c4fa90139d9 diff --git a/trunk/Makefile b/trunk/Makefile index 2652089bf541..31f967c31e7f 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc10 +EXTRAVERSION = -rc9 NAME = "Divemaster Edition" # *DOCUMENTATION* diff --git a/trunk/arch/m68k/emu/nfblock.c b/trunk/arch/m68k/emu/nfblock.c index e3011338ab40..48e50f8c1c7e 100644 --- a/trunk/arch/m68k/emu/nfblock.c +++ b/trunk/arch/m68k/emu/nfblock.c @@ -59,7 +59,7 @@ struct nfhd_device { struct gendisk *disk; }; -static void nfhd_make_request(struct request_queue *queue, struct bio *bio) +static int nfhd_make_request(struct request_queue *queue, struct bio *bio) { struct nfhd_device *dev = queue->queuedata; struct bio_vec *bvec; @@ -76,6 +76,7 @@ static void nfhd_make_request(struct request_queue *queue, struct bio *bio) sec += len; } bio_endio(bio, 0); + return 0; } static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/trunk/arch/powerpc/sysdev/axonram.c b/trunk/arch/powerpc/sysdev/axonram.c index ba4271919062..265f0f09395a 100644 --- a/trunk/arch/powerpc/sysdev/axonram.c +++ b/trunk/arch/powerpc/sysdev/axonram.c @@ -104,7 +104,7 @@ axon_ram_irq_handler(int irq, void *dev) * axon_ram_make_request - make_request() method for block device * @queue, @bio: see blk_queue_make_request() */ -static void +static int axon_ram_make_request(struct request_queue *queue, struct bio *bio) { struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; @@ -113,6 +113,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) struct bio_vec *vec; unsigned int transfered; unsigned short idx; + int rc = 0; phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT); phys_end = bank->io_addr + bank->size; @@ -120,7 +121,8 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) bio_for_each_segment(vec, bio, idx) { if (unlikely(phys_mem + vec->bv_len > phys_end)) { bio_io_error(bio); - return; + rc = -ERANGE; + break; } user_mem = page_address(vec->bv_page) + vec->bv_offset; @@ -133,6 +135,8 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) transfered += vec->bv_len; } bio_endio(bio, 0); + + return rc; } /** diff --git a/trunk/block/blk-cgroup.c b/trunk/block/blk-cgroup.c index d61ec5636ce0..b596e54ddd71 100644 --- a/trunk/block/blk-cgroup.c +++ b/trunk/block/blk-cgroup.c @@ -768,14 +768,25 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, return disk_total; } +static int blkio_check_dev_num(dev_t dev) +{ + int part = 0; + struct gendisk *disk; + + disk = get_gendisk(dev, &part); + if (!disk || part) + return -ENODEV; + + return 0; +} + static int blkio_policy_parse_and_set(char *buf, struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) { - struct gendisk *disk = NULL; char *s[4], *p, *major_s = NULL, *minor_s = NULL; + int ret; unsigned long major, minor; - int i = 0, ret = -EINVAL; - int part; + int i = 0; dev_t dev; u64 temp; @@ -793,36 +804,37 @@ static int blkio_policy_parse_and_set(char *buf, } if (i != 2) - goto out; + return -EINVAL; p = strsep(&s[0], ":"); if (p != NULL) major_s = p; else - goto out; + return -EINVAL; minor_s = s[0]; if (!minor_s) - goto out; + return -EINVAL; - if (strict_strtoul(major_s, 10, &major)) - goto out; + ret = 
strict_strtoul(major_s, 10, &major); + if (ret) + return -EINVAL; - if (strict_strtoul(minor_s, 10, &minor)) - goto out; + ret = strict_strtoul(minor_s, 10, &minor); + if (ret) + return -EINVAL; dev = MKDEV(major, minor); - if (strict_strtoull(s[1], 10, &temp)) - goto out; + ret = strict_strtoull(s[1], 10, &temp); + if (ret) + return -EINVAL; /* For rule removal, do not check for device presence. */ if (temp) { - disk = get_gendisk(dev, &part); - if (!disk || part) { - ret = -ENODEV; - goto out; - } + ret = blkio_check_dev_num(dev); + if (ret) + return ret; } newpn->dev = dev; @@ -831,7 +843,7 @@ static int blkio_policy_parse_and_set(char *buf, case BLKIO_POLICY_PROP: if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || temp > BLKIO_WEIGHT_MAX) - goto out; + return -EINVAL; newpn->plid = plid; newpn->fileid = fileid; @@ -848,7 +860,7 @@ static int blkio_policy_parse_and_set(char *buf, case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_write_iops_device: if (temp > THROTL_IOPS_MAX) - goto out; + return -EINVAL; newpn->plid = plid; newpn->fileid = fileid; @@ -859,10 +871,8 @@ static int blkio_policy_parse_and_set(char *buf, default: BUG(); } - ret = 0; -out: - put_disk(disk); - return ret; + + return 0; } unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, diff --git a/trunk/block/blk-cgroup.h b/trunk/block/blk-cgroup.h index 6f3ace7e792f..a71d2904ffb9 100644 --- a/trunk/block/blk-cgroup.h +++ b/trunk/block/blk-cgroup.h @@ -188,7 +188,7 @@ struct blkio_policy_node { union { unsigned int weight; /* - * Rate read/write in terms of bytes per second + * Rate read/write in terms of byptes per second * Whether this rate represents read or write is determined * by file type "fileid". */ diff --git a/trunk/block/blk-core.c b/trunk/block/blk-core.c index da697936d220..d34433ae7917 100644 --- a/trunk/block/blk-core.c +++ b/trunk/block/blk-core.c @@ -28,7 +28,6 @@ #include #include #include -#include #define CREATE_TRACE_POINTS #include @@ -39,6 +38,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); +static int __make_request(struct request_queue *q, struct bio *bio); + /* * For the allocated request tables */ @@ -346,75 +347,30 @@ void blk_put_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_put_queue); -/** - * blk_drain_queue - drain requests from request_queue - * @q: queue to drain - * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV - * - * Drain requests from @q. If @drain_all is set, all requests are drained. - * If not, only ELVPRIV requests are drained. The caller is responsible - * for ensuring that no new requests which need to be drained are queued. - */ -void blk_drain_queue(struct request_queue *q, bool drain_all) -{ - while (true) { - int nr_rqs; - - spin_lock_irq(q->queue_lock); - - elv_drain_elevator(q); - if (drain_all) - blk_throtl_drain(q); - - __blk_run_queue(q); - - if (drain_all) - nr_rqs = q->rq.count[0] + q->rq.count[1]; - else - nr_rqs = q->rq.elvpriv; - - spin_unlock_irq(q->queue_lock); - - if (!nr_rqs) - break; - msleep(10); - } -} - -/** - * blk_cleanup_queue - shutdown a request queue - * @q: request queue to shutdown - * - * Mark @q DEAD, drain all pending requests, destroy and put it. All - * future requests will be failed immediately with -ENODEV. +/* + * Note: If a driver supplied the queue lock, it is disconnected + * by this function. 
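The blk-cgroup hunk earlier in this diff rewrites blkio_policy_parse_and_set() to use early returns around strict_strtoul()/MKDEV(). A minimal sketch of that parsing pattern, with a hypothetical helper name and none of the cgroup specifics, assuming the 3.1-era strict_strtoul():

#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/kdev_t.h>

/* Hypothetical helper: parse "major:minor" into a dev_t, -EINVAL on bad input. */
static int example_parse_dev(char *buf, dev_t *dev)
{
	char *major_s, *minor_s;
	unsigned long major, minor;
	int ret;

	major_s = strsep(&buf, ":");
	minor_s = buf;
	if (!major_s || !minor_s)
		return -EINVAL;

	ret = strict_strtoul(major_s, 10, &major);
	if (ret)
		return -EINVAL;

	ret = strict_strtoul(minor_s, 10, &minor);
	if (ret)
		return -EINVAL;

	*dev = MKDEV(major, minor);
	return 0;
}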
The actual state of the lock doesn't matter + * here as the request_queue isn't accessible after this point + * (QUEUE_FLAG_DEAD is set) and no other requests will be queued. */ void blk_cleanup_queue(struct request_queue *q) { - spinlock_t *lock = q->queue_lock; + /* + * We know we have process context here, so we can be a little + * cautious and ensure that pending block actions on this device + * are done before moving on. Going into this function, we should + * not have processes doing IO to this device. + */ + blk_sync_queue(q); - /* mark @q DEAD, no new request or merges will be allowed afterwards */ + del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); mutex_lock(&q->sysfs_lock); queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); - - spin_lock_irq(lock); - queue_flag_set(QUEUE_FLAG_NOMERGES, q); - queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - queue_flag_set(QUEUE_FLAG_DEAD, q); + mutex_unlock(&q->sysfs_lock); if (q->queue_lock != &q->__queue_lock) q->queue_lock = &q->__queue_lock; - spin_unlock_irq(lock); - mutex_unlock(&q->sysfs_lock); - - /* drain all requests queued before DEAD marking */ - blk_drain_queue(q, true); - - /* @q won't process any more request, flush async actions */ - del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); - blk_sync_queue(q); - - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } EXPORT_SYMBOL(blk_cleanup_queue); @@ -585,7 +541,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, /* * This also sets hw/phys segments, boundary and size */ - blk_queue_make_request(q, blk_queue_bio); + blk_queue_make_request(q, __make_request); q->sg_reserved_size = INT_MAX; @@ -620,7 +576,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -631,10 +587,12 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) rq->cmd_flags = flags | REQ_ALLOCED; - if ((flags & REQ_ELVPRIV) && - unlikely(elv_set_request(q, rq, gfp_mask))) { - mempool_free(rq, q->rq.rq_pool); - return NULL; + if (priv) { + if (unlikely(elv_set_request(q, rq, gfp_mask))) { + mempool_free(rq, q->rq.rq_pool); + return NULL; + } + rq->cmd_flags |= REQ_ELVPRIV; } return rq; @@ -693,13 +651,12 @@ static void __freed_request(struct request_queue *q, int sync) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ -static void freed_request(struct request_queue *q, unsigned int flags) +static void freed_request(struct request_queue *q, int sync, int priv) { struct request_list *rl = &q->rq; - int sync = rw_is_sync(flags); rl->count[sync]--; - if (flags & REQ_ELVPRIV) + if (priv) rl->elvpriv--; __freed_request(q, sync); @@ -727,19 +684,10 @@ static bool blk_rq_should_init_elevator(struct bio *bio) return true; } -/** - * get_request - get a free request - * @q: request_queue to allocate request from - * @rw_flags: RW and SYNC flags - * @bio: bio to allocate request for (can be %NULL) - * @gfp_mask: allocation mask - * - * Get a free request from @q. This function may fail under memory - * pressure or if @q is dead. - * - * Must be callled with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. 
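blk_init_allocated_queue_node() above re-installs the internal __make_request handler via blk_queue_make_request(); the bio-based drivers later in this diff (nfblock, axonram, brd, aoe, ...) use the same call for their own handlers. A hedged sketch of a trivial bio-based driver setup, using the int-returning make_request prototype this diff restores (all names invented):

#include <linux/blkdev.h>
#include <linux/bio.h>

static int example_make_request(struct request_queue *q, struct bio *bio)
{
	/* a real driver would transfer data here before completing the bio */
	bio_endio(bio, 0);
	return 0;	/* 0: handled here, do not resubmit */
}

static struct request_queue *example_create_queue(void)
{
	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

	if (q)
		blk_queue_make_request(q, example_make_request);
	return q;
}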
- * Returns !%NULL on success, with @q->queue_lock *not held*. +/* + * Get a free request, queue_lock must be held. + * Returns NULL on failure, with queue_lock held. + * Returns !NULL on success, with queue_lock *not held*. */ static struct request *get_request(struct request_queue *q, int rw_flags, struct bio *bio, gfp_t gfp_mask) @@ -748,10 +696,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, struct request_list *rl = &q->rq; struct io_context *ioc = NULL; const bool is_sync = rw_is_sync(rw_flags) != 0; - int may_queue; - - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - return NULL; + int may_queue, priv = 0; may_queue = elv_may_queue(q, rw_flags); if (may_queue == ELV_MQUEUE_NO) @@ -795,17 +740,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags, rl->count[is_sync]++; rl->starved[is_sync] = 0; - if (blk_rq_should_init_elevator(bio) && - !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { - rw_flags |= REQ_ELVPRIV; - rl->elvpriv++; + if (blk_rq_should_init_elevator(bio)) { + priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + if (priv) + rl->elvpriv++; } if (blk_queue_io_stat(q)) rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw_flags, gfp_mask); + rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. Undo anything @@ -815,7 +760,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, * wait queue, but this is pretty rare. */ spin_lock_irq(q->queue_lock); - freed_request(q, rw_flags); + freed_request(q, is_sync, priv); /* * in the very unlikely event that allocation failed and no @@ -845,18 +790,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags, return rq; } -/** - * get_request_wait - get a free request with retry - * @q: request_queue to allocate request from - * @rw_flags: RW and SYNC flags - * @bio: bio to allocate request for (can be %NULL) - * - * Get a free request from @q. This function keeps retrying under memory - * pressure and fails iff @q is dead. +/* + * No available requests for this queue, wait for some requests to become + * available. * - * Must be callled with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. - * Returns !%NULL on success, with @q->queue_lock *not held*. + * Called with q->queue_lock held, and returns with it unlocked. 
*/ static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct bio *bio) @@ -870,9 +808,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct io_context *ioc; struct request_list *rl = &q->rq; - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - return NULL; - prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, TASK_UNINTERRUPTIBLE); @@ -903,15 +838,19 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { struct request *rq; + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + return NULL; + BUG_ON(rw != READ && rw != WRITE); spin_lock_irq(q->queue_lock); - if (gfp_mask & __GFP_WAIT) + if (gfp_mask & __GFP_WAIT) { rq = get_request_wait(q, rw, NULL); - else + } else { rq = get_request(q, rw, NULL, gfp_mask); - if (!rq) - spin_unlock_irq(q->queue_lock); + if (!rq) + spin_unlock_irq(q->queue_lock); + } /* q->queue_lock is unlocked at this point */ return rq; @@ -1113,13 +1052,14 @@ void __blk_put_request(struct request_queue *q, struct request *req) * it didn't come out of our reserved rq pools */ if (req->cmd_flags & REQ_ALLOCED) { - unsigned int flags = req->cmd_flags; + int is_sync = rq_is_sync(req) != 0; + int priv = req->cmd_flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); BUG_ON(!hlist_unhashed(&req->hash)); blk_free_request(q, req); - freed_request(q, flags); + freed_request(q, is_sync, priv); } } EXPORT_SYMBOL_GPL(__blk_put_request); @@ -1221,32 +1161,18 @@ static bool bio_attempt_front_merge(struct request_queue *q, return true; } -/** - * attempt_plug_merge - try to merge with %current's plugged list - * @q: request_queue new bio is being queued at - * @bio: new bio being queued - * @request_count: out parameter for number of traversed plugged requests - * - * Determine whether @bio being queued on @q can be merged with a request - * on %current's plugged list. Returns %true if merge was successful, - * otherwise %false. - * - * This function is called without @q->queue_lock; however, elevator is - * accessed iff there already are requests on the plugged list which in - * turn guarantees validity of the elevator. - * - * Note that, on successful merge, elevator operation - * elevator_bio_merged_fn() will be called without queue lock. Elevator - * must be ready for this. +/* + * Attempts to merge with the plugged list in the current process. Returns + * true if merge was successful, otherwise false. 
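Both before and after this change, blk_get_request() can return NULL (dead queue, or a failed non-blocking allocation), so callers must check the result. A small usage sketch under that assumption, with an invented caller:

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/gfp.h>

/* Illustrative only: allocate a request, use it, and put it back. */
static int example_use_request(struct request_queue *q)
{
	struct request *rq;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENODEV;	/* queue dead or allocation failed */

	/* ... fill in rq fields and issue it here ... */

	blk_put_request(rq);
	return 0;
}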
*/ -static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int *request_count) +static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, + struct bio *bio, unsigned int *request_count) { struct blk_plug *plug; struct request *rq; bool ret = false; - plug = current->plug; + plug = tsk->plug; if (!plug) goto out; *request_count = 0; @@ -1276,6 +1202,7 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, void init_request_from_bio(struct request *req, struct bio *bio) { + req->cpu = bio->bi_comp_cpu; req->cmd_type = REQ_TYPE_FS; req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; @@ -1288,7 +1215,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -void blk_queue_bio(struct request_queue *q, struct bio *bio) +static int __make_request(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1313,8 +1240,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (attempt_plug_merge(q, bio, &request_count)) - return; + if (attempt_plug_merge(current, q, bio, &request_count)) + goto out; spin_lock_irq(q->queue_lock); @@ -1348,10 +1275,6 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Returns with the queue unlocked. */ req = get_request_wait(q, rw_flags, bio); - if (unlikely(!req)) { - bio_endio(bio, -ENODEV); /* @q is dead */ - goto out_unlock; - } /* * After dropping the lock and possibly sleeping here, our request @@ -1361,7 +1284,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) */ init_request_from_bio(req, bio); - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || + bio_flagged(bio, BIO_CPU_AFFINE)) req->cpu = raw_smp_processor_id(); plug = current->plug; @@ -1392,8 +1316,9 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) out_unlock: spin_unlock_irq(q->queue_lock); } +out: + return 0; } -EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ /* * If bio->bi_dev is a partition, remap the location @@ -1492,135 +1417,165 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) return 0; } -static noinline_for_stack bool -generic_make_request_checks(struct bio *bio) +/** + * generic_make_request - hand a buffer to its device driver for I/O + * @bio: The bio describing the location in memory and on the device. + * + * generic_make_request() is used to make I/O requests of block + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. + * + * generic_make_request() does not return any status. The + * success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the bio->bi_end_io + * function described (one day) else where. + * + * The caller of generic_make_request must make sure that bi_io_vec + * are set to describe the memory buffer, and that bi_dev and bi_sector are + * set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may change bi_dev and + * bi_sector for remaps as it sees fit. 
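The restored generic_make_request() kernel-doc above lists what a submitter must fill in: bi_bdev, bi_sector, bi_end_io and optionally bi_private. A minimal, hypothetical submitter following that contract; the synchronous completion via struct completion is an assumption, and error handling is trimmed:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/completion.h>

static void example_end_io(struct bio *bio, int err)
{
	/* completion is signalled asynchronously through bi_end_io */
	complete(bio->bi_private);
	bio_put(bio);
}

/* Illustrative only: read one page at 'sector' from 'bdev'. */
static int example_submit(struct block_device *bdev, sector_t sector,
			  struct page *page, struct completion *done)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;		/* set before bio_add_page() */
	bio->bi_sector = sector;
	bio->bi_end_io = example_end_io;
	bio->bi_private = done;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	generic_make_request(bio);	/* caller waits on 'done' elsewhere */
	return 0;
}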
So the values of these fields + * should NOT be depended on after the call to generic_make_request. + */ +static inline void __generic_make_request(struct bio *bio) { struct request_queue *q; - int nr_sectors = bio_sectors(bio); + sector_t old_sector; + int ret, nr_sectors = bio_sectors(bio); + dev_t old_dev; int err = -EIO; - char b[BDEVNAME_SIZE]; - struct hd_struct *part; might_sleep(); if (bio_check_eod(bio, nr_sectors)) goto end_io; - q = bdev_get_queue(bio->bi_bdev); - if (unlikely(!q)) { - printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%Lu)\n", - bdevname(bio->bi_bdev, b), - (long long) bio->bi_sector); - goto end_io; - } + /* + * Resolve the mapping until finished. (drivers are + * still free to implement/resolve their own stacking + * by explicitly returning 0) + * + * NOTE: we don't repeat the blk_size check for each new device. + * Stacking drivers are expected to know what they are doing. + */ + old_sector = -1; + old_dev = 0; + do { + char b[BDEVNAME_SIZE]; + struct hd_struct *part; - if (unlikely(!(bio->bi_rw & REQ_DISCARD) && - nr_sectors > queue_max_hw_sectors(q))) { - printk(KERN_ERR "bio too big device %s (%u > %u)\n", - bdevname(bio->bi_bdev, b), - bio_sectors(bio), - queue_max_hw_sectors(q)); - goto end_io; - } + q = bdev_get_queue(bio->bi_bdev); + if (unlikely(!q)) { + printk(KERN_ERR + "generic_make_request: Trying to access " + "nonexistent block-device %s (%Lu)\n", + bdevname(bio->bi_bdev, b), + (long long) bio->bi_sector); + goto end_io; + } - part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_size) || - should_fail_request(&part_to_disk(part)->part0, - bio->bi_size)) - goto end_io; + if (unlikely(!(bio->bi_rw & REQ_DISCARD) && + nr_sectors > queue_max_hw_sectors(q))) { + printk(KERN_ERR "bio too big device %s (%u > %u)\n", + bdevname(bio->bi_bdev, b), + bio_sectors(bio), + queue_max_hw_sectors(q)); + goto end_io; + } - /* - * If this device has partitions, remap block n - * of partition p to block n+start(p) of the disk. - */ - blk_partition_remap(bio); + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + goto end_io; - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) - goto end_io; + part = bio->bi_bdev->bd_part; + if (should_fail_request(part, bio->bi_size) || + should_fail_request(&part_to_disk(part)->part0, + bio->bi_size)) + goto end_io; - if (bio_check_eod(bio, nr_sectors)) - goto end_io; + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); - /* - * Filter flush bio's early so that make_request based - * drivers without flush support don't have to worry - * about them. - */ - if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { - bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); - if (!nr_sectors) { - err = 0; + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) goto end_io; + + if (old_sector != -1) + trace_block_bio_remap(q, bio, old_dev, old_sector); + + old_sector = bio->bi_sector; + old_dev = bio->bi_bdev->bd_dev; + + if (bio_check_eod(bio, nr_sectors)) + goto end_io; + + /* + * Filter flush bio's early so that make_request based + * drivers without flush support don't have to worry + * about them. 
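The loop shown here records the previous device and sector so a remap can be traced via trace_block_bio_remap(). Stacking drivers doing their own remapping follow the same pattern; a hedged sketch with an invented driver function:

#include <linux/blkdev.h>
#include <trace/events/block.h>

/* Illustrative only: remap a bio onto a lower device and trace the remap. */
static void example_remap(struct request_queue *q, struct bio *bio,
			  struct block_device *lower, sector_t offset)
{
	dev_t old_dev = bio->bi_bdev->bd_dev;
	sector_t old_sector = bio->bi_sector;

	bio->bi_bdev = lower;
	bio->bi_sector += offset;

	trace_block_bio_remap(q, bio, old_dev, old_sector);
}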
+ */ + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + if (!nr_sectors) { + err = 0; + goto end_io; + } } - } - if ((bio->bi_rw & REQ_DISCARD) && - (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && - !blk_queue_secdiscard(q)))) { - err = -EOPNOTSUPP; - goto end_io; - } + if ((bio->bi_rw & REQ_DISCARD) && + (!blk_queue_discard(q) || + ((bio->bi_rw & REQ_SECURE) && + !blk_queue_secdiscard(q)))) { + err = -EOPNOTSUPP; + goto end_io; + } - if (blk_throtl_bio(q, bio)) - return false; /* throttled, will be resubmitted later */ + if (blk_throtl_bio(q, &bio)) + goto end_io; - trace_block_bio_queue(q, bio); - return true; + /* + * If bio = NULL, bio has been throttled and will be submitted + * later. + */ + if (!bio) + break; + + trace_block_bio_queue(q, bio); + + ret = q->make_request_fn(q, bio); + } while (ret); + + return; end_io: bio_endio(bio, err); - return false; } -/** - * generic_make_request - hand a buffer to its device driver for I/O - * @bio: The bio describing the location in memory and on the device. - * - * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct bio, which describes the I/O that needs - * to be done. - * - * generic_make_request() does not return any status. The - * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bio->bi_end_io - * function described (one day) else where. - * - * The caller of generic_make_request must make sure that bi_io_vec - * are set to describe the memory buffer, and that bi_dev and bi_sector are - * set to describe the device address, and the - * bi_end_io and optionally bi_private are set to describe how - * completion notification should be signaled. - * - * generic_make_request and the drivers it calls may use bi_next if this - * bio happens to be merged with someone else, and may resubmit the bio to - * a lower device by calling into generic_make_request recursively, which - * means the bio should NOT be touched after the call to ->make_request_fn. +/* + * We only want one ->make_request_fn to be active at a time, + * else stack usage with stacked devices could be a problem. + * So use current->bio_list to keep a list of requests + * submited by a make_request_fn function. + * current->bio_list is also used as a flag to say if + * generic_make_request is currently active in this task or not. + * If it is NULL, then no make_request is active. If it is non-NULL, + * then a make_request is active, and new requests should be added + * at the tail */ void generic_make_request(struct bio *bio) { struct bio_list bio_list_on_stack; - if (!generic_make_request_checks(bio)) - return; - - /* - * We only want one ->make_request_fn to be active at a time, else - * stack usage with stacked devices could be a problem. So use - * current->bio_list to keep a list of requests submited by a - * make_request_fn function. current->bio_list is also used as a - * flag to say if generic_make_request is currently active in this - * task or not. If it is NULL, then no make_request is active. If - * it is non-NULL, then a make_request is active, and new requests - * should be added at the tail - */ if (current->bio_list) { + /* make_request is active */ bio_list_add(current->bio_list, bio); return; } - /* following loop may be a bit non-obvious, and so deserves some * explanation. 
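The FLUSH/FUA and DISCARD filtering restored above is compact enough to restate as a standalone helper. This is only a re-expression of the logic in the hunk, with an invented return convention, not new behaviour:

#include <linux/blkdev.h>
#include <linux/bio.h>

/*
 * Sketch: strip FLUSH/FUA on queues without flush support, reject
 * (secure) discards the queue cannot do.  Returns 0 to continue,
 * 1 if the bio was completed here, or a -errno to fail it with.
 */
static int example_filter_bio(struct request_queue *q, struct bio *bio)
{
	if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
		bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
		if (!bio_sectors(bio)) {
			bio_endio(bio, 0);	/* empty flush: nothing to do */
			return 1;
		}
	}

	if ((bio->bi_rw & REQ_DISCARD) &&
	    (!blk_queue_discard(q) ||
	     ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q))))
		return -EOPNOTSUPP;

	return 0;
}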
* Before entering the loop, bio->bi_next is NULL (as all callers @@ -1628,21 +1583,22 @@ void generic_make_request(struct bio *bio) * We pretend that we have just taken it off a longer list, so * we assign bio_list to a pointer to the bio_list_on_stack, * thus initialising the bio_list of new bios to be - * added. ->make_request() may indeed add some more bios + * added. __generic_make_request may indeed add some more bios * through a recursive call to generic_make_request. If it * did, we find a non-NULL value in bio_list and re-enter the loop * from the top. In this case we really did just take the bio * of the top of the list (no pretending) and so remove it from - * bio_list, and call into ->make_request() again. + * bio_list, and call into __generic_make_request again. + * + * The loop was structured like this to make only one call to + * __generic_make_request (which is important as it is large and + * inlined) and to keep the structure simple. */ BUG_ON(bio->bi_next); bio_list_init(&bio_list_on_stack); current->bio_list = &bio_list_on_stack; do { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - - q->make_request_fn(q, bio); - + __generic_make_request(bio); bio = bio_list_pop(current->bio_list); } while (bio); current->bio_list = NULL; /* deactivate */ @@ -2672,20 +2628,6 @@ EXPORT_SYMBOL(kblockd_schedule_delayed_work); #define PLUG_MAGIC 0x91827364 -/** - * blk_start_plug - initialize blk_plug and track it inside the task_struct - * @plug: The &struct blk_plug that needs to be initialized - * - * Description: - * Tracking blk_plug inside the task_struct will help with auto-flushing the - * pending I/O should the task end up blocking between blk_start_plug() and - * blk_finish_plug(). This is important from a performance perspective, but - * also ensures that we don't deadlock. For instance, if the task is blocking - * for a memory allocation, memory reclaim could end up wanting to free a - * page belonging to that request that is currently residing in our private - * plug. By flushing the pending I/O when the process goes to sleep, we avoid - * this kind of deadlock. - */ void blk_start_plug(struct blk_plug *plug) { struct task_struct *tsk = current; diff --git a/trunk/block/blk-sysfs.c b/trunk/block/blk-sysfs.c index e7f9f657f105..60fda88c57f0 100644 --- a/trunk/block/blk-sysfs.c +++ b/trunk/block/blk-sysfs.c @@ -457,11 +457,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, } /** - * blk_release_queue: - release a &struct request_queue when it is no longer needed - * @kobj: the kobj belonging to the request queue to be released + * blk_cleanup_queue: - release a &struct request_queue when it is no longer needed + * @kobj: the kobj belonging of the request queue to be released * * Description: - * blk_release_queue is the pair to blk_init_queue() or + * blk_cleanup_queue is the pair to blk_init_queue() or * blk_queue_make_request(). It should be called when a request queue is * being released; typically when a block device is being de-registered. 
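The blk_start_plug() kernel-doc removed just above the blk-sysfs.c hunk describes why the plug lives in task_struct. Caller-side usage is unaffected by this diff; a minimal sketch assuming the 3.1 plugging API:

#include <linux/blkdev.h>

/* Illustrative only: batch a set of already-prepared bios under one plug. */
static void example_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* queue the following submissions */
	for (i = 0; i < nr; i++)
		generic_make_request(bios[i]);
	blk_finish_plug(&plug);		/* flush the plugged requests */
}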
* Currently, its primary task it to free all the &struct request @@ -490,7 +490,6 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); - blk_throtl_release(q); blk_trace_shutdown(q); bdi_destroy(&q->backing_dev_info); diff --git a/trunk/block/blk-throttle.c b/trunk/block/blk-throttle.c index 8edb9499b509..a19f58c6fc3a 100644 --- a/trunk/block/blk-throttle.c +++ b/trunk/block/blk-throttle.c @@ -10,7 +10,6 @@ #include #include #include "blk-cgroup.h" -#include "blk.h" /* Max dispatch from a group in 1 round */ static int throtl_grp_quantum = 8; @@ -303,16 +302,16 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) return tg; } +/* + * This function returns with queue lock unlocked in case of error, like + * request queue is no more + */ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) { struct throtl_grp *tg = NULL, *__tg = NULL; struct blkio_cgroup *blkcg; struct request_queue *q = td->queue; - /* no throttling for dead queue */ - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - return NULL; - rcu_read_lock(); blkcg = task_blkio_cgroup(current); tg = throtl_find_tg(td, blkcg); @@ -324,22 +323,32 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) /* * Need to allocate a group. Allocation of group also needs allocation * of per cpu stats which in-turn takes a mutex() and can block. Hence - * we need to drop rcu lock and queue_lock before we call alloc. + * we need to drop rcu lock and queue_lock before we call alloc + * + * Take the request queue reference to make sure queue does not + * go away once we return from allocation. */ + blk_get_queue(q); rcu_read_unlock(); spin_unlock_irq(q->queue_lock); tg = throtl_alloc_tg(td); + /* + * We might have slept in group allocation. Make sure queue is not + * dead + */ + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { + blk_put_queue(q); + if (tg) + kfree(tg); + + return ERR_PTR(-ENODEV); + } + blk_put_queue(q); /* Group allocated and queue is still alive. take the lock */ spin_lock_irq(q->queue_lock); - /* Make sure @q is still alive */ - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { - kfree(tg); - return NULL; - } - /* * Initialize the new group. After sleeping, read the blkcg again. */ @@ -1005,6 +1014,11 @@ static void throtl_release_tgs(struct throtl_data *td) } } +static void throtl_td_free(struct throtl_data *td) +{ + kfree(td); +} + /* * Blk cgroup controller notification saying that blkio_group object is being * delinked as associated cgroup object is going away. 
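throtl_get_tg() above reinstates the take-a-reference, drop-the-lock, allocate, re-check-QUEUE_FLAG_DEAD pattern. Stripped of the throttling specifics, the shape of that pattern looks roughly like this (struct and function names are invented):

#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/err.h>

struct example_group { int dummy; };	/* stand-in for a per-queue object */

static struct example_group *example_alloc_under_queue(struct request_queue *q)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct example_group *grp;

	blk_get_queue(q);			/* pin q across the sleep */
	spin_unlock_irq(q->queue_lock);

	grp = kzalloc(sizeof(*grp), GFP_KERNEL);	/* may sleep */

	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
		blk_put_queue(q);
		kfree(grp);
		return ERR_PTR(-ENODEV);	/* queue died while unlocked */
	}
	blk_put_queue(q);

	spin_lock_irq(q->queue_lock);
	return grp;
}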
That also means that @@ -1109,17 +1123,17 @@ static struct blkio_policy_type blkio_policy_throtl = { .plid = BLKIO_POLICY_THROTL, }; -bool blk_throtl_bio(struct request_queue *q, struct bio *bio) +int blk_throtl_bio(struct request_queue *q, struct bio **biop) { struct throtl_data *td = q->td; struct throtl_grp *tg; + struct bio *bio = *biop; bool rw = bio_data_dir(bio), update_disptime = true; struct blkio_cgroup *blkcg; - bool throttled = false; if (bio->bi_rw & REQ_THROTTLED) { bio->bi_rw &= ~REQ_THROTTLED; - goto out; + return 0; } /* @@ -1138,7 +1152,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); - goto out; + return 0; } } rcu_read_unlock(); @@ -1147,10 +1161,18 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * Either group has not been allocated yet or it is not an unlimited * IO group */ + spin_lock_irq(q->queue_lock); tg = throtl_get_tg(td); - if (unlikely(!tg)) - goto out_unlock; + + if (IS_ERR(tg)) { + if (PTR_ERR(tg) == -ENODEV) { + /* + * Queue is gone. No queue lock held here. + */ + return -ENODEV; + } + } if (tg->nr_queued[rw]) { /* @@ -1178,7 +1200,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * So keep on trimming slice even if bio is not queued. */ throtl_trim_slice(td, tg, rw); - goto out_unlock; + goto out; } queue_bio: @@ -1190,52 +1212,16 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) tg->nr_queued[READ], tg->nr_queued[WRITE]); throtl_add_bio_tg(q->td, tg, bio); - throttled = true; + *biop = NULL; if (update_disptime) { tg_update_disptime(td, tg); throtl_schedule_next_dispatch(td); } -out_unlock: - spin_unlock_irq(q->queue_lock); out: - return throttled; -} - -/** - * blk_throtl_drain - drain throttled bios - * @q: request_queue to drain throttled bios for - * - * Dispatch all currently throttled bios on @q through ->make_request_fn(). - */ -void blk_throtl_drain(struct request_queue *q) - __releases(q->queue_lock) __acquires(q->queue_lock) -{ - struct throtl_data *td = q->td; - struct throtl_rb_root *st = &td->tg_service_tree; - struct throtl_grp *tg; - struct bio_list bl; - struct bio *bio; - - lockdep_is_held(q->queue_lock); - - bio_list_init(&bl); - - while ((tg = throtl_rb_first(st))) { - throtl_dequeue_tg(td, tg); - - while ((bio = bio_list_peek(&tg->bio_lists[READ]))) - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); - while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) - tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); - } spin_unlock_irq(q->queue_lock); - - while ((bio = bio_list_pop(&bl))) - generic_make_request(bio); - - spin_lock_irq(q->queue_lock); + return 0; } int blk_throtl_init(struct request_queue *q) @@ -1310,11 +1296,7 @@ void blk_throtl_exit(struct request_queue *q) * it. 
*/ throtl_shutdown_wq(q); -} - -void blk_throtl_release(struct request_queue *q) -{ - kfree(q->td); + throtl_td_free(td); } static int __init throtl_init(void) diff --git a/trunk/block/blk.h b/trunk/block/blk.h index 3f6551b3c92d..20b900a377c9 100644 --- a/trunk/block/blk.h +++ b/trunk/block/blk.h @@ -15,7 +15,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); -void blk_drain_queue(struct request_queue *q, bool drain_all); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, @@ -189,21 +188,4 @@ static inline int blk_do_io_stat(struct request *rq) (rq->cmd_flags & REQ_DISCARD)); } -#ifdef CONFIG_BLK_DEV_THROTTLING -extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); -extern void blk_throtl_drain(struct request_queue *q); -extern int blk_throtl_init(struct request_queue *q); -extern void blk_throtl_exit(struct request_queue *q); -extern void blk_throtl_release(struct request_queue *q); -#else /* CONFIG_BLK_DEV_THROTTLING */ -static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) -{ - return false; -} -static inline void blk_throtl_drain(struct request_queue *q) { } -static inline int blk_throtl_init(struct request_queue *q) { return 0; } -static inline void blk_throtl_exit(struct request_queue *q) { } -static inline void blk_throtl_release(struct request_queue *q) { } -#endif /* CONFIG_BLK_DEV_THROTTLING */ - -#endif /* BLK_INTERNAL_H */ +#endif diff --git a/trunk/block/elevator.c b/trunk/block/elevator.c index 66343d6917d0..a3b64bc71d88 100644 --- a/trunk/block/elevator.c +++ b/trunk/block/elevator.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -181,7 +182,7 @@ static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, eq->elevator_data = data; } -static char chosen_elevator[ELV_NAME_MAX]; +static char chosen_elevator[16]; static int __init elevator_setup(char *str) { @@ -605,35 +606,43 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) void elv_drain_elevator(struct request_queue *q) { static int printed; - - lockdep_assert_held(q->queue_lock); - while (q->elevator->ops->elevator_dispatch_fn(q, 1)) ; - if (q->nr_sorted && printed++ < 10) { + if (q->nr_sorted == 0) + return; + if (printed++ < 10) { printk(KERN_ERR "%s: forced dispatching is broken " "(nr_sorted=%u), please report this\n", q->elevator->elevator_type->elevator_name, q->nr_sorted); } } +/* + * Call with queue lock held, interrupts disabled + */ void elv_quiesce_start(struct request_queue *q) { if (!q->elevator) return; - spin_lock_irq(q->queue_lock); queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); - spin_unlock_irq(q->queue_lock); - blk_drain_queue(q, false); + /* + * make sure we don't have any requests in flight + */ + elv_drain_elevator(q); + while (q->rq.elvpriv) { + __blk_run_queue(q); + spin_unlock_irq(q->queue_lock); + msleep(10); + spin_lock_irq(q->queue_lock); + elv_drain_elevator(q); + } } void elv_quiesce_end(struct request_queue *q) { - spin_lock_irq(q->queue_lock); queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); - spin_unlock_irq(q->queue_lock); } void __elv_add_request(struct request_queue *q, struct request *rq, int where) @@ -963,6 +972,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) /* * Turn on BYPASS and drain all requests w/ elevator 
private data */ + spin_lock_irq(q->queue_lock); elv_quiesce_start(q); /* @@ -973,8 +983,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) /* * attach and start new elevator */ - spin_lock_irq(q->queue_lock); elevator_attach(q, e, data); + spin_unlock_irq(q->queue_lock); if (old_elevator->registered) { @@ -989,7 +999,9 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); + spin_lock_irq(q->queue_lock); elv_quiesce_end(q); + spin_unlock_irq(q->queue_lock); blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); @@ -1003,7 +1015,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) elevator_exit(e); q->elevator = old_elevator; elv_register_queue(q); - elv_quiesce_end(q); + + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); return err; } diff --git a/trunk/block/genhd.c b/trunk/block/genhd.c index d261b73b9744..e2f67902dd02 100644 --- a/trunk/block/genhd.c +++ b/trunk/block/genhd.c @@ -611,12 +611,6 @@ void add_disk(struct gendisk *disk) register_disk(disk); blk_register_queue(disk); - /* - * Take an extra ref on queue which will be put on disk_release() - * so that it sticks around as long as @disk is there. - */ - WARN_ON_ONCE(blk_get_queue(disk->queue)); - retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, "bdi"); WARN_ON(retval); @@ -1101,8 +1095,6 @@ static void disk_release(struct device *dev) disk_replace_part_tbl(disk, NULL); free_part_stats(&disk->part0); free_part_info(&disk->part0); - if (disk->queue) - blk_put_queue(disk->queue); kfree(disk); } struct class block_class = { diff --git a/trunk/block/scsi_ioctl.c b/trunk/block/scsi_ioctl.c index fbdf0d802ec4..4f4230b79bb6 100644 --- a/trunk/block/scsi_ioctl.c +++ b/trunk/block/scsi_ioctl.c @@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod { int err; - if (!q) + if (!q || blk_get_queue(q)) return -ENXIO; switch (cmd) { @@ -686,6 +686,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod err = -ENOTTY; } + blk_put_queue(q); return err; } EXPORT_SYMBOL(scsi_cmd_ioctl); diff --git a/trunk/drivers/block/aoe/aoeblk.c b/trunk/drivers/block/aoe/aoeblk.c index 167ba0af47f5..528f6318ded1 100644 --- a/trunk/drivers/block/aoe/aoeblk.c +++ b/trunk/drivers/block/aoe/aoeblk.c @@ -159,7 +159,7 @@ aoeblk_release(struct gendisk *disk, fmode_t mode) return 0; } -static void +static int aoeblk_make_request(struct request_queue *q, struct bio *bio) { struct sk_buff_head queue; @@ -172,25 +172,25 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) if (bio == NULL) { printk(KERN_ERR "aoe: bio is NULL\n"); BUG(); - return; + return 0; } d = bio->bi_bdev->bd_disk->private_data; if (d == NULL) { printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n"); BUG(); bio_endio(bio, -ENXIO); - return; + return 0; } else if (bio->bi_io_vec == NULL) { printk(KERN_ERR "aoe: bi_io_vec is NULL\n"); BUG(); bio_endio(bio, -ENXIO); - return; + return 0; } buf = mempool_alloc(d->bufpool, GFP_NOIO); if (buf == NULL) { printk(KERN_INFO "aoe: buf allocation failure\n"); bio_endio(bio, -ENOMEM); - return; + return 0; } memset(buf, 0, sizeof(*buf)); INIT_LIST_HEAD(&buf->bufs); @@ -211,7 +211,7 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irqrestore(&d->lock, flags); mempool_free(buf, 
d->bufpool); bio_endio(bio, -ENXIO); - return; + return 0; } list_add_tail(&buf->bufs, &d->bufq); @@ -222,6 +222,8 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irqrestore(&d->lock, flags); aoenet_xmit(&queue); + + return 0; } static int diff --git a/trunk/drivers/block/brd.c b/trunk/drivers/block/brd.c index d22119d49e53..dba1c32e1ddf 100644 --- a/trunk/drivers/block/brd.c +++ b/trunk/drivers/block/brd.c @@ -323,7 +323,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, return err; } -static void brd_make_request(struct request_queue *q, struct bio *bio) +static int brd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; @@ -359,6 +359,8 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) out: bio_endio(bio, err); + + return 0; } #ifdef CONFIG_BLK_DEV_XIP diff --git a/trunk/drivers/block/drbd/drbd_int.h b/trunk/drivers/block/drbd/drbd_int.h index 36eee3969a98..ef2ceed3be4b 100644 --- a/trunk/drivers/block/drbd/drbd_int.h +++ b/trunk/drivers/block/drbd/drbd_int.h @@ -1507,7 +1507,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); extern int proc_details; /* drbd_req */ -extern void drbd_make_request(struct request_queue *q, struct bio *bio); +extern int drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); extern int is_valid_ar_handle(struct drbd_request *, sector_t); diff --git a/trunk/drivers/block/drbd/drbd_req.c b/trunk/drivers/block/drbd/drbd_req.c index 4a0f314086e5..3424d675b769 100644 --- a/trunk/drivers/block/drbd/drbd_req.c +++ b/trunk/drivers/block/drbd/drbd_req.c @@ -1073,7 +1073,7 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) return 0; } -void drbd_make_request(struct request_queue *q, struct bio *bio) +int drbd_make_request(struct request_queue *q, struct bio *bio) { unsigned int s_enr, e_enr; struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; @@ -1081,7 +1081,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { bio_endio(bio, -EPERM); - return; + return 0; } start_time = jiffies; @@ -1100,8 +1100,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) if (likely(s_enr == e_enr)) { inc_ap_bio(mdev, 1); - drbd_make_request_common(mdev, bio, start_time); - return; + return drbd_make_request_common(mdev, bio, start_time); } /* can this bio be split generically? @@ -1149,6 +1148,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) bio_pair_release(bp); } + return 0; } /* This is called by bio_add_page(). With this function we reduce diff --git a/trunk/drivers/block/loop.c b/trunk/drivers/block/loop.c index 157ddcb9d0a5..46cdd6945557 100644 --- a/trunk/drivers/block/loop.c +++ b/trunk/drivers/block/loop.c @@ -202,74 +202,6 @@ lo_do_transfer(struct loop_device *lo, int cmd, return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); } -/** - * do_lo_send_aops - helper for writing data to a loop device - * - * This is the fast version for backing filesystems which implement the address - * space operations write_begin and write_end. 
- */ -static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, - loff_t pos, struct page *unused) -{ - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ - struct address_space *mapping = file->f_mapping; - pgoff_t index; - unsigned offset, bv_offs; - int len, ret; - - mutex_lock(&mapping->host->i_mutex); - index = pos >> PAGE_CACHE_SHIFT; - offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); - bv_offs = bvec->bv_offset; - len = bvec->bv_len; - while (len > 0) { - sector_t IV; - unsigned size, copied; - int transfer_result; - struct page *page; - void *fsdata; - - IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); - size = PAGE_CACHE_SIZE - offset; - if (size > len) - size = len; - - ret = pagecache_write_begin(file, mapping, pos, size, 0, - &page, &fsdata); - if (ret) - goto fail; - - file_update_time(file); - - transfer_result = lo_do_transfer(lo, WRITE, page, offset, - bvec->bv_page, bv_offs, size, IV); - copied = size; - if (unlikely(transfer_result)) - copied = 0; - - ret = pagecache_write_end(file, mapping, pos, size, copied, - page, fsdata); - if (ret < 0 || ret != copied) - goto fail; - - if (unlikely(transfer_result)) - goto fail; - - bv_offs += copied; - len -= copied; - offset = 0; - index++; - pos += copied; - } - ret = 0; -out: - mutex_unlock(&mapping->host->i_mutex); - return ret; -fail: - ret = -1; - goto out; -} - /** * __do_lo_send_write - helper for writing data to a loop device * @@ -297,10 +229,8 @@ static int __do_lo_send_write(struct file *file, /** * do_lo_send_direct_write - helper for writing data to a loop device * - * This is the fast, non-transforming version for backing filesystems which do - * not implement the address space operations write_begin and write_end. - * It uses the write file operation which should be present on all writeable - * filesystems. + * This is the fast, non-transforming version that does not need double + * buffering. */ static int do_lo_send_direct_write(struct loop_device *lo, struct bio_vec *bvec, loff_t pos, struct page *page) @@ -316,15 +246,9 @@ static int do_lo_send_direct_write(struct loop_device *lo, /** * do_lo_send_write - helper for writing data to a loop device * - * This is the slow, transforming version for filesystems which do not - * implement the address space operations write_begin and write_end. It - * uses the write file operation which should be present on all writeable - * filesystems. - * - * Using fops->write is slower than using aops->{prepare,commit}_write in the - * transforming case because we need to double buffer the data as we cannot do - * the transformations in place as we do not have direct access to the - * destination pages of the backing file. + * This is the slow, transforming version that needs to double buffer the + * data as it cannot do the transformations in place without having direct + * access to the destination pages of the backing file. 
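The restored do_lo_send_write() comment explains why the transforming path has to double buffer. The bounce page it relies on is set up with plain alloc_page()/kmap(), as in the lo_send() hunk below; a tiny sketch of just that setup, with invented helper names:

#include <linux/gfp.h>
#include <linux/highmem.h>

/* Illustrative only: bounce page for a transforming transfer. */
static struct page *example_get_bounce_page(void)
{
	struct page *page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);

	if (page)
		kmap(page);	/* keep it mapped for repeated copies */
	return page;
}

static void example_put_bounce_page(struct page *page)
{
	kunmap(page);
	__free_page(page);
}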
*/ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, loff_t pos, struct page *page) @@ -350,17 +274,16 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) struct page *page = NULL; int i, ret = 0; - do_lo_send = do_lo_send_aops; - if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { + if (lo->transfer != transfer_none) { + page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); + if (unlikely(!page)) + goto fail; + kmap(page); + do_lo_send = do_lo_send_write; + } else { do_lo_send = do_lo_send_direct_write; - if (lo->transfer != transfer_none) { - page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); - if (unlikely(!page)) - goto fail; - kmap(page); - do_lo_send = do_lo_send_write; - } } + bio_for_each_segment(bvec, bio, i) { ret = do_lo_send(lo, bvec, pos, page); if (ret < 0) @@ -514,7 +437,7 @@ static struct bio *loop_get_bio(struct loop_device *lo) return bio_list_pop(&lo->lo_bio_list); } -static void loop_make_request(struct request_queue *q, struct bio *old_bio) +static int loop_make_request(struct request_queue *q, struct bio *old_bio) { struct loop_device *lo = q->queuedata; int rw = bio_rw(old_bio); @@ -532,11 +455,12 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio) loop_add_bio(lo, old_bio); wake_up(&lo->lo_event); spin_unlock_irq(&lo->lo_lock); - return; + return 0; out: spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio); + return 0; } struct switch_request { @@ -848,35 +772,23 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, mapping = file->f_mapping; inode = mapping->host; - if (!(file->f_mode & FMODE_WRITE)) - lo_flags |= LO_FLAGS_READ_ONLY; - error = -EINVAL; - if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { - const struct address_space_operations *aops = mapping->a_ops; - - if (aops->write_begin) - lo_flags |= LO_FLAGS_USE_AOPS; - if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) - lo_flags |= LO_FLAGS_READ_ONLY; + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) + goto out_putf; - lo_blocksize = S_ISBLK(inode->i_mode) ? - inode->i_bdev->bd_block_size : PAGE_SIZE; + if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || + !file->f_op->write) + lo_flags |= LO_FLAGS_READ_ONLY; - error = 0; - } else { - goto out_putf; - } + lo_blocksize = S_ISBLK(inode->i_mode) ? 
+ inode->i_bdev->bd_block_size : PAGE_SIZE; + error = -EFBIG; size = get_loop_size(lo, file); - - if ((loff_t)(sector_t)size != size) { - error = -EFBIG; + if ((loff_t)(sector_t)size != size) goto out_putf; - } - if (!(mode & FMODE_WRITE)) - lo_flags |= LO_FLAGS_READ_ONLY; + error = 0; set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); diff --git a/trunk/drivers/block/pktcdvd.c b/trunk/drivers/block/pktcdvd.c index a63b0a2b7805..e133f094ab08 100644 --- a/trunk/drivers/block/pktcdvd.c +++ b/trunk/drivers/block/pktcdvd.c @@ -2444,7 +2444,7 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err) pkt_bio_finished(pd); } -static void pkt_make_request(struct request_queue *q, struct bio *bio) +static int pkt_make_request(struct request_queue *q, struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2473,7 +2473,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) cloned_bio->bi_end_io = pkt_end_io_read_cloned; pd->stats.secs_r += bio->bi_size >> 9; pkt_queue_bio(pd, cloned_bio); - return; + return 0; } if (!test_bit(PACKET_WRITABLE, &pd->flags)) { @@ -2509,7 +2509,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); bio_pair_release(bp); - return; + return 0; } } @@ -2533,7 +2533,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) } spin_unlock(&pkt->lock); spin_unlock(&pd->cdrw.active_list_lock); - return; + return 0; } else { blocked_bio = 1; } @@ -2584,9 +2584,10 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio) */ wake_up(&pd->wqueue); } - return; + return 0; end_io: bio_io_error(bio); + return 0; } diff --git a/trunk/drivers/block/ps3vram.c b/trunk/drivers/block/ps3vram.c index 7fad7af87eb2..b3bdb8af89cf 100644 --- a/trunk/drivers/block/ps3vram.c +++ b/trunk/drivers/block/ps3vram.c @@ -596,7 +596,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev, return next; } -static void ps3vram_make_request(struct request_queue *q, struct bio *bio) +static int ps3vram_make_request(struct request_queue *q, struct bio *bio) { struct ps3_system_bus_device *dev = q->queuedata; struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -610,11 +610,13 @@ static void ps3vram_make_request(struct request_queue *q, struct bio *bio) spin_unlock_irq(&priv->lock); if (busy) - return; + return 0; do { bio = ps3vram_do_bio(dev, bio); } while (bio); + + return 0; } static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev) diff --git a/trunk/drivers/block/umem.c b/trunk/drivers/block/umem.c index aa2712060bfb..031ca720d926 100644 --- a/trunk/drivers/block/umem.c +++ b/trunk/drivers/block/umem.c @@ -513,7 +513,7 @@ static void process_page(unsigned long data) } } -static void mm_make_request(struct request_queue *q, struct bio *bio) +static int mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; pr_debug("mm_make_request %llu %u\n", @@ -525,7 +525,7 @@ static void mm_make_request(struct request_queue *q, struct bio *bio) card->biotail = &bio->bi_next; spin_unlock_irq(&card->lock); - return; + return 0; } static irqreturn_t mm_interrupt(int irq, void *__card) diff --git a/trunk/drivers/md/dm.c b/trunk/drivers/md/dm.c index 7b986e77b75e..52b39f335bb3 100644 --- a/trunk/drivers/md/dm.c +++ b/trunk/drivers/md/dm.c @@ -180,6 +180,9 @@ struct mapped_device { /* forced geometry settings */ struct hd_geometry geometry; + /* For saving the address of 
__make_request for request based dm */
+ make_request_fn *saved_make_request_fn;
+
 /* sysfs handle */
 struct kobject kobj;
@@ -1388,7 +1391,7 @@ static int dm_merge_bvec(struct request_queue *q,
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
-static void _dm_request(struct request_queue *q, struct bio *bio)
+static int _dm_request(struct request_queue *q, struct bio *bio)
 {
 int rw = bio_data_dir(bio);
 struct mapped_device *md = q->queuedata;
@@ -1409,12 +1412,19 @@ static void _dm_request(struct request_queue *q, struct bio *bio)
 queue_io(md, bio);
 else
 bio_io_error(bio);
- return;
+ return 0;
 }
 __split_and_process_bio(md, bio);
 up_read(&md->io_lock);
- return;
+ return 0;
+}
+
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct mapped_device *md = q->queuedata;
+
+ return md->saved_make_request_fn(q, bio); /* call __make_request() */
 }
 static int dm_request_based(struct mapped_device *md)
@@ -1422,14 +1432,14 @@ static int dm_request_based(struct mapped_device *md)
 return blk_queue_stackable(md->queue);
 }
-static void dm_request(struct request_queue *q, struct bio *bio)
+static int dm_request(struct request_queue *q, struct bio *bio)
 {
 struct mapped_device *md = q->queuedata;
 if (dm_request_based(md))
- blk_queue_bio(q, bio);
- else
- _dm_request(q, bio);
+ return dm_make_request(q, bio);
+
+ return _dm_request(q, bio);
 }
 void dm_dispatch_request(struct request *rq)
@@ -2162,6 +2172,7 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 return 0;
 md->queue = q;
+ md->saved_make_request_fn = md->queue->make_request_fn;
 dm_init_md_queue(md);
 blk_queue_softirq_done(md->queue, dm_softirq_done);
 blk_queue_prep_rq(md->queue, dm_prep_fn);
diff --git a/trunk/drivers/md/faulty.c b/trunk/drivers/md/faulty.c
index 5ef304d4341c..23078dabb6df 100644
--- a/trunk/drivers/md/faulty.c
+++ b/trunk/drivers/md/faulty.c
@@ -169,7 +169,7 @@ static void add_sector(conf_t *conf, sector_t start, int mode)
 conf->nfaults = n+1;
 }
-static void make_request(mddev_t *mddev, struct bio *bio)
+static int make_request(mddev_t *mddev, struct bio *bio)
 {
 conf_t *conf = mddev->private;
 int failit = 0;
@@ -181,7 +181,7 @@ static void make_request(mddev_t *mddev, struct bio *bio)
 * just fail immediately
 */
 bio_endio(bio, -EIO);
- return;
+ return 0;
 }
 if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9),
@@ -211,15 +211,15 @@ static void make_request(mddev_t *mddev, struct bio *bio)
 }
 if (failit) {
 struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev);
-
 b->bi_bdev = conf->rdev->bdev;
 b->bi_private = bio;
 b->bi_end_io = faulty_fail;
- bio = b;
- } else
+ generic_make_request(b);
+ return 0;
+ } else {
 bio->bi_bdev = conf->rdev->bdev;
-
- generic_make_request(bio);
+ return 1;
+ }
 }
 static void status(struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/linear.c b/trunk/drivers/md/linear.c
index c6ee491d98e7..6cd2c313e800 100644
--- a/trunk/drivers/md/linear.c
+++ b/trunk/drivers/md/linear.c
@@ -264,14 +264,14 @@ static int linear_stop (mddev_t *mddev)
 return 0;
 }
-static void linear_make_request (mddev_t *mddev, struct bio *bio)
+static int linear_make_request (mddev_t *mddev, struct bio *bio)
 {
 dev_info_t *tmp_dev;
 sector_t start_sector;
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 rcu_read_lock();
@@ -293,7 +293,7 @@ static void linear_make_request (mddev_t *mddev, struct bio *bio)
 (unsigned long long)start_sector);
 rcu_read_unlock();
 bio_io_error(bio);
- return;
+ return 0;
 }
 if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > tmp_dev->end_sector)) {
@@ -307,17 +307,20 @@ static void linear_make_request (mddev_t *mddev, struct bio *bio)
 bp = bio_split(bio, end_sector - bio->bi_sector);
- linear_make_request(mddev, &bp->bio1);
- linear_make_request(mddev, &bp->bio2);
+ if (linear_make_request(mddev, &bp->bio1))
+ generic_make_request(&bp->bio1);
+ if (linear_make_request(mddev, &bp->bio2))
+ generic_make_request(&bp->bio2);
 bio_pair_release(bp);
- return;
+ return 0;
 }
 bio->bi_bdev = tmp_dev->rdev->bdev;
 bio->bi_sector = bio->bi_sector - start_sector + tmp_dev->rdev->data_offset;
 rcu_read_unlock();
- generic_make_request(bio);
+
+ return 1;
 }
 static void linear_status (struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/md.c b/trunk/drivers/md/md.c
index 8f52d4eb78a0..5c95ccb59500 100644
--- a/trunk/drivers/md/md.c
+++ b/trunk/drivers/md/md.c
@@ -335,17 +335,18 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
-static void md_make_request(struct request_queue *q, struct bio *bio)
+static int md_make_request(struct request_queue *q, struct bio *bio)
 {
 const int rw = bio_data_dir(bio);
 mddev_t *mddev = q->queuedata;
+ int rv;
 int cpu;
 unsigned int sectors;
 if (mddev == NULL || mddev->pers == NULL || !mddev->ready) {
 bio_io_error(bio);
- return;
+ return 0;
 }
 smp_rmb(); /* Ensure implications of 'active' are visible */
 rcu_read_lock();
@@ -370,7 +371,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
 * go away inside make_request
 */
 sectors = bio_sectors(bio);
- mddev->pers->make_request(mddev, bio);
+ rv = mddev->pers->make_request(mddev, bio);
 cpu = part_stat_lock();
 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -379,6 +380,8 @@
 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
 wake_up(&mddev->sb_wait);
+
+ return rv;
 }
 /* mddev_suspend makes sure no new requests are submitted
@@ -477,7 +480,8 @@ static void md_submit_flush_data(struct work_struct *ws)
 bio_endio(bio, 0);
 else {
 bio->bi_rw &= ~REQ_FLUSH;
- mddev->pers->make_request(mddev, bio);
+ if (mddev->pers->make_request(mddev, bio))
+ generic_make_request(bio);
 }
 mddev->flush_bio = NULL;
diff --git a/trunk/drivers/md/md.h b/trunk/drivers/md/md.h
index 1509a3eb9ae1..0a309dc29b45 100644
--- a/trunk/drivers/md/md.h
+++ b/trunk/drivers/md/md.h
@@ -424,7 +424,7 @@ struct mdk_personality
 int level;
 struct list_head list;
 struct module *owner;
- void (*make_request)(mddev_t *mddev, struct bio *bio);
+ int (*make_request)(mddev_t *mddev, struct bio *bio);
 int (*run)(mddev_t *mddev);
 int (*stop)(mddev_t *mddev);
 void (*status)(struct seq_file *seq, mddev_t *mddev);
diff --git a/trunk/drivers/md/multipath.c b/trunk/drivers/md/multipath.c
index 618dd9e22513..d5b5fb300171 100644
--- a/trunk/drivers/md/multipath.c
+++ b/trunk/drivers/md/multipath.c
@@ -106,7 +106,7 @@ static void multipath_end_request(struct bio *bio, int error)
 rdev_dec_pending(rdev, conf->mddev);
 }
-static void multipath_make_request(mddev_t *mddev, struct bio * bio)
+static int multipath_make_request(mddev_t *mddev, struct bio * bio)
 {
 multipath_conf_t *conf = mddev->private;
 struct multipath_bh * mp_bh;
@@ -114,7 +114,7 @@ static void multipath_make_request(mddev_t *mddev, struct bio * bio)
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
@@ -126,7 +126,7 @@ static void multipath_make_request(mddev_t *mddev, struct bio * bio)
 if (mp_bh->path < 0) {
 bio_endio(bio, -EIO);
 mempool_free(mp_bh, conf->pool);
- return;
+ return 0;
 }
 multipath = conf->multipaths + mp_bh->path;
@@ -137,7 +137,7 @@
 mp_bh->bio.bi_end_io = multipath_end_request;
 mp_bh->bio.bi_private = mp_bh;
 generic_make_request(&mp_bh->bio);
- return;
+ return 0;
 }
 static void multipath_status (struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/raid0.c b/trunk/drivers/md/raid0.c
index 4066615d61af..e86bf3682e1e 100644
--- a/trunk/drivers/md/raid0.c
+++ b/trunk/drivers/md/raid0.c
@@ -466,7 +466,7 @@ static inline int is_io_in_chunk_boundary(mddev_t *mddev,
 }
 }
-static void raid0_make_request(mddev_t *mddev, struct bio *bio)
+static int raid0_make_request(mddev_t *mddev, struct bio *bio)
 {
 unsigned int chunk_sects;
 sector_t sector_offset;
@@ -475,7 +475,7 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 chunk_sects = mddev->chunk_sectors;
@@ -495,10 +495,13 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 else
 bp = bio_split(bio, chunk_sects - sector_div(sector, chunk_sects));
- raid0_make_request(mddev, &bp->bio1);
- raid0_make_request(mddev, &bp->bio2);
+ if (raid0_make_request(mddev, &bp->bio1))
+ generic_make_request(&bp->bio1);
+ if (raid0_make_request(mddev, &bp->bio2))
+ generic_make_request(&bp->bio2);
+
 bio_pair_release(bp);
- return;
+ return 0;
 }
 sector_offset = bio->bi_sector;
@@ -508,9 +511,10 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 bio->bi_bdev = tmp_dev->bdev;
 bio->bi_sector = sector_offset + zone->dev_start + tmp_dev->data_offset;
-
- generic_make_request(bio);
- return;
+ /*
+ * Let the main block layer submit the IO and resolve recursion:
+ */
+ return 1;
 bad_map:
 printk("md/raid0:%s: make_request bug: can't convert block across chunks"
@@ -519,7 +523,7 @@ static void raid0_make_request(mddev_t *mddev, struct bio *bio)
 (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 bio_io_error(bio);
- return;
+ return 0;
 }
 static void raid0_status(struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/raid1.c b/trunk/drivers/md/raid1.c
index 2948a520f7ba..d9587dffe533 100644
--- a/trunk/drivers/md/raid1.c
+++ b/trunk/drivers/md/raid1.c
@@ -785,7 +785,7 @@ static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
 PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
-static void make_request(mddev_t *mddev, struct bio * bio)
+static int make_request(mddev_t *mddev, struct bio * bio)
 {
 conf_t *conf = mddev->private;
 mirror_info_t *mirror;
@@ -870,7 +870,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (rdisk < 0) {
 /* couldn't find anywhere to read from */
 raid_end_bio_io(r1_bio);
- return;
+ return 0;
 }
 mirror = conf->mirrors + rdisk;
@@ -928,7 +928,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 goto read_again;
 } else
 generic_make_request(read_bio);
- return;
+ return 0;
 }
 /*
@@ -1123,6 +1123,8 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (do_sync || !bitmap || !plugged)
 md_wakeup_thread(mddev->thread);
+
+ return 0;
 }
 static void status(struct seq_file *seq, mddev_t *mddev)
@@ -2172,6 +2174,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 bio->bi_next = NULL;
 bio->bi_flags &= ~(BIO_POOL_MASK-1);
 bio->bi_flags |= 1 << BIO_UPTODATE;
+ bio->bi_comp_cpu = -1;
 bio->bi_rw = READ;
 bio->bi_vcnt = 0;
 bio->bi_idx = 0;
diff --git a/trunk/drivers/md/raid10.c b/trunk/drivers/md/raid10.c
index ea5fc0b6a84c..0cd9672cf9cb 100644
--- a/trunk/drivers/md/raid10.c
+++ b/trunk/drivers/md/raid10.c
@@ -830,7 +830,7 @@ static void unfreeze_array(conf_t *conf)
 spin_unlock_irq(&conf->resync_lock);
 }
-static void make_request(mddev_t *mddev, struct bio * bio)
+static int make_request(mddev_t *mddev, struct bio * bio)
 {
 conf_t *conf = mddev->private;
 mirror_info_t *mirror;
@@ -849,7 +849,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bio);
- return;
+ return 0;
 }
 /* If this request crosses a chunk boundary, we need to
@@ -881,8 +881,10 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 conf->nr_waiting++;
 spin_unlock_irq(&conf->resync_lock);
- make_request(mddev, &bp->bio1);
- make_request(mddev, &bp->bio2);
+ if (make_request(mddev, &bp->bio1))
+ generic_make_request(&bp->bio1);
+ if (make_request(mddev, &bp->bio2))
+ generic_make_request(&bp->bio2);
 spin_lock_irq(&conf->resync_lock);
 conf->nr_waiting--;
@@ -890,14 +892,14 @@
 spin_unlock_irq(&conf->resync_lock);
 bio_pair_release(bp);
- return;
+ return 0;
 bad_map:
 printk("md/raid10:%s: make_request bug: can't convert block across chunks"
 " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
 (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
 bio_io_error(bio);
- return;
+ return 0;
 }
 md_write_start(mddev, bio);
@@ -940,7 +942,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 slot = r10_bio->read_slot;
 if (disk < 0) {
 raid_end_bio_io(r10_bio);
- return;
+ return 0;
 }
 mirror = conf->mirrors + disk;
@@ -988,7 +990,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 goto read_again;
 } else
 generic_make_request(read_bio);
- return;
+ return 0;
 }
 /*
@@ -1156,6 +1158,7 @@ static void make_request(mddev_t *mddev, struct bio * bio)
 if (do_sync || !mddev->bitmap || !plugged)
 md_wakeup_thread(mddev->thread);
+ return 0;
 }
 static void status(struct seq_file *seq, mddev_t *mddev)
diff --git a/trunk/drivers/md/raid5.c b/trunk/drivers/md/raid5.c
index 83f2c44e170f..ac5e8b57e50f 100644
--- a/trunk/drivers/md/raid5.c
+++ b/trunk/drivers/md/raid5.c
@@ -3695,7 +3695,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf)
 return sh;
 }
-static void make_request(mddev_t *mddev, struct bio * bi)
+static int make_request(mddev_t *mddev, struct bio * bi)
 {
 raid5_conf_t *conf = mddev->private;
 int dd_idx;
@@ -3708,7 +3708,7 @@ static void make_request(mddev_t *mddev, struct bio * bi)
 if (unlikely(bi->bi_rw & REQ_FLUSH)) {
 md_flush_request(mddev, bi);
- return;
+ return 0;
 }
 md_write_start(mddev, bi);
@@ -3716,7 +3716,7 @@ static void make_request(mddev_t *mddev, struct bio * bi)
 if (rw == READ &&
 mddev->reshape_position == MaxSector &&
 chunk_aligned_read(mddev,bi))
- return;
+ return 0;
 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -3851,6 +3851,8 @@ static void make_request(mddev_t *mddev, struct bio * bi)
 bio_endio(bi, 0);
 }
+
+ return 0;
 }
 static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
diff --git a/trunk/drivers/s390/block/dcssblk.c b/trunk/drivers/s390/block/dcssblk.c
index a5a55da2a1ac..9b43ae94beba 100644
--- a/trunk/drivers/s390/block/dcssblk.c
+++ b/trunk/drivers/s390/block/dcssblk.c
@@ -27,7 +27,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static int dcssblk_release(struct gendisk *disk, fmode_t mode);
-static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
+static int dcssblk_make_request(struct request_queue *q, struct bio *bio);
 static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
 void **kaddr, unsigned long *pfn);
@@ -814,7 +814,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode)
 return rc;
 }
-static void
+static int
 dcssblk_make_request(struct request_queue *q, struct bio *bio)
 {
 struct dcssblk_dev_info *dev_info;
@@ -871,9 +871,10 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 bytes_done += bvec->bv_len;
 }
 bio_endio(bio, 0);
- return;
+ return 0;
 fail:
 bio_io_error(bio);
+ return 0;
 }
 static int
diff --git a/trunk/drivers/s390/block/xpram.c b/trunk/drivers/s390/block/xpram.c
index 98f3e4ade924..1f6a4d894e73 100644
--- a/trunk/drivers/s390/block/xpram.c
+++ b/trunk/drivers/s390/block/xpram.c
@@ -181,7 +181,7 @@ static unsigned long xpram_highest_page_index(void)
 /*
 * Block device make request function.
 */
-static void xpram_make_request(struct request_queue *q, struct bio *bio)
+static int xpram_make_request(struct request_queue *q, struct bio *bio)
 {
 xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
 struct bio_vec *bvec;
@@ -221,9 +221,10 @@ static void xpram_make_request(struct request_queue *q, struct bio *bio)
 }
 set_bit(BIO_UPTODATE, &bio->bi_flags);
 bio_endio(bio, 0);
- return;
+ return 0;
 fail:
 bio_io_error(bio);
+ return 0;
 }
 static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/trunk/drivers/staging/zram/zram_drv.c b/trunk/drivers/staging/zram/zram_drv.c
index 02589cab6710..d70ec1ad10de 100644
--- a/trunk/drivers/staging/zram/zram_drv.c
+++ b/trunk/drivers/staging/zram/zram_drv.c
@@ -556,22 +556,24 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
 /*
 * Handler function for all zram I/O requests.
 */
-static void zram_make_request(struct request_queue *queue, struct bio *bio)
+static int zram_make_request(struct request_queue *queue, struct bio *bio)
 {
 struct zram *zram = queue->queuedata;
 if (!valid_io_request(zram, bio)) {
 zram_stat64_inc(zram, &zram->stats.invalid_io);
 bio_io_error(bio);
- return;
+ return 0;
 }
 if (unlikely(!zram->init_done) && zram_init_device(zram)) {
 bio_io_error(bio);
- return;
+ return 0;
 }
 __zram_make_request(zram, bio, bio_data_dir(bio));
+
+ return 0;
 }
 void zram_reset_device(struct zram *zram)
diff --git a/trunk/fs/bio.c b/trunk/fs/bio.c
index 41c93c722244..9bfade8a609b 100644
--- a/trunk/fs/bio.c
+++ b/trunk/fs/bio.c
@@ -255,6 +255,7 @@ void bio_init(struct bio *bio)
 {
 memset(bio, 0, sizeof(*bio));
 bio->bi_flags = 1 << BIO_UPTODATE;
+ bio->bi_comp_cpu = -1;
 atomic_set(&bio->bi_cnt, 1);
 }
 EXPORT_SYMBOL(bio_init);
diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c
index 1c44b8d54504..95f786ec7f08 100644
--- a/trunk/fs/block_dev.c
+++ b/trunk/fs/block_dev.c
@@ -1085,7 +1085,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 {
 struct gendisk *disk;
- struct module *owner;
 int ret;
 int partno;
 int perm = 0;
@@ -1111,7 +1110,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 disk = get_gendisk(bdev->bd_dev, &partno);
 if (!disk)
 goto out;
- owner = disk->fops->owner;
 disk_block_events(disk);
 mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1139,8 +1137,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 bdev->bd_disk = NULL;
 mutex_unlock(&bdev->bd_mutex);
 disk_unblock_events(disk);
+ module_put(disk->fops->owner);
 put_disk(disk);
- module_put(owner);
 goto restart;
 }
 }
@@ -1196,8 +1194,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 goto out_unlock_bdev;
 }
 /* only one opener holds refs to the module and disk */
+ module_put(disk->fops->owner);
 put_disk(disk);
- module_put(owner);
 }
 bdev->bd_openers++;
 if (for_part)
@@ -1217,8 +1215,8 @@
 out_unlock_bdev:
 mutex_unlock(&bdev->bd_mutex);
 disk_unblock_events(disk);
+ module_put(disk->fops->owner);
 put_disk(disk);
- module_put(owner);
 out:
 bdput(bdev);
@@ -1444,15 +1442,14 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 if (!bdev->bd_openers) {
 struct module *owner = disk->fops->owner;
+ put_disk(disk);
+ module_put(owner);
 disk_put_part(bdev->bd_part);
 bdev->bd_part = NULL;
 bdev->bd_disk = NULL;
 if (bdev != bdev->bd_contains)
 victim = bdev->bd_contains;
 bdev->bd_contains = NULL;
-
- put_disk(disk);
- module_put(owner);
 }
 mutex_unlock(&bdev->bd_mutex);
 bdput(bdev);
diff --git a/trunk/include/linux/bio.h b/trunk/include/linux/bio.h
index a3c071c9e189..ce33e6868a2f 100644
--- a/trunk/include/linux/bio.h
+++ b/trunk/include/linux/bio.h
@@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
 extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
+/*
+ * Allow queuer to specify a completion CPU for this bio
+ */
+static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
+{
+ bio->bi_comp_cpu = cpu;
+}
+
 /*
 * bio_set is used to allow other portions of the IO system to
 * allocate their own private memory pools for bio and iovec structures.
diff --git a/trunk/include/linux/blk_types.h b/trunk/include/linux/blk_types.h
index 4053cbd4490e..71fc53bb8f1c 100644
--- a/trunk/include/linux/blk_types.h
+++ b/trunk/include/linux/blk_types.h
@@ -59,6 +59,8 @@ struct bio {
 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
+ unsigned int bi_comp_cpu; /* completion CPU */
+
 atomic_t bi_cnt; /* pin count */
 struct bio_vec *bi_io_vec; /* the actual vec list */
@@ -91,10 +93,11 @@ struct bio {
 #define BIO_BOUNCED 5 /* bio is a bounce bio */
 #define BIO_USER_MAPPED 6 /* contains user pages */
 #define BIO_EOPNOTSUPP 7 /* not supported */
-#define BIO_NULL_MAPPED 8 /* contains invalid user pages */
-#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */
-#define BIO_QUIET 10 /* Make BIO Quiet */
-#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
+#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
+#define BIO_NULL_MAPPED 9 /* contains invalid user pages */
+#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
+#define BIO_QUIET 11 /* Make BIO Quiet */
+#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */
 #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
 /*
diff --git a/trunk/include/linux/blkdev.h b/trunk/include/linux/blkdev.h
index 5267cd2f20dc..7fbaa9103344 100644
--- a/trunk/include/linux/blkdev.h
+++ b/trunk/include/linux/blkdev.h
@@ -195,7 +195,7 @@ struct request_pm_state
 #include
 typedef void (request_fn_proc) (struct request_queue *q);
-typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
@@ -680,8 +680,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 struct scsi_ioctl_command __user *);
-extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
-
 /*
 * A queue has just exitted congestion. Note this in the global counter of
 * congested queues, and wake up anyone who was waiting for requests to be
@@ -865,22 +863,16 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 /*
- * blk_plug permits building a queue of related requests by holding the I/O
- * fragments for a short period. This allows merging of sequential requests
- * into single larger request. As the requests are moved from a per-task list to
- * the device's request_queue in a batch, this results in improved scalability
- * as the lock contention for request_queue lock is reduced.
- *
- * It is ok not to disable preemption when adding the request to the plug list
- * or when attempting a merge, because blk_schedule_flush_list() will only flush
- * the plug list when the task sleeps by itself. For details, please see
- * schedule() where blk_schedule_flush_plug() is called.
+ * Note: Code in between changing the blk_plug list/cb_list or element of such
+ * lists is preemptable, but such code can't do sleep (or be very careful),
+ * otherwise data is corrupted. For details, please check schedule() where
+ * blk_schedule_flush_plug() is called.
 */
 struct blk_plug {
- unsigned long magic; /* detect uninitialized use-cases */
- struct list_head list; /* requests */
- struct list_head cb_list; /* md requires an unplug callback */
- unsigned int should_sort; /* list to be sorted before flushing? */
+ unsigned long magic;
+ struct list_head list;
+ struct list_head cb_list;
+ unsigned int should_sort;
 };
 #define BLK_MAX_REQUEST_COUNT 16
@@ -1197,6 +1189,20 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
 }
 #endif
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern int blk_throtl_init(struct request_queue *q);
+extern void blk_throtl_exit(struct request_queue *q);
+extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
+#else /* CONFIG_BLK_DEV_THROTTLING */
+static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
+{
+ return 0;
+}
+
+static inline int blk_throtl_init(struct request_queue *q) { return 0; }
+static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
+#endif /* CONFIG_BLK_DEV_THROTTLING */
+
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
diff --git a/trunk/include/linux/elevator.h b/trunk/include/linux/elevator.h
index 1d0f7a2ff73b..d800d5142184 100644
--- a/trunk/include/linux/elevator.h
+++ b/trunk/include/linux/elevator.h
@@ -38,12 +38,6 @@ struct elevator_ops
 elevator_merged_fn *elevator_merged_fn;
 elevator_merge_req_fn *elevator_merge_req_fn;
 elevator_allow_merge_fn *elevator_allow_merge_fn;
-
- /*
- * Used for both plugged list and elevator merging and in the
- * former case called without queue_lock. Read comment on top of
- * attempt_plug_merge() for details.
- */
 elevator_bio_merged_fn *elevator_bio_merged_fn;
 elevator_dispatch_fn *elevator_dispatch_fn;
diff --git a/trunk/include/linux/loop.h b/trunk/include/linux/loop.h
index 683d69890119..a06880689115 100644
--- a/trunk/include/linux/loop.h
+++ b/trunk/include/linux/loop.h
@@ -73,7 +73,6 @@ struct loop_device {
 */
 enum {
 LO_FLAGS_READ_ONLY = 1,
- LO_FLAGS_USE_AOPS = 2,
 LO_FLAGS_AUTOCLEAR = 4,
 };
diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c
index 1dbbe695a5ef..18ee1d2f6474 100644
--- a/trunk/kernel/sys.c
+++ b/trunk/kernel/sys.c
@@ -1172,7 +1172,7 @@ DECLARE_RWSEM(uts_sem);
 static int override_release(char __user *release, int len)
 {
 int ret = 0;
- char buf[65];
+ char buf[len];
 if (current->personality & UNAME26) {
 char *rest = UTS_RELEASE;
diff --git a/trunk/mm/bounce.c b/trunk/mm/bounce.c
index 434fb4f0c5e4..1481de68184b 100644
--- a/trunk/mm/bounce.c
+++ b/trunk/mm/bounce.c
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -27,10 +26,12 @@ static mempool_t *page_pool, *isa_page_pool;
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
-#ifndef CONFIG_MEMORY_HOTPLUG
- if (max_pfn <= max_low_pfn)
+ struct sysinfo i;
+ si_meminfo(&i);
+ si_swapinfo(&i);
+
+ if (!i.totalhigh)
 return 0;
-#endif
 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
 BUG_ON(!page_pool);
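
The md and dm hunks above all restore the older make_request_fn contract: the hook returns an int, where 0 means the driver has consumed the bio (completed it, queued it internally, or submitted it itself) and a non-zero return asks generic_make_request() to resubmit the now-remapped bio, as the raid0 comment "Let the main block layer submit the IO and resolve recursion" spells out. The sketch below illustrates that convention for a hypothetical remapping driver; example_dev and its fields are invented names for illustration and are not part of this patch.

/* Hypothetical illustration of the int-returning make_request_fn contract. */
struct example_dev {
        struct block_device *backing_bdev;      /* device we remap onto */
        sector_t start_sector;                  /* offset of our window */
        sector_t capacity_sectors;              /* size of our window */
};

static int example_make_request(struct request_queue *q, struct bio *bio)
{
        struct example_dev *dev = q->queuedata;

        if (bio->bi_sector + bio_sectors(bio) > dev->capacity_sectors) {
                bio_io_error(bio);      /* we finished the bio ourselves... */
                return 0;               /* ...so nothing gets resubmitted */
        }

        /* Remap and let generic_make_request() resubmit the bio for us. */
        bio->bi_bdev = dev->backing_bdev;
        bio->bi_sector += dev->start_sector;
        return 1;
}

Returning 1 here is what keeps stacked drivers such as linear and raid0 from recursing: instead of calling generic_make_request() from inside their own make_request hook, they hand the remapped bio back to the block core.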
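The bio.h, blk_types.h, fs/bio.c and raid1 hunks likewise bring back the per-bio completion-CPU hint: a bi_comp_cpu field initialised to -1, the bio_set_completion_cpu() helper, and a BIO_CPU_AFFINE flag (which shifts the later BIO_* flag values up by one). A minimal sketch of how a submitter might use these pieces follows; only bio_set_completion_cpu() and the flag bit appear in this patch, so the way the flag is set here is an assumption, not code taken from it.

/* Sketch only: request completion of a bio on the submitting CPU.
 * bio_set_completion_cpu() and BIO_CPU_AFFINE come from the hunks above;
 * setting bi_flags directly is an assumed usage, not shown in this patch. */
static void example_submit_cpu_affine(struct bio *bio)
{
        unsigned int cpu = get_cpu();           /* stay on this CPU while reading its id */

        bio_set_completion_cpu(bio, cpu);
        bio->bi_flags |= 1 << BIO_CPU_AFFINE;   /* assumed way to ask for CPU-affine completion */
        put_cpu();

        generic_make_request(bio);              /* submit as usual */
}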