Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Says Jens:

 "Time to push off some of the pending items.  I really wanted to wait
  until we had the regression nailed, but alas it's not quite there yet.
  But I'm very confident that it's "just" a missing expire on exit, so
  the fix from Tejun should be fairly trivial.  I'm headed out for a week on
  the slopes.

  - Killing the barrier part of mtip32xx.  It doesn't really support
    barriers, and it doesn't need them (writes are fully ordered).

  - A few fixes from Dan Carpenter, preventing overflows of integer
    multiplication.

  - A fixup for loop, fixing a previous commit that didn't quite solve
    the partial read problem from Dave Young.

  - A bio integer overflow fix from Kent Overstreet.

  - Improvement/fix of the door "keep locked" part of the cdrom shared
    code from Paolo Bonzini.

  - A few cfq fixes from Shaohua Li.

  - A fix for bsg sysfs warning when removing a file it did not create
    from Stanislaw Gruszka.

  - Two fixes for floppy from Vivek, preventing a crash.

  - A few block core fixes from Tejun.  One killing the over-optimized
    ioc exit path, cleaning that up nicely.  Two others fixing an oops
    on elevator switch, due to calling into the scheduler merge check
    code without holding the queue lock."

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: fix lockdep warning on io_context release put_io_context()
  relay: prevent integer overflow in relay_open()
  loop: zero fill bio instead of return -EIO for partial read
  bio: don't overflow in bio_get_nr_vecs()
  floppy: Fix a crash during rmmod
  floppy: Cleanup disk->queue before caling put_disk() if add_disk() was never called
  cdrom: move shared static to cdrom_device_info
  bsg: fix sysfs link remove warning
  block: don't call elevator callbacks for plug merges
  block: separate out blk_rq_merge_ok() and blk_try_merge() from elevator functions
  mtip32xx: removed the irrelevant argument of mtip_hw_submit_io() and the unused member of struct driver_data
  block: strip out locking optimization in put_io_context()
  cdrom: use copy_to_user() without the underscores
  block: fix ioc locking warning
  block: fix NULL icq_cache reference
  block,cfq: change code order
Linus Torvalds committed Feb 11, 2012
2 parents 8df54d6 + d8c66c5 commit 3ec1e88
Showing 21 changed files with 155 additions and 233 deletions.
2 changes: 1 addition & 1 deletion block/blk-cgroup.c
@@ -1659,7 +1659,7 @@ static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
ioc_cgroup_changed(ioc);
put_io_context(ioc, NULL);
put_io_context(ioc);
}
}
}
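The one-line change above is the simplest instance of a pattern repeated throughout this series: put_io_context() loses its request_queue hint. A minimal before/after sketch of a call site (the two calls are taken from hunks in this diff; the comments are editorial):

	/* before this series: callers passed the queue whose lock they held,
	 * or NULL when none was held, as an optimization hint */
	put_io_context(ioc, NULL);

	/* after: no hint argument; if icqs are still attached, the final put
	 * defers the teardown to ioc->release_work (see blk-ioc.c below) */
	put_io_context(ioc);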
33 changes: 17 additions & 16 deletions block/blk-core.c
@@ -642,7 +642,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
if (rq->cmd_flags & REQ_ELVPRIV) {
elv_put_request(q, rq);
if (rq->elv.icq)
put_io_context(rq->elv.icq->ioc, q);
put_io_context(rq->elv.icq->ioc);
}

mempool_free(rq, q->rq.rq_pool);
@@ -872,13 +872,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
spin_unlock_irq(q->queue_lock);

/* create icq if missing */
if (unlikely(et->icq_cache && !icq))
if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
icq = ioc_create_icq(q, gfp_mask);
if (!icq)
goto fail_icq;
}

/* rqs are guaranteed to have icq on elv_set_request() if requested */
if (likely(!et->icq_cache || icq))
rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);

fail_icq:
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
@@ -1210,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

drive_stat_acct(req, 0);
elv_bio_merged(q, req, bio);
return true;
}

@@ -1241,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

drive_stat_acct(req, 0);
elv_bio_merged(q, req, bio);
return true;
}

@@ -1255,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
* on %current's plugged list. Returns %true if merge was successful,
* otherwise %false.
*
* This function is called without @q->queue_lock; however, elevator is
* accessed iff there already are requests on the plugged list which in
* turn guarantees validity of the elevator.
*
* Note that, on successful merge, elevator operation
* elevator_bio_merged_fn() will be called without queue lock. Elevator
* must be ready for this.
* Plugging coalesces IOs from the same issuer for the same purpose without
* going through @q->queue_lock. As such it's more of an issuing mechanism
* than scheduling, and the request, while may have elvpriv data, is not
* added on the elevator at this point. In addition, we don't have
* reliable access to the elevator outside queue lock. Only check basic
* merging parameters without querying the elevator.
*/
static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
@@ -1280,10 +1279,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,

(*request_count)++;

if (rq->q != q)
if (rq->q != q || !blk_rq_merge_ok(rq, bio))
continue;

el_ret = elv_try_merge(rq, bio);
el_ret = blk_try_merge(rq, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
ret = bio_attempt_back_merge(q, rq, bio);
if (ret)
@@ -1345,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
el_ret = elv_merge(q, &req, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
if (bio_attempt_front_merge(q, req, bio)) {
elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
111 changes: 25 additions & 86 deletions block/blk-ioc.c
@@ -29,21 +29,6 @@ void get_io_context(struct io_context *ioc)
}
EXPORT_SYMBOL(get_io_context);

/*
* Releasing ioc may nest into another put_io_context() leading to nested
* fast path release. As the ioc's can't be the same, this is okay but
* makes lockdep whine. Keep track of nesting and use it as subclass.
*/
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q) (q)->ioc_release_depth++
#define ioc_release_depth_dec(q) (q)->ioc_release_depth--
#else
#define ioc_release_depth(q) 0
#define ioc_release_depth_inc(q) do { } while (0)
#define ioc_release_depth_dec(q) do { } while (0)
#endif

static void icq_free_icq_rcu(struct rcu_head *head)
{
struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
@@ -75,11 +60,8 @@ static void ioc_exit_icq(struct io_cq *icq)
if (rcu_dereference_raw(ioc->icq_hint) == icq)
rcu_assign_pointer(ioc->icq_hint, NULL);

if (et->ops.elevator_exit_icq_fn) {
ioc_release_depth_inc(q);
if (et->ops.elevator_exit_icq_fn)
et->ops.elevator_exit_icq_fn(icq);
ioc_release_depth_dec(q);
}

/*
* @icq->q might have gone away by the time RCU callback runs
@@ -98,8 +80,15 @@ static void ioc_release_fn(struct work_struct *work)
struct io_context *ioc = container_of(work, struct io_context,
release_work);
struct request_queue *last_q = NULL;
unsigned long flags;

spin_lock_irq(&ioc->lock);
/*
* Exiting icq may call into put_io_context() through elevator
* which will trigger lockdep warning. The ioc's are guaranteed to
* be different, use a different locking subclass here. Use
* irqsave variant as there's no spin_lock_irq_nested().
*/
spin_lock_irqsave_nested(&ioc->lock, flags, 1);

while (!hlist_empty(&ioc->icq_list)) {
struct io_cq *icq = hlist_entry(ioc->icq_list.first,
@@ -121,26 +110,26 @@ static void ioc_release_fn(struct work_struct *work)
*/
if (last_q) {
spin_unlock(last_q->queue_lock);
spin_unlock_irq(&ioc->lock);
spin_unlock_irqrestore(&ioc->lock, flags);
blk_put_queue(last_q);
} else {
spin_unlock_irq(&ioc->lock);
spin_unlock_irqrestore(&ioc->lock, flags);
}

last_q = this_q;
spin_lock_irq(this_q->queue_lock);
spin_lock(&ioc->lock);
spin_lock_irqsave(this_q->queue_lock, flags);
spin_lock_nested(&ioc->lock, 1);
continue;
}
ioc_exit_icq(icq);
}

if (last_q) {
spin_unlock(last_q->queue_lock);
spin_unlock_irq(&ioc->lock);
spin_unlock_irqrestore(&ioc->lock, flags);
blk_put_queue(last_q);
} else {
spin_unlock_irq(&ioc->lock);
spin_unlock_irqrestore(&ioc->lock, flags);
}

kmem_cache_free(iocontext_cachep, ioc);
@@ -149,79 +138,29 @@ static void ioc_release_fn(struct work_struct *work)
/**
* put_io_context - put a reference of io_context
* @ioc: io_context to put
* @locked_q: request_queue the caller is holding queue_lock of (hint)
*
* Decrement reference count of @ioc and release it if the count reaches
* zero. If the caller is holding queue_lock of a queue, it can indicate
* that with @locked_q. This is an optimization hint and the caller is
* allowed to pass in %NULL even when it's holding a queue_lock.
* zero.
*/
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
void put_io_context(struct io_context *ioc)
{
struct request_queue *last_q = locked_q;
unsigned long flags;

if (ioc == NULL)
return;

BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
if (locked_q)
lockdep_assert_held(locked_q->queue_lock);

if (!atomic_long_dec_and_test(&ioc->refcount))
return;

/*
* Destroy @ioc. This is a bit messy because icq's are chained
* from both ioc and queue, and ioc->lock nests inside queue_lock.
* The inner ioc->lock should be held to walk our icq_list and then
* for each icq the outer matching queue_lock should be grabbed.
* ie. We need to do reverse-order double lock dancing.
*
* Another twist is that we are often called with one of the
* matching queue_locks held as indicated by @locked_q, which
* prevents performing double-lock dance for other queues.
*
* So, we do it in two stages. The fast path uses the queue_lock
* the caller is holding and, if other queues need to be accessed,
* uses trylock to avoid introducing locking dependency. This can
* handle most cases, especially if @ioc was performing IO on only
* single device.
*
* If trylock doesn't cut it, we defer to @ioc->release_work which
* can do all the double-locking dancing.
* Releasing ioc requires reverse order double locking and we may
* already be holding a queue_lock. Do it asynchronously from wq.
*/
spin_lock_irqsave_nested(&ioc->lock, flags,
ioc_release_depth(locked_q));

while (!hlist_empty(&ioc->icq_list)) {
struct io_cq *icq = hlist_entry(ioc->icq_list.first,
struct io_cq, ioc_node);
struct request_queue *this_q = icq->q;

if (this_q != last_q) {
if (last_q && last_q != locked_q)
spin_unlock(last_q->queue_lock);
last_q = NULL;

if (!spin_trylock(this_q->queue_lock))
break;
last_q = this_q;
continue;
}
ioc_exit_icq(icq);
if (atomic_long_dec_and_test(&ioc->refcount)) {
spin_lock_irqsave(&ioc->lock, flags);
if (!hlist_empty(&ioc->icq_list))
schedule_work(&ioc->release_work);
spin_unlock_irqrestore(&ioc->lock, flags);
}

if (last_q && last_q != locked_q)
spin_unlock(last_q->queue_lock);

spin_unlock_irqrestore(&ioc->lock, flags);

/* if no icq is left, we're done; otherwise, kick release_work */
if (hlist_empty(&ioc->icq_list))
kmem_cache_free(iocontext_cachep, ioc);
else
schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);

@@ -236,7 +175,7 @@ void exit_io_context(struct task_struct *task)
task_unlock(task);

atomic_dec(&ioc->nr_tasks);
put_io_context(ioc, NULL);
put_io_context(ioc);
}

/**
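The matching prototype change in include/linux/iocontext.h is among the 21 changed files but its hunk is not rendered on this page; presumably it reduces to dropping the second parameter:

	/* sketch of the expected declaration change (hunk not shown here) */
	void put_io_context(struct io_context *ioc);

Deferring the hard case to release_work trades a little latency on the final put for a simpler locking story: the reverse-order ioc->lock vs. queue_lock dance now happens only in workqueue context, where a fixed lockdep subclass on ioc->lock keeps the nesting warning away.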
37 changes: 37 additions & 0 deletions block/blk-merge.c
@@ -471,3 +471,40 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
{
return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
if (!rq_mergeable(rq))
return false;

/* don't merge file system requests and discard requests */
if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
return false;

/* don't merge discard requests and secure discard requests */
if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
return false;

/* different data direction or already started, don't merge */
if (bio_data_dir(bio) != rq_data_dir(rq))
return false;

/* must be same device and not a special request */
if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
return false;

/* only merge integrity protected bio into ditto rq */
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;

return true;
}

int blk_try_merge(struct request *rq, struct bio *bio)
{
if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
return ELEVATOR_BACK_MERGE;
else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
return ELEVATOR_FRONT_MERGE;
return ELEVATOR_NO_MERGE;
}
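
The intended consumer of this split is the plug-merge path changed in blk-core.c above, which must not reach into the elevator without the queue lock. A condensed, illustrative sketch of that usage (example_try_plug_merge is a hypothetical name; the real logic lives in attempt_plug_merge()):

	/* Illustrative only: lock-free merge check against one plugged request.
	 * Only the two new helpers are consulted, so no elevator callback can
	 * be reached from this path. */
	static bool example_try_plug_merge(struct request_queue *q,
					   struct request *rq, struct bio *bio)
	{
		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
			return false;			/* basic attributes rule out a merge */

		switch (blk_try_merge(rq, bio)) {	/* purely positional decision */
		case ELEVATOR_BACK_MERGE:
			return bio_attempt_back_merge(q, rq, bio);
		case ELEVATOR_FRONT_MERGE:
			return bio_attempt_front_merge(q, rq, bio);
		default:
			return false;
		}
	}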
2 changes: 2 additions & 0 deletions block/blk.h
@@ -137,6 +137,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
void blk_recalc_rq_segments(struct request *rq);
void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
int blk_try_merge(struct request *rq, struct bio *bio);

void blk_queue_congestion_threshold(struct request_queue *q);

3 changes: 2 additions & 1 deletion block/bsg.c
@@ -983,7 +983,8 @@ void bsg_unregister_queue(struct request_queue *q)

mutex_lock(&bsg_mutex);
idr_remove(&bsg_minor_idr, bcd->minor);
sysfs_remove_link(&q->kobj, "bsg");
if (q->kobj.sd)
sysfs_remove_link(&q->kobj, "bsg");
device_unregister(bcd->class_dev);
bcd->class_dev = NULL;
kref_put(&bcd->ref, bsg_kref_release_function);
24 changes: 9 additions & 15 deletions block/cfq-iosched.c
@@ -1699,18 +1699,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,

/*
* Lookup the cfqq that this bio will be queued with and allow
* merge only if rq is queued there. This function can be called
* from plug merge without queue_lock. In such cases, ioc of @rq
* and %current are guaranteed to be equal. Avoid lookup which
* requires queue_lock by using @rq's cic.
* merge only if rq is queued there.
*/
if (current->io_context == RQ_CIC(rq)->icq.ioc) {
cic = RQ_CIC(rq);
} else {
cic = cfq_cic_lookup(cfqd, current->io_context);
if (!cic)
return false;
}
cic = cfq_cic_lookup(cfqd, current->io_context);
if (!cic)
return false;

cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
return cfqq == RQ_CFQQ(rq);
@@ -1794,7 +1787,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfqd->active_queue = NULL;

if (cfqd->active_cic) {
put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
put_io_context(cfqd->active_cic->icq.ioc);
cfqd->active_cic = NULL;
}
}
@@ -3117,17 +3110,18 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
*/
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
enum wl_type_t old_type = cfqq_type(cfqd->active_queue);

cfq_log_cfqq(cfqd, cfqq, "preempt");
cfq_slice_expired(cfqd, 1);

/*
* workload type is changed, don't save slice, otherwise preempt
* doesn't happen
*/
if (cfqq_type(cfqd->active_queue) != cfqq_type(cfqq))
if (old_type != cfqq_type(cfqq))
cfqq->cfqg->saved_workload_slice = 0;

cfq_slice_expired(cfqd, 1);

/*
* Put the new queue at the front of the of the current list,
* so we know that it will be selected next.
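The cfq_preempt_queue() hunk moves cfq_slice_expired() ahead of the saved_workload_slice check and caches the active queue's workload type up front, which is necessary because __cfq_slice_expired() (shown further up) clears cfqd->active_queue. A comment-level restatement of the new order, using the same names as the hunk:

	/* sample the preempted queue's workload type before expiring it;
	 * afterwards cfqd->active_queue no longer points at that queue */
	enum wl_type_t old_type = cfqq_type(cfqd->active_queue);

	cfq_slice_expired(cfqd, 1);

	/* workload type changed: drop the saved slice so the preempting
	 * queue is actually picked next (per the in-hunk comment) */
	if (old_type != cfqq_type(cfqq))
		cfqq->cfqg->saved_workload_slice = 0;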
(diffs for the remaining 14 changed files are not shown here)
