 block/blk-core.c       | 256
 drivers/md/raid10.c    |   6
 fs/bio.c               |  39
 include/linux/blkdev.h |   5
 include/linux/sched.h  |   3
 5 files changed, 57 insertions(+), 252 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d3b0782ec343..ca5d40d2d3b6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1423,68 +1423,6 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
 	return true;
 }
 
-/**
- * blk_attempt_plug_merge - try to merge with %current's plugged list
- * @q: request_queue new bio is being queued at
- * @bio: new bio being queued
- * @request_count: out parameter for number of traversed plugged requests
- *
- * Determine whether @bio being queued on @q can be merged with a request
- * on %current's plugged list. Returns %true if merge was successful,
- * otherwise %false.
- *
- * Plugging coalesces IOs from the same issuer for the same purpose without
- * going through @q->queue_lock. As such it's more of an issuing mechanism
- * than scheduling, and the request, while may have elvpriv data, is not
- * added on the elevator at this point. In addition, we don't have
- * reliable access to the elevator outside queue lock. Only check basic
- * merging parameters without querying the elevator.
- */
-bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-			    unsigned int *request_count)
-{
-	struct blk_plug *plug;
-	struct request *rq;
-	bool ret = false;
-	struct list_head *plug_list;
-
-	if (blk_queue_nomerges(q))
-		goto out;
-
-	plug = current->plug;
-	if (!plug)
-		goto out;
-	*request_count = 0;
-
-	if (q->mq_ops)
-		plug_list = &plug->mq_list;
-	else
-		plug_list = &plug->list;
-
-	list_for_each_entry_reverse(rq, plug_list, queuelist) {
-		int el_ret;
-
-		if (rq->q == q)
-			(*request_count)++;
-
-		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
-			continue;
-
-		el_ret = blk_try_merge(rq, bio);
-		if (el_ret == ELEVATOR_BACK_MERGE) {
-			ret = bio_attempt_back_merge(q, rq, bio);
-			if (ret)
-				break;
-		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
-			ret = bio_attempt_front_merge(q, rq, bio);
-			if (ret)
-				break;
-		}
-	}
-out:
-	return ret;
-}
-
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cmd_type = REQ_TYPE_FS;
@@ -1502,10 +1440,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 void blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
-	struct blk_plug *plug;
 	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
 	struct request *req;
-	unsigned int request_count = 0;
+
+	spin_lock_irq(q->queue_lock);
 
 	blk_queue_split(q, &bio, q->bio_split);
 
@@ -1522,20 +1460,10 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
-		spin_lock_irq(q->queue_lock);
 		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
 	}
 
-	/*
-	 * Check if we can merge with the plugged list before grabbing
-	 * any locks.
-	 */
-	if (blk_attempt_plug_merge(q, bio, &request_count))
-		return;
-
-	spin_lock_irq(q->queue_lock);
-
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
 		if (bio_attempt_back_merge(q, req, bio)) {
@@ -1584,29 +1512,11 @@ get_rq:
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
 		req->cpu = raw_smp_processor_id();
 
-	plug = current->plug;
-	if (plug) {
-		/*
-		 * If this is the first request added after a plug, fire
-		 * of a plug trace.
-		 */
-		if (!request_count)
-			trace_block_plug(q);
-		else {
-			if (request_count >= BLK_MAX_REQUEST_COUNT) {
-				blk_flush_plug_list(plug, false);
-				trace_block_plug(q);
-			}
-		}
-		list_add_tail(&req->queuelist, &plug->list);
-		blk_account_io_start(req, true);
-	} else {
-		spin_lock_irq(q->queue_lock);
-		add_acct_request(q, req, where);
-		__blk_run_queue(q);
+	spin_lock_irq(q->queue_lock);
+	add_acct_request(q, req, where);
+	__blk_run_queue(q);
 out_unlock:
-		spin_unlock_irq(q->queue_lock);
-	}
+	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blk_queue_bio);	/* for device mapper only */
 
@@ -1718,6 +1628,8 @@ generic_make_request_checks(struct bio *bio)
 
 	might_sleep();
 
+	BUG_ON(bio->bi_next);
+
 	if (bio_check_eod(bio, nr_sectors))
 		goto end_io;
 
@@ -1816,11 +1728,19 @@ end_io:
  */
 void generic_make_request(struct bio *bio)
 {
-	struct bio_list bio_list_on_stack;
+	struct task_struct *tsk = current;
+	struct request_queue *q;
+	struct bio **p;
+	struct bio_list splits;
+	struct blk_plug plug;
+
+	bio_list_init(&splits);
 
 	if (!generic_make_request_checks(bio))
 		return;
 
+	q = bdev_get_queue(bio->bi_bdev);
+
 	/*
 	 * We only want one ->make_request_fn to be active at a time, else
 	 * stack usage with stacked devices could be a problem. So use
@@ -1831,36 +1751,27 @@ void generic_make_request(struct bio *bio)
 	 * it is non-NULL, then a make_request is active, and new requests
 	 * should be added at the tail
 	 */
-	if (current->bio_list) {
-		bio_list_add(current->bio_list, bio);
-		return;
-	}
+	blk_start_plug(&plug);
 
-	/* following loop may be a bit non-obvious, and so deserves some
-	 * explanation.
-	 * Before entering the loop, bio->bi_next is NULL (as all callers
-	 * ensure that) so we have a list with a single bio.
-	 * We pretend that we have just taken it off a longer list, so
-	 * we assign bio_list to a pointer to the bio_list_on_stack,
-	 * thus initialising the bio_list of new bios to be
-	 * added. ->make_request() may indeed add some more bios
-	 * through a recursive call to generic_make_request. If it
-	 * did, we find a non-NULL value in bio_list and re-enter the loop
-	 * from the top. In this case we really did just take the bio
-	 * of the top of the list (no pretending) and so remove it from
-	 * bio_list, and call into ->make_request() again.
-	 */
-	BUG_ON(bio->bi_next);
-	bio_list_init(&bio_list_on_stack);
-	current->bio_list = &bio_list_on_stack;
-	do {
-		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+	bio = splits.head;
+	p = &tsk->plug->list.head;
 
-		q->make_request_fn(q, bio);
+	while (*p &&
+	       ((bio->bi_bdev > (*p)->bi_bdev) ||
+		(bio->bi_bdev == (*p)->bi_bdev &&
+		 bio->bi_iter.bi_sector > (*p)->bi_iter.bi_sector)))
+		p = &(*p)->bi_next;
 
-		bio = bio_list_pop(current->bio_list);
-	} while (bio);
-	current->bio_list = NULL; /* deactivate */
+	splits.tail->bi_next = *p;
+	if (!splits.tail->bi_next)
+		tsk->plug->list.tail = splits.tail;
+
+	*p = splits.head;
+
+	if (tsk->plug != &plug)
+		return;
+
+	blk_finish_plug(&plug);
 }
 EXPORT_SYMBOL(generic_make_request);
 
@@ -2938,8 +2849,8 @@ void blk_start_plug(struct blk_plug *plug)
 	struct task_struct *tsk = current;
 
 	plug->magic = PLUG_MAGIC;
-	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->mq_list);
+	bio_list_init(&plug->list);
 	INIT_LIST_HEAD(&plug->cb_list);
 
 	/*
@@ -2956,34 +2867,6 @@ void blk_start_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_start_plug);
 
-static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
-{
-	struct request *rqa = container_of(a, struct request, queuelist);
-	struct request *rqb = container_of(b, struct request, queuelist);
-
-	return !(rqa->q < rqb->q ||
-		(rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
-}
-
-/*
- * If 'from_schedule' is true, then postpone the dispatch of requests
- * until a safe kblockd context. We due this to avoid accidental big
- * additional stack usage in driver dispatch, in places where the originally
- * plugger did not intend it.
- */
-static void queue_unplugged(struct request_queue *q, unsigned int depth,
-			    bool from_schedule)
-	__releases(q->queue_lock)
-{
-	trace_block_unplug(q, depth, !from_schedule);
-
-	if (from_schedule)
-		blk_run_queue_async(q);
-	else
-		__blk_run_queue(q);
-	spin_unlock(q->queue_lock);
-}
-
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
 {
 	LIST_HEAD(callbacks);
@@ -3029,10 +2912,7 @@ EXPORT_SYMBOL(blk_check_plugged);
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
-	unsigned long flags;
-	struct request *rq;
-	LIST_HEAD(list);
-	unsigned int depth;
+	struct bio *bio, *end;
 
 	BUG_ON(plug->magic != PLUG_MAGIC);
 
@@ -3041,62 +2921,28 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	if (!list_empty(&plug->mq_list))
 		blk_mq_flush_plug_list(plug, from_schedule);
 
-	if (list_empty(&plug->list))
-		return;
-
-	list_splice_init(&plug->list, &list);
+	while (plug->list.head) {
+		end = bio = plug->list.head;
+		while (end->bi_next &&
+		       end->bi_next->bi_bdev == bio->bi_bdev)
+			end = end->bi_next;
 
-	list_sort(NULL, &list, plug_rq_cmp);
+		plug->list.head = end->bi_next;
+		end->bi_next = NULL;
 
-	q = NULL;
-	depth = 0;
+		q = bdev_get_queue(bio->bi_bdev);
 
-	/*
-	 * Save and disable interrupts here, to avoid doing it for every
-	 * queue lock we have to take.
-	 */
-	local_irq_save(flags);
-	while (!list_empty(&list)) {
-		rq = list_entry_rq(list.next);
-		list_del_init(&rq->queuelist);
-		BUG_ON(!rq->q);
-		if (rq->q != q) {
-			/*
-			 * This drops the queue lock
-			 */
-			if (q)
-				queue_unplugged(q, depth, from_schedule);
-			q = rq->q;
-			depth = 0;
-			spin_lock(q->queue_lock);
-		}
+		while (bio) {
+			struct bio *p = bio;
 
-		/*
-		 * Short-circuit if @q is dead
-		 */
-		if (unlikely(blk_queue_dying(q))) {
-			__blk_end_request_all(rq, -ENODEV);
-			continue;
+			bio = bio->bi_next;
+			p->bi_next = NULL;
+			q->make_request_fn(q, p);
 		}
-
-		/*
-		 * rq is already accounted, so use raw insert
-		 */
-		if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
-			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
-		else
-			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
-
-		depth++;
 	}
 
-	/*
-	 * This drops the queue lock
-	 */
-	if (q)
-		queue_unplugged(q, depth, from_schedule);
-
-	local_irq_restore(flags);
+	bio_list_init(&plug->list);
 }
 
 void blk_finish_plug(struct blk_plug *plug)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f7096a557115..ac8f14283eba 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -954,9 +954,7 @@ static void wait_barrier(struct r10conf *conf)
 		 */
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->barrier ||
-				    (conf->nr_pending &&
-				     current->bio_list &&
-				     !bio_list_empty(current->bio_list)),
+				    conf->nr_pending,
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
@@ -1032,7 +1030,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	struct r10conf *conf = mddev->private;
 	struct bio *bio;
 
-	if (from_schedule || current->bio_list) {
+	if (from_schedule) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
 		conf->pending_count += plug->pending_cnt;
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -339,6 +339,7 @@ static void bio_alloc_rescue(struct work_struct *work)
 	}
 }
 
+#if 0
 static void punt_bios_to_rescuer(struct bio_set *bs)
 {
 	struct bio_list punt, nopunt;
@@ -369,6 +370,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 
 	queue_work(bs->rescue_workqueue, &bs->rescue_work);
 }
+#endif
 
 /**
  * bio_alloc_bioset - allocate a bio for I/O
@@ -407,7 +409,6 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  */
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	gfp_t saved_gfp = gfp_mask;
 	unsigned front_pad;
 	unsigned inline_vecs;
 	unsigned long idx = BIO_POOL_NONE;
@@ -425,37 +426,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		front_pad = 0;
 		inline_vecs = nr_iovecs;
 	} else {
-		/*
-		 * generic_make_request() converts recursion to iteration; this
-		 * means if we're running beneath it, any bios we allocate and
-		 * submit will not be submitted (and thus freed) until after we
-		 * return.
-		 *
-		 * This exposes us to a potential deadlock if we allocate
-		 * multiple bios from the same bio_set() while running
-		 * underneath generic_make_request(). If we were to allocate
-		 * multiple bios (say a stacking block driver that was splitting
-		 * bios), we would deadlock if we exhausted the mempool's
-		 * reserve.
-		 *
-		 * We solve this, and guarantee forward progress, with a rescuer
-		 * workqueue per bio_set. If we go to allocate and there are
-		 * bios on current->bio_list, we first try the allocation
-		 * without __GFP_WAIT; if that fails, we punt those bios we
-		 * would be blocking to the rescuer workqueue before we retry
-		 * with the original gfp_flags.
-		 */
-
-		if (current->bio_list && !bio_list_empty(current->bio_list))
-			gfp_mask &= ~__GFP_WAIT;
-
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
-		if (!p && gfp_mask != saved_gfp) {
-			punt_bios_to_rescuer(bs);
-			gfp_mask = saved_gfp;
-			p = mempool_alloc(bs->bio_pool, gfp_mask);
-		}
-
 		front_pad = bs->front_pad;
 		inline_vecs = BIO_INLINE_VECS;
 	}
@@ -468,12 +439,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 
 	if (nr_iovecs > inline_vecs) {
 		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
-		if (!bvl && gfp_mask != saved_gfp) {
-			punt_bios_to_rescuer(bs);
-			gfp_mask = saved_gfp;
-			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
-		}
-
 		if (unlikely(!bvl))
 			goto err_free;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ff90dcf761be..ba911b583b1a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1036,11 +1036,10 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
  */
 struct blk_plug {
 	unsigned long magic; /* detect uninitialized use-cases */
-	struct list_head list; /* requests */
+	struct bio_list list;
 	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
 };
-#define BLK_MAX_REQUEST_COUNT 16
 
 struct blk_plug_cb;
 typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
@@ -1076,7 +1075,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 	struct blk_plug *plug = tsk->plug;
 
 	return plug &&
-		(!list_empty(&plug->list) ||
+		(!bio_list_empty(&plug->list) ||
 		 !list_empty(&plug->mq_list) ||
 		 !list_empty(&plug->cb_list));
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a781dec1cd0b..ddbbc4935e7a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1412,9 +1412,6 @@ struct task_struct {
 /* journalling filesystem info */
 	void *journal_info;
 
-/* stacked block device info */
-	struct bio_list *bio_list;
-
 #ifdef CONFIG_BLOCK
 /* stack plugging */
 	struct blk_plug *plug;
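
The two loops at the heart of this patch are compact but subtle. What follows is a minimal user-space sketch, not kernel code, of both: the pointer-to-pointer sorted insert that the new generic_make_request() performs on the per-task plug, and the per-device batching walk that the new blk_flush_plug_list() uses to drain it. struct bio and struct bio_list below are reduced stand-ins (bi_bdev and bi_iter.bi_sector collapsed to integers), and dispatch() stands in for q->make_request_fn(); the names plug_insert_sorted() and flush_plug() are illustrative, not kernel API.

#include <stdio.h>

/* Reduced stand-ins for the kernel structures this patch manipulates. */
struct bio {
	unsigned long bi_bdev;		/* stands in for the block_device pointer */
	unsigned long bi_sector;	/* stands in for bi_iter.bi_sector */
	struct bio *bi_next;
};

struct bio_list {
	struct bio *head;
	struct bio *tail;
};

/*
 * The sorted insert from the new generic_make_request(): walk the singly
 * linked plug list through a pointer-to-pointer until the insertion point
 * is found, then splice the run 'splits' in, fixing up the tail if the
 * run landed at the end.
 */
static void plug_insert_sorted(struct bio_list *plug, struct bio_list *splits)
{
	struct bio *bio = splits->head;
	struct bio **p = &plug->head;

	while (*p &&
	       ((bio->bi_bdev > (*p)->bi_bdev) ||
		(bio->bi_bdev == (*p)->bi_bdev &&
		 bio->bi_sector > (*p)->bi_sector)))
		p = &(*p)->bi_next;

	splits->tail->bi_next = *p;
	if (!splits->tail->bi_next)
		plug->tail = splits->tail;
	*p = splits->head;
}

/* Stands in for q->make_request_fn(q, bio). */
static void dispatch(struct bio *bio)
{
	printf("dispatch: bdev %lu sector %lu\n", bio->bi_bdev, bio->bi_sector);
}

/*
 * The drain loop from the new blk_flush_plug_list(): because the list is
 * kept sorted, bios for the same device form a contiguous run.  Peel one
 * run off the head at a time and submit its members individually.
 */
static void flush_plug(struct bio_list *plug)
{
	while (plug->head) {
		struct bio *bio = plug->head;
		struct bio *end = bio;

		while (end->bi_next && end->bi_next->bi_bdev == bio->bi_bdev)
			end = end->bi_next;

		plug->head = end->bi_next;	/* detach the batch */
		end->bi_next = NULL;

		while (bio) {
			struct bio *p = bio;

			bio = bio->bi_next;
			p->bi_next = NULL;
			dispatch(p);
		}
	}
	plug->head = plug->tail = NULL;
}

int main(void)
{
	struct bio a = { 1, 100, NULL }, b = { 2, 50, NULL }, c = { 1, 200, NULL };
	struct bio_list plug = { &a, &b };	/* plug holds (1,100) -> (2,50) */
	struct bio_list run = { &c, &c };

	a.bi_next = &b;
	plug_insert_sorted(&plug, &run);	/* (1,200) lands between them */
	flush_plug(&plug);			/* two batches: bdev 1, then bdev 2 */
	return 0;
}

Sorting at submit time is what lets the flush side drop the old list_sort()/plug_rq_cmp pass: by the time blk_flush_plug_list() runs, bios for the same device are already adjacent, so a single linear walk yields per-queue batches.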