 block/blk-core.c       | 256
 drivers/md/raid10.c    |   6
 fs/bio.c               |  39
 include/linux/blkdev.h |   5
 include/linux/sched.h  |   3
 5 files changed, 57 insertions(+), 252 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d3b0782ec343..ca5d40d2d3b6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1423,68 +1423,6 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
 	return true;
 }
 
-/**
- * blk_attempt_plug_merge - try to merge with %current's plugged list
- * @q: request_queue new bio is being queued at
- * @bio: new bio being queued
- * @request_count: out parameter for number of traversed plugged requests
- *
- * Determine whether @bio being queued on @q can be merged with a request
- * on %current's plugged list. Returns %true if merge was successful,
- * otherwise %false.
- *
- * Plugging coalesces IOs from the same issuer for the same purpose without
- * going through @q->queue_lock. As such it's more of an issuing mechanism
- * than scheduling, and the request, while may have elvpriv data, is not
- * added on the elevator at this point. In addition, we don't have
- * reliable access to the elevator outside queue lock. Only check basic
- * merging parameters without querying the elevator.
- */
-bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-			    unsigned int *request_count)
-{
-	struct blk_plug *plug;
-	struct request *rq;
-	bool ret = false;
-	struct list_head *plug_list;
-
-	if (blk_queue_nomerges(q))
-		goto out;
-
-	plug = current->plug;
-	if (!plug)
-		goto out;
-	*request_count = 0;
-
-	if (q->mq_ops)
-		plug_list = &plug->mq_list;
-	else
-		plug_list = &plug->list;
-
-	list_for_each_entry_reverse(rq, plug_list, queuelist) {
-		int el_ret;
-
-		if (rq->q == q)
-			(*request_count)++;
-
-		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
-			continue;
-
-		el_ret = blk_try_merge(rq, bio);
-		if (el_ret == ELEVATOR_BACK_MERGE) {
-			ret = bio_attempt_back_merge(q, rq, bio);
-			if (ret)
-				break;
-		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
-			ret = bio_attempt_front_merge(q, rq, bio);
-			if (ret)
-				break;
-		}
-	}
-out:
-	return ret;
-}
-
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cmd_type = REQ_TYPE_FS;
@@ -1502,10 +1440,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 void blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
-	struct blk_plug *plug;
 	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
 	struct request *req;
-	unsigned int request_count = 0;
+
+	spin_lock_irq(q->queue_lock);
 
 	blk_queue_split(q, &bio, q->bio_split);
 
@@ -1522,20 +1460,10 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
-		spin_lock_irq(q->queue_lock);
 		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
 	}
 
-	/*
-	 * Check if we can merge with the plugged list before grabbing
-	 * any locks.
-	 */
-	if (blk_attempt_plug_merge(q, bio, &request_count))
-		return;
-
-	spin_lock_irq(q->queue_lock);
-
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
 		if (bio_attempt_back_merge(q, req, bio)) {
@@ -1584,29 +1512,11 @@ get_rq:
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
 		req->cpu = raw_smp_processor_id();
 
-	plug = current->plug;
-	if (plug) {
-		/*
-		 * If this is the first request added after a plug, fire
-		 * of a plug trace.
-		 */
-		if (!request_count)
-			trace_block_plug(q);
-		else {
-			if (request_count >= BLK_MAX_REQUEST_COUNT) {
-				blk_flush_plug_list(plug, false);
-				trace_block_plug(q);
-			}
-		}
-		list_add_tail(&req->queuelist, &plug->list);
-		blk_account_io_start(req, true);
-	} else {
-		spin_lock_irq(q->queue_lock);
-		add_acct_request(q, req, where);
-		__blk_run_queue(q);
+	spin_lock_irq(q->queue_lock);
+	add_acct_request(q, req, where);
+	__blk_run_queue(q);
 out_unlock:
-		spin_unlock_irq(q->queue_lock);
-	}
+	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blk_queue_bio);	/* for device mapper only */
 
@@ -1718,6 +1628,8 @@ generic_make_request_checks(struct bio *bio)
 
 	might_sleep();
 
+	BUG_ON(bio->bi_next);
+
 	if (bio_check_eod(bio, nr_sectors))
 		goto end_io;
 
@@ -1816,11 +1728,19 @@ end_io:
  */
 void generic_make_request(struct bio *bio)
 {
-	struct bio_list bio_list_on_stack;
+	struct task_struct *tsk = current;
+	struct request_queue *q;
+	struct bio **p;
+	struct bio_list splits;
+	struct blk_plug plug;
+
+	bio_list_init(&splits);
 
 	if (!generic_make_request_checks(bio))
 		return;
 
+	q = bdev_get_queue(bio->bi_bdev);
+
 	/*
 	 * We only want one ->make_request_fn to be active at a time, else
 	 * stack usage with stacked devices could be a problem. So use
@@ -1831,36 +1751,27 @@ void generic_make_request(struct bio *bio)
 	 * it is non-NULL, then a make_request is active, and new requests
 	 * should be added at the tail
 	 */
-	if (current->bio_list) {
-		bio_list_add(current->bio_list, bio);
-		return;
-	}
+	blk_start_plug(&plug);
 
-	/* following loop may be a bit non-obvious, and so deserves some
-	 * explanation.
-	 * Before entering the loop, bio->bi_next is NULL (as all callers
-	 * ensure that) so we have a list with a single bio.
-	 * We pretend that we have just taken it off a longer list, so
-	 * we assign bio_list to a pointer to the bio_list_on_stack,
-	 * thus initialising the bio_list of new bios to be
-	 * added. ->make_request() may indeed add some more bios
-	 * through a recursive call to generic_make_request. If it
-	 * did, we find a non-NULL value in bio_list and re-enter the loop
-	 * from the top. In this case we really did just take the bio
-	 * of the top of the list (no pretending) and so remove it from
-	 * bio_list, and call into ->make_request() again.
-	 */
-	BUG_ON(bio->bi_next);
-	bio_list_init(&bio_list_on_stack);
-	current->bio_list = &bio_list_on_stack;
-	do {
-		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+	bio = splits.head;
+	p = &tsk->plug->list.head;
 
-		q->make_request_fn(q, bio);
+	while (*p &&
+	       ((bio->bi_bdev > (*p)->bi_bdev) ||
+		(bio->bi_bdev == (*p)->bi_bdev &&
+		 bio->bi_iter.bi_sector > (*p)->bi_iter.bi_sector)))
+		p = &(*p)->bi_next;
 
-		bio = bio_list_pop(current->bio_list);
-	} while (bio);
-	current->bio_list = NULL; /* deactivate */
+	splits.tail->bi_next = *p;
+	if (!splits.tail->bi_next)
+		tsk->plug->list.tail = splits.tail;
+
+	*p = splits.head;
+
+	if (tsk->plug != &plug)
+		return;
+
+	blk_finish_plug(&plug);
 }
 EXPORT_SYMBOL(generic_make_request);
 
@@ -2938,8 +2849,8 @@ void blk_start_plug(struct blk_plug *plug)
 	struct task_struct *tsk = current;
 
 	plug->magic = PLUG_MAGIC;
-	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->mq_list);
+	bio_list_init(&plug->list);
 	INIT_LIST_HEAD(&plug->cb_list);
 
 	/*
@@ -2956,34 +2867,6 @@ void blk_start_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_start_plug);
 
-static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
-{
-	struct request *rqa = container_of(a, struct request, queuelist);
-	struct request *rqb = container_of(b, struct request, queuelist);
-
-	return !(rqa->q < rqb->q ||
-		(rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
-}
-
-/*
- * If 'from_schedule' is true, then postpone the dispatch of requests
- * until a safe kblockd context. We due this to avoid accidental big
- * additional stack usage in driver dispatch, in places where the originally
- * plugger did not intend it.
- */
-static void queue_unplugged(struct request_queue *q, unsigned int depth,
-			    bool from_schedule)
-	__releases(q->queue_lock)
-{
-	trace_block_unplug(q, depth, !from_schedule);
-
-	if (from_schedule)
-		blk_run_queue_async(q);
-	else
-		__blk_run_queue(q);
-	spin_unlock(q->queue_lock);
-}
-
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
 {
 	LIST_HEAD(callbacks);
@@ -3029,10 +2912,7 @@ EXPORT_SYMBOL(blk_check_plugged);
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
-	unsigned long flags;
-	struct request *rq;
-	LIST_HEAD(list);
-	unsigned int depth;
+	struct bio *bio, *end;
 
 	BUG_ON(plug->magic != PLUG_MAGIC);
 
@@ -3041,62 +2921,28 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	if (!list_empty(&plug->mq_list))
 		blk_mq_flush_plug_list(plug, from_schedule);
 
-	if (list_empty(&plug->list))
-		return;
-
-	list_splice_init(&plug->list, &list);
+	while (plug->list.head) {
+		end = bio = plug->list.head;
+		while (end->bi_next &&
+		       end->bi_next->bi_bdev == bio->bi_bdev)
+			end = end->bi_next;
 
-	list_sort(NULL, &list, plug_rq_cmp);
+		plug->list.head = end->bi_next;
+		end->bi_next = NULL;
 
-	q = NULL;
-	depth = 0;
+		q = bdev_get_queue(bio->bi_bdev);
 
-	/*
-	 * Save and disable interrupts here, to avoid doing it for every
-	 * queue lock we have to take.
-	 */
-	local_irq_save(flags);
-	while (!list_empty(&list)) {
-		rq = list_entry_rq(list.next);
-		list_del_init(&rq->queuelist);
-		BUG_ON(!rq->q);
-		if (rq->q != q) {
-			/*
-			 * This drops the queue lock
-			 */
-			if (q)
-				queue_unplugged(q, depth, from_schedule);
-			q = rq->q;
-			depth = 0;
-			spin_lock(q->queue_lock);
-		}
+		while (bio) {
+			struct bio *p = bio;
 
-		/*
-		 * Short-circuit if @q is dead
-		 */
-		if (unlikely(blk_queue_dying(q))) {
-			__blk_end_request_all(rq, -ENODEV);
-			continue;
+			bio = bio->bi_next;
+			p->bi_next = NULL;
+			q->make_request_fn(q, p);
 		}
-
-		/*
-		 * rq is already accounted, so use raw insert
-		 */
-		if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
-			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
-		else
-			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
-
-		depth++;
 	}
 
-	/*
-	 * This drops the queue lock
-	 */
-	if (q)
-		queue_unplugged(q, depth, from_schedule);
-
-	local_irq_restore(flags);
+	bio_list_init(&plug->list);
 }
 
 void blk_finish_plug(struct blk_plug *plug)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f7096a557115..ac8f14283eba 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -954,9 +954,7 @@ static void wait_barrier(struct r10conf *conf)
 		 */
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->barrier ||
-				    (conf->nr_pending &&
-				     current->bio_list &&
-				     !bio_list_empty(current->bio_list)),
+				    conf->nr_pending,
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
@@ -1032,7 +1030,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	struct r10conf *conf = mddev->private;
 	struct bio *bio;
 
-	if (from_schedule || current->bio_list) {
+	if (from_schedule) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
 		conf->pending_count += plug->pending_cnt;
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -339,6 +339,7 @@ static void bio_alloc_rescue(struct work_struct *work)
 	}
 }
 
+#if 0
 static void punt_bios_to_rescuer(struct bio_set *bs)
 {
 	struct bio_list punt, nopunt;
@@ -369,6 +370,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 
 	queue_work(bs->rescue_workqueue, &bs->rescue_work);
 }
+#endif
 
 /**
  * bio_alloc_bioset - allocate a bio for I/O
@@ -407,7 +409,6 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
  */
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	gfp_t saved_gfp = gfp_mask;
 	unsigned front_pad;
 	unsigned inline_vecs;
 	unsigned long idx = BIO_POOL_NONE;
@@ -425,37 +426,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		front_pad = 0;
 		inline_vecs = nr_iovecs;
 	} else {
-		/*
-		 * generic_make_request() converts recursion to iteration; this
-		 * means if we're running beneath it, any bios we allocate and
-		 * submit will not be submitted (and thus freed) until after we
-		 * return.
-		 *
-		 * This exposes us to a potential deadlock if we allocate
-		 * multiple bios from the same bio_set() while running
-		 * underneath generic_make_request(). If we were to allocate
-		 * multiple bios (say a stacking block driver that was splitting
-		 * bios), we would deadlock if we exhausted the mempool's
-		 * reserve.
-		 *
-		 * We solve this, and guarantee forward progress, with a rescuer
-		 * workqueue per bio_set. If we go to allocate and there are
-		 * bios on current->bio_list, we first try the allocation
-		 * without __GFP_WAIT; if that fails, we punt those bios we
-		 * would be blocking to the rescuer workqueue before we retry
-		 * with the original gfp_flags.
-		 */
-
-		if (current->bio_list && !bio_list_empty(current->bio_list))
-			gfp_mask &= ~__GFP_WAIT;
-
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
-		if (!p && gfp_mask != saved_gfp) {
-			punt_bios_to_rescuer(bs);
-			gfp_mask = saved_gfp;
-			p = mempool_alloc(bs->bio_pool, gfp_mask);
-		}
-
 		front_pad = bs->front_pad;
 		inline_vecs = BIO_INLINE_VECS;
 	}
@@ -468,12 +439,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 
 	if (nr_iovecs > inline_vecs) {
 		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
-		if (!bvl && gfp_mask != saved_gfp) {
-			punt_bios_to_rescuer(bs);
-			gfp_mask = saved_gfp;
-			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
-		}
-
 		if (unlikely(!bvl))
 			goto err_free;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ff90dcf761be..ba911b583b1a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1036,11 +1036,10 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
  */
 struct blk_plug {
 	unsigned long magic; /* detect uninitialized use-cases */
-	struct list_head list; /* requests */
+	struct bio_list list;
 	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
 };
-#define BLK_MAX_REQUEST_COUNT 16
 
 struct blk_plug_cb;
 typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
@@ -1076,7 +1075,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 	struct blk_plug *plug = tsk->plug;
 
 	return plug &&
-		(!list_empty(&plug->list) ||
+		(!bio_list_empty(&plug->list) ||
 		 !list_empty(&plug->mq_list) ||
 		 !list_empty(&plug->cb_list));
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a781dec1cd0b..ddbbc4935e7a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1412,9 +1412,6 @@ struct task_struct {
 /* journalling filesystem info */
 	void *journal_info;
 
-/* stacked block device info */
-	struct bio_list *bio_list;
-
 #ifdef CONFIG_BLOCK
 /* stack plugging */
 	struct blk_plug *plug;
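
The two loops at the heart of this patch are compact but subtle. What follows is a minimal user-space sketch, not kernel code, of both: the pointer-to-pointer sorted insert that the new generic_make_request() performs on the per-task plug, and the per-device batching walk that the new blk_flush_plug_list() uses to drain it. struct bio and struct bio_list below are reduced stand-ins (bi_bdev and bi_iter.bi_sector collapsed to integers), and dispatch() stands in for q->make_request_fn(); the names plug_insert_sorted() and flush_plug() are illustrative, not kernel API.

#include <stdio.h>

/* Reduced stand-ins for the kernel structures this patch manipulates. */
struct bio {
	unsigned long bi_bdev;		/* stands in for the block_device pointer */
	unsigned long bi_sector;	/* stands in for bi_iter.bi_sector */
	struct bio *bi_next;
};

struct bio_list {
	struct bio *head;
	struct bio *tail;
};

/*
 * The sorted insert from the new generic_make_request(): walk the singly
 * linked plug list through a pointer-to-pointer until the insertion point
 * is found, then splice the run 'splits' in, fixing up the tail if the
 * run landed at the end.
 */
static void plug_insert_sorted(struct bio_list *plug, struct bio_list *splits)
{
	struct bio *bio = splits->head;
	struct bio **p = &plug->head;

	while (*p &&
	       ((bio->bi_bdev > (*p)->bi_bdev) ||
		(bio->bi_bdev == (*p)->bi_bdev &&
		 bio->bi_sector > (*p)->bi_sector)))
		p = &(*p)->bi_next;

	splits->tail->bi_next = *p;
	if (!splits->tail->bi_next)
		plug->tail = splits->tail;
	*p = splits->head;
}

/* Stands in for q->make_request_fn(q, bio). */
static void dispatch(struct bio *bio)
{
	printf("dispatch: bdev %lu sector %lu\n", bio->bi_bdev, bio->bi_sector);
}

/*
 * The drain loop from the new blk_flush_plug_list(): because the list is
 * kept sorted, bios for the same device form a contiguous run.  Peel one
 * run off the head at a time and submit its members individually.
 */
static void flush_plug(struct bio_list *plug)
{
	while (plug->head) {
		struct bio *bio = plug->head;
		struct bio *end = bio;

		while (end->bi_next && end->bi_next->bi_bdev == bio->bi_bdev)
			end = end->bi_next;

		plug->head = end->bi_next;	/* detach the batch */
		end->bi_next = NULL;

		while (bio) {
			struct bio *p = bio;

			bio = bio->bi_next;
			p->bi_next = NULL;
			dispatch(p);
		}
	}
	plug->head = plug->tail = NULL;
}

int main(void)
{
	struct bio a = { 1, 100, NULL }, b = { 2, 50, NULL }, c = { 1, 200, NULL };
	struct bio_list plug = { &a, &b };	/* plug holds (1,100) -> (2,50) */
	struct bio_list run = { &c, &c };

	a.bi_next = &b;
	plug_insert_sorted(&plug, &run);	/* (1,200) lands between them */
	flush_plug(&plug);			/* two batches: bdev 1, then bdev 2 */
	return 0;
}

Sorting at submit time is what lets the flush side drop the old list_sort()/plug_rq_cmp pass: by the time blk_flush_plug_list() runs, bios for the same device are already adjacent, so a single linear walk yields per-queue batches.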