diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 92d26c81244134b07f5c9d6c0ffa27542892979a..b5e91653289dca4288921cd50e31d9f0ee548251 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -59,7 +59,7 @@ struct nfhd_device { struct gendisk *disk; }; -static blk_qc_t nfhd_submit_bio(struct bio *bio) +static void nfhd_submit_bio(struct bio *bio) { struct nfhd_device *dev = bio->bi_disk->private_data; struct bio_vec bvec; @@ -77,7 +77,6 @@ static blk_qc_t nfhd_submit_bio(struct bio *bio) sec += len; } bio_endio(bio); - return BLK_QC_T_NONE; } static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 2b3c829f655b3e4a1abc5b6abd39431bf760147a..03ac97379c5cbf32be5e0929c99320692d2eecce 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -101,7 +101,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector, spin_unlock(&dev->lock); } -static blk_qc_t simdisk_submit_bio(struct bio *bio) +static void simdisk_submit_bio(struct bio *bio) { struct simdisk *dev = bio->bi_disk->private_data; struct bio_vec bvec; @@ -119,7 +119,6 @@ static blk_qc_t simdisk_submit_bio(struct bio *bio) } bio_endio(bio); - return BLK_QC_T_NONE; } static int simdisk_open(struct block_device *bdev, fmode_t mode) diff --git a/block/bio.c b/block/bio.c index 6299be459d3e918d0753813527aeed4b719678a4..22a409c9c3c2067d99d4f807be6485614bf90c1a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -105,7 +105,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU, NULL); if (!slab) goto out_unlock; @@ -281,6 +281,7 @@ void bio_init(struct bio *bio, struct bio_vec *table, memset(bio, 0, 
sizeof(*bio)); atomic_set(&bio->__bi_remaining, 1); atomic_set(&bio->__bi_cnt, 1); + bio->bi_cookie = BLK_QC_T_NONE; bio->bi_io_vec = table; bio->bi_max_vecs = max_vecs; diff --git a/block/blk-core.c b/block/blk-core.c index 30b80b4851990814503b7cf1e53b13c076ec17ec..f6b08c4386049aa8c0a97fec0663e15cbc1a5eb2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -854,7 +854,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) } if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - bio->bi_opf &= ~REQ_HIPRI; + bio->bi_opf &= ~REQ_POLLED; switch (bio_op(bio)) { case REQ_OP_DISCARD: @@ -921,18 +921,22 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) return false; } -static blk_qc_t __submit_bio(struct bio *bio) +static void __submit_bio(struct bio *bio) { struct gendisk *disk = bio->bi_disk; - blk_qc_t ret = BLK_QC_T_NONE; - if (blk_crypto_bio_prep(&bio)) { - if (!disk->fops->submit_bio) - return blk_mq_submit_bio(bio); - ret = disk->fops->submit_bio(bio); + if (unlikely(bio_queue_enter(bio) != 0)) + return; + + if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio)) + goto queue_exit; + if (!disk->fops->submit_bio) { + blk_mq_submit_bio(bio); + return; } + disk->fops->submit_bio(bio); +queue_exit: blk_queue_exit(disk->queue); - return ret; } /* @@ -954,10 +958,9 @@ static blk_qc_t __submit_bio(struct bio *bio) * bio_list_on_stack[1] contains bios that were submitted before the current * ->submit_bio_bio, but that haven't been processed yet. */ -static blk_qc_t __submit_bio_noacct(struct bio *bio) +static void __submit_bio_noacct(struct bio *bio) { struct bio_list bio_list_on_stack[2]; - blk_qc_t ret = BLK_QC_T_NONE; BUG_ON(bio->bi_next); @@ -968,16 +971,13 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) struct request_queue *q = bio->bi_disk->queue; struct bio_list lower, same; - if (unlikely(bio_queue_enter(bio) != 0)) - continue; - /* * Create a fresh bio_list for all subordinate requests. 
*/ bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); - ret = __submit_bio(bio); + __submit_bio(bio); /* * Sort new bios into those for a lower level and those for the @@ -1000,36 +1000,22 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); current->bio_list = NULL; - return ret; } -static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) +static void __submit_bio_noacct_mq(struct bio *bio) { struct bio_list bio_list[2] = { }; - blk_qc_t ret = BLK_QC_T_NONE; current->bio_list = bio_list; do { - struct gendisk *disk = bio->bi_disk; - - if (unlikely(bio_queue_enter(bio) != 0)) - continue; - - if (!blk_crypto_bio_prep(&bio)) { - blk_queue_exit(disk->queue); - ret = BLK_QC_T_NONE; - continue; - } - - ret = blk_mq_submit_bio(bio); + __submit_bio(bio); } while ((bio = bio_list_pop(&bio_list[0]))); current->bio_list = NULL; - return ret; } -blk_qc_t submit_bio_noacct_nocheck(struct bio *bio) +void submit_bio_noacct_nocheck(struct bio *bio) { blk_cgroup_bio_start(bio); blkcg_bio_issue_init(bio); @@ -1049,14 +1035,12 @@ blk_qc_t submit_bio_noacct_nocheck(struct bio *bio) * to collect a list of requests submited by a ->submit_bio method while * it is active, and then process them after it returned. */ - if (current->bio_list) { + if (current->bio_list) bio_list_add(&current->bio_list[0], bio); - return BLK_QC_T_NONE; - } - - if (!bio->bi_disk->fops->submit_bio) - return __submit_bio_noacct_mq(bio); - return __submit_bio_noacct(bio); + else if (!bio->bi_disk->fops->submit_bio) + __submit_bio_noacct_mq(bio); + else + __submit_bio_noacct(bio); } /** @@ -1068,11 +1052,9 @@ blk_qc_t submit_bio_noacct_nocheck(struct bio *bio) * systems and other upper level users of the block layer should use * submit_bio() instead. 
*/ -blk_qc_t submit_bio_noacct(struct bio *bio) +void submit_bio_noacct(struct bio *bio) { - if (!submit_bio_checks(bio)) - return BLK_QC_T_NONE; - return submit_bio_noacct_nocheck(bio); + submit_bio_noacct_nocheck(bio); } EXPORT_SYMBOL(submit_bio_noacct); @@ -1089,10 +1071,10 @@ EXPORT_SYMBOL(submit_bio_noacct); * in @bio. The bio must NOT be touched by thecaller until ->bi_end_io() has * been called. */ -blk_qc_t submit_bio(struct bio *bio) +void submit_bio(struct bio *bio) { if (blkcg_punt_bio_submit(bio)) - return BLK_QC_T_NONE; + return; /* * If it's a regular read/write or a barrier with data attached, @@ -1132,19 +1114,92 @@ blk_qc_t submit_bio(struct bio *bio) if (unlikely(bio_op(bio) == REQ_OP_READ && bio_flagged(bio, BIO_WORKINGSET))) { unsigned long pflags; - blk_qc_t ret; psi_memstall_enter(&pflags); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); psi_memstall_leave(&pflags); - - return ret; + return; } - return submit_bio_noacct(bio); + submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); +/** + * bio_poll - poll for BIO completions + * @bio: bio to poll for + * @flags: BLK_POLL_* flags that control the behavior + * + * Poll for completions on queue associated with the bio. Returns number of + * completed entries found. + * + * Note: the caller must either be the context that submitted @bio, or + * be in a RCU critical section to prevent freeing of @bio. 
+ */ +int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) +{ + struct request_queue *q = bio->bi_disk->queue; + blk_qc_t cookie = READ_ONCE(bio->bi_cookie); + int ret; + + if (cookie == BLK_QC_T_NONE || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return 0; + + if (current->plug) + blk_flush_plug_list(current->plug, false); + + if (bio_queue_enter(bio)) + return 0; + if (WARN_ON_ONCE(!queue_is_mq(q))) + ret = 0; /* not yet implemented, should not happen */ + else + ret = blk_mq_poll(q, cookie, iob, flags); + blk_queue_exit(q); + return ret; +} +EXPORT_SYMBOL_GPL(bio_poll); + +/* + * Helper to implement file_operations.iopoll. Requires the bio to be stored + * in iocb->private, and cleared before freeing the bio. + */ +int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, + unsigned int flags) +{ + struct bio *bio; + int ret = 0; + + /* + * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can + * point to a freshly allocated bio at this point. If that happens + * we have a few cases to consider: + * + * 1) the bio is being initialized and bi_bdev is NULL. We can just + * simply do nothing in this case + * 2) the bio points to a not poll enabled device. bio_poll will catch + * this and return 0 + * 3) the bio points to a poll capable device, including but not + * limited to the one that the original bio pointed to. In this + * case we will call into the actual poll method and poll for I/O, + * even if we don't need to, but it won't cause harm either. + * + * For cases 2) and 3) above the RCU grace period ensures that bi_bdev + * is still allocated. Because partitions hold a reference to the whole + * device bdev and thus disk, the disk is also still valid. Grabbing + * a reference to the queue in bio_poll() ensures the hctxs and requests + * are still valid as well. 
+ */ + rcu_read_lock(); + bio = READ_ONCE(kiocb->private); + if (bio && bio->bi_disk) + ret = bio_poll(bio, iob, flags); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(iocb_bio_iopoll); + /** * blk_cloned_rq_check_limits - Helper function to check a cloned request * for the new queue limits @@ -1682,6 +1737,31 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); +void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) +{ + struct task_struct *tsk = current; + + /* + * If this is a nested plug, don't actually assign it. + */ + if (tsk->plug) + return; + + INIT_LIST_HEAD(&plug->mq_list); + plug->cached_rq = NULL; + plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); + plug->rq_count = 0; + plug->multiple_queues = false; + plug->nowait = false; + INIT_LIST_HEAD(&plug->cb_list); + + /* + * Store ordering should not be needed here, since a potential + * preempt will imply a full memory barrier + */ + tsk->plug = plug; +} + /** * blk_start_plug - initialize blk_plug and track it inside the task_struct * @plug: The &struct blk_plug that needs to be initialized @@ -1707,25 +1787,7 @@ EXPORT_SYMBOL(kblockd_mod_delayed_work_on); */ void blk_start_plug(struct blk_plug *plug) { - struct task_struct *tsk = current; - - /* - * If this is a nested plug, don't actually assign it. 
- */ - if (tsk->plug) - return; - - INIT_LIST_HEAD(&plug->mq_list); - INIT_LIST_HEAD(&plug->cb_list); - plug->rq_count = 0; - plug->multiple_queues = false; - plug->nowait = false; - - /* - * Store ordering should not be needed here, since a potential - * preempt will imply a full memory barrier - */ - tsk->plug = plug; + blk_start_plug_nr_ios(plug, 1); } EXPORT_SYMBOL(blk_start_plug); @@ -1777,6 +1839,8 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) if (!list_empty(&plug->mq_list)) blk_mq_flush_plug_list(plug, from_schedule); + if (unlikely(!from_schedule && plug->cached_rq)) + blk_mq_free_plug_rqs(plug); } /** diff --git a/block/blk-merge.c b/block/blk-merge.c index 92878bef03aaf058cdb46923861d5eeb28bc1fe0..5acc5252d1fe738ad03aea25748645b6051ed88a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -286,7 +286,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, * iopoll in direct IO routine. Given performance gain of iopoll for * big IO can be trival, disable iopoll when split needed. 
*/ - bio->bi_opf &= ~REQ_HIPRI; + bio->bi_opf &= ~REQ_POLLED; return bio_split(bio, sectors, GFP_NOIO, bs); } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index fb5fe971ca6e06434ff121ca240e418f6daee5ff..17d7f24c7d78e8471b28953f0603dc640fade173 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -326,7 +326,7 @@ static const char *const cmd_flag_name[] = { CMD_FLAG_NAME(BACKGROUND), CMD_FLAG_NAME(NOWAIT), CMD_FLAG_NAME(NOUNMAP), - CMD_FLAG_NAME(HIPRI), + CMD_FLAG_NAME(POLLED), }; #undef CMD_FLAG_NAME diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 16ad9e65661086f982ce7efa437825aa9b731c39..fba3f10249e751f34c6204afc1af87db06bd5029 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -192,6 +192,12 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, } } +void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags) +{ + sbitmap_queue_clear_batch(tags->bitmap_tags, tags->nr_reserved_tags, + tag_array, nr_tags); +} + struct bt_iter_data { struct blk_mq_hw_ctx *hctx; busy_iter_fn *fn; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index edf918c863afe79b31a39b6ddd4f303af0e34c85..15729ad84ed0933cd84cd132d6f19e990a0b6d39 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -47,6 +47,7 @@ extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag); +void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); diff --git a/block/blk-mq.c b/block/blk-mq.c index 215aa0e5481b80dee1f86392da0717bc0b0eb1b3..ebf4b1cd2490ab6b827090a3159e437c2df95c5a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -63,6 +63,32 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) return 
bucket; } +#define BLK_QC_T_SHIFT 16 +#define BLK_QC_T_INTERNAL (1U << 31) + +static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, + blk_qc_t qc) +{ + return q->queue_hw_ctx[(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT]; +} + +static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx, + blk_qc_t qc) +{ + unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1); + + if (qc & BLK_QC_T_INTERNAL) + return blk_mq_tag_to_rq(hctx->sched_tags, tag); + return blk_mq_tag_to_rq(hctx->tags, tag); +} + +static inline blk_qc_t blk_rq_to_qc(struct request *rq) +{ + return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) | + (rq->tag != -1 ? + rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL)); +} + /* * Check if any of the ctx, dispatch list or elevator * have pending work in this hardware queue. @@ -294,15 +320,6 @@ void blk_mq_wake_waiters(struct request_queue *q) blk_mq_tag_wakeup_all(hctx->tags, true); } -/* - * Only need start/end time stamping if we have iostat or - * blk stats enabled, or using an IO scheduler. - */ -static inline bool blk_mq_need_time_stamp(struct request *rq) -{ - return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator; -} - static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, unsigned int tag, u64 alloc_time_ns) { @@ -379,6 +396,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data) struct request_queue *q = data->q; struct elevator_queue *e = q->elevator; u64 alloc_time_ns = 0; + struct request *rq; unsigned int tag; /* alloc_time includes depth and tag waits */ @@ -411,10 +429,20 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data) * case just retry the hctx assignment and tag allocation as CPU hotplug * should have migrated us to an online CPU by now. 
*/ - tag = blk_mq_get_tag(data); - if (tag == BLK_MQ_NO_TAG) { + do { + tag = blk_mq_get_tag(data); + if (tag != BLK_MQ_NO_TAG) { + rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns); + if (!--data->nr_tags) + return rq; + if (e || data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + return rq; + rq_list_add(data->cached_rq, rq); + data->flags |= BLK_MQ_REQ_NOWAIT; + continue; + } if (data->flags & BLK_MQ_REQ_NOWAIT) - return NULL; + break; /* * Give up the CPU and sleep for a random short time to ensure @@ -423,8 +451,9 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data) */ msleep(3); goto retry; - } - return blk_mq_rq_ctx_init(data, tag, alloc_time_ns); + } while (1); + + return rq_list_pop(data->cached_rq); } struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, @@ -434,6 +463,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, .q = q, .flags = flags, .cmd_flags = op, + .nr_tags = 1, }; struct request *rq; int ret; @@ -462,6 +492,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, .q = q, .flags = flags, .cmd_flags = op, + .nr_tags = 1, }; u64 alloc_time_ns = 0; unsigned int cpu; @@ -566,21 +597,31 @@ void blk_mq_free_request(struct request *rq) } EXPORT_SYMBOL_GPL(blk_mq_free_request); -inline void __blk_mq_end_request(struct request *rq, blk_status_t error) +void blk_mq_free_plug_rqs(struct blk_plug *plug) { - u64 now = 0; + struct request *rq; - if (blk_mq_need_time_stamp(rq)) - now = ktime_get_ns(); + while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) { + percpu_ref_get(&rq->q->q_usage_counter); + blk_mq_free_request(rq); + } +} +static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) +{ if (rq->rq_flags & RQF_STATS) { blk_mq_poll_stats_start(rq->q); blk_stat_add(rq, now); } blk_mq_sched_completed_request(rq, now); - blk_account_io_done(rq, now); +} + +inline void __blk_mq_end_request(struct request *rq, blk_status_t error) +{ + if 
(blk_mq_need_time_stamp(rq)) + __blk_mq_end_request_acct(rq, ktime_get_ns()); if (rq->end_io) { rq_qos_done(rq->q, rq); @@ -599,6 +640,57 @@ void blk_mq_end_request(struct request *rq, blk_status_t error) } EXPORT_SYMBOL(blk_mq_end_request); +#define TAG_COMP_BATCH 32 + +static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, + int *tag_array, int nr_tags) +{ + struct request_queue *q = hctx->queue; + + blk_mq_put_tags(hctx->tags, tag_array, nr_tags); + percpu_ref_put_many(&q->q_usage_counter, nr_tags); +} + +void blk_mq_end_request_batch(struct io_comp_batch *iob) +{ + int tags[TAG_COMP_BATCH], nr_tags = 0; + struct blk_mq_hw_ctx *last_hctx = NULL; + struct request *rq; + u64 now = 0; + + if (iob->need_ts) + now = ktime_get_ns(); + + while ((rq = rq_list_pop(&iob->req_list)) != NULL) { + prefetch(rq->bio); + prefetch(rq->rq_next); + + blk_update_request(rq, BLK_STS_OK, blk_rq_bytes(rq)); + if (iob->need_ts) + __blk_mq_end_request_acct(rq, now); + + WRITE_ONCE(rq->state, MQ_RQ_IDLE); + if (!refcount_dec_and_test(&rq->ref)) + continue; + + blk_crypto_free_request(rq); + blk_pm_mark_last_busy(rq); + rq_qos_done(rq->q, rq); + + if (nr_tags == TAG_COMP_BATCH || + (last_hctx && last_hctx != rq->mq_hctx)) { + blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); + nr_tags = 0; + } + tags[nr_tags++] = rq->tag; + last_hctx = rq->mq_hctx; + } + + if (nr_tags) + blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); +} +EXPORT_SYMBOL_GPL(blk_mq_end_request_batch); + /* * Softirq action handler - move entries to local list and loop over them * while passing them to the queue registered handler. @@ -701,7 +793,7 @@ bool blk_mq_complete_request_remote(struct request *rq) * For a polled request, always complete locallly, it's pointless * to redirect the completion. 
*/ - if (rq->cmd_flags & REQ_HIPRI) + if (rq->cmd_flags & REQ_POLLED) return false; if (blk_mq_complete_need_ipi(rq)) { @@ -781,6 +873,8 @@ void blk_mq_start_request(struct request *rq) if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) q->integrity.profile->prepare_fn(rq); #endif + if (rq->bio && rq->bio->bi_opf & REQ_POLLED) + WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq)); } EXPORT_SYMBOL(blk_mq_start_request); @@ -1975,19 +2069,15 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, } static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - blk_qc_t *cookie, bool last) + struct request *rq, bool last) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { .rq = rq, .last = last, }; - blk_qc_t new_cookie; blk_status_t ret; - new_cookie = request_to_qc_t(hctx, rq); - /* * For OK queue, we are done. For error, caller may kill it. * Any other error (busy), just add it to our list as we @@ -1997,7 +2087,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, switch (ret) { case BLK_STS_OK: blk_mq_update_dispatch_busy(hctx, false); - *cookie = new_cookie; break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: @@ -2006,7 +2095,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, break; default: blk_mq_update_dispatch_busy(hctx, false); - *cookie = BLK_QC_T_NONE; break; } @@ -2015,7 +2103,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, - blk_qc_t *cookie, bool bypass_insert, bool last) { struct request_queue *q = rq->q; @@ -2045,7 +2132,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, goto insert; } - return __blk_mq_issue_directly(hctx, rq, cookie, last); + return __blk_mq_issue_directly(hctx, rq, last); insert: if (bypass_insert) return BLK_STS_RESOURCE; @@ -2059,7 +2146,6 @@ static 
blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, * blk_mq_try_issue_directly - Try to send a request directly to device driver. * @hctx: Pointer of the associated hardware queue. * @rq: Pointer to request to be sent. - * @cookie: Request queue cookie. * * If the device has enough resources to accept a new request now, send the * request directly to device driver. Else, insert at hctx->dispatch queue, so @@ -2067,7 +2153,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, * queue have higher priority. */ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, blk_qc_t *cookie) + struct request *rq) { blk_status_t ret; int srcu_idx; @@ -2076,7 +2162,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, hctx_lock(hctx, &srcu_idx); - ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); + ret = __blk_mq_try_issue_directly(hctx, rq, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) blk_mq_request_bypass_insert(rq, false, true); else if (ret != BLK_STS_OK) @@ -2089,11 +2175,10 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) { blk_status_t ret; int srcu_idx; - blk_qc_t unused_cookie; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; hctx_lock(hctx, &srcu_idx); - ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last); + ret = __blk_mq_try_issue_directly(hctx, rq, true, last); hctx_unlock(hctx, srcu_idx); return ret; @@ -2173,24 +2258,21 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) * * It will not queue the request if there is an error with the bio, or at the * request creation. - * - * Returns: Request queue cookie. 
*/ -blk_qc_t blk_mq_submit_bio(struct bio *bio) +void blk_mq_submit_bio(struct bio *bio) { struct request_queue *q = bio->bi_disk->queue; const int is_sync = op_is_sync(bio->bi_opf); const int is_flush_fua = op_is_flush(bio->bi_opf); struct blk_mq_alloc_data data = { .q = q, + .nr_tags = 1, }; struct request *rq; struct blk_plug *plug; struct request *same_queue_rq = NULL; unsigned int nr_segs; - blk_qc_t cookie; blk_status_t ret; - bool hipri; blk_queue_bounce(q, &bio); __blk_queue_split(&bio, &nr_segs); @@ -2207,23 +2289,31 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) rq_qos_throttle(q, bio); - hipri = bio->bi_opf & REQ_HIPRI; - - data.cmd_flags = bio->bi_opf; - rq = __blk_mq_alloc_request(&data); - if (unlikely(!rq)) { - rq_qos_cleanup(q, bio); - if (bio->bi_opf & REQ_NOWAIT) - bio_wouldblock_error(bio); - goto queue_exit; + plug = blk_mq_plug(q, bio); + if (plug && plug->cached_rq) { + rq = rq_list_pop(&plug->cached_rq); + INIT_LIST_HEAD(&rq->queuelist); + data.hctx = rq->mq_hctx; + } else { + data.cmd_flags = bio->bi_opf; + if (plug) { + data.nr_tags = plug->nr_ios; + plug->nr_ios = 1; + data.cached_rq = &plug->cached_rq; + } + rq = __blk_mq_alloc_request(&data); + if (unlikely(!rq)) { + rq_qos_cleanup(q, bio); + if (bio->bi_opf & REQ_NOWAIT) + bio_wouldblock_error(bio); + goto queue_exit; + } } trace_block_getrq(q, bio, bio->bi_opf); rq_qos_track(q, rq, bio); - cookie = request_to_qc_t(data.hctx, rq); - blk_mq_bio_to_request(rq, bio, nr_segs); ret = blk_crypto_init_request(rq); @@ -2231,10 +2321,9 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) bio->bi_status = ret; bio_endio(bio); blk_mq_free_request(rq); - return BLK_QC_T_NONE; + return; } - plug = blk_mq_plug(q, bio); if (unlikely(is_flush_fua)) { /* Bypass scheduler for flush requests */ blk_insert_flush(rq); @@ -2287,8 +2376,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) if (same_queue_rq) { data.hctx = same_queue_rq->mq_hctx; trace_block_unplug(q, 1, true); - blk_mq_try_issue_directly(data.hctx, 
same_queue_rq, - &cookie); + blk_mq_try_issue_directly(data.hctx, same_queue_rq); } } else if ((q->nr_hw_queues > 1 && is_sync) || !data.hctx->dispatch_busy) { @@ -2296,18 +2384,15 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) * There is no scheduler and we can try to send directly * to the hardware. */ - blk_mq_try_issue_directly(data.hctx, rq, &cookie); + blk_mq_try_issue_directly(data.hctx, rq); } else { /* Default case. */ blk_mq_sched_insert_request(rq, false, true, true); } - if (!hipri) - return BLK_QC_T_NONE; - return cookie; + return; queue_exit: blk_queue_exit(q); - return BLK_QC_T_NONE; } static size_t order_to_size(unsigned int order) @@ -3863,15 +3948,20 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q, return ret; } -static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, - struct request *rq) +static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc) { + struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc); + struct request *rq = blk_qc_to_rq(hctx, qc); struct hrtimer_sleeper hs; enum hrtimer_mode mode; unsigned int nsecs; ktime_t kt; - if (rq->rq_flags & RQF_MQ_POLL_SLEPT) + /* + * If a request has completed on queue that uses an I/O scheduler, we + * won't get back a request from blk_qc_to_rq. 
+ */ + if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT)) return false; /* @@ -3913,79 +4003,30 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, __set_current_state(TASK_RUNNING); destroy_hrtimer_on_stack(&hs.timer); - return true; -} - -static bool blk_mq_poll_hybrid(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, blk_qc_t cookie) -{ - struct request *rq; - - if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) - return false; - - if (!blk_qc_t_is_internal(cookie)) - rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); - else { - rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); - /* - * With scheduling, if the request has completed, we'll - * get a NULL return here, as we clear the sched tag when - * that happens. The request still remains valid, like always, - * so we should be safe with just the NULL check. - */ - if (!rq) - return false; - } - return blk_mq_poll_hybrid_sleep(q, rq); + /* + * If we sleep, have the caller restart the poll loop to reset the + * state. Like for the other success return cases, the caller is + * responsible for checking if the IO completed. If the IO isn't + * complete, we'll get called again and will go straight to the busy + * poll loop. + */ + return true; } -/** - * blk_poll - poll for IO completions - * @q: the queue - * @cookie: cookie passed back at IO submission time - * @spin: whether to spin for completions - * - * Description: - * Poll for completions on the passed in queue. Returns number of - * completed entries found. If @spin is true, then blk_poll will continue - * looping until at least one completion is found, unless the task is - * otherwise marked running (or we need to reschedule). 
- */ -int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) +static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, + struct io_comp_batch *iob, unsigned int flags) { - struct blk_mq_hw_ctx *hctx; - long state; - - if (!blk_qc_t_valid(cookie) || - !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - return 0; - - if (current->plug) - blk_flush_plug_list(current->plug, false); - - hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; - - /* - * If we sleep, have the caller restart the poll loop to reset - * the state. Like for the other success return cases, the - * caller is responsible for checking if the IO completed. If - * the IO isn't complete, we'll get called again and will go - * straight to the busy poll loop. - */ - if (blk_mq_poll_hybrid(q, hctx, cookie)) - return 1; + struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); + long state = current->state; + int ret; hctx->poll_considered++; - state = current->state; do { - int ret; - hctx->poll_invoked++; - ret = q->mq_ops->poll(hctx); + ret = q->mq_ops->poll(hctx, iob); if (ret > 0) { hctx->poll_success++; __set_current_state(TASK_RUNNING); @@ -3994,10 +4035,10 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) if (signal_pending_state(state, current)) __set_current_state(TASK_RUNNING); - if (current->state == TASK_RUNNING) return 1; - if (ret < 0 || !spin) + + if (ret < 0 || (flags & BLK_POLL_ONESHOT)) break; cpu_relax(); } while (!need_resched()); @@ -4005,7 +4046,17 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) __set_current_state(TASK_RUNNING); return 0; } -EXPORT_SYMBOL_GPL(blk_poll); + +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, + unsigned int flags) +{ + if (!(flags & BLK_POLL_NOSLEEP) && + q->poll_nsec != BLK_MQ_POLL_CLASSIC) { + if (blk_mq_poll_hybrid(q, cookie)) + return 1; + } + return blk_mq_poll_classic(q, cookie, iob, flags); +} unsigned int blk_mq_rq_cpu(struct request *rq) { diff 
--git a/block/blk-mq.h b/block/blk-mq.h index 4ec334dfd7bdd5bcf7f91755cf9638a713cf87db..356a155e0e4aabc69d437f1d2e5509438b35f47d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -37,6 +37,9 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; +void blk_mq_submit_bio(struct bio *bio); +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, + unsigned int flags); void blk_mq_exit_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); @@ -109,9 +112,9 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, enum hctx_type type = HCTX_TYPE_DEFAULT; /* - * The caller ensure that if REQ_HIPRI, poll must be enabled. + * The caller ensure that if REQ_POLLED, poll must be enabled. */ - if (flags & REQ_HIPRI) + if (flags & REQ_POLLED) type = HCTX_TYPE_POLL; else if ((flags & REQ_OP_MASK) == REQ_OP_READ) type = HCTX_TYPE_READ; @@ -128,6 +131,7 @@ extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); +void blk_mq_free_plug_rqs(struct blk_plug *plug); void blk_mq_release(struct request_queue *q); @@ -155,6 +159,10 @@ struct blk_mq_alloc_data { unsigned int shallow_depth; unsigned int cmd_flags; + /* allocate multiple requests/tags in one go */ + unsigned int nr_tags; + struct request **cached_rq; + /* input & output parameter */ struct blk_mq_ctx *ctx; struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 61de1627a233e15befe2fd2d319191c7f2692c08..55a630ee945239b2c270d265956d93f5df555cec 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -417,23 +417,12 @@ static ssize_t queue_poll_show(struct request_queue *q, char *page) static ssize_t 
queue_poll_store(struct request_queue *q, const char *page, size_t count) { - unsigned long poll_on; - ssize_t ret; - - if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL || - !q->tag_set->map[HCTX_TYPE_POLL].nr_queues) + if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) return -EINVAL; - ret = queue_var_store(&poll_on, page, count); - if (ret < 0) - return ret; - - if (poll_on) - blk_queue_flag_set(QUEUE_FLAG_POLL, q); - else - blk_queue_flag_clear(QUEUE_FLAG_POLL, q); - - return ret; + pr_info_ratelimited("writes to the poll attribute are ignored.\n"); + pr_info_ratelimited("please use driver specific parameters instead.\n"); + return count; } static ssize_t queue_io_timeout_show(struct request_queue *q, char *page) diff --git a/block/blk.h b/block/blk.h index 641e4315cfdce4294f73d3afcb0f9d62ea9534fb..1c1ca880398bf5eeeaa9707ab2df62e13732e466 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,7 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); -blk_qc_t submit_bio_noacct_nocheck(struct bio *bio); +void submit_bio_noacct_nocheck(struct bio *bio); static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index cc49a921339f778aaf948b52f92dd797677203d8..b086559a0ff92ceac764a79f59715ecec74fb8c6 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -282,7 +282,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, return err; } -static blk_qc_t brd_submit_bio(struct bio *bio) +static void brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_disk->private_data; struct bio_vec bvec; @@ -290,8 +290,10 @@ static blk_qc_t brd_submit_bio(struct bio *bio) struct bvec_iter iter; sector = bio->bi_iter.bi_sector; - if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) - goto io_error; + if 
(bio_end_sector(bio) > get_capacity(bio->bi_disk)) { + bio_io_error(bio); + return; + } bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; @@ -303,16 +305,14 @@ static blk_qc_t brd_submit_bio(struct bio *bio) err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, bio_op(bio), sector); - if (err) - goto io_error; + if (err) { + bio_io_error(bio); + return; + } sector += len >> SECTOR_SHIFT; } bio_endio(bio); - return BLK_QC_T_NONE; -io_error: - bio_io_error(bio); - return BLK_QC_T_NONE; } static int brd_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 60b9ca53c0a354ec6260b92463b616d11538a3ed..ea63951535a2f5edd1e42b238c9634173fff792d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1450,7 +1450,7 @@ extern void conn_free_crypto(struct drbd_connection *connection); /* drbd_req */ extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long); -extern blk_qc_t drbd_submit_bio(struct bio *bio); +void drbd_submit_bio(struct bio *bio); extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); extern int is_valid_ar_handle(struct drbd_request *, sector_t); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 69638146f949cfa339e6c428de6d8af1b0dd1a12..d78caf298c3f172d9fadabbfe1183990737e6568 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1594,7 +1594,7 @@ void do_submit(struct work_struct *ws) } } -blk_qc_t drbd_submit_bio(struct bio *bio) +void drbd_submit_bio(struct bio *bio) { struct drbd_device *device = bio->bi_disk->private_data; unsigned long start_jif; @@ -1610,7 +1610,6 @@ blk_qc_t drbd_submit_bio(struct bio *bio) inc_ap_bio(device); __drbd_make_request(device, bio, start_jif); - return BLK_QC_T_NONE; } static bool net_timeout_reached(struct drbd_request *net_req, diff 
--git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index bb3686c3869def033448a904c1148ded1e4600bb..9a577b6bd39fbff1dec5f60336543562b3639a6a 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1417,7 +1417,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb) return &nullb->queues[index]; } -static blk_qc_t null_submit_bio(struct bio *bio) +static void null_submit_bio(struct bio *bio) { sector_t sector = bio->bi_iter.bi_sector; sector_t nr_sectors = bio_sectors(bio); @@ -1429,7 +1429,6 @@ static blk_qc_t null_submit_bio(struct bio *bio) cmd->bio = bio; null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio)); - return BLK_QC_T_NONE; } static bool should_timeout_request(struct request *rq) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 467dbd06b7cdb13a860a7b717dbd4320e92002f2..a98e2cc79572393c031a5d149497376b9d0b8031 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2367,7 +2367,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) } } -static blk_qc_t pkt_submit_bio(struct bio *bio) +static void pkt_submit_bio(struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2390,7 +2390,7 @@ static blk_qc_t pkt_submit_bio(struct bio *bio) */ if (bio_data_dir(bio) == READ) { pkt_make_request_read(pd, bio); - return BLK_QC_T_NONE; + return; } if (!test_bit(PACKET_WRITABLE, &pd->flags)) { @@ -2422,10 +2422,9 @@ static blk_qc_t pkt_submit_bio(struct bio *bio) pkt_make_request_write(bio->bi_disk->queue, split); } while (split != bio); - return BLK_QC_T_NONE; + return; end_io: bio_io_error(bio); - return BLK_QC_T_NONE; } static void pkt_init_queue(struct pktcdvd_device *pd) diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1088798c8dd0c995db8a061c9b6b8a21236ea376..c8a9639437f5094b15b094d0ac36072e1d341296 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -579,7 +579,7 @@ static struct bio 
*ps3vram_do_bio(struct ps3_system_bus_device *dev, return next; } -static blk_qc_t ps3vram_submit_bio(struct bio *bio) +static void ps3vram_submit_bio(struct bio *bio) { struct ps3_system_bus_device *dev = bio->bi_disk->private_data; struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -595,13 +595,11 @@ static blk_qc_t ps3vram_submit_bio(struct bio *bio) spin_unlock_irq(&priv->lock); if (busy) - return BLK_QC_T_NONE; + return; do { bio = ps3vram_do_bio(dev, bio); } while (bio); - - return BLK_QC_T_NONE; } static const struct block_device_operations ps3vram_fops = { diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index edacefff6e355bde6f4943660372fb5a47ff2e5e..3016ba7e3afda2312545b10aead5df4c08e4f6e2 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -50,7 +50,7 @@ struct rsxx_bio_meta { static struct kmem_cache *bio_meta_pool; -static blk_qc_t rsxx_submit_bio(struct bio *bio); +static void rsxx_submit_bio(struct bio *bio); /*----------------- Block Device Operations -----------------*/ static int rsxx_blkdev_ioctl(struct block_device *bdev, @@ -120,7 +120,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, } } -static blk_qc_t rsxx_submit_bio(struct bio *bio) +static void rsxx_submit_bio(struct bio *bio) { struct rsxx_cardinfo *card = bio->bi_disk->private_data; struct rsxx_bio_meta *bio_meta; @@ -169,7 +169,7 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio) if (st) goto queue_err; - return BLK_QC_T_NONE; + return; queue_err: kmem_cache_free(bio_meta_pool, bio_meta); @@ -177,7 +177,6 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio) if (st) bio->bi_status = st; bio_endio(bio); - return BLK_QC_T_NONE; } /*----------------- Device Setup -------------------*/ diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 5eb44e4a91eebcec599e5ddb6f7d15c7137343b2..de0fd82ba82c52fb3c3d569fdf445af17c95d083 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -519,7 +519,7 @@ static int 
mm_check_plugged(struct cardinfo *card) return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb)); } -static blk_qc_t mm_submit_bio(struct bio *bio) +static void mm_submit_bio(struct bio *bio) { struct cardinfo *card = bio->bi_disk->private_data; @@ -536,8 +536,6 @@ static blk_qc_t mm_submit_bio(struct bio *bio) if (op_is_sync(bio->bi_opf) || !mm_check_plugged(card)) activate(card); spin_unlock_irq(&card->lock); - - return BLK_QC_T_NONE; } static irqreturn_t mm_interrupt(int irq, void *__card) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0636df6b67db65969760a17644dd75d1cf28916c..0af6f57b625577ad1f29009d4d4f70a70ed721d6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1593,22 +1593,18 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) /* * Handler function for all zram I/O requests. */ -static blk_qc_t zram_submit_bio(struct bio *bio) +static void zram_submit_bio(struct bio *bio) { struct zram *zram = bio->bi_disk->private_data; if (!valid_io_request(zram, bio->bi_iter.bi_sector, bio->bi_iter.bi_size)) { atomic64_inc(&zram->stats.invalid_io); - goto error; + bio_io_error(bio); + return; } __zram_make_request(zram, bio); - return BLK_QC_T_NONE; - -error: - bio_io_error(bio); - return BLK_QC_T_NONE; } static void zram_slot_free_notify(struct block_device *bdev, diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index b6246f73895cf8419117335d023fbfa2bd60ca8d..0a17c0cfa69a45b1b42bf836e2bd5c0869c35f80 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -47,7 +47,7 @@ static struct pblk_global_caches pblk_caches = { struct bio_set pblk_bio_set; -static blk_qc_t pblk_submit_bio(struct bio *bio) +static void pblk_submit_bio(struct bio *bio) { struct pblk *pblk = bio->bi_disk->queue->queuedata; @@ -55,7 +55,7 @@ static blk_qc_t pblk_submit_bio(struct bio *bio) pblk_discard(pblk, bio); if (!(bio->bi_opf & REQ_PREFLUSH)) { 
bio_endio(bio); - return BLK_QC_T_NONE; + return; } } @@ -75,8 +75,6 @@ static blk_qc_t pblk_submit_bio(struct bio *bio) pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); } - - return BLK_QC_T_NONE; } static const struct block_device_operations pblk_bops = { diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 97895262fc5420d08d2d210ebcd00c079597fb7b..657276b21c07096c36d1df38e76b8ec7bb9e2442 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1170,7 +1170,7 @@ static void quit_max_writeback_rate(struct cache_set *c, /* Cached devices - read & write stuff */ -blk_qc_t cached_dev_submit_bio(struct bio *bio) +void cached_dev_submit_bio(struct bio *bio) { struct search *s; struct bcache_device *d = bio->bi_disk->private_data; @@ -1181,7 +1181,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio) dc->io_disable)) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - return BLK_QC_T_NONE; + return; } if (likely(d->c)) { @@ -1225,8 +1225,6 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio) } else /* I/O request sent to backing device */ detached_dev_do_request(d, bio); - - return BLK_QC_T_NONE; } static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, @@ -1275,7 +1273,7 @@ static void flash_dev_nodata(struct closure *cl) continue_at(cl, search_free, NULL); } -blk_qc_t flash_dev_submit_bio(struct bio *bio) +void flash_dev_submit_bio(struct bio *bio) { struct search *s; struct closure *cl; @@ -1284,7 +1282,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - return BLK_QC_T_NONE; + return; } s = search_alloc(bio, d); @@ -1300,7 +1298,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) continue_at_nobarrier(&s->cl, flash_dev_nodata, bcache_wq); - return BLK_QC_T_NONE; + return; } else if (bio_data_dir(bio)) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 
0), @@ -1316,7 +1314,6 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) } continue_at(cl, search_free, NULL); - return BLK_QC_T_NONE; } static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 82b38366a95deb8f9fabf61379df5ca3bb51ff91..38ab4856eaab0dd9f73698db5dd30e012070f0d4 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c); void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); -blk_qc_t cached_dev_submit_bio(struct bio *bio); +void cached_dev_submit_bio(struct bio *bio); void bch_flash_dev_request_init(struct bcache_device *d); -blk_qc_t flash_dev_submit_bio(struct bio *bio); +void flash_dev_submit_bio(struct bio *bio); extern struct kmem_cache *bch_search_cache; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 97211bf2950439fe84d940f97e9f090ea7b7e5e0..82c03c7e6447e49e5965d763d51d54afef82f1c7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1294,14 +1294,13 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) mutex_unlock(&md->swap_bios_lock); } -static blk_qc_t __map_bio(struct dm_target_io *tio) +static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct bio *clone = &tio->clone; struct dm_io *io = tio->io; struct dm_target *ti = tio->ti; - blk_qc_t ret = BLK_QC_T_NONE; clone->bi_end_io = clone_endio; @@ -1329,7 +1328,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) /* the bio has been remapped so dispatch it */ trace_block_bio_remap(clone->bi_disk->queue, clone, bio_dev(io->orig_bio), sector); - ret = submit_bio_noacct(clone); + submit_bio_noacct(clone); break; case DM_MAPIO_KILL: if (unlikely(swap_bios_limit(ti, clone))) { @@ -1351,8 +1350,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) DMWARN("unimplemented target map return value: %d", r); BUG(); } - 
- return ret; } static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) @@ -1439,7 +1436,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, } } -static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci, +static void __clone_and_map_simple_bio(struct clone_info *ci, struct dm_target_io *tio, unsigned *len) { struct bio *clone = &tio->clone; @@ -1449,8 +1446,7 @@ static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci, __bio_clone_fast(clone, ci->bio); if (len) bio_setup_sector(clone, ci->sector, *len); - - return __map_bio(tio); + __map_bio(tio); } static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, @@ -1464,7 +1460,7 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, while ((bio = bio_list_pop(&blist))) { tio = container_of(bio, struct dm_target_io, clone); - (void) __clone_and_map_simple_bio(ci, tio, len); + __clone_and_map_simple_bio(ci, tio, len); } } @@ -1515,7 +1511,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, free_tio(tio); return r; } - (void) __map_bio(tio); + __map_bio(tio); return 0; } @@ -1630,11 +1626,10 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, /* * Entry point to split a bio into clones and submit them to the targets. 
*/ -static blk_qc_t __split_and_process_bio(struct mapped_device *md, +static void __split_and_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { struct clone_info ci; - blk_qc_t ret = BLK_QC_T_NONE; int error = 0; init_clone_info(&ci, md, map, bio); @@ -1678,7 +1673,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, bio_chain(b, bio); trace_block_split(md->queue, b, bio->bi_iter.bi_sector); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); break; } } @@ -1686,13 +1681,11 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, /* drop the extra reference count */ dec_pending(ci.io, errno_to_blk_status(error)); - return ret; } -static blk_qc_t dm_submit_bio(struct bio *bio) +static void dm_submit_bio(struct bio *bio) { struct mapped_device *md = bio->bi_disk->private_data; - blk_qc_t ret = BLK_QC_T_NONE; int srcu_idx; struct dm_table *map; @@ -1717,10 +1710,9 @@ static blk_qc_t dm_submit_bio(struct bio *bio) if (is_abnormal_io(bio)) blk_queue_split(&bio); - ret = __split_and_process_bio(md, map, bio); + __split_and_process_bio(md, map, bio); out: dm_put_live_table(md, srcu_idx); - return ret; } /*----------------------------------------------------------------- diff --git a/drivers/md/md.c b/drivers/md/md.c index f6a4d0893c470ff2b566884739e20271e0a22ea9..dc55d5c8be8f0915584d04105e04ed6106fed583 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -459,7 +459,7 @@ void md_handle_request(struct mddev *mddev, struct bio *bio) } EXPORT_SYMBOL(md_handle_request); -static blk_qc_t md_submit_bio(struct bio *bio) +static void md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); const int sgrp = op_stat_group(bio_op(bio)); @@ -468,12 +468,12 @@ static blk_qc_t md_submit_bio(struct bio *bio) if (mddev == NULL || mddev->pers == NULL) { bio_io_error(bio); - return BLK_QC_T_NONE; + return; } if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { bio_io_error(bio); - return 
BLK_QC_T_NONE; + return; } blk_queue_split(&bio); @@ -482,7 +482,7 @@ static blk_qc_t md_submit_bio(struct bio *bio) if (bio_sectors(bio) != 0) bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - return BLK_QC_T_NONE; + return; } /* @@ -499,8 +499,6 @@ static blk_qc_t md_submit_bio(struct bio *bio) part_stat_inc(&mddev->gendisk->part0, ios[sgrp]); part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors); part_stat_unlock(); - - return BLK_QC_T_NONE; } /* mddev_suspend makes sure no new requests are submitted diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 22e5617b2cea14d94ce66819d04f1fe4c9cf0039..5255500df82c6dc384c5befe0a95f686cd47b799 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -162,7 +162,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk, return err; } -static blk_qc_t nd_blk_submit_bio(struct bio *bio) +static void nd_blk_submit_bio(struct bio *bio) { struct bio_integrity_payload *bip; struct nd_namespace_blk *nsblk = bio->bi_disk->private_data; @@ -173,7 +173,7 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio) bool do_acct; if (!bio_integrity_prep(bio)) - return BLK_QC_T_NONE; + return; bip = bio_integrity(bio); rw = bio_data_dir(bio); @@ -199,7 +199,6 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio) bio_end_io_acct(bio, start); bio_endio(bio); - return BLK_QC_T_NONE; } static int nsblk_rw_bytes(struct nd_namespace_common *ndns, diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 12ff6f8784ac11cf382d376bc877e460159b68d0..696be5407e0ed2d2c3ca6ba1e7f257fcfe5ffeca 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1439,7 +1439,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, return ret; } -static blk_qc_t btt_submit_bio(struct bio *bio) +static void btt_submit_bio(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct btt *btt = bio->bi_disk->private_data; @@ -1450,7 +1450,7 @@ static blk_qc_t btt_submit_bio(struct bio *bio) 
bool do_acct; if (!bio_integrity_prep(bio)) - return BLK_QC_T_NONE; + return; do_acct = blk_queue_io_stat(bio->bi_disk->queue); if (do_acct) @@ -1482,7 +1482,6 @@ static blk_qc_t btt_submit_bio(struct bio *bio) bio_end_io_acct(bio, start); bio_endio(bio); - return BLK_QC_T_NONE; } static int btt_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 5e7e2b74ed694b42ce341cd42819f85e03ac6619..80d3d71ad3ab5dc2f242db64c64ce0c8a271dd87 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -189,7 +189,7 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem, return rc; } -static blk_qc_t pmem_submit_bio(struct bio *bio) +static void pmem_submit_bio(struct bio *bio) { int ret = 0; blk_status_t rc = 0; @@ -228,7 +228,6 @@ static blk_qc_t pmem_submit_bio(struct bio *bio) bio->bi_status = errno_to_blk_status(ret); bio_endio(bio); - return BLK_QC_T_NONE; } static int pmem_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2b3799dd6e49c99bdd97f4f6ef6c682a17a214e0..d85b2dd99acd4acacf657f83d7f48c6aabbb29dc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -337,15 +337,19 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) return RETRY; } -static inline void nvme_end_req(struct request *req) +static inline void nvme_end_req_zoned(struct request *req) { - blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && req_op(req) == REQ_OP_ZONE_APPEND) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); +} + +static inline void nvme_end_req(struct request *req) +{ + blk_status_t status = nvme_error_status(nvme_req(req)->status); + nvme_end_req_zoned(req); nvme_trace_bio_complete(req, status); blk_mq_end_request(req, status); } @@ -372,6 +376,13 @@ void nvme_complete_rq(struct request *req) } 
EXPORT_SYMBOL_GPL(nvme_complete_rq); +void nvme_complete_batch_req(struct request *req) +{ + nvme_cleanup_cmd(req); + nvme_end_req_zoned(req); +} +EXPORT_SYMBOL_GPL(nvme_complete_batch_req); + /* * Called to unwind from ->queue_rq on a failed command submission so that the * multipathing code gets called to potentially failover to another path. @@ -1029,12 +1040,12 @@ static void nvme_execute_rq_polled(struct request_queue *q, WARN_ON_ONCE(!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)); - rq->cmd_flags |= REQ_HIPRI; + rq->cmd_flags |= REQ_POLLED; rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq); while (!completion_done(&wait)) { - blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true); + bio_poll(rq->bio, NULL, 0); cond_resched(); } } diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index ab348dee23cf5863ec6b50608f755764e18c23e5..b988f10eaf3d30a0539cfd4a7d6735dc6d1f28f1 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -463,7 +463,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, blk_flags = BLK_MQ_REQ_NOWAIT; } if (issue_flags & IO_URING_F_IOPOLL) - rq_flags |= REQ_HIPRI; + rq_flags |= REQ_POLLED; retry: req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr), @@ -475,15 +475,15 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return PTR_ERR(req); req->end_io_data = ioucmd; - if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_HIPRI) { + if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) { if (unlikely(!req->bio)) { /* we can't poll this, so alloc regular req instead */ blk_mq_free_request(req); - rq_flags &= ~REQ_HIPRI; + rq_flags &= ~REQ_POLLED; goto retry; } else { WRITE_ONCE(ioucmd->cookie, req); - req->bio->bi_opf |= REQ_HIPRI; + req->bio->bi_opf |= REQ_POLLED; } } /* to free bio on completion, as req->bio will be null at that time */ @@ -633,7 +633,9 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, 
unsigned int issue_flags) return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); } -int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd) +int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, + struct io_comp_batch *iob, + unsigned int poll_flags) { struct request *req; int ret = 0; @@ -645,7 +647,7 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd) struct nvme_ns, cdev); q = ns->queue; if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - ret = blk_poll(q, request_to_qc_t(req->mq_hctx, req), true); + ret = bio_poll(req->bio, iob, 0); return ret; } #ifdef CONFIG_NVME_MULTIPATH @@ -731,7 +733,9 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, return ret; } -int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd) +int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, + struct io_comp_batch *iob, + unsigned int poll_flags) { struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); @@ -745,7 +749,7 @@ int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd) req = READ_ONCE(ioucmd->cookie); q = ns->queue; if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - ret = blk_poll(q, request_to_qc_t(req->mq_hctx, req), true); + ret = bio_poll(req->bio, iob, 0); } srcu_read_unlock(&head->srcu, srcu_idx); return ret; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 029fbe30ae577e4a8e87638a77d210c2a0bd852d..3fd041305c62a40ba9d9ed6133186484d3cc78f0 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -293,12 +293,11 @@ static bool nvme_available_path(struct nvme_ns_head *head) return false; } -static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) +static void nvme_ns_head_submit_bio(struct bio *bio) { struct nvme_ns_head *head = bio->bi_disk->private_data; struct device *dev = disk_to_dev(head->disk); struct nvme_ns *ns; - blk_qc_t ret = BLK_QC_T_NONE; int srcu_idx; /* @@ -316,7 
+315,7 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) trace_block_bio_remap(bio->bi_disk->queue, bio, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); } else if (nvme_available_path(head)) { dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); @@ -331,7 +330,6 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) } srcu_read_unlock(&head->srcu, srcu_idx); - return ret; } static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 98668ed0b9cc7b094ceef4bd1436cdf854bbd244..e5185781d00a06cf74e67b1311fc0548d4c07b0e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -684,6 +684,20 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) } void nvme_complete_rq(struct request *req); +void nvme_complete_batch_req(struct request *req); + +static __always_inline void nvme_complete_batch(struct io_comp_batch *iob, + void (*fn)(struct request *rq)) +{ + struct request *req; + + rq_list_for_each(&iob->req_list, req) { + fn(req); + nvme_complete_batch_req(req); + } + blk_mq_end_request_batch(iob); +} + blk_status_t nvme_host_path_error(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); void nvme_cancel_tagset(struct nvme_ctrl *ctrl); @@ -781,8 +795,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd); -int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd); +int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, + struct io_comp_batch *iob, unsigned int poll_flags); +int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, + struct io_comp_batch *iob, unsigned int poll_flags); int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, 
unsigned int issue_flags); int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2fffb08385043f8967c9ab4c825ab999a14ccc84..8cbaf166e55d0e5799b936e45e6cdc638adad03d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -971,7 +971,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } -static void nvme_pci_complete_rq(struct request *req) +static __always_inline void nvme_pci_unmap_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_dev *dev = iod->nvmeq->dev; @@ -981,9 +981,19 @@ static void nvme_pci_complete_rq(struct request *req) rq_integrity_vec(req)->bv_len, rq_data_dir(req)); if (blk_rq_nr_phys_segments(req)) nvme_unmap_data(dev, req); +} + +static void nvme_pci_complete_rq(struct request *req) +{ + nvme_pci_unmap_rq(req); nvme_complete_rq(req); } +static void nvme_pci_complete_batch(struct io_comp_batch *iob) +{ + nvme_complete_batch(iob, nvme_pci_unmap_rq); +} + /* We read the CQE phase first to check if the rest of the entry is valid */ static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { @@ -1008,7 +1018,8 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; } -static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) +static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, + struct io_comp_batch *iob, u16 idx) { struct nvme_completion *cqe = &nvmeq->cqes[idx]; __u16 command_id = READ_ONCE(cqe->command_id); @@ -1035,7 +1046,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) } trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); - if (!nvme_try_complete_req(req, cqe->status, cqe->result)) + if (!nvme_try_complete_req(req, cqe->status, cqe->result) && + !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, + nvme_pci_complete_batch)) nvme_pci_complete_rq(req); } @@ -1051,7 +1064,8 @@ 
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } } -static inline int nvme_process_cq(struct nvme_queue *nvmeq) +static inline int nvme_poll_cq(struct nvme_queue *nvmeq, + struct io_comp_batch *iob) { int found = 0; @@ -1062,7 +1076,7 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq) * the cqe requires a full read memory barrier */ dma_rmb(); - nvme_handle_cqe(nvmeq, nvmeq->cq_head); + nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head); nvme_update_cq_head(nvmeq); } @@ -1075,14 +1089,18 @@ static irqreturn_t nvme_irq(int irq, void *data) { struct nvme_queue *nvmeq = data; irqreturn_t ret = IRQ_NONE; + DEFINE_IO_COMP_BATCH(iob); /* * The rmb/wmb pair ensures we see all updates from a previous run of * the irq handler, even if that was on another CPU. */ rmb(); - if (nvme_process_cq(nvmeq)) + if (nvme_poll_cq(nvmeq, &iob)) { + if (!rq_list_empty(iob.req_list)) + nvme_pci_complete_batch(&iob); ret = IRQ_HANDLED; + } wmb(); return ret; @@ -1108,11 +1126,11 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - nvme_process_cq(nvmeq); + nvme_poll_cq(nvmeq, NULL); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } -static int nvme_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_queue *nvmeq = hctx->driver_data; bool found; @@ -1121,7 +1139,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx) return 0; spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq); + found = nvme_poll_cq(nvmeq, iob); spin_unlock(&nvmeq->cq_poll_lock); return found; @@ -1298,7 +1316,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * Did we miss an interrupt? 
*/ if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) - nvme_poll(req->mq_hctx); + nvme_poll(req->mq_hctx, NULL); else nvme_poll_irqdisable(nvmeq); @@ -1460,7 +1478,7 @@ static void nvme_reap_pending_cqes(struct nvme_dev *dev) for (i = dev->ctrl.queue_count - 1; i > 0; i--) { spin_lock(&dev->queues[i].cq_poll_lock); - nvme_process_cq(&dev->queues[i]); + nvme_poll_cq(&dev->queues[i], NULL); spin_unlock(&dev->queues[i].cq_poll_lock); } } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 5fad331aed2ccffc09e586af773506ed7c8a3fd4..be245e61564e1e5be05b31641f27db5e88dddfa0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2146,7 +2146,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } -static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_rdma_queue *queue = hctx->driver_data; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 21f7d7f9661ba29497fdc5a6b0cca43089a2e01a..a2d6a4e0710f46513af35fac98e768114591ad34 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2522,7 +2522,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) return 0; } -static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 299e77ec2c416395515182a2ec983649cf0d4ca6..9301b37cf53f7a46d4132eea74896423fe1c2e42 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -31,7 +31,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); -static blk_qc_t dcssblk_submit_bio(struct bio *bio); +static void dcssblk_submit_bio(struct bio *bio); static long 
dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); @@ -865,7 +865,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode) up_write(&dcssblk_devices_sem); } -static blk_qc_t +static void dcssblk_submit_bio(struct bio *bio) { struct dcssblk_dev_info *dev_info; @@ -923,10 +923,9 @@ dcssblk_submit_bio(struct bio *bio) bytes_done += bvec.bv_len; } bio_endio(bio); - return BLK_QC_T_NONE; + return; fail: bio_io_error(bio); - return BLK_QC_T_NONE; } static long diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index c2536f7767b3664acef25e702e374aaca0349604..0a16f76afe8f403d3c1ac775562c14d6d99e8855 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -182,7 +182,7 @@ static unsigned long xpram_highest_page_index(void) /* * Block device make request function. */ -static blk_qc_t xpram_submit_bio(struct bio *bio) +static void xpram_submit_bio(struct bio *bio) { xpram_device_t *xdev = bio->bi_disk->private_data; struct bio_vec bvec; @@ -224,10 +224,9 @@ static blk_qc_t xpram_submit_bio(struct bio *bio) } } bio_endio(bio); - return BLK_QC_T_NONE; + return; fail: bio_io_error(bio); - return BLK_QC_T_NONE; } static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/fs/block_dev.c b/fs/block_dev.c index a4d92a4622f6a07c0a23b8af23323312be4a83aa..a8eca502e045ad7055f61e456afe2c83c393ccab 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -244,7 +244,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bool should_dirty = false; struct bio bio; ssize_t ret; - blk_qc_t qc; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) @@ -285,13 +284,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, if (iocb->ki_flags & IOCB_HIPRI) bio_set_polled(&bio, iocb); - qc = submit_bio(&bio); + submit_bio(&bio); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(bio.bi_private)) break; - 
if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc, true)) + if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -324,14 +322,6 @@ struct blkdev_dio { static struct bio_set blkdev_dio_pool; -static int blkdev_iopoll(struct kiocb *kiocb, bool wait) -{ - struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); - struct request_queue *q = bdev_get_queue(bdev); - - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); -} - static void blkdev_bio_end_io(struct bio *bio) { struct blkdev_dio *dio = bio->bi_private; @@ -345,6 +335,8 @@ static void blkdev_bio_end_io(struct bio *bio) struct kiocb *iocb = dio->iocb; ssize_t ret; + WRITE_ONCE(iocb->private, NULL); + if (likely(!dio->bio.bi_status)) { ret = dio->size; iocb->ki_pos += ret; @@ -380,10 +372,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct blk_plug plug; struct blkdev_dio *dio; struct bio *bio; - bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; + bool do_poll = (iocb->ki_flags & IOCB_HIPRI); bool is_read = (iov_iter_rw(iter) == READ), is_sync; loff_t pos = iocb->ki_pos; - blk_qc_t qc = BLK_QC_T_NONE; int ret = 0; if ((pos | iov_iter_alignment(iter)) & @@ -409,7 +400,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) * Don't plug for HIPRI/polled IO, as those should go straight * to issue */ - if (!is_poll) + if (!(iocb->ki_flags & IOCB_HIPRI)) blk_start_plug(&plug); for (;;) { @@ -443,17 +434,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); if (!nr_pages) { - bool polled = false; - - if (iocb->ki_flags & IOCB_HIPRI) { + if (do_poll) bio_set_polled(bio, iocb); - polled = true; - } - qc = submit_bio(bio); + submit_bio(bio); - if (polled) - WRITE_ONCE(iocb->ki_cookie, qc); + if (do_poll) + WRITE_ONCE(iocb->private, bio); break; } @@ -467,6 +454,7 @@ 
__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio_get(bio); dio->multi_bio = true; atomic_set(&dio->ref, 2); + do_poll = false; } else { atomic_inc(&dio->ref); } @@ -475,7 +463,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio = bio_alloc(GFP_KERNEL, nr_pages); } - if (!is_poll) + if (!(iocb->ki_flags & IOCB_HIPRI)) blk_finish_plug(&plug); if (!is_sync) @@ -486,8 +474,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) if (!READ_ONCE(dio->waiter)) break; - if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc, true)) + if (!do_poll || !bio_poll(bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -2075,7 +2062,7 @@ const struct file_operations def_blk_fops = { .llseek = block_llseek, .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, - .iopoll = blkdev_iopoll, + .iopoll = iocb_bio_iopoll, .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7782a447b3c45299a83343e8951b3e533582aa8f..40f277f201858e15e59c5e9010b4fc4675a2b552 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7863,7 +7863,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, return dip; } -static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, +static void btrfs_submit_direct(const struct iomap_iter *iter, struct bio *dio_bio, loff_t file_offset) { struct inode *inode = iter->inode; @@ -7892,7 +7892,7 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, } dio_bio->bi_status = BLK_STS_RESOURCE; bio_endio(dio_bio); - return BLK_QC_T_NONE; + return; } if (!write && csum) { @@ -7968,12 +7968,11 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, start_sector += clone_len >> 9; file_offset += clone_len; } while (submit_len > 0); - return BLK_QC_T_NONE; + return; out_err: dip->dio_bio->bi_status = 
status; btrfs_dio_private_put(dip); - return BLK_QC_T_NONE; } static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, diff --git a/fs/direct-io.c b/fs/direct-io.c index c64d4eb38995a7639eec0756111071d8a861830f..5fe6a16464c5e6ac096793ac217c74562cad4017 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -119,7 +119,6 @@ struct dio { int flags; /* doesn't change */ int op; int op_flags; - blk_qc_t bio_cookie; struct gendisk *bio_disk; struct inode *inode; loff_t i_size; /* i_size when submitted */ @@ -436,11 +435,10 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) dio->bio_disk = bio->bi_disk; - if (sdio->submit_io) { + if (sdio->submit_io) sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio); - dio->bio_cookie = BLK_QC_T_NONE; - } else - dio->bio_cookie = submit_bio(bio); + else + submit_bio(bio); sdio->bio = NULL; sdio->boundary = 0; @@ -479,9 +477,7 @@ static struct bio *dio_await_one(struct dio *dio) __set_current_state(TASK_UNINTERRUPTIBLE); dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); - if (!(dio->iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true)) - blk_io_schedule(); + blk_io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; @@ -1212,8 +1208,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } else { dio->op = REQ_OP_READ; } - if (iocb->ki_flags & IOCB_HIPRI) - dio->op_flags |= REQ_HIPRI; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 19a8e55d6a3073d0df016d8c65e6667ad79d07d3..19221aec95f4d9fdc5adb83b7b2bfb8921c93776 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -901,7 +901,7 @@ const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, .read_iter = ext4_file_read_iter, .write_iter = ext4_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl 
= ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 24ab28f02004b42e71e428724d51287700d70b93..c1391cb2015212a4f6f1d182907b4f149db9519f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1365,7 +1365,7 @@ const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, @@ -1398,7 +1398,7 @@ const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index fd8548171261a469e7dd1db3f7613171baa6cf1c..f065df2626cbf5a391a186f1ccf335363a5ecf1b 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -38,8 +38,7 @@ struct iomap_dio { struct { struct iov_iter *iter; struct task_struct *waiter; - struct request_queue *last_queue; - blk_qc_t cookie; + struct bio *poll_bio; } submit; /* used for aio completion: */ @@ -49,29 +48,20 @@ struct iomap_dio { }; }; -int iomap_dio_iopoll(struct kiocb *kiocb, bool spin) -{ - struct request_queue *q = READ_ONCE(kiocb->private); - - if (!q) - return 0; - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), spin); -} -EXPORT_SYMBOL_GPL(iomap_dio_iopoll); - static void iomap_dio_submit_bio(const struct iomap_iter *iter, struct iomap_dio *dio, struct bio *bio, loff_t pos) { atomic_inc(&dio->ref); - if (dio->iocb->ki_flags & IOCB_HIPRI) + if (dio->iocb->ki_flags & IOCB_HIPRI) { bio_set_polled(bio, dio->iocb); + dio->submit.poll_bio = bio; + } - dio->submit.last_queue = bdev_get_queue(iter->iomap.bdev); if (dio->dops && dio->dops->submit_io) - dio->submit.cookie = 
dio->dops->submit_io(iter, bio, pos); + dio->dops->submit_io(iter, bio, pos); else - dio->submit.cookie = submit_bio(bio); + submit_bio(bio); } ssize_t iomap_dio_complete(struct iomap_dio *dio) @@ -162,9 +152,11 @@ static void iomap_dio_bio_end_io(struct bio *bio) } else if (dio->flags & IOMAP_DIO_WRITE) { struct inode *inode = file_inode(dio->iocb->ki_filp); + WRITE_ONCE(dio->iocb->private, NULL); INIT_WORK(&dio->aio.work, iomap_dio_complete_work); queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); } else { + WRITE_ONCE(dio->iocb->private, NULL); iomap_dio_complete_work(&dio->aio.work); } } @@ -254,6 +246,13 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, goto out; } + /* + * We can only poll for single bio I/Os. + */ + if (need_zeroout || + ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) + dio->iocb->ki_flags &= ~IOCB_HIPRI; + if (need_zeroout) { /* zero out from the start of the block to the write offset */ pad = pos & (fs_block_size - 1); @@ -307,6 +306,12 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, copied += n; nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES); + + /* + * We can only poll for single bio I/Os. + */ + if (nr_pages) + dio->iocb->ki_flags &= ~IOCB_HIPRI; iomap_dio_submit_bio(iter, dio, bio, pos); pos += n; } while (nr_pages); @@ -453,8 +458,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->submit.iter = iter; dio->submit.waiter = current; - dio->submit.cookie = BLK_QC_T_NONE; - dio->submit.last_queue = NULL; + dio->submit.poll_bio = NULL; if (iov_iter_rw(iter) == READ) { if (iomi.pos >= dio->i_size) @@ -525,8 +529,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, inode_dio_begin(inode); blk_start_plug(&plug); - while ((ret = iomap_iter(&iomi, ops)) > 0) + while ((ret = iomap_iter(&iomi, ops)) > 0) { iomi.processed = iomap_dio_iter(&iomi, dio); + + /* + * We can only poll for single bio I/Os. 
+ */ + iocb->ki_flags &= ~IOCB_HIPRI; + } + blk_finish_plug(&plug); /* @@ -552,8 +563,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (dio->flags & IOMAP_DIO_WRITE_FUA) dio->flags &= ~IOMAP_DIO_NEED_SYNC; - WRITE_ONCE(iocb->ki_cookie, dio->submit.cookie); - WRITE_ONCE(iocb->private, dio->submit.last_queue); + WRITE_ONCE(iocb->private, dio->submit.poll_bio); /* * We are about to drop our additional submission reference, which @@ -580,10 +590,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->submit.waiter)) break; - if (!(iocb->ki_flags & IOCB_HIPRI) || - !dio->submit.last_queue || - !blk_poll(dio->submit.last_queue, - dio->submit.cookie, true)) + if (!dio->submit.poll_bio || + !bio_poll(dio->submit.poll_bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a7bc5c923113a9ac8f77800af3450763b99aaf6b..f107cb2818c582a63c88b8357941b58bec75879c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1451,7 +1451,7 @@ const struct file_operations xfs_file_operations = { .write_iter = xfs_file_write_iter, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index c49f784eb2541026e0d26609b51fe3c914faee24..1a3c710aabc30340f22bd431d14c1d35e2d91792 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1178,7 +1178,7 @@ static const struct file_operations zonefs_file_operations = { .write_iter = zonefs_file_write_iter, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, }; static struct kmem_cache *zonefs_inode_cachep; diff --git a/include/linux/bio.h b/include/linux/bio.h index 
e68418798187d4a2e0a78d7792b79f8ee401de49..945e5e3d4d4faa33fb3f08bf4b1255a0458ecadf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -435,7 +435,7 @@ static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); } -extern blk_qc_t submit_bio(struct bio *); +void submit_bio(struct bio *bio); extern void bio_endio(struct bio *); @@ -837,7 +837,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, */ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) { - bio->bi_opf |= REQ_HIPRI; + bio->bi_opf |= REQ_POLLED; if (!is_sync_kiocb(kiocb)) bio->bi_opf |= REQ_NOWAIT; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 268aab7bb0430fc2e6b9257a5407e5bbdf2162bb..8b93579b167e09afebdd72bc9350159b80963db2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -152,9 +152,9 @@ struct blk_mq_hw_ctx { /** @kobj: Kernel object for sysfs. */ struct kobject kobj; - /** @poll_considered: Count times blk_poll() was called. */ + /** @poll_considered: Count times blk_mq_poll() was called. */ unsigned long poll_considered; - /** @poll_invoked: Count how many requests blk_poll() polled. */ + /** @poll_invoked: Count how many requests blk_mq_poll() polled. */ unsigned long poll_invoked; /** @poll_success: Count how many polled requests were completed. */ unsigned long poll_success; @@ -345,7 +345,7 @@ struct blk_mq_ops { /** * @poll: Called to poll for completion of a specific tag. */ - int (*poll)(struct blk_mq_hw_ctx *); + int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); /** * @complete: Mark the request as complete. 
@@ -538,6 +538,35 @@ static inline void blk_mq_set_request_complete(struct request *rq) void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); +void blk_mq_end_request_batch(struct io_comp_batch *ib); + +/* + * Only need start/end time stamping if we have iostat or + * blk stats enabled, or using an IO scheduler. + */ +static inline bool blk_mq_need_time_stamp(struct request *rq) +{ + return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator; +} + +/* + * Batched completions only work when there is no I/O error and no special + * ->end_io handler. + */ +static inline bool blk_mq_add_to_batch(struct request *req, + struct io_comp_batch *iob, int ioerror, + void (*complete)(struct io_comp_batch *)) +{ + if (!iob || req->q->elevator || req->end_io || ioerror) + return false; + if (!iob->complete) + iob->complete = complete; + else if (iob->complete != complete) + return false; + iob->need_ts |= blk_mq_need_time_stamp(req); + rq_list_add(&iob->req_list, req); + return true; +} void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); @@ -619,22 +648,10 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) -static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag != -1) - return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); - - return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | - BLK_QC_T_INTERNAL; -} - static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) rq->q->mq_ops->cleanup_rq(rq); } -blk_qc_t blk_mq_submit_bio(struct bio *bio); - #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 
8b2100d9f58119ee15d39258b25dd1dbae5df961..d76c0ea3b32394704f4353bf45f5e5eb31f9f234 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -204,6 +204,9 @@ static inline void bio_issue_init(struct bio_issue *issue, ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -224,8 +227,8 @@ struct bio { struct bvec_iter bi_iter; + blk_qc_t bi_cookie; bio_end_io_t *bi_end_io; - void *bi_private; #ifdef CONFIG_BLK_CGROUP /* @@ -451,7 +454,7 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ - __REQ_HIPRI, + __REQ_POLLED, /* caller polls for completion using bio_poll */ /* for driver use */ __REQ_DRV, @@ -476,7 +479,7 @@ enum req_flag_bits { #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) -#define REQ_HIPRI (1ULL << __REQ_HIPRI) +#define REQ_POLLED (1ULL << __REQ_POLLED) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) @@ -564,31 +567,6 @@ static inline int op_stat_group(unsigned int op) return op_is_write(op); } -typedef unsigned int blk_qc_t; -#define BLK_QC_T_NONE -1U -#define BLK_QC_T_SHIFT 16 -#define BLK_QC_T_INTERNAL (1U << 31) - -static inline bool blk_qc_t_valid(blk_qc_t cookie) -{ - return cookie != BLK_QC_T_NONE; -} - -static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) -{ - return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; -} - -static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) -{ - return cookie & ((1u << BLK_QC_T_SHIFT) - 1); -} - -static inline bool blk_qc_t_is_internal(blk_qc_t cookie) -{ - return (cookie & BLK_QC_T_INTERNAL) != 0; -} - struct blk_rq_stat { u64 mean; u64 min; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 
0fbb2bee8f5175de6c78067990079be71e3ee933..c738b67897a59d6d269431d35d697f01780255f4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -39,6 +39,7 @@ struct sg_io_hdr; struct bsg_job; struct blkcg_gq; struct blk_flush_queue; +struct kiocb; struct pr_ops; struct rq_qos; struct blk_queue_stats; @@ -151,7 +152,10 @@ struct request { struct bio *bio; struct bio *biotail; - struct list_head queuelist; + union { + struct list_head queuelist; + struct request *rq_next; + }; /* * The hash is used inside the scheduler, and killed once the @@ -942,7 +946,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -blk_qc_t submit_bio_noacct(struct bio *bio); +void submit_bio_noacct(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, @@ -990,7 +994,13 @@ extern const char *blk_op_str(unsigned int op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); +/* only poll the hardware once, don't continue until a completion was found */ +#define BLK_POLL_ONESHOT (1 << 0) +/* do not sleep to wait for the expected completion time */ +#define BLK_POLL_NOSLEEP (1 << 1) +int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); +int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, + unsigned int flags); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { @@ -1276,11 +1286,17 @@ extern void blk_set_queue_dying(struct request_queue *); */ struct blk_plug { struct list_head mq_list; /* blk-mq requests */ - struct list_head cb_list; /* md requires an unplug callback */ + + /* if ios_left is > 1, we can batch tag/rq allocations */ + 
struct request *cached_rq; + unsigned short nr_ios; + unsigned short rq_count; bool multiple_queues; bool nowait; + struct list_head cb_list; /* md requires an unplug callback */ + CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) CK_KABI_RESERVE(3) @@ -1297,6 +1313,7 @@ struct blk_plug_cb { extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, int size); extern void blk_start_plug(struct blk_plug *); +extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); extern void blk_flush_plug_list(struct blk_plug *, bool); @@ -1331,6 +1348,11 @@ long nr_blockdev_pages(void); struct blk_plug { }; +static inline void blk_start_plug_nr_ios(struct blk_plug *plug, + unsigned short nr_ios) +{ +} + static inline void blk_start_plug(struct blk_plug *plug) { } @@ -1905,7 +1927,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { } struct block_device_operations { - blk_qc_t (*submit_bio) (struct bio *bio); + void (*submit_bio)(struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); @@ -2096,4 +2118,41 @@ int fsync_bdev(struct block_device *bdev); struct super_block *freeze_bdev(struct block_device *bdev); int thaw_bdev(struct block_device *bdev, struct super_block *sb); +struct io_comp_batch { + struct request *req_list; + bool need_ts; + void (*complete)(struct io_comp_batch *); +}; + +#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } + +#define rq_list_add(listptr, rq) do { \ + (rq)->rq_next = *(listptr); \ + *(listptr) = rq; \ +} while (0) + +#define rq_list_pop(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) { \ + __req = *(listptr); \ + *(listptr) = __req->rq_next; \ + } \ + __req; \ +}) + +#define rq_list_peek(listptr) \ +({ \ + struct request *__req = NULL; \ + if ((listptr) && *(listptr)) \ + __req = 
*(listptr); \ + __req; \ +}) + +#define rq_list_for_each(listptr, pos) \ + for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) \ + +#define rq_list_next(rq) (rq)->rq_next +#define rq_list_empty(list) ((list) == (struct request *) NULL) + #endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 2efec10bf79213e4f87712f09965f9f4bfc7389f..7ae15a379d47d9727183c0a6e02aa34d3e51c9e2 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -43,7 +43,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -}; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/fs.h b/include/linux/fs.h index d0e9620729b65373f7b4c807ee322b4ee83252aa..4d579c541bf26b1fd51cb3a08bb8bd8ebb18d8fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -47,6 +47,7 @@ struct backing_dev_info; struct bdi_writeback; struct bio; +struct io_comp_batch; struct export_operations; struct fiemap_extent_info; struct hd_geometry; @@ -332,11 +333,7 @@ struct kiocb { int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - union { - unsigned int ki_cookie; /* for ->iopoll */ - struct wait_page_queue *ki_waitq; /* for async buffered IO */ - }; - + struct wait_page_queue *ki_waitq; /* for async buffered IO */ randomized_struct_fields_end }; @@ -1880,7 +1877,8 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); - int (*iopoll)(struct kiocb *kiocb, bool spin); + int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, + unsigned int flags); int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); @@ -1914,7 +1912,8 @@ struct file_operations { loff_t len, unsigned int 
remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); - int (*uring_cmd_iopoll)(struct io_uring_cmd *ioucmd); + int (*uring_cmd_iopoll)(struct io_uring_cmd *ioucmd, struct io_comp_batch *, + unsigned int poll_flags); CK_KABI_DEPRECATE(bool, may_pollfree) CK_KABI_RESERVE(1) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 1f6ba5f9c5564b874e78f35d0259976c4dab4f0b..4ce041cbfcb814e9e524df3fa1b4c77edc520660 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -324,8 +324,8 @@ int iomap_writepages(struct address_space *mapping, struct iomap_dio_ops { int (*end_io)(struct kiocb *iocb, ssize_t size, int error, unsigned flags); - blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio, - loff_t file_offset); + void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, + loff_t file_offset); }; /* @@ -341,7 +341,6 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags); ssize_t iomap_dio_complete(struct iomap_dio *dio); -int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP struct file; diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 02a68ab78ca58b45da2d354e60bb08dd4c5f31e1..6ffbfbfd4acd5a56721d954a579db03238f38f73 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -490,6 +490,17 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu); +/** + * sbitmap_queue_clear_batch() - Free a batch of allocated bits + * &struct sbitmap_queue. + * @sbq: Bitmap to free from. 
+ * @offset: offset for each tag in array + * @tags: array of tags + * @nr_tags: number of tags in array + */ +void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, + int *tags, int nr_tags); + static inline int sbq_index_inc(int index) { return (index + 1) & (SBQ_WAIT_QUEUES - 1); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7d9ce6d21492794c92048e23ae1bd70828f630d2..4a218a8dcedb9f46343d7660e6fb76983cb67265 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -314,6 +314,7 @@ struct io_submit_state { unsigned int free_reqs; bool plug_started; + unsigned short submit_nr; /* * Batch completion logic @@ -2636,14 +2637,16 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, long min) { struct io_kiocb *req, *tmp; + unsigned int poll_flags = BLK_POLL_NOSLEEP; + DEFINE_IO_COMP_BATCH(iob); LIST_HEAD(done); - bool spin; /* * Only spin for completions if we don't have multiple devices hanging * off our complete list, and we're under the requested amount. 
*/ - spin = !ctx->poll_multi_queue && *nr_events < min; + if (ctx->poll_multi_queue || *nr_events >= min) + poll_flags |= BLK_POLL_ONESHOT; list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { struct kiocb *kiocb = &req->rw.kiocb; @@ -2664,19 +2667,23 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (req->opcode == IORING_OP_URING_CMD) { struct io_uring_cmd *ioucmd = &req->uring_cmd; - ret = req->file->f_op->uring_cmd_iopoll(ioucmd); + ret = req->file->f_op->uring_cmd_iopoll(ioucmd, &iob, + poll_flags); } else - ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); + ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags); if (unlikely(ret < 0)) return ret; else if (ret) - spin = false; + poll_flags |= BLK_POLL_ONESHOT; /* iopoll may have completed current req */ - if (READ_ONCE(req->iopoll_completed)) + if (!rq_list_empty(iob.req_list) || + READ_ONCE(req->iopoll_completed)) list_move_tail(&req->inflight_entry, &done); } + if (!rq_list_empty(iob.req_list)) + iob.complete(&iob); if (!list_empty(&done)) io_iopoll_complete(ctx, nr_events, &done); @@ -2958,19 +2965,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req) ctx->poll_multi_queue = false; } else if (!ctx->poll_multi_queue) { struct io_kiocb *list_req; - unsigned int queue_num0, queue_num1; list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, inflight_entry); - if (list_req->file != req->file) { + if (list_req->file != req->file) ctx->poll_multi_queue = true; - } else { - queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); - queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); - if (queue_num0 != queue_num1) - ctx->poll_multi_queue = true; - } } /* @@ -3094,6 +3094,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, !kiocb->ki_filp->f_op->iopoll) return -EOPNOTSUPP; + kiocb->private = NULL; kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; req->iopoll_completed = 0; @@ 
-7417,7 +7418,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, */ if (!state->plug_started && state->ios_left > 1 && io_op_defs[req->opcode].plug) { - blk_start_plug(&state->plug); + blk_start_plug_nr_ios(&state->plug, state->submit_nr); state->plug_started = true; } @@ -7537,6 +7538,7 @@ static void io_submit_state_start(struct io_submit_state *state, { state->plug_started = false; state->ios_left = max_ios; + state->submit_nr = max_ios; /* set only head, no need to init link_last in advance */ state->link.head = NULL; } diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 267aa7709416d61343fdbba05788625617786c62..d30ad425f61782bbfb6836ac3ccfdf850890d9e6 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -559,6 +559,47 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); +static inline void sbitmap_update_cpu_hint(struct sbitmap_queue *sbq, int cpu, + int tag) +{ + if (likely(!sbq->round_robin && tag < sbq->sb.depth)) + *per_cpu_ptr(sbq->alloc_hint, cpu) = tag; +} + +void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, + int *tags, int nr_tags) +{ + struct sbitmap *sb = &sbq->sb; + unsigned long *addr = NULL; + unsigned long mask = 0; + int i; + + smp_mb__before_atomic(); + for (i = 0; i < nr_tags; i++) { + const int tag = tags[i] - offset; + unsigned long *this_addr; + + /* since we're clearing a batch, skip the deferred map */ + this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word; + if (!addr) { + addr = this_addr; + } else if (addr != this_addr) { + atomic_long_andnot(mask, (atomic_long_t *) addr); + mask = 0; + addr = this_addr; + } + mask |= (1UL << SB_NR_TO_BIT(sb, tag)); + } + + if (mask) + atomic_long_andnot(mask, (atomic_long_t *) addr); + + smp_mb__after_atomic(); + sbitmap_queue_wake_up(sbq); + sbitmap_update_cpu_hint(sbq, raw_smp_processor_id(), + tags[nr_tags - 1] - offset); +} + void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { 
@@ -584,8 +625,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, smp_mb__after_atomic(); sbitmap_queue_wake_up(sbq); - if (likely(!sbq->round_robin && nr < sbq->sb.depth)) - *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; + sbitmap_update_cpu_hint(sbq, cpu, nr); } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); diff --git a/mm/page_io.c b/mm/page_io.c index f0ada4455895c7772fdbf2e588ce12f31810f021..d27db97c31f1192c7fdabeab6f491b5636a2dec2 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -326,8 +326,6 @@ int swap_readpage(struct page *page, bool synchronous) struct bio *bio; int ret = 0; struct swap_info_struct *sis = page_swap_info(page); - blk_qc_t qc; - struct gendisk *disk; unsigned long pflags; VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page); @@ -372,26 +370,25 @@ int swap_readpage(struct page *page, bool synchronous) ret = -ENOMEM; goto out; } - disk = bio->bi_disk; /* * Keep this task valid during swap readpage because the oom killer may * attempt to access it in the page fault retry time check. */ bio_set_op_attrs(bio, REQ_OP_READ, 0); if (synchronous) { - bio->bi_opf |= REQ_HIPRI; + bio->bi_opf |= REQ_POLLED; get_task_struct(current); bio->bi_private = current; } count_vm_event(PSWPIN); bio_get(bio); - qc = submit_bio(bio); + submit_bio(bio); while (synchronous) { set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(bio->bi_private)) break; - if (!blk_poll(disk->queue, qc, true)) + if (!bio_poll(bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING);