diff --git a/block/bio-integrity.c b/block/bio-integrity.c index a5fd04db5ae8e2aec1a5477de74271a5c3f28eb6..7ce634cffeb5888e978b7b06c86eceed033c859b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -14,8 +14,6 @@ #include #include "blk.h" -#define BIP_INLINE_VECS 4 - static struct kmem_cache *bip_slab; static struct workqueue_struct *kintegrityd_wq; @@ -30,7 +28,7 @@ static void __bio_integrity_free(struct bio_set *bs, if (bs && mempool_initialized(&bs->bio_integrity_pool)) { if (bip->bip_vec) bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, - bip->bip_slab); + bip->bip_max_vcnt); mempool_free(bip, &bs->bio_integrity_pool); } else { kfree(bip); @@ -63,7 +61,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, inline_vecs = nr_vecs; } else { bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask); - inline_vecs = BIP_INLINE_VECS; + inline_vecs = BIO_INLINE_VECS; } if (unlikely(!bip)) @@ -72,14 +70,11 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, memset(bip, 0, sizeof(*bip)); if (nr_vecs > inline_vecs) { - unsigned long idx = 0; - - bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, - &bs->bvec_integrity_pool); + bip->bip_max_vcnt = nr_vecs; + bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool, + &bip->bip_max_vcnt, gfp_mask); if (!bip->bip_vec) goto err; - bip->bip_max_vcnt = bvec_nr_vecs(idx); - bip->bip_slab = idx; } else { bip->bip_vec = bip->bip_inline_vecs; bip->bip_max_vcnt = inline_vecs; @@ -480,6 +475,6 @@ void __init bio_integrity_init(void) bip_slab = kmem_cache_create("bio_integrity_payload", sizeof(struct bio_integrity_payload) + - sizeof(struct bio_vec) * BIP_INLINE_VECS, + sizeof(struct bio_vec) * BIO_INLINE_VECS, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } diff --git a/block/bio.c b/block/bio.c index 43bf0920aa32d8af5a4888fdb8cc121cead636d9..8168636d7ad31ae689b3bb65da3d2c6f7de2536f 100644 --- a/block/bio.c +++ b/block/bio.c @@ -24,22 +24,34 @@ #include "blk.h" #include "blk-rq-qos.h" -/* - * 
Test patch to inline a certain number of bi_io_vec's inside the bio - * itself, to shrink a bio data allocation from two mempool calls to one - */ -#define BIO_INLINE_VECS 4 - -/* - * if you change this list, also change bvec_alloc or things will - * break badly! cannot be bigger than what you can fit into an - * unsigned short - */ -#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n } -static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = { - BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max), +static struct biovec_slab { + int nr_vecs; + char *name; + struct kmem_cache *slab; +} bvec_slabs[] __read_mostly = { + { .nr_vecs = 16, .name = "biovec-16" }, + { .nr_vecs = 64, .name = "biovec-64" }, + { .nr_vecs = 128, .name = "biovec-128" }, + { .nr_vecs = BIO_MAX_PAGES, .name = "biovec-max" }, }; -#undef BV + +static struct biovec_slab *biovec_slab(unsigned short nr_vecs) +{ + switch (nr_vecs) { + /* smaller bios use inline vecs */ + case 5 ... 16: + return &bvec_slabs[0]; + case 17 ... 64: + return &bvec_slabs[1]; + case 65 ... 128: + return &bvec_slabs[2]; + case 129 ... 
BIO_MAX_PAGES: + return &bvec_slabs[3]; + default: + BUG(); + return NULL; + } +} /* * fs_bio_set is the bio_set containing bio and iovec memory pools used by @@ -146,90 +158,55 @@ static void bio_put_slab(struct bio_set *bs) mutex_unlock(&bio_slab_lock); } -unsigned int bvec_nr_vecs(unsigned short idx) +void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs) { - return bvec_slabs[--idx].nr_vecs; -} - -void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) -{ - if (!idx) - return; - idx--; + BIO_BUG_ON(nr_vecs > BIO_MAX_PAGES); - BIO_BUG_ON(idx >= BVEC_POOL_NR); - - if (idx == BVEC_POOL_MAX) { + if (nr_vecs == BIO_MAX_PAGES) mempool_free(bv, pool); - } else { - struct biovec_slab *bvs = bvec_slabs + idx; + else if (nr_vecs > BIO_INLINE_VECS) + kmem_cache_free(biovec_slab(nr_vecs)->slab, bv); +} - kmem_cache_free(bvs->slab, bv); - } +/* + * Make the first allocation restricted and don't dump info on allocation + * failures, since we'll fall back to the mempool in case of failure. + */ +static inline gfp_t bvec_alloc_gfp(gfp_t gfp) +{ + return (gfp & ~(__GFP_DIRECT_RECLAIM | __GFP_IO)) | + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; } -struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, - mempool_t *pool) +struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, + gfp_t gfp_mask) { - struct bio_vec *bvl; + struct biovec_slab *bvs = biovec_slab(*nr_vecs); - /* - * see comment near bvec_array define! - */ - switch (nr) { - case 1: - *idx = 0; - break; - case 2 ... 4: - *idx = 1; - break; - case 5 ... 16: - *idx = 2; - break; - case 17 ... 64: - *idx = 3; - break; - case 65 ... 128: - *idx = 4; - break; - case 129 ... BIO_MAX_PAGES: - *idx = 5; - break; - default: + if (WARN_ON_ONCE(!bvs)) return NULL; - } /* - * idx now points to the pool we want to allocate from. only the - * 1-vec entry pool is mempool backed. + * Upgrade the nr_vecs request to take full advantage of the allocation. 
+ * We also rely on this in the bvec_free path. */ - if (*idx == BVEC_POOL_MAX) { -fallback: - bvl = mempool_alloc(pool, gfp_mask); - } else { - struct biovec_slab *bvs = bvec_slabs + *idx; - gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO); + *nr_vecs = bvs->nr_vecs; - /* - * Make this allocation restricted and don't dump info on - * allocation failures, since we'll fallback to the mempool - * in case of failure. - */ - __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; + /* + * Try a slab allocation first for all smaller allocations. If that + * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool. + * The mempool is sized to handle up to BIO_MAX_PAGES entries. + */ + if (*nr_vecs < BIO_MAX_PAGES) { + struct bio_vec *bvl; - /* - * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM - * is set, retry with the 1-entry mempool - */ - bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); - if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { - *idx = BVEC_POOL_MAX; - goto fallback; - } + bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask)); + if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM)) + return bvl; + *nr_vecs = BIO_MAX_PAGES; } - (*idx)++; - return bvl; + return mempool_alloc(pool, gfp_mask); } void bio_uninit(struct bio *bio) @@ -255,7 +232,7 @@ static void bio_free(struct bio *bio) bio_uninit(bio); if (bs) { - bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); + bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs); /* * If we have front padding, adjust the bio pointer before freeing @@ -299,12 +276,8 @@ EXPORT_SYMBOL(bio_init); */ void bio_reset(struct bio *bio) { - unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); - bio_uninit(bio); - memset(bio, 0, BIO_RESET_BYTES); - bio->bi_flags = flags; atomic_set(&bio->__bi_remaining, 1); } EXPORT_SYMBOL(bio_reset); @@ -434,7 +407,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs) * RETURNS: * Pointer to new bio on 
success, NULL on failure. */ -struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, +struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs_int, struct bio_set *bs) { gfp_t saved_gfp = gfp_mask; @@ -443,6 +416,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, struct bio_vec *bvl = NULL; struct bio *bio; void *p; + unsigned short nr_iovecs = (unsigned short)nr_iovecs_int; if (!bs) { if (nr_iovecs > UIO_MAXIOV) @@ -501,19 +475,17 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs, bio_init(bio, NULL, 0); if (nr_iovecs > inline_vecs) { - unsigned long idx = 0; - bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool); + bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); gfp_mask = saved_gfp; - bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool); + bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask); } if (unlikely(!bvl)) goto err_free; - bio->bi_flags |= idx << BVEC_POOL_OFFSET; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } @@ -674,7 +646,7 @@ EXPORT_SYMBOL(bio_put); */ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) { - BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio)); + WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs); /* * most users will be overriding ->bi_disk with a new target, @@ -1133,8 +1105,10 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) } } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); - if (is_bvec) + if (is_bvec) { bio_set_flag(bio, BIO_NO_PAGE_REF); + bio_set_flag(bio, BIO_CLONED); + } /* don't account direct I/O as memory stall */ bio_clear_flag(bio, BIO_WORKINGSET); @@ -1436,8 +1410,7 @@ void bio_endio(struct bio *bio) if (!bio_integrity_endio(bio)) return; - if (bio->bi_disk && bio_flagged(bio, BIO_TRACKED)) - rq_qos_done_bio(bio->bi_disk->queue, bio); + rq_qos_done_bio(bio); /* * Need to have a real endio function for chained bios, otherwise @@ 
-1540,7 +1513,7 @@ EXPORT_SYMBOL_GPL(bio_trim); */ int biovec_init_pool(mempool_t *pool, int pool_entries) { - struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX; + struct biovec_slab *bp = bvec_slabs + ARRAY_SIZE(bvec_slabs) - 1; return mempool_init_slab_pool(pool, pool_entries, bp->slab); } @@ -1644,39 +1617,28 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src) } EXPORT_SYMBOL(bioset_init_from_src); -static void __init biovec_init_slabs(void) -{ - int i; - - for (i = 0; i < BVEC_POOL_NR; i++) { - int size; - struct biovec_slab *bvs = bvec_slabs + i; - - if (bvs->nr_vecs <= BIO_INLINE_VECS) { - bvs->slab = NULL; - continue; - } - - size = bvs->nr_vecs * sizeof(struct bio_vec); - bvs->slab = kmem_cache_create(bvs->name, size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - } -} - static int __init init_bio(void) { + int i; bio_slab_max = 2; bio_slab_nr = 0; bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab), GFP_KERNEL); - BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET); - if (!bio_slabs) panic("bio: can't allocate bios\n"); + BUILD_BUG_ON(BIO_FLAG_LAST > 8 * sizeof_field(struct bio, bi_flags)); + bio_integrity_init(); - biovec_init_slabs(); + + for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) { + struct biovec_slab *bvs = bvec_slabs + i; + + bvs->slab = kmem_cache_create(bvs->name, + bvs->nr_vecs * sizeof(struct bio_vec), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + } if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS)) panic("bio: can't allocate bios\n"); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 9811ee74b69f51253000091a912e6df4b9641c48..3e42f7d845882e409601261c89e925e6b417b0de 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -601,7 +601,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) int inflight = 0; blkg = bio->bi_blkg; - if (!blkg || !bio_flagged(bio, BIO_TRACKED)) + if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED)) return; iolat = 
blkg_to_lat(bio->bi_blkg); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 37c59d7d6ba7f2333ed37ca7d2653173c79d7945..31e54f84ac89df5bd348273543d58690c1168239 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -198,21 +198,22 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) __rq_qos_requeue(q->rq_qos, rq); } -static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio) +static inline void rq_qos_done_bio(struct bio *bio) { - if (q->rq_qos) - __rq_qos_done_bio(q->rq_qos, bio); + if (bio->bi_disk && (bio_flagged(bio, BIO_QOS_THROTTLED) || + bio_flagged(bio, BIO_QOS_MERGED))) { + struct request_queue *q = bio->bi_disk->queue; + if (q->rq_qos) + __rq_qos_done_bio(q->rq_qos, bio); + } } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - /* - * BIO_TRACKED lets controllers know that a bio went through the - * normal rq_qos path. - */ - bio_set_flag(bio, BIO_TRACKED); - if (q->rq_qos) + if (q->rq_qos) { + bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); + } } static inline void rq_qos_track(struct request_queue *q, struct request *rq, @@ -225,8 +226,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq, static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (q->rq_qos) + if (q->rq_qos) { + bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); + } } static inline void rq_qos_queue_depth_changed(struct request_queue *q) diff --git a/block/blk.h b/block/blk.h index c86d27d80ba0eca94d75650a02e8be662118ec92..ca40bcb0abd5e5bcc1a729fc8e593ad38276811e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -52,6 +52,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); +#define BIO_INLINE_VECS 4 static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { diff --git 
a/drivers/md/raid10.c b/drivers/md/raid10.c index 0162dbe17cf3e22e7db7f87b403f8f3d3f5e3d96..bd99296c7b6ac89308047a9724a6c0cc3f99b7cf 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4554,10 +4554,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, read_bio->bi_private = r10_bio; read_bio->bi_end_io = end_reshape_read; bio_set_op_attrs(read_bio, REQ_OP_READ, 0); - read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); - read_bio->bi_status = 0; - read_bio->bi_vcnt = 0; - read_bio->bi_iter.bi_size = 0; r10_bio->master_bio = read_bio; r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; diff --git a/include/linux/bio.h b/include/linux/bio.h index e895c84be0bc49859ff2cace78f6c32add149a3d..85bb3e5b313dc063b6c096d0a588454020b14f5f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -312,7 +312,7 @@ struct bio_integrity_payload { struct bvec_iter bip_iter; - unsigned short bip_slab; /* slab the bip came from */ + KABI_DEPRECATE(unsigned short, bip_slab) unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ @@ -466,8 +466,9 @@ static inline void zero_fill_bio(struct bio *bio) zero_fill_bio_iter(bio, bio->bi_iter); } -extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); -extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); +extern struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, + gfp_t gfp_mask); +extern void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs); extern unsigned int bvec_nr_vecs(unsigned short idx); extern const char *bio_devname(struct bio *bio, char *buffer); @@ -706,12 +707,6 @@ struct bio_set { KABI_RESERVE(4) }; -struct biovec_slab { - int nr_vecs; - char *name; - struct kmem_cache *slab; -}; - static inline bool bioset_initialized(struct bio_set *bs) { return bs->bio_slab != NULL; diff --git a/include/linux/blk_types.h 
b/include/linux/blk_types.h index b49d97547009113b1f812ce3178deae39fe03171..adc411d427ff01feb174ce2e49bd292202d6039d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -223,7 +223,7 @@ struct bio { * top bits REQ_OP. Use * accessors. */ - unsigned short bi_flags; /* status, etc and bvec pool number */ + unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; unsigned short bi_write_hint; blk_status_t bi_status; @@ -305,38 +305,12 @@ enum { BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ - BIO_TRACKED, /* set if bio goes through the rq_qos path */ + BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + BIO_QOS_MERGED, /* bio went through rq_qos merge path */ BIO_ASYNC, /* has been dispatched asynchronously */ BIO_FLAG_LAST }; -/* See BVEC_POOL_OFFSET below before adding new flags */ - -/* - * We support 6 different bvec pools, the last one is magic in that it - * is backed by a mempool. - */ -#define BVEC_POOL_NR 6 -#define BVEC_POOL_MAX (BVEC_POOL_NR - 1) - -/* - * Top 3 bits of bio flags indicate the pool the bvecs came from. We add - * 1 to the actual index so that 0 indicates that there are no bvecs to be - * freed. - */ -#define BVEC_POOL_BITS (3) -#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) -#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) -#if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1) -# error "BVEC_POOL_BITS is too small" -#endif - -/* - * Flags starting here get preserved by bio_reset() - this includes - * only BVEC_POOL_IDX() - */ -#define BIO_RESET_BITS BVEC_POOL_OFFSET - typedef __u32 __bitwise blk_mq_req_flags_t; /*