From d6fc303db2288c62f7564f13388dccc2e3add80a Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Wed, 12 Mar 2025 20:40:23 +0800 Subject: [PATCH 01/17] anolis: virtio_ring: introduce dma map page api for virtqueue ANBZ: #19447 Wrapping new API "virtqueue_dma_map_page_attrs" above dma_map_page_attrs, which checks vq use dma_address or not. Signed-off-by: Jingbo Xu Signed-off-by: Ferry Meng --- drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 5 ++++ 2 files changed, 58 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 4fb8b3e255d8..cc60218c56fb 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2989,6 +2989,59 @@ void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, } EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); +/** + * virtqueue_dma_map_page_attrs - map DMA for _vq + * @_vq: the struct virtqueue we're talking about. + * @page: the page descriptor of the buffer to do dma + * @offset: the offset of the buffer to do dma inside the page + * @size: the size of the buffer to do dma + * @dir: DMA direction + * @attrs: DMA Attrs + * + * The caller calls this to do dma mapping in advance. The DMA address can be + * passed to this _vq when it is in pre-mapped mode. + * + * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). + */ +dma_addr_t virtqueue_dma_map_page_attrs(struct virtqueue *_vq, struct page *page, + size_t offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return (dma_addr_t)(page_to_phys(page) + offset); + + return dma_map_page_attrs(vring_dma_dev(vq), page, offset, + size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_map_page_attrs); + +/** + * virtqueue_dma_unmap_page_attrs - unmap DMA for _vq + * @_vq: the struct virtqueue we're talking about. + * @addr: the dma address to unmap + * @size: the size of the buffer + * @dir: DMA direction + * @attrs: DMA Attrs + * + * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. + * + */ +void virtqueue_dma_unmap_page_attrs(struct virtqueue *_vq, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!vq->use_dma_api) + return; + + dma_unmap_page_attrs(vring_dma_dev(vq), addr, size, dir, attrs); +} +EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_page_attrs); + /** * virtqueue_dma_mapping_error - check dma address * @_vq: the struct virtqueue we're talking about. 
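As context for the two helpers above, a minimal usage sketch (illustrative only, not part of the patch; the function name and the vq/page/offset/len parameters are assumed to come from the caller): a driver that runs a virtqueue in pre-mapped mode maps each buffer page itself and must check the result with virtqueue_dma_mapping_error() before using the address.

/* Illustrative sketch: premap one page for a virtqueue in pre-mapped mode. */
static dma_addr_t example_premap_page(struct virtqueue *vq, struct page *page,
				      size_t offset, size_t len)
{
	dma_addr_t addr;

	addr = virtqueue_dma_map_page_attrs(vq, page, offset, len,
					    DMA_TO_DEVICE, 0);
	if (virtqueue_dma_mapping_error(vq, addr))
		return DMA_MAPPING_ERROR;

	/* later, once the device is done with the buffer: */
	/* virtqueue_dma_unmap_page_attrs(vq, addr, len, DMA_TO_DEVICE, 0); */
	return addr;
}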
diff --git a/include/linux/virtio.h b/include/linux/virtio.h index e098b68971de..2a76388ea74c 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -216,6 +216,11 @@ dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); +dma_addr_t virtqueue_dma_map_page_attrs(struct virtqueue *_vq, struct page *page, + size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void virtqueue_dma_unmap_page_attrs(struct virtqueue *_vq, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs); int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr);
-- Gitee

From 3793f91d12b5bc3a79e69637b902b3fe68945424 Mon Sep 17 00:00:00 2001
From: Ferry Meng
Date: Wed, 19 Mar 2025 17:20:08 +0800
Subject: [PATCH 02/17] anolis: virtio-blk: add VIRTIO_BLK_RING_PAIR kconfig

ANBZ: #19447

Add a new Kconfig option for the upcoming ring pair feature. With it,
virtio-blk can use two neighboring virtqueues to serve one request
queue. The feature requires backend support, so disable it if your
environment does not have a matching backend.

Signed-off-by: Ferry Meng
---
 drivers/block/Kconfig | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 1b0c5833e8ff..7ad988d0626e 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -443,6 +443,15 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. +config VIRTIO_BLK_RING_PAIR + bool "Virtio block driver ring pair support" + depends on VIRTIO_BLK + help + This enables virtio-blk to use two virtqueues per request queue. The + backend must support this mode. + + If unsure, say N. + config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK
-- Gitee

From f103fd730223c633e242523bb17f79322b5c96bd Mon Sep 17 00:00:00 2001
From: Ferry Meng
Date: Thu, 20 Mar 2025 15:24:39 +0800
Subject: [PATCH 03/17] anolis: virtio-blk: duplicate functions to cleanup for ring pair

ANBZ: #19447

This is in preparation for the virtio-blk ring pair feature. If enabled,
two neighboring virtqueues are bound together to serve one request
queue: one is used for dispatching (the SQ) and the other fetches
completions (the CQ). The first queue does not raise interrupts; the
driver polls the SQ to recycle used ring entries. The second queue
raises the completion interrupt, and the driver reaps the CQ from it.

As preparation for the ring_pair feature, we duplicate the related
functions so they can be modified in later patches.
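A rough sketch of the pairing described above (illustrative only; the concrete helpers appear later in this series as virtblk_qid_to_sq_qid() and virtblk_qid_to_cq_qid(), and the names below are hypothetical): blk-mq hardware queue N is served by virtqueue 2*N as the SQ and virtqueue 2*N + 1 as the CQ.

/* Illustrative mapping from a blk-mq hardware queue to its SQ/CQ pair. */
static inline unsigned int hwq_to_sq_index(unsigned int hwq)
{
	return hwq * 2;		/* even index: submission queue, no interrupt */
}

static inline unsigned int hwq_to_cq_index(unsigned int hwq)
{
	return hwq * 2 + 1;	/* odd index: completion queue, raises the irq */
}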
Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 353 ++++++++++++++++++++++++++++++++++++- 1 file changed, 352 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1087b319f483..743e5991fa6e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -55,6 +55,16 @@ static struct class *vd_chr_class; static struct workqueue_struct *virtblk_wq; +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +enum virtblk_ring_t { + /* ring_pair submission queue */ + VIRTBLK_RING_SQ = 0, + /* ring_pair completion queue */ + VIRTBLK_RING_CQ = 1, + VIRTBLK_RING_NUM = 2 +}; +#endif + struct virtblk_uring_cmd_pdu { struct request *req; struct bio *bio; @@ -150,6 +160,54 @@ static inline bool vbr_is_bidirectional(struct virtblk_req *vbr) return op_is_bidirectional(req->cmd_flags); } +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static int virtblk_add_req_bidirectional_rpair(struct virtqueue *vq, + struct virtblk_req *vbr, struct scatterlist *data_sg, + struct scatterlist *data_sg_extra) +{ + struct scatterlist hdr, status, *sgs[4]; + unsigned int num_out = 0, num_in = 0; + + /* + * vritblk_add_req use 'bool' have_data, while we use int num to + * validate both OUT and IN direction have data. For bidirectional + * request, __blk_bios_map_sg_bidir() should map at least 2 segments. + */ + if ((sg_nents(data_sg) == 0) || (sg_nents(data_sg_extra) == 0)) + return -EINVAL; + + sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sg_init_one(&status, &vbr->status, sizeof(vbr->status)); + sgs[num_out++] = &hdr; + sgs[num_out++] = data_sg; + sgs[num_out + num_in++] = data_sg_extra; + sgs[num_out + num_in++] = &status; + + return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); +} + +static int virtblk_add_req_rpair(struct virtqueue *vq, struct virtblk_req *vbr, + struct scatterlist *data_sg, bool have_data) +{ + struct scatterlist hdr, status, *sgs[3]; + unsigned int num_out = 0, num_in = 0; + + sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sgs[num_out++] = &hdr; + + if (have_data) { + if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) + sgs[num_out++] = data_sg; + else + sgs[num_out + num_in++] = data_sg; + } + + sg_init_one(&status, &vbr->status, sizeof(vbr->status)); + sgs[num_out + num_in++] = &status; + + return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); +} +#endif static int virtblk_add_req_bidirectional(struct virtqueue *vq, struct virtblk_req *vbr, struct scatterlist *data_sg, struct scatterlist *data_sg_extra) @@ -319,6 +377,59 @@ static void virtblk_cleanup_cmd(struct request *req) } } +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static blk_status_t virtblk_setup_cmd_rpair(struct virtio_device *vdev, + struct request *req, + struct virtblk_req *vbr) +{ + bool unmap = false; + u32 type; + u64 sector = 0; + + switch (req_op(req)) { + case REQ_OP_READ: + type = VIRTIO_BLK_T_IN; + sector = blk_rq_pos(req); + break; + case REQ_OP_WRITE: + type = VIRTIO_BLK_T_OUT; + sector = blk_rq_pos(req); + break; + case REQ_OP_FLUSH: + type = VIRTIO_BLK_T_FLUSH; + break; + case REQ_OP_DISCARD: + type = VIRTIO_BLK_T_DISCARD; + break; + case REQ_OP_WRITE_ZEROES: + type = VIRTIO_BLK_T_WRITE_ZEROES; + unmap = !(req->cmd_flags & REQ_NOUNMAP); + break; + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: + /* Out header already filled in, nothing to do + * Attention, currently not support DISCARD and + * WRITE_ZEROES for VIRTBLK_PASSTHROUGH. 
+ */ + return 0; + default: + WARN_ON_ONCE(1); + return BLK_STS_IOERR; + } + + vbr->out_hdr.type = cpu_to_virtio32(vdev, type); + vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector); + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); + + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { + if (virtblk_setup_discard_write_zeroes(req, unmap)) + return BLK_STS_RESOURCE; + } + + return 0; +} +#endif + static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, struct virtblk_req *vbr) @@ -382,6 +493,37 @@ static inline void virtblk_request_done(struct request *req) blk_mq_end_request(req, virtblk_result(vbr)); } +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static void virtblk_done_rpair(struct virtqueue *vq) +{ + struct virtio_blk *vblk = vq->vdev->priv; + bool req_done = false; + int qid = vq->index; + struct virtblk_req *vbr; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vblk->vqs[qid].lock, flags); + do { + virtqueue_disable_cb(vq); + while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + + if (likely(!blk_should_fake_timeout(req->q))) + blk_mq_complete_request(req); + req_done = true; + } + if (unlikely(virtqueue_is_broken(vq))) + break; + } while (!virtqueue_enable_cb(vq)); + + /* In case queue is stopped waiting for more buffers. */ + if (req_done) + blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); +} +#endif + static void virtblk_done(struct virtqueue *vq) { struct virtio_blk *vblk = vq->vdev->priv; @@ -425,6 +567,78 @@ static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) virtqueue_notify(vq->vq); } +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static blk_status_t virtio_queue_rq_rpair(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct virtio_blk *vblk = hctx->queue->queuedata; + struct request *req = bd->rq; + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + unsigned long flags; + int num; + int qid = hctx->queue_num; + bool notify = false; + blk_status_t status; + int err; + + status = virtblk_setup_cmd(vblk->vdev, req, vbr); + if (unlikely(status)) + return status; + + blk_mq_start_request(req); + + if (vbr_is_bidirectional(vbr)) + num = virtblk_map_data_bidirectional(hctx, req, vbr); + else + num = virtblk_map_data(hctx, req, vbr); + + if (unlikely(num < 0)) { + virtblk_cleanup_cmd(req); + return BLK_STS_RESOURCE; + } + + spin_lock_irqsave(&vblk->vqs[qid].lock, flags); + if (vbr_is_bidirectional(vbr)) + err = virtblk_add_req_bidirectional(vblk->vqs[qid].vq, + vbr, vbr->sg_table.sgl, + vbr->sg_table_extra.sgl); + else + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, + vbr->sg_table.sgl, num); + + if (err) { + virtqueue_kick(vblk->vqs[qid].vq); + /* Don't stop the queue if -ENOMEM: we may have failed to + * bounce the buffer due to global resource outage. 
+ */ + if (err == -ENOSPC) + blk_mq_stop_hw_queue(hctx); + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); + if (vbr_is_bidirectional(vbr)) + virtblk_unmap_data_bidirectional(req, vbr); + else + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); + switch (err) { + case -ENOSPC: + return BLK_STS_DEV_RESOURCE; + case -ENOMEM: + return BLK_STS_RESOURCE; + default: + return BLK_STS_IOERR; + } + } + + if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) + notify = true; + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); + + if (notify) + virtqueue_notify(vblk->vqs[qid].vq); + return BLK_STS_OK; +} +#endif + static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -693,9 +907,97 @@ static void virtblk_config_changed(struct virtio_device *vdev) queue_work(virtblk_wq, &vblk->config_work); } +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +bool virtblk_rpair_disable; +module_param_named(rpair_disable, virtblk_rpair_disable, bool, 0444); +MODULE_PARM_DESC(rpair_disable, "disable vring pair detective. (0=Not [default], 1=Yes)"); + +static int init_vq_rpair(struct virtio_blk *vblk) +{ + int err = 0; + int i; + vq_callback_t **callbacks; + const char **names; + struct virtqueue **vqs; + unsigned short num_vqs; + unsigned int num_poll_vqs; + struct virtio_device *vdev = vblk->vdev; + struct irq_affinity desc = { 0, }; + + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, + struct virtio_blk_config, num_queues, + &num_vqs); + if (err) + num_vqs = 1; + + if (!err && !num_vqs) { + dev_err(&vdev->dev, "MQ advertised but zero queues reported\n"); + return -EINVAL; + } + + num_vqs = min_t(unsigned int, + min_not_zero(num_request_queues, nr_cpu_ids), + num_vqs); + + num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1); + + vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs; + vblk->io_queues[HCTX_TYPE_READ] = 0; + vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs; + + dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n", + vblk->io_queues[HCTX_TYPE_DEFAULT], + vblk->io_queues[HCTX_TYPE_READ], + vblk->io_queues[HCTX_TYPE_POLL]); + + vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); + if (!vblk->vqs) + return -ENOMEM; + + names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); + callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); + vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); + if (!names || !callbacks || !vqs) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < num_vqs - num_poll_vqs; i++) { + callbacks[i] = virtblk_done; + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); + names[i] = vblk->vqs[i].name; + } + + for (; i < num_vqs; i++) { + callbacks[i] = NULL; + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%d", i); + names[i] = vblk->vqs[i].name; + } + + /* Discover virtqueues and write information to configuration. 
*/ + err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); + if (err) + goto out; + + for (i = 0; i < num_vqs; i++) { + spin_lock_init(&vblk->vqs[i].lock); + vblk->vqs[i].vq = vqs[i]; + } + vblk->num_vqs = num_vqs; + +out: + kfree(vqs); + kfree(callbacks); + kfree(names); + if (err) + kfree(vblk->vqs); + return err; +} +#endif + static int init_vq(struct virtio_blk *vblk) { - int err; + int err = 1; int i; vq_callback_t **callbacks; const char **names; @@ -705,6 +1007,14 @@ static int init_vq(struct virtio_blk *vblk) struct virtio_device *vdev = vblk->vdev; struct irq_affinity desc = { 0, }; +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + if (!virtblk_rpair_disable) + err = init_vq_rpair(vblk); + + /* if err > 0, then vring pair fall back to original virtqueue use*/ + if (err <= 0) + return err; +#endif err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, struct virtio_blk_config, num_queues, &num_vqs); @@ -942,6 +1252,37 @@ static void virtblk_complete_batch(struct io_comp_batch *iob) blk_mq_end_request_batch(iob); } +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static int virtblk_poll_rpair(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) +{ + struct virtio_blk *vblk = hctx->queue->queuedata; + struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx); + struct virtblk_req *vbr; + unsigned long flags; + unsigned int len; + int found = 0; + + spin_lock_irqsave(&vq->lock, flags); + + while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + + found++; + if (!blk_mq_complete_request_remote(req) && + !blk_mq_add_to_batch(req, iob, vbr->status, + virtblk_complete_batch)) + virtblk_request_done(req); + } + + if (found) + blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + + spin_unlock_irqrestore(&vq->lock, flags); + + return found; +} +#endif + static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct virtio_blk *vblk = hctx->queue->queuedata; @@ -971,6 +1312,16 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) return found; } +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static const struct blk_mq_ops virtio_mq_pair_ops = { + .queue_rq = virtio_queue_rq_rpair, + .commit_rqs = virtio_commit_rqs, + .complete = virtblk_request_done, + .map_queues = virtblk_map_queues, + .poll = virtblk_poll_rpair, +}; +#endif + static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .commit_rqs = virtio_commit_rqs, -- Gitee From 628ffab56f4d3b838c90edcadd1078aff338e490 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Tue, 25 Mar 2025 21:42:01 +0800 Subject: [PATCH 04/17] anolis: virtio-blk: premap DMA buf ANBZ: #19447 If enabled vring pair, the process of mapping sgs should be managed by virtio-blk driver itself. As SQ virt_ring is responsed by backend (but I/O not fully executed and return IRQ back), we should recycle SQ slot but can't do rq_unmap. Besides, we should maintain scatterlist for hdr and status in virtblk_req. DMA buf will be unmap after CQ return one I/O back. 
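Condensed sketch of the premap flow described above (illustrative only; the real per-segment helpers in the diff below are virtblk_map_sg() and virtblk_unmap_sg(), and the helper name here is hypothetical): once virtqueue_set_dma_premapped() is in effect, the driver fills sg->dma_address itself and keeps the mapping alive until the CQ reports the I/O as done.

/* Illustrative sketch: map a single segment for a pre-mapped virtqueue. */
static int premap_one_sg(struct virtqueue *vq, struct scatterlist *sg,
			 enum dma_data_direction dir)
{
	sg->dma_address = virtqueue_dma_map_page_attrs(vq, sg_page(sg),
						       sg->offset, sg->length,
						       dir, 0);
	if (virtqueue_dma_mapping_error(vq, sg->dma_address))
		return -ENOMEM;

	return 0;	/* unmapped later via virtqueue_dma_unmap_page_attrs() */
}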
Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 135 +++++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 12 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 743e5991fa6e..d55f343a728d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -119,6 +119,9 @@ struct virtio_blk { struct virtblk_req { struct virtio_blk_outhdr out_hdr; u8 status; +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + struct scatterlist inline_sg[2]; +#endif struct sg_table sg_table; struct sg_table sg_table_extra; struct scatterlist sg[]; @@ -161,12 +164,101 @@ static inline bool vbr_is_bidirectional(struct virtblk_req *vbr) } #ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static int virtblk_map_sg(struct virtqueue *vq, struct scatterlist *sglist, + enum dma_data_direction dir) +{ + struct scatterlist *sg, *last; + + for (sg = sglist; sg; sg = sg_next(sg)) { + sg->dma_address = virtqueue_dma_map_page_attrs(vq, sg_page(sg), + sg->offset, sg->length, dir, 0); + if (virtqueue_dma_mapping_error(vq, sg->dma_address)) { + last = sg; + goto out; + } + } + return 0; +out: + for (sg = sglist; sg && sg != last; sg = sg_next(sg)) + virtqueue_dma_unmap_page_attrs(vq, sg->dma_address, + sg->length, dir, 0); + return -ENOMEM; +} + +static void virtblk_unmap_sg(struct virtqueue *vq, struct scatterlist *sglist, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + + for (sg = sglist; sg; sg = sg_next(sg)) + virtqueue_dma_unmap_page_attrs(vq, sg->dma_address, + sg->length, dir, 0); +} + +static int virtblk_rq_map(struct virtqueue *vq, struct scatterlist *sgs[], + unsigned int out_sgs, unsigned int in_sgs) +{ + int i, ret, done_out_sgs, done_in_sgs; + + for (i = 0; i < out_sgs; i++) { + ret = virtblk_map_sg(vq, sgs[i], DMA_TO_DEVICE); + if (ret < 0) { + done_out_sgs = i; + goto cleanup_out_map; + } + } + + for (; i < out_sgs + in_sgs; i++) { + ret = virtblk_map_sg(vq, sgs[i], DMA_FROM_DEVICE); + if (ret < 0) { + done_out_sgs = out_sgs; + done_in_sgs = i - out_sgs; + goto cleanup_in_map; + } + } + return 0; + +cleanup_in_map: + for (i = out_sgs; i < out_sgs + done_in_sgs; i++) + virtblk_unmap_sg(vq, sgs[i], DMA_FROM_DEVICE); +cleanup_out_map: + for (i = 0; i < done_out_sgs; i++) + virtblk_unmap_sg(vq, sgs[i], DMA_TO_DEVICE); + return -ENOMEM; +} + +static void virtblk_rq_unmap(struct virtqueue *vq, struct virtblk_req *vbr) +{ + struct request *req = blk_mq_rq_from_pdu(vbr); + int dir; + + virtblk_unmap_sg(vq, &vbr->inline_sg[0], DMA_TO_DEVICE); + virtblk_unmap_sg(vq, &vbr->inline_sg[1], DMA_FROM_DEVICE); + + if (!blk_rq_nr_phys_segments(req)) + return; + + if (vbr_is_bidirectional(vbr)) { + virtblk_unmap_sg(vq, vbr->sg_table.sgl, DMA_TO_DEVICE); + virtblk_unmap_sg(vq, vbr->sg_table_extra.sgl, DMA_FROM_DEVICE); + } else { + if (req_op(req) == REQ_OP_WRITE) + dir = DMA_TO_DEVICE; + else + dir = DMA_FROM_DEVICE; + virtblk_unmap_sg(vq, vbr->sg_table.sgl, dir); + } +} + static int virtblk_add_req_bidirectional_rpair(struct virtqueue *vq, struct virtblk_req *vbr, struct scatterlist *data_sg, struct scatterlist *data_sg_extra) { - struct scatterlist hdr, status, *sgs[4]; + struct scatterlist *sgs[4]; + struct scatterlist *hdr = &vbr->inline_sg[0]; + struct scatterlist *status = &vbr->inline_sg[1]; unsigned int num_out = 0, num_in = 0; + int ret; /* * vritblk_add_req use 'bool' have_data, while we use int num to @@ -176,24 +268,34 @@ static int virtblk_add_req_bidirectional_rpair(struct virtqueue *vq, if ((sg_nents(data_sg) == 0) || (sg_nents(data_sg_extra) == 0)) 
return -EINVAL; - sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); - sg_init_one(&status, &vbr->status, sizeof(vbr->status)); - sgs[num_out++] = &hdr; + sg_init_one(hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sg_init_one(status, &vbr->status, sizeof(vbr->status)); + sgs[num_out++] = hdr; sgs[num_out++] = data_sg; sgs[num_out + num_in++] = data_sg_extra; - sgs[num_out + num_in++] = &status; + sgs[num_out + num_in++] = status; - return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); + ret = virtblk_rq_map(vq, sgs, num_out, num_in); + if (ret < 0) + return ret; + + ret = virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); + if (ret < 0) + virtblk_rq_unmap(vq, vbr); + return ret; } static int virtblk_add_req_rpair(struct virtqueue *vq, struct virtblk_req *vbr, struct scatterlist *data_sg, bool have_data) { - struct scatterlist hdr, status, *sgs[3]; + struct scatterlist *sgs[3]; + struct scatterlist *hdr = &vbr->inline_sg[0]; + struct scatterlist *status = &vbr->inline_sg[1]; unsigned int num_out = 0, num_in = 0; + int ret; - sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); - sgs[num_out++] = &hdr; + sg_init_one(hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sgs[num_out++] = hdr; if (have_data) { if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) @@ -202,12 +304,20 @@ static int virtblk_add_req_rpair(struct virtqueue *vq, struct virtblk_req *vbr, sgs[num_out + num_in++] = data_sg; } - sg_init_one(&status, &vbr->status, sizeof(vbr->status)); - sgs[num_out + num_in++] = &status; + sg_init_one(status, &vbr->status, sizeof(vbr->status)); + sgs[num_out + num_in++] = status; - return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); + ret = virtblk_rq_map(vq, sgs, num_out, num_in); + if (ret < 0) + return ret; + + ret = virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); + if (ret < 0) + virtblk_rq_unmap(vq, vbr); + return ret; } #endif + static int virtblk_add_req_bidirectional(struct virtqueue *vq, struct virtblk_req *vbr, struct scatterlist *data_sg, struct scatterlist *data_sg_extra) @@ -980,6 +1090,7 @@ static int init_vq_rpair(struct virtio_blk *vblk) goto out; for (i = 0; i < num_vqs; i++) { + virtqueue_set_dma_premapped(vqs[i]); spin_lock_init(&vblk->vqs[i].lock); vblk->vqs[i].vq = vqs[i]; } -- Gitee From fcaa0ce67a3eae6f2a1d1531f361bd273fcfb245 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Mon, 24 Mar 2025 21:04:34 +0800 Subject: [PATCH 05/17] anolis: virtio-blk/virtio_ring: separate ring_pair add_sgs functions. ANBZ: #19447 This is a preparation patch for the following two patches. We want to separate "add_sgs" related functions for vring pair into individual functions, which will be modified later. Please note that in the current version, our support for ring_pair is limited to split_queue with indirect enabled. 
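A sketch of the applicability check that this restriction implies (illustrative only; the driver-side check added later in this series tests the same two feature bits, and the function name here is hypothetical):

/* Illustrative only: ring pair needs a split ring with indirect descriptors. */
static bool ring_pair_supported(struct virtio_device *vdev)
{
	return virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
	       !virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
}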
Signed-off-by: Ferry Meng --- drivers/virtio/virtio_ring.c | 248 +++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 7 + 2 files changed, 255 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cc60218c56fb..dc300aca1972 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -744,6 +744,200 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, return -ENOMEM; } +static inline int virtqueue_add_split_rpair(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct scatterlist *sg; + struct vring_desc *desc; + unsigned int i, n, avail, descs_used, prev, err_idx; + int head; + bool indirect; + + START_USE(vq); + + BUG_ON(data == NULL); + BUG_ON(ctx && vq->indirect); + + if (unlikely(vq->broken)) { + END_USE(vq); + return -EIO; + } + + LAST_ADD_TIME_UPDATE(vq); + + BUG_ON(total_sg == 0); + + head = vq->free_head; + + if (virtqueue_use_indirect(_vq, total_sg)) + desc = alloc_indirect_split(_vq, total_sg, gfp); + else { + desc = NULL; + WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); + } + + if (desc) { + /* Use a single buffer which doesn't continue */ + indirect = true; + /* Set up rest to use this indirect table. */ + i = 0; + descs_used = 1; + } else { + indirect = false; + desc = vq->split.vring.desc; + i = head; + descs_used = total_sg; + } + + if (unlikely(vq->vq.num_free < descs_used)) { + pr_debug("Can't add buf len %i - avail = %i\n", + descs_used, vq->vq.num_free); + /* FIXME: for historical reasons, we force a notify here if + * there are outgoing parts to the buffer. Presumably the + * host should service the ring ASAP. + */ + if (out_sgs) + vq->notify(&vq->vq); + if (indirect) + kfree(desc); + END_USE(vq); + return -ENOSPC; + } + + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) + goto unmap_release; + + prev = i; + /* Note that we trust indirect descriptor + * table since it use stream DMA mapping. + */ + i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, + VRING_DESC_F_NEXT, + indirect); + } + } + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr; + + if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) + goto unmap_release; + + prev = i; + /* Note that we trust indirect descriptor + * table since it use stream DMA mapping. + */ + i = virtqueue_add_desc_split(_vq, desc, i, addr, + sg->length, + VRING_DESC_F_NEXT | + VRING_DESC_F_WRITE, + indirect); + } + } + /* Last one doesn't continue. */ + desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); + if (!indirect && vq->do_unmap) + vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= + ~VRING_DESC_F_NEXT; + + if (indirect) { + /* Now that the indirect table is filled in, map it. */ + dma_addr_t addr = vring_map_single( + vq, desc, total_sg * sizeof(struct vring_desc), + DMA_TO_DEVICE); + if (vring_mapping_error(vq, addr)) { + if (vq->premapped) + goto free_indirect; + + goto unmap_release; + } + + virtqueue_add_desc_split(_vq, vq->split.vring.desc, + head, addr, + total_sg * sizeof(struct vring_desc), + VRING_DESC_F_INDIRECT, + false); + } + + /* We're using some buffers from the free list. 
*/ + vq->vq.num_free -= descs_used; + + /* Update free pointer */ + if (indirect) + vq->free_head = vq->split.desc_extra[head].next; + else + vq->free_head = i; + + /* Store token and indirect buffer state. */ + vq->split.desc_state[head].data = data; + if (indirect) + vq->split.desc_state[head].indir_desc = desc; + else + vq->split.desc_state[head].indir_desc = ctx; + + /* Put entry in available array (but don't update avail->idx until they + * do sync). + */ + avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); + vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); + + /* Descriptors and available array need to be set before we expose the + * new available array entries. + */ + virtio_wmb(vq->weak_barriers); + vq->split.avail_idx_shadow++; + vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, + vq->split.avail_idx_shadow); + vq->num_added++; + + pr_debug("Added buffer head %i to %p\n", head, vq); + END_USE(vq); + + /* This is very unlikely, but theoretically possible. Kick + * just in case. + */ + if (unlikely(vq->num_added == (1 << 16) - 1)) + virtqueue_kick(_vq); + + return 0; + +unmap_release: + err_idx = i; + + if (indirect) + i = 0; + else + i = head; + + for (n = 0; n < total_sg; n++) { + if (i == err_idx) + break; + if (indirect) { + vring_unmap_one_split_indirect(vq, &desc[i]); + i = virtio16_to_cpu(_vq->vdev, desc[i].next); + } else + i = vring_unmap_one_split(vq, i); + } + +free_indirect: + if (indirect) + kfree(desc); + + END_USE(vq); + return -ENOMEM; +} + static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2169,6 +2363,23 @@ static inline int virtqueue_add(struct virtqueue *_vq, out_sgs, in_sgs, data, ctx, gfp); } +/* + * Generic functions and exported symbols for ringpair mode. + */ + +static inline int virtqueue_add_rpair(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + return virtqueue_add_split_rpair(_vq, sgs, total_sg, + out_sgs, in_sgs, data, ctx, gfp); +} + /** * virtqueue_add_sgs - expose buffers to other end * @_vq: the struct virtqueue we're talking about. @@ -2204,6 +2415,43 @@ int virtqueue_add_sgs(struct virtqueue *_vq, } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); +/** + * virtqueue_add_sgs_rpair - expose buffers to other end + * @_vq: the struct virtqueue we're talking about. + * @sgs: array of terminated scatterlists. + * @out_sgs: the number of scatterlists readable by other side + * @in_sgs: the number of scatterlists which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Only work for ring pair mode + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_sgs_rpair(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) +{ + unsigned int i, total_sg = 0; + + /* Count them first. 
*/ for (i = 0; i < out_sgs + in_sgs; i++) { struct scatterlist *sg; for (sg = sgs[i]; sg; sg = sg_next(sg)) total_sg++; } return virtqueue_add_rpair(_vq, sgs, total_sg, out_sgs, in_sgs, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs_rpair); /** * virtqueue_add_outbuf - expose output buffers to other end * @vq: the struct virtqueue we're talking about.
diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 2a76388ea74c..1dc1c3fd8767 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -62,6 +62,13 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_sgs_rpair(struct virtqueue *vq, + struct scatterlist *sgs[], + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp); + struct device *virtqueue_dma_dev(struct virtqueue *vq); bool virtqueue_kick(struct virtqueue *vq);
-- Gitee

From 384a2b2acf8b03820ca8510ac2505e1c737fab8b Mon Sep 17 00:00:00 2001
From: Ferry Meng
Date: Mon, 24 Mar 2025 21:16:15 +0800
Subject: [PATCH 06/17] anolis: virtio-blk/virtio_ring: vring_pair pass indirect descriptor

ANBZ: #19447

After enabling the vring pair feature, there is an issue: the indir_desc
slot is freed while "the submission queue is finished but the completion
queue has not responded yet", so if an error occurs, neither the driver
nor the backend can locate the unfinished I/Os. We therefore need a way
to keep the indirect descriptor information until the I/O is really
done.

This patch achieves that by passing the indir_desc to the backend; in
other words, the backend saves the indir_desc. Details follow.

When INDIRECT virtqueues and the vring pair are enabled, the L1 indirect
descriptor (in fact the sg filled into the virtqueue) is also passed
along with the L2 descriptors. This is only needed by virtio-blk and
strongly depends on backend driver support. As a customized solution,
the current layout is:

  Original -> Now
  Content   Dir   |   Content   Dir
  out_hdr   OUT   |   out_hdr   OUT
  out_sg1   OUT   |   L1_desc   OUT
  ...             |   out_sg1   OUT
  ...             |   ...
  status    IN    |   ...
                  |   status    IN

As we can see, there are two major modifications:
1. total_sg += 1
2. the L1 descriptor needs to be mapped in advance (and, if an error
   occurs, unmapped carefully)

Finally, we should emphasize that the current version is only compatible
with scenarios where all of the following conditions are satisfied:
1. the vring pair feature is enabled;
2. virtio-blk is used with a backend driver that supports it;
3. only "split + indirect" virtqueues are used;
4. only the SQ passes the indir_desc; the CQ is not involved.
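A sketch of how the queue setup opts in (illustrative only; the virtio-blk call site appears in the next patch of this series, and the loop variables here are assumed): only the SQ of each pair enables save_indir, since the CQ never carries an indirect table that must outlive the add, while both queues of the pair run in pre-mapped DMA mode.

/* Illustrative sketch of the per-virtqueue setup for a ring pair. */
static void setup_ring_pair_vqs(struct virtqueue **vqs, unsigned int num_vqs)
{
	unsigned int i;

	for (i = 0; i < num_vqs; i++) {
		if ((i % 2) == 0)		/* even index: SQ of the pair */
			virtqueue_set_save_indir(vqs[i]);
		virtqueue_set_dma_premapped(vqs[i]);
	}
}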
Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 4 +- drivers/virtio/virtio_ring.c | 78 +++++++++++++++++++++++++++++------- include/linux/virtio.h | 2 + 3 files changed, 68 insertions(+), 16 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index d55f343a728d..210ab727e0b6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -279,7 +279,7 @@ static int virtblk_add_req_bidirectional_rpair(struct virtqueue *vq, if (ret < 0) return ret; - ret = virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); + ret = virtqueue_add_sgs_rpair(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); if (ret < 0) virtblk_rq_unmap(vq, vbr); return ret; @@ -311,7 +311,7 @@ static int virtblk_add_req_rpair(struct virtqueue *vq, struct virtblk_req *vbr, if (ret < 0) return ret; - ret = virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); + ret = virtqueue_add_sgs_rpair(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); if (ret < 0) virtblk_rq_unmap(vq, vbr); return ret; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dc300aca1972..07a1761d1f3d 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -185,6 +185,11 @@ struct vring_virtqueue { */ bool do_unmap; + /* If enable vring pair, Virtqueue will save the indirect desc + * pointer and avoid the pre-unmap. + */ + bool save_indir; + /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ @@ -479,7 +484,7 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, flags = extra[i].flags; if (flags & VRING_DESC_F_INDIRECT) { - if (!vq->use_dma_api) + if (!vq->use_dma_api || vq->save_indir) goto out; dma_unmap_single(vring_dma_dev(vq), @@ -759,6 +764,7 @@ static inline int virtqueue_add_split_rpair(struct virtqueue *_vq, unsigned int i, n, avail, descs_used, prev, err_idx; int head; bool indirect; + dma_addr_t l1_addr; START_USE(vq); @@ -776,9 +782,10 @@ static inline int virtqueue_add_split_rpair(struct virtqueue *_vq, head = vq->free_head; - if (virtqueue_use_indirect(_vq, total_sg)) + if (virtqueue_use_indirect(_vq, total_sg)) { + total_sg += 1; desc = alloc_indirect_split(_vq, total_sg, gfp); - else { + } else { desc = NULL; WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); } @@ -811,6 +818,14 @@ static inline int virtqueue_add_split_rpair(struct virtqueue *_vq, return -ENOSPC; } + if (indirect && vq->save_indir) { + l1_addr = vring_map_single(vq, desc, + total_sg * sizeof(struct vring_desc), + DMA_TO_DEVICE); + if (vring_mapping_error(vq, l1_addr)) + goto free_indirect; + } + for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { dma_addr_t addr; @@ -826,6 +841,13 @@ static inline int virtqueue_add_split_rpair(struct virtqueue *_vq, VRING_DESC_F_NEXT, indirect); } + if ((n == 0) && indirect && vq->save_indir) { + prev = i; + i = virtqueue_add_desc_split(_vq, desc, i, l1_addr, + total_sg * sizeof(struct vring_desc), + VRING_DESC_F_NEXT, + indirect); + } } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { @@ -852,19 +874,21 @@ static inline int virtqueue_add_split_rpair(struct virtqueue *_vq, ~VRING_DESC_F_NEXT; if (indirect) { - /* Now that the indirect table is filled in, map it. 
*/ - dma_addr_t addr = vring_map_single( - vq, desc, total_sg * sizeof(struct vring_desc), - DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) { - if (vq->premapped) - goto free_indirect; + if (!vq->save_indir) { + /* Now that the indirect table is filled in, map it. */ + l1_addr = vring_map_single( + vq, desc, total_sg * sizeof(struct vring_desc), + DMA_TO_DEVICE); + if (vring_mapping_error(vq, l1_addr)) { + if (vq->premapped) + goto free_indirect; - goto unmap_release; - } + goto unmap_release; + } + } virtqueue_add_desc_split(_vq, vq->split.vring.desc, - head, addr, + head, l1_addr, total_sg * sizeof(struct vring_desc), VRING_DESC_F_INDIRECT, false); @@ -1014,7 +1038,8 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, vring_unmap_one_split_indirect(vq, &indir_desc[j]); } - kfree(indir_desc); + if (!vq->save_indir) + kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; } else if (ctx) { *ctx = vq->split.desc_state[head].indir_desc; @@ -2244,6 +2269,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; vq->do_unmap = vq->use_dma_api; + vq->save_indir = false; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2438,8 +2464,14 @@ int virtqueue_add_sgs_rpair(struct virtqueue *_vq, void *data, gfp_t gfp) { + struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i, total_sg = 0; + if (!vq->save_indir) { + pr_err("virtqueue_add_rpair() only supported for SQ."); + return -EINVAL; + } + /* Count them first. */ for (i = 0; i < out_sgs + in_sgs; i++) { struct scatterlist *sg; @@ -2812,6 +2844,7 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; vq->do_unmap = vq->use_dma_api; + vq->save_indir = false; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; @@ -2913,6 +2946,23 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, } EXPORT_SYMBOL_GPL(virtqueue_resize); +/** + * virtqueue_set_save_indir - set the vring save_indir + * @_vq: the struct virtqueue we're talking about. + * + * Enable the save_indir mode of the vq. + * + */ +void virtqueue_set_save_indir(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + START_USE(vq); + vq->save_indir = true; + END_USE(vq); +} +EXPORT_SYMBOL_GPL(virtqueue_set_save_indir); + /** * virtqueue_set_dma_premapped - set the vring premapped mode * @_vq: the struct virtqueue we're talking about. diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 1dc1c3fd8767..3f77e311d973 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -100,6 +100,8 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *vq); bool virtqueue_is_broken(struct virtqueue *vq); +void virtqueue_set_save_indir(struct virtqueue *_vq); + const struct vring *virtqueue_get_vring(struct virtqueue *vq); dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); -- Gitee From a2a8befd08f485c362f18fbb95e5bb7dda8c9695 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Tue, 25 Mar 2025 21:02:54 +0800 Subject: [PATCH 07/17] anolis: virtio-blk: add ring pair support ANBZ: #19447 Provide a new way to use virtqueue named "ring_pair". Two adjacent queues are called a pair of queues, with even numbered queues (e.g 0) are responsible for sending requests (called SQ) and odd queues harvests the requests (called CQ). 
There are some points: 1.SQ ret only means that backend has gotten the req, not finished. And this queue will not trigger irq. Res_q's ret means the request is done, triggered by irq. 2.How to match CQ's vbr to the real request? Now we reuse ioprio to pass request 'TAG', and 'len' in CQ is used for pass 'TAG' back. 3.Driver should recycle SQ voluntarily, and shouldn't unmap sgs until I/O really done, which means 'detach_buf_xxx' should be carefully handled. We can't free indir_desc either. 4.According to 3, to free indir_desc at I/O end, we should save it in driver. Correspondingly, driver should do dma_unmap for indir_desc area in 'virtblk_unmap_and_clear_desc'. 5.num_vqs must be Multiple of 2, or ring_pair mode can't be established. ------- This meams that backend handlers need to make corresponding modifications: 1.support basic vring pair. 2.After get request from SQ, update last_used_index directly. 3.Record tag value, ret with 'len' in virtqueue. Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 405 +++++++++++++++++++++++++++++--- drivers/virtio/virtio_ring.c | 27 +++ include/linux/virtio.h | 3 + include/uapi/linux/virtio_blk.h | 8 +- 4 files changed, 415 insertions(+), 28 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 210ab727e0b6..1d55bd04d9b8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -63,6 +63,19 @@ enum virtblk_ring_t { VIRTBLK_RING_CQ = 1, VIRTBLK_RING_NUM = 2 }; + +struct virtblk_cq_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist inline_sg[2]; + struct scatterlist *sgs[2]; +}; + +struct virtblk_indir_desc { + struct vring_desc *desc; + dma_addr_t dma_addr; + u32 len; +}; #endif struct virtblk_uring_cmd_pdu { @@ -74,6 +87,10 @@ struct virtio_blk_vq { struct virtqueue *vq; spinlock_t lock; char name[VQ_NAME_LEN]; +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + /* prealloced prefill req for CQ */ + struct virtblk_cq_req *cq_req; +#endif } ____cacheline_aligned_in_smp; struct virtio_blk { @@ -114,6 +131,12 @@ struct virtio_blk { struct cdev cdev; struct device cdev_device; + +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + bool ring_pair; + /* saved indirect desc pointer, dma_addr and dma_len for SQ */ + struct virtblk_indir_desc **indir_desc; +#endif }; struct virtblk_req { @@ -250,6 +273,159 @@ static void virtblk_rq_unmap(struct virtqueue *vq, struct virtblk_req *vbr) } } +static inline void virtblk_save_desc(struct virtqueue *vq, struct virtblk_req *vbr, + struct vring_desc *desc, dma_addr_t dma_addr, + u32 len) +{ + struct virtio_blk *vblk = vq->vdev->priv; + struct request *req = blk_mq_rq_from_pdu(vbr); + int tag = req->tag, qid = vq->index / VIRTBLK_RING_NUM; + struct virtblk_indir_desc *indir_desc = &vblk->indir_desc[qid][tag]; + + indir_desc->desc = desc; + indir_desc->dma_addr = dma_addr; + indir_desc->len = len; +} + +static inline void virtblk_unmap_and_clear_desc(struct virtqueue *vq, + struct virtblk_req *vbr) +{ + struct virtio_blk *vblk = vq->vdev->priv; + struct request *req = blk_mq_rq_from_pdu(vbr); + int tag = req->tag, qid = vq->index / VIRTBLK_RING_NUM; + struct virtblk_indir_desc *indir_desc = &vblk->indir_desc[qid][tag]; + + WARN_ON(!indir_desc->desc); + virtqueue_dma_unmap_page_attrs(vq, indir_desc->dma_addr, + indir_desc->len, DMA_TO_DEVICE, 0); + + kfree(indir_desc->desc); + indir_desc->desc = NULL; +} + +static int virtblk_qid_to_sq_qid(int qid) +{ + return qid * VIRTBLK_RING_NUM; +} + +static int virtblk_qid_to_cq_qid(int qid) +{ + return qid * VIRTBLK_RING_NUM 
+ 1; +} + +static void virtblk_recycle_buf(struct virtqueue *vq) +{ + unsigned int unused; + + while (virtqueue_get_buf(vq, &unused)) + ; +} + +static inline int virtblk_cq_rq_map(struct virtqueue *vq, struct scatterlist *sgs[]) +{ + int ret; + + ret = virtblk_map_sg(vq, sgs[0], DMA_TO_DEVICE); + if (ret < 0) + return ret; + ret = virtblk_map_sg(vq, sgs[1], DMA_FROM_DEVICE); + if (ret < 0) + virtblk_unmap_sg(vq, sgs[0], DMA_TO_DEVICE); + + return ret; +} + +static void virtblk_cq_rq_unmap(struct virtqueue *vq, struct scatterlist *sgs[]) +{ + virtblk_unmap_sg(vq, sgs[0], DMA_TO_DEVICE); + virtblk_unmap_sg(vq, sgs[1], DMA_FROM_DEVICE); +} + +static inline void virtblk_kfree_vqs_cq_reqs(struct virtio_blk *vblk) +{ + int i; + + if (!vblk->ring_pair) + return; + + if (vblk->vqs != NULL) { + for (i = 0; i < vblk->num_vqs; i++) { + if ((i % VIRTBLK_RING_NUM) == VIRTBLK_RING_CQ) + kfree(vblk->vqs[i].cq_req); + } + } +} + +static inline void virtblk_kfree_vblk_indir_descs(struct virtio_blk *vblk) +{ + int i; + + if (!vblk->ring_pair) + return; + + if (vblk->indir_desc != NULL) { + for (i = 0; i < vblk->num_vqs / VIRTBLK_RING_NUM; i++) + kfree(vblk->indir_desc[i]); + } + kfree(vblk->indir_desc); +} + +static int virtblk_prefill_res(struct virtio_blk *vblk, + struct virtqueue **vqs, int num_vqs) +{ + int i, j, ret, fail_i, fail_j; + unsigned int vring_size; + unsigned long flags; + struct virtblk_cq_req *vbr_res; + + for (i = 1; i < num_vqs; i += VIRTBLK_RING_NUM) { + vring_size = virtqueue_get_vring_size(vqs[i]); + + spin_lock_irqsave(&vblk->vqs[i].lock, flags); + for (j = 0; j < vring_size; j++) { + vbr_res = &vblk->vqs[i].cq_req[j]; + sg_init_one(&vbr_res->inline_sg[0], &vbr_res->out_hdr, + sizeof(struct virtio_blk_outhdr)); + sg_init_one(&vbr_res->inline_sg[1], &vbr_res->status, sizeof(u8)); + + vbr_res->sgs[0] = &vbr_res->inline_sg[0]; + vbr_res->sgs[1] = &vbr_res->inline_sg[1]; + + ret = virtblk_cq_rq_map(vqs[i], vbr_res->sgs); + if (ret < 0) { + spin_unlock_irqrestore(&vblk->vqs[i].lock, flags); + goto err; + } + + ret = virtqueue_add_sgs(vqs[i], vbr_res->sgs, 1, 1, vbr_res, GFP_ATOMIC); + if (ret < 0) { + virtblk_cq_rq_unmap(vqs[i], vbr_res->sgs); + spin_unlock_irqrestore(&vblk->vqs[i].lock, flags); + goto err; + } + } + virtqueue_kick(vqs[i]); + spin_unlock_irqrestore(&vblk->vqs[i].lock, flags); + } + return 0; + +err: + fail_i = i; + fail_j = j; + for (i = 1; i <= fail_i; i += VIRTBLK_RING_NUM) { + if (i == fail_i) + vring_size = fail_j; + else + vring_size = virtqueue_get_vring_size(vqs[i]); + + for (j = 0; j < vring_size; j++) { + vbr_res = &vblk->vqs[i].cq_req[j]; + virtblk_cq_rq_unmap(vqs[i], vbr_res->sgs); + } + } + return -1; +} + static int virtblk_add_req_bidirectional_rpair(struct virtqueue *vq, struct virtblk_req *vbr, struct scatterlist *data_sg, struct scatterlist *data_sg_extra) @@ -257,7 +433,10 @@ static int virtblk_add_req_bidirectional_rpair(struct virtqueue *vq, struct scatterlist *sgs[4]; struct scatterlist *hdr = &vbr->inline_sg[0]; struct scatterlist *status = &vbr->inline_sg[1]; + struct vring_desc *desc; unsigned int num_out = 0, num_in = 0; + dma_addr_t dma_addr; + u32 dma_len; int ret; /* @@ -275,13 +454,19 @@ static int virtblk_add_req_bidirectional_rpair(struct virtqueue *vq, sgs[num_out + num_in++] = data_sg_extra; sgs[num_out + num_in++] = status; + virtblk_recycle_buf(vq); ret = virtblk_rq_map(vq, sgs, num_out, num_in); if (ret < 0) return ret; ret = virtqueue_add_sgs_rpair(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); - if (ret < 0) + if (ret < 0) { 
virtblk_rq_unmap(vq, vbr); + return ret; + } + desc = virtqueue_indir_get_last_desc_split(vq, &dma_addr, &dma_len); + virtblk_save_desc(vq, vbr, desc, dma_addr, dma_len); + return ret; } @@ -291,7 +476,10 @@ static int virtblk_add_req_rpair(struct virtqueue *vq, struct virtblk_req *vbr, struct scatterlist *sgs[3]; struct scatterlist *hdr = &vbr->inline_sg[0]; struct scatterlist *status = &vbr->inline_sg[1]; + struct vring_desc *desc; unsigned int num_out = 0, num_in = 0; + dma_addr_t dma_addr; + u32 dma_len; int ret; sg_init_one(hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); @@ -307,15 +495,56 @@ static int virtblk_add_req_rpair(struct virtqueue *vq, struct virtblk_req *vbr, sg_init_one(status, &vbr->status, sizeof(vbr->status)); sgs[num_out + num_in++] = status; + virtblk_recycle_buf(vq); ret = virtblk_rq_map(vq, sgs, num_out, num_in); if (ret < 0) return ret; ret = virtqueue_add_sgs_rpair(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); - if (ret < 0) + if (ret < 0) { virtblk_rq_unmap(vq, vbr); + return ret; + } + desc = virtqueue_indir_get_last_desc_split(vq, &dma_addr, &dma_len); + virtblk_save_desc(vq, vbr, desc, dma_addr, dma_len); + return ret; } + +static inline void *virtblk_get_buf(struct virtio_blk *vblk, struct virtqueue *vq, u32 *len) +{ + struct virtblk_req *vbr; + struct virtqueue *sq_vq; + + vbr = virtqueue_get_buf(vq, len); + if (vbr) { + /* get request from paired req ring in ring_pair mode */ + int qid = vq->index / VIRTBLK_RING_NUM; + int tag = *len; + struct request *req = blk_mq_tag_to_rq(vblk->tag_set.tags[qid], tag); + struct virtblk_cq_req *vbr_res = (void *)vbr; + int ret; + + sq_vq = vblk->vqs[vq->index - 1].vq; + if (!req) { + pr_err("could not locate request for tag %#x, queue %d\n", + tag, qid); + return NULL; + } + + vbr = blk_mq_rq_to_pdu(req); + /* set status to the real response status. 
*/ + vbr->status = vbr_res->status; + virtblk_rq_unmap(sq_vq, vbr); + virtblk_unmap_and_clear_desc(sq_vq, vbr); + + ret = virtqueue_add_sgs(vq, vbr_res->sgs, 1, 1, vbr_res, GFP_ATOMIC); + if (ret < 0) + pr_err("failed to refill res ring %d\n", ret); + + } + return vbr; +} #endif static int virtblk_add_req_bidirectional(struct virtqueue *vq, @@ -495,6 +724,10 @@ static blk_status_t virtblk_setup_cmd_rpair(struct virtio_device *vdev, bool unmap = false; u32 type; u64 sector = 0; + u32 ioprio; + + /* for ring_pair, tag is used and occupied high 16bit of ioprio*/ + vbr->out_hdr.rpair.tag = cpu_to_virtio16(vdev, req->tag); switch (req_op(req)) { case REQ_OP_READ: @@ -527,9 +760,10 @@ static blk_status_t virtblk_setup_cmd_rpair(struct virtio_device *vdev, return BLK_STS_IOERR; } + ioprio = req_get_ioprio(req); vbr->out_hdr.type = cpu_to_virtio32(vdev, type); vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector); - vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); + vbr->out_hdr.rpair.ioprio = cpu_to_virtio16(vdev, (u16)ioprio); if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { if (virtblk_setup_discard_write_zeroes(req, unmap)) @@ -612,11 +846,12 @@ static void virtblk_done_rpair(struct virtqueue *vq) struct virtblk_req *vbr; unsigned long flags; unsigned int len; + bool kick = false; spin_lock_irqsave(&vblk->vqs[qid].lock, flags); do { virtqueue_disable_cb(vq); - while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { + while ((vbr = virtblk_get_buf(vblk, vblk->vqs[qid].vq, &len)) != NULL) { struct request *req = blk_mq_rq_from_pdu(vbr); if (likely(!blk_should_fake_timeout(req->q))) @@ -628,9 +863,14 @@ static void virtblk_done_rpair(struct virtqueue *vq) } while (!virtqueue_enable_cb(vq)); /* In case queue is stopped waiting for more buffers. 
*/ - if (req_done) + if (req_done) { blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + kick = virtqueue_kick_prepare(vq); + } spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); + + if (kick) + virtqueue_notify(vq); } #endif @@ -685,13 +925,13 @@ static blk_status_t virtio_queue_rq_rpair(struct blk_mq_hw_ctx *hctx, struct request *req = bd->rq; struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; - int num; - int qid = hctx->queue_num; + int num, qid; bool notify = false; blk_status_t status; int err; - status = virtblk_setup_cmd(vblk->vdev, req, vbr); + qid = virtblk_qid_to_sq_qid(hctx->queue_num); + status = virtblk_setup_cmd_rpair(vblk->vdev, req, vbr); if (unlikely(status)) return status; @@ -709,11 +949,11 @@ static blk_status_t virtio_queue_rq_rpair(struct blk_mq_hw_ctx *hctx, spin_lock_irqsave(&vblk->vqs[qid].lock, flags); if (vbr_is_bidirectional(vbr)) - err = virtblk_add_req_bidirectional(vblk->vqs[qid].vq, + err = virtblk_add_req_bidirectional_rpair(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, vbr->sg_table_extra.sgl); else - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, + err = virtblk_add_req_rpair(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); if (err) { @@ -859,6 +1099,9 @@ static void virtblk_put(struct virtio_blk *vblk) if (refcount_dec_and_test(&vblk->refs)) { ida_simple_remove(&vd_index_ida, vblk->index); mutex_destroy(&vblk->vdev_mutex); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + virtblk_kfree_vblk_indir_descs(vblk); +#endif kfree(vblk); } } @@ -1030,7 +1273,7 @@ static int init_vq_rpair(struct virtio_blk *vblk) const char **names; struct virtqueue **vqs; unsigned short num_vqs; - unsigned int num_poll_vqs; + unsigned int num_poll_vqs, num_queues, num_poll_queues, vring_size; struct virtio_device *vdev = vblk->vdev; struct irq_affinity desc = { 0, }; @@ -1045,22 +1288,42 @@ static int init_vq_rpair(struct virtio_blk *vblk) return -EINVAL; } - num_vqs = min_t(unsigned int, - min_not_zero(num_request_queues, nr_cpu_ids), - num_vqs); + if (num_vqs % VIRTBLK_RING_NUM) { + dev_err(&vdev->dev, + "RING_PAIR advertised but odd queues reported\n"); + vblk->ring_pair = false; + } - num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1); + /* ring pair only support split virtqueue + indirect enabled */ + if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED) || + !virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + dev_err(&vdev->dev, "rpair only support indir+split queue\n"); + vblk->ring_pair = false; + } - vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs; + /* If vring pair is not enabled, fall back to orig virtqueue use. 
*/ + if (!vblk->ring_pair) + return 1; + + num_queues = num_vqs / VIRTBLK_RING_NUM; + num_queues = min_t(unsigned int, + min_not_zero(num_request_queues, nr_cpu_ids), + num_queues); + num_poll_queues = min_t(unsigned int, poll_queues, num_queues - 1); + num_poll_vqs = num_poll_queues * VIRTBLK_RING_NUM; + num_vqs = num_queues * VIRTBLK_RING_NUM; + + vblk->io_queues[HCTX_TYPE_DEFAULT] = num_queues - num_poll_queues; vblk->io_queues[HCTX_TYPE_READ] = 0; - vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs; + vblk->io_queues[HCTX_TYPE_POLL] = num_poll_queues; dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n", vblk->io_queues[HCTX_TYPE_DEFAULT], vblk->io_queues[HCTX_TYPE_READ], vblk->io_queues[HCTX_TYPE_POLL]); - vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); + vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), + GFP_KERNEL | __GFP_ZERO); if (!vblk->vqs) return -ENOMEM; @@ -1073,14 +1336,28 @@ static int init_vq_rpair(struct virtio_blk *vblk) } for (i = 0; i < num_vqs - num_poll_vqs; i++) { - callbacks[i] = virtblk_done; - snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); + unsigned int index = i / VIRTBLK_RING_NUM; + unsigned int role = i % VIRTBLK_RING_NUM; + + if (role == VIRTBLK_RING_SQ) { + callbacks[i] = NULL; + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", index); + } else { + callbacks[i] = virtblk_done_rpair; + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "res.%d", index); + } names[i] = vblk->vqs[i].name; } for (; i < num_vqs; i++) { + unsigned int index = i / VIRTBLK_RING_NUM; + unsigned int role = i % VIRTBLK_RING_NUM; + + if (role == VIRTBLK_RING_SQ) + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req-poll.%d", index); + else + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "res-poll.%d", index); callbacks[i] = NULL; - snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%d", i); names[i] = vblk->vqs[i].name; } @@ -1090,18 +1367,38 @@ static int init_vq_rpair(struct virtio_blk *vblk) goto out; for (i = 0; i < num_vqs; i++) { + vring_size = virtqueue_get_vring_size(vqs[i]); + if ((i % VIRTBLK_RING_NUM) == VIRTBLK_RING_CQ) { + vblk->vqs[i].cq_req = kmalloc_array(vring_size, + sizeof(struct virtblk_cq_req), + GFP_KERNEL | __GFP_ZERO); + if (!vblk->vqs[i].cq_req) { + err = -ENOMEM; + goto out; + } + } else { + virtqueue_set_save_indir(vqs[i]); + vblk->vqs[i].cq_req = NULL; + } virtqueue_set_dma_premapped(vqs[i]); spin_lock_init(&vblk->vqs[i].lock); vblk->vqs[i].vq = vqs[i]; } + + err = virtblk_prefill_res(vblk, vqs, num_vqs); + if (err < 0) + vdev->config->del_vqs(vdev); + vblk->num_vqs = num_vqs; out: kfree(vqs); kfree(callbacks); kfree(names); - if (err) + if (err < 0) { + virtblk_kfree_vqs_cq_reqs(vblk); kfree(vblk->vqs); + } return err; } #endif @@ -1119,6 +1416,8 @@ static int init_vq(struct virtio_blk *vblk) struct irq_affinity desc = { 0, }; #ifdef CONFIG_VIRTIO_BLK_RING_PAIR + vblk->ring_pair = false; + if (!virtblk_rpair_disable) err = init_vq_rpair(vblk); @@ -1367,15 +1666,17 @@ static void virtblk_complete_batch(struct io_comp_batch *iob) static int virtblk_poll_rpair(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct virtio_blk *vblk = hctx->queue->queuedata; - struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx); + struct virtio_blk_vq *vq = &vblk->vqs[virtblk_qid_to_cq_qid(hctx->queue_num)]; struct virtblk_req *vbr; unsigned long flags; unsigned int len; int found = 0; + bool kick = false; + /* get buf from paired CQ ring in ring_pair mode */ spin_lock_irqsave(&vq->lock, flags); - while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) 
{ + while ((vbr = virtblk_get_buf(vblk, vq->vq, &len)) != NULL) { struct request *req = blk_mq_rq_from_pdu(vbr); found++; @@ -1385,11 +1686,16 @@ static int virtblk_poll_rpair(struct blk_mq_hw_ctx *hctx, struct io_comp_batch * virtblk_request_done(req); } - if (found) + if (found) { blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + kick = virtqueue_kick_prepare(vq->vq); + } spin_unlock_irqrestore(&vq->lock, flags); + if (kick) + virtqueue_notify(vq->vq); + return found; } #endif @@ -1732,7 +2038,7 @@ static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; struct request_queue *q; - int err, index; + int err, index, i; u32 v, blk_size, max_size, sg_elems, opt_io_size; u16 min_io_size; @@ -1799,7 +2105,15 @@ static int virtblk_probe(struct virtio_device *vdev) } memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + vblk->tag_set.ops = vblk->ring_pair ? &virtio_mq_pair_ops : + &virtio_mq_ops; + vblk->tag_set.nr_hw_queues = vblk->ring_pair ? vblk->num_vqs / VIRTBLK_RING_NUM : + vblk->num_vqs; +#else vblk->tag_set.ops = &virtio_mq_ops; + vblk->tag_set.nr_hw_queues = vblk->num_vqs; +#endif vblk->tag_set.queue_depth = queue_depth; vblk->tag_set.numa_node = NUMA_NO_NODE; vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@ -1813,11 +2127,36 @@ static int virtblk_probe(struct virtio_device *vdev) sizeof(struct virtblk_req) + sizeof(struct scatterlist) * 2 * VIRTIO_BLK_INLINE_SG_CNT; vblk->tag_set.driver_data = vblk; - vblk->tag_set.nr_hw_queues = vblk->num_vqs; vblk->tag_set.nr_maps = 1; if (vblk->io_queues[HCTX_TYPE_POLL]) vblk->tag_set.nr_maps = 3; +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + /* Beginning here, we know queue_depth of tag_set, so we should alloc + * vblk->indir_desc here. If alloc goes -ENOMEM, kfree will be + * executed. 
+ */ + if (vblk->ring_pair) { + vblk->indir_desc = kmalloc_array(vblk->num_vqs / VIRTBLK_RING_NUM, + sizeof(struct virtblk_indir_desc *), + GFP_KERNEL | __GFP_ZERO); + if (!vblk->indir_desc) { + err = -ENOMEM; + goto out_put_disk; + } + for (i = 0; i < vblk->num_vqs / VIRTBLK_RING_NUM ; i++) { + vblk->indir_desc[i] = kmalloc_array(vblk->tag_set.queue_depth, + sizeof(struct virtblk_indir_desc), + GFP_KERNEL | __GFP_ZERO); + if (!vblk->indir_desc[i]) { + err = -ENOMEM; + goto out_put_disk; + } + } + } + +#endif + err = blk_mq_alloc_tag_set(&vblk->tag_set); if (err) goto out_put_disk; @@ -1951,9 +2290,15 @@ static int virtblk_probe(struct virtio_device *vdev) out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_put_disk: +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + virtblk_kfree_vblk_indir_descs(vblk); +#endif put_disk(vblk->disk); out_free_vq: vdev->config->del_vqs(vdev); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + virtblk_kfree_vqs_cq_reqs(vblk); +#endif kfree(vblk->vqs); out_free_vblk: kfree(vblk); @@ -1987,6 +2332,9 @@ static void virtblk_remove(struct virtio_device *vdev) put_disk(vblk->disk); vdev->config->del_vqs(vdev); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + virtblk_kfree_vqs_cq_reqs(vblk); +#endif kfree(vblk->vqs); mutex_unlock(&vblk->vdev_mutex); @@ -2012,6 +2360,9 @@ static int virtblk_freeze(struct virtio_device *vdev) flush_work(&vblk->config_work); vdev->config->del_vqs(vdev); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + virtblk_kfree_vqs_cq_reqs(vblk); +#endif kfree(vblk->vqs); return 0; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 07a1761d1f3d..b2230aac9838 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -530,6 +530,29 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, return desc; } +struct vring_desc *virtqueue_indir_get_last_desc_split(struct virtqueue *_vq, + dma_addr_t *dma_addr, u32 *len) +{ + int tmp, idx; + struct vring_virtqueue *vq = to_vvq(_vq); + /* + * we should ensure this func is called after virtqueue_add_desc_split + * and before virtqueue_kick_prepare. 
*/ + if (!vq->indirect) + return NULL; + idx = (vq->split.avail_idx_shadow - 1) & (vq->split.vring.num - 1); + tmp = virtio16_to_cpu(_vq->vdev, vq->split.vring.avail->ring[idx]); + + /* get the last desc's dma_addr and dma_len + */ + *dma_addr = vq->split.desc_extra[tmp].addr; + *len = vq->split.desc_extra[tmp].len; + + return vq->split.desc_state[tmp].indir_desc; +} +EXPORT_SYMBOL(virtqueue_indir_get_last_desc_split); + static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, struct vring_desc *desc, unsigned int i, @@ -2995,11 +3018,15 @@ int virtqueue_set_dma_premapped(struct virtqueue *_vq) if (num != vq->vq.num_free) { END_USE(vq); + pr_debug("%s:%d err num is not equal to num_free, num=%u, num_free=%u\n", + __func__, __LINE__, num, vq->vq.num_free); return -EINVAL; } if (!vq->use_dma_api) { END_USE(vq); + pr_debug("%s:%d err vring does not use the dma api\n", + __func__, __LINE__); return -EINVAL; } diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 3f77e311d973..082527593f3d 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -69,6 +69,9 @@ int virtqueue_add_sgs_rpair(struct virtqueue *vq, void *data, gfp_t gfp); +struct vring_desc *virtqueue_indir_get_last_desc_split(struct virtqueue *_vq, + dma_addr_t *dma_addr, u32 *len); + struct device *virtqueue_dma_dev(struct virtqueue *vq); bool virtqueue_kick(struct virtqueue *vq); diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 78af28484c30..2c587a4d0530 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -169,7 +169,13 @@ struct virtio_blk_outhdr { /* VIRTIO_BLK_T* */ __virtio32 type; /* io priority. */ - __virtio32 ioprio; + union { + struct { + __virtio16 ioprio; + __virtio16 tag; + } rpair; + __virtio32 ioprio; + }; /* Sector (ie. 512 byte offset) */ __virtio64 sector; }; -- Gitee From 061a7fc5a8258dc915b3335637348680f768cc46 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Mon, 24 Mar 2025 21:32:38 +0800 Subject: [PATCH 08/17] anolis: virtio_blk: reuse CQ ioprio to locate error position ANBZ: #19447 After enabling the vring_pair feature, whenever we harvest a CQE from the CQ we immediately add the slot back into the sgs. Because of this, the backend may not know whether a slot has already been harvested. To locate the error position more accurately and to perform failover, the backend needs to track how many I/Os it has responded to and which CQ slots are free. We reuse the CQ out_hdr ioprio field to store a counter value: after harvesting a CQE, we increase the counter and save it in ioprio. From the backend's point of view, this shows that the previous I/O is finished and the slot is empty, so the backend will not perform failover for that I/O later.
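To illustrate how a backend might consume this counter (the backend is not part of this series; the helper below is a hypothetical sketch, not an existing API): the tag behaves like a 16-bit serial number, so a wrap-safe comparison is enough to tell whether the guest has already harvested and refilled a CQ slot.

    /*
     * Hypothetical backend-side helper, illustration only. It assumes the
     * guest stamps out_hdr.rpair.tag with an incrementing per-CQ counter
     * each time it prefills or refills a slot, as the driver does in
     * virtblk_prefill_res()/virtblk_get_buf().
     */
    #include <stdbool.h>
    #include <stdint.h>

    static bool cq_slot_reused_by_guest(uint16_t slot_tag, uint16_t last_completed_tag)
    {
            /* serial-number arithmetic: true when slot_tag is newer */
            return (int16_t)(slot_tag - last_completed_tag) > 0;
    }

If the tag stored in a slot has advanced past the last value the backend completed, the guest must already have reaped that completion, so no failover is attempted for it.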
Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1d55bd04d9b8..b5dad0555465 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -88,6 +88,8 @@ struct virtio_blk_vq { spinlock_t lock; char name[VQ_NAME_LEN]; #ifdef CONFIG_VIRTIO_BLK_RING_PAIR + /* check num for CQ */ + u16 counter; /* prealloced prefill req for CQ */ struct virtblk_cq_req *cq_req; #endif @@ -380,10 +382,14 @@ static int virtblk_prefill_res(struct virtio_blk *vblk, for (i = 1; i < num_vqs; i += VIRTBLK_RING_NUM) { vring_size = virtqueue_get_vring_size(vqs[i]); + vblk->vqs[i].counter = 0; spin_lock_irqsave(&vblk->vqs[i].lock, flags); for (j = 0; j < vring_size; j++) { vbr_res = &vblk->vqs[i].cq_req[j]; + vbr_res->out_hdr.rpair.tag = cpu_to_virtio16(vblk->vdev, + vblk->vqs[i].counter); + vblk->vqs[i].counter += 1; sg_init_one(&vbr_res->inline_sg[0], &vbr_res->out_hdr, sizeof(struct virtio_blk_outhdr)); sg_init_one(&vbr_res->inline_sg[1], &vbr_res->status, sizeof(u8)); @@ -538,6 +544,8 @@ static inline void *virtblk_get_buf(struct virtio_blk *vblk, struct virtqueue *v virtblk_rq_unmap(sq_vq, vbr); virtblk_unmap_and_clear_desc(sq_vq, vbr); + vbr_res->out_hdr.rpair.tag = cpu_to_virtio16(vblk->vdev, + vblk->vqs[vq->index].counter++); ret = virtqueue_add_sgs(vq, vbr_res->sgs, 1, 1, vbr_res, GFP_ATOMIC); if (ret < 0) pr_err("failed to refill res ring %d\n", ret); -- Gitee From 78206abacd9afe792c452500370ce27ab4c1c992 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Thu, 27 Mar 2025 00:19:11 +0800 Subject: [PATCH 09/17] anolis: virtio-blk: add ext_feature negotiation method and ring-pair feature bit ANBZ: #19447 Use external feature bits to manage the ring pair feature. This needs backend-specific support. The driver reads host_ext_features and writes guest_ext_features. The ring pair feature currently holds virtio-blk extra feature bit 0. Signed-off-by: Ferry Meng Reviewed-by: Yifei Zhou --- drivers/block/Makefile | 3 + drivers/block/virtio_blk.c | 66 +++++++++++-- drivers/block/virtio_blk_ext.c | 171 +++++++++++++++++++++++++++++++++ drivers/block/virtio_blk_ext.h | 16 +++ 4 files changed, 250 insertions(+), 6 deletions(-) create mode 100644 drivers/block/virtio_blk_ext.c create mode 100644 drivers/block/virtio_blk_ext.h diff --git a/drivers/block/Makefile b/drivers/block/Makefile index edfd8503471b..722e2eb2bd93 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -29,6 +29,9 @@ obj-$(CONFIG_BLK_DEV_NBD) += nbd.o obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o +virtio_blk-y := virtio_blk.o +virtio_blk-$(CONFIG_VIRTIO_BLK_RING_PAIR) += virtio_blk_ext.o + obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b5dad0555465..41c24f2115be 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -20,6 +20,11 @@ #include #include #include +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +#include "../virtio/virtio_pci_common.h" +#include +#include "virtio_blk_ext.h" +#endif #define PART_BITS 4 #define VQ_NAME_LEN 16 @@ -1273,6 +1278,23 @@ bool virtblk_rpair_disable; module_param_named(rpair_disable, virtblk_rpair_disable, bool, 0444); MODULE_PARM_DESC(rpair_disable, "disable vring pair detection.
(0=Not [default], 1=Yes)"); +int check_ext_feature(struct virtio_blk *vblk, void __iomem *ioaddr, + u32 *host_ext_features, + u32 *guest_ext_features) +{ + int ret = 0; + + ret = virtblk_get_ext_feature(ioaddr, host_ext_features); + if (ret < 0) + return ret; + + vblk->ring_pair = !!(*host_ext_features & VIRTIO_BLK_EXT_F_RING_PAIR); + if (vblk->ring_pair) + *guest_ext_features |= (VIRTIO_BLK_EXT_F_RING_PAIR); + + return 0; +} + static int init_vq_rpair(struct virtio_blk *vblk) { int err = 0; @@ -1282,8 +1304,27 @@ static int init_vq_rpair(struct virtio_blk *vblk) struct virtqueue **vqs; unsigned short num_vqs; unsigned int num_poll_vqs, num_queues, num_poll_queues, vring_size; + u32 ext_host_features = 0, ext_guest_features = 0, ext_bar_offset = 0; struct virtio_device *vdev = vblk->vdev; struct irq_affinity desc = { 0, }; + void __iomem *ioaddr = NULL; + + err = virtblk_get_ext_feature_bar(vdev, &ext_bar_offset); + /* if check ext feature error, fall back to orig virtqueue use. */ + if ((err < 0) || !ext_bar_offset) + return 1; + + ioaddr = pci_iomap_range(to_vp_device(vdev)->pci_dev, 0, ext_bar_offset, 16); + if (!ioaddr) { + err = 1; + goto negotiate_err; + } + + err = check_ext_feature(vblk, ioaddr, &ext_host_features, &ext_guest_features); + if ((err < 0) || !vblk->ring_pair) { + err = 1; + goto negotiate_err; + } err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, struct virtio_blk_config, num_queues, @@ -1293,25 +1334,29 @@ static int init_vq_rpair(struct virtio_blk *vblk) if (!err && !num_vqs) { dev_err(&vdev->dev, "MQ advertised but zero queues reported\n"); - return -EINVAL; + err = -EINVAL; + goto negotiate_err; } if (num_vqs % VIRTBLK_RING_NUM) { dev_err(&vdev->dev, "RING_PAIR advertised but odd queues reported\n"); - vblk->ring_pair = false; + err = 1; + goto negotiate_err; } /* ring pair only support split virtqueue + indirect enabled */ if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED) || !virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { dev_err(&vdev->dev, "rpair only support indir+split queue\n"); - vblk->ring_pair = false; + err = 1; + goto negotiate_err; } - /* If vring pair is not enabled, fall back to orig virtqueue use. 
*/ - if (!vblk->ring_pair) - return 1; + virtblk_set_ext_feature(ioaddr, ext_guest_features); + pci_iounmap(to_vp_device(vdev)->pci_dev, ioaddr); + dev_info(&vdev->dev, "rpair enabled, ext_guest_feature set 0x%x\n", + ext_guest_features); num_queues = num_vqs / VIRTBLK_RING_NUM; num_queues = min_t(unsigned int, @@ -1408,6 +1453,15 @@ static int init_vq_rpair(struct virtio_blk *vblk) kfree(vblk->vqs); } return err; + +negotiate_err: + if (ioaddr) { + ext_guest_features &= ~VIRTIO_BLK_EXT_F_RING_PAIR; + virtblk_set_ext_feature(ioaddr, ext_guest_features); + pci_iounmap(to_vp_device(vdev)->pci_dev, ioaddr); + } + vblk->ring_pair = false; + return err; } #endif diff --git a/drivers/block/virtio_blk_ext.c b/drivers/block/virtio_blk_ext.c new file mode 100644 index 000000000000..c4d686a333ac --- /dev/null +++ b/drivers/block/virtio_blk_ext.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include "../virtio/virtio_pci_common.h" +#include "virtio_blk_ext.h" +#include + + +#define VIRTIO_PCI_VSF_MAGIC_NUM 0x0 +#define VIRTIO_PCI_VSF_MAGIC_NUM_VAL 0x7D4FEE9D +#define VIRTIO_PCI_HOST_VNDR_SPEC_FEATURE_SELECT 0x04 +/* A 32-bit r/o bitmask of the vendor specific features supported by the host */ +#define VIRTIO_PCI_HOST_VNDR_SPEC_FEATURES 0x08 + +#define VIRTIO_PCI_GUEST_VNDR_SPEC_FEATURE_SELECT 0x0c +/* A 32-bit r/w bitmask of the vendor specific features activated by the guest */ +#define VIRTIO_PCI_GUEST_VNDR_SPEC_FEATURES 0x10 + + +/* xdragon vsc */ +#define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */ +#define PCI_XDRAGON_VSC_CFGTYPE 0xff + +/* xdragon vsec */ +#define PCI_EXT_CAP_ID_VNDR 0x0B +#define PCI_EXP_XDRAGON_VSEC_CFGTYPE 0xff +#define XDRAGON_VSEC_VERSION 1 + +#define XDRAGON_XVCS_MAGIC 0x53435658 +#define XDRAGON_XVCS_VSF_KEY "xvcs-vsf" +#define XDRAGON_XVCS_VERSION 1 +#define XDRAGON_XVCS_NUM_MAX 32U +#define XDRAGON_XVCS_KEY_MAX 16 + +#define XDRAGON_XVCS_O_MAGIC 0 +#define XDRAGON_XVCS_O_VER 4 +#define XDRAGON_XVCS_O_ADDR 12 +#define XDRAGON_XVCS_O_F_CNT 16 +#define XDRAGON_XVCS_O_CUR 16 +#define XDRAGON_XVCS_O_NEXT 20 +#define XDRAGON_XVCS_O_VSF 32 +static void xdragon_read_xvcs(struct pci_dev *d, u32 pos, + u32 cap_len, u32 addr, u32 num, void *data) +{ + u32 idx, where; + + for (idx = 0; idx < num; idx += 4) { + where = addr + idx; + pci_write_config_dword(d, pos + cap_len - 8, where); + pci_read_config_dword(d, pos + cap_len - 4, (u32 *)((u8 *)data + idx)); + } +} + +static int xdragon_vcs_find_vsf_bar0_offset(struct pci_dev *dev, uint32_t cap_len, + uint32_t pos, u32 *bar0_offset) +{ + u8 buf[XDRAGON_XVCS_KEY_MAX+1]; + u32 where; + u32 idx, num; + u32 reg; + + /* check xvcs magic */ + xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_MAGIC, sizeof(reg), ®); + if (reg != XDRAGON_XVCS_MAGIC) { + pr_err("%s: xvcs magic 0x%x not match\n", __func__, reg); + return -1; + } + /* check xvcs version */ + xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_VER, sizeof(reg), ®); + if (reg != XDRAGON_XVCS_VERSION) { + pr_err("%s: xvcs version 0x%x not match\n", __func__, reg); + return -1; + } + /* xvcs feat block addr */ + xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_ADDR, sizeof(reg), ®); + where = reg; + /* xvcs feat cnt */ + xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_F_CNT, sizeof(reg), ®); + num = reg; + for (idx = 0; (idx < min(XDRAGON_XVCS_NUM_MAX, num)) && (where > 0); idx++) { + memset(buf, 0, sizeof(buf)); + + /* self addr check */ + xdragon_read_xvcs(dev, pos, cap_len, + where + XDRAGON_XVCS_O_CUR, sizeof(reg), ®); + if (reg != 
where) + return -1; + + /* check key */ + xdragon_read_xvcs(dev, pos, cap_len, where, XDRAGON_XVCS_KEY_MAX, buf); + + /* found vsf */ + if (strncmp(buf, XDRAGON_XVCS_VSF_KEY, sizeof(XDRAGON_XVCS_VSF_KEY)) == 0) { + xdragon_read_xvcs(dev, pos, cap_len, where + XDRAGON_XVCS_O_VSF, + sizeof(reg), ®); + *bar0_offset = reg; + return 0; + } + /* next vcs feat */ + xdragon_read_xvcs(dev, pos, cap_len, + where + XDRAGON_XVCS_O_NEXT, sizeof(reg), ®); + where = reg; + } + pr_err("%s: vsf offset not found\n", __func__); + return -1; +} + +int virtblk_get_ext_feature_bar(struct virtio_device *vdev, u32 *bar_offset) +{ + struct pci_dev *dev = to_vp_device(vdev)->pci_dev; + int cap_len, vsec = 0; + u16 val; + u8 type, len = 0; + bool found = false; + + /* try to find vsc */ + for (vsec = pci_find_capability(dev, PCI_CAP_ID_VNDR); + vsec > 0; + vsec = pci_find_next_capability(dev, vsec, PCI_CAP_ID_VNDR)) { + pci_read_config_byte(dev, vsec + offsetof(struct virtio_pci_cap, cfg_type), &type); + if (type == PCI_XDRAGON_VSC_CFGTYPE) { + pci_read_config_byte(dev, + vsec + offsetof(struct virtio_pci_cap, cap_len), &len); + cap_len = len; + found = true; + break; + } + } + + /* try to find vsec */ + if (!found) { + vsec = 0; + while ((vsec = pci_find_next_ext_capability(dev, vsec, + PCI_EXT_CAP_ID_VNDR))) { + pci_read_config_word(dev, vsec + 0x4, &val); + /* vsec found */ + if (val == PCI_EXP_XDRAGON_VSEC_CFGTYPE) { + /* get vsec cap len */ + pci_read_config_word(dev, vsec + 0x6, &val); + if ((val & 0xF) != XDRAGON_VSEC_VERSION) + continue; + cap_len = (val >> 4) & (0xFFF); + found = true; + break; + } + } + } + + return found ? xdragon_vcs_find_vsf_bar0_offset(dev, cap_len, vsec, bar_offset) : -1; +} + +int virtblk_get_ext_feature(void __iomem *ioaddr, u32 *host_features) +{ + int ret; + + /* read ext bar magci number */ + ret = ioread32(ioaddr); + if (ret != VIRTIO_PCI_VSF_MAGIC_NUM_VAL) + return -EOPNOTSUPP; + + iowrite32(0, ioaddr + VIRTIO_PCI_HOST_VNDR_SPEC_FEATURE_SELECT); + *host_features = ioread32(ioaddr + VIRTIO_PCI_HOST_VNDR_SPEC_FEATURES); + + return 0; +} + +void virtblk_set_ext_feature(void __iomem *ioaddr, u32 guest_ext_features) +{ + iowrite32(0, ioaddr + VIRTIO_PCI_GUEST_VNDR_SPEC_FEATURE_SELECT); + iowrite32(guest_ext_features, ioaddr + VIRTIO_PCI_GUEST_VNDR_SPEC_FEATURES); +} diff --git a/drivers/block/virtio_blk_ext.h b/drivers/block/virtio_blk_ext.h new file mode 100644 index 000000000000..5f96f73179c0 --- /dev/null +++ b/drivers/block/virtio_blk_ext.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +/* ext feature bit definition */ +#define VIRTIO_BLK_EXT_F_RING_PAIR (1U << 0) +#define VIRTIO_BLK_EXT_F_INVAL (-1) + +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +void virtblk_set_ext_feature(void __iomem *ioaddr, u32 guest_ext_features); + +int virtblk_get_ext_feature(void __iomem *ioaddr, u32 *host_features); + +int virtblk_get_ext_feature_bar(struct virtio_device *vdev, u32 *bar_offset); +#endif -- Gitee From 11d3cbfe199351047c93c13f1f10c2f4d70f666f Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Thu, 27 Mar 2025 00:23:32 +0800 Subject: [PATCH 10/17] anolis: virtio-blk: add no_align extra feature bit ANBZ: #19447 Add NO_ALIGN support feature bit at ext-feature bit-1. This controls blk-mq dma alignment(0). 
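Roughly speaking, the queue's dma_alignment value is an "alignment minus one" mask (511 by default, 0 once this bit is negotiated), and the block layer's user-buffer mapping paths only fall back to a bounce copy when a buffer address hits that mask. A minimal sketch of the idea, with hypothetical names (illustration only, not kernel code):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Illustration only: dma_alignment_mask is 511 on a default queue and 0
     * when NO_ALIGN is negotiated, so with NO_ALIGN no buffer address can
     * ever trip the check.
     */
    static bool buffer_needs_bounce(uintptr_t buf_addr, unsigned long dma_alignment_mask)
    {
            return (buf_addr & dma_alignment_mask) != 0;
    }

With the mask at 0, byte-aligned user buffers can be mapped directly instead of being copied through an aligned bounce buffer.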
Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 10 ++++++++ drivers/block/virtio_blk_ext.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 41c24f2115be..60b2dd174e79 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -141,6 +141,7 @@ struct virtio_blk { #ifdef CONFIG_VIRTIO_BLK_RING_PAIR bool ring_pair; + bool no_algin; /* saved indirect desc pointer, dma_addr and dma_len for SQ */ struct virtblk_indir_desc **indir_desc; #endif @@ -1291,6 +1292,9 @@ int check_ext_feature(struct virtio_blk *vblk, void __iomem *ioaddr, vblk->ring_pair = !!(*host_ext_features & VIRTIO_BLK_EXT_F_RING_PAIR); if (vblk->ring_pair) *guest_ext_features |= (VIRTIO_BLK_EXT_F_RING_PAIR); + vblk->no_algin = !!(*host_ext_features & VIRTIO_BLK_EXT_F_RING_NO_ALIGN); + if (vblk->no_algin) + *guest_ext_features |= (VIRTIO_BLK_EXT_F_RING_NO_ALIGN); return 0; } @@ -1479,6 +1483,7 @@ static int init_vq(struct virtio_blk *vblk) #ifdef CONFIG_VIRTIO_BLK_RING_PAIR vblk->ring_pair = false; + vblk->no_algin = false; if (!virtblk_rpair_disable) err = init_vq_rpair(vblk); @@ -2265,6 +2270,11 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_max_segment_size(q, max_size); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + if (vblk->no_algin) + blk_queue_dma_alignment(q, 0); +#endif + /* Host can optionally specify the block size of the device */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, struct virtio_blk_config, blk_size, diff --git a/drivers/block/virtio_blk_ext.h b/drivers/block/virtio_blk_ext.h index 5f96f73179c0..389fb58f1518 100644 --- a/drivers/block/virtio_blk_ext.h +++ b/drivers/block/virtio_blk_ext.h @@ -5,6 +5,7 @@ /* ext feature bit definition */ #define VIRTIO_BLK_EXT_F_RING_PAIR (1U << 0) +#define VIRTIO_BLK_EXT_F_RING_NO_ALIGN (1U << 1) #define VIRTIO_BLK_EXT_F_INVAL (-1) #ifdef CONFIG_VIRTIO_BLK_RING_PAIR -- Gitee From b447e0f2dd5f80c060d46cbc93ec017765c51456 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Thu, 27 Mar 2025 00:24:57 +0800 Subject: [PATCH 11/17] anolis: virtio-blk: add hide_bdev extra feature bit ANBZ: #19447 Add a hide block device feature bit at ext-feature bit-2. When this bit is negotiated, the block device node /dev/vdX is not registered, while the character device still exists.
Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 10 ++++++++++ drivers/block/virtio_blk_ext.h | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 60b2dd174e79..f943fa131871 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -142,6 +142,7 @@ struct virtio_blk { #ifdef CONFIG_VIRTIO_BLK_RING_PAIR bool ring_pair; bool no_algin; + bool hide_bdev; /* saved indirect desc pointer, dma_addr and dma_len for SQ */ struct virtblk_indir_desc **indir_desc; #endif @@ -1295,6 +1296,9 @@ int check_ext_feature(struct virtio_blk *vblk, void __iomem *ioaddr, vblk->no_algin = !!(*host_ext_features & VIRTIO_BLK_EXT_F_RING_NO_ALIGN); if (vblk->no_algin) *guest_ext_features |= (VIRTIO_BLK_EXT_F_RING_NO_ALIGN); + vblk->hide_bdev = !!(*host_ext_features & VIRTIO_BLK_EXT_F_HIDE_BLOCK); + if (vblk->hide_bdev) + *guest_ext_features |= (VIRTIO_BLK_EXT_F_HIDE_BLOCK); return 0; } @@ -1484,6 +1488,7 @@ static int init_vq(struct virtio_blk *vblk) #ifdef CONFIG_VIRTIO_BLK_RING_PAIR vblk->ring_pair = false; vblk->no_algin = false; + vblk->hide_bdev = false; if (!virtblk_rpair_disable) err = init_vq_rpair(vblk); @@ -2354,7 +2359,12 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + if (!vblk->hide_bdev) + device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); +#else device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); +#endif WARN_ON(virtblk_cdev_add(vblk)); return 0; diff --git a/drivers/block/virtio_blk_ext.h b/drivers/block/virtio_blk_ext.h index 389fb58f1518..1f8c61d1d9f0 100644 --- a/drivers/block/virtio_blk_ext.h +++ b/drivers/block/virtio_blk_ext.h @@ -5,7 +5,8 @@ /* ext feature bit definition */ #define VIRTIO_BLK_EXT_F_RING_PAIR (1U << 0) -#define VIRTIO_BLK_EXT_F_RING_NO_ALIGN (1U << 1) +#define VIRTIO_BLK_EXT_F_RING_NO_ALIGN (1U << 1) +#define VIRTIO_BLK_EXT_F_HIDE_BLOCK (1U << 2) #define VIRTIO_BLK_EXT_F_INVAL (-1) #ifdef CONFIG_VIRTIO_BLK_RING_PAIR -- Gitee From aae5c60ad6bc3e0691962de0d95468ae44bdb847 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Wed, 19 Mar 2025 19:02:38 +0800 Subject: [PATCH 12/17] anolis: virtio-blk: enable CONFIG_VIRTIO_BLK_RING_PAIR default ANBZ: #19447 For anolis kernel from ANCK-5.10-019, enable this feature by default. To use virtio-blk vring pair, you need: 1. backend ring pair support. 2. modprobe virtio-blk with params "rp_enable" 3. (not must) dynamic request queue configurations "nr_pre_rqs" Signed-off-by: Ferry Meng --- anolis/configs/L0-MANDATORY/default/CONFIG_VIRTIO_BLK_RING_PAIR | 1 + 1 file changed, 1 insertion(+) create mode 100644 anolis/configs/L0-MANDATORY/default/CONFIG_VIRTIO_BLK_RING_PAIR diff --git a/anolis/configs/L0-MANDATORY/default/CONFIG_VIRTIO_BLK_RING_PAIR b/anolis/configs/L0-MANDATORY/default/CONFIG_VIRTIO_BLK_RING_PAIR new file mode 100644 index 000000000000..621ab9591ddf --- /dev/null +++ b/anolis/configs/L0-MANDATORY/default/CONFIG_VIRTIO_BLK_RING_PAIR @@ -0,0 +1 @@ +CONFIG_VIRTIO_BLK_RING_PAIR=y -- Gitee From b33b4c3c7b5bed6d8a6d6a37005fa82f1a8645a6 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Wed, 19 Mar 2025 15:02:10 +0800 Subject: [PATCH 13/17] anolis: blk-mq: support dynamic request alloc ANBZ: #19447 In blk-mq layer, tag_set->static_rqs is preallocated and distributed later. When queue_depth is too large , we want to avoid too much memory overhead caused by static preallocation. 
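As a rough illustration (hypothetical sizes): with sizeof(struct request) + cmd_size of about 1 KiB and 4 KiB pages, a queue depth of 16384 preallocates roughly 16 MiB (about 4096 pages) of requests per hardware queue, while keeping only 256 static requests cuts that to roughly 256 KiB (about 64 pages); the remaining requests are allocated only if they are actually used.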
Design a new tag_set flag "BLK_MQ_F_DYN_ALLOC" and tag_set new member "nr_static_rqs". After setting the flag, tag No. above 'nr_static_rqs' requests (struct request + driver defined pdu ) will be allocated. Others will be allocated during blk_mq_alloc_request. old_pages = [sizeof(struct request) + sizeof(struct pdu)] * queue_depth / page_size new_pages = [sizeof(struct request) + sizeof(struct pdu)] * nr_static_rqs / page_size Signed-off-by: Ferry Meng --- block/blk-mq.c | 42 +++++++++++++++++++++++++++++++++++++----- include/linux/blk-mq.h | 5 +++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e46e8c125eee..23ec7071cb12 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -324,7 +324,14 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, unsigned int tag, u64 alloc_time_ns) { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); - struct request *rq = tags->static_rqs[tag]; + struct request *rq; + struct blk_mq_tag_set *set = data->q->tag_set; + + if ((set->flags & BLK_MQ_F_DYN_ALLOC) && (tag >= set->nr_static_rqs)) + tags->static_rqs[tag] = kmalloc(sizeof(struct request) + + set->cmd_size, GFP_KERNEL | __GFP_ZERO); + + rq = tags->static_rqs[tag]; if (data->q->elevator) { rq->tag = BLK_MQ_NO_TAG; @@ -553,15 +560,28 @@ void __blk_mq_free_request(struct request *rq) struct request_queue *q = rq->q; struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; - const int sched_tag = rq->internal_tag; + const int sched_tag = rq->internal_tag, tag = rq->tag; blk_crypto_free_request(rq); blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; - if (rq->tag != BLK_MQ_NO_TAG) - blk_mq_put_tag(hctx->tags, ctx, rq->tag); - if (sched_tag != BLK_MQ_NO_TAG) + if (rq->tag != BLK_MQ_NO_TAG) { + if ((q->tag_set->flags & BLK_MQ_F_DYN_ALLOC) && + rq->tag >= q->tag_set->nr_static_rqs) { + hctx->tags->static_rqs[rq->tag] = NULL; + kfree(rq); + } + blk_mq_put_tag(hctx->tags, ctx, tag); + } + if (sched_tag != BLK_MQ_NO_TAG) { + if ((q->tag_set->flags & BLK_MQ_F_DYN_ALLOC) && + sched_tag >= q->tag_set->nr_static_rqs) { + hctx->sched_tags->static_rqs[sched_tag] = NULL; + kfree(rq); + } blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); + } + blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -2498,6 +2518,9 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (!rq) continue; set->ops->exit_request(set, rq, hctx_idx); + if ((set->flags & BLK_MQ_F_DYN_ALLOC) && + (i >= set->nr_static_rqs)) + kfree(rq); tags->static_rqs[i] = NULL; } } @@ -2597,6 +2620,14 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, */ rq_size = round_up(sizeof(struct request) + set->cmd_size, cache_line_size()); + + if (set->flags & BLK_MQ_F_DYN_ALLOC) { + if (!set->nr_static_rqs || (set->nr_static_rqs > depth)) + set->nr_static_rqs = depth; + + depth = set->nr_static_rqs; + } + left = rq_size * depth; for (i = 0; i < depth; ) { @@ -3296,6 +3327,7 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, set->queue_depth = queue_depth; set->numa_node = NUMA_NO_NODE; set->flags = set_flags; + set->nr_static_rqs = 0; ret = blk_mq_alloc_tag_set(set); if (ret) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ddda5ccea39f..1fb570bef500 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -273,6 +273,9 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; + /* number of static alloc rqs if dyn_alloc flag is set */ + unsigned int 
nr_static_rqs; + CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) CK_KABI_RESERVE(3) @@ -428,6 +431,8 @@ enum { BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, + BLK_MQ_F_DYN_ALLOC = 1 << 31, + BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, -- Gitee From dba94469668931ca7a93322155525a64a82d7e9c Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Wed, 12 Mar 2025 16:04:15 +0800 Subject: [PATCH 14/17] anolis: virtio-blk: dyn request alloc when enabled vring pair ANBZ: #19447 Enable dyn request alloc if one block device enable vring pair. Besides, we also set BLK_MQ_F_NO_SCHED to avoid using tag scheduler. Providing an extra module param 'dyn_max_rqs' for virtio-blk driver. After enabling ring_pair mode, dyn_max_rqs will be the real queue_depth, original queue_depth indicates the amount of pre_alloc 'struct requests +pdu' in static_rqs. If param not set, use default value(16384). Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f943fa131871..a99a251d9dde 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -2106,6 +2106,12 @@ static int virtblk_cdev_add(struct virtio_blk *vblk) static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static unsigned short virtblk_dyn_max_rqs = 16384; +module_param_named(dyn_max_rqs, virtblk_dyn_max_rqs, short, 0444); +MODULE_PARM_DESC(dyn_max_rqs, "Max requests per rpair(0~65535), default 2^14"); +#endif + static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -2178,17 +2184,28 @@ static int virtblk_probe(struct virtio_device *vdev) memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); #ifdef CONFIG_VIRTIO_BLK_RING_PAIR - vblk->tag_set.ops = vblk->ring_pair ? &virtio_mq_pair_ops : - &virtio_mq_ops; - vblk->tag_set.nr_hw_queues = vblk->ring_pair ? vblk->num_vqs / VIRTBLK_RING_NUM : - vblk->num_vqs; + if (vblk->ring_pair) { + vblk->tag_set.ops = &virtio_mq_pair_ops; + vblk->tag_set.nr_hw_queues = vblk->num_vqs / VIRTBLK_RING_NUM; + /* For ring pair, we don't want to use io scheduler. So we set + * NO_SCHED flag, in this case BLK_MQ_F_SHOULD_MERGE is unused. + */ + vblk->tag_set.flags = BLK_MQ_F_DYN_ALLOC | BLK_MQ_F_NO_SCHED; + vblk->tag_set.queue_depth = virtblk_dyn_max_rqs; + vblk->tag_set.nr_static_rqs = queue_depth; + } else { + vblk->tag_set.ops = &virtio_mq_ops; + vblk->tag_set.nr_hw_queues = vblk->num_vqs; + vblk->tag_set.queue_depth = queue_depth; + vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + } #else vblk->tag_set.ops = &virtio_mq_ops; vblk->tag_set.nr_hw_queues = vblk->num_vqs; -#endif vblk->tag_set.queue_depth = queue_depth; - vblk->tag_set.numa_node = NUMA_NO_NODE; vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; +#endif + vblk->tag_set.numa_node = NUMA_NO_NODE; /* For bidirectional passthrough vblk request, both WRITE and READ * operations need pre-alloc inline SGs. So we should prealloc twice * the size than original ways. 
Due to the inability to predict whether -- Gitee From a50345ec58afdd20be513550ccc90a68c9d5573b Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Fri, 7 Feb 2025 16:57:28 +0800 Subject: [PATCH 15/17] anolis: virtio-blk: add trace events ANBZ: #19447 add trace events for virtio-blk Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 9 ++++ include/trace/events/virtio_blk.h | 81 +++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 include/trace/events/virtio_blk.h diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a99a251d9dde..36eea97200ee 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -26,6 +26,9 @@ #include "virtio_blk_ext.h" #endif +#define CREATE_TRACE_POINTS +#include + #define PART_BITS 4 #define VQ_NAME_LEN 16 #define MAX_DISCARD_SEGMENTS 256u @@ -844,6 +847,8 @@ static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + trace_virtblk_request_done(req, vbr->status); + if (vbr_is_bidirectional(vbr)) virtblk_unmap_data_bidirectional(req, vbr); else @@ -1028,6 +1033,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, else num = virtblk_map_data(hctx, req, vbr); + trace_virtio_queue_rq(req, vbr_is_bidirectional(vbr), num); + if (unlikely(num < 0)) { virtblk_cleanup_cmd(req); return BLK_STS_RESOURCE; @@ -1982,6 +1989,8 @@ static int virtblk_uring_cmd_io(struct virtio_blk *vblk, } } + trace_virtblk_uring_cmd_io(req, type, cmd->sector); + /* to free bio on completion, as req->bio will be null at that time */ pdu->bio = req->bio; req->end_io_data = ioucmd; diff --git a/include/trace/events/virtio_blk.h b/include/trace/events/virtio_blk.h new file mode 100644 index 000000000000..2289eaa38d9f --- /dev/null +++ b/include/trace/events/virtio_blk.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM virtio_blk + +#if !defined(_TRACE_VIRTIO_BLK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VIRTIO_BLK_H + +#include + +struct request; + +TRACE_EVENT(virtblk_request_done, + + TP_PROTO(struct request *req, u8 ret), + + TP_ARGS(req, ret), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(u8, ret) + ), + + TP_fast_assign( + __entry->req = req; + __entry->ret = ret; + ), + + TP_printk("DONE: req=%p qid=%d tag=%d ret=%d ioucmd=%p", + __entry->req, __entry->req->q->id, __entry->req->tag, + __entry->ret, __entry->req->end_io_data) +); + +TRACE_EVENT(virtblk_uring_cmd_io, + + TP_PROTO(struct request *req, u32 type, u64 sector), + + TP_ARGS(req, type, sector), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(u32, type) + __field(u64, sector) + ), + + TP_fast_assign( + __entry->req = req; + __entry->type = type; + __entry->sector = sector; + ), + + TP_printk("URING: req=%p tag=%d type=%d sector=%llu", + __entry->req, __entry->req->tag, __entry->req->cmd_flags, + __entry->sector) +); + +TRACE_EVENT(virtio_queue_rq, + + TP_PROTO(struct request *req, bool bid, int num), + + TP_ARGS(req, bid, num), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(bool, bid) + __field(int, num) + ), + + TP_fast_assign( + __entry->req = req; + __entry->bid = bid; + __entry->num = num; + ), + + TP_printk("QUEUE: req=%p tag=%d bid=%d sgs=%d", + __entry->req, __entry->req->tag, __entry->bid, __entry->num) +); + +#endif /* _TRACE_VIRTIO_BLK_H */ + +/* This part must be outside protection */ +#include -- Gitee From 224b0f07d3a09456c4f8f9983f2273952af82bda Mon Sep 17 00:00:00 
2001 From: Ferry Meng Date: Fri, 28 Mar 2025 01:40:13 +0800 Subject: [PATCH 16/17] anolis: virtio_ring: add split queue seq_printf interface ANBZ: #19447 Provide ability for split virtqueue to show descriptors condition. Pay attention to isolating concurrent access before call this function. Signed-off-by: Ferry Meng --- drivers/virtio/virtio_ring.c | 98 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 1 + 2 files changed, 99 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b2230aac9838..c4833d0fc994 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -14,6 +14,7 @@ #include #include #include +#include static bool vring_force_dma_api; @@ -3461,4 +3462,101 @@ void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, } EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); +/** + * virtqueue_show_split_message - print split queue structure + * @_vq: the struct virtqueue we're talking about. + * @s: the struct seq_file + * Before calling this function, get lock to confirm that + * the virtqueue is not in use. + */ +void virtqueue_show_split_message(struct virtqueue *_vq, struct seq_file *s) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_virtqueue_split *split = &vq->split; + u16 last_used_idx, used_idx, idx, idx_in_used_ring, flags; + struct vring_desc *desc; + int len, i; + + last_used_idx = vq->last_used_idx; + used_idx = virtio16_to_cpu(vq->vq.vdev, split->vring.used->idx); + + seq_printf(s, "Virtqueue %d (0x%px): num %d\n", _vq->index, + vq, split->vring.num); + seq_printf(s, "Descriptor Table: num_free %d, free_head %d\n", + _vq->num_free, vq->free_head); + seq_printf(s, "Available Ring: flags 0x%x, avail_idx %d\n", + split->avail_flags_shadow, split->vring.avail->idx); + seq_printf(s, "Used Ring: used %d, last_used_index %d\n", + used_idx, last_used_idx); + + if (last_used_idx == used_idx) + goto out; + + seq_puts(s, "---------- ---------------- -------\n"); + seq_puts(s, "USED_INDEX DESC_TABLE_INDEX DRVDATA\n"); + while (last_used_idx != used_idx) { + idx = last_used_idx & (split->vring.num - 1); + idx_in_used_ring = virtio32_to_cpu(vq->vq.vdev, + split->vring.used->ring[idx].id); + + seq_printf(s, "%10d %16d 0x%px\n", idx, idx_in_used_ring, + split->desc_state[idx_in_used_ring].data); + last_used_idx++; + } + seq_puts(s, "---------- ---------------- -------\n"); + last_used_idx = vq->last_used_idx; + while (last_used_idx != used_idx) { + idx = last_used_idx & (split->vring.num - 1); + idx_in_used_ring = virtio32_to_cpu(vq->vq.vdev, + split->vring.used->ring[idx].id); + + if (!vq->indirect) { + seq_printf(s, "Direct desc[%d]\n", idx_in_used_ring); + i = idx_in_used_ring; + do { + desc = &split->vring.desc[i]; + flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); + + seq_printf(s, " desc[%d] ", i); + seq_printf(s, "dma_addr=0x%-16llx ", + virtio64_to_cpu(vq->vq.vdev, desc->addr)); + seq_printf(s, "flags=0x%-4x ", flags); + seq_printf(s, "len=%-8d ", + virtio32_to_cpu(vq->vq.vdev, desc->len)); + seq_printf(s, "next=%-4d\n", + virtio16_to_cpu(vq->vq.vdev, desc->next)); + i = desc->next; + } while (flags & VRING_DESC_F_NEXT); + } else { + desc = &split->vring.desc[idx_in_used_ring]; + len = split->desc_extra[idx_in_used_ring].len; + seq_printf(s, "P{0x%px} desc[%d]", desc, idx_in_used_ring); + seq_printf(s, "dma_addr=0x%-16llx len=%-8d\n", + virtio64_to_cpu(vq->vq.vdev, desc[i].addr), + virtio32_to_cpu(vq->vq.vdev, desc[i].len)); + + /* print indir_descs */ + desc = 
split->desc_state[idx_in_used_ring].indir_desc; + for (i = 0; i < len / sizeof(struct vring_desc); i++) { + seq_printf(s, " indir_desc[%d] ", i); + seq_printf(s, "dma_addr=0x%-16llx ", + virtio64_to_cpu(vq->vq.vdev, desc[i].addr)); + seq_printf(s, "flags=0x%-4x ", + virtio16_to_cpu(vq->vq.vdev, desc[i].flags)); + seq_printf(s, "len=%-8d ", + virtio32_to_cpu(vq->vq.vdev, desc[i].len)); + seq_printf(s, "next=%-4d\n", + virtio16_to_cpu(vq->vq.vdev, desc[i].next)); + } + } + last_used_idx++; + } + +out: + seq_puts(s, "=======================================\n"); + return; + +} +EXPORT_SYMBOL_GPL(virtqueue_show_split_message); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 082527593f3d..24f9983ce0fa 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -242,4 +242,5 @@ void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t a void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); +void virtqueue_show_split_message(struct virtqueue *_vq, struct seq_file *s); #endif /* _LINUX_VIRTIO_H */ -- Gitee From 74b55207c6be9356cc2bd09665e431f7f5168d25 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Fri, 28 Mar 2025 01:43:26 +0800 Subject: [PATCH 17/17] anolis: virtio-blk: add debugfs interface for ring_pair ANBZ: #19447 Providing debugfs interface for virtio-blk ring pair feature. Path is /sys/kernel/debug/block/vdX/insight/[rpair|virtqueue] For virtqueue, it shows desc info of every request stay in virtqueue. For rpair, we can see every unfinished I/O's indirect_desc, dma_addr and dma_len. Signed-off-by: Ferry Meng --- drivers/block/virtio_blk.c | 107 +++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 36eea97200ee..0f5672aee249 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_VIRTIO_BLK_RING_PAIR #include "../virtio/virtio_pci_common.h" #include @@ -149,6 +150,11 @@ struct virtio_blk { /* saved indirect desc pointer, dma_addr and dma_len for SQ */ struct virtblk_indir_desc **indir_desc; #endif + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbg_dir; +#endif + }; struct virtblk_req { @@ -2112,6 +2118,105 @@ static int virtblk_cdev_add(struct virtio_blk *vblk) return ret; } +#ifdef CONFIG_DEBUG_FS +static int virtblk_dbg_virtqueues_show(struct seq_file *s, void *unused) +{ + struct virtio_blk *vblk = s->private; + unsigned long flags; + int i; + + for (i = 0; i < vblk->num_vqs; i++) { + spin_lock_irqsave(&vblk->vqs[i].lock, flags); + virtqueue_show_split_message(vblk->vqs[i].vq, s); + spin_unlock_irqrestore(&vblk->vqs[i].lock, flags); + } + return 0; +} + +static int virtblk_dbg_virtqueues_open(struct inode *inode, struct file *file) +{ + return single_open(file, virtblk_dbg_virtqueues_show, inode->i_private); +} + +static const struct file_operations virtblk_dbg_virtqueue_ops = { + .open = virtblk_dbg_virtqueues_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR +static int virtblk_dbg_rqs_show(struct seq_file *s, void *unused) +{ + struct virtio_blk *vblk = s->private; + struct virtblk_indir_desc *indir_desc; + int i, j; + + seq_printf(s, "ring_pair is %d\n", vblk->ring_pair); + if (!vblk->ring_pair) + return 0; + + for (i = 0; i < vblk->num_vqs / VIRTBLK_RING_NUM; i++) { + for (j 
= 0; j < vblk->tag_set.queue_depth; j++) { + indir_desc = &vblk->indir_desc[i][j]; + if (indir_desc->desc) { + seq_printf(s, "hctx %d, tag %d, desc 0x%px, ", + i, j, + indir_desc->desc); + seq_printf(s, "dma_addr 0x%llx, len 0x%x\n", + indir_desc->dma_addr, indir_desc->len); + } + } + } + + return 0; +} + +static int virtblk_dbg_rqs_open(struct inode *inode, struct file *file) +{ + return single_open(file, virtblk_dbg_rqs_show, inode->i_private); +} + +static const struct file_operations virtblk_dbg_rqs_ops = { + .open = virtblk_dbg_rqs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static int virtblk_dev_dbg_init(struct virtio_blk *vblk) +{ + struct dentry *dir, *parent_block_dir; + + parent_block_dir = vblk->disk->queue->debugfs_dir; + if (!parent_block_dir) + return -EIO; + + dir = debugfs_create_dir("insight", parent_block_dir); + if (IS_ERR(dir)) { + dev_err(&vblk->vdev->dev, "Failed to get debugfs dir for '%s'\n", + vblk->disk->disk_name); + return -EIO; + } + + debugfs_create_file("virtqueues", 0444, dir, vblk, &virtblk_dbg_virtqueue_ops); +#ifdef CONFIG_VIRTIO_BLK_RING_PAIR + debugfs_create_file("rpair", 0444, dir, vblk, &virtblk_dbg_rqs_ops); +#endif + vblk->dbg_dir = dir; + return 0; +} + +static void virtblk_dev_dbg_close(struct virtio_blk *vblk) +{ + debugfs_remove_recursive(vblk->dbg_dir); +} +#else +static int virtblk_dev_dbg_init(struct virtio_blk *vblk) { return 0; } +static void virtblk_dev_dbg_close(struct virtio_blk *vblk) { } +#endif + static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); @@ -2391,6 +2496,7 @@ static int virtblk_probe(struct virtio_device *vdev) #else device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); #endif + virtblk_dev_dbg_init(vblk); WARN_ON(virtblk_cdev_add(vblk)); return 0; @@ -2420,6 +2526,7 @@ static void virtblk_remove(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + virtblk_dev_dbg_close(vblk); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); -- Gitee