diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 1b633c835279ec77a22e65bbbe94603b7c0d1658..06afe72f8e3d876478ac326f9868c228edba13e6 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -5833,6 +5833,8 @@ CONFIG_VHOST_IOTLB=m
 CONFIG_VHOST=m
 CONFIG_VHOST_MENU=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_NET_HFT_POLLING=y
+CONFIG_VHOST_NET_HFT_THRESHOLD=0
 CONFIG_VHOST_SCSI=m
 CONFIG_VHOST_VSOCK=m
 # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index ff4475ef1822e5e6310ab1dddeba66a8c7466f7b..8d546710c6425a498a8fc95581a6159a44345dc5 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -6481,6 +6481,8 @@ CONFIG_VHOST_IOTLB=m
 CONFIG_VHOST=m
 CONFIG_VHOST_MENU=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_NET_HFT_POLLING=y
+CONFIG_VHOST_NET_HFT_THRESHOLD=0
 # CONFIG_VHOST_SCSI is not set
 CONFIG_VHOST_VSOCK=m
 # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 587fbae0618213ea5399a2cf0bf7f55cd8ed558b..4890bdd425a4b5b3d735c2c8b2da85368664f67c 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -38,6 +38,28 @@ config VHOST_NET
	  To compile this driver as a module, choose M here: the module will
	  be called vhost_net.
 
+config VHOST_NET_HFT_POLLING
+	bool "Enable vhost-net polling for high frequency TX"
+	depends on VHOST_NET
+	default n
+	help
+	  Enable vhost-net polling for high frequency TX mode.
+	  When enabled, vhost-net enters polling mode if the observed
+	  continuous TX interval is less than a threshold, which is
+	  initialized to CONFIG_VHOST_NET_HFT_THRESHOLD.
+
+	  When enabling this option, please set CONFIG_VHOST_NET_HFT_THRESHOLD
+	  to an appropriate value as well.
+
+config VHOST_NET_HFT_THRESHOLD
+	int "vhost-net high frequency TX interval threshold (in nanoseconds)"
+	depends on VHOST_NET_HFT_POLLING
+	default 0
+	help
+	  vhost-net enters polling mode if the observed continuous TX
+	  interval is less than a threshold, which is initialized to
+	  this value.
+
 config VHOST_SCSI
	tristate "VHOST_SCSI TCM fabric driver"
	depends on TARGET_CORE && EVENTFD
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index a6a1a01319d845330ad0f43c16f39b7699365f62..e4c24acde95aab18a6ab0c43db254d03a99adcd6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -40,6 +40,20 @@ module_param(experimental_zcopytx, int, 0444);
 MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
		" 1 -Enable; 0 - Disable");
 
+#ifdef CONFIG_VHOST_NET_HFT_POLLING
+static_assert(CONFIG_VHOST_NET_HFT_THRESHOLD >= 0);
+
+static unsigned int high_freq_txi_threshold =
+	(unsigned int)CONFIG_VHOST_NET_HFT_THRESHOLD;
+module_param(high_freq_txi_threshold, uint, 0644);
+MODULE_PARM_DESC(high_freq_txi_threshold,
+		 "vhost-net will enter polling mode "
+		 "if the observed continuous TX interval "
+		 "is less than this value. "
+		 "The unit is nanoseconds, and the default value is "
+		 __stringify(CONFIG_VHOST_NET_HFT_THRESHOLD));
+#endif
+
 /* Max number of bytes transferred before requeueing the job.
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_NET_WEIGHT 0x80000
@@ -126,6 +140,10 @@ struct vhost_net_virtqueue {
	struct vhost_net_buf rxq;
	/* Batched XDP buffs */
	struct xdp_buff *xdp;
+#ifdef CONFIG_VHOST_NET_HFT_POLLING
+	u64 tx_time;
+	u64 tx_interval;
+#endif
 };
 
 struct vhost_net {
@@ -311,6 +329,10 @@ static void vhost_net_vq_reset(struct vhost_net *n)
		n->vqs[i].ubufs = NULL;
		n->vqs[i].vhost_hlen = 0;
		n->vqs[i].sock_hlen = 0;
+#ifdef CONFIG_VHOST_NET_HFT_POLLING
+		n->vqs[i].tx_time = 0;
+		n->vqs[i].tx_interval = 1000000; /* 1ms */
+#endif
		vhost_net_buf_init(&n->vqs[i].rxq);
	}
 
@@ -456,6 +478,25 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
	nvq->done_idx = 0;
 }
 
+#ifdef CONFIG_VHOST_NET_HFT_POLLING
+
+static void vhost_update_tx_interval(struct vhost_net_virtqueue *nvq)
+{
+	u64 time = ktime_get_mono_fast_ns();
+
+	if (likely(nvq->tx_time != 0)) {
+		u64 x = nvq->tx_interval;
+		u64 y = time - nvq->tx_time;
+
+		/* tx_interval = 0.25 * old_interval + 0.75 * new_interval */
+		nvq->tx_interval = (x >> 2) + (y - (y >> 2));
+	}
+
+	nvq->tx_time = time;
+}
+
+#endif
+
 static void vhost_tx_batch(struct vhost_net *net,
			   struct vhost_net_virtqueue *nvq,
			   struct socket *sock,
@@ -489,6 +530,9 @@ static void vhost_tx_batch(struct vhost_net *net,
	}
 
 signal_used:
+#ifdef CONFIG_VHOST_NET_HFT_POLLING
+	vhost_update_tx_interval(nvq);
+#endif
	vhost_net_signal_used(nvq);
	nvq->batched_xdp = 0;
 }
@@ -783,6 +827,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
	int sent_pkts = 0;
	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
 
+#ifdef CONFIG_VHOST_NET_HFT_POLLING
+	int last_done_idx = 0;
+#endif
+
	do {
		bool busyloop_intr = false;
 
@@ -798,6 +846,24 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
		if (head == vq->num) {
			if (unlikely(busyloop_intr)) {
				vhost_poll_queue(&vq->poll);
+#ifdef CONFIG_VHOST_NET_HFT_POLLING
+			} else if (nvq->tx_interval < high_freq_txi_threshold &&
+				   ktime_get_mono_fast_ns() - nvq->tx_time <
+				   high_freq_txi_threshold) {
+
+				/* Avoid the guest waiting too long for used
+				 * buffers while vhost keeps busy polling.
+				 */
+				if (nvq->done_idx >= vq->num / 2)
+					vhost_tx_batch(net, nvq, sock, &msg);
+
+				/* Update TX interval if we get some packets */
+				if (last_done_idx < nvq->done_idx)
+					vhost_update_tx_interval(nvq);
+
+				last_done_idx = nvq->done_idx;
+				continue;
+#endif
			} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
				vhost_disable_notify(&net->dev, vq);
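
Note (illustrative aside, not part of the patch): the TX-interval smoothing and the polling decision introduced above can be exercised in isolation. Below is a minimal user-space C sketch under the assumption that timestamps are supplied by the caller instead of ktime_get_mono_fast_ns(); the names txi_state, update_tx_interval() and keep_polling() are hypothetical and only mirror the logic of vhost_update_tx_interval() and the handle_tx_copy() check.

/* Sketch of the patch's TX-interval tracking: tx_interval is a weighted
 * moving average (0.25 * old + 0.75 * new), and polling continues only
 * while both the smoothed interval and the time since the last TX stay
 * below the configured threshold.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct txi_state {
	uint64_t tx_time;	/* timestamp of the last TX batch (ns) */
	uint64_t tx_interval;	/* smoothed TX interval (ns) */
};

static void update_tx_interval(struct txi_state *s, uint64_t now)
{
	if (s->tx_time != 0) {
		uint64_t x = s->tx_interval;
		uint64_t y = now - s->tx_time;

		/* tx_interval = 0.25 * old_interval + 0.75 * new_interval */
		s->tx_interval = (x >> 2) + (y - (y >> 2));
	}
	s->tx_time = now;
}

static bool keep_polling(const struct txi_state *s, uint64_t now,
			 uint64_t threshold_ns)
{
	return s->tx_interval < threshold_ns &&
	       now - s->tx_time < threshold_ns;
}

int main(void)
{
	/* Start from the 1ms default set in vhost_net_vq_reset(). */
	struct txi_state s = { .tx_time = 0, .tx_interval = 1000000 };
	uint64_t now = 0;
	int i;

	/* Pretend the guest transmits every 20us for a while; the
	 * smoothed interval converges towards 20000 ns.
	 */
	for (i = 0; i < 32; i++) {
		now += 20000;
		update_tx_interval(&s, now);
	}
	printf("smoothed interval: %llu ns\n",
	       (unsigned long long)s.tx_interval);
	printf("keep polling at 50us threshold: %d\n",
	       keep_polling(&s, now + 10000, 50000));
	return 0;
}

Because high_freq_txi_threshold is registered with module_param(..., 0644), the threshold should also be tunable at runtime (e.g. via /sys/module/vhost_net/parameters/high_freq_txi_threshold) in addition to the build-time CONFIG_VHOST_NET_HFT_THRESHOLD default.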