During high-frequency packet transmission, if vhost notifies the virtio driver immediately after finding the virtio_queue empty, the virtio driver has to perform a kick after sending the next packet. However, if vhost waits a little longer, it can pick up the next packet sent by the virtio driver without the driver needing to perform a kick.
This patch addresses this issue. If the TX interval most recently recorded by vhost-net is within 50us, the traffic is considered high-frequency packet sending. In that case, after detecting that the virtio_queue is empty, vhost-net will wait for new packets to arrive before notifying the virtio driver.
Xu Kuohai (2): vhost_net: Suppress kick ratio when high frequency TX detected openeuler: vhost_net: Enable vhost net polling for openeuler arm64 and x86
arch/arm64/configs/openeuler_defconfig | 2 + arch/x86/configs/openeuler_defconfig | 2 + drivers/vhost/Kconfig | 22 +++++++++ drivers/vhost/net.c | 66 ++++++++++++++++++++++++++ 4 files changed, 92 insertions(+)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/1123 邮件列表地址: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/thread/4T...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/1123 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/thread/4T...
hulk inclusion category: feature bugzilla: NA CVE: N/A
----------------------------------------------------
During high-frequency packet transmission, if vhost notifies the virtio driver immediately after finding the virtio_queue empty, the virtio driver has to perform a kick after sending the next packet. However, if vhost waits a little longer, it can pick up the next packet sent by the virtio driver without the driver needing to perform a kick.
This patch addresses this issue. If the TX interval most recently recorded by vhost-net is within 50us, the traffic is considered high-frequency packet sending. In that case, after detecting that the virtio_queue is empty, vhost-net will wait for new packets to arrive before notifying the virtio driver.
Signed-off-by: Xu Kuohai xukuohai@huawei.com --- drivers/vhost/Kconfig | 22 +++++++++++++++ drivers/vhost/net.c | 66 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+)
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 587fbae06182..4890bdd425a4 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -38,6 +38,28 @@ config VHOST_NET To compile this driver as a module, choose M here: the module will be called vhost_net.
+config VHOST_NET_HFT_POLLING + bool "Enalbe vhost-net polling for high frequency TX" + depends on VHOST_NET + default n + help + Enalbe vhost-net polling for high frequency TX mode. + When enabled, vhost-net enters polling mode if the observed + continuous TX interval is less than a threshold, which is + initialized to CONFIG_VHOST_NET_HFT_THRESHOLD. + + When enabling this option, please set CONFIG_VHOST_NET_HFT_THRESHOLD + to an appriopriate value as well. + +config VHOST_NET_HFT_THRESHOLD + int "Value for vhost net high frequency TX interval threshold (unit is ns)" + depends on VHOST_NET_HFT_POLLING + default 0 + help + vhost-net enters polling mode if the observed continuous TX + interval is less than a threshold, which is initialized by + this value. + config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index a6a1a01319d8..e4c24acde95a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -40,6 +40,20 @@ module_param(experimental_zcopytx, int, 0444); MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" " 1 -Enable; 0 - Disable");
+#ifdef CONFIG_VHOST_NET_HFT_POLLING +static_assert(CONFIG_VHOST_NET_HFT_THRESHOLD >= 0); + +static unsigned int high_freq_txi_threshold = + (unsigned int)CONFIG_VHOST_NET_HFT_THRESHOLD; +module_param(high_freq_txi_threshold, uint, 0644); +MODULE_PARM_DESC(high_freq_txi_threshold, + "vhost-net will enter polling mode " + "if the observed continuous TX interval " + "is less than this value. " + "The unit is nanosecond, and the default value is " + __stringify(CONFIG_VHOST_NET_HFT_THRESHOLD)); +#endif + /* Max number of bytes transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others. */ #define VHOST_NET_WEIGHT 0x80000 @@ -126,6 +140,10 @@ struct vhost_net_virtqueue { struct vhost_net_buf rxq; /* Batched XDP buffs */ struct xdp_buff *xdp; +#ifdef CONFIG_VHOST_NET_HFT_POLLING + u64 tx_time; + u64 tx_interval; +#endif };
struct vhost_net { @@ -311,6 +329,10 @@ static void vhost_net_vq_reset(struct vhost_net *n) n->vqs[i].ubufs = NULL; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; +#ifdef CONFIG_VHOST_NET_HFT_POLLING + n->vqs[i].tx_time = 0; + n->vqs[i].tx_interval = 1000000; /* 1ms */ +#endif vhost_net_buf_init(&n->vqs[i].rxq); }
@@ -456,6 +478,25 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) nvq->done_idx = 0; }
+#ifdef CONFIG_VHOST_NET_HFT_POLLING + +static void vhost_update_tx_interval(struct vhost_net_virtqueue *nvq) +{ + u64 time = ktime_get_mono_fast_ns(); + + if (likely(nvq->tx_time != 0)) { + u64 x = nvq->tx_interval; + u64 y = time - nvq->tx_time; + + /* tx_interval = 0.25 * old_interval + 0.75 * new_interval */ + nvq->tx_interval = (x >> 2) + (y - (y >> 2)); + } + + nvq->tx_time = time; +} + +#endif + static void vhost_tx_batch(struct vhost_net *net, struct vhost_net_virtqueue *nvq, struct socket *sock, @@ -489,6 +530,9 @@ static void vhost_tx_batch(struct vhost_net *net, }
signal_used: +#ifdef CONFIG_VHOST_NET_HFT_POLLING + vhost_update_tx_interval(nvq); +#endif vhost_net_signal_used(nvq); nvq->batched_xdp = 0; } @@ -783,6 +827,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) int sent_pkts = 0; bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
+#ifdef CONFIG_VHOST_NET_HFT_POLLING + int last_done_idx = 0; +#endif + do { bool busyloop_intr = false;
@@ -798,6 +846,24 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) if (head == vq->num) { if (unlikely(busyloop_intr)) { vhost_poll_queue(&vq->poll); +#ifdef CONFIG_VHOST_NET_HFT_POLLING + } else if (nvq->tx_interval < high_freq_txi_threshold && + ktime_get_mono_fast_ns() - nvq->tx_time < + high_freq_txi_threshold) { + + /* Avoid virtio waiting blindly for a long time + * due to vhost silly polling + */ + if (nvq->done_idx >= vq->num / 2) + vhost_tx_batch(net, nvq, sock, &msg); + + /* Update TX interval if we get some packets */ + if (last_done_idx < nvq->done_idx) + vhost_update_tx_interval(nvq); + + last_done_idx = nvq->done_idx; + continue; +#endif } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq);
hulk inclusion category: feature bugzilla: NA CVE: N/A
----------------------------------------------------
Enable vhost net TX high frequency polling for openeuler.
Signed-off-by: Xu Kuohai xukuohai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 2 ++ arch/x86/configs/openeuler_defconfig | 2 ++ 2 files changed, 4 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8261f11b54fd..74bbc0f17590 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -5832,6 +5832,8 @@ CONFIG_VHOST_IOTLB=m CONFIG_VHOST=m CONFIG_VHOST_MENU=y CONFIG_VHOST_NET=m +CONFIG_VHOST_NET_HFT_POLLING=y +CONFIG_VHOST_NET_HFT_THRESHOLD=0 CONFIG_VHOST_SCSI=m CONFIG_VHOST_VSOCK=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index ff4475ef1822..8d546710c642 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -6481,6 +6481,8 @@ CONFIG_VHOST_IOTLB=m CONFIG_VHOST=m CONFIG_VHOST_MENU=y CONFIG_VHOST_NET=m +CONFIG_VHOST_NET_HFT_POLLING=y +CONFIG_VHOST_NET_HFT_THRESHOLD=0 # CONFIG_VHOST_SCSI is not set CONFIG_VHOST_VSOCK=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set