During high-frequency packet transmission, if vhost notifies the virtio driver immediately after finding the virtio queue empty, the virtio driver has to perform a kick after sending the next packet. However, if vhost waits a little longer, it can pick up the next packet sent by the virtio driver without the virtio driver needing to kick at all.
This patch set optimizes for this case. If the TX interval recently recorded by vhost-net is within 50us, transmission is considered high-frequency. In that case, after finding the virtio queue empty, vhost-net polls for new packets to arrive instead of immediately notifying the virtio driver.
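For illustration, the decision can be sketched roughly as follows (a simplified standalone sketch of the heuristic, not the vhost code itself; the helper should_poll() and the hard-coded 50us threshold are placeholders for this example, see patch 1 for the real logic):

	#include <stdbool.h>
	#include <stdint.h>

	#define THRESHOLD_NS	50000ULL	/* 50us, the openEuler default */

	/* Keep polling (instead of re-enabling guest notification) when
	 * recent packets arrived close together and the last one was seen
	 * only a short time ago, i.e. another packet is likely imminent. */
	static bool should_poll(uint64_t tx_interval_ns,
				uint64_t last_tx_ns, uint64_t now_ns)
	{
		return tx_interval_ns < THRESHOLD_NS &&
		       now_ns - last_tx_ns < THRESHOLD_NS;
	}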
Xu Kuohai (2):
  vhost_net: Suppress kick ratio when high frequency TX detected
  vhost_net: Enable vhost net polling for openeuler arm64 and x86
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 drivers/vhost/Kconfig                  |  9 ++++++
 drivers/vhost/net.c                    | 45 ++++++++++++++++++++++++++
 4 files changed, 56 insertions(+)
Feedback: The patches you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link:
https://gitee.com/openeuler/kernel/pulls/1096
Mailing list address:
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/thread/IQ...
hulk inclusion
category: feature
bugzilla: NA
CVE: N/A
----------------------------------------------------
During high-frequency packet transmission, if vhost notifies the virtio driver immediately after finding the virtio queue empty, the virtio driver has to perform a kick after sending the next packet. However, if vhost waits a little longer, it can pick up the next packet sent by the virtio driver without the virtio driver needing to kick at all.
This patch optimizes for this case. If the TX interval recently recorded by vhost-net is within 50us, transmission is considered high-frequency. In that case, after finding the virtio queue empty, vhost-net polls for new packets to arrive instead of immediately notifying the virtio driver.
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 drivers/vhost/Kconfig |  9 +++++++++
 drivers/vhost/net.c   | 45 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 587fbae06182..2bbf42c914c4 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -38,6 +38,15 @@ config VHOST_NET
 	  To compile this driver as a module, choose M here: the module will
 	  be called vhost_net.
 
+config VHOST_NET_HIGH_FREQ_TX_INTERVAL_THRESHOLD
+	int "Initial value for vhost net high frequency tx interval threshold (unit is ns)"
+	depends on VHOST_NET
+	default 0
+	help
+	  vhost-net enters polling mode if the observed continuous TX
+	  interval is less than a threshold, which is initialized by
+	  this value.
+
 config VHOST_SCSI
 	tristate "VHOST_SCSI TCM fabric driver"
 	depends on TARGET_CORE && EVENTFD
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index a6a1a01319d8..091915319bc3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -40,6 +40,16 @@ module_param(experimental_zcopytx, int, 0444);
 MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
 		 " 1 -Enable; 0 - Disable");
 
+static unsigned int high_freq_txi_threshold =
+	(unsigned int)CONFIG_VHOST_NET_HIGH_FREQ_TX_INTERVAL_THRESHOLD;
+module_param(high_freq_txi_threshold, uint, 0644);
+MODULE_PARM_DESC(high_freq_txi_threshold,
+		 "vhost-net will enter polling mode "
+		 "if the observed continuous TX interval "
+		 "is less than this value. "
+		 "The unit is nanosecond, and the default value is "
+		 __stringify(CONFIG_VHOST_NET_HIGH_FREQ_TX_INTERVAL_THRESHOLD));
+
 /* Max number of bytes transferred before requeueing the job.
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_NET_WEIGHT 0x80000
@@ -126,6 +136,8 @@ struct vhost_net_virtqueue {
 	struct vhost_net_buf rxq;
 	/* Batched XDP buffs */
 	struct xdp_buff *xdp;
+	u64 tx_time;
+	u64 tx_interval;
 };
 
 struct vhost_net {
@@ -311,6 +323,8 @@ static void vhost_net_vq_reset(struct vhost_net *n)
 		n->vqs[i].ubufs = NULL;
 		n->vqs[i].vhost_hlen = 0;
 		n->vqs[i].sock_hlen = 0;
+		n->vqs[i].tx_time = 0;
+		n->vqs[i].tx_interval = 1000000; /* 1ms */
 		vhost_net_buf_init(&n->vqs[i].rxq);
 	}
@@ -456,6 +470,21 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
 	nvq->done_idx = 0;
 }
 
+static void vhost_update_tx_interval(struct vhost_net_virtqueue *nvq)
+{
+	u64 time = ktime_get_mono_fast_ns();
+
+	if (likely(nvq->tx_time != 0)) {
+		u64 x = nvq->tx_interval;
+		u64 y = time - nvq->tx_time;
+
+		/* tx_interval = 0.25 * old_interval + 0.75 * new_interval */
+		nvq->tx_interval = (x >> 2) + (y - (y >> 2));
+	}
+
+	nvq->tx_time = time;
+}
+
 static void vhost_tx_batch(struct vhost_net *net,
 			   struct vhost_net_virtqueue *nvq,
 			   struct socket *sock,
@@ -489,6 +518,7 @@ static void vhost_tx_batch(struct vhost_net *net,
 	}
 
 signal_used:
+	vhost_update_tx_interval(nvq);
 	vhost_net_signal_used(nvq);
 	nvq->batched_xdp = 0;
 }
@@ -782,6 +812,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 	int err;
 	int sent_pkts = 0;
 	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
+	int last_done_idx = 0;
 
 	do {
 		bool busyloop_intr = false;
@@ -798,6 +829,20 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 		if (head == vq->num) {
 			if (unlikely(busyloop_intr)) {
 				vhost_poll_queue(&vq->poll);
+			} else if (nvq->tx_interval < high_freq_txi_threshold &&
+				   ktime_get_mono_fast_ns() - nvq->tx_time <
+				   high_freq_txi_threshold) {
+
+				/* Avoid deadlock due to vhost silly polling */
+				if (nvq->done_idx >= vq->num / 2)
+					vhost_tx_batch(net, nvq, sock, &msg);
+
+				/* Update TX interval if we get some packets */
+				if (last_done_idx < nvq->done_idx)
+					vhost_update_tx_interval(nvq);
+
+				last_done_idx = nvq->done_idx;
+				continue;
 			} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
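A note on the interval smoothing in vhost_update_tx_interval() above: (x >> 2) + (y - (y >> 2)) is an integer approximation of old/4 + 3*new/4. As a standalone illustration (userspace demo, not part of the patch), starting from the 1ms reset value, a burst of 10us samples drives the estimate below the 50us openEuler default threshold within three updates:

	#include <stdint.h>
	#include <stdio.h>

	/* Same update rule as vhost_update_tx_interval():
	 * new_estimate = old/4 + 3*sample/4, computed with shifts. */
	static uint64_t update(uint64_t old, uint64_t sample)
	{
		return (old >> 2) + (sample - (sample >> 2));
	}

	int main(void)
	{
		uint64_t interval = 1000000;	/* 1ms, the reset value */

		for (int i = 0; i < 4; i++) {
			interval = update(interval, 10000);	/* 10us samples */
			printf("sample %d: interval = %llu ns\n",
			       i + 1, (unsigned long long)interval);
		}
		/* Prints 257500, 71875, 25468, 13867: below 50000 ns after
		 * the third sample, so polling mode would engage. */
		return 0;
	}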
hulk inclusion
category: feature
bugzilla: NA
CVE: N/A
----------------------------------------------------
Enable the vhost net high-frequency TX feature and set the default TX interval threshold to 50us (50000ns) in the openEuler arm64 and x86 defconfigs.
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 arch/x86/configs/openeuler_defconfig   | 1 +
 2 files changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 8261f11b54fd..92436a334217 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -5832,6 +5832,7 @@ CONFIG_VHOST_IOTLB=m
 CONFIG_VHOST=m
 CONFIG_VHOST_MENU=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_NET_HIGH_FREQ_TX_INTERVAL_THRESHOLD=50000
 CONFIG_VHOST_SCSI=m
 CONFIG_VHOST_VSOCK=m
 # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index ff4475ef1822..4e550e3d77fa 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -6481,6 +6481,7 @@ CONFIG_VHOST_IOTLB=m
 CONFIG_VHOST=m
 CONFIG_VHOST_MENU=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_NET_HIGH_FREQ_TX_INTERVAL_THRESHOLD=50000
 # CONFIG_VHOST_SCSI is not set
 CONFIG_VHOST_VSOCK=m
 # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set
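Since patch 1 declares the threshold with module_param(high_freq_txi_threshold, uint, 0644), it should also be adjustable at runtime through /sys/module/vhost_net/parameters/high_freq_txi_threshold, independent of the defconfig default above. A minimal userspace sketch (illustration only; the sysfs path follows the usual module_param convention and assumes vhost_net is loaded):

	#include <stdio.h>

	int main(void)
	{
		/* Write a new threshold in nanoseconds; with 0, no interval
		 * is ever below the threshold, so polling never engages. */
		FILE *f = fopen("/sys/module/vhost_net/parameters/"
				"high_freq_txi_threshold", "w");
		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "%u\n", 50000U);	/* 50us */
		fclose(f);
		return 0;
	}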