From: Chengwen Feng <fengchengwen@huawei.com>

The Tx performance deteriorates with larger packet sizes and larger
bursts. It may take a long time to optimize in these scenarios, so this
commit reverts commit 0b77e8f3d364 ("net/hns3: optimize Tx performance").

Fixes: 0b77e8f3d364 ("net/hns3: optimize Tx performance")
Cc: stable@dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
---
 drivers/net/hns3/hns3_rxtx.c | 115 ++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 55 deletions(-)

diff --git a/drivers/net/hns3/hns3_rxtx.c b/drivers/net/hns3/hns3_rxtx.c
index 93cc70477d..21c3ef72b1 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -3075,51 +3075,40 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
 	return 0;
 }
 
-static int
+static void
 hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
 {
 	uint16_t tx_next_clean = txq->next_to_clean;
-	uint16_t tx_next_use = txq->next_to_use;
-	struct hns3_entry *tx_entry = &txq->sw_ring[tx_next_clean];
+	uint16_t tx_next_use   = txq->next_to_use;
+	uint16_t tx_bd_ready   = txq->tx_bd_ready;
+	uint16_t tx_bd_max     = txq->nb_tx_desc;
+	struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
 	struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
-	uint16_t i;
-
-	if (tx_next_use >= tx_next_clean &&
-	    tx_next_use < tx_next_clean + txq->tx_rs_thresh)
-		return -1;
+	struct rte_mbuf *mbuf;
 
-	/*
-	 * All mbufs can be released only when the VLD bits of all
-	 * descriptors in a batch are cleared.
-	 */
-	for (i = 0; i < txq->tx_rs_thresh; i++) {
-		if (desc[i].tx.tp_fe_sc_vld_ra_ri &
-			rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B)))
-			return -1;
-	}
+	while ((!(desc->tx.tp_fe_sc_vld_ra_ri &
+		rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) &&
+		tx_next_use != tx_next_clean) {
+		mbuf = tx_bak_pkt->mbuf;
+		if (mbuf) {
+			rte_pktmbuf_free_seg(mbuf);
+			tx_bak_pkt->mbuf = NULL;
+		}
 
-	for (i = 0; i < txq->tx_rs_thresh; i++) {
-		rte_pktmbuf_free_seg(tx_entry[i].mbuf);
-		tx_entry[i].mbuf = NULL;
+		desc++;
+		tx_bak_pkt++;
+		tx_next_clean++;
+		tx_bd_ready++;
+
+		if (tx_next_clean >= tx_bd_max) {
+			tx_next_clean = 0;
+			desc = txq->tx_ring;
+			tx_bak_pkt = txq->sw_ring;
+		}
 	}
 
-	/* Update numbers of available descriptor due to buffer freed */
-	txq->tx_bd_ready += txq->tx_rs_thresh;
-	txq->next_to_clean += txq->tx_rs_thresh;
-	if (txq->next_to_clean >= txq->nb_tx_desc)
-		txq->next_to_clean = 0;
-
-	return 0;
-}
-
-static inline int
-hns3_tx_free_required_buffer(struct hns3_tx_queue *txq, uint16_t required_bds)
-{
-	while (required_bds > txq->tx_bd_ready) {
-		if (hns3_tx_free_useless_buffer(txq) != 0)
-			return -1;
-	}
-	return 0;
+	txq->next_to_clean = tx_next_clean;
+	txq->tx_bd_ready = tx_bd_ready;
 }
 
 int
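[Note: for readability, here is the restored hns3_tx_free_useless_buffer()
reassembled from the '+' and context lines of the hunk above; the comments
are editorial annotations, not part of the patch, and whitespace may differ
from the tree.]

static void
hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
{
	uint16_t tx_next_clean = txq->next_to_clean;
	uint16_t tx_next_use   = txq->next_to_use;
	uint16_t tx_bd_ready   = txq->tx_bd_ready;
	uint16_t tx_bd_max     = txq->nb_tx_desc;
	struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
	struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
	struct rte_mbuf *mbuf;

	/* Free one mbuf per descriptor, stopping at the first BD whose
	 * VLD bit hardware has not yet cleared (i.e. one it still owns)
	 * or when the ring is fully drained. */
	while ((!(desc->tx.tp_fe_sc_vld_ra_ri &
		rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) &&
		tx_next_use != tx_next_clean) {
		mbuf = tx_bak_pkt->mbuf;
		if (mbuf) {
			rte_pktmbuf_free_seg(mbuf);
			tx_bak_pkt->mbuf = NULL;
		}

		desc++;
		tx_bak_pkt++;
		tx_next_clean++;
		tx_bd_ready++;

		/* Wrap around at the end of the descriptor ring. */
		if (tx_next_clean >= tx_bd_max) {
			tx_next_clean = 0;
			desc = txq->tx_ring;
			tx_bak_pkt = txq->sw_ring;
		}
	}

	txq->next_to_clean = tx_next_clean;
	txq->tx_bd_ready = tx_bd_ready;
}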
@@ -4162,8 +4151,7 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint16_t nb_tx;
 	uint16_t i;
 
-	if (txq->tx_bd_ready < txq->tx_free_thresh)
-		(void)hns3_tx_free_useless_buffer(txq);
+	hns3_tx_free_useless_buffer(txq);
 
 	tx_next_use   = txq->next_to_use;
 	tx_bd_max     = txq->nb_tx_desc;
@@ -4178,14 +4166,10 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		nb_buf = tx_pkt->nb_segs;
 
 		if (nb_buf > txq->tx_bd_ready) {
-			/* Try to release the required MBUF, but avoid releasing
-			 * all MBUFs, otherwise, the MBUFs will be released for
-			 * a long time and may cause jitter.
-			 */
-			if (hns3_tx_free_required_buffer(txq, nb_buf) != 0) {
-				txq->dfx_stats.queue_full_cnt++;
-				goto end_of_tx;
-			}
+			txq->dfx_stats.queue_full_cnt++;
+			if (nb_tx == 0)
+				return 0;
+			goto end_of_tx;
 		}
 
 		/*
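[Note: a reassembled view of the restored hns3_xmit_pkts() logic from the
two hunks above: the cleanup call now runs unconditionally at the start of
every burst, and the BD-shortage branch in the per-packet loop ends the
burst immediately instead of polling for completions. Comments are
editorial, not part of the patch.]

	/* Start of burst: reclaim whatever BDs hardware has completed. */
	hns3_tx_free_useless_buffer(txq);

	/* ... later, in the per-packet loop ... */
	if (nb_buf > txq->tx_bd_ready) {
		/* Not enough free BDs for this packet: count a queue-full
		 * event and stop the burst rather than wait for BDs. */
		txq->dfx_stats.queue_full_cnt++;
		if (nb_tx == 0)
			return 0;
		goto end_of_tx;
	}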
@@ -4609,22 +4593,43 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt)
 {
-	uint16_t round_cnt;
+	uint16_t next_to_clean = txq->next_to_clean;
+	uint16_t next_to_use   = txq->next_to_use;
+	uint16_t tx_bd_ready   = txq->tx_bd_ready;
+	struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean];
+	struct hns3_desc *desc = &txq->tx_ring[next_to_clean];
 	uint32_t idx;
 
 	if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
 		free_cnt = txq->nb_tx_desc;
 
-	if (txq->tx_rs_thresh == 0)
-		return 0;
-
-	round_cnt = rounddown(free_cnt, txq->tx_rs_thresh);
-	for (idx = 0; idx < round_cnt; idx += txq->tx_rs_thresh) {
-		if (hns3_tx_free_useless_buffer(txq) != 0)
+	for (idx = 0; idx < free_cnt; idx++) {
+		if (next_to_clean == next_to_use)
+			break;
+		if (desc->tx.tp_fe_sc_vld_ra_ri &
+		    rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
 			break;
+		if (tx_pkt->mbuf != NULL) {
+			rte_pktmbuf_free_seg(tx_pkt->mbuf);
+			tx_pkt->mbuf = NULL;
+		}
+		next_to_clean++;
+		tx_bd_ready++;
+		tx_pkt++;
+		desc++;
+		if (next_to_clean == txq->nb_tx_desc) {
+			tx_pkt = txq->sw_ring;
+			desc = txq->tx_ring;
+			next_to_clean = 0;
+		}
+	}
+
+	if (idx > 0) {
+		txq->next_to_clean = next_to_clean;
+		txq->tx_bd_ready = tx_bd_ready;
 	}
 
-	return idx;
+	return (int)idx;
 }
 
 int
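[Note: likewise, the restored hns3_tx_done_cleanup_full() reassembled from
the final hunk. Unlike the reverted tx_rs_thresh-batched variant it can
stop at any descriptor, so the early return for txq->tx_rs_thresh == 0 is
gone. Comments are editorial, not part of the patch.]

static int
hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt)
{
	uint16_t next_to_clean = txq->next_to_clean;
	uint16_t next_to_use   = txq->next_to_use;
	uint16_t tx_bd_ready   = txq->tx_bd_ready;
	struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean];
	struct hns3_desc *desc = &txq->tx_ring[next_to_clean];
	uint32_t idx;

	if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
		free_cnt = txq->nb_tx_desc;

	/* Free up to free_cnt mbufs, stopping at the first BD that
	 * hardware still owns or when the ring is fully drained. */
	for (idx = 0; idx < free_cnt; idx++) {
		if (next_to_clean == next_to_use)
			break;
		if (desc->tx.tp_fe_sc_vld_ra_ri &
		    rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
			break;
		if (tx_pkt->mbuf != NULL) {
			rte_pktmbuf_free_seg(tx_pkt->mbuf);
			tx_pkt->mbuf = NULL;
		}
		next_to_clean++;
		tx_bd_ready++;
		tx_pkt++;
		desc++;
		/* Wrap around at the end of the descriptor ring. */
		if (next_to_clean == txq->nb_tx_desc) {
			tx_pkt = txq->sw_ring;
			desc = txq->tx_ring;
			next_to_clean = 0;
		}
	}

	if (idx > 0) {
		txq->next_to_clean = next_to_clean;
		txq->tx_bd_ready = tx_bd_ready;
	}

	return (int)idx;
}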