From: Yunsheng Lin linyunsheng@huawei.com
mainline inclusion from mainline-v5.10-rc1 commit 20d06ca2679cb1810135c70be340e5477c83a808 category: feature bugzilla: NA CVE: NA
----------------------------
Currently the HNS3_RING_TX_RING_HEAD_REG register is read to determine how many tx desc can be cleaned. To avoid this register read in the critical data path, use the valid bit in the tx desc to determine whether a specific tx desc can be cleaned.
The hns3 driver sets the valid bit in the tx desc before ringing the doorbell to the hw, and the hw only clears the valid bit of a tx desc after the corresponding packet has been sent out on the wire. Because next_to_use for the tx ring keeps changing while the driver is filling tx descs, reuse the storage of the rx ring's pull_len field (shared through a union as last_to_use) to record the last tx desc that has been notified to the hw, so that hns3_nic_reclaim_desc() can decide how many tx descs' valid bits need checking when reclaiming tx descs.
The io_err_cnt stat is also removed because it is no longer used.
Signed-off-by: Yunsheng Lin linyunsheng@huawei.com Signed-off-by: Huazhong Tan tanhuazhong@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Yonglong Liu liuyonglong@huawei.com Reviewed-by: li yongxin liyongxin1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- .../hisilicon/hns3/hns3_cae/hns3_cae_stat.c | 4 -- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 64 +++++++++---------- .../net/ethernet/hisilicon/hns3/hns3_enet.h | 12 ++-- .../ethernet/hisilicon/hns3/hns3_ethtool.c | 2 - 4 files changed, 33 insertions(+), 49 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_stat.c b/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_stat.c index 9d4d16109bb3b..684d72953c306 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_stat.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_stat.c @@ -4,7 +4,6 @@ #include "hns3_cae_stat.h"
const struct ring_stats_name hns3_ring_stats_name[] = { - {"io_err_cnt", IO_ERR_CNT}, {"sw_err_cnt", SW_ERR_CNT}, {"seg_pkt_cnt", SEG_PKT_CNT}, {"tx_pkts", TX_PKTS}, @@ -43,9 +42,6 @@ static int hns3_get_stat_val(struct ring_stats *r_stats, char *val_name, } } switch (stats_name_id) { - case IO_ERR_CNT: - *val = &r_stats->io_err_cnt; - break; case SW_ERR_CNT: *val = &r_stats->sw_err_cnt; break; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index f9783907cf46d..580ea9ef66292 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1403,6 +1403,7 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
hnae3_queue_xmit(ring->tqp, ring->pending_buf); ring->pending_buf = 0; + WRITE_ONCE(ring->last_to_use, ring->next_to_use); }
netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) @@ -1920,10 +1921,9 @@ bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) tx_ring->next_to_clean, napi->state);
netdev_info(ndev, - "tx_pkts: %llu, tx_bytes: %llu, io_err_cnt: %llu, sw_err_cnt: %llu, tx_pending: %d\n", + "tx_pkts: %llu, tx_bytes: %llu, sw_err_cnt: %llu, tx_pending: %d\n", tx_ring->stats.tx_pkts, tx_ring->stats.tx_bytes, - tx_ring->stats.io_err_cnt, tx_ring->stats.sw_err_cnt, - tx_ring->pending_buf); + tx_ring->stats.sw_err_cnt, tx_ring->pending_buf);
netdev_info(ndev, "seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n", @@ -2589,13 +2589,26 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) DMA_FROM_DEVICE); }
-static void hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int head, +static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int *bytes, int *pkts) { + /* pair with ring->last_to_use update in hns3_tx_doorbell(), + * smp_store_release() is not used in hns3_tx_doorbell() because + * the doorbell operation already have the needed barrier operation. + */ + int ltu = smp_load_acquire(&ring->last_to_use); int ntc = ring->next_to_clean; struct hns3_desc_cb *desc_cb; + bool reclaimed = false; + struct hns3_desc *desc; + + while (ltu != ntc) { + desc = &ring->desc[ntc]; + + if (le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri) & + BIT(HNS3_TXD_VLD_B)) + break;
- while (head != ntc) { desc_cb = &ring->desc_cb[ntc]; (*pkts) += (desc_cb->type == DESC_TYPE_SKB); (*bytes) += desc_cb->length; @@ -2607,23 +2620,17 @@ static void hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int head,
/* Issue prefetch for next Tx descriptor */ prefetch(&ring->desc_cb[ntc]); + reclaimed = true; }
+ if (unlikely(!reclaimed)) + return false; + /* This smp_store_release() pairs with smp_load_acquire() in * ring_space called by hns3_nic_net_xmit. */ smp_store_release(&ring->next_to_clean, ntc); -} - -static int is_valid_clean_head(struct hns3_enet_ring *ring, int h) -{ - int u = ring->next_to_use; - int c = ring->next_to_clean; - - if (unlikely(h > ring->desc_num)) - return 0; - - return u > c ? (h > c && h <= u) : (h > c || h <= u); + return true; }
void hns3_clean_tx_ring(struct hns3_enet_ring *ring) @@ -2632,28 +2639,12 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring) struct hns3_nic_priv *priv = netdev_priv(netdev); struct netdev_queue *dev_queue; int bytes, pkts; - int head; - - head = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG); - - if (is_ring_empty(ring) || head == ring->next_to_clean) - return; /* no data to poll */ - - rmb(); /* Make sure head is ready before touch any data */ - - if (unlikely(!is_valid_clean_head(ring, head))) { - hns3_rl_err(netdev, "wrong head (%d, %d-%d)\n", head, - ring->next_to_use, ring->next_to_clean); - - u64_stats_update_begin(&ring->syncp); - ring->stats.io_err_cnt++; - u64_stats_update_end(&ring->syncp); - return; - }
bytes = 0; pkts = 0; - hns3_nic_reclaim_desc(ring, head, &bytes, &pkts); + + if (unlikely(!hns3_nic_reclaim_desc(ring, &bytes, &pkts))) + return;
ring->tqp_vector->tx_group.total_bytes += bytes; ring->tqp_vector->tx_group.total_packets += pkts; @@ -3818,6 +3809,7 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring->desc_num = desc_num; ring->next_to_use = 0; ring->next_to_clean = 0; + ring->last_to_use = 0; }
static void hns3_queue_to_ring(struct hnae3_queue *tqp, @@ -3897,6 +3889,7 @@ void hns3_fini_ring(struct hns3_enet_ring *ring) ring->desc_cb = NULL; ring->next_to_clean = 0; ring->next_to_use = 0; + ring->last_to_use = 0; ring->pending_buf = 0; if (ring->skb) { dev_kfree_skb_any(ring->skb); @@ -4415,6 +4408,7 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h) hns3_clear_tx_ring(&priv->ring[i]); priv->ring[i].next_to_clean = 0; priv->ring[i].next_to_use = 0; + priv->ring[i].last_to_use = 0;
rx_ring = &priv->ring[i + h->kinfo.num_tqps]; hns3_init_ring_hw(rx_ring); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 90b991962fb99..d98cf6085c7d7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -370,7 +370,6 @@ enum hns3_pkt_ol4type { };
struct ring_stats { - u64 io_err_cnt; u64 sw_err_cnt; u64 seg_pkt_cnt; union { @@ -424,8 +423,10 @@ struct hns3_enet_ring { * next_to_use */ int next_to_clean; - - u32 pull_len; /* head length for current packet */ + union { + int last_to_use; /* last idx used by xmit */ + u32 pull_len; /* memcpy len for current rx packet */ + }; u32 frag_num; void *va; /* first buffer address for current packet */
@@ -562,11 +563,6 @@ static inline int ring_space(struct hns3_enet_ring *ring) (begin - end)) - 1; }
-static inline int is_ring_empty(struct hns3_enet_ring *ring) -{ - return ring->next_to_use == ring->next_to_clean; -} - static inline u32 hns3_read_reg(void __iomem *base, u32 reg) { return readl(base + reg); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index d6788923e30be..d9154dcfe33ea 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -31,7 +31,6 @@ struct hns3_sfp_type {
static const struct hns3_stats hns3_txq_stats[] = { /* Tx per-queue statistics */ - HNS3_TQP_STAT("io_err_cnt", io_err_cnt), HNS3_TQP_STAT("dropped", sw_err_cnt), HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt), HNS3_TQP_STAT("packets", tx_pkts), @@ -50,7 +49,6 @@ static const struct hns3_stats hns3_txq_stats[] = {
static const struct hns3_stats hns3_rxq_stats[] = { /* Rx per-queue statistics */ - HNS3_TQP_STAT("io_err_cnt", io_err_cnt), HNS3_TQP_STAT("dropped", sw_err_cnt), HNS3_TQP_STAT("seg_pkt_cnt", seg_pkt_cnt), HNS3_TQP_STAT("packets", rx_pkts),