From: Juan Zhou <zhoujuan51@h-partners.com>
1. Support hns HW stats
2. Add dfx cnt stats
Chengchang Tang (2):
  RDMA/hns: Support hns HW stats
  RDMA/hns: Add dfx cnt stats
 drivers/infiniband/hw/hns/hns_roce_ah.c     |   8 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.c    |  17 ++-
 drivers/infiniband/hw/hns/hns_roce_cq.c     |  17 ++-
 drivers/infiniband/hw/hns/hns_roce_device.h |  50 +++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  59 ++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |   1 +
 drivers/infiniband/hw/hns/hns_roce_main.c   | 146 +++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_mr.c     |  22 ++-
 drivers/infiniband/hw/hns/hns_roce_pd.c     |  10 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  16 ++-
 drivers/infiniband/hw/hns/hns_roce_srq.c    |   7 +-
 11 files changed, 320 insertions(+), 33 deletions(-)
--
2.30.0
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6GSZL
---------------------------------------------------------------
Support querying hns HW stats to help debug several issues.
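
The counters are exposed through the standard rdma hw_stats interface, so once
the driver registers alloc_hw_stats/get_hw_stats they show up under the
per-port hw_counters directory in sysfs. A minimal userspace sketch for reading
one of them (the device name "hns_0" and counter name "rx_pkt" below are only
illustrative, not guaranteed by this patch):

  /* read_hw_counter.c - illustrative sketch, not part of this series */
  #include <stdio.h>

  int main(void)
  {
  	/* standard per-port hw_stats path; adjust device/port/counter */
  	const char *path =
  		"/sys/class/infiniband/hns_0/ports/1/hw_counters/rx_pkt";
  	unsigned long long val;
  	FILE *f = fopen(path, "r");

  	if (!f || fscanf(f, "%llu", &val) != 1) {
  		perror("read counter");
  		return 1;
  	}
  	fclose(f);
  	printf("rx_pkt = %llu\n", val);
  	return 0;
  }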
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h | 28 ++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 51 +++++++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  1 +
 drivers/infiniband/hw/hns/hns_roce_main.c   | 79 +++++++++++++++++++++
 4 files changed, 159 insertions(+)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 69a14b1ca99c..23b2e393fa71 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -914,6 +914,32 @@ enum hns_roce_device_state {
 	HNS_ROCE_DEVICE_STATE_UNINIT,
 };

+enum hns_roce_hw_pkt_stat_index {
+	HNS_ROCE_HW_RX_RC_PKT_CNT,
+	HNS_ROCE_HW_RX_UC_PKT_CNT,
+	HNS_ROCE_HW_RX_UD_PKT_CNT,
+	HNS_ROCE_HW_RX_XRC_PKT_CNT,
+	HNS_ROCE_HW_RX_PKT_CNT,
+	HNS_ROCE_HW_RX_ERR_PKT_CNT,
+	HNS_ROCE_HW_RX_CNP_PKT_CNT,
+	HNS_ROCE_HW_TX_RC_PKT_CNT,
+	HNS_ROCE_HW_TX_UC_PKT_CNT,
+	HNS_ROCE_HW_TX_UD_PKT_CNT,
+	HNS_ROCE_HW_TX_XRC_PKT_CNT,
+	HNS_ROCE_HW_TX_PKT_CNT,
+	HNS_ROCE_HW_TX_ERR_PKT_CNT,
+	HNS_ROCE_HW_TX_CNP_PKT_CNT,
+	HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT,
+	HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT,
+	HNS_ROCE_HW_ECN_DB_CNT,
+	HNS_ROCE_HW_RX_BUF_CNT,
+	HNS_ROCE_HW_TRP_RX_SOF_CNT,
+	HNS_ROCE_HW_CQ_CQE_CNT,
+	HNS_ROCE_HW_CQ_POE_CNT,
+	HNS_ROCE_HW_CQ_NOTIFY_CNT,
+	HNS_ROCE_HW_CNT_TOTAL,
+};
+
 struct hns_roce_hw {
 	int (*cmq_init)(struct hns_roce_dev *hr_dev);
 	void (*cmq_exit)(struct hns_roce_dev *hr_dev);
@@ -962,6 +988,8 @@ struct hns_roce_hw {
 	int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
 	int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp, u8 *tc_mode,
 			u8 *priority);
+	int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
+				u64 *stats, u32 port, int *hw_counters);
 	const struct ib_device_ops *hns_roce_dev_ops;
 	const struct ib_device_ops *hns_roce_dev_srq_ops;
 	int (*bond_init)(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 6e53864938c6..f480709144ca 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1772,6 +1772,56 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
 	return 0;
 }
+static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev,
+					u64 *stats, u32 port, int *num_counters)
+{
+#define CNT_PER_DESC 3
+	struct hns_roce_cmq_desc *desc;
+	int bd_idx, cnt_idx;
+	__le64 *cnt_data;
+	int desc_num;
+	int ret;
+	int i;
+
+	if (port > hr_dev->caps.num_ports)
+		return -EINVAL;
+
+	desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC);
+	desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	for (i = 0; i < desc_num; i++) {
+		hns_roce_cmq_setup_basic_desc(&desc[i],
+					      HNS_ROCE_OPC_QUERY_COUNTER, true);
+		if (i != desc_num - 1)
+			desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+	}
+
+	ret = hns_roce_cmq_send(hr_dev, desc, desc_num);
+	if (ret) {
+		ibdev_err(&hr_dev->ib_dev,
+			  "failed to get counter, ret = %d.\n", ret);
+		goto err_out;
+	}
+
+	for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) {
+		bd_idx = i / CNT_PER_DESC;
+		if (!(desc[bd_idx].flag & HNS_ROCE_CMD_FLAG_NEXT) &&
+		    bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC)
+			break;
+
+		cnt_data = (__le64 *)&desc[bd_idx].data[0];
+		cnt_idx = i % CNT_PER_DESC;
+		stats[i] = le64_to_cpu(cnt_data[cnt_idx]);
+	}
+	*num_counters = i;
+
+err_out:
+	kfree(desc);
+	return ret;
+}
+
 static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_cmq_desc desc;
@@ -7140,6 +7190,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
 	.bond_init = hns_roce_bond_init,
 	.bond_is_active = hns_roce_bond_is_active,
 	.get_bond_netdev = hns_roce_get_bond_netdev,
+	.query_hw_counter = hns_roce_hw_v2_query_counter,
 };
 static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 7751b3de2ff0..7d5c304a8342 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -230,6 +230,7 @@ enum hns_roce_opcode_type {
 	HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
 	HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
 	HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
+	HNS_ROCE_OPC_QUERY_COUNTER = 0x8206,
 	HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
 	HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
 	HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c7f5dfa9521a..7ef3d133f7d3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -703,6 +703,83 @@ static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
 		 sub_minor);
 }
+#define HNS_ROCE_HW_CNT(ename, cname) \
+	[HNS_ROCE_HW_##ename##_CNT] = cname
+
+static const char *const hns_roce_port_stats_descs[] = {
+	HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"),
+	HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"),
+	HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"),
+	HNS_ROCE_HW_CNT(RX_XRC_PKT, "rx_xrc_pkt"),
+	HNS_ROCE_HW_CNT(RX_PKT, "rx_pkt"),
+	HNS_ROCE_HW_CNT(RX_ERR_PKT, "rx_err_pkt"),
+	HNS_ROCE_HW_CNT(RX_CNP_PKT, "rx_cnp_pkt"),
+	HNS_ROCE_HW_CNT(TX_RC_PKT, "tx_rc_pkt"),
+	HNS_ROCE_HW_CNT(TX_UC_PKT, "tx_uc_pkt"),
+	HNS_ROCE_HW_CNT(TX_UD_PKT, "tx_ud_pkt"),
+	HNS_ROCE_HW_CNT(TX_XRC_PKT, "tx_xrc_pkt"),
+	HNS_ROCE_HW_CNT(TX_PKT, "tx_pkt"),
+	HNS_ROCE_HW_CNT(TX_ERR_PKT, "tx_err_pkt"),
+	HNS_ROCE_HW_CNT(TX_CNP_PKT, "tx_cnp_pkt"),
+	HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT, "trp_get_mpt_err_pkt"),
+	HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"),
+	HNS_ROCE_HW_CNT(ECN_DB, "ecn_doorbell"),
+	HNS_ROCE_HW_CNT(RX_BUF, "rx_buffer"),
+	HNS_ROCE_HW_CNT(TRP_RX_SOF, "trp_rx_sof"),
+	HNS_ROCE_HW_CNT(CQ_CQE, "cq_cqe"),
+	HNS_ROCE_HW_CNT(CQ_POE, "cq_poe"),
+	HNS_ROCE_HW_CNT(CQ_NOTIFY, "cq_notify"),
+};
+
+static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(struct ib_device *device,
+							   u8 port_num)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(device);
+	u32 port = port_num - 1;
+
+	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
+	    hr_dev->is_vf)
+		return NULL;
+
+	if (port > hr_dev->caps.num_ports) {
+		ibdev_err(device, "invalid port num.\n");
+		return NULL;
+	}
+
+	return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
+					  ARRAY_SIZE(hns_roce_port_stats_descs),
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int hns_roce_get_hw_stats(struct ib_device *device,
+				 struct rdma_hw_stats *stats,
+				 u8 port, int index)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(device);
+	int num_counters = HNS_ROCE_HW_CNT_TOTAL;
+	int ret;
+
+	if (port == 0)
+		return 0;
+
+	if (port > hr_dev->caps.num_ports)
+		return -EINVAL;
+
+	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
+	    hr_dev->is_vf)
+		return -EOPNOTSUPP;
+
+	ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
+					   &num_counters);
+	if (ret) {
+		ibdev_err(device, "failed to query hw counter, ret = %d.\n",
+			  ret);
+		return ret;
+	}
+
+	return num_counters;
+}
+
 static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
@@ -753,6 +830,8 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	.query_pkey = hns_roce_query_pkey,
 	.query_port = hns_roce_query_port,
 	.reg_user_mr = hns_roce_reg_user_mr,
+	.alloc_hw_stats = hns_roce_alloc_hw_port_stats,
+	.get_hw_stats = hns_roce_get_hw_stats,

 	INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),
--
2.30.0
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6GSZL
---------------------------------------------------------------
Add more dfx counters to help diagnosis. These stats can be read through sysfs
or the rdma tool.
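
For example, all of the counters (the dfx counters first, followed by the HW
counters on HIP09 and newer) can be listed from the same per-port hw_counters
directory in sysfs; the rdma tool reads the same values (e.g. "rdma statistic
show link <dev>/<port>"). A rough userspace sketch, with the device and port
names purely illustrative:

  /* list_hw_counters.c - illustrative sketch, not part of this series */
  #include <dirent.h>
  #include <stdio.h>

  int main(void)
  {
  	const char *dir_path =
  		"/sys/class/infiniband/hns_0/ports/1/hw_counters";
  	struct dirent *de;
  	char path[512];
  	DIR *dir = opendir(dir_path);

  	if (!dir) {
  		perror("opendir");
  		return 1;
  	}
  	while ((de = readdir(dir)) != NULL) {
  		unsigned long long val;
  		FILE *f;

  		if (de->d_name[0] == '.')
  			continue;
  		snprintf(path, sizeof(path), "%s/%s", dir_path, de->d_name);
  		f = fopen(path, "r");
  		if (f && fscanf(f, "%llu", &val) == 1)
  			printf("%-24s %llu\n", de->d_name, val);
  		if (f)
  			fclose(f);
  	}
  	closedir(dir);
  	return 0;
  }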
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_ah.c     |  8 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.c    | 17 +++-
 drivers/infiniband/hw/hns/hns_roce_cq.c     | 17 ++--
 drivers/infiniband/hw/hns/hns_roce_device.h | 22 +++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  7 ++
 drivers/infiniband/hw/hns/hns_roce_main.c   | 94 ++++++++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_mr.c     | 22 +++--
 drivers/infiniband/hw/hns/hns_roce_pd.c     | 10 ++-
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 16 +++-
 drivers/infiniband/hw/hns/hns_roce_srq.c    |  7 +-
 10 files changed, 174 insertions(+), 46 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 975f58d9b8f0..c92179da38bd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -85,7 +85,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 		ret = 0;

 	if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
-		return ret;
+		goto err_out;

 	if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
 	    grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -101,7 +101,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 		ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
 					      &ah->av.vlan_id, NULL);
 		if (ret)
-			return ret;
+			goto err_out;

 		ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
 	}
@@ -113,6 +113,10 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 				       min(udata->outlen, sizeof(resp)));
 	}

+err_out:
+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]);
+
 	return ret;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 864413607571..a269578c8d89 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -41,7 +41,14 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
 				     struct hns_roce_mbox_msg *mbox_msg)
 {
-	return hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+	int ret;
+
+	ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+	if (ret)
+		return ret;
+
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]);
+	return 0;
 }

 /* this should be called with "poll_sem" */
@@ -58,7 +65,12 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
 		return ret;
 	}

-	return hr_dev->hw->poll_mbox_done(hr_dev);
+	ret = hr_dev->hw->poll_mbox_done(hr_dev);
+	if (ret)
+		return ret;
+
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]);
+	return 0;
 }

 static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
@@ -89,6 +101,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
 	context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO);
 	context->out_param = out_param;
 	complete(&context->done);
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]);
 }
 static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 98dcbc40f964..6997c3dbdc96 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -363,17 +363,19 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	struct hns_roce_ib_create_cq ucmd = {};
 	int ret;

-	if (attr->flags)
-		return -EOPNOTSUPP;
+	if (attr->flags) {
+		ret = -EOPNOTSUPP;
+		goto err_out;
+	}

 	ret = verify_cq_create_attr(hr_dev, attr);
 	if (ret)
-		return ret;
+		goto err_out;

 	if (udata) {
 		ret = get_cq_ucmd(hr_cq, udata, &ucmd);
 		if (ret)
-			return ret;
+			goto err_out;
 	}

@@ -381,12 +383,12 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,

 	ret = set_cqe_size(hr_cq, udata, &ucmd);
 	if (ret)
-		return ret;
+		goto err_out;

 	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
-		return ret;
+		goto err_out;
 	}

 	ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
@@ -431,6 +433,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 	free_cq_db(hr_dev, hr_cq, udata);
 err_cq_buf:
 	free_cq_buf(hr_dev, hr_cq);
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
+
 	return ret;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 23b2e393fa71..869d952b32b2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -940,6 +940,27 @@ enum hns_roce_hw_pkt_stat_index {
 	HNS_ROCE_HW_CNT_TOTAL,
 };

+enum hns_roce_hw_dfx_stat_index {
+	HNS_ROCE_DFX_AEQE_CNT,
+	HNS_ROCE_DFX_CEQE_CNT,
+	HNS_ROCE_DFX_CMDS_CNT,
+	HNS_ROCE_DFX_CMDS_ERR_CNT,
+	HNS_ROCE_DFX_MBX_POSTED_CNT,
+	HNS_ROCE_DFX_MBX_POLLED_CNT,
+	HNS_ROCE_DFX_MBX_EVENT_CNT,
+	HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
+	HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_XRCD_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_MR_REG_ERR_CNT,
+	HNS_ROCE_DFX_MR_REREG_ERR_CNT,
+	HNS_ROCE_DFX_AH_CREATE_ERR_CNT,
+	HNS_ROCE_DFX_MMAP_ERR_CNT,
+	HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT,
+	HNS_ROCE_DFX_CNT_TOTAL
+};
+
 struct hns_roce_hw {
 	int (*cmq_init)(struct hns_roce_dev *hr_dev);
 	void (*cmq_exit)(struct hns_roce_dev *hr_dev);
@@ -1073,6 +1094,7 @@ struct hns_roce_dev {
 	struct delayed_work bond_work;
 	struct hns_roce_bond_group *bond_grp;
 	struct netdev_lag_lower_state_info slave_state;
+	atomic64_t *dfx_cnt;
 };
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f480709144ca..a76a57c7e36a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1396,6 +1396,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 	/* Write to hardware */
 	roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);

+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
+
 	do {
 		if (hns_roce_cmq_csq_done(hr_dev))
 			break;
@@ -1433,6 +1435,9 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 	spin_unlock_bh(&csq->lock);

+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
+
 	return ret;
 }

@@ -6515,6 +6520,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		eq->sub_type = sub_type;
 		++eq->cons_index;
 		aeqe_found = IRQ_HANDLED;
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);

 		hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);

@@ -6557,6 +6563,7 @@ static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
 		++eq->cons_index;
 		ceqe_found = IRQ_HANDLED;
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);

 		ceqe = next_ceqe_sw_v2(eq);
 	}
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 7ef3d133f7d3..33914c9994b2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -478,10 +478,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
 	struct hns_roce_ib_alloc_ucontext_resp resp = {};
 	struct hns_roce_ib_alloc_ucontext ucmd = {};
-	int ret;
+	int ret = -EAGAIN;
 	if (!hr_dev->active)
-		return -EAGAIN;
+		goto error_fail_uar_alloc;

 	context->pid = current->pid;
 	INIT_LIST_HEAD(&context->list);
@@ -489,7 +489,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	ret = ib_copy_from_udata(&ucmd, udata,
 				 min(udata->inlen, sizeof(ucmd)));
 	if (ret)
-		return ret;
+		goto error_fail_uar_alloc;

 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
 		context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
@@ -559,6 +559,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);

 error_fail_uar_alloc:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]);
+
 	return ret;
 }
@@ -621,8 +623,10 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 	int ret;

 	rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
-	if (!rdma_entry)
-		return -EINVAL;
+	if (!rdma_entry) {
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+		return -EINVAL;
+	}

 	entry = to_hns_mmap(rdma_entry);
 	pfn = entry->address >> PAGE_SHIFT;
@@ -649,6 +653,8 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)

 out:
 	rdma_user_mmap_entry_put(rdma_entry);
+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);

 	return ret;
 }
@@ -703,10 +709,30 @@ static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
 		 sub_minor);
 }
+#define HNS_ROCE_DFX_STATS(ename, cname) \
+	[HNS_ROCE_DFX_##ename##_CNT] = cname
+
 #define HNS_ROCE_HW_CNT(ename, cname) \
-	[HNS_ROCE_HW_##ename##_CNT] = cname
+	[HNS_ROCE_DFX_CNT_TOTAL + HNS_ROCE_HW_##ename##_CNT] = cname

 static const char *const hns_roce_port_stats_descs[] = {
+	HNS_ROCE_DFX_STATS(AEQE, "aeqe"),
+	HNS_ROCE_DFX_STATS(CEQE, "ceqe"),
+	HNS_ROCE_DFX_STATS(CMDS, "cmds"),
+	HNS_ROCE_DFX_STATS(CMDS_ERR, "cmds_err"),
+	HNS_ROCE_DFX_STATS(MBX_POSTED, "posted_mbx"),
+	HNS_ROCE_DFX_STATS(MBX_POLLED, "polled_mbx"),
+	HNS_ROCE_DFX_STATS(MBX_EVENT, "mbx_event"),
+	HNS_ROCE_DFX_STATS(QP_CREATE_ERR, "qp_create_err"),
+	HNS_ROCE_DFX_STATS(QP_MODIFY_ERR, "qp_modify_err"),
+	HNS_ROCE_DFX_STATS(CQ_CREATE_ERR, "cq_create_err"),
+	HNS_ROCE_DFX_STATS(SRQ_CREATE_ERR, "srq_create_err"),
+	HNS_ROCE_DFX_STATS(XRCD_CREATE_ERR, "xrcd_create_err"),
+	HNS_ROCE_DFX_STATS(MR_REG_ERR, "mr_reg_err"),
+	HNS_ROCE_DFX_STATS(MR_REREG_ERR, "mr_rereg_err"),
+	HNS_ROCE_DFX_STATS(AH_CREATE_ERR, "ah_create_err"),
+	HNS_ROCE_DFX_STATS(MMAP_ERR, "mmap_err"),
+	HNS_ROCE_DFX_STATS(UCTX_ALLOC_ERR, "uctx_alloc_err"),
 	HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"),
 	HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"),
 	HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"),
@@ -735,19 +761,21 @@ static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(struct ib_device *devi
 							   u8 port_num)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(device);
-	u32 port = port_num - 1;
-
-	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
-	    hr_dev->is_vf)
-		return NULL;
+	int num_counters;

-	if (port > hr_dev->caps.num_ports) {
+	if (port_num > hr_dev->caps.num_ports) {
 		ibdev_err(device, "invalid port num.\n");
 		return NULL;
 	}

+	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09 &&
+	    !hr_dev->is_vf)
+		num_counters = ARRAY_SIZE(hns_roce_port_stats_descs);
+	else
+		num_counters = HNS_ROCE_DFX_CNT_TOTAL;
+
 	return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
-					  ARRAY_SIZE(hns_roce_port_stats_descs),
+					  num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }

@@ -756,8 +784,9 @@ static int hns_roce_get_hw_stats(struct ib_device *device,
 				 u8 port, int index)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(device);
-	int num_counters = HNS_ROCE_HW_CNT_TOTAL;
+	int hw_counters = HNS_ROCE_HW_CNT_TOTAL;
 	int ret;
+	int i;

 	if (port == 0)
 		return 0;
@@ -765,19 +794,24 @@ static int hns_roce_get_hw_stats(struct ib_device *device,
 	if (port > hr_dev->caps.num_ports)
 		return -EINVAL;

+	for (i = 0; i < HNS_ROCE_DFX_CNT_TOTAL; i++)
+		stats->value[i] = atomic64_read(&hr_dev->dfx_cnt[i]);
+
 	if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 ||
 	    hr_dev->is_vf)
-		return -EOPNOTSUPP;
+		return HNS_ROCE_DFX_CNT_TOTAL;

-	ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
-					   &num_counters);
+	hw_counters = HNS_ROCE_HW_CNT_TOTAL;
+	ret = hr_dev->hw->query_hw_counter(hr_dev,
+					   &stats->value[HNS_ROCE_DFX_CNT_TOTAL],
+					   port, &hw_counters);
 	if (ret) {
 		ibdev_err(device, "failed to query hw counter, ret = %d.\n",
 			  ret);
 		return ret;
 	}

-	return num_counters;
+	return hw_counters + HNS_ROCE_DFX_CNT_TOTAL;
 }
 static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@@ -1285,6 +1319,21 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
 	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
 }

+static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+	hr_dev->dfx_cnt = kcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t),
+				  GFP_KERNEL);
+	if (!hr_dev->dfx_cnt)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+	kfree(hr_dev->dfx_cnt);
+}
+
 int hns_roce_init(struct hns_roce_dev *hr_dev)
 {
 	struct device *dev = hr_dev->dev;
@@ -1292,11 +1341,15 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
 	hr_dev->is_reset = false;

+	ret = hns_roce_alloc_dfx_cnt(hr_dev);
+	if (ret)
+		return ret;
+
 	if (hr_dev->hw->cmq_init) {
 		ret = hr_dev->hw->cmq_init(hr_dev);
 		if (ret) {
 			dev_err(dev, "Init RoCE Command Queue failed!\n");
-			return ret;
+			goto error_failed_alloc_dfx_cnt;
 		}
 	}

@@ -1376,6 +1429,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
 	if (hr_dev->hw->cmq_exit)
 		hr_dev->hw->cmq_exit(hr_dev);

+error_failed_alloc_dfx_cnt:
+	hns_roce_dealloc_dfx_cnt(hr_dev);
+
 	return ret;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 111a397544d7..81aa3239a54a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -234,8 +234,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	int ret;

 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr)
-		return ERR_PTR(-ENOMEM);
+	if (!mr) {
+		ret = -ENOMEM;
+		goto err_out;
+	}

 	mr->iova = virt_addr;
 	mr->size = length;
@@ -266,6 +268,9 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	free_mr_key(hr_dev, mr);
 err_alloc_mr:
 	kfree(mr);
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]);
+
 	return ERR_PTR(ret);
 }
@@ -280,12 +285,15 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
 	unsigned long mtpt_idx;
 	int ret;

-	if (!mr->enabled)
-		return -EINVAL;
+	if (!mr->enabled) {
+		ret = -EINVAL;
+		goto err_out;
+	}

 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
+	ret = PTR_ERR_OR_ZERO(mailbox);
+	if (ret)
+		goto err_out;

 	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
 	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
@@ -335,6 +343,8 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,

 free_cmd_mbox:
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]);

 	return ret;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 783e71852c50..5402b06729b3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -147,16 +147,18 @@ int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
 	struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
-	int ret;
+	int ret = -EINVAL;
 	if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
-		return -EINVAL;
+		goto err_out;

 	ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+
+err_out:
 	if (ret)
-		return ret;
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_XRCD_CREATE_ERR_CNT]);

-	return 0;
+	return ret;
 }

 int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index def237557a49..06ac218a4be5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1295,11 +1295,13 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 	ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
 	if (ret)
-		return ERR_PTR(ret);
+		goto err_out;

 	hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
-	if (!hr_qp)
-		return ERR_PTR(-ENOMEM);
+	if (!hr_qp) {
+		ret = -ENOMEM;
+		goto err_out;
+	}

 	if (init_attr->qp_type == IB_QPT_XRC_TGT)
 		hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
@@ -1315,10 +1317,14 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 			  init_attr->qp_type, ret);

 		kfree(hr_qp);
-		return ERR_PTR(ret);
+		goto err_out;
 	}

 	return &hr_qp->ibqp;
+
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]);
+	return ERR_PTR(ret);
 }

 int to_hr_qp_type(int qp_type)
@@ -1485,6 +1491,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,

 out:
 	mutex_unlock(&hr_qp->mutex);
+	if (ret)
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]);

 	return ret;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 727c7a134102..320aaa24be01 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -401,11 +401,11 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	ret = set_srq_param(srq, init_attr, udata);
 	if (ret)
-		return ret;
+		goto err_out;

 	ret = alloc_srq_buf(hr_dev, srq, udata);
 	if (ret)
-		return ret;
+		goto err_out;

 	ret = alloc_srqn(hr_dev, srq);
 	if (ret)
@@ -437,7 +437,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	free_srqn(hr_dev, srq);
 err_srq_buf:
 	free_srq_buf(hr_dev, srq);
-
+err_out:
+	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]);
 	return ret;
 }
--
2.30.0