[PATCH for-next 0/3] RDMA/hns: Misc patches
This patchset contains several misc patches. Chengchang Tang (3): RDMA/hns: Fix hung task when drain qp failed. RDMA/hns: Fix missing CQE when UD QP use different SL RDMA/hns: Support setting GSI QP SL drivers/infiniband/hw/hns/hns_roce_device.h | 2 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 23 +++++--- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 63 +++++++++++++++++++++ 3 files changed, 81 insertions(+), 7 deletions(-) -- 2.33.0
From: Chengchang Tang <tangchengchang@huawei.com> The flush CQE is executed asynchronously. If the drain QP has already triggered the flush CQE but a HW error occurs during this process, the driver is unable to detect the flush failure. In this case, because we are using wait_for_completion(), the drain QP thread waits for the signal indefinitely, leading to a hung task exception warning. Replace wait_for_completion() with wait_for_completion_timeout() to avoid waiting indefinitely. Signed-off-by: Chengchang Tang <tangchengchang@huawei.com> --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ca26e20c86f8..e2572f61f42f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1015,6 +1015,7 @@ static void handle_drain_completion(struct ib_cq *ibcq, struct hns_roce_drain_cqe *drain, struct hns_roce_dev *hr_dev) { +#define DRAIN_QP_TMO (HZ * 30) #define TIMEOUT (HZ / 10) struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); unsigned long flags; @@ -1059,8 +1060,10 @@ static void handle_drain_completion(struct ib_cq *ibcq, ibcq->comp_handler(ibcq, ibcq->cq_context); waiting_done: - if (ibcq->comp_handler) - wait_for_completion(&drain->done); + if (ibcq->comp_handler) { + if (!wait_for_completion_timeout(&drain->done, DRAIN_QP_TMO)) + ibdev_err_ratelimited(&hr_dev->ib_dev, "Drain qp timeout!\n"); + } } static void hns_roce_v2_drain_rq(struct ib_qp *ibqp) -- 2.33.0
From: Chengchang Tang <tangchengchang@huawei.com> Due to a HW issue, CQEs may be dropped if a UD QP uses multiple SLs. Signed-off-by: Chengchang Tang <tangchengchang@huawei.com> --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1817a2eafa45..245fe626a267 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -726,6 +726,7 @@ struct hns_roce_qp { spinlock_t flush_lock; struct hns_roce_dip *dip; bool delayed_destroy_flag; + bool ud_sl_set; }; struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e2572f61f42f..312aeccfed1f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -505,7 +505,8 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, return 0; } -static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, +static int fill_ud_av(struct hns_roce_qp *qp, + struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, struct hns_roce_ah *ah) { struct ib_device *ib_dev = ah->ibah.device; @@ -515,7 +516,12 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); - hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); + if (!qp->ud_sl_set) { + qp->sl = ah->av.sl; + qp->ud_sl_set = true; + } + + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, qp->sl); ud_sq_wqe->sgid_index = ah->av.gid_index; @@ -565,12 +571,10 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, qp->qkey : ud_wr(wr)->remote_qkey); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn); - 
ret = fill_ud_av(ud_sq_wqe, ah); + ret = fill_ud_av(qp, ud_sq_wqe, ah); if (ret) return ret; - qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl; - set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); /* @@ -5741,6 +5745,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp, hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head); hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX); hr_qp->state = IB_QPS_ERR; + hr_qp->ud_sl_set = false; spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag); if (ibqp->srq || ibqp->qp_type == IB_QPT_XRC_INI) /* no RQ */ -- 2.33.0
From: Chengchang Tang <tangchengchang@huawei.com> Support setting GSI QP SL by sysfs. Signed-off-by: Chengchang Tang <tangchengchang@huawei.com> --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 63 +++++++++++++++++++++ 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 245fe626a267..7368ef1e9e3a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1142,6 +1142,7 @@ struct hns_roce_dev { void *dca_safe_buf; dma_addr_t dca_safe_page; siphash_key_t dca_safe_hash_key; + u8 gsi_sl; }; enum hns_roce_trace_type { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 312aeccfed1f..feaa03ab3a43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -516,8 +516,9 @@ static int fill_ud_av(struct hns_roce_qp *qp, hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); - if (!qp->ud_sl_set) { - qp->sl = ah->av.sl; + if (!qp->ud_sl_set || qp->ibqp.qp_type == IB_QPT_GSI) { + qp->sl = qp->ibqp.qp_type == IB_QPT_GSI ? 
+ hr_dev->gsi_sl : ah->av.sl; qp->ud_sl_set = true; } diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index cd498c5ea417..054e08df0187 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -348,10 +348,73 @@ static const struct attribute_group dip_cc_param_group = { .is_visible = scc_attr_is_visible, }; +static umode_t gsi_sl_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + return 0600; +} + +static ssize_t gsi_sl_attr_show(struct ib_device *ibdev, u32 port_num, + struct ib_port_attribute *attr, char *buf) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + + return sysfs_emit(buf, "%u\n", hr_dev->gsi_sl); +} + +static ssize_t gsi_sl_attr_store(struct ib_device *ibdev, u32 port_num, + struct ib_port_attribute *attr, + const char *buf, size_t count) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + struct net_device *netdev; + u32 val; + int ret; + + netdev = ib_device_get_netdev(ibdev, port_num); + if (!netdev) + return -ENODEV; + + if (ib_get_curr_port_state(netdev) == IB_PORT_ACTIVE) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = kstrtou32(buf, 0, &val); + if (ret) + goto out; + + if (val > MAX_SERVICE_LEVEL) { + ret = -EINVAL; + goto out; + } + + hr_dev->gsi_sl = val; + +out: + dev_put(netdev); + return ret ? : count; +} + +static struct ib_port_attribute hns_roce_port_attr_gsi_sl = + __ATTR(sl, 0600, gsi_sl_attr_show, gsi_sl_attr_store); + +static struct attribute *gsi_sl_param_attrs[] = { + &hns_roce_port_attr_gsi_sl.attr, + NULL, +}; + +static const struct attribute_group gsi_sl_param_group = { + .name = "gsi_sl", + .attrs = gsi_sl_param_attrs, + .is_visible = gsi_sl_attr_is_visible, +}; + const struct attribute_group *hns_attr_port_groups[] = { &dcqcn_cc_param_group, &ldcp_cc_param_group, &hc3_cc_param_group, &dip_cc_param_group, + &gsi_sl_param_group, NULL, }; -- 2.33.0
participants (1)
-
Junxian Huang