From: Yixing Liu <liuyixing1@huawei.com>
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6N1G4
---------------------------------------------------------------
This patch adds support for configuring the congestion control algorithm at QP granularity. User space passes the configuration to the driver, and the driver then writes the selected algorithm into the QPC.
XRC QPs currently cannot deliver the configured algorithm to kernel space, so the driver falls back to the default algorithm for them. The default algorithm type is controlled by the firmware.
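
As a usage illustration only (not part of this patch), a minimal user-space sketch of selecting LDCP for one QP through the extended create-QP command; the helper name request_ldcp and the surrounding ucmd setup (buf_addr, db_addr, and so on) are assumptions for the example:

	#include <rdma/hns-abi.h>

	/* Hypothetical helper: mark congest_type_flags as valid and
	 * request LDCP. Exactly one algorithm bit should be set; if
	 * the kernel recognizes none of them, it falls back to the
	 * firmware default algorithm.
	 */
	static void request_ldcp(struct hns_roce_ib_create_qp *ucmd)
	{
		ucmd->comp_mask |= HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE;
		ucmd->congest_type_flags = HNS_ROCE_CREATE_QP_FLAGS_LDCP;
	}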
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  2 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  5 ++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  1 +
 drivers/infiniband/hw/hns/hns_roce_main.c   |  4 ++
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 46 +++++++++++++++++++++
 include/uapi/rdma/hns-abi.h                 | 18 +++++++-
 6 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 26014b8a3253..08d437462d78 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -654,6 +654,7 @@ struct hns_roce_qp {
 	struct hns_roce_db	rdb;
 	struct hns_roce_db	sdb;
 	unsigned long		en_flags;
+	unsigned long		congest_type;
 	u32			doorbell_qpn;
 	enum ib_sig_type	sq_signal_bits;
 	struct hns_roce_wq	sq;
@@ -914,6 +915,7 @@ struct hns_roce_caps {
 	u16		default_aeq_arm_st;
 	u16		default_ceq_arm_st;
 	u8		congest_type;
+	u8		default_congest_type;
 };
 
 enum hns_roce_device_state {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b0be5cfe1a12..36397517e679 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2452,6 +2452,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
 	caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
 	caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
 	caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+	caps->default_congest_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
 	caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
 	caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
 	caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
@@ -5094,10 +5095,10 @@ enum {
 static int check_congest_type(struct ib_qp *ibqp,
 			      struct hns_roce_congestion_algorithm *congest_alg)
 {
-	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 
 	/* different congestion types match different configurations */
-	switch (hr_dev->caps.congest_type) {
+	switch (hr_qp->congest_type) {
 	case HNS_ROCE_CONGEST_TYPE_DCQCN:
 		congest_alg->alg_sel = CONGEST_DCQCN;
 		congest_alg->alg_sub_sel = UNSUPPORT_CONGEST_LEVEL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 8ea1695eee6f..1605a093dae8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1241,6 +1241,7 @@ struct hns_roce_query_pf_caps_d {
 #define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
 #define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
 #define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
 #define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
 #define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
 #define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index b171058df0cf..7702279e2364 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -440,6 +440,10 @@ static void ucontext_set_resp(struct ib_ucontext *uctx,
 	resp->srq_tab_size = hr_dev->caps.num_srqs;
 	resp->cqe_size = hr_dev->caps.cqe_sz;
 	resp->mac_type = hr_dev->mac_type;
+
+	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		resp->congest_type = hr_dev->caps.congest_type;
+
 	if (context->dca_ctx.dca_mmap_entry) {
 		resp->dca_qps = context->dca_ctx.max_qps;
 		resp->dca_mmap_size = PAGE_SIZE * context->dca_ctx.status_npage;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 06ac218a4be5..5b31f1aa8f6b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1072,6 +1072,48 @@ static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
 	kfree(hr_qp->sq.wrid);
 }
 
+static inline void default_congest_type(struct hns_roce_dev *hr_dev,
+					struct hns_roce_qp *hr_qp)
+{
+	struct hns_roce_caps *caps = &hr_dev->caps;
+
+	hr_qp->congest_type = 1 << caps->default_congest_type;
+}
+
+static int set_congest_type(struct hns_roce_qp *hr_qp,
+			    struct hns_roce_ib_create_qp *ucmd)
+{
+	int ret = 0;
+
+	if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DCQCN)
+		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN;
+	else if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_LDCP)
+		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_LDCP;
+	else if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_HC3)
+		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_HC3;
+	else if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DIP)
+		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DIP;
+	else
+		ret = -EINVAL;
+
+	return ret;
+}
+
+static void set_congest_param(struct hns_roce_dev *hr_dev,
+			      struct hns_roce_qp *hr_qp,
+			      struct hns_roce_ib_create_qp *ucmd)
+{
+	int ret;
+
+	if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE) {
+		ret = set_congest_type(hr_qp, ucmd);
+		if (ret == 0)
+			return;
+	}
+
+	default_congest_type(hr_dev, hr_qp);
+}
+
 static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			struct ib_qp_init_attr *init_attr,
 			struct ib_udata *udata,
@@ -1096,6 +1138,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		return ret;
 	}
 
+	if (init_attr->qp_type == IB_QPT_XRC_TGT)
+		default_congest_type(hr_dev, hr_qp);
+
 	if (udata) {
 		ret = ib_copy_from_udata(ucmd, udata,
 					 min(udata->inlen, sizeof(*ucmd)));
@@ -1113,6 +1158,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		if (ret)
 			ibdev_err(ibdev, "Failed to set user SQ size, ret = %d\n",
 				  ret);
+		set_congest_param(hr_dev, hr_qp, ucmd);
 	} else {
 		if (init_attr->create_flags &
 		    IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 5988a6288d14..bd19927d7ed9 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -63,6 +63,18 @@ struct hns_roce_ib_create_srq_resp {
 	__u32	reserved;
 };
 
+enum hns_roce_create_qp_comp_mask {
+	HNS_ROCE_CREATE_QP_MASK_CREATE_FLAGS = 1 << 0,
+	HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 1,
+};
+
+enum hns_roce_congest_type_flags {
+	HNS_ROCE_CREATE_QP_FLAGS_DCQCN = 1 << 0,
+	HNS_ROCE_CREATE_QP_FLAGS_LDCP = 1 << 1,
+	HNS_ROCE_CREATE_QP_FLAGS_HC3 = 1 << 2,
+	HNS_ROCE_CREATE_QP_FLAGS_DIP = 1 << 3,
+};
+
 struct hns_roce_ib_create_qp {
 	__aligned_u64 buf_addr;
 	__aligned_u64 db_addr;
@@ -71,6 +83,9 @@ struct hns_roce_ib_create_qp {
 	__u8	sq_no_prefetch;
 	__u8	reserved[5];
 	__aligned_u64 sdb_addr;
+	__aligned_u64 comp_mask;
+	__aligned_u64 create_flags;
+	__aligned_u64 congest_type_flags;
 };
 
 enum hns_roce_qp_cap_flags {
@@ -123,7 +138,8 @@ struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	config;
 	__u32	max_inline_data;
 	__u8	mac_type;
-	__u8	rsv1[7];
+	__u8	congest_type;
+	__u8	rsv1[6];
 	__u32	dca_qps;
 	__u32	dca_mmap_size;
 	__aligned_u64 dca_mmap_key;
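
A standalone sketch (illustration only, not part of the patch) of why the default path stores 1 << caps->default_congest_type: the firmware reports an algorithm index, and shifting it reproduces the same bit encoding as the UAPI flags above. The assumption that firmware index 1 means LDCP follows the flag-bit order and is only for the example:

	#include <assert.h>
	#include <rdma/hns-abi.h>

	int main(void)
	{
		/* Firmware default index, assumed to enumerate the
		 * algorithms in flag-bit order (0 = DCQCN, 1 = LDCP, ...).
		 */
		unsigned char fw_default = 1;
		unsigned long congest_type = 1UL << fw_default;

		assert(congest_type == HNS_ROCE_CREATE_QP_FLAGS_LDCP);
		return 0;
	}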