STARS is a HW scheduler. These patches support STARS taking over HNS RoCE.
Chengchang Tang (6): RDMA/hns: Support query HW ID from user space. RDMA/hns: Fix print after query hw id failed. RDMA/hns: Support configuring POE channels and creating POE CQs RDMA/hns: Support STARS mode QP RDMA/hns: Support kernel ULP querying HW ID RDMA/hns: Support write with notify
drivers/infiniband/hw/hns/Makefile | 3 +- drivers/infiniband/hw/hns/hns_roce_common.h | 3 + drivers/infiniband/hw/hns/hns_roce_cq.c | 114 +++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 111 ++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_device.h | 63 ++++++++++ drivers/infiniband/hw/hns/hns_roce_ext.c | 143 +++++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_ext.h | 66 +++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 169 +++++++++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 52 +++++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 62 +++++++++- drivers/infiniband/hw/hns/hns_roce_poe.c | 97 +++++++++++++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 62 +++++++++- include/uapi/rdma/hns-abi.h | 37 +++++- 13 files changed, 966 insertions(+), 16 deletions(-) create mode 100644 drivers/infiniband/hw/hns/hns_roce_ext.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_ext.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_poe.c
-- 2.9.5
This patch supports querying the HW ID from user space. The driver obtains the HW ID from the firmware during initialization. The user-mode driver can then request the kernel-mode driver to read this information through query_device_ex().
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 35 +++++++++++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 10 +++++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 28 ++++++++++++++++++++++- include/uapi/rdma/hns-abi.h | 13 +++++++++++ 5 files changed, 89 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 453c088..5d48990 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1091,6 +1091,10 @@ struct hns_roce_dev { u32 vendor_id; u32 vendor_part_id; u32 hw_rev; + u16 chip_id; + u16 die_id; + u16 mac_id; + u16 func_id; void __iomem *priv_addr;
struct hns_roce_cmdq cmd; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8a1b971..8f4b236 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1570,6 +1570,39 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) return 0; }
+static void hns_roce_cmq_query_hw_id(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_hw_id_query_cmq *resp; + struct hns_roce_cmq_desc desc; + int ret; + + if (hr_dev->is_vf) + goto invalid_val; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_HW_ID, true); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + if (desc.retval != CMD_NOT_EXIST) + ibdev_warn(&hr_dev->ib_dev, + "failed to query hw id, ret = %d.\n", ret); + + goto invalid_val; + } + + resp = (struct hns_roce_hw_id_query_cmq *)desc.data; + hr_dev->chip_id = resp->chip_id; + hr_dev->die_id = resp->die_id; + hr_dev->mac_id = resp->mac_id; + hr_dev->func_id = (u16)le32_to_cpu(resp->func_id); + return; + +invalid_val: + hr_dev->func_id = HNS_IB_INVALID_ID; + hr_dev->mac_id = HNS_IB_INVALID_ID; + hr_dev->die_id = HNS_IB_INVALID_ID; + hr_dev->chip_id = HNS_IB_INVALID_ID; +} + static void func_clr_hw_resetting_state(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { @@ -2586,6 +2619,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; }
+ hns_roce_cmq_query_hw_id(hr_dev); + hr_dev->vendor_part_id = hr_dev->pci_dev->device; hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index cbcd0f6..e6a9140 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -200,6 +200,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_LDCP_PARAM = 0x1A81, HNS_ROCE_OPC_CFG_HC3_PARAM = 0x1A82, HNS_ROCE_OPC_CFG_DIP_PARAM = 0x1A83, + HNS_ROCE_OPC_QUERY_HW_ID = 0x7032, HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, @@ -939,6 +940,15 @@ struct hns_roce_v2_wqe_data_seg { __le64 addr; };
+struct hns_roce_hw_id_query_cmq { + __u8 chip_id; + __u8 die_id; + __u8 mac_id; + __u8 reserved; + __le32 func_id; + __le32 rsv[4]; +}; + struct hns_roce_query_version { __le16 rocee_vendor_id; __le16 rocee_hw_version; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 51628ab..142a562 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -234,6 +234,32 @@ static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) return 0; }
+static int set_attrx(struct hns_roce_dev *hr_dev, struct ib_udata *uhw) +{ + struct hns_roce_ib_query_device_resp resp = {}; + size_t uhw_outlen; + + if (!uhw || !uhw->outlen) + return 0; + + uhw_outlen = uhw->outlen; + resp.len = sizeof(resp.comp_mask) + sizeof(resp.len); + if (uhw_outlen < resp.len) + return -EINVAL; + + if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) + return -EINVAL; + + if (uhw_outlen >= offsetofend(typeof(resp), hw_id)) { + resp.len += sizeof(resp.hw_id); + resp.hw_id.chip_id = hr_dev->chip_id; + resp.hw_id.die_id = hr_dev->die_id; + resp.hw_id.func_id = hr_dev->func_id; + } + + return ib_copy_to_udata(uhw, &resp, resp.len); +} + static int hns_roce_query_device(struct ib_device *ib_dev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -281,7 +307,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC;
- return 0; + return set_attrx(hr_dev, uhw); }
static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index a95b1d0..9d18a69 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -232,4 +232,17 @@ enum hns_ib_dca_mem_query_attrs { HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, };
+#define HNS_IB_INVALID_ID 0XFFFF +struct hns_roce_ib_hw_id { + __u16 chip_id; + __u16 die_id; + __u16 func_id; + __u16 reserved; +}; + +struct hns_roce_ib_query_device_resp { + __u32 comp_mask; + __u32 len; + struct hns_roce_ib_hw_id hw_id; +}; #endif /* HNS_ABI_USER_H */
This patch avoids printing a log about the failure to query the HW ID, which is expected on some platforms.
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8f4b236..05edb6c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1436,9 +1436,11 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (likely(desc_ret == CMD_EXEC_SUCCESS)) continue;
- dev_err_ratelimited(hr_dev->dev, - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", - desc->opcode, desc_ret); + if (desc->opcode != HNS_ROCE_OPC_QUERY_HW_ID && + desc_ret != CMD_NOT_EXIST) + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", + desc->opcode, desc_ret); ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else {
This patch provides a pair of APIs, rdma_register_poe_channel()/rdma_unregister_poe_channel(), to support registering/unregistering POE channels which can be used for CQs.
At the same time, this patch also supports user creation of CQs bound to a POE channel.
This patch also adds a debugfs entry for POE to help with problem diagnosis.
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/Makefile | 3 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 58 +++++++++++++- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 111 +++++++++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_device.h | 27 +++++++ drivers/infiniband/hw/hns/hns_roce_ext.c | 39 ++++++++++ drivers/infiniband/hw/hns/hns_roce_ext.h | 22 ++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 71 +++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 17 ++++ drivers/infiniband/hw/hns/hns_roce_main.c | 32 ++++++++ drivers/infiniband/hw/hns/hns_roce_poe.c | 97 +++++++++++++++++++++++ include/uapi/rdma/hns-abi.h | 8 ++ 11 files changed, 482 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/hw/hns/hns_roce_ext.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_ext.h create mode 100644 drivers/infiniband/hw/hns/hns_roce_poe.c
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 09f95fe..04dce5e 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -10,7 +10,8 @@ ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ - hns_roce_bond.o hns_roce_dca.o hns_roce_debugfs.o hns_roce_sysfs.o + hns_roce_bond.o hns_roce_dca.o hns_roce_debugfs.o hns_roce_sysfs.o \ + hns_roce_poe.o hns_roce_ext.o
ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 6997c3d..3bc029c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -311,10 +311,32 @@ static int get_cq_ucmd(struct hns_roce_cq *hr_cq, struct ib_udata *udata, return 0; }
-static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector, +static int set_poe_param(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, + struct hns_roce_ib_create_cq *ucmd) +{ + if (!(ucmd->create_flags & HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE)) + return 0; + + if (!poe_is_supported(hr_dev)) + return -EOPNOTSUPP; + + if (ucmd->poe_channel >= hr_dev->poe_ctx.poe_num) + return -EINVAL; + + if (!hr_dev->poe_ctx.poe_ch[ucmd->poe_channel].en) + return -EFAULT; + + hr_cq->flags |= HNS_ROCE_CQ_FLAG_POE_EN; + hr_cq->poe_channel = ucmd->poe_channel; + return 0; +} + +static int set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector, struct hns_roce_ib_create_cq *ucmd) { struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); + int ret;
cq_entries = max(cq_entries, hr_dev->caps.min_cqes); cq_entries = roundup_pow_of_two(cq_entries); @@ -325,6 +347,15 @@ static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector, spin_lock_init(&hr_cq->lock); INIT_LIST_HEAD(&hr_cq->sq_list); INIT_LIST_HEAD(&hr_cq->rq_list); + + if (!(ucmd->create_flags)) + return 0; + + ret = set_poe_param(hr_dev, hr_cq, ucmd); + if (ret) + return ret; + + return 0; }
static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, @@ -353,6 +384,22 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, return 0; }
+static void poe_ch_ref_cnt_inc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) +{ + struct hns_roce_poe_ch *poe_ch = + &hr_dev->poe_ctx.poe_ch[hr_cq->poe_channel]; + refcount_inc(&poe_ch->ref_cnt); +} + +static void poe_ch_ref_cnt_dec(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) +{ + struct hns_roce_poe_ch *poe_ch = + &hr_dev->poe_ctx.poe_ch[hr_cq->poe_channel]; + refcount_dec(&poe_ch->ref_cnt); +} + int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { @@ -379,7 +426,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
}
- set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd); + ret = set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd); + if (ret) + goto err_out;
ret = set_cqe_size(hr_cq, udata, &ucmd); if (ret) @@ -412,6 +461,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
if (udata) { resp.cqn = hr_cq->cqn; + resp.cap_flags = hr_cq->flags; + if (hr_cq->flags & HNS_ROCE_CQ_FLAG_POE_EN) + poe_ch_ref_cnt_inc(hr_dev, hr_cq); ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (ret) @@ -444,6 +496,8 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ if (hr_cq->flags & HNS_ROCE_CQ_FLAG_POE_EN) + poe_ch_ref_cnt_dec(hr_dev, hr_cq); free_cqc(hr_dev, hr_cq); free_cqn(hr_dev, hr_cq->cqn); free_cq_db(hr_dev, hr_cq, udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index 35d81e7..023b111 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -74,10 +74,22 @@ struct hns_dca_debugfs { struct hns_dca_ctx_debugfs kctx; /* kDCA context */ };
+struct hns_poe_ch_debugfs { + struct dentry *root; /* dev debugfs entry */ + struct hns_debugfs_seqfile en; /* enable stats for this ch */ + struct hns_debugfs_seqfile addr; /* addr of this ch */ + struct hns_debugfs_seqfile ref_cnt; /* ref_cnt for this ch */ +}; + +struct hns_poe_debugfs { + struct dentry *root; /* dev debugfs entry */ +}; + /* Debugfs for device */ struct hns_roce_dev_debugfs { struct dentry *root; struct hns_dca_debugfs *dca_root; + struct hns_poe_debugfs *poe_root; };
struct dca_mem_stats { @@ -497,6 +509,97 @@ static void destroy_dca_debugfs(struct hns_dca_debugfs *dca_dbgfs) kfree(dca_dbgfs); }
+static int poe_debugfs_en_show(struct seq_file *file, void *offset) +{ + struct hns_roce_poe_ch *poe_ch = file->private; + + seq_printf(file, "%-10s\n", poe_ch->en ? "enable" : "disable"); + return 0; +} + +static int poe_debugfs_addr_show(struct seq_file *file, void *offset) +{ +#define POE_ADDR_OFFSET_MASK GENMASK(31, 0) + struct hns_roce_poe_ch *poe_ch = file->private; + + seq_printf(file, "0x%llx\n", poe_ch->addr & POE_ADDR_OFFSET_MASK); + return 0; +} + +static int poe_debugfs_ref_cnt_show(struct seq_file *file, void *offset) +{ + struct hns_roce_poe_ch *poe_ch = file->private; + + seq_printf(file, "0x%-10u\n", refcount_read(&poe_ch->ref_cnt)); + return 0; +} + +static void init_poe_ch_debugfs(struct hns_roce_dev *hr_dev, uint8_t index, + struct dentry *parent) +{ +#define POE_CH_NAME_LEN 10 + struct hns_roce_poe_ch *poe_ch = &hr_dev->poe_ctx.poe_ch[index]; + struct hns_poe_ch_debugfs *dbgfs; + char name[POE_CH_NAME_LEN]; + + dbgfs = kvzalloc(sizeof(*dbgfs), GFP_KERNEL); + if (!dbgfs) + return; + + snprintf(name, sizeof(name), "poe_%u", index); + dbgfs->root = debugfs_create_dir(name, parent); + + init_debugfs_seqfile(&dbgfs->en, "en", dbgfs->root, + poe_debugfs_en_show, poe_ch); + init_debugfs_seqfile(&dbgfs->addr, "addr", dbgfs->root, + poe_debugfs_addr_show, poe_ch); + init_debugfs_seqfile(&dbgfs->ref_cnt, "ref_cnt", dbgfs->root, + poe_debugfs_ref_cnt_show, poe_ch); + poe_ch->poe_ch_debugfs = dbgfs; +} + +static void cleanup_poe_ch_debugfs(struct hns_roce_dev *hr_dev, uint8_t index) +{ + struct hns_roce_poe_ch *poe_ch = &hr_dev->poe_ctx.poe_ch[index]; + struct hns_poe_ch_debugfs *dbgfs = poe_ch->poe_ch_debugfs; + + cleanup_debugfs_seqfile(&dbgfs->en); + cleanup_debugfs_seqfile(&dbgfs->addr); + cleanup_debugfs_seqfile(&dbgfs->ref_cnt); + debugfs_remove_recursive(dbgfs->root); + kvfree(dbgfs); +} + +static struct hns_poe_debugfs * +create_poe_debugfs(struct hns_roce_dev *hr_dev, struct dentry *parent) +{ + struct hns_poe_debugfs *dbgfs; + int i; + + dbgfs = 
kvzalloc(sizeof(*dbgfs), GFP_KERNEL); + if (!dbgfs) + return NULL; + + dbgfs->root = debugfs_create_dir("poe", parent); + + for (i = 0; i < hr_dev->poe_ctx.poe_num; i++) + init_poe_ch_debugfs(hr_dev, i, dbgfs->root); + + return dbgfs; +} + +static void destroy_poe_debugfs(struct hns_roce_dev *hr_dev, + struct hns_poe_debugfs *poe_dbgfs) +{ + int i; + + for (i = 0; i < hr_dev->poe_ctx.poe_num; i++) + cleanup_poe_ch_debugfs(hr_dev, i); + + debugfs_remove_recursive(poe_dbgfs->root); + kvfree(poe_dbgfs); +} + /* debugfs for ucontext */ void hns_roce_register_uctx_debugfs(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx) @@ -553,6 +656,9 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) dbgfs->dca_root = create_dca_debugfs(hr_dev, dbgfs->root);
+ if (poe_is_supported(hr_dev)) + dbgfs->poe_root = create_poe_debugfs(hr_dev, dbgfs->root); + hr_dev->dbgfs = dbgfs; }
@@ -572,6 +678,11 @@ void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev) dbgfs->dca_root = NULL; }
+ if (dbgfs->poe_root) { + destroy_poe_debugfs(hr_dev, dbgfs->poe_root); + dbgfs->poe_root = NULL; + } + debugfs_remove_recursive(dbgfs->root); hr_dev->dbgfs = NULL; kfree(dbgfs); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5d48990..ef8bef9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -164,6 +164,7 @@ enum { HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), HNS_ROCE_CAP_FLAG_BOND = BIT(21), HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22), + HNS_ROCE_CAP_FLAG_POE = BIT(27), };
#define HNS_ROCE_DB_TYPE_COUNT 2 @@ -221,6 +222,18 @@ struct hns_user_mmap_entry { u64 address; };
+struct hns_roce_poe_ch { + uint8_t en; + refcount_t ref_cnt; + uint64_t addr; + void *poe_ch_debugfs; +}; + +struct hns_roce_poe_ctx { + uint8_t poe_num; + struct hns_roce_poe_ch *poe_ch; +}; + struct hns_roce_dca_ctx { struct list_head pool; /* all DCA mems link to @pool */ spinlock_t pool_lock; /* protect @pool */ @@ -493,6 +506,7 @@ struct hns_roce_cq { struct list_head rq_list; /* all qps on this recv cq */ int is_armed; /* cq is armed */ struct list_head node; /* all armed cqs are on a list */ + u8 poe_channel; };
struct hns_roce_idx_que { @@ -788,6 +802,8 @@ enum congest_type { HNS_ROCE_CONGEST_TYPE_DIP = 1 << HNS_ROCE_SCC_ALGO_DIP, };
+#define HNS_ROCE_POE_CH_NUM 4 + struct hns_roce_caps { u64 fw_ver; u8 num_ports; @@ -919,6 +935,7 @@ struct hns_roce_caps { u16 default_ceq_arm_st; u8 congest_type; u8 default_congest_type; + u8 poe_ch_num; };
enum hns_roce_device_state { @@ -1033,6 +1050,7 @@ struct hns_roce_hw { enum hns_roce_scc_algo algo); int (*query_scc_param)(struct hns_roce_dev *hr_dev, u8 port_num, enum hns_roce_scc_algo alog); + int (*cfg_poe_ch)(struct hns_roce_dev *hr_dev, u32 index, u64 poe_addr); };
#define HNS_ROCE_SCC_PARAM_SIZE 4 @@ -1132,6 +1150,7 @@ struct hns_roce_dev { struct notifier_block bond_nb; struct hns_roce_port port_data[HNS_ROCE_MAX_PORTS]; atomic64_t *dfx_cnt; + struct hns_roce_poe_ctx poe_ctx; /* poe ch array */ };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -1289,6 +1308,11 @@ static inline u8 get_hr_bus_num(struct hns_roce_dev *hr_dev) return hr_dev->pci_dev->bus->number; }
+static inline bool poe_is_supported(struct hns_roce_dev *hr_dev) +{ + return !!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_POE); +} + void hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
@@ -1430,4 +1454,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, int hns_roce_create_port_files(struct ib_device *ibdev, u8 port_num, struct kobject *kobj); void hns_roce_unregister_sysfs(struct hns_roce_dev *hr_dev); +int hns_roce_register_poe_channel(struct hns_roce_dev *hr_dev, u8 channel, + u64 poe_addr); +int hns_roce_unregister_poe_channel(struct hns_roce_dev *hr_dev, u8 channel); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.c b/drivers/infiniband/hw/hns/hns_roce_ext.c new file mode 100644 index 00000000..9aa17de --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_ext.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2023 Hisilicon Limited. + */ + +#include <rdma/ib_verbs.h> +#include "hns_roce_device.h" + +static bool is_hns_roce(struct ib_device *ib_dev) +{ + if (ib_dev && ib_dev->ops.driver_id == RDMA_DRIVER_HNS) + return true; + + return false; +} + +int rdma_register_poe_channel(struct ib_device *ib_dev, u8 channel, + u64 poe_addr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + if (!is_hns_roce(ib_dev)) + return -EOPNOTSUPP; + + return hns_roce_register_poe_channel(hr_dev, channel, poe_addr); +} +EXPORT_SYMBOL(rdma_register_poe_channel); + +int rdma_unregister_poe_channel(struct ib_device *ib_dev, u8 channel) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + if (!is_hns_roce(ib_dev)) + return -EOPNOTSUPP; + + return hns_roce_unregister_poe_channel(hr_dev, channel); +} +EXPORT_SYMBOL(rdma_unregister_poe_channel); + diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.h b/drivers/infiniband/hw/hns/hns_roce_ext.h new file mode 100644 index 00000000..7d71465 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_ext.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2023 Hisilicon Limited. 
+ */ + +#ifndef __HNS_ROCE_EXT_H +#define __HNS_ROCE_EXT_H +#include <linux/types.h> + +/** + * rdma_register_notify_addr - Register an POE channel for this RDMA device. + * @channel - POE channel index. + * @poe_addr - POE channel address. + * + * If the current POE device is not associated with CQ, then it will be + * allowed to be re-registered. Otherwise, re-registration or + * de-registration will report an EBUSY error. + */ +int rdma_register_poe_channel(struct ib_device *ib_dev, u8 channel, u64 poe_addr); +int rdma_unregister_poe_channel(struct ib_device *ib_dev, u8 channel); + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 05edb6c..3d35d7c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2448,6 +2448,9 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) if (!(caps->page_size_cap & PAGE_SIZE)) caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + caps->poe_ch_num = HNS_ROCE_POE_CH_NUM; + if (!hr_dev->is_vf) { caps->cqe_sz = resp_a->cqe_sz; caps->qpc_sz = le16_to_cpu(resp_b->qpc_sz); @@ -3862,6 +3865,11 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector); hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn);
+ if (hr_cq->flags & HNS_ROCE_CQ_FLAG_POE_EN) { + hr_reg_enable(cq_context, CQC_POE_EN); + hr_reg_write(cq_context, CQC_POE_NUM, hr_cq->poe_channel); + } + if (hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE) hr_reg_write(cq_context, CQC_CQE_SIZE, CQE_SIZE_64B);
@@ -7154,6 +7162,68 @@ static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, return 0; }
+static int config_poe_addr(struct hns_roce_dev *hr_dev, + u32 channel_id, u64 addr) +{ + struct hns_roce_poe_cfg_addr_cmq *cmd; + struct hns_roce_cmq_desc desc; + int ret; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_POE_ADDR, false); + cmd = (struct hns_roce_poe_cfg_addr_cmq *)desc.data; + cmd->channel_id = cpu_to_le32(channel_id); + cmd->poe_addr_l = cpu_to_le32(lower_32_bits(addr)); + cmd->poe_addr_h = cpu_to_le32(upper_32_bits(addr)); + + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + ibdev_err_ratelimited(&hr_dev->ib_dev, + "configure poe channel %u addr failed, ret = %d.\n", + channel_id, ret); + return ret; +} + +static int config_poe_attr(struct hns_roce_dev *hr_dev, u32 channel_id, bool en) +{ + struct hns_roce_poe_cfg_attr_cmq *cmd; + struct hns_roce_cmq_desc desc; + int ret; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_POE_ATTR, false); + cmd = (struct hns_roce_poe_cfg_attr_cmq *)desc.data; + cmd->channel_id = cpu_to_le32(channel_id); + cmd->rsv_en_outstd = en ? 
1 : 0; + + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + ibdev_err_ratelimited(&hr_dev->ib_dev, + "configure poe channel %u attr failed, ret = %d.\n", + channel_id, ret); + return ret; +} + +static int hns_roce_cfg_poe_ch(struct hns_roce_dev *hr_dev, u32 index, + u64 poe_addr) +{ + int ret; + + if (index >= hr_dev->caps.poe_ch_num) { + ibdev_err_ratelimited(&hr_dev->ib_dev, + "invalid POE channel %u.\n", index); + return -EINVAL; + } + + ret = config_poe_addr(hr_dev, index, poe_addr); + if (ret) + return ret; + + ret = config_poe_attr(hr_dev, index, !!poe_addr); + if (ret) + config_poe_addr(hr_dev, index, 0); + + return ret; +} + static const struct ib_device_ops hns_roce_v2_dev_ops = { .destroy_qp = hns_roce_v2_destroy_qp, .modify_cq = hns_roce_v2_modify_cq, @@ -7208,6 +7278,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .query_hw_counter = hns_roce_hw_v2_query_counter, .config_scc_param = hns_roce_v2_config_scc_param, .query_scc_param = hns_roce_v2_query_scc_param, + .cfg_poe_ch = hns_roce_cfg_poe_ch, };
static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e6a9140..d3b0fa1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -204,6 +204,8 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, + HNS_ROCE_OPC_CFG_POE_ADDR = 0x801B, + HNS_ROCE_OPC_CFG_POE_ATTR = 0x801C, HNS_ROCE_OPC_QUERY_COUNTER = 0x8206, HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, @@ -1282,6 +1284,21 @@ struct hns_roce_cmq_req { __le32 data[6]; };
+struct hns_roce_poe_cfg_addr_cmq { + __le32 channel_id; + __le32 poe_addr_l; + __le32 poe_addr_h; + __le32 rsv[3]; +}; + +#define V2_POE_ATTR_EN V2_POE_ATTR_FIELD_LOC(40, 40) +#define V2_POE_ATTR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_poe_cfg_attr_cmq, h, l) +struct hns_roce_poe_cfg_attr_cmq { + __le32 channel_id; + __le32 rsv_en_outstd; + __le32 rsv[4]; +}; + #define CMQ_REQ_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cmq_req, h, l)
struct hns_roce_cmq_desc { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 142a562..af9d054 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1427,6 +1427,36 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); }
+static void hns_roce_register_poe_ch(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_poe_ch *poe_ch; + + if (!poe_is_supported(hr_dev) || hr_dev->caps.poe_ch_num <= 0) + goto out; + + poe_ch = kvcalloc(hr_dev->caps.poe_ch_num, + sizeof(struct hns_roce_poe_ch), GFP_KERNEL); + if (!poe_ch) + goto out; + + hr_dev->poe_ctx.poe_num = hr_dev->caps.poe_ch_num; + hr_dev->poe_ctx.poe_ch = poe_ch; + return; + +out: + hr_dev->poe_ctx.poe_num = 0; + hr_dev->poe_ctx.poe_ch = NULL; + +} + +static void hns_roce_unregister_poe_ch(struct hns_roce_dev *hr_dev) +{ + if (!poe_is_supported(hr_dev) || hr_dev->caps.poe_ch_num <= 0) + return; + + kvfree(hr_dev->poe_ctx.poe_ch); +} + static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev) { hr_dev->dfx_cnt = kcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t), @@ -1511,6 +1541,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (ret) goto error_failed_register_device;
+ hns_roce_register_poe_ch(hr_dev); hns_roce_register_debugfs(hr_dev);
return 0; @@ -1548,6 +1579,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) hns_roce_unregister_sysfs(hr_dev); hns_roce_unregister_device(hr_dev, bond_cleanup); hns_roce_unregister_debugfs(hr_dev); + hns_roce_unregister_poe_ch(hr_dev);
if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_poe.c b/drivers/infiniband/hw/hns/hns_roce_poe.c new file mode 100644 index 00000000..4de51eb --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_poe.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2023 Hisilicon Limited. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/ib_umem.h> +#include "hns_roce_device.h" + +static int hns_roce_config_poe_ch(struct hns_roce_dev *hr_dev, u32 index, + u64 poe_addr) +{ + int ret; + + if (!hr_dev->hw->cfg_poe_ch) { + ibdev_err_ratelimited(&hr_dev->ib_dev, + "configure POE channel has not been supported in this device.\n"); + return -EOPNOTSUPP; + } + + ret = hr_dev->hw->cfg_poe_ch(hr_dev, index, poe_addr); + if (ret) + ibdev_err_ratelimited(&hr_dev->ib_dev, + "configure POE channel %u failed, ret = %d.\n", + index, ret); + + return ret; +} + +static bool check_poe_in_use(struct hns_roce_poe_ch *poe_ch) +{ + return poe_ch->en && refcount_read(&poe_ch->ref_cnt) > 1; +} + +static void update_poe_ch(struct hns_roce_poe_ch *poe_ch, u64 poe_addr) +{ + if (poe_addr) { + if (poe_addr != poe_ch->addr) + refcount_set(&poe_ch->ref_cnt, 1); + } else { + refcount_set(&poe_ch->ref_cnt, 0); + } + poe_ch->en = !!poe_addr; + poe_ch->addr = poe_addr; +} + +int hns_roce_register_poe_channel(struct hns_roce_dev *hr_dev, u8 channel, + u64 poe_addr) +{ + struct hns_roce_poe_ch *poe_ch; + int ret; + + if (!poe_is_supported(hr_dev)) + return -EOPNOTSUPP; + + if (channel >= hr_dev->poe_ctx.poe_num || !poe_addr) + return -EINVAL; + + poe_ch = &hr_dev->poe_ctx.poe_ch[channel]; + if (check_poe_in_use(poe_ch)) + return -EBUSY; + + ret = hns_roce_config_poe_ch(hr_dev, channel, poe_addr); + if (ret) + return ret; + + update_poe_ch(poe_ch, poe_addr); + + return ret; +} + +int 
hns_roce_unregister_poe_channel(struct hns_roce_dev *hr_dev, u8 channel) +{ + struct hns_roce_poe_ch *poe_ch; + int ret; + + if (!poe_is_supported(hr_dev)) + return -EOPNOTSUPP; + + if (channel >= hr_dev->poe_ctx.poe_num) + return -EINVAL; + + poe_ch = &hr_dev->poe_ctx.poe_ch[channel]; + if (check_poe_in_use(poe_ch)) + return -EBUSY; + + ret = hns_roce_config_poe_ch(hr_dev, channel, 0); + if (ret) + return ret; + + update_poe_ch(poe_ch, 0); + + return ret; +} diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 9d18a69..82eabfc 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -36,15 +36,23 @@
#include <linux/types.h>
+enum hns_roce_create_cq_create_flags { + HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE = 1 << 0, +}; + struct hns_roce_ib_create_cq { __aligned_u64 buf_addr; __aligned_u64 db_addr; __u32 cqe_size; __u32 reserved; + __aligned_u64 create_flags; /* Use enum hns_roce_create_cq_create_flags */ + __u8 poe_channel; + __u8 rsv[7]; };
enum hns_roce_cq_cap_flags { HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, + HNS_ROCE_CQ_FLAG_POE_EN = 1 << 2, };
struct hns_roce_ib_create_cq_resp {
STARS is a HW scheduler. QPs in STARS mode will be taken over by the STARS hardware.
This patch supports configuring STARS mode QPs. At the same time, a kernel API, rdma_query_qp_db(), is provided for querying the DB address of QPs taken over by STARS.
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_ext.c | 33 +++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_ext.h | 13 ++++++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 46 +++++++++++++++++++++++++++-- include/uapi/rdma/hns-abi.h | 11 +++++-- 5 files changed, 100 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ef8bef9..7b07f8c5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -104,6 +104,8 @@ #define CQ_BANKID_SHIFT 2 #define CQ_BANKID_MASK GENMASK(1, 0)
+#define HNS_ROCE_MEM_BAR 2 + enum { SERV_TYPE_RC, SERV_TYPE_UC, diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.c b/drivers/infiniband/hw/hns/hns_roce_ext.c index 9aa17de..0a89ba2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ext.c +++ b/drivers/infiniband/hw/hns/hns_roce_ext.c @@ -14,6 +14,25 @@ static bool is_hns_roce(struct ib_device *ib_dev) return false; }
+static bool is_hns_roce_vf(struct hns_roce_dev *hr_dev) +{ + return hr_dev->is_vf; +} + +bool rdma_support_stars(struct ib_device *ib_dev) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + if (!is_hns_roce(ib_dev) || is_hns_roce_vf(hr_dev)) + return false; + + if (poe_is_supported(hr_dev)) + return true; + + return false; +} +EXPORT_SYMBOL(rdma_support_stars); + int rdma_register_poe_channel(struct ib_device *ib_dev, u8 channel, u64 poe_addr) { @@ -37,3 +56,17 @@ int rdma_unregister_poe_channel(struct ib_device *ib_dev, u8 channel) } EXPORT_SYMBOL(rdma_unregister_poe_channel);
+u64 rdma_query_qp_db(struct ib_device *ib_dev, int qp_index) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + u64 bar_addr; + + if (!rdma_support_stars(ib_dev)) + return 0; + + bar_addr = pci_resource_start(hr_dev->pci_dev, HNS_ROCE_MEM_BAR); + return bar_addr + hr_dev->sdb_offset + + DB_REG_OFFSET * hr_dev->priv_uar.index; +} +EXPORT_SYMBOL(rdma_query_qp_db); + diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.h b/drivers/infiniband/hw/hns/hns_roce_ext.h index 7d71465..f9402b9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ext.h +++ b/drivers/infiniband/hw/hns/hns_roce_ext.h @@ -19,4 +19,17 @@ int rdma_register_poe_channel(struct ib_device *ib_dev, u8 channel, u64 poe_addr); int rdma_unregister_poe_channel(struct ib_device *ib_dev, u8 channel);
+/** + * rdma_support_stars - Helper function to determine whether the + * current device supports STARS. + */ +bool rdma_support_stars(struct ib_device *ib_dev); + +/** + * rdma_query_qp_db - Helper function to get the doorbell address of this + * device. Currently, it only supports use in STARS scenarios. + * @qp_index - QP number. + */ +u64 rdma_query_qp_db(struct ib_device *ib_dev, int qp_index); + #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 77de664..fcb5e95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1133,6 +1133,45 @@ static void set_congest_param(struct hns_roce_dev *hr_dev, default_congest_type(hr_dev, hr_qp); }
+static bool check_cq_poe_en(struct ib_cq *ib_cq) +{ + struct hns_roce_cq *hr_cq = ib_cq ? to_hr_cq(ib_cq) : NULL; + + return hr_cq && hr_cq->flags & HNS_ROCE_CQ_FLAG_POE_EN; +} + +static int set_uqp_create_flag_param(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, + struct hns_roce_ib_create_qp *ucmd) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + + if (check_cq_poe_en(init_attr->recv_cq) || + check_cq_poe_en(init_attr->send_cq)) { + if (!(ucmd->create_flags & + HNS_ROCE_CREATE_QP_FLAGS_STARS_MODE)) { + ibdev_err(ibdev, + "POE CQ only support STARS QP.\n"); + return -EINVAL; + } + } + + if (!(ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CREATE_FLAGS)) + return 0; + + if (ucmd->create_flags & HNS_ROCE_CREATE_QP_FLAGS_STARS_MODE) { + if (!check_cq_poe_en(init_attr->send_cq)) { + ibdev_err(ibdev, + "STARS QP SQ should be bound with POE CQ.\n"); + return -EINVAL; + } + + hr_qp->en_flags |= HNS_ROCE_QP_CAP_STARS_SQ_MODE; + } + return 0; +} + static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, @@ -1173,10 +1212,13 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibucontext); hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); - if (ret) ibdev_err(ibdev, "Failed to set user SQ size, ret = %d\n", ret); + + ret = set_uqp_create_flag_param(hr_dev, hr_qp, init_attr, ucmd); + if (ret) + return ret; set_congest_param(hr_dev, hr_qp, ucmd); } else { if (init_attr->create_flags & @@ -1209,7 +1251,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, { struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_ib_create_qp ucmd; + struct hns_roce_ib_create_qp ucmd = {}; int ret;
mutex_init(&hr_qp->mutex); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 82eabfc..eeee57f 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -86,6 +86,10 @@ enum hns_roce_create_qp_comp_mask { HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 1, };
+enum hns_roce_create_qp_flags { + HNS_ROCE_CREATE_QP_FLAGS_STARS_MODE = 1 << 0, +}; + enum hns_roce_congest_type_flags { HNS_ROCE_CREATE_QP_FLAGS_DCQCN = 1 << 0, HNS_ROCE_CREATE_QP_FLAGS_LDCP = 1 << 1, @@ -102,8 +106,8 @@ struct hns_roce_ib_create_qp { __u8 reserved[4]; __u8 pageshift; __aligned_u64 sdb_addr; - __aligned_u64 comp_mask; - __aligned_u64 create_flags; + __aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */ + __aligned_u64 create_flags; /* Use enum hns_roce_create_qp_flags */ __aligned_u64 congest_type_flags; };
@@ -115,10 +119,11 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4, HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, + HNS_ROCE_QP_CAP_STARS_SQ_MODE = 1 << 7, };
struct hns_roce_ib_create_qp_resp { - __aligned_u64 cap_flags; + __aligned_u64 cap_flags; /* Use enum hns_roce_qp_cap_flags */ __aligned_u64 dwqe_mmap_key; };
Add API rdma_query_hw_id() to enable kernel-mode applications to query the HW ID of the current device.
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_ext.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_ext.h | 8 ++++++++ 2 files changed, 29 insertions(+)
diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.c b/drivers/infiniband/hw/hns/hns_roce_ext.c index 0a89ba2..826f34e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ext.c +++ b/drivers/infiniband/hw/hns/hns_roce_ext.c @@ -70,3 +70,24 @@ u64 rdma_query_qp_db(struct ib_device *ib_dev, int qp_index) } EXPORT_SYMBOL(rdma_query_qp_db);
+int rdma_query_hw_id(struct ib_device *ib_dev, u32 *chip_id, + u32 *die_id, u32 *func_id) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + if (!is_hns_roce(ib_dev) || is_hns_roce_vf(hr_dev)) + return -EOPNOTSUPP; + + if (!chip_id || !die_id || !func_id) + return -EINVAL; + + if (hr_dev->chip_id == HNS_IB_INVALID_ID) + return -EINVAL; + + *chip_id = hr_dev->chip_id; + *die_id = hr_dev->die_id; + *func_id = hr_dev->func_id; + return 0; +} +EXPORT_SYMBOL(rdma_query_hw_id); + diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.h b/drivers/infiniband/hw/hns/hns_roce_ext.h index f9402b9..d80fdfc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ext.h +++ b/drivers/infiniband/hw/hns/hns_roce_ext.h @@ -32,4 +32,12 @@ bool rdma_support_stars(struct ib_device *ib_dev); */ u64 rdma_query_qp_db(struct ib_device *ib_dev, int qp_index);
+/** + * rdma_query_hw_id - Get the relevant hardware ID of the current device. + * @chip_id - The ID of the chip where the current device is located. + * @die_id - The ID of the IO DIE where the current device is located. + * @func_id - The function ID of this device. + */ +int rdma_query_hw_id(struct ib_device *ib_dev, u32 *chip_id, + u32 *die_id, u32 *func_id); #endif
This patch adds support for the write with notify operation, including support for configuring a CQ to enable write with notify, and a QP to enable write with notify.
At the same time, a pair of APIs, rdma_register_notify_addr()/rdma_unregister_notify_addr(), is added on the kernel side.
Users need to use these APIs to register the notify address before creating a CQ with write with notify enabled.
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_common.h | 3 ++ drivers/infiniband/hw/hns/hns_roce_cq.c | 60 ++++++++++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_device.h | 30 +++++++++++++++ drivers/infiniband/hw/hns/hns_roce_ext.c | 52 ++++++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_ext.h | 23 +++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 55 ++++++++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 25 ++++++++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 2 + drivers/infiniband/hw/hns/hns_roce_qp.c | 16 ++++++++ include/uapi/rdma/hns-abi.h | 7 +++- 10 files changed, 265 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 465d1f9..c2470b5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -42,6 +42,9 @@ #define roce_get_field(origin, mask, shift) \ ((le32_to_cpu(origin) & (mask)) >> (u32)(shift))
+#define roce_get_field64(origin, mask, shift) \ + ((le64_to_cpu(origin) & (mask)) >> (u32)(shift)) + #define roce_get_bit(origin, shift) \ roce_get_field((origin), (1ul << (shift)), (shift))
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 3bc029c..1858b07 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -332,6 +332,58 @@ static int set_poe_param(struct hns_roce_dev *hr_dev, return 0; }
+static bool is_notify_support(struct hns_roce_dev *hr_dev, + enum hns_roce_notify_mode notify_mode, + enum hns_roce_notify_device_en device_en) +{ + if (!is_write_notify_supported(hr_dev)) + return false; + + /* some configuration is not supported in HIP10 */ + if (hr_dev->pci_dev->revision != PCI_REVISION_ID_HIP10) + return true; + + if (notify_mode == HNS_ROCE_NOTIFY_MODE_64B_ALIGN || + device_en == HNS_ROCE_NOTIFY_DDR) { + ibdev_err(&hr_dev->ib_dev, "Unsupported notify_mode.\n"); + return false; + } + + return true; +} + +static int set_write_notify_param(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, + struct hns_roce_ib_create_cq *ucmd) +{ +#define NOTIFY_MODE_MASK 0x3 + const struct { + u8 mode; + u8 mem_type; + } notify_attr[] = { + {HNS_ROCE_NOTIFY_MODE_64B_ALIGN, HNS_ROCE_NOTIFY_DEV}, + {HNS_ROCE_NOTIFY_MODE_4B_ALIGN, HNS_ROCE_NOTIFY_DEV}, + {HNS_ROCE_NOTIFY_MODE_64B_ALIGN, HNS_ROCE_NOTIFY_DDR}, + {HNS_ROCE_NOTIFY_MODE_4B_ALIGN, HNS_ROCE_NOTIFY_DDR}, + }; + u8 attr = ucmd->notify_mode & NOTIFY_MODE_MASK; + + if (!(ucmd->create_flags & HNS_ROCE_CREATE_CQ_FLAGS_WRITE_WITH_NOTIFY)) + return 0; + + if (!is_notify_support(hr_dev, notify_attr[attr].mode, + notify_attr[attr].mem_type)) + return -EOPNOTSUPP; + + hr_cq->flags |= HNS_ROCE_CQ_FLAG_NOTIFY_EN; + hr_cq->write_notify.notify_addr = + hr_dev->notify_tbl[ucmd->notify_idx].base_addr; + hr_cq->write_notify.notify_mode = notify_attr[attr].mode; + hr_cq->write_notify.notify_device_en = notify_attr[attr].mem_type; + + return 0; +} + static int set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector, struct hns_roce_ib_create_cq *ucmd) { @@ -348,14 +400,18 @@ static int set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector, INIT_LIST_HEAD(&hr_cq->sq_list); INIT_LIST_HEAD(&hr_cq->rq_list);
- if (!(ucmd->create_flags)) + if (!ucmd->create_flags) return 0;
+ if ((ucmd->create_flags & HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE) && + (ucmd->create_flags & HNS_ROCE_CREATE_CQ_FLAGS_WRITE_WITH_NOTIFY)) + return -EINVAL; + ret = set_poe_param(hr_dev, hr_cq, ucmd); if (ret) return ret;
- return 0; + return set_write_notify_param(hr_dev, hr_cq, ucmd); }
static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7b07f8c5..f808985 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,9 +37,11 @@ #include <rdma/ib_verbs.h> #include <rdma/hns-abi.h> #include "hns_roce_bond.h" +#include "hns_roce_ext.h"
#define PCI_REVISION_ID_HIP08 0x21 #define PCI_REVISION_ID_HIP09 0x30 +#define PCI_REVISION_ID_HIP10 0x32
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
@@ -104,6 +106,8 @@ #define CQ_BANKID_SHIFT 2 #define CQ_BANKID_MASK GENMASK(1, 0)
+#define MAX_NOTIFY_MEM_SIZE BIT(24) + #define HNS_ROCE_MEM_BAR 2
enum { @@ -162,6 +166,7 @@ enum { HNS_ROCE_CAP_FLAG_SVE_DIRECT_WQE = BIT(13), HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15), + HNS_ROCE_CAP_FLAG_WRITE_NOTIFY = BIT(16), HNS_ROCE_CAP_FLAG_STASH = BIT(17), HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), HNS_ROCE_CAP_FLAG_BOND = BIT(21), @@ -488,6 +493,22 @@ struct hns_roce_db { unsigned long order; };
+enum hns_roce_notify_mode { + HNS_ROCE_NOTIFY_MODE_64B_ALIGN = 0, + HNS_ROCE_NOTIFY_MODE_4B_ALIGN = 1, +}; + +enum hns_roce_notify_device_en { + HNS_ROCE_NOTIFY_DEV = 0, + HNS_ROCE_NOTIFY_DDR = 1, +}; + +struct hns_roce_notify_conf { + u64 notify_addr; /* should be aligned to 4k */ + u8 notify_mode; /* use enum hns_roce_notify_mode */ + u8 notify_device_en; /* use enum hns_roce_notify_device_en */ +}; + struct hns_roce_cq { struct ib_cq ib_cq; struct hns_roce_mtr mtr; @@ -509,6 +530,7 @@ struct hns_roce_cq { int is_armed; /* cq is armed */ struct list_head node; /* all armed cqs are on a list */ u8 poe_channel; + struct hns_roce_notify_conf write_notify; };
struct hns_roce_idx_que { @@ -1153,6 +1175,9 @@ struct hns_roce_dev { struct hns_roce_port port_data[HNS_ROCE_MAX_PORTS]; atomic64_t *dfx_cnt; struct hns_roce_poe_ctx poe_ctx; /* poe ch array */ + + struct rdma_notify_mem *notify_tbl; + size_t notify_num; };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -1315,6 +1340,11 @@ static inline bool poe_is_supported(struct hns_roce_dev *hr_dev) return !!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_POE); }
+static inline bool is_write_notify_supported(struct hns_roce_dev *dev) +{ + return !!(dev->caps.flags & HNS_ROCE_CAP_FLAG_WRITE_NOTIFY); +} + void hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.c b/drivers/infiniband/hw/hns/hns_roce_ext.c index 826f34e..63d0a48 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ext.c +++ b/drivers/infiniband/hw/hns/hns_roce_ext.c @@ -26,7 +26,7 @@ bool rdma_support_stars(struct ib_device *ib_dev) if (!is_hns_roce(ib_dev) || is_hns_roce_vf(hr_dev)) return false;
- if (poe_is_supported(hr_dev)) + if (poe_is_supported(hr_dev) && is_write_notify_supported(hr_dev)) return true;
return false; @@ -91,3 +91,53 @@ int rdma_query_hw_id(struct ib_device *ib_dev, u32 *chip_id, } EXPORT_SYMBOL(rdma_query_hw_id);
+int rdma_register_notify_addr(struct ib_device *ib_dev, + size_t num, struct rdma_notify_mem *notify_mem) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + size_t i; + + if (!is_hns_roce(ib_dev) || !is_write_notify_supported(hr_dev)) + return -EOPNOTSUPP; + + if (hr_dev->notify_tbl) + return -EBUSY; + + if (!num || !notify_mem) + return -EINVAL; + + for (i = 0; i < num; i++) { + if (!notify_mem[i].size || + notify_mem[i].size > MAX_NOTIFY_MEM_SIZE) + return -EINVAL; + if (!notify_mem[i].base_addr) + return -EINVAL; + } + + hr_dev->notify_tbl = kvmalloc_array(num, sizeof(*notify_mem), + GFP_KERNEL); + if (!hr_dev->notify_tbl) + return -ENOMEM; + + hr_dev->notify_num = num; + memcpy(hr_dev->notify_tbl, notify_mem, sizeof(*notify_mem) * num); + + return 0; +} +EXPORT_SYMBOL(rdma_register_notify_addr); + +int rdma_unregister_notify_addr(struct ib_device *ib_dev) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + if (!is_hns_roce(ib_dev) || !is_write_notify_supported(hr_dev)) + return -EOPNOTSUPP; + + if (hr_dev->notify_tbl) + kvfree(hr_dev->notify_tbl); + + hr_dev->notify_tbl = NULL; + + return 0; +} +EXPORT_SYMBOL(rdma_unregister_notify_addr); diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.h b/drivers/infiniband/hw/hns/hns_roce_ext.h index d80fdfc..11a2d3c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ext.h +++ b/drivers/infiniband/hw/hns/hns_roce_ext.h @@ -40,4 +40,27 @@ u64 rdma_query_qp_db(struct ib_device *ib_dev, int qp_index); */ int rdma_query_hw_id(struct ib_device *ib_dev, u32 *chip_id, u32 *die_id, u32 *func_id); +/** + * struct rdma_notify_mem + * @base_addr - The memory region base addr for write with notify operation. + * @size - size of the notify memory region + */ +struct rdma_notify_mem { + u64 base_addr; + u32 size; +}; + +/** + * rdma_register_notify_addr - Register an memory region which will be used by + * write with notify operation. + * @num - How many elements in array + * @notify_mem - Notify memory array. 
+ * + * If notify_mem has already been registered, re-registration + * will not be allowed. + */ +int rdma_register_notify_addr(struct ib_device *ib_dev, + size_t num, struct rdma_notify_mem *notify_mem); +int rdma_unregister_notify_addr(struct ib_device *ib_dev); + #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3d35d7c..45988eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3850,6 +3850,44 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, spin_unlock_irq(&hr_cq->lock); }
+static void enable_write_notify(struct hns_roce_cq *hr_cq, + struct hns_roce_v2_cq_context *cq_context) +{ + hr_reg_enable(cq_context, CQC_NOTIFY_EN); + hr_reg_write(cq_context, CQC_NOTIFY_DEVICE_EN, + hr_cq->write_notify.notify_device_en); + hr_reg_write(cq_context, CQC_NOTIFY_MODE, + hr_cq->write_notify.notify_mode); + hr_reg_write(cq_context, CQC_NOTIFY_ADDR_0, + (u32)roce_get_field64(hr_cq->write_notify.notify_addr, + CQC_NOTIFY_ADDR_0_M, + CQC_NOTIFY_ADDR_0_S)); + hr_reg_write(cq_context, CQC_NOTIFY_ADDR_1, + (u32)roce_get_field64(hr_cq->write_notify.notify_addr, + CQC_NOTIFY_ADDR_1_M, + CQC_NOTIFY_ADDR_1_S)); + hr_reg_write(cq_context, CQC_NOTIFY_ADDR_2, + (u32)roce_get_field64(hr_cq->write_notify.notify_addr, + CQC_NOTIFY_ADDR_2_M, + CQC_NOTIFY_ADDR_2_S)); + hr_reg_write(cq_context, CQC_NOTIFY_ADDR_3, + (u32)roce_get_field64(hr_cq->write_notify.notify_addr, + CQC_NOTIFY_ADDR_3_M, + CQC_NOTIFY_ADDR_3_S)); + hr_reg_write(cq_context, CQC_NOTIFY_ADDR_4, + (u32)roce_get_field64(hr_cq->write_notify.notify_addr, + CQC_NOTIFY_ADDR_4_M, + CQC_NOTIFY_ADDR_4_S)); + hr_reg_write(cq_context, CQC_NOTIFY_ADDR_5, + (u32)roce_get_field64(hr_cq->write_notify.notify_addr, + CQC_NOTIFY_ADDR_5_M, + CQC_NOTIFY_ADDR_5_S)); + hr_reg_write(cq_context, CQC_NOTIFY_ADDR_6, + (u32)roce_get_field64(hr_cq->write_notify.notify_addr, + CQC_NOTIFY_ADDR_6_M, + CQC_NOTIFY_ADDR_6_S)); +} + static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, dma_addr_t dma_handle) @@ -3870,12 +3908,14 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, hr_reg_write(cq_context, CQC_POE_NUM, hr_cq->poe_channel); }
+ if (hr_cq->flags & HNS_ROCE_CQ_FLAG_NOTIFY_EN) + enable_write_notify(hr_cq, cq_context); + else if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(cq_context, CQC_STASH); + if (hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE) hr_reg_write(cq_context, CQC_CQE_SIZE, CQE_SIZE_64B);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) - hr_reg_enable(cq_context, CQC_STASH); - hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_L, to_hr_hw_page_addr(mtts[0])); hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_H, @@ -4541,6 +4581,12 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, hr_reg_write_bool(context, QPC_EXT_ATE, access_flags & IB_ACCESS_REMOTE_ATOMIC); hr_reg_clear(qpc_mask, QPC_EXT_ATE); + + if ((hr_qp->en_flags & HNS_ROCE_QP_CAP_WRITE_WITH_NOTIFY) && + (access_flags & IB_ACCESS_REMOTE_WRITE)) { + hr_reg_enable(context, QPC_WN_EN); + hr_reg_clear(qpc_mask, QPC_WN_EN); + } }
static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, @@ -4623,7 +4669,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ) return;
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_WRITE_WITH_NOTIFY) && + (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)) hr_reg_enable(&context->ext, QPCEX_STASH); }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index d3b0fa1..6fa920b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -291,32 +291,54 @@ struct hns_roce_v2_cq_context { #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
+#define CQC_NOTIFY_ADDR_0_S 12 +#define CQC_NOTIFY_ADDR_0_M GENMASK(19, 12) +#define CQC_NOTIFY_ADDR_1_S 20 +#define CQC_NOTIFY_ADDR_1_M GENMASK(29, 20) +#define CQC_NOTIFY_ADDR_2_S 30 +#define CQC_NOTIFY_ADDR_2_M GENMASK(33, 30) +#define CQC_NOTIFY_ADDR_3_S 34 +#define CQC_NOTIFY_ADDR_3_M GENMASK(41, 34) +#define CQC_NOTIFY_ADDR_4_S 42 +#define CQC_NOTIFY_ADDR_4_M GENMASK(49, 42) +#define CQC_NOTIFY_ADDR_5_S 50 +#define CQC_NOTIFY_ADDR_5_M GENMASK(57, 50) +#define CQC_NOTIFY_ADDR_6_S 58 +#define CQC_NOTIFY_ADDR_6_M GENMASK(63, 58) + #define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
#define CQC_CQ_ST CQC_FIELD_LOC(1, 0) #define CQC_POLL CQC_FIELD_LOC(2, 2) #define CQC_SE CQC_FIELD_LOC(3, 3) #define CQC_OVER_IGNORE CQC_FIELD_LOC(4, 4) +#define CQC_NOTIFY_MODE CQC_FIELD_LOC(4, 4) #define CQC_ARM_ST CQC_FIELD_LOC(7, 6) #define CQC_SHIFT CQC_FIELD_LOC(12, 8) #define CQC_CMD_SN CQC_FIELD_LOC(14, 13) #define CQC_CEQN CQC_FIELD_LOC(23, 15) +#define CQC_NOTIFY_ADDR_0 CQC_FIELD_LOC(31, 24) #define CQC_CQN CQC_FIELD_LOC(55, 32) #define CQC_POE_EN CQC_FIELD_LOC(56, 56) #define CQC_POE_NUM CQC_FIELD_LOC(58, 57) #define CQC_CQE_SIZE CQC_FIELD_LOC(60, 59) #define CQC_CQ_CNT_MODE CQC_FIELD_LOC(61, 61) +#define CQC_NOTIFY_DEVICE_EN CQC_FIELD_LOC(62, 62) #define CQC_STASH CQC_FIELD_LOC(63, 63) #define CQC_CQE_CUR_BLK_ADDR_L CQC_FIELD_LOC(95, 64) #define CQC_CQE_CUR_BLK_ADDR_H CQC_FIELD_LOC(115, 96) #define CQC_POE_QID CQC_FIELD_LOC(125, 116) +#define CQC_NOTIFY_ADDR_1 CQC_FIELD_LOC(125, 116) #define CQC_CQE_HOP_NUM CQC_FIELD_LOC(127, 126) #define CQC_CQE_NEX_BLK_ADDR_L CQC_FIELD_LOC(159, 128) #define CQC_CQE_NEX_BLK_ADDR_H CQC_FIELD_LOC(179, 160) +#define CQC_NOTIFY_ADDR_2 CQC_FIELD_LOC(183, 180) #define CQC_CQE_BAR_PG_SZ CQC_FIELD_LOC(187, 184) #define CQC_CQE_BUF_PG_SZ CQC_FIELD_LOC(191, 188) #define CQC_CQ_PRODUCER_IDX CQC_FIELD_LOC(215, 192) +#define CQC_NOTIFY_ADDR_3 CQC_FIELD_LOC(223, 216) #define CQC_CQ_CONSUMER_IDX CQC_FIELD_LOC(247, 224) +#define CQC_NOTIFY_ADDR_4 CQC_FIELD_LOC(255, 248) #define CQC_CQE_BA_L CQC_FIELD_LOC(287, 256) #define CQC_CQE_BA_H CQC_FIELD_LOC(316, 288) #define CQC_POE_QID_H_0 CQC_FIELD_LOC(319, 317) @@ -324,11 +346,14 @@ struct hns_roce_v2_cq_context { #define CQC_CQE_DB_RECORD_ADDR_L CQC_FIELD_LOC(351, 321) #define CQC_CQE_DB_RECORD_ADDR_H CQC_FIELD_LOC(383, 352) #define CQC_CQE_CNT CQC_FIELD_LOC(407, 384) +#define CQC_NOTIFY_ADDR_5 CQC_FIELD_LOC(415, 408) #define CQC_CQ_MAX_CNT CQC_FIELD_LOC(431, 416) #define CQC_CQ_PERIOD CQC_FIELD_LOC(447, 432) #define CQC_CQE_REPORT_TIMER CQC_FIELD_LOC(471, 448) #define CQC_WR_CQE_IDX 
CQC_FIELD_LOC(479, 472) #define CQC_SE_CQE_IDX CQC_FIELD_LOC(503, 480) +#define CQC_NOTIFY_ADDR_6 CQC_FIELD_LOC(509, 504) +#define CQC_NOTIFY_EN CQC_FIELD_LOC(510, 510) #define CQC_POE_QID_H_1 CQC_FIELD_LOC(511, 511)
struct hns_roce_srq_context { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index af9d054..239e08a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1594,6 +1594,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) if (hr_dev->hw->cmq_exit) hr_dev->hw->cmq_exit(hr_dev); hns_roce_dealloc_dfx_cnt(hr_dev); + if (hr_dev->notify_tbl) + kvfree(hr_dev->notify_tbl); }
MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index fcb5e95..287523f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1133,6 +1133,20 @@ static void set_congest_param(struct hns_roce_dev *hr_dev, default_congest_type(hr_dev, hr_qp); }
+static void set_qp_notify_param(struct hns_roce_qp *hr_qp, + struct ib_cq *ib_cq) +{ + struct hns_roce_cq *hr_cq = ib_cq ? to_hr_cq(ib_cq) : NULL; + + /* + * Always enable write with notify for XRC TGT since no flag + * could be passed to kernel for this type of QP + */ + if ((hr_cq && hr_cq->flags & HNS_ROCE_CQ_FLAG_NOTIFY_EN) || + (hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT)) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_WRITE_WITH_NOTIFY; +} + static bool check_cq_poe_en(struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq = ib_cq ? to_hr_cq(ib_cq) : NULL; @@ -1199,6 +1213,8 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (init_attr->qp_type == IB_QPT_XRC_TGT) default_congest_type(hr_dev, hr_qp);
+ set_qp_notify_param(hr_qp, init_attr->recv_cq); + if (udata) { ret = ib_copy_from_udata(ucmd, udata, min(udata->inlen, sizeof(*ucmd))); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index eeee57f..7223135 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -38,6 +38,7 @@
enum hns_roce_create_cq_create_flags { HNS_ROCE_CREATE_CQ_FLAGS_POE_MODE = 1 << 0, + HNS_ROCE_CREATE_CQ_FLAGS_WRITE_WITH_NOTIFY = 1 << 1, };
struct hns_roce_ib_create_cq { @@ -47,12 +48,15 @@ struct hns_roce_ib_create_cq { __u32 reserved; __aligned_u64 create_flags; /* Use enum hns_roce_create_cq_create_flags */ __u8 poe_channel; - __u8 rsv[7]; + __u8 notify_mode; + __u16 notify_idx; + __u16 rsv[2]; };
enum hns_roce_cq_cap_flags { HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, HNS_ROCE_CQ_FLAG_POE_EN = 1 << 2, + HNS_ROCE_CQ_FLAG_NOTIFY_EN = 1 << 3, };
struct hns_roce_ib_create_cq_resp { @@ -120,6 +124,7 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, HNS_ROCE_QP_CAP_STARS_SQ_MODE = 1 << 7, + HNS_ROCE_QP_CAP_WRITE_WITH_NOTIFY = 1 << 8, };
struct hns_roce_ib_create_qp_resp {