[PATCH OLK-5.10 0/7] Some bug fix patches for OLK-5.10 hns RoCE

From: Xinghai Cen <cenxinghai@h-partners.com> Some bug fix patches for OLK-5.10 hns RoCE: Junxian Huang (5): RDMA/hns: Change mtr member to pointer in hns QP/CQ/MR/SRQ/EQ struct RDMA/hns: Move mtr_node into the mtr struct RDMA/hns: Fix delayed destruction of db not taking effect RDMA/hns: Fix delay-destruction mechanism not processing kernel db RDMA/hns: Fix soft lockup by adding cond_resched() to bt pages loop Xinghai Cen (1): RDMA/hns: Fix unmatched kmalloc and kvfree Yuyu Li (1): RDMA/hns: Fix ifnullfree static warning drivers/infiniband/hw/hns/hns_roce_cq.c | 35 ++--- drivers/infiniband/hw/hns/hns_roce_db.c | 86 +++++++---- drivers/infiniband/hw/hns/hns_roce_dca.c | 6 +- drivers/infiniband/hw/hns/hns_roce_device.h | 69 ++++----- drivers/infiniband/hw/hns/hns_roce_ext.c | 3 +- drivers/infiniband/hw/hns/hns_roce_hem.c | 17 ++- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 82 +++++----- drivers/infiniband/hw/hns/hns_roce_main.c | 14 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 144 ++++++++---------- drivers/infiniband/hw/hns/hns_roce_qp.c | 35 ++--- drivers/infiniband/hw/hns/hns_roce_restrack.c | 4 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 63 +++----- 12 files changed, 271 insertions(+), 287 deletions(-) -- 2.33.0

driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBQK42 ---------------------------------------------------------------------- Change mtr member to pointer in hns QP/CQ/MR/SRQ/EQ struct to decouple the life cycle of mtr from these structs. This is the preparation for the following refactoring. No functional changes. Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> Signed-off-by: wenglianfa <wenglianfa@huawei.com> Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com> --- drivers/infiniband/hw/hns/hns_roce_cq.c | 19 ++-- drivers/infiniband/hw/hns/hns_roce_dca.c | 6 +- drivers/infiniband/hw/hns/hns_roce_device.h | 21 ++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 87 ++++++++++--------- drivers/infiniband/hw/hns/hns_roce_mr.c | 62 ++++++++----- drivers/infiniband/hw/hns/hns_roce_qp.c | 17 ++-- drivers/infiniband/hw/hns/hns_roce_restrack.c | 4 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 32 +++---- 8 files changed, 134 insertions(+), 114 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index b4fd2227cae9..b0696585b4b6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -190,7 +190,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) u64 mtts[MTT_MIN_COUNT] = {}; int ret; - ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts)); + ret = hns_roce_mtr_find(hr_dev, hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts)); if (ret) { ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); return ret; @@ -211,7 +211,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) } ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, - hns_roce_get_mtr_ba(&hr_cq->mtr)); + hns_roce_get_mtr_ba(hr_cq->mtr)); if (ret) goto err_xa; @@ -262,7 +262,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int ret; + int ret = 0; hr_cq->mtr_node = kvmalloc(sizeof(*hr_cq->mtr_node), GFP_KERNEL); if (!hr_cq->mtr_node) @@ -273,10 +273,11 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; - ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, - hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT, - udata, addr); - if (ret) { + hr_cq->mtr = hns_roce_mtr_create(hr_dev, &buf_attr, + hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT, + udata, addr); + if (IS_ERR(hr_cq->mtr)) { + ret = PTR_ERR(hr_cq->mtr); ibdev_err(ibdev, "Failed to alloc CQ mtr, ret = %d\n", ret); kvfree(hr_cq->mtr_node); hr_cq->mtr_node = NULL; @@ -288,9 +289,9 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { if (hr_cq->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(hr_cq->mtr_node, hr_dev, &hr_cq->mtr); + hns_roce_add_unfree_mtr(hr_cq->mtr_node, hr_dev, hr_cq->mtr); } else { - hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr); + hns_roce_mtr_destroy(hr_dev, hr_cq->mtr); kvfree(hr_cq->mtr_node); hr_cq->mtr_node = NULL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 65ed7666a826..dcf560513205 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -324,7 +324,7 @@ int hns_roce_map_dca_safe_page(struct hns_roce_dev *hr_dev, for 
(i = 0; i < page_count; i++) pages[i] = hr_dev->dca_safe_page; - ret = hns_roce_mtr_map(hr_dev, &hr_qp->mtr, pages, page_count); + ret = hns_roce_mtr_map(hr_dev, hr_qp->mtr, pages, page_count); if (ret) ibdev_err(ibdev, "failed to map safe page for DCA, ret = %d.\n", ret); @@ -338,7 +338,7 @@ static int config_dca_qpc(struct hns_roce_dev *hr_dev, int page_count) { struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_mtr *mtr = &hr_qp->mtr; + struct hns_roce_mtr *mtr = hr_qp->mtr; int ret; ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); @@ -698,7 +698,7 @@ static u32 alloc_buf_from_dca_mem(struct hns_roce_qp *hr_qp, buf_id = HNS_DCA_TO_BUF_ID(hr_qp->qpn, hr_qp->dca_cfg.attach_count); /* Assign pages from free pages */ - unit_pages = hr_qp->mtr.hem_cfg.is_direct ? buf_pages : 1; + unit_pages = hr_qp->mtr->hem_cfg.is_direct ? buf_pages : 1; alloc_pages = assign_dca_pages(ctx, buf_id, buf_pages, unit_pages); if (buf_pages != alloc_pages) { if (alloc_pages > 0) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7d30b8380af6..defd9bb72fb8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -426,7 +426,7 @@ struct hns_roce_mr { int enabled; /* MR's active status */ int type; /* MR's register type */ u32 pbl_hop_num; /* multi-hop number */ - struct hns_roce_mtr pbl_mtr; + struct hns_roce_mtr *pbl_mtr; u32 npages; dma_addr_t *page_list; bool delayed_destroy_flag; @@ -540,7 +540,7 @@ struct hns_roce_notify_conf { struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_mtr mtr; + struct hns_roce_mtr *mtr; struct hns_roce_db db; u32 flags; spinlock_t lock; @@ -565,7 +565,7 @@ struct hns_roce_cq { }; struct hns_roce_idx_que { - struct hns_roce_mtr mtr; + struct hns_roce_mtr *mtr; u32 entry_shift; unsigned long *bitmap; u32 head; @@ -587,7 +587,7 @@ struct hns_roce_srq { refcount_t refcount; struct completion free; - struct hns_roce_mtr buf_mtr; + struct hns_roce_mtr *buf_mtr; u64 *wrid; struct hns_roce_idx_que idx_que; @@ -733,7 +733,7 @@ struct hns_roce_qp { enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; - struct hns_roce_mtr mtr; + struct hns_roce_mtr *mtr; struct hns_roce_dca_cfg dca_cfg; u32 buff_size; @@ -834,7 +834,7 @@ struct hns_roce_eq { int coalesce; int arm_st; int hop_num; - struct hns_roce_mtr mtr; + struct hns_roce_mtr *mtr; u16 eq_max_cnt; u32 eq_period; int shift; @@ -1423,10 +1423,11 @@ static inline dma_addr_t hns_roce_get_mtr_ba(struct hns_roce_mtr *mtr) } int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, u32 offset, u64 *mtt_buf, int mtt_max); -int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, - unsigned int page_shift, struct ib_udata *udata, - unsigned long user_addr); +struct hns_roce_mtr *hns_roce_mtr_create(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_attr *buf_attr, + unsigned int ba_page_shift, + struct ib_udata *udata, + unsigned long user_addr); void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5a7539f8c14d..e00807308850 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -152,7 +152,7 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, 
hr_reg_write_bool(fseg, FRMR_LW, wr->access & IB_ACCESS_LOCAL_WRITE); /* Data structure reuse may lead to confusion */ - pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; + pbl_ba = mr->pbl_mtr->hem_cfg.root_ba; rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba)); rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba)); @@ -163,7 +163,7 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, hr_reg_write(fseg, FRMR_PBL_SIZE, mr->npages); hr_reg_write(fseg, FRMR_PBL_BUF_PG_SZ, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + to_hr_hw_page_shift(mr->pbl_mtr->hem_cfg.buf_pg_shift)); hr_reg_clear(fseg, FRMR_BLK_MODE); } @@ -981,12 +981,12 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n) { - return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); + return hns_roce_buf_offset(srq->buf_mtr->kmem, n << srq->wqe_shift); } static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n) { - return hns_roce_buf_offset(idx_que->mtr.kmem, + return hns_roce_buf_offset(idx_que->mtr->kmem, n << idx_que->entry_shift); } @@ -3648,7 +3648,7 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, int ret; int i; - ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, + ret = hns_roce_mtr_find(hr_dev, mr->pbl_mtr, 0, pages, min_t(int, ARRAY_SIZE(pages), mr->npages)); if (ret) { ibdev_err(ibdev, "failed to find PBL mtr, ret = %d.\n", ret); @@ -3659,7 +3659,7 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, for (i = 0; i < ARRAY_SIZE(pages); i++) pages[i] >>= MPT_PBL_BUF_ADDR_S; - pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr); + pbl_ba = hns_roce_get_mtr_ba(mr->pbl_mtr); mpt_entry->pbl_size = cpu_to_le32(mr->npages); mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> MPT_PBL_BA_ADDR_S); @@ -3672,7 +3672,7 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1])); hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1])); hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + to_hr_hw_page_shift(mr->pbl_mtr->hem_cfg.buf_pg_shift)); return 0; } @@ -3715,7 +3715,7 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num); hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + to_hr_hw_page_shift(mr->pbl_mtr->hem_cfg.ba_pg_shift)); hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); return set_mtpt_pbl(hr_dev, mpt_entry, mr); @@ -3759,7 +3759,7 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) { - dma_addr_t pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; + dma_addr_t pbl_ba = mr->pbl_mtr->hem_cfg.root_ba; struct hns_roce_v2_mpt_entry *mpt_entry; mpt_entry = mb_buf; @@ -3778,9 +3778,9 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1); hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + to_hr_hw_page_shift(mr->pbl_mtr->hem_cfg.ba_pg_shift)); hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + to_hr_hw_page_shift(mr->pbl_mtr->hem_cfg.buf_pg_shift)); mpt_entry->pbl_size = cpu_to_le32(mr->npages); @@ -3919,7 +3919,7 @@ static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return 
hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); + return hns_roce_buf_offset(hr_cq->mtr->kmem, n * hr_cq->cqe_size); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n) @@ -4078,9 +4078,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, hr_reg_write(cq_context, CQC_CQE_NEX_BLK_ADDR_H, upper_32_bits(to_hr_hw_page_addr(mtts[1]))); hr_reg_write(cq_context, CQC_CQE_BAR_PG_SZ, - to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift)); + to_hr_hw_page_shift(hr_cq->mtr->hem_cfg.ba_pg_shift)); hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ, - to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift)); + to_hr_hw_page_shift(hr_cq->mtr->hem_cfg.buf_pg_shift)); hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> CQC_CQE_BA_L_S); hr_reg_write(cq_context, CQC_CQE_BA_H, dma_handle >> CQC_CQE_BA_H_S); hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN, @@ -4957,7 +4957,7 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, int ret; /* Search qp buf's mtts */ - ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.wqe_offset, + ret = hns_roce_mtr_find(hr_dev, hr_qp->mtr, hr_qp->rq.wqe_offset, mtts, ARRAY_SIZE(mtts)); if (hr_qp->rq.wqe_cnt && ret) { ibdev_err(&hr_dev->ib_dev, @@ -4965,7 +4965,7 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, return -EINVAL; } - wqe_sge_ba = hns_roce_get_mtr_ba(&hr_qp->mtr); + wqe_sge_ba = hns_roce_get_mtr_ba(hr_qp->mtr); context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; @@ -4996,11 +4996,11 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, hr_reg_clear(qpc_mask, QPC_RQ_HOP_NUM); hr_reg_write(context, QPC_WQE_SGE_BA_PG_SZ, - to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift)); + to_hr_hw_page_shift(hr_qp->mtr->hem_cfg.ba_pg_shift)); hr_reg_clear(qpc_mask, QPC_WQE_SGE_BA_PG_SZ); hr_reg_write(context, QPC_WQE_SGE_BUF_PG_SZ, - to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift)); + to_hr_hw_page_shift(hr_qp->mtr->hem_cfg.buf_pg_shift)); hr_reg_clear(qpc_mask, QPC_WQE_SGE_BUF_PG_SZ); context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); @@ -5034,17 +5034,16 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, int ret; /* search qp buf's mtts */ - ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.wqe_offset, - &sq_cur_blk, 1); + ret = hns_roce_mtr_find(hr_dev, hr_qp->mtr, hr_qp->sq.wqe_offset, + &sq_cur_blk, 1); if (ret) { ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf, ret = %d.\n", hr_qp->qpn, ret); return ret; } if (hr_qp->sge.sge_cnt > 0) { - ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.wqe_offset, &sge_cur_blk, - 1); + ret = hns_roce_mtr_find(hr_dev, hr_qp->mtr, + hr_qp->sge.wqe_offset, &sge_cur_blk, 1); if (ret) { ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n", hr_qp->qpn, ret); @@ -6416,7 +6415,7 @@ static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, int ret; /* Get physical address of idx que buf */ - ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx, + ret = hns_roce_mtr_find(hr_dev, idx_que->mtr, 0, mtts_idx, ARRAY_SIZE(mtts_idx)); if (ret) { ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", @@ -6424,7 +6423,7 @@ static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, return ret; } - dma_handle_idx = hns_roce_get_mtr_ba(&idx_que->mtr); + dma_handle_idx = hns_roce_get_mtr_ba(idx_que->mtr); hr_reg_write(ctx, SRQC_IDX_HOP_NUM, to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt)); @@ -6434,9 +6433,9 @@ static int hns_roce_v2_write_srqc_index_queue(struct 
hns_roce_srq *srq, upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT)); hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ, - to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift)); + to_hr_hw_page_shift(idx_que->mtr->hem_cfg.ba_pg_shift)); hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ, - to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift)); + to_hr_hw_page_shift(idx_que->mtr->hem_cfg.buf_pg_shift)); hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L, to_hr_hw_page_addr(mtts_idx[0])); @@ -6463,7 +6462,7 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) memset(ctx, 0, sizeof(*ctx)); /* Get the physical address of srq buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ret = hns_roce_mtr_find(hr_dev, srq->buf_mtr, 0, mtts_wqe, ARRAY_SIZE(mtts_wqe)); if (ret) { ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", @@ -6471,7 +6470,7 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) return ret; } - dma_handle_wqe = hns_roce_get_mtr_ba(&srq->buf_mtr); + dma_handle_wqe = hns_roce_get_mtr_ba(srq->buf_mtr); hr_reg_write(ctx, SRQC_SRQ_ST, 1); hr_reg_write_bool(ctx, SRQC_SRQ_TYPE, @@ -6493,9 +6492,9 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT)); hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); + to_hr_hw_page_shift(srq->buf_mtr->hem_cfg.ba_pg_shift)); hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); + to_hr_hw_page_shift(srq->buf_mtr->hem_cfg.buf_pg_shift)); if (srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB) { hr_reg_enable(ctx, SRQC_DB_RECORD_EN); @@ -6848,7 +6847,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_aeqe *aeqe; - aeqe = hns_roce_buf_offset(eq->mtr.kmem, + aeqe = hns_roce_buf_offset(eq->mtr->kmem, (eq->cons_index & (eq->entries - 1)) * eq->eqe_size); @@ -6915,7 +6914,7 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe; - ceqe = hns_roce_buf_offset(eq->mtr.kmem, + ceqe = hns_roce_buf_offset(eq->mtr->kmem, (eq->cons_index & (eq->entries - 1)) * eq->eqe_size); @@ -7155,7 +7154,7 @@ static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev, static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - hns_roce_mtr_destroy(hr_dev, &eq->mtr); + hns_roce_mtr_destroy(hr_dev, eq->mtr); } static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, @@ -7202,14 +7201,14 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, init_eq_config(hr_dev, eq); /* if not multi-hop, eqe buffer only use one trunk */ - ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, + ret = hns_roce_mtr_find(hr_dev, eq->mtr, 0, eqe_ba, ARRAY_SIZE(eqe_ba)); if (ret) { dev_err(hr_dev->dev, "failed to find EQE mtr, ret = %d\n", ret); return ret; } - bt_ba = hns_roce_get_mtr_ba(&eq->mtr); + bt_ba = hns_roce_get_mtr_ba(eq->mtr); hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID); hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num); @@ -7219,9 +7218,9 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, hr_reg_write(eqc, EQC_EQN, eq->eqn); hr_reg_write(eqc, EQC_EQE_CNT, HNS_ROCE_EQ_INIT_EQE_CNT); hr_reg_write(eqc, EQC_EQE_BA_PG_SZ, - to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift)); + to_hr_hw_page_shift(eq->mtr->hem_cfg.ba_pg_shift)); hr_reg_write(eqc, EQC_EQE_BUF_PG_SZ, - to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift)); + 
to_hr_hw_page_shift(eq->mtr->hem_cfg.buf_pg_shift)); hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX); hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt); @@ -7254,7 +7253,7 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { struct hns_roce_buf_attr buf_attr = {}; - int err; + int err = 0; if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0) eq->hop_num = 0; @@ -7266,11 +7265,13 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; - err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, - hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL, - 0); - if (err) + eq->mtr = hns_roce_mtr_create(hr_dev, &buf_attr, + hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, + NULL, 0); + if (IS_ERR(eq->mtr)) { + err = PTR_ERR(eq->mtr); dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); + } return err; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 17f0a83eaad4..d28c555ec37f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -97,7 +97,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_device *ibdev = &hr_dev->ib_dev; bool is_fast = mr->type == MR_TYPE_FRMR; struct hns_roce_buf_attr buf_attr = {}; - int err; + int err = 0; mr->mtr_node = kvmalloc(sizeof(*mr->mtr_node), GFP_KERNEL); if (!mr->mtr_node) @@ -116,17 +116,17 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, /* pagesize and hopnum is fixed for fast MR */ buf_attr.adaptive = !is_fast; - err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr, - hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT, - udata, start); - if (err) { + mr->pbl_mtr = hns_roce_mtr_create(hr_dev, &buf_attr, + hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, start); + if (IS_ERR(mr->pbl_mtr)) { + err = PTR_ERR(mr->pbl_mtr); ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); kvfree(mr->mtr_node); mr->mtr_node = NULL; return err; } - mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count; + mr->npages = mr->pbl_mtr->hem_cfg.buf_pg_count; mr->pbl_hop_num = buf_attr.region[0].hopnum; return err; @@ -135,9 +135,9 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { if (mr->delayed_destroy_flag && mr->type != MR_TYPE_DMA) { - hns_roce_add_unfree_mtr(mr->mtr_node, hr_dev, &mr->pbl_mtr); + hns_roce_add_unfree_mtr(mr->mtr_node, hr_dev, mr->pbl_mtr); } else { - hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); + hns_roce_mtr_destroy(hr_dev, mr->pbl_mtr); kvfree(mr->mtr_node); mr->mtr_node = NULL; } @@ -213,18 +213,22 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_mr *mr; - int ret; + int ret = -ENOMEM; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (mr == NULL) return ERR_PTR(-ENOMEM); + mr->pbl_mtr = kvzalloc(sizeof(*mr->pbl_mtr), GFP_KERNEL); + if (!mr->pbl_mtr) + goto err_mtr; + mr->type = MR_TYPE_DMA; mr->pd = to_hr_pd(pd)->pdn; mr->access = acc; /* Allocate memory region key */ - hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); + hns_roce_hem_list_init(&mr->pbl_mtr->hem_list); ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; @@ -240,6 +244,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) 
free_mr_key(hr_dev, mr); err_free: + kvfree(mr->pbl_mtr); +err_mtr: kfree(mr); return ERR_PTR(ret); } @@ -437,7 +443,7 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) { struct hns_roce_mr *mr = to_hr_mr(ibmr); - if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) { + if (likely(mr->npages < mr->pbl_mtr->hem_cfg.buf_pg_count)) { mr->page_list[mr->npages++] = addr; return 0; } @@ -452,7 +458,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); - struct hns_roce_mtr *mtr = &mr->pbl_mtr; + struct hns_roce_mtr *mtr = mr->pbl_mtr; int ret, sg_num = 0; if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) || @@ -461,7 +467,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, return sg_num; mr->npages = 0; - mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count, + mr->page_list = kvcalloc(mr->pbl_mtr->hem_cfg.buf_pg_count, sizeof(dma_addr_t), GFP_KERNEL); if (!mr->page_list) return sg_num; @@ -469,7 +475,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page); if (sg_num < 1) { ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", - mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num); + mr->npages, mr->pbl_mtr->hem_cfg.buf_pg_count, sg_num); goto err_page_list; } @@ -482,7 +488,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); sg_num = 0; } else { - mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); + mr->pbl_mtr->hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); } err_page_list: @@ -1189,26 +1195,31 @@ static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) * hns_roce_mtr_create - Create hns memory translate region. 
* * @hr_dev: RoCE device struct pointer - * @mtr: memory translate region * @buf_attr: buffer attribute for creating mtr * @ba_page_shift: page shift for multi-hop base address table * @udata: user space context, if it's NULL, means kernel space * @user_addr: userspace virtual address to start at */ -int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, - unsigned int ba_page_shift, struct ib_udata *udata, - unsigned long user_addr) +struct hns_roce_mtr *hns_roce_mtr_create(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_attr *buf_attr, + unsigned int ba_page_shift, + struct ib_udata *udata, + unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_mtr *mtr; int ret; + mtr = kvzalloc(sizeof(*mtr), GFP_KERNEL); + if (!mtr) + return ERR_PTR(-ENOMEM); + if (!buf_attr->mtt_only) { ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr); if (ret) { ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret); - return ret; + goto err_out; } ret = get_best_page_shift(hr_dev, mtr, buf_attr); @@ -1236,7 +1247,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (buf_attr->mtt_only) { mtr->umem = NULL; mtr->kmem = NULL; - return 0; + return mtr; } /* Write buffer's dma address to MTT */ @@ -1245,13 +1256,15 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret); goto err_alloc_mtt; } - return 0; + return mtr; err_alloc_mtt: mtr_free_mtt(hr_dev, mtr); err_init_buf: mtr_free_bufs(hr_dev, mtr); - return ret; +err_out: + kvfree(mtr); + return ERR_PTR(ret); } void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) @@ -1261,6 +1274,7 @@ void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) /* free buffers */ mtr_free_bufs(hr_dev, mtr); + kvfree(mtr); } static void hns_roce_copy_mtr(struct hns_roce_mtr *new_mtr, struct hns_roce_mtr *old_mtr) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index fc123b16797b..deeac1d50c0e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -836,7 +836,7 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata, unsigned long addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = 0; hr_qp->mtr_node = kvmalloc(sizeof(*hr_qp->mtr_node), GFP_KERNEL); if (!hr_qp->mtr_node) @@ -866,10 +866,11 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->en_flags |= HNS_ROCE_QP_CAP_SVE_DIRECT_WQE; } - ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, buf_attr, - PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, - udata, addr); - if (ret) { + hr_qp->mtr = hns_roce_mtr_create(hr_dev, buf_attr, + PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + udata, addr); + if (IS_ERR(hr_qp->mtr)) { + ret = PTR_ERR(hr_qp->mtr); ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); if (dca_en) hns_roce_disable_dca(hr_dev, hr_qp, udata); @@ -886,9 +887,9 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata) { if (hr_qp->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(hr_qp->mtr_node, hr_dev, &hr_qp->mtr); + hns_roce_add_unfree_mtr(hr_qp->mtr_node, hr_dev, hr_qp->mtr); } else { - hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); + hns_roce_mtr_destroy(hr_dev, hr_qp->mtr); kvfree(hr_qp->mtr_node); 
hr_qp->mtr_node = NULL; } @@ -1795,7 +1796,7 @@ static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset) if (unlikely(hr_qp->dca_cfg.buf_list)) return dca_buf_offset(&hr_qp->dca_cfg, offset); else - return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); + return hns_roce_buf_offset(hr_qp->mtr->kmem, offset); } void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 9b8b40998b2e..ec9ecb8a4a8b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -145,11 +145,11 @@ int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr) goto err; if (rdma_nl_put_driver_u32_hex(msg, "ba_pg_shift", - hr_mr->pbl_mtr.hem_cfg.ba_pg_shift)) + hr_mr->pbl_mtr->hem_cfg.ba_pg_shift)) goto err; if (rdma_nl_put_driver_u32_hex(msg, "buf_pg_shift", - hr_mr->pbl_mtr.hem_cfg.buf_pg_shift)) + hr_mr->pbl_mtr->hem_cfg.buf_pg_shift)) goto err; nla_nest_end(msg, table_attr); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 454a7370a8b7..76381a96df90 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -183,7 +183,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, struct hns_roce_idx_que *idx_que = &srq->idx_que; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int ret; + int ret = 0; idx_que->mtr_node = kvmalloc(sizeof(*idx_que->mtr_node), GFP_KERNEL); if (!idx_que->mtr_node) @@ -197,10 +197,11 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; - ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, - hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT, - udata, addr); - if (ret) { + idx_que->mtr = hns_roce_mtr_create(hr_dev, &buf_attr, + hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT, + udata, addr); + if (IS_ERR(idx_que->mtr)) { + ret = PTR_ERR(idx_que->mtr); ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, ret = %d.\n", ret); goto err_kvmalloc; } @@ -219,7 +220,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, return 0; err_idx_mtr: - hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); + hns_roce_mtr_destroy(hr_dev, idx_que->mtr); err_kvmalloc: kvfree(idx_que->mtr_node); idx_que->mtr_node = NULL; @@ -234,9 +235,9 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) bitmap_free(idx_que->bitmap); idx_que->bitmap = NULL; if (srq->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(idx_que->mtr_node, hr_dev, &idx_que->mtr); + hns_roce_add_unfree_mtr(idx_que->mtr_node, hr_dev, idx_que->mtr); } else { - hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); + hns_roce_mtr_destroy(hr_dev, idx_que->mtr); kvfree(idx_que->mtr_node); idx_que->mtr_node = NULL; } @@ -248,7 +249,7 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int ret; + int ret = 0; srq->mtr_node = kvmalloc(sizeof(*srq->mtr_node), GFP_KERNEL); if (!srq->mtr_node) @@ -264,10 +265,11 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; buf_attr.region_count = 1; - ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, - hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT, - udata, addr); - if 
(ret) { + srq->buf_mtr = hns_roce_mtr_create(hr_dev, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT, + udata, addr); + if (IS_ERR(srq->buf_mtr)) { + ret = PTR_ERR(srq->buf_mtr); ibdev_err(ibdev, "failed to alloc SRQ buf mtr, ret = %d.\n", ret); kvfree(srq->mtr_node); @@ -281,9 +283,9 @@ static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { if (srq->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(srq->mtr_node, hr_dev, &srq->buf_mtr); + hns_roce_add_unfree_mtr(srq->mtr_node, hr_dev, srq->buf_mtr); } else { - hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); + hns_roce_mtr_destroy(hr_dev, srq->buf_mtr); kvfree(srq->mtr_node); srq->mtr_node = NULL; } -- 2.33.0

driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBQK42 ---------------------------------------------------------------------- Previously driver had to copy a new mtr and store it in mtr_node so that it could be found when freeing delayed-destruction resources, because the life cycle of the origin mtr was over when QP/CQ/MR/SRQ structs were freed. But since the life cycle of mtr has been decoupled, driver don't need to copy the mtr now. Move mtr_node into the mtr struct so that mtr can be found with no need to copying a new one. Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com> --- drivers/infiniband/hw/hns/hns_roce_cq.c | 15 ++---- drivers/infiniband/hw/hns/hns_roce_device.h | 14 +----- drivers/infiniband/hw/hns/hns_roce_mr.c | 54 ++++----------------- drivers/infiniband/hw/hns/hns_roce_qp.c | 17 ++----- drivers/infiniband/hw/hns/hns_roce_srq.c | 33 +++---------- 5 files changed, 24 insertions(+), 109 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index b0696585b4b6..575734a83e44 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -264,10 +264,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct hns_roce_buf_attr buf_attr = {}; int ret = 0; - hr_cq->mtr_node = kvmalloc(sizeof(*hr_cq->mtr_node), GFP_KERNEL); - if (!hr_cq->mtr_node) - return -ENOMEM; - buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT; buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; @@ -279,8 +275,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, if (IS_ERR(hr_cq->mtr)) { ret = PTR_ERR(hr_cq->mtr); ibdev_err(ibdev, "Failed to alloc CQ mtr, ret = %d\n", ret); - kvfree(hr_cq->mtr_node); - hr_cq->mtr_node = NULL; } return ret; @@ -288,13 +282,10 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - if (hr_cq->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(hr_cq->mtr_node, hr_dev, hr_cq->mtr); - } else { + if (hr_cq->delayed_destroy_flag) + hns_roce_add_unfree_mtr(hr_dev, hr_cq->mtr); + else hns_roce_mtr_destroy(hr_dev, hr_cq->mtr); - kvfree(hr_cq->mtr_node); - hr_cq->mtr_node = NULL; - } } static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index defd9bb72fb8..e29f12108279 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -391,6 +391,7 @@ struct hns_roce_mtr { struct ib_umem *umem; /* user space buffer */ struct hns_roce_buf *kmem; /* kernel space buffer */ struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ + struct list_head node; /* list node for delay-destruction */ }; /* DCA config */ @@ -430,7 +431,6 @@ struct hns_roce_mr { u32 npages; dma_addr_t *page_list; bool delayed_destroy_flag; - struct hns_roce_mtr_node *mtr_node; }; struct hns_roce_mr_table { @@ -561,7 +561,6 @@ struct hns_roce_cq { u8 poe_channel; bool delayed_destroy_flag; struct hns_roce_notify_conf write_notify; - struct hns_roce_mtr_node *mtr_node; }; struct hns_roce_idx_que { @@ -570,7 +569,6 @@ struct hns_roce_idx_que { unsigned long *bitmap; u32 head; u32 tail; - struct 
hns_roce_mtr_node *mtr_node; }; struct hns_roce_srq { @@ -597,7 +595,6 @@ struct hns_roce_srq { struct hns_roce_db rdb; u32 cap_flags; bool delayed_destroy_flag; - struct hns_roce_mtr_node *mtr_node; }; struct hns_roce_uar_table { @@ -771,7 +768,6 @@ struct hns_roce_qp { u8 tc_mode; u8 priority; bool delayed_destroy_flag; - struct hns_roce_mtr_node *mtr_node; spinlock_t flush_lock; struct hns_roce_dip *dip; }; @@ -1146,11 +1142,6 @@ struct hns_roce_port { struct hns_roce_cnp_pri_param *cnp_pri_param; }; -struct hns_roce_mtr_node { - struct hns_roce_mtr mtr; - struct list_head list; -}; - struct hns_roce_dev { struct ib_device ib_dev; struct pci_dev *pci_dev; @@ -1549,8 +1540,7 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr); void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page, struct hns_roce_dev *hr_dev); void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev); -void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos, - struct hns_roce_dev *hr_dev, +void hns_roce_add_unfree_mtr(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev); struct hns_user_mmap_entry * diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index f612554b5bf8..6a2fe8ac46c4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -99,10 +99,6 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct hns_roce_buf_attr buf_attr = {}; int err = 0; - mr->mtr_node = kvmalloc(sizeof(*mr->mtr_node), GFP_KERNEL); - if (!mr->mtr_node) - return -ENOMEM; - mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; buf_attr.page_shift = is_fast ? PAGE_SHIFT : hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT; @@ -121,8 +117,6 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, if (IS_ERR(mr->pbl_mtr)) { err = PTR_ERR(mr->pbl_mtr); ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); - kvfree(mr->mtr_node); - mr->mtr_node = NULL; return err; } @@ -134,13 +128,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - if (mr->delayed_destroy_flag && mr->type != MR_TYPE_DMA) { - hns_roce_add_unfree_mtr(mr->mtr_node, hr_dev, mr->pbl_mtr); - } else { + if (mr->delayed_destroy_flag && mr->type != MR_TYPE_DMA) + hns_roce_add_unfree_mtr(hr_dev, mr->pbl_mtr); + else hns_roce_mtr_destroy(hr_dev, mr->pbl_mtr); - kvfree(mr->mtr_node); - mr->mtr_node = NULL; - } } static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) @@ -1277,49 +1268,22 @@ void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) kvfree(mtr); } -static void hns_roce_copy_mtr(struct hns_roce_mtr *new_mtr, struct hns_roce_mtr *old_mtr) -{ - struct list_head *new_head, *old_head; - int i, j; - - memcpy(new_mtr, old_mtr, sizeof(*old_mtr)); - - for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) - for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) { - new_head = &new_mtr->hem_list.mid_bt[i][j]; - old_head = &old_mtr->hem_list.mid_bt[i][j]; - list_replace(old_head, new_head); - } - - new_head = &new_mtr->hem_list.root_bt; - old_head = &old_mtr->hem_list.root_bt; - list_replace(old_head, new_head); - - new_head = &new_mtr->hem_list.btm_bt; - old_head = &old_mtr->hem_list.btm_bt; - list_replace(old_head, new_head); -} - -void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos, - 
struct hns_roce_dev *hr_dev, +void hns_roce_add_unfree_mtr(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) { - hns_roce_copy_mtr(&pos->mtr, mtr); - mutex_lock(&hr_dev->mtr_unfree_list_mutex); - list_add_tail(&pos->list, &hr_dev->mtr_unfree_list); + list_add_tail(&mtr->node, &hr_dev->mtr_unfree_list); mutex_unlock(&hr_dev->mtr_unfree_list_mutex); } void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev) { - struct hns_roce_mtr_node *pos, *next; + struct hns_roce_mtr *mtr, *next; mutex_lock(&hr_dev->mtr_unfree_list_mutex); - list_for_each_entry_safe(pos, next, &hr_dev->mtr_unfree_list, list) { - list_del(&pos->list); - hns_roce_mtr_destroy(hr_dev, &pos->mtr); - kvfree(pos); + list_for_each_entry_safe(mtr, next, &hr_dev->mtr_unfree_list, node) { + list_del(&mtr->node); + hns_roce_mtr_destroy(hr_dev, mtr); } mutex_unlock(&hr_dev->mtr_unfree_list_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index deeac1d50c0e..0710ddc77079 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -838,18 +838,12 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_device *ibdev = &hr_dev->ib_dev; int ret = 0; - hr_qp->mtr_node = kvmalloc(sizeof(*hr_qp->mtr_node), GFP_KERNEL); - if (!hr_qp->mtr_node) - return -ENOMEM; - if (dca_en) { /* DCA must be enabled after the buffer attr is configured. */ ret = hns_roce_enable_dca(hr_qp, udata); if (ret) { ibdev_err(ibdev, "failed to enable DCA, ret = %d.\n", ret); - kvfree(hr_qp->mtr_node); - hr_qp->mtr_node = NULL; return ret; } @@ -874,8 +868,6 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); if (dca_en) hns_roce_disable_dca(hr_dev, hr_qp, udata); - kvfree(hr_qp->mtr_node); - hr_qp->mtr_node = NULL; } else if (dca_en) { ret = hns_roce_map_dca_safe_page(hr_dev, hr_qp); } @@ -886,13 +878,10 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata) { - if (hr_qp->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(hr_qp->mtr_node, hr_dev, hr_qp->mtr); - } else { + if (hr_qp->delayed_destroy_flag) + hns_roce_add_unfree_mtr(hr_dev, hr_qp->mtr); + else hns_roce_mtr_destroy(hr_dev, hr_qp->mtr); - kvfree(hr_qp->mtr_node); - hr_qp->mtr_node = NULL; - } if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) hns_roce_disable_dca(hr_dev, hr_qp, udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 76381a96df90..68fbd5dabc3f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -185,10 +185,6 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, struct hns_roce_buf_attr buf_attr = {}; int ret = 0; - idx_que->mtr_node = kvmalloc(sizeof(*idx_que->mtr_node), GFP_KERNEL); - if (!idx_que->mtr_node) - return -ENOMEM; - srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT; @@ -203,7 +199,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, if (IS_ERR(idx_que->mtr)) { ret = PTR_ERR(idx_que->mtr); ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, ret = %d.\n", ret); - goto err_kvmalloc; + return ret; } if (!udata) { @@ -221,9 +217,6 @@ static int alloc_srq_idx(struct hns_roce_dev 
*hr_dev, struct hns_roce_srq *srq, return 0; err_idx_mtr: hns_roce_mtr_destroy(hr_dev, idx_que->mtr); -err_kvmalloc: - kvfree(idx_que->mtr_node); - idx_que->mtr_node = NULL; return ret; } @@ -234,13 +227,10 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) bitmap_free(idx_que->bitmap); idx_que->bitmap = NULL; - if (srq->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(idx_que->mtr_node, hr_dev, idx_que->mtr); - } else { + if (srq->delayed_destroy_flag) + hns_roce_add_unfree_mtr(hr_dev, idx_que->mtr); + else hns_roce_mtr_destroy(hr_dev, idx_que->mtr); - kvfree(idx_que->mtr_node); - idx_que->mtr_node = NULL; - } } static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, @@ -251,10 +241,6 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_buf_attr buf_attr = {}; int ret = 0; - srq->mtr_node = kvmalloc(sizeof(*srq->mtr_node), GFP_KERNEL); - if (!srq->mtr_node) - return -ENOMEM; - srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, HNS_ROCE_SGE_SIZE * srq->max_gs))); @@ -272,8 +258,6 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, ret = PTR_ERR(srq->buf_mtr); ibdev_err(ibdev, "failed to alloc SRQ buf mtr, ret = %d.\n", ret); - kvfree(srq->mtr_node); - srq->mtr_node = NULL; } return ret; @@ -282,13 +266,10 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - if (srq->delayed_destroy_flag) { - hns_roce_add_unfree_mtr(srq->mtr_node, hr_dev, srq->buf_mtr); - } else { + if (srq->delayed_destroy_flag) + hns_roce_add_unfree_mtr(hr_dev, srq->buf_mtr); + else hns_roce_mtr_destroy(hr_dev, srq->buf_mtr); - kvfree(srq->mtr_node); - srq->mtr_node = NULL; - } } static int alloc_srq_wrid(struct hns_roce_srq *srq) -- 2.33.0

driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBQK42 ---------------------------------------------------------------------- Driver allocates and frees db in a unit of page. One db page will be shared by multiple dbs. Currently the delayed destruiction of db only depends on the delayed_unmap_flag of the db itself. It means if this flag of the last db in a page is not set, this page will still be freed, and the delayed-destruction mechanism won't take effect despite the flag of some previous dbs may be set. A db page is associated with a umem_node. Add a flag to umem_node to indicate whether this page should be delayed-destroyed. Fixes: 431c875e4b02 ("RDMA/hns: Fix simultaneous reset and resource deregistration") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com> --- drivers/infiniband/hw/hns/hns_roce_db.c | 7 +++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 78121524a794..6c4aaa38e199 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -67,17 +67,20 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, bool delayed_unmap_flag) { struct hns_roce_dev *hr_dev = to_hr_dev(context->ibucontext.device); + struct hns_roce_umem_node *umem_node = db->u.user_page->umem_node; mutex_lock(&context->page_mutex); + umem_node->delayed_unmap_flag |= delayed_unmap_flag; + refcount_dec(&db->u.user_page->refcount); if (refcount_dec_if_one(&db->u.user_page->refcount)) { list_del(&db->u.user_page->list); - if (delayed_unmap_flag) { + if (umem_node->delayed_unmap_flag) { hns_roce_add_unfree_umem(db->u.user_page, hr_dev); } else { ib_umem_release(db->u.user_page->umem); - kvfree(db->u.user_page->umem_node); + kvfree(umem_node); } kfree(db->u.user_page); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e29f12108279..09798d124aa9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -500,6 +500,7 @@ struct hns_roce_db_pgdir { struct hns_roce_umem_node { struct ib_umem *umem; struct list_head list; + bool delayed_unmap_flag; }; struct hns_roce_user_db_page { -- 2.33.0

driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBQK42 ---------------------------------------------------------------------- Kernel dbs are not processed by delayed-destruction mechanism. This may lead to HW UAF described in the fixes commit. Expand the hns_roce_umem_node to hns_roce_db_pg_node with kernel db information. This struct is now used by both userspace and kernel db pages. Fixes: 431c875e4b02 ("RDMA/hns: Fix simultaneous reset and resource deregistration") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> --- drivers/infiniband/hw/hns/hns_roce_cq.c | 3 +- drivers/infiniband/hw/hns/hns_roce_db.c | 85 +++++++++++++-------- drivers/infiniband/hw/hns/hns_roce_device.h | 35 +++++---- drivers/infiniband/hw/hns/hns_roce_main.c | 10 +-- drivers/infiniband/hw/hns/hns_roce_mr.c | 30 ++++---- drivers/infiniband/hw/hns/hns_roce_qp.c | 3 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 7 files changed, 99 insertions(+), 69 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 575734a83e44..a2f1e722d019 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -339,7 +339,8 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, hns_roce_db_unmap_user(uctx, &hr_cq->db, hr_cq->delayed_destroy_flag); } else { - hns_roce_free_db(hr_dev, &hr_cq->db); + hns_roce_free_db(hr_dev, &hr_cq->db, + hr_cq->delayed_destroy_flag); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 6c4aaa38e199..dc3d3a075525 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -12,6 +12,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, { unsigned long page_addr = virt & PAGE_MASK; struct hns_roce_user_db_page *page; + struct ib_umem *umem; unsigned int offset; int ret = 0; @@ -29,31 +30,32 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, refcount_set(&page->refcount, 1); page->user_virt = page_addr; - page->umem = ib_umem_get(context->ibucontext.device, page_addr, - PAGE_SIZE, 0); - if (IS_ERR(page->umem)) { - ret = PTR_ERR(page->umem); + page->db_node = kvzalloc(sizeof(*page->db_node), GFP_KERNEL); + if (!page->db_node) { + ret = -ENOMEM; goto err_page; } - page->umem_node = kvmalloc(sizeof(*page->umem_node), GFP_KERNEL); - if (!page->umem_node) { - ret = -ENOMEM; - goto err_umem; + + umem = ib_umem_get(context->ibucontext.device, page_addr, PAGE_SIZE, 0); + if (IS_ERR(umem)) { + ret = PTR_ERR(umem); + goto err_dbnode; } + page->db_node->umem = umem; list_add(&page->list, &context->page_list); found: offset = virt - page_addr; - db->dma = sg_dma_address(page->umem->sg_head.sgl) + offset; - db->virt_addr = sg_virt(page->umem->sg_head.sgl) + offset; + db->dma = sg_dma_address(page->db_node->umem->sg_head.sgl) + offset; + db->virt_addr = sg_virt(page->db_node->umem->sg_head.sgl) + offset; db->u.user_page = page; refcount_inc(&page->refcount); mutex_unlock(&context->page_mutex); return 0; -err_umem: - ib_umem_release(page->umem); +err_dbnode: + kvfree(page->db_node); err_page: kvfree(page); err_out: @@ -67,20 +69,20 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, bool delayed_unmap_flag) { struct hns_roce_dev *hr_dev = to_hr_dev(context->ibucontext.device); - struct hns_roce_umem_node *umem_node = db->u.user_page->umem_node; + struct hns_roce_db_pg_node *db_node = 
db->u.user_page->db_node;

 	mutex_lock(&context->page_mutex);

-	umem_node->delayed_unmap_flag |= delayed_unmap_flag;
+	db_node->delayed_unmap_flag |= delayed_unmap_flag;

 	refcount_dec(&db->u.user_page->refcount);
 	if (refcount_dec_if_one(&db->u.user_page->refcount)) {
 		list_del(&db->u.user_page->list);
-		if (umem_node->delayed_unmap_flag) {
-			hns_roce_add_unfree_umem(db->u.user_page, hr_dev);
+		if (db_node->delayed_unmap_flag) {
+			hns_roce_add_unfree_db(db_node, hr_dev);
 		} else {
-			ib_umem_release(db->u.user_page->umem);
-			kvfree(umem_node);
+			ib_umem_release(db_node->umem);
+			kvfree(db_node);
 		}
 		kfree(db->u.user_page);
 	}
@@ -92,23 +94,36 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
 					struct device *dma_device)
 {
 	struct hns_roce_db_pgdir *pgdir;
+	dma_addr_t db_dma;
+	u32 *page;

 	pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
 	if (!pgdir)
 		return NULL;

 	bitmap_fill(pgdir->order1,
-		    HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
+		    HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
 	pgdir->bits[0] = pgdir->order0;
 	pgdir->bits[1] = pgdir->order1;
-	pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-					 &pgdir->db_dma, GFP_KERNEL);
-	if (!pgdir->page) {
-		kfree(pgdir);
-		return NULL;
-	}
+	pgdir->db_node = kvzalloc(sizeof(*pgdir->db_node), GFP_KERNEL);
+	if (!pgdir->db_node)
+		goto err_node;
+
+	page = dma_alloc_coherent(dma_device, PAGE_SIZE, &db_dma, GFP_KERNEL);
+	if (!page)
+		goto err_dma;
+
+	pgdir->db_node->kdb.page = page;
+	pgdir->db_node->kdb.db_dma = db_dma;

 	return pgdir;
+
+err_dma:
+	kvfree(pgdir->db_node);
+err_node:
+	kfree(pgdir);
+	return NULL;
 }

 static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir,
@@ -135,8 +150,8 @@ static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir,
 	db->u.pgdir = pgdir;
 	db->index = i;
-	db->db_record = pgdir->page + db->index;
-	db->dma = pgdir->db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE;
+	db->db_record = pgdir->db_node->kdb.page + db->index;
+	db->dma = pgdir->db_node->kdb.db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE;
 	db->order = order;

 	return 0;
@@ -171,13 +186,17 @@ int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
 	return ret;
 }

-void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+		      bool delayed_unmap_flag)
 {
+	struct hns_roce_db_pg_node *db_node = db->u.pgdir->db_node;
 	unsigned long o;
 	unsigned long i;

 	mutex_lock(&hr_dev->pgdir_mutex);

+	db_node->delayed_unmap_flag |= delayed_unmap_flag;
+
 	o = db->order;
 	i = db->index;
@@ -191,9 +210,15 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
 	if (bitmap_full(db->u.pgdir->order1,
 			HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT)) {
-		dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page,
-				  db->u.pgdir->db_dma);
 		list_del(&db->u.pgdir->list);
+		if (db_node->delayed_unmap_flag) {
+			hns_roce_add_unfree_db(db_node, hr_dev);
+		} else {
+			dma_free_coherent(hr_dev->dev, PAGE_SIZE,
+					  db_node->kdb.page,
+					  db_node->kdb.db_dma);
+			kvfree(db_node);
+		}
 		kfree(db->u.pgdir);
 	}

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 09798d124aa9..fc7ff0476917 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -488,27 +488,29 @@ struct hns_roce_buf {
 	unsigned int page_shift;
 };

+struct hns_roce_db_pg_node {
+	struct list_head list;
+	struct ib_umem *umem;
+	struct {
+		u32 *page;
+		dma_addr_t db_dma;
+	} kdb;
+	bool delayed_unmap_flag;
+};
+
 struct hns_roce_db_pgdir {
 	struct list_head list;
 	DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE);
 	DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
 	unsigned long *bits[HNS_ROCE_DB_TYPE_COUNT];
-	u32 *page;
-	dma_addr_t db_dma;
-};
-
-struct hns_roce_umem_node {
-	struct ib_umem *umem;
-	struct list_head list;
-	bool delayed_unmap_flag;
+	struct hns_roce_db_pg_node *db_node;
 };

 struct hns_roce_user_db_page {
 	struct list_head list;
-	struct ib_umem *umem;
 	unsigned long user_virt;
 	refcount_t refcount;
-	struct hns_roce_umem_node *umem_node;
+	struct hns_roce_db_pg_node *db_node;
 };

 struct hns_roce_db {
@@ -1225,8 +1227,8 @@ struct hns_roce_dev {
 	size_t notify_num;
 	struct list_head mtr_unfree_list; /* list of unfree mtr on this dev */
 	struct mutex mtr_unfree_list_mutex; /* protect mtr_unfree_list */
-	struct list_head umem_unfree_list; /* list of unfree umem on this dev */
-	struct mutex umem_unfree_list_mutex; /* protect umem_unfree_list */
+	struct list_head db_unfree_list; /* list of unfree db on this dev */
+	struct mutex db_unfree_list_mutex; /* protect db_unfree_list */
 	void *dca_safe_buf;
 	dma_addr_t dca_safe_page;
@@ -1520,7 +1522,8 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
 			    bool delayed_unmap_flag);
 int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
 		      int order);
-void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+		      bool delayed_unmap_flag);
 void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
@@ -1538,9 +1541,9 @@ int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
 int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
-void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page,
-			      struct hns_roce_dev *hr_dev);
-void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev);
+void hns_roce_add_unfree_db(struct hns_roce_db_pg_node *db_node,
+			    struct hns_roce_dev *hr_dev);
+void hns_roce_free_unfree_db(struct hns_roce_dev *hr_dev);
 void hns_roce_add_unfree_mtr(struct hns_roce_dev *hr_dev,
 			     struct hns_roce_mtr *mtr);
 void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 99accce12665..f982b59a668a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -1296,7 +1296,7 @@ static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
 	hns_roce_cleanup_dca(hr_dev);
 	hns_roce_cleanup_bitmap(hr_dev);

-	mutex_destroy(&hr_dev->umem_unfree_list_mutex);
+	mutex_destroy(&hr_dev->db_unfree_list_mutex);
 	mutex_destroy(&hr_dev->mtr_unfree_list_mutex);
 	mutex_destroy(&hr_dev->uctx_list_mutex);
@@ -1326,8 +1326,8 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 	INIT_LIST_HEAD(&hr_dev->mtr_unfree_list);
 	mutex_init(&hr_dev->mtr_unfree_list_mutex);

-	INIT_LIST_HEAD(&hr_dev->umem_unfree_list);
-	mutex_init(&hr_dev->umem_unfree_list_mutex);
+	INIT_LIST_HEAD(&hr_dev->db_unfree_list);
+	mutex_init(&hr_dev->db_unfree_list_mutex);

 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
@@ -1369,7 +1369,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 err_uar_table_free:
 	ida_destroy(&hr_dev->uar_ida.ida);

-	mutex_destroy(&hr_dev->umem_unfree_list_mutex);
+	mutex_destroy(&hr_dev->db_unfree_list_mutex);
 	mutex_destroy(&hr_dev->mtr_unfree_list_mutex);
 	mutex_destroy(&hr_dev->uctx_list_mutex);
@@ -1594,7 +1594,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
 	if (hr_dev->hw->hw_exit)
 		hr_dev->hw->hw_exit(hr_dev);

-	hns_roce_free_unfree_umem(hr_dev);
+	hns_roce_free_unfree_db(hr_dev);
 	hns_roce_free_unfree_mtr(hr_dev);
 	hns_roce_teardown_hca(hr_dev);
 	hns_roce_cleanup_hem(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 6a2fe8ac46c4..17d19c696392 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -1288,27 +1288,27 @@ void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev)
 	mutex_unlock(&hr_dev->mtr_unfree_list_mutex);
 }

-void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page,
-			      struct hns_roce_dev *hr_dev)
+void hns_roce_add_unfree_db(struct hns_roce_db_pg_node *db_node,
+			    struct hns_roce_dev *hr_dev)
 {
-	struct hns_roce_umem_node *pos = user_page->umem_node;
-
-	pos->umem = user_page->umem;
-
-	mutex_lock(&hr_dev->umem_unfree_list_mutex);
-	list_add_tail(&pos->list, &hr_dev->umem_unfree_list);
-	mutex_unlock(&hr_dev->umem_unfree_list_mutex);
+	mutex_lock(&hr_dev->db_unfree_list_mutex);
+	list_add_tail(&db_node->list, &hr_dev->db_unfree_list);
+	mutex_unlock(&hr_dev->db_unfree_list_mutex);
 }

-void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev)
+void hns_roce_free_unfree_db(struct hns_roce_dev *hr_dev)
 {
-	struct hns_roce_umem_node *pos, *next;
+	struct hns_roce_db_pg_node *pos, *next;

-	mutex_lock(&hr_dev->umem_unfree_list_mutex);
-	list_for_each_entry_safe(pos, next, &hr_dev->umem_unfree_list, list) {
+	mutex_lock(&hr_dev->db_unfree_list_mutex);
+	list_for_each_entry_safe(pos, next, &hr_dev->db_unfree_list, list) {
 		list_del(&pos->list);
-		ib_umem_release(pos->umem);
+		if (pos->umem)
+			ib_umem_release(pos->umem);
+		else
+			dma_free_coherent(hr_dev->dev, PAGE_SIZE,
+					  pos->kdb.page, pos->kdb.db_dma);
 		kvfree(pos);
 	}
-	mutex_unlock(&hr_dev->umem_unfree_list_mutex);
+	mutex_unlock(&hr_dev->db_unfree_list_mutex);
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 0710ddc77079..cd3a67e66eff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1132,7 +1132,8 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		qp_user_mmap_entry_remove(hr_qp);
 	} else {
 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
-			hns_roce_free_db(hr_dev, &hr_qp->rdb);
+			hns_roce_free_db(hr_dev, &hr_qp->rdb,
+					 hr_qp->delayed_destroy_flag);
 	}
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 68fbd5dabc3f..a4d50d99c80c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -442,7 +442,7 @@ static void free_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 		hns_roce_db_unmap_user(uctx, &srq->rdb,
 				       srq->delayed_destroy_flag);
 	} else {
-		hns_roce_free_db(hr_dev, &srq->rdb);
+		hns_roce_free_db(hr_dev, &srq->rdb, srq->delayed_destroy_flag);
 	}
 }
--
2.33.0
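The patch above centers on one pattern: when hardware may still access a
doorbell page at destroy time, the page is not freed immediately but parked
on a per-device list and released only at device teardown. The following is
a minimal standalone sketch of that pattern; the demo_* names and the
surrounding demo_dev struct are illustrative assumptions, not the driver's
actual types.

#include <linux/dma-mapping.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct demo_dev {
	struct device *dev;
	struct list_head db_unfree_list;   /* nodes awaiting final free */
	struct mutex db_unfree_list_mutex; /* protects db_unfree_list */
};

struct demo_db_node {
	struct list_head list;
	u32 *page;                         /* kernel doorbell page */
	dma_addr_t dma;
};

/* Destroy path: hardware may still DMA into the page, so only park it. */
static void demo_add_unfree_db(struct demo_dev *dev,
			       struct demo_db_node *node)
{
	mutex_lock(&dev->db_unfree_list_mutex);
	list_add_tail(&node->list, &dev->db_unfree_list);
	mutex_unlock(&dev->db_unfree_list_mutex);
}

/* Device teardown: hardware is quiesced, so the real free is now safe. */
static void demo_free_unfree_db(struct demo_dev *dev)
{
	struct demo_db_node *pos, *next;

	mutex_lock(&dev->db_unfree_list_mutex);
	list_for_each_entry_safe(pos, next, &dev->db_unfree_list, list) {
		list_del(&pos->list);
		dma_free_coherent(dev->dev, PAGE_SIZE, pos->page, pos->dma);
		kvfree(pos);
	}
	mutex_unlock(&dev->db_unfree_list_mutex);
}

The actual patch additionally stores an ib_umem pointer in the same node so
user and kernel doorbell pages share one deferred-free list.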

From: Xinghai Cen <cenxinghai@h-partners.com>

driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBQK42

----------------------------------------------------------------------

page is allocated with kmalloc(), and should be freed with kfree().

Fixes: 431c875e4b02 ("RDMA/hns: Fix simultaneous reset and resource deregistration")
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_db.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index dc3d3a075525..52325cbf9629 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -57,7 +57,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 err_dbnode:
 	kvfree(page->db_node);
 err_page:
-	kvfree(page);
+	kfree(page);
 err_out:
 	mutex_unlock(&context->page_mutex);
--
2.33.0
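The rule behind this fix: memory obtained with kmalloc() is returned with
kfree(), while kvmalloc()/kvzalloc() memory, which may come from vmalloc,
goes through kvfree(). A small self-contained sketch of the correct
pairings; the demo_* names are illustrative, not from the driver.

#include <linux/mm.h>
#include <linux/slab.h>

struct demo_meta {
	unsigned long virt;
	u64 *records;           /* potentially large, so kvcalloc() */
};

static struct demo_meta *demo_alloc(size_t nrec)
{
	struct demo_meta *m;

	m = kmalloc(sizeof(*m), GFP_KERNEL);   /* small, fixed size */
	if (!m)
		return NULL;

	m->records = kvcalloc(nrec, sizeof(*m->records), GFP_KERNEL);
	if (!m->records) {
		kfree(m);       /* kmalloc() pairs with kfree() */
		return NULL;
	}

	return m;
}

static void demo_free(struct demo_meta *m)
{
	kvfree(m->records);     /* kvcalloc() pairs with kvfree() */
	kfree(m);
}

kvfree() does tolerate slab memory at runtime, but mismatched pairs are
wrong by convention and are flagged by static checkers, as here.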

driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBQK42

----------------------------------------------------------------------

The driver runs a for-loop when allocating bt pages and mapping them with
buffer pages. When a large buffer (e.g. an MR over 100GB) is being
allocated, it may require a considerable loop count, which leads to a soft
lockup:

watchdog: BUG: soft lockup - CPU#27 stuck for 22s!
...
Call trace:
 hem_list_alloc_mid_bt+0x124/0x394 [hns_roce_hw_v2]
 hns_roce_hem_list_request+0xf8/0x160 [hns_roce_hw_v2]
 hns_roce_mtr_create+0x2e4/0x360 [hns_roce_hw_v2]
 alloc_mr_pbl+0xd4/0x17c [hns_roce_hw_v2]
 hns_roce_reg_user_mr+0xf8/0x190 [hns_roce_hw_v2]
 ib_uverbs_reg_mr+0x118/0x290

watchdog: BUG: soft lockup - CPU#35 stuck for 23s!
...
Call trace:
 hns_roce_hem_list_find_mtt+0x7c/0xb0 [hns_roce_hw_v2]
 mtr_map_bufs+0xc4/0x204 [hns_roce_hw_v2]
 hns_roce_mtr_create+0x31c/0x3c4 [hns_roce_hw_v2]
 alloc_mr_pbl+0xb0/0x160 [hns_roce_hw_v2]
 hns_roce_reg_user_mr+0x108/0x1c0 [hns_roce_hw_v2]
 ib_uverbs_reg_mr+0x120/0x2bc

Add a cond_resched() to fix the soft lockup in these loops. In order not
to affect the allocation performance of normal-size buffers, set the loop
count of a 100GB MR as the threshold for calling cond_resched().

Fixes: 38389eaa4db1 ("RDMA/hns: Add mtr support for mixed multihop addressing")
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_hem.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 6d48ccaed1f0..144b8ed4fbb6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1420,6 +1420,11 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
 	return ret;
 }

+/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming
+ * the bt page size is not expanded by cal_best_bt_pg_sz()
+ */
+#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800
+
 /* construct the base address table and link them by address hop config */
 int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
 			      struct hns_roce_hem_list *hem_list,
@@ -1428,6 +1433,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
 {
 	const struct hns_roce_buf_region *r;
 	int ofs, end;
+	int loop;
 	int unit;
 	int ret;
 	int i;
@@ -1445,7 +1451,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
 			continue;

 		end = r->offset + r->count;
-		for (ofs = r->offset; ofs < end; ofs += unit) {
+		for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) {
+			if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+				cond_resched();
+
 			ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
 						    hem_list->mid_bt[i],
 						    &hem_list->btm_bt);
@@ -1502,9 +1511,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
 	struct list_head *head = &hem_list->btm_bt;
 	struct hns_roce_hem_item *hem, *temp_hem;
 	void *cpu_base = NULL;
+	int loop = 1;
 	int nr = 0;

 	list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+		if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+			cond_resched();
+		loop++;
+
 		if (hem_list_page_is_in_range(hem, offset)) {
 			nr = offset - hem->start;
 			cpu_base = hem->addr + nr * BA_BYTE_LEN;
--
2.33.0
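The fix applies a common kernel technique: in a long, process-context loop,
call cond_resched() every N iterations so other tasks can run before the
soft-lockup watchdog (roughly 20 seconds by default) fires. A generic
sketch follows, using an assumed threshold constant and demo_* names rather
than the driver's 100GB-MR-derived value.

#include <linux/sched.h>

/* Illustrative value; the patch derives its threshold from the bt page
 * count of a 100GB MR on a 4K-page system.
 */
#define DEMO_RESCHED_THRESHOLD 10000

static int demo_long_loop(unsigned long start, unsigned long end,
			  int (*work)(unsigned long))
{
	unsigned long ofs, loop;
	int ret;

	for (ofs = start, loop = 1; ofs < end; ofs++, loop++) {
		/* Must run in process context: yield the CPU
		 * periodically so a huge range cannot monopolize
		 * a core and trigger the watchdog.
		 */
		if (!(loop % DEMO_RESCHED_THRESHOLD))
			cond_resched();

		ret = work(ofs);
		if (ret)
			return ret;
	}

	return 0;
}

Checking the counter with a modulo keeps the common case cheap: small
allocations never reach the threshold, so they pay only one comparison per
iteration.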

From: Yuyu Li <liyuyu6@huawei.com>

driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBQK42

----------------------------------------------------------------------

Fix ifnullfree static warning: NULL check before some freeing
functions is not needed.

Fixes: 21cacb516f20 ("RDMA/hns: Support write with notify")
Signed-off-by: Yuyu Li <liyuyu6@huawei.com>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_ext.c  | 3 +--
 drivers/infiniband/hw/hns/hns_roce_main.c | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_ext.c b/drivers/infiniband/hw/hns/hns_roce_ext.c
index 63d0a48abe71..1faa23702d92 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ext.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ext.c
@@ -133,8 +133,7 @@ int rdma_unregister_notify_addr(struct ib_device *ib_dev)
 	if (!is_hns_roce(ib_dev) || !is_write_notify_supported(hr_dev))
 		return -EOPNOTSUPP;

-	if (hr_dev->notify_tbl)
-		kvfree(hr_dev->notify_tbl);
+	kvfree(hr_dev->notify_tbl);

 	hr_dev->notify_tbl = NULL;

diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index f982b59a668a..13cace92f7ff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -1607,8 +1607,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
 	if (hr_dev->hw->cmq_exit)
 		hr_dev->hw->cmq_exit(hr_dev);
 	hns_roce_dealloc_dfx_cnt(hr_dev);
-	if (hr_dev->notify_tbl)
-		kvfree(hr_dev->notify_tbl);
+
+	kvfree(hr_dev->notify_tbl);
 }

 MODULE_LICENSE("Dual BSD/GPL");
--
2.33.0
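The warning exists because kfree(), vfree() and kvfree() are all defined to
be no-ops on a NULL pointer, so guarding them with an if is redundant. A
minimal sketch of the preferred form; the demo_* names are illustrative.

#include <linux/slab.h>

struct demo_ctx {
	void *notify_tbl;
};

static void demo_cleanup(struct demo_ctx *ctx)
{
	/* No "if (ctx->notify_tbl)" guard needed: kvfree(NULL), like
	 * kfree(NULL) and vfree(NULL), simply returns.
	 */
	kvfree(ctx->notify_tbl);
	ctx->notify_tbl = NULL;
}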

FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/15420
Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/JFG...