From: wenglianfa wenglianfa@huawei.com
mainline inclusion from mainline-v6.13-rc1 commit 323275ac2ff15b2b7b3eac391ae5d8c5a3c3a999 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBEGGK CVE: CVE-2024-56722
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
During reset, cmd to destroy resources such as qp, cq, and mr may fail, and error logs will be printed. When a large number of resources are destroyed, there will be lots of printings, and it may lead to a cpu stuck.
Delete some unnecessary printings and replace other printing functions in these paths with the ratelimited version.
Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Fixes: 70f92521584f ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT") Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Link: https://patch.msgid.link/20241024124000.2931869-6-huangjunxian6@hisilicon.co... Signed-off-by: Leon Romanovsky leon@kernel.org Conflicts: drivers/infiniband/hw/hns/hns_roce_hw_v2.c [This is due to we did not backport the cleanup patch b9989ab3f61e ("RDMA/hns: Remove unnecessary QP type checks")] Signed-off-by: Tengda Wu wutengda2@huawei.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 +- drivers/infiniband/hw/hns/hns_roce_hem.c | 4 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 74 +++++++++++----------- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 4 +- 5 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 01a18fbc5525..2045bd3db831 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -180,8 +180,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, hr_cq->cqn); if (ret) - dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, - hr_cq->cqn); + dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", + ret, hr_cq->cqn);
xa_erase_irq(&cq_table->array, hr_cq->cqn);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f21358515bf9..9a24a3a68e8e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -713,8 +713,8 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); if (ret) - dev_warn(dev, "failed to clear HEM base address, ret = %d.\n", - ret); + dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n", + ret);
hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index fa958ea9c569..21ff5ccde4ae 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -451,19 +451,21 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", - hr_qp->state); + ibdev_err_ratelimited(ibdev, + "failed to post WQE, QP state %u!\n", + hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { - ibdev_err(ibdev, "failed to post WQE, dev state %d!\n", - hr_dev->state); + ibdev_err_ratelimited(ibdev, + "failed to post WQE, dev state %d!\n", + hr_dev->state); return -EIO; }
if (check_dca_attach_enable(hr_qp)) { ret = dca_attach_qp_buf(hr_dev, hr_qp); if (unlikely(ret)) { - ibdev_err(&hr_dev->ib_dev, + ibdev_err_ratelimited(&hr_dev->ib_dev, "failed to attach DCA for QP-%ld send!\n", hr_qp->qpn); return ret; @@ -3027,8 +3029,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT, IB_QPS_INIT, NULL); if (ret) { - ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", - ret); + ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n", + ret); return ret; }
@@ -3716,8 +3718,8 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr); if (ret) { - ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n", - ret); + ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n", + ret); return ret; }
@@ -3756,9 +3758,9 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
ret = free_mr_post_send_lp_wqe(hr_qp); if (ret) { - ibdev_err(ibdev, - "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", - hr_qp->qpn, ret); + ibdev_err_ratelimited(ibdev, + "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", + hr_qp->qpn, ret); break; }
@@ -3769,16 +3771,16 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) while (cqe_cnt) { npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc); if (npolled < 0) { - ibdev_err(ibdev, - "failed to poll cqe for free mr, remain %d cqe.\n", - cqe_cnt); + ibdev_err_ratelimited(ibdev, + "failed to poll cqe for free mr, remain %d cqe.\n", + cqe_cnt); goto out; }
if (time_after(jiffies, end)) { - ibdev_err(ibdev, - "failed to poll cqe for free mr and timeout, remain %d cqe.\n", - cqe_cnt); + ibdev_err_ratelimited(ibdev, + "failed to poll cqe for free mr and timeout, remain %d cqe.\n", + cqe_cnt); goto out; } cqe_cnt -= npolled; @@ -5387,10 +5389,8 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); int ret = 0;
- if (!check_qp_state(cur_state, new_state)) { - ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); + if (!check_qp_state(cur_state, new_state)) return -EINVAL; - }
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, hr_dev->caps.qpc_sz); @@ -5652,7 +5652,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* SW pass context to HW */ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { - ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); + ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret); goto out; }
@@ -5777,7 +5777,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context); if (ret) { - ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); + ibdev_err_ratelimited(ibdev, + "failed to query QPC, ret = %d.\n", + ret); ret = -EINVAL; goto out; } @@ -5785,7 +5787,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, state = hr_reg_read(&context, QPC_QP_ST); tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); if (tmp_qp_state == -1) { - ibdev_err(ibdev, "Illegal ib_qp_state\n"); + ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n"); ret = -EINVAL; goto out; } @@ -5911,9 +5913,9 @@ int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET, udata); if (ret) - ibdev_err(ibdev, - "failed to modify QP to RST, ret = %d.\n", - ret); + ibdev_err_ratelimited(ibdev, + "failed to modify QP to RST, ret = %d.\n", + ret); }
send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; @@ -5949,9 +5951,9 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", - hr_qp->qpn, ret); + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", + hr_qp->qpn, ret);
hns_roce_qp_destroy(hr_dev, hr_qp, udata);
@@ -6224,9 +6226,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d.\n", - ret); + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to process cmd when modifying CQ, ret = %d.\n", + ret);
return ret; } @@ -6246,9 +6248,9 @@ static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn, ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_CQC, cqn); if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when querying CQ, ret = %d.\n", - ret); + ibdev_err_ratelimited(&hr_dev->ib_dev, + "failed to process cmd when querying CQ, ret = %d.\n", + ret); goto err_mailbox; }
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 5c11bb22facc..09ef095524da 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -136,8 +136,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) - ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n", - ret); + ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n", + ret); }
free_mr_pbl(hr_dev, mr); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index a3999fc69719..b004df101e9d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -150,8 +150,8 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, srq->srqn); if (ret) - dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", - ret, srq->srqn); + dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", + ret, srq->srqn);
xa_erase_irq(&srq_table->xa, srq->srqn);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,转换为PR失败! 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... 失败原因:同步源码仓代码到fork仓失败 建议解决方法:请稍等,机器人会在下一次任务重新执行
FeedBack: The patch(es) which you have sent to kernel@openeuler.org has been converted to PR failed! Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... Failed Reason: sync origin kernel's codes to the fork repository failed Suggest Solution: please wait, the bot will retry in the next interval
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,转换为PR失败! 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... 失败原因:同步源码仓代码到fork仓失败 建议解决方法:请稍等,机器人会在下一次任务重新执行
FeedBack: The patch(es) which you have sent to kernel@openeuler.org has been converted to PR failed! Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... Failed Reason: sync origin kernel's codes to the fork repository failed Suggest Solution: please wait, the bot will retry in the next interval
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,转换为PR失败! 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... 失败原因:同步源码仓代码到fork仓失败 建议解决方法:请稍等,机器人会在下一次任务重新执行
FeedBack: The patch(es) which you have sent to kernel@openeuler.org has been converted to PR failed! Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... Failed Reason: sync origin kernel's codes to the fork repository failed Suggest Solution: please wait, the bot will retry in the next interval
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,转换为PR失败! 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... 失败原因:同步源码仓代码到fork仓失败 建议解决方法:请稍等,机器人会在下一次任务重新执行
FeedBack: The patch(es) which you have sent to kernel@openeuler.org has been converted to PR failed! Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... Failed Reason: sync origin kernel's codes to the fork repository failed Suggest Solution: please wait, the bot will retry in the next interval
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,转换为PR失败! 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... 失败原因:同步源码仓代码到fork仓失败 建议解决方法:请稍等,机器人会在下一次任务重新执行
FeedBack: The patch(es) which you have sent to kernel@openeuler.org has been converted to PR failed! Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5... Failed Reason: sync origin kernel's codes to the fork repository failed Suggest Solution: please wait, the bot will retry in the next interval