From: Chengchang Tang <tangchengchang@huawei.com> The flush CQE is actually executed asynchronously. If the drain QP has already triggered the flush CQE and a HW error occurs during this process, the driver is unable to detect the flush failure. In this case, because we are using wait_for_completion(), the drain QP thread will wait for the signal indefinitely, leading to a hung task exception warning. Replace wait_for_completion() with wait_for_completion_timeout() to avoid waiting indefinitely. Signed-off-by: Chengchang Tang <tangchengchang@huawei.com> --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ca26e20c86f8..e2572f61f42f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1015,6 +1015,7 @@ static void handle_drain_completion(struct ib_cq *ibcq, struct hns_roce_drain_cqe *drain, struct hns_roce_dev *hr_dev) { +#define DRAIN_QP_TMO (HZ * 30) #define TIMEOUT (HZ / 10) struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); unsigned long flags; @@ -1059,8 +1060,10 @@ static void handle_drain_completion(struct ib_cq *ibcq, ibcq->comp_handler(ibcq, ibcq->cq_context); waiting_done: - if (ibcq->comp_handler) - wait_for_completion(&drain->done); + if (ibcq->comp_handler) { + if (!wait_for_completion_timeout(&drain->done, DRAIN_QP_TMO)) + ibdev_err_ratelimited(&hr_dev->ib_dev, "Drain qp timeout!\n"); + } } static void hns_roce_v2_drain_rq(struct ib_qp *ibqp) -- 2.33.0