From: Chengchang Tang <tangchengchang@huawei.com>
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I63KVU
----------------------------------------------------------
If a uQP works in DCA mode, the userspace driver needs to drop the WQE buffer by calling the 'HNS_IB_METHOD_DCA_MEM_DETACH' method when the QP's CI is equal to its PI. At that point the hns ROCEE will no longer access the WQE buffer, so the userspace driver can free it.
This method starts a delayed work in kernel space to recycle the WQE buffer. If the WQE buffer is indeed no longer being accessed by the hns ROCEE, the worker marks its pages as free in the DCA memory pool.
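For reference, the expected userspace flow is sketched below: once the SQ's CI catches up with its PI, the provider invokes the new detach method with the current SQ head so the kernel can start the ageing worker. This is only an illustrative sketch built on rdma-core's provider-internal ioctl helpers (DECLARE_COMMAND_BUFFER, fill_attr_in_obj, fill_attr_in_uint32, execute_ioctl); the function name and the sq_head parameter are placeholders, not part of this patch.

	/* Illustrative sketch only: ask the kernel to recycle the DCA WQE
	 * buffer of a QP whose SQ CI has caught up with its PI. 'sq_head'
	 * stands in for however the provider tracks the SQ producer index.
	 * Assumes rdma-core's provider-internal ioctl helpers are available.
	 */
	static int hns_dca_detach_sketch(struct ibv_context *ctx,
					 struct ibv_qp *qp, uint32_t sq_head)
	{
		DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
				       HNS_IB_METHOD_DCA_MEM_DETACH, 2);

		fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE,
				 qp->handle);
		fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
				    sq_head);

		/* The kernel only queues the delayed ageing work here; the
		 * pages are freed later, once the hardware reports the
		 * buffer as inactive.
		 */
		return execute_ioctl(ctx, cmd);
	}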
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
Reviewed-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_dca.c    | 164 +++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_dca.h    |   7 +-
 drivers/infiniband/hw/hns/hns_roce_device.h |   4 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  50 ++++++
 drivers/infiniband/hw/hns/hns_roce_qp.c     |   4 +-
 include/uapi/rdma/hns-abi.h                 |   6 +
 6 files changed, 230 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c
index 21b360be4996..1be0e9822c9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_dca.c
+++ b/drivers/infiniband/hw/hns/hns_roce_dca.c
@@ -15,6 +15,9 @@
 #define UVERBS_MODULE_NAME hns_ib
 #include <rdma/uverbs_named_ioctl.h>

+/* DCA mem ageing interval time */
+#define DCA_MEM_AGEING_MSES 1000
+
 /* DCA memory */
 struct dca_mem {
 #define DCA_MEM_FLAGS_ALLOCED BIT(0)
@@ -42,6 +45,12 @@ static inline void set_dca_page_to_free(struct hns_dca_page_state *state)
 	state->lock = 0;
 }

+static inline void set_dca_page_to_inactive(struct hns_dca_page_state *state)
+{
+	state->active = 0;
+	state->lock = 0;
+}
+
 static inline void lock_dca_page_to_attach(struct hns_dca_page_state *state,
 					   u32 buf_id)
 {
@@ -710,7 +719,10 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev,
 	u32 buf_id;
 	int ret;

+	/* Stop DCA mem ageing worker */
+	cancel_delayed_work(&cfg->dwork);
 	resp->alloc_flags = 0;
+
 	spin_lock(&cfg->lock);
 	buf_id = cfg->buf_id;
 	/* Already attached */
@@ -749,20 +761,140 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev,
 	return 0;
 }

+struct dca_page_free_buf_attr {
+	u32 buf_id;
+	u32 max_pages;
+	u32 free_pages;
+	u32 clean_mems;
+};
+
+static int free_buffer_pages_proc(struct dca_mem *mem, int index, void *param)
+{
+	struct dca_page_free_buf_attr *attr = param;
+	struct hns_dca_page_state *state;
+	bool changed = false;
+	bool stop = false;
+	int i, free_pages;
+
+	free_pages = 0;
+	for (i = 0; !stop && i < mem->page_count; i++) {
+		state = &mem->states[i];
+		/* Change matched pages state */
+		if (dca_page_is_attached(state, attr->buf_id)) {
+			set_dca_page_to_free(state);
+			changed = true;
+			attr->free_pages++;
+			if (attr->free_pages == attr->max_pages)
+				stop = true;
+		}
+
+		if (dca_page_is_free(state))
+			free_pages++;
+	}
+
+	for (; changed && i < mem->page_count; i++)
+		if (dca_page_is_free(&mem->states[i]))
+			free_pages++;
+
+	if (changed && free_pages == mem->page_count)
+		attr->clean_mems++;
+
+	return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE;
+}
+
+static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx,
+				  struct hns_roce_dca_cfg *cfg)
+{
+	struct dca_page_free_buf_attr attr = {};
+	unsigned long flags;
+	u32 buf_id;
+
+	spin_lock(&cfg->lock);
+	buf_id = cfg->buf_id;
+	cfg->buf_id = HNS_DCA_INVALID_BUF_ID;
+	spin_unlock(&cfg->lock);
+	if (buf_id == HNS_DCA_INVALID_BUF_ID)
+		return;
+
+	attr.buf_id = buf_id;
+	attr.max_pages = cfg->npages;
+	travel_dca_pages(ctx, &attr, free_buffer_pages_proc);
+
+	/* Update free size */
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+	ctx->free_mems += attr.clean_mems;
+	ctx->free_size += attr.free_pages << HNS_HW_PAGE_SHIFT;
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+}
+
+static void kick_dca_mem(struct hns_roce_dev *hr_dev,
+			 struct hns_roce_dca_cfg *cfg,
+			 struct hns_roce_ucontext *uctx)
+{
+	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx);
+
+	/* Stop ageing worker and free DCA buffer from pool */
+	cancel_delayed_work_sync(&cfg->dwork);
+	free_buf_from_dca_mem(ctx, cfg);
+}
+
+static void dca_mem_ageing_work(struct work_struct *work)
+{
+	struct hns_roce_qp *hr_qp = container_of(work, struct hns_roce_qp,
+						 dca_cfg.dwork.work);
+	struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+	struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp);
+	bool hw_is_inactive;
+
+	hw_is_inactive = hr_dev->hw->chk_dca_buf_inactive &&
+			 hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp);
+	if (hw_is_inactive)
+		free_buf_from_dca_mem(ctx, &hr_qp->dca_cfg);
+}
+
+void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+	struct hns_roce_ucontext *uctx;
+
+	if (hr_qp->ibqp.uobject && hr_qp->ibqp.pd->uobject) {
+		uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context);
+		kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx);
+	}
+}
+
+static void detach_dca_mem(struct hns_roce_dev *hr_dev,
+			   struct hns_roce_qp *hr_qp,
+			   struct hns_dca_detach_attr *attr)
+{
+	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
+
+	/* Start an ageing worker to free buffer */
+	cancel_delayed_work(&cfg->dwork);
+	spin_lock(&cfg->lock);
+	cfg->sq_idx = attr->sq_idx;
+	queue_delayed_work(hr_dev->irq_workq, &cfg->dwork,
+			   msecs_to_jiffies(DCA_MEM_AGEING_MSES));
+	spin_unlock(&cfg->lock);
+}
+
 void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
 	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;

 	spin_lock_init(&cfg->lock);
+	INIT_DELAYED_WORK(&cfg->dwork, dca_mem_ageing_work);
 	cfg->buf_id = HNS_DCA_INVALID_BUF_ID;
 	cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT;
 }

 void hns_roce_disable_dca(struct hns_roce_dev *hr_dev,
-			  struct hns_roce_qp *hr_qp)
+			  struct hns_roce_qp *hr_qp, struct ib_udata *udata)
 {
+	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+					struct hns_roce_ucontext, ibucontext);
 	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;

+	kick_dca_mem(hr_dev, cfg, uctx);
 	cfg->buf_id = HNS_DCA_INVALID_BUF_ID;
 }

@@ -951,12 +1083,40 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
 			    UVERBS_ATTR_TYPE(u32), UA_MANDATORY));

+static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_DETACH)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs);
+	struct hns_dca_detach_attr attr = {};
+	int ret;
+
+	if (!hr_qp)
+		return -EINVAL;
+
+	ret = uverbs_copy_from(&attr.sq_idx, attrs,
+			       HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX);
+	if (ret)
+		return ret;
+
+	detach_dca_mem(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr);
+
+	return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	HNS_IB_METHOD_DCA_MEM_DETACH,
+	UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, UVERBS_OBJECT_QP,
+			UVERBS_ACCESS_WRITE, UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+			   UVERBS_ATTR_TYPE(u32), UA_MANDATORY));
+
 DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM,
 			    UVERBS_TYPE_ALLOC_IDR(dca_cleanup),
 			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG),
 			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG),
 			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK),
-			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_ATTACH));
+			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_ATTACH),
+			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DETACH));

 static bool dca_is_supported(struct ib_device *device)
 {
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h
index f9eea9beb092..fdc3aaa4b10b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_dca.h
+++ b/drivers/infiniband/hw/hns/hns_roce_dca.h
@@ -50,6 +50,10 @@ struct hns_dca_attach_resp {
 	u32 alloc_pages;
 };

+struct hns_dca_detach_attr {
+	u32 sq_idx;
+};
+
 void hns_roce_register_udca(struct hns_roce_dev *hr_dev,
 			    struct hns_roce_ucontext *uctx);
 void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev,
@@ -58,5 +62,6 @@ void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev,

 void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
 void hns_roce_disable_dca(struct hns_roce_dev *hr_dev,
-			  struct hns_roce_qp *hr_qp);
+			  struct hns_roce_qp *hr_qp, struct ib_udata *udata);
+void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
 #endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index a58b7d598f4f..ac9dcdf59887 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -319,6 +319,8 @@ struct hns_roce_dca_cfg {
 	u32 buf_id;
 	u16 attach_count;
 	u32 npages;
+	u32 sq_idx;
+	struct delayed_work dwork;
 };

 /* DCA attr for setting WQE buffer */
@@ -912,6 +914,8 @@ struct hns_roce_hw {
 	int (*set_dca_buf)(struct hns_roce_dev *hr_dev,
 			   struct hns_roce_qp *hr_qp,
 			   struct hns_roce_dca_attr *attr);
+	bool (*chk_dca_buf_inactive)(struct hns_roce_dev *hr_dev,
+				     struct hns_roce_qp *hr_qp);
 	int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			 int attr_mask, enum ib_qp_state cur_state,
 			 enum ib_qp_state new_state, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 1e9fa0f14268..f14a8e41aafa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -47,6 +47,7 @@
 #include "hns_roce_device.h"
 #include "hns_roce_cmd.h"
 #include "hns_roce_hem.h"
+#include "hns_roce_dca.h"
 #include "hns_roce_hw_v2.h"

 enum {
@@ -370,6 +371,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	return 0;
 }

+static inline bool check_qp_dca_enable(struct hns_roce_qp *hr_qp)
+{
+	return !!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA);
+}
+
 static int check_send_valid(struct hns_roce_dev *hr_dev,
 			    struct hns_roce_qp *hr_qp)
 {
@@ -5522,6 +5528,10 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 	if (new_state == IB_QPS_RESET && !ibqp->uobject)
 		clear_qp(hr_qp);

+	if (check_qp_dca_enable(hr_qp) &&
+	    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+		hns_roce_dca_kick(hr_dev, hr_qp);
+
 out:
 	return ret;
 }
@@ -5754,6 +5764,45 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	return ret;
 }

+static bool hns_roce_v2_chk_dca_buf_inactive(struct hns_roce_dev *hr_dev,
+					     struct hns_roce_qp *hr_qp)
+{
+	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
+	struct hns_roce_v2_qp_context context = {};
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	u32 tmp, sq_idx;
+	int state;
+	int ret;
+
+	ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
+	if (ret) {
+		ibdev_err(ibdev, "failed to query DCA QPC, ret = %d.\n", ret);
+		return false;
+	}
+
+	state = hr_reg_read(&context, QPC_QP_ST);
+	if (state == HNS_ROCE_QP_ST_ERR || state == HNS_ROCE_QP_ST_RST)
+		return true;
+
+	/* If RQ is not empty, the buffer is always active until the QP stops
+	 * working.
+	 */
+	if (hr_qp->rq.wqe_cnt > 0)
+		return false;
+
+	if (hr_qp->sq.wqe_cnt > 0) {
+		tmp = (u32)hr_reg_read(&context, QPC_RETRY_MSG_MSN);
+		sq_idx = tmp & (hr_qp->sq.wqe_cnt - 1);
+		/* If the SQ PI equals retry_msg_msn in the QPC, the QP is
+		 * inactive.
+		 */
+		if (sq_idx != cfg->sq_idx)
+			return false;
+	}
+
+	return true;
+}
+
 static inline int modify_qp_is_ok(struct hns_roce_qp *hr_qp)
 {
 	return ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
@@ -6991,6 +7040,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
 	.set_hem = hns_roce_v2_set_hem,
 	.clear_hem = hns_roce_v2_clear_hem,
 	.set_dca_buf = hns_roce_v2_set_dca_buf,
+	.chk_dca_buf_inactive = hns_roce_v2_chk_dca_buf_inactive,
 	.modify_qp = hns_roce_v2_modify_qp,
 	.dereg_mr = hns_roce_v2_dereg_mr,
 	.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e42461d5ea4c..36868618cd51 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -791,7 +791,7 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	if (ret) {
 		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
 		if (dca_en)
-			hns_roce_disable_dca(hr_dev, hr_qp);
+			hns_roce_disable_dca(hr_dev, hr_qp, udata);
 	}

 	return ret;
@@ -803,7 +803,7 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);

 	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA)
-		hns_roce_disable_dca(hr_dev, hr_qp);
+		hns_roce_disable_dca(hr_dev, hr_qp, udata);
 }

 static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index ebaf917a30bc..96daeccfb7dd 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -144,6 +144,7 @@ enum hns_ib_dca_mem_methods {
 	HNS_IB_METHOD_DCA_MEM_DEREG,
 	HNS_IB_METHOD_DCA_MEM_SHRINK,
 	HNS_IB_METHOD_DCA_MEM_ATTACH,
+	HNS_IB_METHOD_DCA_MEM_DETACH,
 };

 enum hns_ib_dca_mem_reg_attrs {
@@ -174,4 +175,9 @@ enum hns_ib_dca_mem_attach_attrs {
 	HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
 	HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
 };
+
+enum hns_ib_dca_mem_detach_attrs {
+	HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+};
 #endif /* HNS_ABI_USER_H */