From: Chengchang Tang <tangchengchang@huawei.com>
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I63KVU
----------------------------------------------------------
This patch adds DCA support for kernel space.

For kernel QPs, the DCA memory pool is managed per device instead of per ucontext: pages are attached from the pool when a WQE is posted, detached again once the QP goes idle, and unused pool memory is released by the ageing worker. The pool is configured through the new module parameters dca_unit_size, dca_min_size and dca_max_size.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
Reviewed-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_dca.c    | 1326 ++++++++++++-------
 drivers/infiniband/hw/hns/hns_roce_dca.h    |   25 +-
 drivers/infiniband/hw/hns/hns_roce_device.h |   45 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  214 +--
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |    4 +
 drivers/infiniband/hw/hns/hns_roce_main.c   |   32 +-
 drivers/infiniband/hw/hns/hns_roce_mr.c     |   12 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |   32 +-
 include/uapi/rdma/hns-abi.h                 |    3 +-
 9 files changed, 1088 insertions(+), 605 deletions(-)
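The heart of the kernel-space flow is the attach-and-expand loop added in hns_roce_dca_attach(): when the pool cannot supply enough pages for a QP, the driver grows the pool with add_dca_mem() and retries, up to DCA_EXPAND_MEM_TRY_TIMES times. The short stand-alone C sketch below models only that control flow; dca_pool, try_attach() and expand_pool() are hypothetical stand-ins for the driver internals, not real kernel or driver API.

/*
 * Illustrative model of the attach-and-expand loop added by this patch
 * (hns_roce_dca_attach() in hns_roce_dca.c). All types and helpers here
 * are simplified stand-ins, not the driver's real structures.
 */
#include <stdbool.h>
#include <stdio.h>

#define DCA_EXPAND_MEM_TRY_TIMES 3

struct dca_pool {
	unsigned int free_pages;	/* pages currently available in the pool */
	unsigned int unit_pages;	/* pages added per expansion */
};

/* Stand-in for attach_dca_mem(): succeeds only if the pool covers the QP. */
static bool try_attach(struct dca_pool *pool, unsigned int need_pages)
{
	if (pool->free_pages < need_pages)
		return false;
	pool->free_pages -= need_pages;
	return true;
}

/* Stand-in for add_dca_mem(): grow the pool by one allocation unit. */
static void expand_pool(struct dca_pool *pool)
{
	pool->free_pages += pool->unit_pages;
}

/* Mirrors the retry loop in hns_roce_dca_attach(). */
static int attach_with_retry(struct dca_pool *pool, unsigned int need_pages)
{
	int try_times = 0;

	do {
		if (try_attach(pool, need_pages))
			return 0;
		expand_pool(pool);
	} while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);

	return -1;	/* the driver returns -ENOMEM here */
}

int main(void)
{
	/* A QP needing 40 pages forces several expansions before it fits. */
	struct dca_pool pool = { .free_pages = 0, .unit_pages = 16 };

	printf("attach %s\n", attach_with_retry(&pool, 40) ? "failed" : "ok");
	return 0;
}

In the driver itself each expansion is roundup(hr_qp->buff_size, ctx->unit_size) bytes and is refused once the pool would exceed dca_max_size, so the sketch above is only a rough model of that behaviour.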
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 97450d8dcd88..f33a59ef3bc3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -104,25 +104,71 @@ static inline bool dca_mem_is_available(struct dca_mem *mem) return mem->flags == (DCA_MEM_FLAGS_ALLOCED | DCA_MEM_FLAGS_REGISTERED); }
-static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, struct dca_mem *mem, - struct dca_mem_attr *attr) +static void free_dca_pages(struct hns_roce_dev *hr_dev, bool is_user, + void *pages) +{ + if (is_user) + ib_umem_release(pages); + else + hns_roce_buf_free(hr_dev, pages); +} + +static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, bool is_user, + struct dca_mem *mem, struct dca_mem_attr *attr) { struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_umem *umem; + struct hns_roce_buf *kmem; + + if (is_user) { + struct ib_umem *umem; + + umem = ib_umem_get(ibdev, attr->addr, attr->size, 0); + if (IS_ERR(umem)) { + ibdev_err(ibdev, "failed to get uDCA pages, ret = %ld.\n", + PTR_ERR(umem)); + return NULL; + }
- umem = ib_umem_get(ibdev, attr->addr, attr->size, 0); - if (IS_ERR(umem)) { - ibdev_err(ibdev, "failed to get uDCA pages, ret = %ld.\n", - PTR_ERR(umem)); + mem->page_count = ib_umem_num_dma_blocks(umem, + HNS_HW_PAGE_SIZE); + return umem; + } + + kmem = hns_roce_buf_alloc(hr_dev, attr->size, HNS_HW_PAGE_SHIFT, + HNS_ROCE_BUF_NOSLEEP | HNS_ROCE_BUF_NOFAIL); + if (IS_ERR(kmem)) { + ibdev_err(ibdev, "failed to alloc kDCA pages, ret = %ld.\n", + PTR_ERR(kmem)); return NULL; }
- mem->page_count = ib_umem_num_dma_blocks(umem, HNS_HW_PAGE_SIZE); + mem->page_count = kmem->npages; + /* Override the attr->size by actually alloced size */ + attr->size = kmem->ntrunks << kmem->trunk_shift; + return kmem; + +} + +static void init_dca_kmem_states(struct hns_roce_dev *hr_dev, + struct hns_dca_page_state *states, int count, + struct hns_roce_buf *kmem) +{ + dma_addr_t cur_addr; + dma_addr_t pre_addr; + int i; + + pre_addr = 0; + for (i = 0; i < kmem->npages && i < count; i++) { + cur_addr = hns_roce_buf_page(kmem, i); + if (cur_addr - pre_addr != HNS_HW_PAGE_SIZE) + states[i].head = 1;
- return umem; + pre_addr = cur_addr; + } }
-static void init_dca_umem_states(struct hns_dca_page_state *states, int count, +static void init_dca_umem_states(struct hns_roce_dev *hr_dev, + struct hns_dca_page_state *states, int count, struct ib_umem *umem) { struct ib_block_iter biter; @@ -144,7 +190,9 @@ static void init_dca_umem_states(struct hns_dca_page_state *states, int count, } }
-static struct hns_dca_page_state *alloc_dca_states(void *pages, int count) +static struct hns_dca_page_state *alloc_dca_states(struct hns_roce_dev *hr_dev, + void *pages, int count, + bool is_user) { struct hns_dca_page_state *states;
@@ -152,7 +200,10 @@ static struct hns_dca_page_state *alloc_dca_states(void *pages, int count) if (!states) return NULL;
- init_dca_umem_states(states, count, pages); + if (is_user) + init_dca_umem_states(hr_dev, states, count, pages); + else + init_dca_kmem_states(hr_dev, states, count, pages);
return states; } @@ -192,17 +243,143 @@ static void travel_dca_pages(struct hns_roce_dca_ctx *ctx, void *param, spin_unlock_irqrestore(&ctx->pool_lock, flags); }
-/* user DCA is managed by ucontext */ +struct dca_get_alloced_pages_attr { + u32 buf_id; + dma_addr_t *pages; + u32 total; + u32 max; +}; + +static int get_alloced_kmem_proc(struct dca_mem *mem, int index, void *param) + +{ + struct dca_get_alloced_pages_attr *attr = param; + struct hns_dca_page_state *states = mem->states; + struct hns_roce_buf *kmem = mem->pages; + u32 i; + + for (i = 0; i < kmem->npages; i++) { + if (dca_page_is_allocated(&states[i], attr->buf_id)) { + attr->pages[attr->total++] = hns_roce_buf_page(kmem, i); + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; + } + } + + return DCA_MEM_NEXT_ITERATE; +} + +static int get_alloced_umem_proc(struct dca_mem *mem, int index, void *param) + +{ + struct dca_get_alloced_pages_attr *attr = param; + struct hns_dca_page_state *states = mem->states; + struct ib_umem *umem = mem->pages; + struct ib_block_iter biter; + u32 i = 0; + + rdma_for_each_block(umem->sg_head.sgl, &biter, + umem->sg_head.nents, HNS_HW_PAGE_SIZE) { + if (dca_page_is_allocated(&states[i], attr->buf_id)) { + attr->pages[attr->total++] = + rdma_block_iter_dma_address(&biter); + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; + } + i++; + } + + return DCA_MEM_NEXT_ITERATE; +} + +/* user DCA is managed by ucontext, kernel DCA is managed by device */ +static inline struct hns_roce_dca_ctx * +to_hr_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx) +{ + return uctx ? &uctx->dca_ctx : &hr_dev->dca_ctx; +} + static inline struct hns_roce_dca_ctx * -to_hr_dca_ctx(struct hns_roce_ucontext *uctx) +hr_qp_to_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_ucontext *uctx = NULL; + + if (hr_qp->ibqp.pd->uobject) + uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); + + return to_hr_dca_ctx(hr_dev, uctx); +} + +static int config_dca_qpc(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, dma_addr_t *pages, + int page_count) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_mtr *mtr = &hr_qp->mtr; + int ret; + + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) { + ibdev_err(ibdev, "failed to map DCA pages, ret = %d.\n", ret); + return ret; + } + + if (hr_dev->hw->set_dca_buf) { + ret = hr_dev->hw->set_dca_buf(hr_dev, hr_qp); + if (ret) { + ibdev_err(ibdev, "failed to set DCA to HW, ret = %d.\n", + ret); + return ret; + } + } + + return 0; +} + +static int setup_dca_buf_to_hw(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx, u32 buf_id, + u32 count) { - return &uctx->dca_ctx; + struct dca_get_alloced_pages_attr attr = {}; + dma_addr_t *pages; + int ret; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(count, sizeof(dma_addr_t), GFP_ATOMIC); + if (!pages) + return -ENOMEM; + + attr.buf_id = buf_id; + attr.pages = pages; + attr.max = count; + + if (hr_qp->ibqp.uobject) + travel_dca_pages(ctx, &attr, get_alloced_umem_proc); + else + travel_dca_pages(ctx, &attr, get_alloced_kmem_proc); + + if (attr.total != count) { + ibdev_err(&hr_dev->ib_dev, "failed to get DCA page %u != %u.\n", + attr.total, count); + ret = -ENOMEM; + goto err_get_pages; + } + + ret = config_dca_qpc(hr_dev, hr_qp, pages, count); +err_get_pages: + /* drop tmp array */ + kvfree(pages); + + return ret; }
-static void unregister_dca_mem(struct hns_roce_ucontext *uctx, +static void unregister_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx, struct dca_mem *mem) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + bool is_user = !!uctx; unsigned long flags; void *states, *pages;
@@ -224,24 +401,25 @@ static void unregister_dca_mem(struct hns_roce_ucontext *uctx, spin_unlock_irqrestore(&ctx->pool_lock, flags);
kfree(states); - ib_umem_release(pages); + free_dca_pages(hr_dev, is_user, pages); }
static int register_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx, struct dca_mem *mem, struct dca_mem_attr *attr) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + bool is_user = !!uctx; void *states, *pages; unsigned long flags;
- pages = alloc_dca_pages(hr_dev, mem, attr); + pages = alloc_dca_pages(hr_dev, is_user, mem, attr); if (!pages) return -ENOMEM;
- states = alloc_dca_states(pages, mem->page_count); + states = alloc_dca_states(hr_dev, pages, mem->page_count, is_user); if (!states) { - ib_umem_release(pages); + free_dca_pages(hr_dev, is_user, pages); return -ENOMEM; }
@@ -263,266 +441,358 @@ static int register_dca_mem(struct hns_roce_dev *hr_dev, return 0; }
-struct dca_mem_shrink_attr { - u64 shrink_key; - u32 shrink_mems; +struct dca_page_clear_attr { + u32 buf_id; + u32 max_pages; + u32 clear_pages; };
-static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param) +static int clear_dca_pages_proc(struct dca_mem *mem, int index, void *param) { - struct dca_mem_shrink_attr *attr = param; - struct hns_dca_page_state *state; - int i, free_pages; - - free_pages = 0; - for (i = 0; i < mem->page_count; i++) { - state = &mem->states[i]; - if (dca_page_is_free(state)) - free_pages++; - } - - /* No pages are in use */ - if (free_pages == mem->page_count) { - /* unregister first empty DCA mem */ - if (!attr->shrink_mems) { - mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; - attr->shrink_key = mem->key; - } + struct hns_dca_page_state *state = &mem->states[index]; + struct dca_page_clear_attr *attr = param;
- attr->shrink_mems++; + if (dca_page_is_attached(state, attr->buf_id)) { + set_dca_page_to_free(state); + attr->clear_pages++; }
- if (attr->shrink_mems > 1) + if (attr->clear_pages >= attr->max_pages) return DCA_MEM_STOP_ITERATE; else - return DCA_MEM_NEXT_ITERATE; + return 0; }
-static void shrink_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_ucontext *uctx, u64 reserved_size, - struct hns_dca_shrink_resp *resp) +static void clear_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); - struct dca_mem_shrink_attr attr = {}; - unsigned long flags; - bool need_shink; - - spin_lock_irqsave(&ctx->pool_lock, flags); - need_shink = ctx->free_mems > 0 && ctx->free_size > reserved_size; - spin_unlock_irqrestore(&ctx->pool_lock, flags); - if (!need_shink) - return; + struct dca_page_clear_attr attr = {};
- travel_dca_pages(ctx, &attr, shrink_dca_page_proc); - resp->free_mems = attr.shrink_mems; - resp->free_key = attr.shrink_key; + attr.buf_id = buf_id; + attr.max_pages = count; + travel_dca_pages(ctx, &attr, clear_dca_pages_proc); }
-static void init_dca_context(struct hns_roce_dca_ctx *ctx) +struct dca_page_assign_attr { + u32 buf_id; + int unit; + int total; + int max; +}; + +static bool dca_page_is_allocable(struct hns_dca_page_state *state, bool head) { - INIT_LIST_HEAD(&ctx->pool); - spin_lock_init(&ctx->pool_lock); - ctx->total_size = 0; + bool is_free = dca_page_is_free(state) || dca_page_is_inactive(state); + + return head ? is_free : is_free && !state->head; }
-static void cleanup_dca_context(struct hns_roce_dev *hr_dev, - struct hns_roce_dca_ctx *ctx) +static int assign_dca_pages_proc(struct dca_mem *mem, int index, void *param) { - struct dca_mem *mem, *tmp; - unsigned long flags; + struct dca_page_assign_attr *attr = param; + struct hns_dca_page_state *state; + int checked_pages = 0; + int start_index = 0; + int free_pages = 0; + int i;
- spin_lock_irqsave(&ctx->pool_lock, flags); - list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { - list_del(&mem->list); - mem->flags = 0; - spin_unlock_irqrestore(&ctx->pool_lock, flags); + /* Check the continuous pages count is not smaller than unit count */ + for (i = index; free_pages < attr->unit && i < mem->page_count; i++) { + checked_pages++; + state = &mem->states[i]; + if (dca_page_is_allocable(state, free_pages == 0)) { + if (free_pages == 0) + start_index = i;
- kfree(mem->states); - ib_umem_release(mem->pages); - kfree(mem); + free_pages++; + } else { + free_pages = 0; + } + }
- spin_lock_irqsave(&ctx->pool_lock, flags); + if (free_pages < attr->unit) + return DCA_MEM_NEXT_ITERATE; + + for (i = 0; i < free_pages; i++) { + state = &mem->states[start_index + i]; + lock_dca_page_to_attach(state, attr->buf_id); + attr->total++; } - ctx->total_size = 0; - spin_unlock_irqrestore(&ctx->pool_lock, flags); -}
-void hns_roce_register_udca(struct hns_roce_dev *hr_dev, - struct hns_roce_ucontext *uctx) -{ - if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) - return; + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE;
- init_dca_context(&uctx->dca_ctx); + return checked_pages; }
-void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, - struct hns_roce_ucontext *uctx) +static u32 assign_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count, + u32 unit) { - if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) - return; + struct dca_page_assign_attr attr = {};
- cleanup_dca_context(hr_dev, &uctx->dca_ctx); + attr.buf_id = buf_id; + attr.unit = unit; + attr.max = count; + travel_dca_pages(ctx, &attr, assign_dca_pages_proc); + return attr.total; }
-static struct dca_mem *alloc_dca_mem(struct hns_roce_dca_ctx *ctx) -{ - struct dca_mem *mem, *tmp, *found = NULL; - unsigned long flags; - - spin_lock_irqsave(&ctx->pool_lock, flags); - list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { - spin_lock(&mem->lock); - if (!mem->flags) { - found = mem; - mem->flags |= DCA_MEM_FLAGS_ALLOCED; - spin_unlock(&mem->lock); - break; +struct dca_page_active_attr { + u32 buf_id; + u32 max_pages; + u32 alloc_pages; + u32 dirty_mems; +}; + +static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_page_active_attr *attr = param; + struct hns_dca_page_state *state; + bool changed = false; + bool stop = false; + int i, free_pages; + + free_pages = 0; + for (i = 0; !stop && i < mem->page_count; i++) { + state = &mem->states[i]; + if (dca_page_is_free(state)) { + free_pages++; + } else if (dca_page_is_allocated(state, attr->buf_id)) { + free_pages++; + /* Change matched pages state */ + unlock_dca_page_to_active(state, attr->buf_id); + changed = true; + attr->alloc_pages++; + if (attr->alloc_pages == attr->max_pages) + stop = true; } - spin_unlock(&mem->lock); } - spin_unlock_irqrestore(&ctx->pool_lock, flags);
- if (found) - return found; + for (; changed && i < mem->page_count; i++) + if (dca_page_is_free(state)) + free_pages++;
- mem = kzalloc(sizeof(*mem), GFP_NOWAIT); - if (!mem) - return NULL; + /* Clean mem changed to dirty */ + if (changed && free_pages == mem->page_count) + attr->dirty_mems++;
- spin_lock_init(&mem->lock); - INIT_LIST_HEAD(&mem->list); + return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; +}
- mem->flags |= DCA_MEM_FLAGS_ALLOCED; +static u32 active_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +{ + struct dca_page_active_attr attr = {}; + unsigned long flags;
+ attr.buf_id = buf_id; + attr.max_pages = count; + travel_dca_pages(ctx, &attr, active_dca_pages_proc); + + /* Update free size */ spin_lock_irqsave(&ctx->pool_lock, flags); - list_add(&mem->list, &ctx->pool); + ctx->free_mems -= attr.dirty_mems; + ctx->free_size -= attr.alloc_pages << HNS_HW_PAGE_SHIFT; spin_unlock_irqrestore(&ctx->pool_lock, flags);
- return mem; -} - -static void free_dca_mem(struct dca_mem *mem) -{ - /* We cannot hold the whole pool's lock during the DCA is working - * until cleanup the context in cleanup_dca_context(), so we just - * set the DCA mem state as free when destroying DCA mem object. - */ - spin_lock(&mem->lock); - mem->flags = 0; - spin_unlock(&mem->lock); -} - -static inline struct hns_roce_dca_ctx *hr_qp_to_dca_ctx(struct hns_roce_qp *qp) -{ - return to_hr_dca_ctx(to_hr_ucontext(qp->ibqp.pd->uobject->context)); + return attr.alloc_pages; }
-struct dca_page_clear_attr { +struct dca_page_query_active_attr { u32 buf_id; - u32 max_pages; - u32 clear_pages; + u32 curr_index; + u32 start_index; + u32 page_index; + u32 page_count; + u64 mem_key; };
-static int clear_dca_pages_proc(struct dca_mem *mem, int index, void *param) +static int query_dca_active_pages_proc(struct dca_mem *mem, int index, + void *param) { struct hns_dca_page_state *state = &mem->states[index]; - struct dca_page_clear_attr *attr = param; + struct dca_page_query_active_attr *attr = param;
- if (dca_page_is_attached(state, attr->buf_id)) { - set_dca_page_to_free(state); - attr->clear_pages++; - } + if (!dca_page_is_active(state, attr->buf_id)) + return 0;
- if (attr->clear_pages >= attr->max_pages) - return DCA_MEM_STOP_ITERATE; - else + if (attr->curr_index < attr->start_index) { + attr->curr_index++; return 0; + } else if (attr->curr_index > attr->start_index) { + return DCA_MEM_STOP_ITERATE; + } + + /* Search first page in DCA mem */ + attr->page_index = index; + attr->mem_key = mem->key; + /* Search active pages in continuous addresses */ + while (index < mem->page_count) { + state = &mem->states[index]; + if (!dca_page_is_active(state, attr->buf_id)) + break; + + index++; + attr->page_count++; + } + + return DCA_MEM_STOP_ITERATE; }
-static void clear_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +static int sync_dca_buf_offset(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr) { - struct dca_page_clear_attr attr = {}; + struct ib_device *ibdev = &hr_dev->ib_dev;
- attr.buf_id = buf_id; - attr.max_pages = count; - travel_dca_pages(ctx, &attr, clear_dca_pages_proc); + if (hr_qp->sq.wqe_cnt > 0) { + if (attr->sq_offset >= hr_qp->sge.offset) { + ibdev_err(ibdev, "failed to check SQ offset = %u\n", + attr->sq_offset); + return -EINVAL; + } + hr_qp->sq.wqe_offset = hr_qp->sq.offset + attr->sq_offset; + } + + if (hr_qp->sge.sge_cnt > 0) { + if (attr->sge_offset >= hr_qp->rq.offset) { + ibdev_err(ibdev, "failed to check exSGE offset = %u\n", + attr->sge_offset); + return -EINVAL; + } + hr_qp->sge.wqe_offset = hr_qp->sge.offset + attr->sge_offset; + } + + if (hr_qp->rq.wqe_cnt > 0) { + if (attr->rq_offset >= hr_qp->buff_size) { + ibdev_err(ibdev, "failed to check RQ offset = %u\n", + attr->rq_offset); + return -EINVAL; + } + hr_qp->rq.wqe_offset = hr_qp->rq.offset + attr->rq_offset; + } + + return 0; }
-struct dca_page_assign_attr { +static u32 alloc_buf_from_dca_mem(struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx) +{ + u32 buf_pages, unit_pages, alloc_pages; u32 buf_id; - int unit; - int total; - int max; -};
-static bool dca_page_is_allocable(struct hns_dca_page_state *state, bool head) -{ - bool is_free = dca_page_is_free(state) || dca_page_is_inactive(state); + buf_pages = hr_qp->dca_cfg.npages; + /* Gen new buf id */ + buf_id = HNS_DCA_TO_BUF_ID(hr_qp->qpn, hr_qp->dca_cfg.attach_count);
- return head ? is_free : is_free && !state->head; + /* Assign pages from free pages */ + unit_pages = hr_qp->mtr.hem_cfg.is_direct ? buf_pages : 1; + alloc_pages = assign_dca_pages(ctx, buf_id, buf_pages, unit_pages); + if (buf_pages != alloc_pages) { + if (alloc_pages > 0) + clear_dca_pages(ctx, buf_id, alloc_pages); + return HNS_DCA_INVALID_BUF_ID; + } + return buf_id; }
-static int assign_dca_pages_proc(struct dca_mem *mem, int index, void *param) +static int active_alloced_buf(struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx, + struct hns_dca_attach_attr *attr, u32 buf_id) { - struct dca_page_assign_attr *attr = param; - struct hns_dca_page_state *state; - int checked_pages = 0; - int start_index = 0; - int free_pages = 0; - int i; - - /* Check the continuous pages count is not smaller than unit count */ - for (i = index; free_pages < attr->unit && i < mem->page_count; i++) { - checked_pages++; - state = &mem->states[i]; - if (dca_page_is_allocable(state, free_pages == 0)) { - if (free_pages == 0) - start_index = i; + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 active_pages, alloc_pages; + int ret;
- free_pages++; - } else { - free_pages = 0; - } + alloc_pages = hr_qp->dca_cfg.npages; + ret = sync_dca_buf_offset(hr_dev, hr_qp, attr); + if (ret) { + ibdev_err(ibdev, "failed to sync DCA offset, ret = %d\n", ret); + goto active_fail; }
- if (free_pages < attr->unit) - return DCA_MEM_NEXT_ITERATE; + ret = setup_dca_buf_to_hw(hr_dev, hr_qp, ctx, buf_id, alloc_pages); + if (ret) { + ibdev_err(ibdev, "failed to setup DCA buf, ret = %d.\n", ret); + goto active_fail; + }
- for (i = 0; i < free_pages; i++) { - state = &mem->states[start_index + i]; - lock_dca_page_to_attach(state, attr->buf_id); - attr->total++; + active_pages = active_dca_pages(ctx, buf_id, alloc_pages); + if (active_pages != alloc_pages) { + ibdev_err(ibdev, "failed to active DCA pages, %u != %u.\n", + active_pages, alloc_pages); + ret = -ENOBUFS; + goto active_fail; }
- if (attr->total >= attr->max) - return DCA_MEM_STOP_ITERATE; + return 0;
- return checked_pages; +active_fail: + clear_dca_pages(ctx, buf_id, alloc_pages); + return ret; }
-static u32 assign_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count, - u32 unit) +static int attach_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr, + struct hns_dca_attach_resp *resp) { - struct dca_page_assign_attr attr = {}; + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + u32 buf_id; + int ret;
- attr.buf_id = buf_id; - attr.unit = unit; - attr.max = count; - travel_dca_pages(ctx, &attr, assign_dca_pages_proc); - return attr.total; + /* Stop DCA mem ageing worker */ + cancel_delayed_work(&cfg->dwork); + resp->alloc_flags = 0; + + spin_lock(&cfg->lock); + buf_id = cfg->buf_id; + /* Already attached */ + if (buf_id != HNS_DCA_INVALID_BUF_ID) { + resp->alloc_pages = cfg->npages; + spin_unlock(&cfg->lock); + return 0; + } + + /* Start to new attach */ + resp->alloc_pages = 0; + buf_id = alloc_buf_from_dca_mem(hr_qp, ctx); + if (buf_id == HNS_DCA_INVALID_BUF_ID) { + spin_unlock(&cfg->lock); + /* No report fail, need try again after the pool increased */ + return 0; + } + + ret = active_alloced_buf(hr_qp, ctx, attr, buf_id); + if (ret) { + spin_unlock(&cfg->lock); + ibdev_err(&hr_dev->ib_dev, + "failed to active DCA buf for QP-%lu, ret = %d.\n", + hr_qp->qpn, ret); + return ret; + } + + /* Attach ok */ + cfg->buf_id = buf_id; + cfg->attach_count++; + spin_unlock(&cfg->lock); + + resp->alloc_flags |= HNS_DCA_ATTACH_FLAGS_NEW_BUFFER; + resp->alloc_pages = cfg->npages; + + return 0; }
-struct dca_page_active_attr { +struct dca_page_free_buf_attr { u32 buf_id; u32 max_pages; - u32 alloc_pages; - u32 dirty_mems; + u32 free_pages; + u32 clean_mems; };
-static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) +static int free_buffer_pages_proc(struct dca_mem *mem, int index, void *param) { - struct dca_page_active_attr *attr = param; + struct dca_page_free_buf_attr *attr = param; struct hns_dca_page_state *state; bool changed = false; bool stop = false; @@ -531,360 +801,453 @@ static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) free_pages = 0; for (i = 0; !stop && i < mem->page_count; i++) { state = &mem->states[i]; - if (dca_page_is_free(state)) { - free_pages++; - } else if (dca_page_is_allocated(state, attr->buf_id)) { - free_pages++; - /* Change matched pages state */ - unlock_dca_page_to_active(state, attr->buf_id); + /* Change matched pages state */ + if (dca_page_is_attached(state, attr->buf_id)) { + set_dca_page_to_free(state); changed = true; - attr->alloc_pages++; - if (attr->alloc_pages == attr->max_pages) + attr->free_pages++; + if (attr->free_pages == attr->max_pages) stop = true; } + + if (dca_page_is_free(state)) + free_pages++; }
for (; changed && i < mem->page_count; i++) if (dca_page_is_free(state)) free_pages++;
- /* Clean mem changed to dirty */ if (changed && free_pages == mem->page_count) - attr->dirty_mems++; + attr->clean_mems++;
return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; }
-static u32 active_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, + struct hns_roce_dca_cfg *cfg) { - struct dca_page_active_attr attr = {}; + struct dca_page_free_buf_attr attr = {}; unsigned long flags; + u32 buf_id; + + spin_lock(&cfg->lock); + buf_id = cfg->buf_id; + cfg->buf_id = HNS_DCA_INVALID_BUF_ID; + spin_unlock(&cfg->lock); + if (buf_id == HNS_DCA_INVALID_BUF_ID) + return;
attr.buf_id = buf_id; - attr.max_pages = count; - travel_dca_pages(ctx, &attr, active_dca_pages_proc); + attr.max_pages = cfg->npages; + travel_dca_pages(ctx, &attr, free_buffer_pages_proc);
/* Update free size */ spin_lock_irqsave(&ctx->pool_lock, flags); - ctx->free_mems -= attr.dirty_mems; - ctx->free_size -= attr.alloc_pages << HNS_HW_PAGE_SHIFT; + ctx->free_mems += attr.clean_mems; + ctx->free_size += attr.free_pages << HNS_HW_PAGE_SHIFT; spin_unlock_irqrestore(&ctx->pool_lock, flags); +}
- return attr.alloc_pages; +static void detach_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr) +{ + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + + /* Start an ageing worker to free buffer */ + cancel_delayed_work(&cfg->dwork); + spin_lock(&cfg->lock); + cfg->sq_idx = attr->sq_idx; + queue_delayed_work(hr_dev->irq_workq, &cfg->dwork, + msecs_to_jiffies(DCA_MEM_AGEING_MSES)); + spin_unlock(&cfg->lock); }
-struct dca_get_alloced_pages_attr { - u32 buf_id; - dma_addr_t *pages; - u32 total; - u32 max; +struct dca_mem_shrink_attr { + u64 shrink_key; + u32 shrink_mems; };
-static int get_alloced_umem_proc(struct dca_mem *mem, int index, void *param) - +static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param) { - struct dca_get_alloced_pages_attr *attr = param; - struct hns_dca_page_state *states = mem->states; - struct ib_umem *umem = mem->pages; - struct ib_block_iter biter; - u32 i = 0; + struct dca_mem_shrink_attr *attr = param; + struct hns_dca_page_state *state; + int i, free_pages;
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - HNS_HW_PAGE_SIZE) { - if (dca_page_is_allocated(&states[i], attr->buf_id)) { - attr->pages[attr->total++] = - rdma_block_iter_dma_address(&biter); - if (attr->total >= attr->max) - return DCA_MEM_STOP_ITERATE; + free_pages = 0; + for (i = 0; i < mem->page_count; i++) { + state = &mem->states[i]; + if (dca_page_is_free(state)) + free_pages++; + } + + /* No any page be used */ + if (free_pages == mem->page_count) { + /* unregister first empty DCA mem */ + if (!attr->shrink_mems) { + mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; + attr->shrink_key = mem->key; } - i++; + + attr->shrink_mems++; }
- return DCA_MEM_NEXT_ITERATE; + if (attr->shrink_mems > 1) + return DCA_MEM_STOP_ITERATE; + else + return DCA_MEM_NEXT_ITERATE; }
-static int apply_dca_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct hns_dca_attach_attr *attach_attr) +struct hns_dca_shrink_resp { + u64 free_key; + u32 free_mems; +}; + +static void shrink_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx, u64 reserved_size, + struct hns_dca_shrink_resp *resp) { - struct hns_roce_dca_attr attr; + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + struct dca_mem_shrink_attr attr = {}; + unsigned long flags; + bool need_shink;
- if (hr_dev->hw->set_dca_buf) { - attr.sq_offset = attach_attr->sq_offset; - attr.sge_offset = attach_attr->sge_offset; - attr.rq_offset = attach_attr->rq_offset; - return hr_dev->hw->set_dca_buf(hr_dev, hr_qp, &attr); - } + spin_lock_irqsave(&ctx->pool_lock, flags); + need_shink = ctx->free_mems > 0 && ctx->free_size > reserved_size; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + if (!need_shink) + return;
- return 0; + travel_dca_pages(ctx, &attr, shrink_dca_page_proc); + resp->free_mems = attr.shrink_mems; + resp->free_key = attr.shrink_key; }
-static int setup_dca_buf_to_hw(struct hns_roce_dca_ctx *ctx, - struct hns_roce_qp *hr_qp, u32 buf_id, - struct hns_dca_attach_attr *attach_attr) +static void init_dca_context(struct hns_roce_dca_ctx *ctx) { - struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct dca_get_alloced_pages_attr attr = {}; - struct ib_device *ibdev = &hr_dev->ib_dev; - u32 count = hr_qp->dca_cfg.npages; - dma_addr_t *pages; - int ret; + INIT_LIST_HEAD(&ctx->pool); + spin_lock_init(&ctx->pool_lock); + ctx->total_size = 0; +}
- /* Alloc a tmp array to store buffer's dma address */ - pages = kvcalloc(count, sizeof(dma_addr_t), GFP_NOWAIT); - if (!pages) - return -ENOMEM; +static void cleanup_dca_context(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx) +{ + struct dca_mem *mem, *tmp; + unsigned long flags; + bool is_user;
- attr.buf_id = buf_id; - attr.pages = pages; - attr.max = count; + is_user = (ctx != &hr_dev->dca_ctx); + spin_lock_irqsave(&ctx->pool_lock, flags); + list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { + list_del(&mem->list); + spin_lock(&mem->lock); + mem->flags = 0; + spin_unlock(&mem->lock); + spin_unlock_irqrestore(&ctx->pool_lock, flags);
- travel_dca_pages(ctx, &attr, get_alloced_umem_proc); - if (attr.total != count) { - ibdev_err(ibdev, "failed to get DCA page %u != %u.\n", - attr.total, count); - ret = -ENOMEM; - goto done; - } + kfree(mem->states); + free_dca_pages(hr_dev, is_user, mem->pages); + kfree(mem);
- /* Update MTT for ROCEE addressing */ - ret = hns_roce_mtr_map(hr_dev, &hr_qp->mtr, pages, count); - if (ret) { - ibdev_err(ibdev, "failed to map DCA pages, ret = %d.\n", ret); - goto done; + spin_lock_irqsave(&ctx->pool_lock, flags); } + ctx->total_size = 0; + spin_unlock_irqrestore(&ctx->pool_lock, flags); +}
- /* Apply the changes for WQE address */ - ret = apply_dca_cfg(hr_dev, hr_qp, attach_attr); - if (ret) - ibdev_err(ibdev, "failed to apply DCA cfg, ret = %d.\n", ret); +#define DCA_MAX_MEM_SIZE ~0UL
-done: - /* Drop tmp array */ - kvfree(pages); - return ret; +static uint dca_unit_size; +static ulong dca_min_size = DCA_MAX_MEM_SIZE; +static ulong dca_max_size = DCA_MAX_MEM_SIZE; + +static void config_kdca_context(struct hns_roce_dca_ctx *ctx) +{ + unsigned int unit_size; + + unit_size = ALIGN(dca_unit_size, PAGE_SIZE); + ctx->unit_size = unit_size; + if (!unit_size) + return; + + if (dca_max_size == DCA_MAX_MEM_SIZE || dca_max_size == 0) + ctx->max_size = DCA_MAX_MEM_SIZE; + else + ctx->max_size = roundup(dca_max_size, unit_size); + + if (dca_min_size == DCA_MAX_MEM_SIZE) + ctx->min_size = ctx->max_size; + else + ctx->min_size = roundup(dca_min_size, unit_size); }
-static u32 alloc_buf_from_dca_mem(struct hns_roce_qp *hr_qp, - struct hns_roce_dca_ctx *ctx) +void hns_roce_init_dca(struct hns_roce_dev *hr_dev) { - u32 buf_pages, unit_pages, alloc_pages; - u32 buf_id; + init_dca_context(&hr_dev->dca_ctx); + + config_kdca_context(&hr_dev->dca_ctx); +} + +void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev) +{ + cleanup_dca_context(hr_dev, &hr_dev->dca_ctx); +} + +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) + return; + + init_dca_context(&uctx->dca_ctx); +} + +void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) + return; + + cleanup_dca_context(hr_dev, &uctx->dca_ctx); +} + +static struct dca_mem *key_to_dca_mem(struct list_head *head, u64 key) +{ + struct dca_mem *mem; + + list_for_each_entry(mem, head, list) + if (mem->key == key) + return mem; + + return NULL; +} + +static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, u32 alloc_size) +{ + unsigned long flags; + bool enable; + + spin_lock_irqsave(&ctx->pool_lock, flags); + + /* Pool size no limit */ + if (ctx->max_size == DCA_MAX_MEM_SIZE) + enable = true; + else /* Pool size not exceed max size */ + enable = (ctx->total_size + alloc_size) < ctx->max_size; + + spin_unlock_irqrestore(&ctx->pool_lock, flags);
- buf_pages = hr_qp->dca_cfg.npages; - /* Gen new buf id */ - buf_id = HNS_DCA_TO_BUF_ID(hr_qp->qpn, hr_qp->dca_cfg.attach_count); + return enable; +}
- /* Assign pages from free pages */ - unit_pages = hr_qp->mtr.hem_cfg.is_direct ? buf_pages : 1; - alloc_pages = assign_dca_pages(ctx, buf_id, buf_pages, unit_pages); - if (buf_pages != alloc_pages) { - if (alloc_pages > 0) - clear_dca_pages(ctx, buf_id, alloc_pages); - return HNS_DCA_INVALID_BUF_ID; - } +static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) +{ + unsigned long flags; + bool enable;
- return buf_id; + spin_lock_irqsave(&ctx->pool_lock, flags); + enable = ctx->total_size > 0 && ctx->min_size < ctx->max_size; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + return enable; }
-static int active_alloced_buf(struct hns_roce_qp *hr_qp, - struct hns_roce_dca_ctx *ctx, - struct hns_dca_attach_attr *attr, u32 buf_id) +static struct dca_mem *alloc_dca_mem(struct hns_roce_dca_ctx *ctx) { - struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct ib_device *ibdev = &hr_dev->ib_dev; - u32 active_pages, alloc_pages; - int ret; + struct dca_mem *mem, *tmp, *found = NULL; + unsigned long flags;
- ret = setup_dca_buf_to_hw(ctx, hr_qp, buf_id, attr); - if (ret) { - ibdev_err(ibdev, "failed to setup DCA buf, ret = %d.\n", ret); - goto active_fail; + spin_lock_irqsave(&ctx->pool_lock, flags); + list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { + spin_lock(&mem->lock); + if (!mem->flags) { + found = mem; + mem->flags |= DCA_MEM_FLAGS_ALLOCED; + spin_unlock(&mem->lock); + break; + } + spin_unlock(&mem->lock); } + spin_unlock_irqrestore(&ctx->pool_lock, flags);
- alloc_pages = hr_qp->dca_cfg.npages; - active_pages = active_dca_pages(ctx, buf_id, alloc_pages); - if (active_pages != alloc_pages) { - ibdev_err(ibdev, "failed to active DCA pages, %u != %u.\n", - active_pages, alloc_pages); - ret = -ENOBUFS; - goto active_fail; - } + if (found) + return found;
- return 0; + mem = kzalloc(sizeof(*mem), GFP_ATOMIC); + if (!mem) + return NULL;
-active_fail: - clear_dca_pages(ctx, buf_id, alloc_pages); - return ret; + spin_lock_init(&mem->lock); + INIT_LIST_HEAD(&mem->list); + + mem->flags |= DCA_MEM_FLAGS_ALLOCED; + + spin_lock_irqsave(&ctx->pool_lock, flags); + list_add(&mem->list, &ctx->pool); + spin_unlock_irqrestore(&ctx->pool_lock, flags); + return mem; }
-static int attach_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_dca_attach_attr *attr, - struct hns_dca_attach_resp *resp) +static void free_dca_mem(struct dca_mem *mem) { - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); - struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; - u32 buf_id; - int ret; + /* When iterate all DCA mems in travel_dca_pages(), we will NOT hold the + * pool's lock and just set the DCA mem as free state during the DCA is + * working until cleanup the DCA context in hns_roce_cleanup_dca(). + */ + spin_lock(&mem->lock); + mem->flags = 0; + spin_unlock(&mem->lock); +}
- /* Stop DCA mem ageing worker */ - cancel_delayed_work(&cfg->dwork); - resp->alloc_flags = 0; +static int add_dca_mem(struct hns_roce_dev *hr_dev, u32 new_size) +{ + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, NULL); + struct dca_mem_attr attr = {}; + struct dca_mem *mem = NULL; + int ret;
- spin_lock(&cfg->lock); - buf_id = cfg->buf_id; - /* Already attached */ - if (buf_id != HNS_DCA_INVALID_BUF_ID) { - resp->alloc_pages = cfg->npages; - spin_unlock(&cfg->lock); - return 0; - } + if (!add_dca_mem_enabled(ctx, new_size)) + return -ENOMEM;
- /* Start to new attach */ - resp->alloc_pages = 0; - buf_id = alloc_buf_from_dca_mem(hr_qp, ctx); - if (buf_id == HNS_DCA_INVALID_BUF_ID) { - spin_unlock(&cfg->lock); - /* No report fail, need try again after the pool increased */ - return 0; - } + /* Add new DCA mem */ + mem = alloc_dca_mem(ctx); + if (!mem) + return -ENOMEM;
- ret = active_alloced_buf(hr_qp, ctx, attr, buf_id); + attr.key = (u64)mem; + attr.size = roundup(new_size, ctx->unit_size); + ret = register_dca_mem(hr_dev, NULL, mem, &attr); if (ret) { - spin_unlock(&cfg->lock); + free_dca_mem(mem); ibdev_err(&hr_dev->ib_dev, - "failed to active DCA buf for QP-%lu, ret = %d.\n", - hr_qp->qpn, ret); - return ret; + "failed to register DCA mem, ret = %d.\n", ret); }
- /* Attach ok */ - cfg->buf_id = buf_id; - cfg->attach_count++; - spin_unlock(&cfg->lock); - - resp->alloc_flags |= HNS_IB_ATTACH_FLAGS_NEW_BUFFER; - resp->alloc_pages = cfg->npages; - - return 0; + return ret; }
-struct dca_page_query_active_attr { +struct dca_page_get_active_buf_attr { u32 buf_id; - u32 curr_index; - u32 start_index; - u32 page_index; - u32 page_count; - u64 mem_key; + void **buf_list; + u32 total; + u32 max; };
-static int query_dca_active_pages_proc(struct dca_mem *mem, int index, - void *param) +static int get_active_kbuf_proc(struct dca_mem *mem, int index, void *param) { - struct hns_dca_page_state *state = &mem->states[index]; - struct dca_page_query_active_attr *attr = param; - - if (!dca_page_is_active(state, attr->buf_id)) - return 0; - - if (attr->curr_index < attr->start_index) { - attr->curr_index++; - return 0; - } else if (attr->curr_index > attr->start_index) { - return DCA_MEM_STOP_ITERATE; + struct dca_page_get_active_buf_attr *attr = param; + struct hns_dca_page_state *states = mem->states; + struct hns_roce_buf *kmem = mem->pages; + void *buf; + u32 i; + + for (i = 0; i < kmem->npages; i++) { + if (!dca_page_is_active(&states[i], attr->buf_id)) + continue; + + buf = hns_roce_buf_offset(kmem, i << HNS_HW_PAGE_SHIFT); + attr->buf_list[attr->total++] = buf; + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; }
- /* Search first page in DCA mem */ - attr->page_index = index; - attr->mem_key = mem->key; - /* Search active pages in continuous addresses */ - while (index < mem->page_count) { - state = &mem->states[index]; - if (!dca_page_is_active(state, attr->buf_id)) - break; + return DCA_MEM_NEXT_ITERATE; +}
- index++; - attr->page_count++; - } +static int setup_dca_buf_list(struct hns_roce_dca_ctx *ctx, + struct hns_roce_dca_cfg *cfg) +{ + struct dca_page_get_active_buf_attr attr = {};
- return DCA_MEM_STOP_ITERATE; -} + attr.buf_id = cfg->buf_id; + attr.buf_list = cfg->buf_list; + attr.max = cfg->npages; + travel_dca_pages(ctx, &attr, get_active_kbuf_proc);
-struct dca_page_free_buf_attr { - u32 buf_id; - u32 max_pages; - u32 free_pages; - u32 clean_mems; -}; + return attr.total == attr.max ? 0 : -ENOMEM; +}
-static int free_buffer_pages_proc(struct dca_mem *mem, int index, void *param) +#define DCA_EXPAND_MEM_TRY_TIMES 3 +int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr) { - struct dca_page_free_buf_attr *attr = param; - struct hns_dca_page_state *state; - bool changed = false; - bool stop = false; - int i, free_pages; + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + struct hns_dca_attach_resp resp = {}; + bool is_new_buf = true; + int try_times = 0; + int ret;
- free_pages = 0; - for (i = 0; !stop && i < mem->page_count; i++) { - state = &mem->states[i]; - /* Change matched pages state */ - if (dca_page_is_attached(state, attr->buf_id)) { - set_dca_page_to_free(state); - changed = true; - attr->free_pages++; - if (attr->free_pages == attr->max_pages) - stop = true; + do { + resp.alloc_flags = 0; + ret = attach_dca_mem(hr_dev, hr_qp, attr, &resp); + if (ret) + break; + + if (resp.alloc_pages >= cfg->npages) { + is_new_buf = !!(resp.alloc_flags & + HNS_DCA_ATTACH_FLAGS_NEW_BUFFER); + break; }
- if (dca_page_is_free(state)) - free_pages++; - } + ret = add_dca_mem(hr_dev, hr_qp->buff_size); + if (ret) + break; + } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
- for (; changed && i < mem->page_count; i++) - if (dca_page_is_free(state)) - free_pages++; + if (ret || resp.alloc_pages < cfg->npages) { + ibdev_err(&hr_dev->ib_dev, + "failed to attach buf %u != %u, try %d, ret = %d.\n", + cfg->npages, resp.alloc_pages, try_times, ret); + return -ENOMEM; + }
- if (changed && free_pages == mem->page_count) - attr->clean_mems++; + /* DCA config not changed */ + if (!is_new_buf && cfg->buf_list[0]) + return 0;
- return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; + return setup_dca_buf_list(hr_qp_to_dca_ctx(hr_dev, hr_qp), cfg); }
-static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, - struct hns_roce_dca_cfg *cfg) +static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev) { - struct dca_page_free_buf_attr attr = {}; + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, NULL); + struct hns_dca_shrink_resp resp = {}; + struct dca_mem *mem; unsigned long flags; - u32 buf_id; - - spin_lock(&cfg->lock); - buf_id = cfg->buf_id; - cfg->buf_id = HNS_DCA_INVALID_BUF_ID; - spin_unlock(&cfg->lock); - if (buf_id == HNS_DCA_INVALID_BUF_ID) - return; - - attr.buf_id = buf_id; - attr.max_pages = cfg->npages; - travel_dca_pages(ctx, &attr, free_buffer_pages_proc);
- /* Update free size */ - spin_lock_irqsave(&ctx->pool_lock, flags); - ctx->free_mems += attr.clean_mems; - ctx->free_size += attr.free_pages << HNS_HW_PAGE_SHIFT; - spin_unlock_irqrestore(&ctx->pool_lock, flags); + while (shrink_dca_mem_enabled(ctx)) { + resp.free_mems = 0; + shrink_dca_mem(hr_dev, NULL, ctx->min_size, &resp); + if (resp.free_mems < 1) + break; + spin_lock_irqsave(&ctx->pool_lock, flags); + mem = key_to_dca_mem(&ctx->pool, resp.free_key); + spin_unlock_irqrestore(&ctx->pool_lock, flags); + if (!mem) + break; + unregister_dca_mem(hr_dev, NULL, mem); + free_dca_mem(mem); + /* No more free memory */ + if (resp.free_mems <= 1) + break; + } }
static void kick_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_dca_cfg *cfg, struct hns_roce_ucontext *uctx) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);
/* Stop ageing worker and free DCA buffer from pool */ cancel_delayed_work_sync(&cfg->dwork); free_buf_from_dca_mem(ctx, cfg); + + /* Shrink kernel DCA mem */ + if (!uctx) + remove_unused_dca_mem(hr_dev); }
static void dca_mem_ageing_work(struct work_struct *work) @@ -892,41 +1255,36 @@ static void dca_mem_ageing_work(struct work_struct *work) struct hns_roce_qp *hr_qp = container_of(work, struct hns_roce_qp, dca_cfg.dwork.work); struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); bool hw_is_inactive;
hw_is_inactive = hr_dev->hw->chk_dca_buf_inactive && hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp); if (hw_is_inactive) free_buf_from_dca_mem(ctx, &hr_qp->dca_cfg); + + /* Shrink kernel DCA mem */ + if (!hr_qp->ibqp.uobject) + remove_unused_dca_mem(hr_dev); }
-void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr) { - struct hns_roce_ucontext *uctx; - - if (hr_qp->ibqp.uobject && hr_qp->ibqp.pd->uobject) { - uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); - kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx); - } + detach_dca_mem(hr_dev, hr_qp, attr); }
-static void detach_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_dca_detach_attr *attr) +void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) { - struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext);
- /* Start an ageing worker to free buffer */ - cancel_delayed_work(&cfg->dwork); - spin_lock(&cfg->lock); - cfg->sq_idx = attr->sq_idx; - queue_delayed_work(hr_dev->irq_workq, &cfg->dwork, - msecs_to_jiffies(DCA_MEM_AGEING_MSES)); - spin_unlock(&cfg->lock); + kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx); }
-void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) { struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
@@ -934,6 +1292,16 @@ void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) INIT_DELAYED_WORK(&cfg->dwork, dca_mem_ageing_work); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT; + + /* DCA page list for kernel QP */ + if (!udata && cfg->npages) { + cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), + GFP_KERNEL); + if (!cfg->buf_list) + return -ENOMEM; + } + + return 0; }
void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, @@ -944,7 +1312,12 @@ void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
kick_dca_mem(hr_dev, cfg, uctx); - cfg->buf_id = HNS_DCA_INVALID_BUF_ID; + + /* Free kernel DCA buffer list */ + if (!udata && cfg->buf_list) { + kfree(cfg->buf_list); + cfg->buf_list = NULL; + }
static inline struct hns_roce_ucontext * @@ -976,7 +1349,7 @@ static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)( if (ret) return ret;
- mem = alloc_dca_mem(to_hr_dca_ctx(uctx)); + mem = alloc_dca_mem(to_hr_dca_ctx(hr_dev, uctx)); if (!mem) return -ENOMEM;
@@ -1005,7 +1378,7 @@ static int dca_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why, return 0;
mem = uobject->object; - unregister_dca_mem(uctx, mem); + unregister_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx, mem); free_dca_mem(mem);
return 0; @@ -1163,7 +1536,8 @@ static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_QUERY)( struct uverbs_attr_bundle *attrs) { struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs); - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); struct dca_page_query_active_attr active_attr = {}; u32 page_idx, page_ofs; int ret; @@ -1234,3 +1608,7 @@ const struct uapi_definition hns_roce_dca_uapi_defs[] = { UAPI_DEF_IS_OBJ_SUPPORTED(dca_is_supported)), {} }; + +module_param(dca_unit_size, uint, 0444); +module_param(dca_max_size, ulong, 0444); +module_param(dca_min_size, ulong, 0444); diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index fdc3aaa4b10b..f378102778e3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -18,11 +18,6 @@ struct hns_dca_page_state {
extern const struct uapi_definition hns_roce_dca_uapi_defs[];
-struct hns_dca_shrink_resp { - u64 free_key; /* free buffer's key which registered by the user */ - u32 free_mems; /* free buffer count which no any QP be using */ -}; - #define HNS_DCA_INVALID_BUF_ID 0UL
/* @@ -46,6 +41,7 @@ struct hns_dca_attach_attr { };
struct hns_dca_attach_resp { +#define HNS_DCA_ATTACH_FLAGS_NEW_BUFFER BIT(0) u32 alloc_flags; u32 alloc_pages; }; @@ -54,14 +50,27 @@ struct hns_dca_detach_attr { u32 sq_idx; };
+typedef int (*hns_dca_enum_callback)(struct hns_dca_page_state *, u32, void *); + +void hns_roce_init_dca(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev); + void hns_roce_register_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx); void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx);
-void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp); +int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata); void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata); -void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); + +int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr); +void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr); + +void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata); + #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ac9dcdf59887..b1c1f640a7a0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -130,6 +130,15 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17, };
+/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. + * + * These flags are intended for internal use by the hns driver, and they + * rely on the range reserved for that use in the ib_qp_create_flags enum. + */ +enum hns_roce_qp_create_flags { + HNS_ROCE_QP_CREATE_DCA_EN = IB_QP_CREATE_RESERVED_START, +}; + enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), @@ -210,6 +219,9 @@ struct hns_roce_dca_ctx { unsigned int free_mems; /* free mem num in pool */ size_t free_size; /* free mem size in pool */ size_t total_size; /* total size in pool */ + size_t max_size; /* max size the pool can expand to */ + size_t min_size; /* shrink if @free_size > @min_size */ + unsigned int unit_size; /* unit size per DCA mem */ };
struct hns_roce_ucontext { @@ -314,20 +326,15 @@ struct hns_roce_mtr { struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ };
+/* DCA config */ struct hns_roce_dca_cfg { - spinlock_t lock; - u32 buf_id; - u16 attach_count; - u32 npages; - u32 sq_idx; - struct delayed_work dwork; -}; - -/* DCA attr for setting WQE buffer */ -struct hns_roce_dca_attr { - u32 sq_offset; - u32 sge_offset; - u32 rq_offset; + spinlock_t lock; + u32 buf_id; + u16 attach_count; + void **buf_list; + u32 npages; + u32 sq_idx; + struct delayed_work dwork; };
struct hns_roce_mw { @@ -367,6 +374,7 @@ struct hns_roce_wq { u32 max_gs; u32 rsv_sge; u32 offset; + int wqe_offset; u32 wqe_shift; /* WQE size */ u32 head; u32 tail; @@ -378,6 +386,7 @@ struct hns_roce_sge { unsigned int sge_cnt; /* SGE num */ u32 offset; u32 sge_shift; /* SGE size */ + int wqe_offset; };
struct hns_roce_buf_list { @@ -912,8 +921,7 @@ struct hns_roce_hw { struct hns_roce_hem_table *table, int obj, u32 step_idx); int (*set_dca_buf)(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_dca_attr *attr); + struct hns_roce_qp *hr_qp); bool (*chk_dca_buf_inactive)(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, @@ -941,6 +949,11 @@ struct hns_roce_dev { struct ib_device ib_dev; struct pci_dev *pci_dev; struct device *dev; + void *dbgfs; /* debugfs for this dev */ + + struct list_head uctx_list; /* list of all uctx on this dev */ + spinlock_t uctx_list_lock; /* protect @uctx_list */ + struct hns_roce_uar priv_uar; const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; spinlock_t sm_lock; @@ -963,6 +976,8 @@ struct hns_roce_dev { struct hns_roce_caps caps; struct xarray qp_table_xa;
+ struct hns_roce_dca_ctx dca_ctx; + unsigned char dev_addr[HNS_ROCE_MAX_PORTS][ETH_ALEN]; u64 sys_image_guid; u32 vendor_id; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f14a8e41aafa..c4b61266a242 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -376,11 +376,64 @@ static inline bool check_qp_dca_enable(struct hns_roce_qp *hr_qp) return !!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA); }
+static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_dca_attach_attr attr = {}; + unsigned long flags_sq, flags_rq; + u32 idx; + + spin_lock_irqsave(&hr_qp->sq.lock, flags_sq); + spin_lock_irqsave(&hr_qp->rq.lock, flags_rq); + + if (hr_qp->sq.wqe_cnt > 0) { + idx = hr_qp->sq.head & (hr_qp->sq.wqe_cnt - 1); + attr.sq_offset = idx << hr_qp->sq.wqe_shift; + } + + if (hr_qp->sge.sge_cnt > 0) { + idx = hr_qp->next_sge & (hr_qp->sge.sge_cnt - 1); + attr.sge_offset = idx << hr_qp->sge.sge_shift; + } + + if (hr_qp->rq.wqe_cnt > 0) { + idx = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); + attr.rq_offset = idx << hr_qp->rq.wqe_shift; + } + + spin_unlock_irqrestore(&hr_qp->rq.lock, flags_rq); + spin_unlock_irqrestore(&hr_qp->sq.lock, flags_sq); + + return hns_roce_dca_attach(hr_dev, hr_qp, &attr); +} + +static void dca_detach_qp_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_dca_detach_attr attr = {}; + unsigned long flags_sq, flags_rq; + bool is_empty; + + spin_lock_irqsave(&hr_qp->sq.lock, flags_sq); + spin_lock_irqsave(&hr_qp->rq.lock, flags_rq); + is_empty = hr_qp->sq.head == hr_qp->sq.tail && + hr_qp->rq.head == hr_qp->rq.tail; + if (is_empty && hr_qp->sq.wqe_cnt > 0) + attr.sq_idx = hr_qp->sq.head & (hr_qp->sq.wqe_cnt - 1); + + spin_unlock_irqrestore(&hr_qp->rq.lock, flags_rq); + spin_unlock_irqrestore(&hr_qp->sq.lock, flags_sq); + + if (is_empty) + hns_roce_dca_detach(hr_dev, hr_qp, &attr); +} + static int check_send_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct ib_device *ibdev = &hr_dev->ib_dev; struct ib_qp *ibqp = &hr_qp->ibqp; + int ret;
if (unlikely(ibqp->qp_type != IB_QPT_RC && ibqp->qp_type != IB_QPT_GSI && @@ -400,6 +453,16 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, return -EIO; }
+ if (check_qp_dca_enable(hr_qp)) { + ret = dca_attach_qp_buf(hr_dev, hr_qp); + if (unlikely(ret)) { + ibdev_err(&hr_dev->ib_dev, + "failed to attach DCA for QP-%ld send!\n", + hr_qp->qpn); + return ret; + } + } + return 0; }
@@ -586,6 +649,14 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, return ret; }
+static inline void fill_dca_fields(struct hns_roce_qp *hr_qp, + struct hns_roce_v2_rc_send_wqe *wqe) +{ + hr_reg_write(wqe, RC_SEND_WQE_SQPN_L, hr_qp->qpn); + hr_reg_write(wqe, RC_SEND_WQE_SQPN_H, + hr_qp->qpn >> V2_RC_SEND_WQE_BYTE_4_SQPN_L_W); +} + static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -622,6 +693,9 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge);
+ if (qp->en_flags & HNS_ROCE_QP_CAP_DCA) + fill_dca_fields(qp, rc_sq_wqe); + /* * The pipeline can sequentially post all valid WQEs into WQ buffer, * including new WQEs waiting for the doorbell to update the PI again. @@ -706,12 +780,26 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, hns_roce_write512(hr_dev, wqe, qp->sq.db_reg); }
+static int check_sq_enabled(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + const struct ib_send_wr *wr, int nreq) +{ + if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) + return -ENOMEM; + + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + ibdev_err(&hr_dev->ib_dev, "num_sge=%d > qp->sq.max_gs=%u\n", + wr->num_sge, qp->sq.max_gs); + return -EINVAL; + } + + return 0; +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); unsigned long flags = 0; unsigned int owner_bit; @@ -721,34 +809,25 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, u32 nreq; int ret;
- spin_lock_irqsave(&qp->sq.lock, flags);
ret = check_send_valid(hr_dev, qp); if (unlikely(ret)) { *bad_wr = wr; - nreq = 0; - goto out; + return ret; }
+ spin_lock_irqsave(&qp->sq.lock, flags); sge_idx = qp->next_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { - ret = -ENOMEM; + ret = check_sq_enabled(hr_dev, qp, wr, nreq); + if (unlikely(ret)) { *bad_wr = wr; goto out; }
wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
- if (unlikely(wr->num_sge > qp->sq.max_gs)) { - ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", - wr->num_sge, qp->sq.max_gs); - ret = -EINVAL; - *bad_wr = wr; - goto out; - } - wqe = hns_roce_get_send_wqe(qp, wqe_idx); qp->sq.wrid[wqe_idx] = wr->wr_id; owner_bit = @@ -787,6 +866,7 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, { struct ib_device *ibdev = &hr_dev->ib_dev; struct ib_qp *ibqp = &hr_qp->ibqp; + int ret;
if (unlikely(ibqp->qp_type != IB_QPT_RC && ibqp->qp_type != IB_QPT_GSI && @@ -802,6 +882,16 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, if (hr_qp->state == IB_QPS_RESET) return -EINVAL;
+ if (check_qp_dca_enable(hr_qp)) { + ret = dca_attach_qp_buf(hr_dev, hr_qp); + if (unlikely(ret)) { + ibdev_err(ibdev, + "failed to attach DCA for QP-%lu recv!\n", + hr_qp->qpn); + return ret; + } + } + return 0; }
@@ -852,15 +942,15 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, unsigned long flags; int ret;
- spin_lock_irqsave(&hr_qp->rq.lock, flags);
ret = check_recv_valid(hr_dev, hr_qp); if (unlikely(ret)) { *bad_wr = wr; - nreq = 0; - goto out; + return ret; }
+ spin_lock_irqsave(&hr_qp->rq.lock, flags); + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, @@ -2088,7 +2178,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { caps->flags |= HNS_ROCE_CAP_FLAG_STASH | - HNS_ROCE_CAP_FLAG_DIRECT_WQE; + HNS_ROCE_CAP_FLAG_DIRECT_WQE | + HNS_ROCE_CAP_FLAG_DCA_MODE; caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE; } else { caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; @@ -4162,6 +4253,7 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, struct hns_roce_qp *cur_qp = NULL; unsigned long flags; int npolled; + int ret;
spin_lock_irqsave(&hr_cq->lock, flags);
@@ -4178,7 +4270,10 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, }
for (npolled = 0; npolled < num_entries; ++npolled) { - if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled)) + ret = hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled); + if (cur_qp && check_qp_dca_enable(cur_qp)) + dca_detach_qp_buf(hr_dev, cur_qp); + if (ret) break; }
@@ -4548,15 +4643,14 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask, - struct hns_roce_dca_attr *dca_attr) + struct hns_roce_v2_qp_context *qpc_mask) { u64 mtts[MTT_MIN_COUNT] = { 0 }; u64 wqe_sge_ba; int count;
/* Search qp buf's mtts */ - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, dca_attr->rq_offset, + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.wqe_offset, mtts, ARRAY_SIZE(mtts), &wqe_sge_ba); if (hr_qp->rq.wqe_cnt && count < 1) { ibdev_err(&hr_dev->ib_dev, @@ -4623,8 +4717,7 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask, - struct hns_roce_dca_attr *dca_attr) + struct hns_roce_v2_qp_context *qpc_mask) { struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; @@ -4632,7 +4725,7 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, int count;
/* search qp buf's mtts */ - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, dca_attr->sq_offset, + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.wqe_offset, &sq_cur_blk, 1, NULL); if (count < 1) { ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf.\n", @@ -4641,8 +4734,8 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, } if (hr_qp->sge.sge_cnt > 0) { count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - dca_attr->sge_offset, &sge_cur_blk, 1, - NULL); + hr_qp->sge.wqe_offset, &sge_cur_blk, + 1, NULL); if (count < 1) { ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n", hr_qp->qpn); @@ -4700,7 +4793,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_dca_attr dca_attr = {}; dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu ib_mtu; @@ -4712,8 +4804,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int port; int ret;
- dca_attr.rq_offset = hr_qp->rq.offset; - ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask, &dca_attr); + hr_qp->rq.wqe_offset = hr_qp->rq.offset; + ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { ibdev_err(ibdev, "failed to config rq buf, ret = %d.\n", ret); return ret; @@ -4859,7 +4951,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_dca_attr dca_attr = {}; int ret;
/* Not support alternate path and path migration */ @@ -4868,9 +4959,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return -EINVAL; }
- dca_attr.sq_offset = hr_qp->sq.offset; - dca_attr.sge_offset = hr_qp->sge.offset; - ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask, &dca_attr); + hr_qp->sq.wqe_offset = hr_qp->sq.offset; + hr_qp->sge.wqe_offset = hr_qp->sge.offset; + ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; @@ -5530,83 +5621,38 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
if (check_qp_dca_enable(hr_qp) && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) - hns_roce_dca_kick(hr_dev, hr_qp); + hns_roce_dca_kick(hr_dev, hr_qp, udata);
out: return ret; }
-static int init_dca_buf_attr(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_dca_attr *init_attr, - struct hns_roce_dca_attr *dca_attr) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - - if (hr_qp->sq.wqe_cnt > 0) { - dca_attr->sq_offset = hr_qp->sq.offset + init_attr->sq_offset; - if (dca_attr->sq_offset >= hr_qp->sge.offset) { - ibdev_err(ibdev, "failed to check SQ offset = %u\n", - init_attr->sq_offset); - return -EINVAL; - } - } - - if (hr_qp->sge.sge_cnt > 0) { - dca_attr->sge_offset = hr_qp->sge.offset + init_attr->sge_offset; - if (dca_attr->sge_offset >= hr_qp->rq.offset) { - ibdev_err(ibdev, "failed to check exSGE offset = %u\n", - init_attr->sge_offset); - return -EINVAL; - } - } - - if (hr_qp->rq.wqe_cnt > 0) { - dca_attr->rq_offset = hr_qp->rq.offset + init_attr->rq_offset; - if (dca_attr->rq_offset >= hr_qp->buff_size) { - ibdev_err(ibdev, "failed to check RQ offset = %u\n", - init_attr->rq_offset); - return -EINVAL; - } - } - - return 0; -} - static int hns_roce_v2_set_dca_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_dca_attr *init_attr) + struct hns_roce_qp *hr_qp) { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v2_qp_context *qpc, *msk; - struct hns_roce_dca_attr dca_attr = {}; struct hns_roce_mbox_msg mbox_msg = {}; dma_addr_t dma_handle; int qpc_sz; int ret;
- ret = init_dca_buf_attr(hr_dev, hr_qp, init_attr, &dca_attr); - if (ret) { - ibdev_err(ibdev, "failed to init DCA attr, ret = %d.\n", ret); - return ret; - } - qpc_sz = hr_dev->caps.qpc_sz; WARN_ON(2 * qpc_sz > HNS_ROCE_MAILBOX_SIZE); - qpc = dma_pool_alloc(hr_dev->cmd.pool, GFP_NOWAIT, &dma_handle); + qpc = dma_pool_alloc(hr_dev->cmd.pool, GFP_ATOMIC, &dma_handle); if (!qpc) return -ENOMEM;
msk = (struct hns_roce_v2_qp_context *)((void *)qpc + qpc_sz); memset(msk, 0xff, qpc_sz);
- ret = config_qp_rq_buf(hr_dev, hr_qp, qpc, msk, &dca_attr); + ret = config_qp_rq_buf(hr_dev, hr_qp, qpc, msk); if (ret) { ibdev_err(ibdev, "failed to config rq qpc, ret = %d.\n", ret); goto done; }
- ret = config_qp_sq_buf(hr_dev, hr_qp, qpc, msk, &dca_attr); + ret = config_qp_sq_buf(hr_dev, hr_qp, qpc, msk); if (ret) { ibdev_err(ibdev, "failed to config sq qpc, ret = %d.\n", ret); goto done; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e01d24f95933..28381993278f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -920,6 +920,8 @@ struct hns_roce_v2_rc_send_wqe { #define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0) #define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5) #define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13) +#define RC_SEND_WQE_SQPN_L RC_SEND_WQE_FIELD_LOC(6, 5) +#define RC_SEND_WQE_SQPN_H RC_SEND_WQE_FIELD_LOC(30, 13) #define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) #define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) #define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) @@ -933,6 +935,8 @@ struct hns_roce_v2_rc_send_wqe { #define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128) #define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
+#define V2_RC_SEND_WQE_BYTE_4_SQPN_L_W 2 + struct hns_roce_wqe_frmr_seg { __le32 pbl_size; __le32 byte_40; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d9d787fbc70b..d14eaecdbf15 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -945,6 +945,14 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) return ret; }
+static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) +{ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) + hns_roce_cleanup_dca(hr_dev); + + hns_roce_cleanup_bitmap(hr_dev); +} + /** * hns_roce_setup_hca - setup host channel adapter * @hr_dev: pointer to hns roce device @@ -957,6 +965,14 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
spin_lock_init(&hr_dev->sm_lock);
+ INIT_LIST_HEAD(&hr_dev->qp_list); + spin_lock_init(&hr_dev->qp_list_lock); + INIT_LIST_HEAD(&hr_dev->dip_list); + spin_lock_init(&hr_dev->dip_list_lock); + + INIT_LIST_HEAD(&hr_dev->uctx_list); + spin_lock_init(&hr_dev->uctx_list_lock); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { INIT_LIST_HEAD(&hr_dev->pgdir_list); @@ -990,6 +1006,9 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) hns_roce_init_srq_table(hr_dev); }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) + hns_roce_init_dca(hr_dev); + return 0;
err_uar_table_free: @@ -1014,7 +1033,7 @@ static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) { - struct hns_roce_qp *hr_qp; + struct hns_roce_qp *hr_qp, *hr_qp_next; struct hns_roce_cq *hr_cq; struct list_head cq_list; unsigned long flags_qp; @@ -1023,7 +1042,7 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) INIT_LIST_HEAD(&cq_list);
spin_lock_irqsave(&hr_dev->qp_list_lock, flags); - list_for_each_entry(hr_qp, &hr_dev->qp_list, node) { + list_for_each_entry_safe(hr_qp, hr_qp_next, &hr_dev->qp_list, node) { spin_lock_irqsave(&hr_qp->sq.lock, flags_qp); if (hr_qp->sq.tail != hr_qp->sq.head) check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq); @@ -1102,11 +1121,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) } }
- INIT_LIST_HEAD(&hr_dev->qp_list); - spin_lock_init(&hr_dev->qp_list_lock); - INIT_LIST_HEAD(&hr_dev->dip_list); - spin_lock_init(&hr_dev->dip_list_lock); - ret = hns_roce_register_device(hr_dev); if (ret) goto error_failed_register_device; @@ -1118,7 +1132,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) hr_dev->hw->hw_exit(hr_dev);
error_failed_engine_init: - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev);
error_failed_setup_hca: hns_roce_cleanup_hem(hr_dev); @@ -1144,7 +1158,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev); hns_roce_cleanup_hem(hr_dev);
if (hr_dev->cmd_mod) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 30c2f5e8e84a..111a397544d7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -652,16 +652,12 @@ static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) { /* release user buffers */ - if (mtr->umem) { - ib_umem_release(mtr->umem); - mtr->umem = NULL; - } + ib_umem_release(mtr->umem); + mtr->umem = NULL;
/* release kernel buffers */ - if (mtr->kmem) { - hns_roce_buf_free(hr_dev, mtr->kmem); - mtr->kmem = NULL; - } + hns_roce_buf_free(hr_dev, mtr->kmem); + mtr->kmem = NULL; }
static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 36868618cd51..302f2ea75749 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -639,7 +639,9 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return 0; }
-static bool check_dca_is_enable(struct hns_roce_dev *hr_dev, bool is_user, +static bool check_dca_is_enable(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, bool is_user, unsigned long addr) { if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE)) @@ -649,6 +651,12 @@ static bool check_dca_is_enable(struct hns_roce_dev *hr_dev, bool is_user, if (is_user) return !addr;
+ /* Only RC and XRC-INI QPs support DCA in kernel space */ + if (hr_dev->dca_ctx.max_size > 0 && + (init_attr->qp_type == IB_QPT_RC || + init_attr->qp_type == IB_QPT_XRC_INI)) + return !!(init_attr->create_flags & HNS_ROCE_QP_CREATE_DCA_EN); + return false; }
@@ -772,8 +780,13 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, int ret;
if (dca_en) { - /* DCA must be enabled after the buffer size is configured. */ - hns_roce_enable_dca(hr_dev, hr_qp); + /* DCA must be enabled after the buffer attr is configured. */ + ret = hns_roce_enable_dca(hr_dev, hr_qp, udata); + if (ret) { + ibdev_err(ibdev, "failed to enable DCA, ret = %d.\n", + ret); + return ret; + }
hr_qp->en_flags |= HNS_ROCE_QP_CAP_DCA; } else { @@ -815,7 +828,7 @@ static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, bool dca_en; int ret;
- dca_en = check_dca_is_enable(hr_dev, !!udata, addr); + dca_en = check_dca_is_enable(hr_dev, hr_qp, init_attr, !!udata, addr); ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, &buf_attr); if (ret) { ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); @@ -1509,9 +1522,18 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, } }
+static inline void *dca_buf_offset(struct hns_roce_dca_cfg *dca_cfg, u32 offset) +{ + return (char *)(dca_cfg->buf_list[offset >> HNS_HW_PAGE_SHIFT]) + + (offset & (HNS_HW_PAGE_SIZE - 1)); +} + static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset) { - return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) + return dca_buf_offset(&hr_qp->dca_cfg, offset); + + return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); }
void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 3429df900cdf..1faa11b8060b 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -150,6 +150,7 @@ enum hns_ib_dca_mem_methods {
enum hns_ib_dca_mem_reg_attrs { HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_REG_FLAGS, HNS_IB_ATTR_DCA_MEM_REG_LEN, HNS_IB_ATTR_DCA_MEM_REG_ADDR, HNS_IB_ATTR_DCA_MEM_REG_KEY, @@ -166,8 +167,6 @@ enum hns_ib_dca_mem_shrink_attrs { HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, };
-#define HNS_IB_ATTACH_FLAGS_NEW_BUFFER 1U - enum hns_ib_dca_mem_attach_attrs { HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,