From: Juan Zhou <zhoujuan51@h-partners.com>
Support hns roce DCA mode.
Chengchang Tang (12):
  RDMA/hns: Introduce DCA for RC QP
  RDMA/hns: Add method for shrinking DCA memory pool
  RDMA/hns: Configure DCA mode for the userspace QP
  RDMA/hns: Add method for attaching WQE buffer
  RDMA/hns: Setup the configuration of WQE addressing to QPC
  RDMA/hns: Add method to detach WQE buffer
  RDMA/hns: Add method to query WQE buffer's address
  RDMA/hns: Add DCA support for kernel space
  RDMA/hns: Add debugfs support for DCA
  RDMA/hns: Optimize user DCA performance by sharing DCA status
  RDMA/hns: Fix concurrent resetting and post_recv in DCA mode
  RDMA/hns: Fix DCA's dependence on ib_uverbs
 drivers/infiniband/hw/hns/Makefile           |    2 +-
 drivers/infiniband/hw/hns/hns_roce_dca.c     | 1875 ++++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_dca.h     |   78 +
 drivers/infiniband/hw/hns/hns_roce_debugfs.c |  406 ++++
 drivers/infiniband/hw/hns/hns_roce_debugfs.h |   19 +
 drivers/infiniband/hw/hns/hns_roce_device.h  |   69 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c   |  304 ++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h   |    5 +
 drivers/infiniband/hw/hns/hns_roce_main.c    |  196 +-
 drivers/infiniband/hw/hns/hns_roce_mr.c      |   12 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c      |  197 +-
 include/uapi/rdma/hns-abi.h                  |   73 +-
 12 files changed, 3099 insertions(+), 137 deletions(-)
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_dca.c
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_dca.h
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
The hip09 introduces the DCA (Dynamic Context Attachment) feature, which allows many RC QPs to share WQE buffers in a memory pool. This reduces memory consumption when many QPs are inactive.
If DCA is enabled for a QP, no WQE buffer is allocated when the QP is created. Instead, when the user starts to post WRs, the hns driver allocates a buffer from the memory pool and fills in WQEs tagged with this QP's number.
The hns ROCEE stops accessing the WQE buffer once the user has polled all of the CQEs for a DCA QP, and the driver then recycles the WQE buffer back to the memory pool.
This patch adds a group of methods that let userspace register buffers to a memory pool owned by the user context. The hns kernel driver updates the page states in this pool when the user calls the post/poll methods, and the user driver can obtain a QP's WQE buffer address from the key and offset queried from the kernel.
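As a rough illustration of the page-state bookkeeping described above, the sketch below mirrors the 32-bit per-page state introduced in hns_roce_dca.h and the "free page" test used by the pool; it is a standalone illustration, not code from this series.

/* Sketch only: mirrors the per-page state added in hns_roce_dca.h. */
#include <stdbool.h>
#include <stdint.h>

#define HNS_DCA_INVALID_BUF_ID 0UL

struct hns_dca_page_state {
	uint32_t buf_id : 29; /* 0 means the page may be used by any buffer */
	uint32_t lock   : 1;  /* buf_id locked this page to prepare access */
	uint32_t active : 1;  /* buf_id is accessing this page */
	uint32_t head   : 1;  /* first page of a contiguous DMA range */
};

/* A page can be handed out again once no buffer owns it. */
static bool dca_page_is_free(const struct hns_dca_page_state *state)
{
	return state->buf_id == HNS_DCA_INVALID_BUF_ID;
}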
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 drivers/infiniband/hw/hns/Makefile          |   2 +-
 drivers/infiniband/hw/hns/hns_roce_dca.c    | 359 ++++++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_dca.h    |  26 ++
 drivers/infiniband/hw/hns/hns_roce_device.h |  10 +
 drivers/infiniband/hw/hns/hns_roce_main.c   |  90 +++--
 include/uapi/rdma/hns-abi.h                 |  25 ++
 6 files changed, 480 insertions(+), 32 deletions(-)
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_dca.c
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_dca.h
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 8faa43009..588de234b 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -10,7 +10,7 @@ ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ - hns_roce_debugfs.o hns_roce_sysfs.o hns_roce_bond.o + hns_roce_debugfs.o hns_roce_sysfs.o hns_roce_bond.o hns_roce_dca.o
ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c new file mode 100644 index 000000000..190d7efd1 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2022 Hisilicon Limited. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/uverbs_std_types.h> +#include <rdma/ib_umem.h> +#include "hns_roce_device.h" +#include "hns_roce_dca.h" + +#define UVERBS_MODULE_NAME hns_ib +#include <rdma/uverbs_named_ioctl.h> + +/* DCA memory */ +struct dca_mem { +#define DCA_MEM_FLAGS_ALLOCED BIT(0) +#define DCA_MEM_FLAGS_REGISTERED BIT(1) + u32 flags; + struct list_head list; /* link to mem list in dca context */ + spinlock_t lock; /* protect the @flags and @list */ + int page_count; /* page count in this mem obj */ + u64 key; /* register by caller */ + u32 size; /* bytes in this mem object */ + struct hns_dca_page_state *states; /* record each page's state */ + void *pages; /* memory handle for getting dma address */ +}; + +struct dca_mem_attr { + u64 key; + u64 addr; + u32 size; +}; + +static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, struct dca_mem *mem, + struct dca_mem_attr *attr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_umem *umem; + + umem = ib_umem_get(ibdev, attr->addr, attr->size, 0); + if (IS_ERR(umem)) { + ibdev_err(ibdev, "failed to get uDCA pages, ret = %ld.\n", + PTR_ERR(umem)); + return NULL; + } + + mem->page_count = ib_umem_num_dma_blocks(umem, HNS_HW_PAGE_SIZE); + + return umem; +} + +static void init_dca_umem_states(struct hns_dca_page_state *states, int count, + struct ib_umem *umem) +{ + struct ib_block_iter biter; + dma_addr_t cur_addr; + dma_addr_t pre_addr; + int i = 0; + + pre_addr = 0; + rdma_for_each_block(umem->sgt_append.sgt.sgl, &biter, + umem->sgt_append.sgt.nents, HNS_HW_PAGE_SIZE) { + cur_addr = rdma_block_iter_dma_address(&biter); + if (i < count) { + if (cur_addr - pre_addr != HNS_HW_PAGE_SIZE) + states[i].head = 1; + } + + pre_addr = cur_addr; + i++; + } +} + +static struct hns_dca_page_state *alloc_dca_states(void *pages, int count) +{ + struct hns_dca_page_state *states; + + states = kcalloc(count, sizeof(*states), GFP_KERNEL); + if (!states) + return NULL; + + init_dca_umem_states(states, count, pages); + + return states; +} + +/* user DCA is managed by ucontext */ +static inline struct hns_roce_dca_ctx * +to_hr_dca_ctx(struct hns_roce_ucontext *uctx) +{ + return &uctx->dca_ctx; +} + +static void unregister_dca_mem(struct hns_roce_ucontext *uctx, + struct dca_mem *mem) +{ + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + unsigned long flags; + void *states, *pages; + + spin_lock_irqsave(&ctx->pool_lock, flags); + + spin_lock(&mem->lock); + mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; + mem->page_count = 0; + pages = mem->pages; + mem->pages = NULL; + states = mem->states; + mem->states = NULL; + spin_unlock(&mem->lock); + + ctx->free_mems--; + ctx->free_size -= mem->size; + + ctx->total_size -= mem->size; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + kfree(states); + ib_umem_release(pages); +} + +static int register_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx, + struct dca_mem *mem, struct dca_mem_attr *attr) +{ + struct hns_roce_dca_ctx *ctx = 
to_hr_dca_ctx(uctx); + void *states, *pages; + unsigned long flags; + + pages = alloc_dca_pages(hr_dev, mem, attr); + if (!pages) + return -ENOMEM; + + states = alloc_dca_states(pages, mem->page_count); + if (!states) { + ib_umem_release(pages); + return -ENOMEM; + } + + spin_lock_irqsave(&ctx->pool_lock, flags); + + spin_lock(&mem->lock); + mem->pages = pages; + mem->states = states; + mem->key = attr->key; + mem->size = attr->size; + mem->flags |= DCA_MEM_FLAGS_REGISTERED; + spin_unlock(&mem->lock); + + ctx->free_mems++; + ctx->free_size += attr->size; + ctx->total_size += attr->size; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + return 0; +} + +static void init_dca_context(struct hns_roce_dca_ctx *ctx) +{ + INIT_LIST_HEAD(&ctx->pool); + spin_lock_init(&ctx->pool_lock); + ctx->total_size = 0; +} + +static void cleanup_dca_context(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx) +{ + struct dca_mem *mem, *tmp; + unsigned long flags; + + spin_lock_irqsave(&ctx->pool_lock, flags); + list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { + list_del(&mem->list); + mem->flags = 0; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + kfree(mem->states); + ib_umem_release(mem->pages); + kfree(mem); + + spin_lock_irqsave(&ctx->pool_lock, flags); + } + ctx->total_size = 0; + spin_unlock_irqrestore(&ctx->pool_lock, flags); +} + +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) + return; + + init_dca_context(&uctx->dca_ctx); +} + +void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) + return; + + cleanup_dca_context(hr_dev, &uctx->dca_ctx); +} + +static struct dca_mem *alloc_dca_mem(struct hns_roce_dca_ctx *ctx) +{ + struct dca_mem *mem, *tmp, *found = NULL; + unsigned long flags; + + spin_lock_irqsave(&ctx->pool_lock, flags); + list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { + spin_lock(&mem->lock); + if (!mem->flags) { + found = mem; + mem->flags |= DCA_MEM_FLAGS_ALLOCED; + spin_unlock(&mem->lock); + break; + } + spin_unlock(&mem->lock); + } + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + if (found) + return found; + + mem = kzalloc(sizeof(*mem), GFP_NOWAIT); + if (!mem) + return NULL; + + spin_lock_init(&mem->lock); + INIT_LIST_HEAD(&mem->list); + + mem->flags |= DCA_MEM_FLAGS_ALLOCED; + + spin_lock_irqsave(&ctx->pool_lock, flags); + list_add(&mem->list, &ctx->pool); + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + return mem; +} + +static void free_dca_mem(struct dca_mem *mem) +{ + /* We cannot hold the whole pool's lock during the DCA is working + * until cleanup the context in cleanup_dca_context(), so we just + * set the DCA mem state as free when destroying DCA mem object. 
+ */ + spin_lock(&mem->lock); + mem->flags = 0; + spin_unlock(&mem->lock); +} + +static inline struct hns_roce_ucontext * +uverbs_attr_to_hr_uctx(struct uverbs_attr_bundle *attrs) +{ + return rdma_udata_to_drv_context(&attrs->driver_udata, + struct hns_roce_ucontext, ibucontext); +} + +static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)( + struct uverbs_attr_bundle *attrs) +{ + struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs); + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, HNS_IB_ATTR_DCA_MEM_REG_HANDLE); + struct dca_mem_attr init_attr = {}; + struct dca_mem *mem; + int ret; + + ret = uverbs_copy_from(&init_attr.addr, attrs, + HNS_IB_ATTR_DCA_MEM_REG_ADDR); + if (!ret) + ret = uverbs_copy_from(&init_attr.size, attrs, + HNS_IB_ATTR_DCA_MEM_REG_LEN); + if (!ret) + ret = uverbs_copy_from(&init_attr.key, attrs, + HNS_IB_ATTR_DCA_MEM_REG_KEY); + if (ret) + return ret; + + if (!init_attr.size) + return -EINVAL; + + init_attr.size = hr_hw_page_align(init_attr.size); + + mem = alloc_dca_mem(to_hr_dca_ctx(uctx)); + if (!mem) + return -ENOMEM; + + ret = register_dca_mem(hr_dev, uctx, mem, &init_attr); + if (ret) { + free_dca_mem(mem); + return ret; + } + + uobj->object = mem; + + return 0; +} + +static int dca_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs); + struct dca_mem *mem; + + /* One DCA MEM maybe shared by many QPs, so the DCA mem uobject must + * be destroyed before all QP uobjects, and we will destroy the DCA + * uobjects when cleanup DCA context by calling hns_roce_cleanup_dca(). + */ + if (why == RDMA_REMOVE_CLOSE || why == RDMA_REMOVE_DRIVER_REMOVE) + return 0; + + mem = uobject->object; + unregister_dca_mem(uctx, mem); + free_dca_mem(mem); + + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + HNS_IB_METHOD_DCA_MEM_REG, + UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, HNS_IB_OBJECT_DCA_MEM, + UVERBS_ACCESS_NEW, UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_LEN, UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_ADDR, UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_KEY, UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + HNS_IB_METHOD_DCA_MEM_DEREG, + UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, HNS_IB_OBJECT_DCA_MEM, + UVERBS_ACCESS_DESTROY, UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM, + UVERBS_TYPE_ALLOC_IDR(dca_cleanup), + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG), + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG)); + +static bool dca_is_supported(struct ib_device *device) +{ + struct hns_roce_dev *dev = to_hr_dev(device); + + return dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE; +} + +const struct uapi_definition hns_roce_dca_uapi_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + HNS_IB_OBJECT_DCA_MEM, + UAPI_DEF_IS_OBJ_SUPPORTED(dca_is_supported)), + {} +}; diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h new file mode 100644 index 000000000..e303c3cae --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2022 Hisilicon Limited. All rights reserved. 
+ */ + +#ifndef __HNS_ROCE_DCA_H +#define __HNS_ROCE_DCA_H + +#include <rdma/uverbs_ioctl.h> + +/* DCA page state (32 bit) */ +struct hns_dca_page_state { + u32 buf_id : 29; /* If zero, means page can be used by any buffer. */ + u32 lock : 1; /* @buf_id locked this page to prepare access. */ + u32 active : 1; /* @buf_id is accessing this page. */ + u32 head : 1; /* This page is the head in a continuous address range. */ +}; + +extern const struct uapi_definition hns_roce_dca_uapi_defs[]; + +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx); +void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx); + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7e65c05cb..5d54b657b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -148,6 +148,7 @@ enum { HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12), HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), + HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15), HNS_ROCE_CAP_FLAG_STASH = BIT(17), HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), HNS_ROCE_CAP_FLAG_BOND = BIT(21), @@ -207,6 +208,14 @@ struct hns_user_mmap_entry { u64 address; };
+struct hns_roce_dca_ctx { + struct list_head pool; /* all DCA mems link to @pool */ + spinlock_t pool_lock; /* protect @pool */ + unsigned int free_mems; /* free mem num in pool */ + size_t free_size; /* free mem size in pool */ + size_t total_size; /* total size in pool */ +}; + struct hns_roce_ucontext { struct ib_ucontext ibucontext; struct hns_roce_uar uar; @@ -215,6 +224,7 @@ struct hns_roce_ucontext { struct hns_user_mmap_entry *db_mmap_entry; struct hns_user_mmap_entry *reset_mmap_entry; u32 config; + struct hns_roce_dca_ctx dca_ctx; };
struct hns_roce_pd { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index e65310f13..b5ec62ed3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,6 +42,7 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" #include "hns_roce_hw_v2.h" +#include "hns_roce_dca.h"
static struct net_device *hns_roce_get_netdev(struct ib_device *ib_dev, u32 port_num) @@ -451,35 +452,61 @@ static int hns_roce_alloc_reset_entry(struct ib_ucontext *uctx) return 0; }
-static void hns_roce_get_uctx_config(struct hns_roce_dev *hr_dev, - struct hns_roce_ucontext *context, - struct hns_roce_ib_alloc_ucontext *ucmd, - struct hns_roce_ib_alloc_ucontext_resp *resp) +static void ucontext_set_resp(struct ib_ucontext *uctx, + struct hns_roce_ib_alloc_ucontext_resp *resp) { + struct hns_roce_ucontext *context = to_hr_ucontext(uctx); + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); + struct rdma_user_mmap_entry *rdma_entry; + + resp->qp_tab_size = hr_dev->caps.num_qps; + resp->srq_tab_size = hr_dev->caps.num_srqs; + resp->cqe_size = hr_dev->caps.cqe_sz; + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) - context->config = ucmd->config & HNS_ROCE_EXSGE_FLAGS; + resp->congest_type = hr_dev->caps.cong_cap; + + if (context->reset_mmap_entry) { + rdma_entry = &context->reset_mmap_entry->rdma_entry; + resp->reset_mmap_key = rdma_user_mmap_get_offset(rdma_entry); + }
if (context->config & HNS_ROCE_EXSGE_FLAGS) { resp->config |= HNS_ROCE_RSP_EXSGE_FLAGS; resp->max_inline_data = hr_dev->caps.max_sq_inline; }
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if (context->config & HNS_ROCE_RQ_INLINE_FLAGS) + resp->config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS; + + if (context->config & HNS_ROCE_CQE_INLINE_FLAGS) + resp->config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS; + + if (context->config & HNS_ROCE_UCTX_DYN_QP_PGSZ) + resp->config |= HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ; + + if (context->config & HNS_ROCE_UCTX_CONFIG_DCA) + resp->config |= HNS_ROCE_UCTX_RSP_DCA_FLAGS; +} + +static void hns_roce_get_uctx_config(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *context, + struct hns_roce_ib_alloc_ucontext *ucmd) +{ + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + context->config = ucmd->config & HNS_ROCE_EXSGE_FLAGS; + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) context->config |= ucmd->config & HNS_ROCE_RQ_INLINE_FLAGS; - if (context->config & HNS_ROCE_RQ_INLINE_FLAGS) - resp->config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS; - }
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) context->config |= ucmd->config & HNS_ROCE_CQE_INLINE_FLAGS; - if (context->config & HNS_ROCE_CQE_INLINE_FLAGS) - resp->config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS; - }
- if (ucmd->config & HNS_ROCE_UCTX_DYN_QP_PGSZ) { + if (ucmd->config & HNS_ROCE_UCTX_DYN_QP_PGSZ) context->config |= HNS_ROCE_UCTX_DYN_QP_PGSZ; - resp->config |= HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ; - } + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) + context->config |= ucmd->config & HNS_ROCE_UCTX_CONFIG_DCA; }
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, @@ -489,24 +516,17 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_ib_alloc_ucontext ucmd = {}; - struct rdma_user_mmap_entry *rdma_entry; int ret = -EAGAIN;
if (!hr_dev->active) goto error_out;
- resp.qp_tab_size = hr_dev->caps.num_qps; - resp.srq_tab_size = hr_dev->caps.num_srqs; - ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) goto error_out;
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) - resp.congest_type = hr_dev->caps.cong_cap; - - hns_roce_get_uctx_config(hr_dev, context, &ucmd, &resp); + hns_roce_get_uctx_config(hr_dev, context, &ucmd);
ret = hns_roce_uar_alloc(hr_dev, &context->uar); if (ret) @@ -522,17 +542,13 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_init(&context->page_mutex); }
+ hns_roce_register_udca(hr_dev, context); + ret = hns_roce_alloc_reset_entry(uctx); if (ret) goto error_fail_reset_entry;
- if (context->reset_mmap_entry) { - rdma_entry = &context->reset_mmap_entry->rdma_entry; - resp.reset_mmap_key = rdma_user_mmap_get_offset(rdma_entry); - } - - resp.cqe_size = hr_dev->caps.cqe_sz; - + ucontext_set_resp(uctx, &resp); ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (ret) @@ -541,6 +557,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, return 0;
error_fail_copy_to_udata: + hns_roce_unregister_udca(hr_dev, context); hns_roce_dealloc_reset_entry(context);
error_fail_reset_entry: @@ -560,6 +577,8 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
+ hns_roce_unregister_udca(hr_dev, context); + hns_roce_dealloc_uar_entry(context); hns_roce_dealloc_reset_entry(context);
@@ -768,6 +787,11 @@ normal_unregister: ib_unregister_device(&hr_dev->ib_dev); }
+const struct uapi_definition hns_roce_uapi_defs[] = { + UAPI_DEF_CHAIN(hns_roce_dca_uapi_defs), + {} +}; + static const struct ib_device_ops hns_roce_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_HNS, @@ -902,6 +926,10 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops); + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + ib_dev->driver_def = hns_roce_uapi_defs; + for (i = 0; i < hr_dev->caps.num_ports; i++) { net_dev = get_hr_netdev(hr_dev, i); if (!net_dev) diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 1d5161201..70d4e0468 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -120,6 +120,7 @@ enum { HNS_ROCE_EXSGE_FLAGS = 1 << 0, HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2, + HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3, HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4, };
@@ -127,6 +128,7 @@ enum { HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0, HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1, HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2, + HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA, HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ, };
@@ -158,4 +160,27 @@ struct hns_roce_ib_create_ah_resp { __u8 tc_mode; };
+#define UVERBS_ID_NS_MASK 0xF000 +#define UVERBS_ID_NS_SHIFT 12 + +enum hns_ib_objects { + HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum hns_ib_dca_mem_methods { + HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_METHOD_DCA_MEM_DEREG, +}; + +enum hns_ib_dca_mem_reg_attrs { + HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_REG_LEN, + HNS_IB_ATTR_DCA_MEM_REG_ADDR, + HNS_IB_ATTR_DCA_MEM_REG_KEY, +}; + +enum hns_ib_dca_mem_dereg_attrs { + HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + #endif /* HNS_ABI_USER_H */
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
If no QP is using a DCA mem object, the userspace driver can destroy it. So add a new method 'HNS_IB_METHOD_DCA_MEM_SHRINK' that allows the userspace driver to remove an object from the DCA memory pool.
If a DCA mem object has been shrunk, the userspace driver can destroy it with the 'HNS_IB_METHOD_DCA_MEM_DEREG' method and free the buffer that was allocated in userspace.
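The userspace side of this flow could look like the hedged sketch below: keep shrinking while the kernel reports idle mem objects, and deregister each reported key. The hns_dca_mem_shrink()/hns_dca_mem_dereg() wrappers are assumptions standing in for the HNS_IB_METHOD_DCA_MEM_SHRINK and HNS_IB_METHOD_DCA_MEM_DEREG ioctls; they are not part of this series.

#include <stdint.h>

struct shrink_result {
	uint64_t free_key;  /* key of the first reclaimable mem object */
	uint32_t free_mems; /* mem objects with no page in use */
};

/* Assumed wrappers around the uverbs methods added by this series. */
int hns_dca_mem_shrink(void *ctx, uint64_t reserved_size,
		       struct shrink_result *res);
int hns_dca_mem_dereg(void *ctx, uint64_t key);

static int shrink_dca_pool(void *ctx, uint64_t reserved_size)
{
	struct shrink_result res;
	int ret;

	do {
		ret = hns_dca_mem_shrink(ctx, reserved_size, &res);
		if (ret || !res.free_mems)
			break;
		/* The kernel reported one idle mem object; release it. */
		ret = hns_dca_mem_dereg(ctx, res.free_key);
	} while (!ret);

	return ret;
}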
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_dca.c | 146 ++++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_dca.h |   7 ++
 include/uapi/rdma/hns-abi.h              |   8 ++
 3 files changed, 160 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 190d7efd1..84ff70c0a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -35,6 +35,16 @@ struct dca_mem_attr { u32 size; };
+static inline bool dca_page_is_free(struct hns_dca_page_state *state) +{ + return state->buf_id == HNS_DCA_INVALID_BUF_ID; +} + +static inline bool dca_mem_is_available(struct dca_mem *mem) +{ + return mem->flags == (DCA_MEM_FLAGS_ALLOCED | DCA_MEM_FLAGS_REGISTERED); +} + static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, struct dca_mem *mem, struct dca_mem_attr *attr) { @@ -88,6 +98,41 @@ static struct hns_dca_page_state *alloc_dca_states(void *pages, int count) return states; }
+#define DCA_MEM_STOP_ITERATE -1 +#define DCA_MEM_NEXT_ITERATE -2 +static void travel_dca_pages(struct hns_roce_dca_ctx *ctx, void *param, + int (*cb)(struct dca_mem *, int, void *)) +{ + struct dca_mem *mem, *tmp; + unsigned long flags; + bool avail; + int ret; + int i; + + spin_lock_irqsave(&ctx->pool_lock, flags); + list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + spin_lock(&mem->lock); + avail = dca_mem_is_available(mem); + ret = 0; + for (i = 0; avail && i < mem->page_count; i++) { + ret = cb(mem, i, param); + if (ret == DCA_MEM_STOP_ITERATE || + ret == DCA_MEM_NEXT_ITERATE) + break; + } + spin_unlock(&mem->lock); + spin_lock_irqsave(&ctx->pool_lock, flags); + + if (ret == DCA_MEM_STOP_ITERATE) + goto done; + } + +done: + spin_unlock_irqrestore(&ctx->pool_lock, flags); +} + /* user DCA is managed by ucontext */ static inline struct hns_roce_dca_ctx * to_hr_dca_ctx(struct hns_roce_ucontext *uctx) @@ -159,6 +204,63 @@ static int register_dca_mem(struct hns_roce_dev *hr_dev, return 0; }
+struct dca_mem_shrink_attr { + u64 shrink_key; + u32 shrink_mems; +}; + +static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_mem_shrink_attr *attr = param; + struct hns_dca_page_state *state; + int i, free_pages; + + free_pages = 0; + for (i = 0; i < mem->page_count; i++) { + state = &mem->states[i]; + if (dca_page_is_free(state)) + free_pages++; + } + + /* No pages are in use */ + if (free_pages == mem->page_count) { + /* unregister first empty DCA mem */ + if (!attr->shrink_mems) { + mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; + attr->shrink_key = mem->key; + } + + attr->shrink_mems++; + } + + if (attr->shrink_mems > 1) + return DCA_MEM_STOP_ITERATE; + else + return DCA_MEM_NEXT_ITERATE; +} + +static int shrink_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx, u64 reserved_size, + struct hns_dca_shrink_resp *resp) +{ + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + struct dca_mem_shrink_attr attr = {}; + unsigned long flags; + bool need_shink; + + spin_lock_irqsave(&ctx->pool_lock, flags); + need_shink = ctx->free_mems > 0 && ctx->free_size > reserved_size; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + if (!need_shink) + return 0; + + travel_dca_pages(ctx, &attr, shrink_dca_page_proc); + resp->free_mems = attr.shrink_mems; + resp->free_key = attr.shrink_key; + + return 0; +} + static void init_dca_context(struct hns_roce_dca_ctx *ctx) { INIT_LIST_HEAD(&ctx->pool); @@ -339,10 +441,52 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, HNS_IB_OBJECT_DCA_MEM, UVERBS_ACCESS_DESTROY, UA_MANDATORY));
+static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_SHRINK)( + struct uverbs_attr_bundle *attrs) +{ + struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs); + struct hns_dca_shrink_resp resp = {}; + u64 reserved_size = 0; + int ret; + + ret = uverbs_copy_from(&reserved_size, attrs, + HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE); + if (ret) + return ret; + + ret = shrink_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx, + reserved_size, &resp); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, + &resp.free_key, sizeof(resp.free_key)); + if (!ret) + ret = uverbs_copy_to(attrs, + HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, + &resp.free_mems, sizeof(resp.free_mems)); + if (ret) + return ret; + + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + HNS_IB_METHOD_DCA_MEM_SHRINK, + UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, + HNS_IB_OBJECT_DCA_MEM, UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, + UVERBS_ATTR_TYPE(u64), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, + UVERBS_ATTR_TYPE(u64), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM, UVERBS_TYPE_ALLOC_IDR(dca_cleanup), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG), - &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG)); + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG), + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK));
static bool dca_is_supported(struct ib_device *device) { diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index e303c3cae..14153a96a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -18,6 +18,13 @@ struct hns_dca_page_state {
extern const struct uapi_definition hns_roce_dca_uapi_defs[];
+struct hns_dca_shrink_resp { + u64 free_key; /* free buffer's key which registered by the user */ + u32 free_mems; /* free buffer count which no any QP be using */ +}; + +#define HNS_DCA_INVALID_BUF_ID 0UL + void hns_roce_register_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx); void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 70d4e0468..aa3803c9d 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -170,6 +170,7 @@ enum hns_ib_objects { enum hns_ib_dca_mem_methods { HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT), HNS_IB_METHOD_DCA_MEM_DEREG, + HNS_IB_METHOD_DCA_MEM_SHRINK, };
enum hns_ib_dca_mem_reg_attrs { @@ -183,4 +184,11 @@ enum hns_ib_dca_mem_dereg_attrs { HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), };
+enum hns_ib_dca_mem_shrink_attrs { + HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, + HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, + HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, +}; + #endif /* HNS_ABI_USER_H */
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
If the userspace driver passes a NULL 'buf_addr' in 'struct hns_roce_ib_create_qp' when creating a QP, the kernel driver needs to set the QP up in DCA mode. So add a QP capability bit in the response to indicate to the userspace driver that DCA mode has been enabled.
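From the userspace point of view the handshake is simple; the snippet below is only an illustration and assumes the updated <rdma/hns-abi.h> from this series is installed and that the capability is reported through the cap_flags field of struct hns_roce_ib_create_qp_resp.

#include <stdbool.h>
#include <rdma/hns-abi.h>

/*
 * Request DCA by leaving buf_addr at 0 in struct hns_roce_ib_create_qp,
 * then check whether the kernel actually enabled it for this QP.
 */
static bool qp_runs_in_dca_mode(const struct hns_roce_ib_create_qp_resp *resp)
{
	return resp->cap_flags & HNS_ROCE_QP_CAP_DCA;
}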
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_dca.c    |  27 +++--
 drivers/infiniband/hw/hns/hns_roce_dca.h    |   4 +
 drivers/infiniband/hw/hns/hns_roce_device.h |   5 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  12 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 115 +++++++++++++++-----
 include/uapi/rdma/hns-abi.h                 |   1 +
 6 files changed, 125 insertions(+), 39 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 84ff70c0a..72aa86f29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -239,7 +239,7 @@ static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param) return DCA_MEM_NEXT_ITERATE; }
-static int shrink_dca_mem(struct hns_roce_dev *hr_dev, +static void shrink_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx, u64 reserved_size, struct hns_dca_shrink_resp *resp) { @@ -252,13 +252,11 @@ static int shrink_dca_mem(struct hns_roce_dev *hr_dev, need_shink = ctx->free_mems > 0 && ctx->free_size > reserved_size; spin_unlock_irqrestore(&ctx->pool_lock, flags); if (!need_shink) - return 0; + return;
travel_dca_pages(ctx, &attr, shrink_dca_page_proc); resp->free_mems = attr.shrink_mems; resp->free_key = attr.shrink_key; - - return 0; }
static void init_dca_context(struct hns_roce_dca_ctx *ctx) @@ -356,6 +354,21 @@ static void free_dca_mem(struct dca_mem *mem) spin_unlock(&mem->lock); }
+void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + + cfg->buf_id = HNS_DCA_INVALID_BUF_ID; +} + +void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + + cfg->buf_id = HNS_DCA_INVALID_BUF_ID; +} + static inline struct hns_roce_ucontext * uverbs_attr_to_hr_uctx(struct uverbs_attr_bundle *attrs) { @@ -454,10 +467,8 @@ static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_SHRINK)( if (ret) return ret;
- ret = shrink_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx, - reserved_size, &resp); - if (ret) - return ret; + shrink_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx, + reserved_size, &resp);
ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, &resp.free_key, sizeof(resp.free_key)); diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index 14153a96a..c930b4616 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -30,4 +30,8 @@ void hns_roce_register_udca(struct hns_roce_dev *hr_dev, void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx);
+void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp); +void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5d54b657b..d9bb48d35 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -324,6 +324,10 @@ struct hns_roce_mtr { struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ };
+struct hns_roce_dca_cfg { + u32 buf_id; +}; + struct hns_roce_mw { struct ib_mw ibmw; u32 pdn; @@ -628,6 +632,7 @@ struct hns_roce_qp { struct hns_roce_wq sq;
struct hns_roce_mtr mtr; + struct hns_roce_dca_cfg dca_cfg;
u32 buff_size; struct mutex mutex; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 57ded25df..246f75224 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4625,6 +4625,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> (32 + 16 + 4)); hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) { + hr_reg_enable(context, QPC_DCA_MODE); + hr_reg_clear(qpc_mask, QPC_DCA_MODE); + } + } else { + /* reset IRRL_HEAD */ + hr_reg_clear(qpc_mask, QPC_V2_IRRL_HEAD); + } + context->irrl_ba = cpu_to_le32(irrl_ba >> 6); qpc_mask->irrl_ba = 0; hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> (32 + 6)); @@ -4759,8 +4769,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
hr_reg_clear(qpc_mask, QPC_CHECK_FLG);
- hr_reg_clear(qpc_mask, QPC_V2_IRRL_HEAD); - return 0; }
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 33c1a5cef..082ed09f4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -37,6 +37,7 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" +#include "hns_roce_dca.h"
static void flush_work_handle(struct work_struct *work) { @@ -640,9 +641,22 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return 0; }
+static bool check_dca_is_enable(struct hns_roce_dev *hr_dev, bool is_user, + unsigned long addr) +{ + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE)) + return false; + + /* If the user QP's buffer addr is 0, the DCA mode should be enabled */ + if (is_user) + return !addr; + + return false; +} + static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, u8 page_shift, - struct hns_roce_buf_attr *buf_attr) + struct hns_roce_qp *hr_qp, bool dca_en, + u8 page_shift, struct hns_roce_buf_attr *buf_attr) { unsigned int page_size = BIT(page_shift); int buf_size; @@ -653,6 +667,13 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, if (page_shift > PAGE_SHIFT || page_shift < HNS_HW_PAGE_SHIFT) return -EOPNOTSUPP;
+ /* + * When enable DCA, there's no need to alloc buffer now, and + * the page shift should be fixed to 4K. + */ + if (dca_en && page_shift != HNS_HW_PAGE_SHIFT) + return -EOPNOTSUPP; + /* SQ WQE */ hr_qp->sq.offset = 0; buf_size = ALIGN(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); @@ -686,6 +707,7 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, if (hr_qp->buff_size < 1) return -EINVAL;
+ buf_attr->mtt_only = dca_en; buf_attr->region_count = idx; buf_attr->page_shift = page_shift;
@@ -741,7 +763,48 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) return 1; }
-static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, +static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + bool dca_en, struct hns_roce_buf_attr *buf_attr, + struct ib_udata *udata, unsigned long addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + if (dca_en) { + /* DCA must be enabled after the buffer size is configured. */ + hns_roce_enable_dca(hr_dev, hr_qp); + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DCA; + } else { + /* + * Because DCA and DWQE share the same fileds in RCWQE buffer, + * so DWQE only supported when DCA is disable. + */ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE; + } + + ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, buf_attr, + PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + udata, addr); + if (ret) { + ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); + if (dca_en) + hns_roce_disable_dca(hr_dev, hr_qp); + } + + return ret; +} + +static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) +{ + hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); + + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) + hns_roce_disable_dca(hr_dev, hr_qp); +} + +static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct hns_roce_ib_create_qp *ucmd) @@ -751,37 +814,31 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; u8 page_shift = HNS_HW_PAGE_SHIFT; + bool dca_en; int ret;
if (uctx && (uctx->config & HNS_ROCE_UCTX_DYN_QP_PGSZ)) page_shift = ucmd->pageshift;
- ret = set_wqe_buf_attr(hr_dev, hr_qp, page_shift, &buf_attr); + dca_en = check_dca_is_enable(hr_dev, !!udata, ucmd->buf_addr); + ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, page_shift, &buf_attr); if (ret) { ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); - goto err_inline; - } - ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, - PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, - udata, ucmd->buf_addr); - if (ret) { - ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); - goto err_inline; + return ret; }
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE) - hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE; - - return 0; - -err_inline: + ret = alloc_wqe_buf(hr_dev, hr_qp, dca_en, + &buf_attr, udata, ucmd->buf_addr); + if (ret) + ibdev_err(ibdev, "failed to alloc WQE buf, ret = %d.\n", ret);
return ret; }
-static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +static void free_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) { - hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); + free_wqe_buf(hr_dev, hr_qp, udata); }
static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev, @@ -1159,18 +1216,18 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } }
- ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, &ucmd); - if (ret) { - ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); - goto err_buf; - } - ret = alloc_qpn(hr_dev, hr_qp, init_attr); if (ret) { ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_qpn; }
+ ret = alloc_qp_wqe(hr_dev, hr_qp, init_attr, udata, &ucmd); + if (ret) { + ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); + goto err_buf; + } + ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); if (ret) { ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n", @@ -1221,10 +1278,10 @@ err_store: err_qpc: free_qp_db(hr_dev, hr_qp, udata); err_db: + free_qp_wqe(hr_dev, hr_qp, udata); +err_buf: free_qpn(hr_dev, hr_qp); err_qpn: - free_qp_buf(hr_dev, hr_qp); -err_buf: free_kernel_wrid(hr_qp); return ret; } @@ -1238,7 +1295,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
free_qpc(hr_dev, hr_qp); free_qpn(hr_dev, hr_qp); - free_qp_buf(hr_dev, hr_qp); + free_qp_wqe(hr_dev, hr_qp, udata); free_kernel_wrid(hr_qp); free_qp_db(hr_dev, hr_qp, udata); } diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index aa3803c9d..b5954ba52 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -102,6 +102,7 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, + HNS_ROCE_QP_CAP_DCA = 1 << 4, HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, };
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
If a uQP works in DCA mode, the userspace driver needs to configure the WQE buffer by calling the 'HNS_IB_METHOD_DCA_MEM_ATTACH' method before filling in WQEs. This method allocates a group of pages from the DCA memory pool and writes the addressing configuration to the QPC.
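A hedged sketch of the attach loop implied by this behaviour is shown below. The hns_dca_mem_attach()/hns_dca_mem_reg() wrappers are assumptions standing in for the HNS_IB_METHOD_DCA_MEM_ATTACH and HNS_IB_METHOD_DCA_MEM_REG ioctls: when the attach returns zero allocated pages, the pool has run out of free pages, so the caller grows the pool and retries, matching the "try again after the pool increased" comment in attach_dca_mem().

#include <stdint.h>

struct attach_resp {
	uint32_t alloc_flags; /* HNS_IB_ATTACH_FLAGS_NEW_BUFFER when a new buffer was bound */
	uint32_t alloc_pages; /* 0 means no free pages were available */
};

/* Assumed wrappers around the uverbs methods used in this series. */
int hns_dca_mem_attach(void *qp, uint32_t sq_off, uint32_t sge_off,
		       uint32_t rq_off, struct attach_resp *resp);
int hns_dca_mem_reg(void *ctx, void *buf, uint64_t key, uint32_t size);

static int attach_wqe_buf(void *ctx, void *qp, uint32_t sq_off,
			  uint32_t sge_off, uint32_t rq_off,
			  void *spare_buf, uint64_t key, uint32_t size)
{
	struct attach_resp resp;
	int ret;

	ret = hns_dca_mem_attach(qp, sq_off, sge_off, rq_off, &resp);
	if (ret)
		return ret;
	if (resp.alloc_pages)
		return 0; /* WQE buffer is bound; WQEs can be filled now */

	/* Pool exhausted: register another buffer, then retry once. */
	ret = hns_dca_mem_reg(ctx, spare_buf, key, size);
	if (ret)
		return ret;

	return hns_dca_mem_attach(qp, sq_off, sge_off, rq_off, &resp);
}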
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
Reviewed-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Juan Zhou <zhoujuan51@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_dca.c    | 466 +++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_dca.h    |  25 ++
 drivers/infiniband/hw/hns/hns_roce_device.h |  13 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  15 +-
 include/uapi/rdma/hns-abi.h                 |  11 +
 5 files changed, 523 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 72aa86f29..9099c2e0c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -35,11 +35,53 @@ struct dca_mem_attr { u32 size; };
+static inline void set_dca_page_to_free(struct hns_dca_page_state *state) +{ + state->buf_id = HNS_DCA_INVALID_BUF_ID; + state->active = 0; + state->lock = 0; +} + +static inline void lock_dca_page_to_attach(struct hns_dca_page_state *state, + u32 buf_id) +{ + state->buf_id = HNS_DCA_ID_MASK & buf_id; + state->active = 0; + state->lock = 1; +} + +static inline void unlock_dca_page_to_active(struct hns_dca_page_state *state, + u32 buf_id) +{ + state->buf_id = HNS_DCA_ID_MASK & buf_id; + state->active = 1; + state->lock = 0; +} + static inline bool dca_page_is_free(struct hns_dca_page_state *state) { return state->buf_id == HNS_DCA_INVALID_BUF_ID; }
+static inline bool dca_page_is_attached(struct hns_dca_page_state *state, + u32 buf_id) +{ + /* only the own bit needs to be matched. */ + return (HNS_DCA_OWN_MASK & buf_id) == + (HNS_DCA_OWN_MASK & state->buf_id); +} + +static inline bool dca_page_is_allocated(struct hns_dca_page_state *state, + u32 buf_id) +{ + return dca_page_is_attached(state, buf_id) && state->lock; +} + +static inline bool dca_page_is_inactive(struct hns_dca_page_state *state) +{ + return !state->lock && !state->active; +} + static inline bool dca_mem_is_available(struct dca_mem *mem) { return mem->flags == (DCA_MEM_FLAGS_ALLOCED | DCA_MEM_FLAGS_REGISTERED); @@ -354,11 +396,366 @@ static void free_dca_mem(struct dca_mem *mem) spin_unlock(&mem->lock); }
+static inline struct hns_roce_dca_ctx *hr_qp_to_dca_ctx(struct hns_roce_qp *qp) +{ + return to_hr_dca_ctx(to_hr_ucontext(qp->ibqp.pd->uobject->context)); +} + +struct dca_page_clear_attr { + u32 buf_id; + u32 max_pages; + u32 clear_pages; +}; + +static int clear_dca_pages_proc(struct dca_mem *mem, int index, void *param) +{ + struct hns_dca_page_state *state = &mem->states[index]; + struct dca_page_clear_attr *attr = param; + + if (dca_page_is_attached(state, attr->buf_id)) { + set_dca_page_to_free(state); + attr->clear_pages++; + } + + if (attr->clear_pages >= attr->max_pages) + return DCA_MEM_STOP_ITERATE; + else + return 0; +} + +static void clear_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +{ + struct dca_page_clear_attr attr = {}; + + attr.buf_id = buf_id; + attr.max_pages = count; + travel_dca_pages(ctx, &attr, clear_dca_pages_proc); +} + +struct dca_page_assign_attr { + u32 buf_id; + int unit; + int total; + int max; +}; + +static bool dca_page_is_allocable(struct hns_dca_page_state *state, bool head) +{ + bool is_free = dca_page_is_free(state) || dca_page_is_inactive(state); + + return head ? is_free : is_free && !state->head; +} + +static int assign_dca_pages_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_page_assign_attr *attr = param; + struct hns_dca_page_state *state; + int checked_pages = 0; + int start_index = 0; + int free_pages = 0; + int i; + + /* Check the continuous pages count is not smaller than unit count */ + for (i = index; free_pages < attr->unit && i < mem->page_count; i++) { + checked_pages++; + state = &mem->states[i]; + if (dca_page_is_allocable(state, free_pages == 0)) { + if (free_pages == 0) + start_index = i; + + free_pages++; + } else { + free_pages = 0; + } + } + + if (free_pages < attr->unit) + return DCA_MEM_NEXT_ITERATE; + + for (i = 0; i < free_pages; i++) { + state = &mem->states[start_index + i]; + lock_dca_page_to_attach(state, attr->buf_id); + attr->total++; + } + + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; + + return checked_pages; +} + +static u32 assign_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count, + u32 unit) +{ + struct dca_page_assign_attr attr = {}; + + attr.buf_id = buf_id; + attr.unit = unit; + attr.max = count; + travel_dca_pages(ctx, &attr, assign_dca_pages_proc); + return attr.total; +} + +struct dca_page_active_attr { + u32 buf_id; + u32 max_pages; + u32 alloc_pages; + u32 dirty_mems; +}; + +static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_page_active_attr *attr = param; + struct hns_dca_page_state *state; + bool changed = false; + bool stop = false; + int i, free_pages; + + free_pages = 0; + for (i = 0; !stop && i < mem->page_count; i++) { + state = &mem->states[i]; + if (dca_page_is_free(state)) { + free_pages++; + } else if (dca_page_is_allocated(state, attr->buf_id)) { + free_pages++; + /* Change matched pages state */ + unlock_dca_page_to_active(state, attr->buf_id); + changed = true; + attr->alloc_pages++; + if (attr->alloc_pages == attr->max_pages) + stop = true; + } + } + + for (; changed && i < mem->page_count; i++) + if (dca_page_is_free(state)) + free_pages++; + + /* Clean mem changed to dirty */ + if (changed && free_pages == mem->page_count) + attr->dirty_mems++; + + return stop ? 
DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; +} + +static u32 active_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +{ + struct dca_page_active_attr attr = {}; + unsigned long flags; + + attr.buf_id = buf_id; + attr.max_pages = count; + travel_dca_pages(ctx, &attr, active_dca_pages_proc); + + /* Update free size */ + spin_lock_irqsave(&ctx->pool_lock, flags); + ctx->free_mems -= attr.dirty_mems; + ctx->free_size -= attr.alloc_pages << HNS_HW_PAGE_SHIFT; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + return attr.alloc_pages; +} + +struct dca_get_alloced_pages_attr { + u32 buf_id; + dma_addr_t *pages; + u32 total; + u32 max; +}; + +static int get_alloced_umem_proc(struct dca_mem *mem, int index, void *param) + +{ + struct dca_get_alloced_pages_attr *attr = param; + struct hns_dca_page_state *states = mem->states; + struct ib_umem *umem = mem->pages; + struct ib_block_iter biter; + u32 i = 0; + + rdma_for_each_block(umem->sgt_append.sgt.sgl, &biter, + umem->sgt_append.sgt.nents, HNS_HW_PAGE_SIZE) { + if (dca_page_is_allocated(&states[i], attr->buf_id)) { + attr->pages[attr->total++] = + rdma_block_iter_dma_address(&biter); + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; + } + i++; + } + + return DCA_MEM_NEXT_ITERATE; +} + +static int apply_dca_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attach_attr) +{ + struct hns_roce_dca_attr attr; + + if (hr_dev->hw->set_dca_buf) { + attr.sq_offset = attach_attr->sq_offset; + attr.sge_offset = attach_attr->sge_offset; + attr.rq_offset = attach_attr->rq_offset; + return hr_dev->hw->set_dca_buf(hr_dev, hr_qp, &attr); + } + + return 0; +} + +static int setup_dca_buf_to_hw(struct hns_roce_dca_ctx *ctx, + struct hns_roce_qp *hr_qp, u32 buf_id, + struct hns_dca_attach_attr *attach_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct dca_get_alloced_pages_attr attr = {}; + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 count = hr_qp->dca_cfg.npages; + dma_addr_t *pages; + int ret; + + /* Alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(count, sizeof(dma_addr_t), GFP_NOWAIT); + if (!pages) + return -ENOMEM; + + attr.buf_id = buf_id; + attr.pages = pages; + attr.max = count; + + travel_dca_pages(ctx, &attr, get_alloced_umem_proc); + if (attr.total != count) { + ibdev_err(ibdev, "failed to get DCA page %u != %u.\n", + attr.total, count); + ret = -ENOMEM; + goto done; + } + + /* Update MTT for ROCEE addressing */ + ret = hns_roce_mtr_map(hr_dev, &hr_qp->mtr, pages, count); + if (ret) { + ibdev_err(ibdev, "failed to map DCA pages, ret = %d.\n", ret); + goto done; + } + + /* Apply the changes for WQE address */ + ret = apply_dca_cfg(hr_dev, hr_qp, attach_attr); + if (ret) + ibdev_err(ibdev, "failed to apply DCA cfg, ret = %d.\n", ret); + +done: + /* Drop tmp array */ + kvfree(pages); + return ret; +} + +static u32 alloc_buf_from_dca_mem(struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx) +{ + u32 buf_pages, unit_pages, alloc_pages; + u32 buf_id; + + buf_pages = hr_qp->dca_cfg.npages; + /* Gen new buf id */ + buf_id = HNS_DCA_TO_BUF_ID(hr_qp->qpn, hr_qp->dca_cfg.attach_count); + + /* Assign pages from free pages */ + unit_pages = hr_qp->mtr.hem_cfg.is_direct ? 
buf_pages : 1; + alloc_pages = assign_dca_pages(ctx, buf_id, buf_pages, unit_pages); + if (buf_pages != alloc_pages) { + if (alloc_pages > 0) + clear_dca_pages(ctx, buf_id, alloc_pages); + return HNS_DCA_INVALID_BUF_ID; + } + + return buf_id; +} + +static int active_alloced_buf(struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx, + struct hns_dca_attach_attr *attr, u32 buf_id) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 active_pages, alloc_pages; + int ret; + + ret = setup_dca_buf_to_hw(ctx, hr_qp, buf_id, attr); + if (ret) { + ibdev_err(ibdev, "failed to setup DCA buf, ret = %d.\n", ret); + goto active_fail; + } + + alloc_pages = hr_qp->dca_cfg.npages; + active_pages = active_dca_pages(ctx, buf_id, alloc_pages); + if (active_pages != alloc_pages) { + ibdev_err(ibdev, "failed to active DCA pages, %u != %u.\n", + active_pages, alloc_pages); + ret = -ENOBUFS; + goto active_fail; + } + + return 0; + +active_fail: + clear_dca_pages(ctx, buf_id, alloc_pages); + return ret; +} + +static int attach_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr, + struct hns_dca_attach_resp *resp) +{ + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + u32 buf_id; + int ret; + + resp->alloc_flags = 0; + spin_lock(&cfg->lock); + buf_id = cfg->buf_id; + /* Already attached */ + if (buf_id != HNS_DCA_INVALID_BUF_ID) { + resp->alloc_pages = cfg->npages; + spin_unlock(&cfg->lock); + return 0; + } + + /* Start to new attach */ + resp->alloc_pages = 0; + buf_id = alloc_buf_from_dca_mem(hr_qp, ctx); + if (buf_id == HNS_DCA_INVALID_BUF_ID) { + spin_unlock(&cfg->lock); + /* No report fail, need try again after the pool increased */ + return 0; + } + + ret = active_alloced_buf(hr_qp, ctx, attr, buf_id); + if (ret) { + spin_unlock(&cfg->lock); + ibdev_err(&hr_dev->ib_dev, + "failed to active DCA buf for QP-%lu, ret = %d.\n", + hr_qp->qpn, ret); + return ret; + } + + /* Attach ok */ + cfg->buf_id = buf_id; + cfg->attach_count++; + spin_unlock(&cfg->lock); + + resp->alloc_flags |= HNS_IB_ATTACH_FLAGS_NEW_BUFFER; + resp->alloc_pages = cfg->npages; + + return 0; +} + void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
+ spin_lock_init(&cfg->lock); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; + cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT; }
void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, @@ -493,11 +890,78 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u64), UA_MANDATORY), UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); + +static inline struct hns_roce_qp * +uverbs_attr_to_hr_qp(struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, 1U << UVERBS_ID_NS_SHIFT); + + if (uobj_get_object_id(uobj) == UVERBS_OBJECT_QP) + return to_hr_qp(uobj->object); + + return NULL; +} + +static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_ATTACH)( + struct uverbs_attr_bundle *attrs) +{ + struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs); + struct hns_dca_attach_attr attr = {}; + struct hns_dca_attach_resp resp = {}; + int ret; + + if (!hr_qp) + return -EINVAL; + + ret = uverbs_copy_from(&attr.sq_offset, attrs, + HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET); + if (!ret) + ret = uverbs_copy_from(&attr.sge_offset, attrs, + HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET); + if (!ret) + ret = uverbs_copy_from(&attr.rq_offset, attrs, + HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET); + if (ret) + return ret; + + ret = attach_dca_mem(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr, + &resp); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, + &resp.alloc_flags, sizeof(resp.alloc_flags)); + if (!ret) + ret = uverbs_copy_to(attrs, + HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, + &resp.alloc_pages, + sizeof(resp.alloc_pages)); + + return ret; +} + +DECLARE_UVERBS_NAMED_METHOD( + HNS_IB_METHOD_DCA_MEM_ATTACH, + UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, UVERBS_OBJECT_QP, + UVERBS_ACCESS_WRITE, UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM, UVERBS_TYPE_ALLOC_IDR(dca_cleanup), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG), - &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK)); + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK), + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_ATTACH));
static bool dca_is_supported(struct ib_device *device) { diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index c930b4616..f9eea9beb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -25,6 +25,31 @@ struct hns_dca_shrink_resp {
#define HNS_DCA_INVALID_BUF_ID 0UL
+/* + * buffer id(29b) = tag(7b) + owner(22b) + * [28:22] tag : indicate the QP config update times. + * [21: 0] owner: indicate the QP to which the page belongs. + */ +#define HNS_DCA_ID_MASK GENMASK(28, 0) +#define HNS_DCA_TAG_MASK GENMASK(28, 22) +#define HNS_DCA_OWN_MASK GENMASK(21, 0) + +#define HNS_DCA_BUF_ID_TO_TAG(buf_id) (((buf_id) & HNS_DCA_TAG_MASK) >> 22) +#define HNS_DCA_BUF_ID_TO_QPN(buf_id) ((buf_id) & HNS_DCA_OWN_MASK) +#define HNS_DCA_TO_BUF_ID(qpn, tag) (((qpn) & HNS_DCA_OWN_MASK) | \ + (((tag) << 22) & HNS_DCA_TAG_MASK)) + +struct hns_dca_attach_attr { + u32 sq_offset; + u32 sge_offset; + u32 rq_offset; +}; + +struct hns_dca_attach_resp { + u32 alloc_flags; + u32 alloc_pages; +}; + void hns_roce_register_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx); void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d9bb48d35..85f414088 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -325,7 +325,17 @@ struct hns_roce_mtr { };
struct hns_roce_dca_cfg { + spinlock_t lock; u32 buf_id; + u16 attach_count; + u32 npages; +}; + +/* DCA attr for setting WQE buffer */ +struct hns_roce_dca_attr { + u32 sq_offset; + u32 sge_offset; + u32 rq_offset; };
struct hns_roce_mw { @@ -967,6 +977,9 @@ struct hns_roce_hw { int (*clear_hem)(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, u32 step_idx); + int (*set_dca_buf)(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_dca_attr *attr); int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 246f75224..b899937c5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4491,12 +4491,15 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, upper_32_bits(to_hr_hw_page_addr(mtts[0]))); hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H);
- context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); - qpc_mask->rq_nxt_blk_addr = 0; - - hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H, - upper_32_bits(to_hr_hw_page_addr(mtts[1]))); - hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H); + /* The rq next block address is only valid for HIP08 QPC. */ + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + context->rq_nxt_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(mtts[1])); + qpc_mask->rq_nxt_blk_addr = 0; + hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); + hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H); + }
return 0; } diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index b5954ba52..3a36552fb 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -172,6 +172,7 @@ enum hns_ib_dca_mem_methods { HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT), HNS_IB_METHOD_DCA_MEM_DEREG, HNS_IB_METHOD_DCA_MEM_SHRINK, + HNS_IB_METHOD_DCA_MEM_ATTACH, };
enum hns_ib_dca_mem_reg_attrs { @@ -192,4 +193,14 @@ enum hns_ib_dca_mem_shrink_attrs { HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, };
+#define HNS_IB_ATTACH_FLAGS_NEW_BUFFER 1U + +enum hns_ib_dca_mem_attach_attrs { + HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, + HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, + HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, + HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, + HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, +}; #endif /* HNS_ABI_USER_H */
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
Add a new command that updates the WQE buffer addressing configuration in the QPC when a QP works in DCA mode.
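For readers unfamiliar with the context/mask convention this command relies on, here is a standalone, purely illustrative C model (not driver code; QPC_WORDS and apply_modify_qpc() are invented for the demo): the driver builds a new context image plus a mask image initialized to all ones, clears the mask bits of the fields it wants to change, and the hardware keeps every field whose mask bits are still set.

/* Toy model of the MODIFY_QPC context/mask pair; build with: cc -std=c11 demo.c */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define QPC_WORDS 4 /* the real QPC is much larger; 4 words keep the demo short */

static void apply_modify_qpc(uint32_t *hw_qpc, const uint32_t *ctx,
			     const uint32_t *mask)
{
	/* mask bit set -> keep the old value, mask bit cleared -> take ctx */
	for (int i = 0; i < QPC_WORDS; i++)
		hw_qpc[i] = (hw_qpc[i] & mask[i]) | (ctx[i] & ~mask[i]);
}

int main(void)
{
	uint32_t hw_qpc[QPC_WORDS] = { 0x11111111, 0x22222222, 0x33333333, 0x44444444 };
	uint32_t ctx[QPC_WORDS] = { 0 };
	uint32_t mask[QPC_WORDS];

	memset(mask, 0xff, sizeof(mask));  /* start from "change nothing" */

	ctx[1] = 0xabcd0000;               /* pretend this is a new WQE block address */
	mask[1] = 0;                       /* cleared mask bits mark the field as updated */

	apply_modify_qpc(hw_qpc, ctx, mask);
	printf("word1=0x%08x (updated), word0=0x%08x (kept)\n", hw_qpc[1], hw_qpc[0]);
	return 0;
}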
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 150 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + 2 files changed, 138 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b899937c5..f8683a4a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3049,6 +3049,16 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) free_dip_list(hr_dev); }
+static inline void mbox_desc_init(struct hns_roce_post_mbox *mb, + struct hns_roce_mbox_msg *mbox_msg) +{ + mb->in_param_l = cpu_to_le32(mbox_msg->in_param); + mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32); + mb->out_param_l = cpu_to_le32(mbox_msg->out_param); + mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32); + mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd); +} + static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, struct hns_roce_mbox_msg *mbox_msg) { @@ -3057,17 +3067,34 @@ static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
- mb->in_param_l = cpu_to_le32(mbox_msg->in_param); - mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32); - mb->out_param_l = cpu_to_le32(mbox_msg->out_param); - mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32); - mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd); + mbox_desc_init(mb, mbox_msg); mb->token_event_en = cpu_to_le32(mbox_msg->event_en << 16 | mbox_msg->token);
return hns_roce_cmq_send(hr_dev, &desc, 1); }
+static int hns_roce_mbox_send(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_SYNC_MB, false); + + mbox_desc_init(mb, mbox_msg); + + /* The hardware doesn't care about the token fields when working in + * sync mode. + */ + mb->token_event_en = 0; + + /* The cmdq send returns 0 indicates that the hardware has already + * finished the operation defined in this mbox. + */ + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout, u8 *complete_status) { @@ -4430,15 +4457,16 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) + struct hns_roce_v2_qp_context *qpc_mask, + struct hns_roce_dca_attr *dca_attr) { u64 mtts[MTT_MIN_COUNT] = { 0 }; u64 wqe_sge_ba; int ret;
/* Search qp buf's mtts */ - ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts, - MTT_MIN_COUNT); + ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, dca_attr->rq_offset, mtts, + ARRAY_SIZE(mtts)); if (hr_qp->rq.wqe_cnt && ret) { ibdev_err(&hr_dev->ib_dev, "failed to find QP(0x%lx) RQ WQE buf, ret = %d.\n", @@ -4507,7 +4535,8 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask) + struct hns_roce_v2_qp_context *qpc_mask, + struct hns_roce_dca_attr *dca_attr) { struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; @@ -4515,7 +4544,7 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, int ret;
/* search qp buf's mtts */ - ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset, + ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, dca_attr->sq_offset, &sq_cur_blk, 1); if (ret) { ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n", @@ -4524,7 +4553,7 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, } if (hr_qp->sge.sge_cnt > 0) { ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.offset, &sge_cur_blk, 1); + dca_attr->sge_offset, &sge_cur_blk, 1); if (ret) { ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n", hr_qp->qpn, ret); @@ -4582,6 +4611,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_dca_attr dca_attr = {}; dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu ib_mtu; @@ -4593,7 +4623,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int mtu; int ret;
- ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask); + dca_attr.rq_offset = hr_qp->rq.offset; + ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask, &dca_attr); if (ret) { ibdev_err(ibdev, "failed to config rq buf, ret = %d.\n", ret); return ret; @@ -4737,6 +4768,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_dca_attr dca_attr = {}; int ret;
/* Not support alternate path and path migration */ @@ -4745,7 +4777,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return -EINVAL; }
- ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); + dca_attr.sq_offset = hr_qp->sq.offset; + dca_attr.sge_offset = hr_qp->sge.offset; + ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask, &dca_attr); if (ret) { ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; @@ -5404,6 +5438,95 @@ out: return ret; }
+static int init_dca_buf_attr(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_dca_attr *init_attr, + struct hns_roce_dca_attr *dca_attr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + + if (hr_qp->sq.wqe_cnt > 0) { + dca_attr->sq_offset = hr_qp->sq.offset + init_attr->sq_offset; + if (dca_attr->sq_offset >= hr_qp->sge.offset) { + ibdev_err(ibdev, "failed to check SQ offset = %u\n", + init_attr->sq_offset); + return -EINVAL; + } + } + + if (hr_qp->sge.sge_cnt > 0) { + dca_attr->sge_offset = hr_qp->sge.offset + init_attr->sge_offset; + if (dca_attr->sge_offset >= hr_qp->rq.offset) { + ibdev_err(ibdev, "failed to check exSGE offset = %u\n", + init_attr->sge_offset); + return -EINVAL; + } + } + + if (hr_qp->rq.wqe_cnt > 0) { + dca_attr->rq_offset = hr_qp->rq.offset + init_attr->rq_offset; + if (dca_attr->rq_offset >= hr_qp->buff_size) { + ibdev_err(ibdev, "failed to check RQ offset = %u\n", + init_attr->rq_offset); + return -EINVAL; + } + } + + return 0; +} + +static int hns_roce_v2_set_dca_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_dca_attr *init_attr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_v2_qp_context *qpc, *msk; + struct hns_roce_dca_attr dca_attr = {}; + struct hns_roce_mbox_msg mbox_msg = {}; + dma_addr_t dma_handle; + int qpc_sz; + int ret; + + ret = init_dca_buf_attr(hr_dev, hr_qp, init_attr, &dca_attr); + if (ret) { + ibdev_err(ibdev, "failed to init DCA attr, ret = %d.\n", ret); + return ret; + } + + qpc_sz = hr_dev->caps.qpc_sz; + WARN_ON(2 * qpc_sz > HNS_ROCE_MAILBOX_SIZE); + qpc = dma_pool_alloc(hr_dev->cmd.pool, GFP_NOWAIT, &dma_handle); + if (!qpc) + return -ENOMEM; + + msk = (struct hns_roce_v2_qp_context *)((void *)qpc + qpc_sz); + memset(msk, 0xff, qpc_sz); + + ret = config_qp_rq_buf(hr_dev, hr_qp, qpc, msk, &dca_attr); + if (ret) { + ibdev_err(ibdev, "failed to config rq qpc, ret = %d.\n", ret); + goto done; + } + + ret = config_qp_sq_buf(hr_dev, hr_qp, qpc, msk, &dca_attr); + if (ret) { + ibdev_err(ibdev, "failed to config sq qpc, ret = %d.\n", ret); + goto done; + } + + mbox_msg.in_param = dma_handle; + mbox_msg.tag = hr_qp->qpn; + mbox_msg.cmd = HNS_ROCE_CMD_MODIFY_QPC; + ret = hns_roce_mbox_send(hr_dev, &mbox_msg); + if (ret) + ibdev_err(ibdev, "failed to modify DCA buf, ret = %d.\n", ret); + +done: + dma_pool_free(hr_dev->cmd.pool, qpc, dma_handle); + + return ret; +} + static int to_ib_qp_st(enum hns_roce_v2_qp_state state) { static const enum ib_qp_state map[] = { @@ -6933,6 +7056,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .write_cqc = hns_roce_v2_write_cqc, .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, + .set_dca_buf = hns_roce_v2_set_dca_buf, .modify_qp = hns_roce_v2_modify_qp, .dereg_mr = hns_roce_v2_dereg_mr, .qp_flow_control_init = hns_roce_v2_qp_flow_control_init, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 2b4a1f788..6ab49dd18 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -224,6 +224,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_VF_RES = 0x850e, HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, + HNS_ROCE_OPC_SYNC_MB = 0x8511, HNS_ROCE_QUERY_RAM_ECC = 0x8513, HNS_SWITCH_PARAMETER_CFG = 0x1033, HNS_ROCE_OPC_SET_BOND_INFO = 0x8601,
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
If a uQP works in DCA mode, the userspace driver needs to drop the WQE buffer by calling the 'HNS_IB_METHOD_DCA_MEM_DETACH' method when the QP's CI is equal to its PI. At that point the hns ROCEE no longer accesses the WQE buffer, so the buffer can be returned to the memory pool.
This method starts a delayed worker in kernel space to recycle the WQE buffer: if the hns ROCEE is indeed no longer accessing the buffer, the worker marks its pages as free in the DCA memory pool.
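As a rough, self-contained illustration of the inactivity test the ageing worker performs before freeing the buffer (compare hns_roce_v2_chk_dca_buf_inactive later in this series), the sketch below uses simplified stand-in types; the structure and field names are hypothetical, not the driver's.

/* Sketch of the "is the WQE buffer still in use?" decision. */
#include <stdbool.h>
#include <stdint.h>

struct dca_qp_view {
	int qp_state;        /* 0 = RST, 1 = ERR, others = working      */
	uint32_t sq_wqe_cnt; /* SQ depth, power of two                   */
	uint32_t rq_wqe_cnt; /* RQ depth; DCA QPs normally use an SRQ    */
	uint32_t hw_sq_msn;  /* last SQ MSN the hardware has processed   */
	uint32_t drv_sq_idx; /* SQ index recorded at detach time         */
};

static bool dca_buf_is_inactive(const struct dca_qp_view *qp)
{
	if (qp->qp_state == 0 || qp->qp_state == 1)
		return true;                      /* QP stopped: always safe */

	if (qp->rq_wqe_cnt > 0)
		return false;                     /* RQ may still be consumed */

	if (qp->sq_wqe_cnt > 0 &&
	    (qp->hw_sq_msn & (qp->sq_wqe_cnt - 1)) != qp->drv_sq_idx)
		return false;                     /* SQ still has work in flight */

	return true;
}

int main(void)
{
	struct dca_qp_view qp = { .qp_state = 3, .sq_wqe_cnt = 8,
				  .hw_sq_msn = 21, .drv_sq_idx = 5 };

	return dca_buf_is_inactive(&qp) ? 0 : 1; /* 21 & 7 == 5 -> inactive */
}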
Signed-off-by: Chengchang Tang tangchengchang@huawei.com Reviewed-by: Yangyang Li liyangyang20@huawei.com Reviewed-by: YueHaibing yuehaibing@huawei.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 164 +++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_dca.h | 7 +- drivers/infiniband/hw/hns/hns_roce_device.h | 4 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 50 ++++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 4 +- include/uapi/rdma/hns-abi.h | 6 + 6 files changed, 230 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 9099c2e0c..c26b3dcae 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -15,6 +15,9 @@ #define UVERBS_MODULE_NAME hns_ib #include <rdma/uverbs_named_ioctl.h>
+/* DCA mem ageing interval time */ +#define DCA_MEM_AGEING_MSES 1000 + /* DCA memory */ struct dca_mem { #define DCA_MEM_FLAGS_ALLOCED BIT(0) @@ -42,6 +45,12 @@ static inline void set_dca_page_to_free(struct hns_dca_page_state *state) state->lock = 0; }
+static inline void set_dca_page_to_inactive(struct hns_dca_page_state *state) +{ + state->active = 0; + state->lock = 0; +} + static inline void lock_dca_page_to_attach(struct hns_dca_page_state *state, u32 buf_id) { @@ -710,7 +719,10 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev, u32 buf_id; int ret;
+ /* Stop DCA mem ageing worker */ + cancel_delayed_work(&cfg->dwork); resp->alloc_flags = 0; + spin_lock(&cfg->lock); buf_id = cfg->buf_id; /* Already attached */ @@ -749,20 +761,140 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev, return 0; }
+struct dca_page_free_buf_attr { + u32 buf_id; + u32 max_pages; + u32 free_pages; + u32 clean_mems; +}; + +static int free_buffer_pages_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_page_free_buf_attr *attr = param; + struct hns_dca_page_state *state; + bool changed = false; + bool stop = false; + int i, free_pages; + + free_pages = 0; + for (i = 0; !stop && i < mem->page_count; i++) { + state = &mem->states[i]; + /* Change matched pages state */ + if (dca_page_is_attached(state, attr->buf_id)) { + set_dca_page_to_free(state); + changed = true; + attr->free_pages++; + if (attr->free_pages == attr->max_pages) + stop = true; + } + + if (dca_page_is_free(state)) + free_pages++; + } + + for (; changed && i < mem->page_count; i++) + if (dca_page_is_free(state)) + free_pages++; + + if (changed && free_pages == mem->page_count) + attr->clean_mems++; + + return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; +} + +static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, + struct hns_roce_dca_cfg *cfg) +{ + struct dca_page_free_buf_attr attr = {}; + unsigned long flags; + u32 buf_id; + + spin_lock(&cfg->lock); + buf_id = cfg->buf_id; + cfg->buf_id = HNS_DCA_INVALID_BUF_ID; + spin_unlock(&cfg->lock); + if (buf_id == HNS_DCA_INVALID_BUF_ID) + return; + + attr.buf_id = buf_id; + attr.max_pages = cfg->npages; + travel_dca_pages(ctx, &attr, free_buffer_pages_proc); + + /* Update free size */ + spin_lock_irqsave(&ctx->pool_lock, flags); + ctx->free_mems += attr.clean_mems; + ctx->free_size += attr.free_pages << HNS_HW_PAGE_SHIFT; + spin_unlock_irqrestore(&ctx->pool_lock, flags); +} + +static void kick_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_cfg *cfg, + struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + + /* Stop ageing worker and free DCA buffer from pool */ + cancel_delayed_work_sync(&cfg->dwork); + free_buf_from_dca_mem(ctx, cfg); +} + +static void dca_mem_ageing_work(struct work_struct *work) +{ + struct hns_roce_qp *hr_qp = container_of(work, struct hns_roce_qp, + dca_cfg.dwork.work); + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); + bool hw_is_inactive; + + hw_is_inactive = hr_dev->hw->chk_dca_buf_inactive && + hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp); + if (hw_is_inactive) + free_buf_from_dca_mem(ctx, &hr_qp->dca_cfg); +} + +void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_ucontext *uctx; + + if (hr_qp->ibqp.uobject && hr_qp->ibqp.pd->uobject) { + uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); + kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx); + } +} + +static void detach_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr) +{ + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + + /* Start an ageing worker to free buffer */ + cancel_delayed_work(&cfg->dwork); + spin_lock(&cfg->lock); + cfg->sq_idx = attr->sq_idx; + queue_delayed_work(hr_dev->irq_workq, &cfg->dwork, + msecs_to_jiffies(DCA_MEM_AGEING_MSES)); + spin_unlock(&cfg->lock); +} + void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
spin_lock_init(&cfg->lock); + INIT_DELAYED_WORK(&cfg->dwork, dca_mem_ageing_work); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT; }
void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) + struct hns_roce_qp *hr_qp, struct ib_udata *udata) { + struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
+ kick_dca_mem(hr_dev, cfg, uctx); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; }
@@ -956,12 +1088,40 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, UVERBS_ATTR_TYPE(u32), UA_MANDATORY));
+static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_DETACH)( + struct uverbs_attr_bundle *attrs) +{ + struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs); + struct hns_dca_detach_attr attr = {}; + int ret; + + if (!hr_qp) + return -EINVAL; + + ret = uverbs_copy_from(&attr.sq_idx, attrs, + HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX); + if (ret) + return ret; + + detach_dca_mem(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr); + + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + HNS_IB_METHOD_DCA_MEM_DETACH, + UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, UVERBS_OBJECT_QP, + UVERBS_ACCESS_WRITE, UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM, UVERBS_TYPE_ALLOC_IDR(dca_cleanup), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK), - &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_ATTACH)); + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_ATTACH), + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DETACH));
static bool dca_is_supported(struct ib_device *device) { diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index f9eea9beb..fdc3aaa4b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -50,6 +50,10 @@ struct hns_dca_attach_resp { u32 alloc_pages; };
+struct hns_dca_detach_attr { + u32 sq_idx; +}; + void hns_roce_register_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx); void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, @@ -58,5 +62,6 @@ void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp); + struct hns_roce_qp *hr_qp, struct ib_udata *udata); +void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 85f414088..a76d6d153 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -329,6 +329,8 @@ struct hns_roce_dca_cfg { u32 buf_id; u16 attach_count; u32 npages; + u32 sq_idx; + struct delayed_work dwork; };
/* DCA attr for setting WQE buffer */ @@ -980,6 +982,8 @@ struct hns_roce_hw { int (*set_dca_buf)(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_dca_attr *attr); + bool (*chk_dca_buf_inactive)(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp); int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f8683a4a6..5cdb19d81 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -48,6 +48,7 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" +#include "hns_roce_dca.h" #include "hns_roce_hw_v2.h"
enum { @@ -370,6 +371,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, return 0; }
+static inline bool check_qp_dca_enable(struct hns_roce_qp *hr_qp) +{ + return !!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA); +} + static int check_send_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { @@ -5434,6 +5440,10 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (new_state == IB_QPS_RESET && !ibqp->uobject) clear_qp(hr_qp);
+ if (check_qp_dca_enable(hr_qp) && + (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) + hns_roce_dca_kick(hr_dev, hr_qp); + out: return ret; } @@ -5728,6 +5738,45 @@ out: return ret; }
+static bool hns_roce_v2_chk_dca_buf_inactive(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + struct hns_roce_v2_qp_context context = {}; + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 tmp, sq_idx; + int state; + int ret; + + ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context); + if (ret) { + ibdev_err(ibdev, "failed to query DCA QPC, ret = %d.\n", ret); + return false; + } + + state = hr_reg_read(&context, QPC_QP_ST); + if (state == HNS_ROCE_QP_ST_ERR || state == HNS_ROCE_QP_ST_RST) + return true; + + /* If RQ is not empty, the buffer is always active until the QP stops + * working. + */ + if (hr_qp->rq.wqe_cnt > 0) + return false; + + if (hr_qp->sq.wqe_cnt > 0) { + tmp = (u32)hr_reg_read(&context, QPC_RETRY_MSG_MSN); + sq_idx = tmp & (hr_qp->sq.wqe_cnt - 1); + /* If SQ-PI equals to retry_msg_msn in QPC, the QP is + * inactive. + */ + if (sq_idx != cfg->sq_idx) + return false; + } + + return true; +} + static inline int modify_qp_is_ok(struct hns_roce_qp *hr_qp) { return ((hr_qp->ibqp.qp_type == IB_QPT_RC || @@ -7057,6 +7106,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, .set_dca_buf = hns_roce_v2_set_dca_buf, + .chk_dca_buf_inactive = hns_roce_v2_chk_dca_buf_inactive, .modify_qp = hns_roce_v2_modify_qp, .dereg_mr = hns_roce_v2_dereg_mr, .qp_flow_control_init = hns_roce_v2_qp_flow_control_init, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 082ed09f4..45eb75bb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -789,7 +789,7 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (ret) { ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); if (dca_en) - hns_roce_disable_dca(hr_dev, hr_qp); + hns_roce_disable_dca(hr_dev, hr_qp, udata); }
return ret; @@ -801,7 +801,7 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) - hns_roce_disable_dca(hr_dev, hr_qp); + hns_roce_disable_dca(hr_dev, hr_qp, udata); }
static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 3a36552fb..d58cd12ef 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -173,6 +173,7 @@ enum hns_ib_dca_mem_methods { HNS_IB_METHOD_DCA_MEM_DEREG, HNS_IB_METHOD_DCA_MEM_SHRINK, HNS_IB_METHOD_DCA_MEM_ATTACH, + HNS_IB_METHOD_DCA_MEM_DETACH, };
enum hns_ib_dca_mem_reg_attrs { @@ -203,4 +204,9 @@ enum hns_ib_dca_mem_attach_attrs { HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, }; + +enum hns_ib_dca_mem_detach_attrs { + HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, +}; #endif /* HNS_ABI_USER_H */
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
If a uQP works in DCA mode, the userspace driver needs to get the buffer's address in the DCA memory pool by calling the 'HNS_IB_METHOD_DCA_MEM_QUERY' method after the QP has been attached with the 'HNS_IB_METHOD_DCA_MEM_ATTACH' method.
This method returns the DCA mem object's key and an offset so that the userspace driver can compute the WQE's virtual address in the DCA memory pool.
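The sketch below is a purely illustrative guess at how a userspace provider might turn the query output into a WQE pointer; the dca_query_out structure and dca_mem_base() lookup are stand-ins invented for the example, not part of the hns ABI.

/* Map (mem key, offset, contiguous page count) to a WQE address. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define HNS_HW_PAGE_SHIFT 12

struct dca_query_out {
	uint64_t mem_key;     /* identifies the registered DCA mem block */
	uint32_t byte_offset; /* offset of the first matched page        */
	uint32_t page_count;  /* contiguous active pages from there      */
};

/* Stand-in for the provider's table of registered DCA blocks. */
static char dca_block[16 << HNS_HW_PAGE_SHIFT];

static void *dca_mem_base(uint64_t mem_key)
{
	(void)mem_key;        /* a single block in this toy example */
	return dca_block;
}

static void *dca_wqe_addr(const struct dca_query_out *out, uint32_t wqe_offset)
{
	if (wqe_offset >= (out->page_count << HNS_HW_PAGE_SHIFT))
		return NULL;  /* outside the contiguous range: re-query */

	return (char *)dca_mem_base(out->mem_key) + out->byte_offset + wqe_offset;
}

int main(void)
{
	struct dca_query_out out = { .mem_key = 1, .byte_offset = 0x2000,
				     .page_count = 4 };

	printf("WQE at %p\n", dca_wqe_addr(&out, 0x100));
	return 0;
}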
Signed-off-by: Chengchang Tang tangchengchang@huawei.com Reviewed-by: Yangyang Li liyangyang20@huawei.com Reviewed-by: YueHaibing yuehaibing@huawei.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 105 ++++++++++++++++++++++- include/uapi/rdma/hns-abi.h | 10 +++ 2 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index c26b3dcae..9b4f3e646 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -80,6 +80,14 @@ static inline bool dca_page_is_attached(struct hns_dca_page_state *state, (HNS_DCA_OWN_MASK & state->buf_id); }
+static inline bool dca_page_is_active(struct hns_dca_page_state *state, + u32 buf_id) +{ + /* all buf id bits must be matched */ + return (HNS_DCA_ID_MASK & buf_id) == state->buf_id && + !state->lock && state->active; +} + static inline bool dca_page_is_allocated(struct hns_dca_page_state *state, u32 buf_id) { @@ -761,6 +769,47 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev, return 0; }
+struct dca_page_query_active_attr { + u32 buf_id; + u32 curr_index; + u32 start_index; + u32 page_index; + u32 page_count; + u64 mem_key; +}; + +static int query_dca_active_pages_proc(struct dca_mem *mem, int index, + void *param) +{ + struct hns_dca_page_state *state = &mem->states[index]; + struct dca_page_query_active_attr *attr = param; + + if (!dca_page_is_active(state, attr->buf_id)) + return 0; + + if (attr->curr_index < attr->start_index) { + attr->curr_index++; + return 0; + } else if (attr->curr_index > attr->start_index) { + return DCA_MEM_STOP_ITERATE; + } + + /* Search first page in DCA mem */ + attr->page_index = index; + attr->mem_key = mem->key; + /* Search active pages in continuous addresses */ + while (index < mem->page_count) { + state = &mem->states[index]; + if (!dca_page_is_active(state, attr->buf_id)) + break; + + index++; + attr->page_count++; + } + + return DCA_MEM_STOP_ITERATE; +} + struct dca_page_free_buf_attr { u32 buf_id; u32 max_pages; @@ -1115,13 +1164,67 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, UVERBS_ATTR_TYPE(u32), UA_MANDATORY));
+static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs); + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); + struct dca_page_query_active_attr active_attr = {}; + u32 page_idx, page_ofs; + int ret; + + if (!hr_qp) + return -EINVAL; + + ret = uverbs_copy_from(&page_idx, attrs, + HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX); + if (ret) + return ret; + + active_attr.buf_id = hr_qp->dca_cfg.buf_id; + active_attr.start_index = page_idx; + travel_dca_pages(ctx, &active_attr, query_dca_active_pages_proc); + page_ofs = active_attr.page_index << HNS_HW_PAGE_SHIFT; + + if (!active_attr.page_count) + return -ENOMEM; + + ret = uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, + &active_attr.mem_key, sizeof(active_attr.mem_key)); + if (!ret) + ret = uverbs_copy_to(attrs, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, + &page_ofs, sizeof(page_ofs)); + if (!ret) + ret = uverbs_copy_to(attrs, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, + &active_attr.page_count, + sizeof(active_attr.page_count)); + + return ret; +} + +DECLARE_UVERBS_NAMED_METHOD( + HNS_IB_METHOD_DCA_MEM_QUERY, + UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, UVERBS_OBJECT_QP, + UVERBS_ACCESS_READ, UA_MANDATORY), + UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, + UVERBS_ATTR_TYPE(u64), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, + UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); + DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM, UVERBS_TYPE_ALLOC_IDR(dca_cleanup), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK), &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_ATTACH), - &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DETACH)); + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DETACH), + &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_QUERY));
static bool dca_is_supported(struct ib_device *device) { diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index d58cd12ef..99e7c3a82 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -174,6 +174,7 @@ enum hns_ib_dca_mem_methods { HNS_IB_METHOD_DCA_MEM_SHRINK, HNS_IB_METHOD_DCA_MEM_ATTACH, HNS_IB_METHOD_DCA_MEM_DETACH, + HNS_IB_METHOD_DCA_MEM_QUERY, };
enum hns_ib_dca_mem_reg_attrs { @@ -209,4 +210,13 @@ enum hns_ib_dca_mem_detach_attrs { HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, }; + +enum hns_ib_dca_mem_query_attrs { + HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, +}; + #endif /* HNS_ABI_USER_H */
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
This patch adds DCA support for kernel space QPs.
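To summarize the two ownership rules introduced here, the standalone sketch below models them with simplified, invented types: kernel QPs draw pages from a per-device pool instead of a per-ucontext pool, and each attach derives a fresh 29-bit buffer id from the QPN plus an attach counter so that stale pages can be told apart. This is an illustration of the idea only, not driver code.

/* Pool selection and buffer-id generation, modeled after the DCA macros. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define DCA_OWN_BITS 22
#define DCA_OWN_MASK ((1u << DCA_OWN_BITS) - 1)
#define DCA_TAG_MASK (((1u << 29) - 1) & ~DCA_OWN_MASK)

struct dca_pool  { const char *name; };
struct dev_model  { struct dca_pool kdca; }; /* stand-in for hns_roce_dev  */
struct uctx_model { struct dca_pool udca; }; /* stand-in for the ucontext  */

static struct dca_pool *pick_dca_pool(struct dev_model *dev,
				      struct uctx_model *uctx)
{
	/* user QPs share the ucontext pool, kernel QPs the per-device pool */
	return uctx ? &uctx->udca : &dev->kdca;
}

static uint32_t make_buf_id(uint32_t qpn, uint32_t attach_count)
{
	return (qpn & DCA_OWN_MASK) |
	       ((attach_count << DCA_OWN_BITS) & DCA_TAG_MASK);
}

int main(void)
{
	struct dev_model dev = { .kdca = { "device pool" } };

	printf("kernel QP uses %s, buf_id=0x%x\n",
	       pick_dca_pool(&dev, NULL)->name, make_buf_id(0x8, 3));
	return 0;
}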
Signed-off-by: Chengchang Tang tangchengchang@huawei.com Reviewed-by: Yangyang Li liyangyang20@huawei.com Reviewed-by: YueHaibing yuehaibing@huawei.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 1360 ++++++++++++------- drivers/infiniband/hw/hns/hns_roce_dca.h | 25 +- drivers/infiniband/hw/hns/hns_roce_device.h | 44 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 209 +-- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 + drivers/infiniband/hw/hns/hns_roce_main.c | 32 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 12 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 37 +- include/uapi/rdma/hns-abi.h | 3 +- 9 files changed, 1106 insertions(+), 620 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 9b4f3e646..5ec307faf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -104,25 +104,71 @@ static inline bool dca_mem_is_available(struct dca_mem *mem) return mem->flags == (DCA_MEM_FLAGS_ALLOCED | DCA_MEM_FLAGS_REGISTERED); }
-static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, struct dca_mem *mem, - struct dca_mem_attr *attr) +static void free_dca_pages(struct hns_roce_dev *hr_dev, bool is_user, + void *pages) +{ + if (is_user) + ib_umem_release(pages); + else + hns_roce_buf_free(hr_dev, pages); +} + +static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, bool is_user, + struct dca_mem *mem, struct dca_mem_attr *attr) { struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_umem *umem; + struct hns_roce_buf *kmem; + + if (is_user) { + struct ib_umem *umem; + + umem = ib_umem_get(ibdev, attr->addr, attr->size, 0); + if (IS_ERR(umem)) { + ibdev_err(ibdev, "failed to get uDCA pages, ret = %ld.\n", + PTR_ERR(umem)); + return NULL; + }
- umem = ib_umem_get(ibdev, attr->addr, attr->size, 0); - if (IS_ERR(umem)) { - ibdev_err(ibdev, "failed to get uDCA pages, ret = %ld.\n", - PTR_ERR(umem)); + mem->page_count = ib_umem_num_dma_blocks(umem, + HNS_HW_PAGE_SIZE); + return umem; + } + + kmem = hns_roce_buf_alloc(hr_dev, attr->size, HNS_HW_PAGE_SHIFT, + HNS_ROCE_BUF_NOSLEEP | HNS_ROCE_BUF_NOFAIL); + if (IS_ERR(kmem)) { + ibdev_err(ibdev, "failed to alloc kDCA pages, ret = %ld.\n", + PTR_ERR(kmem)); return NULL; }
- mem->page_count = ib_umem_num_dma_blocks(umem, HNS_HW_PAGE_SIZE); + mem->page_count = kmem->npages; + /* Override the attr->size by actually alloced size */ + attr->size = kmem->ntrunks << kmem->trunk_shift; + return kmem; + +} + +static void init_dca_kmem_states(struct hns_roce_dev *hr_dev, + struct hns_dca_page_state *states, int count, + struct hns_roce_buf *kmem) +{ + dma_addr_t cur_addr; + dma_addr_t pre_addr; + int i; + + pre_addr = 0; + for (i = 0; i < kmem->npages && i < count; i++) { + cur_addr = hns_roce_buf_page(kmem, i); + if (cur_addr - pre_addr != HNS_HW_PAGE_SIZE) + states[i].head = 1;
- return umem; + pre_addr = cur_addr; + } }
-static void init_dca_umem_states(struct hns_dca_page_state *states, int count, +static void init_dca_umem_states(struct hns_roce_dev *hr_dev, + struct hns_dca_page_state *states, int count, struct ib_umem *umem) { struct ib_block_iter biter; @@ -144,7 +190,9 @@ static void init_dca_umem_states(struct hns_dca_page_state *states, int count, } }
-static struct hns_dca_page_state *alloc_dca_states(void *pages, int count) +static struct hns_dca_page_state *alloc_dca_states(struct hns_roce_dev *hr_dev, + void *pages, int count, + bool is_user) { struct hns_dca_page_state *states;
@@ -152,7 +200,10 @@ static struct hns_dca_page_state *alloc_dca_states(void *pages, int count) if (!states) return NULL;
- init_dca_umem_states(states, count, pages); + if (is_user) + init_dca_umem_states(hr_dev, states, count, pages); + else + init_dca_kmem_states(hr_dev, states, count, pages);
return states; } @@ -192,17 +243,142 @@ done: spin_unlock_irqrestore(&ctx->pool_lock, flags); }
-/* user DCA is managed by ucontext */ +struct dca_get_alloced_pages_attr { + u32 buf_id; + dma_addr_t *pages; + u32 total; + u32 max; +}; + +static int get_alloced_kmem_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_get_alloced_pages_attr *attr = param; + struct hns_dca_page_state *states = mem->states; + struct hns_roce_buf *kmem = mem->pages; + u32 i; + + for (i = 0; i < kmem->npages; i++) { + if (dca_page_is_allocated(&states[i], attr->buf_id)) { + attr->pages[attr->total++] = hns_roce_buf_page(kmem, i); + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; + } + } + + return DCA_MEM_NEXT_ITERATE; +} + +static int get_alloced_umem_proc(struct dca_mem *mem, int index, void *param) + +{ + struct dca_get_alloced_pages_attr *attr = param; + struct hns_dca_page_state *states = mem->states; + struct ib_umem *umem = mem->pages; + struct ib_block_iter biter; + u32 i = 0; + + rdma_for_each_block(umem->sgt_append.sgt.sgl, &biter, + umem->sgt_append.sgt.nents, HNS_HW_PAGE_SIZE) { + if (dca_page_is_allocated(&states[i], attr->buf_id)) { + attr->pages[attr->total++] = + rdma_block_iter_dma_address(&biter); + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; + } + i++; + } + + return DCA_MEM_NEXT_ITERATE; +} + +/* user DCA is managed by ucontext, kernel DCA is managed by device */ +static inline struct hns_roce_dca_ctx * +to_hr_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx) +{ + return uctx ? &uctx->dca_ctx : &hr_dev->dca_ctx; +} + static inline struct hns_roce_dca_ctx * -to_hr_dca_ctx(struct hns_roce_ucontext *uctx) +hr_qp_to_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_ucontext *uctx = NULL; + + if (hr_qp->ibqp.pd->uobject) + uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); + + return to_hr_dca_ctx(hr_dev, uctx); +} + +static int config_dca_qpc(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, dma_addr_t *pages, + int page_count) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_mtr *mtr = &hr_qp->mtr; + int ret; + + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) { + ibdev_err(ibdev, "failed to map DCA pages, ret = %d.\n", ret); + return ret; + } + + if (hr_dev->hw->set_dca_buf) { + ret = hr_dev->hw->set_dca_buf(hr_dev, hr_qp); + if (ret) { + ibdev_err(ibdev, "failed to set DCA to HW, ret = %d.\n", + ret); + return ret; + } + } + + return 0; +} + +static int setup_dca_buf_to_hw(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx, u32 buf_id, + u32 count) { - return &uctx->dca_ctx; + struct dca_get_alloced_pages_attr attr = {}; + dma_addr_t *pages; + int ret; + + /* alloc a tmp array to store buffer's dma address */ + pages = kcalloc(count, sizeof(dma_addr_t), GFP_ATOMIC); + if (!pages) + return -ENOMEM; + + attr.buf_id = buf_id; + attr.pages = pages; + attr.max = count; + + if (hr_qp->ibqp.uobject) + travel_dca_pages(ctx, &attr, get_alloced_umem_proc); + else + travel_dca_pages(ctx, &attr, get_alloced_kmem_proc); + + if (attr.total != count) { + ibdev_err(&hr_dev->ib_dev, "failed to get DCA page %u != %u.\n", + attr.total, count); + ret = -ENOMEM; + goto err_get_pages; + } + + ret = config_dca_qpc(hr_dev, hr_qp, pages, count); +err_get_pages: + /* drop tmp array */ + kfree(pages); + + return ret; }
-static void unregister_dca_mem(struct hns_roce_ucontext *uctx, +static void unregister_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx, struct dca_mem *mem) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + bool is_user = !!uctx; unsigned long flags; void *states, *pages;
@@ -224,24 +400,25 @@ static void unregister_dca_mem(struct hns_roce_ucontext *uctx, spin_unlock_irqrestore(&ctx->pool_lock, flags);
kfree(states); - ib_umem_release(pages); + free_dca_pages(hr_dev, is_user, pages); }
static int register_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx, struct dca_mem *mem, struct dca_mem_attr *attr) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + bool is_user = !!uctx; void *states, *pages; unsigned long flags;
- pages = alloc_dca_pages(hr_dev, mem, attr); + pages = alloc_dca_pages(hr_dev, is_user, mem, attr); if (!pages) return -ENOMEM;
- states = alloc_dca_states(pages, mem->page_count); + states = alloc_dca_states(hr_dev, pages, mem->page_count, is_user); if (!states) { - ib_umem_release(pages); + free_dca_pages(hr_dev, is_user, pages); return -ENOMEM; }
@@ -263,266 +440,358 @@ static int register_dca_mem(struct hns_roce_dev *hr_dev, return 0; }
-struct dca_mem_shrink_attr { - u64 shrink_key; - u32 shrink_mems; +struct dca_page_clear_attr { + u32 buf_id; + u32 max_pages; + u32 clear_pages; };
-static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param) +static int clear_dca_pages_proc(struct dca_mem *mem, int index, void *param) { - struct dca_mem_shrink_attr *attr = param; - struct hns_dca_page_state *state; - int i, free_pages; - - free_pages = 0; - for (i = 0; i < mem->page_count; i++) { - state = &mem->states[i]; - if (dca_page_is_free(state)) - free_pages++; - } - - /* No pages are in use */ - if (free_pages == mem->page_count) { - /* unregister first empty DCA mem */ - if (!attr->shrink_mems) { - mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; - attr->shrink_key = mem->key; - } + struct hns_dca_page_state *state = &mem->states[index]; + struct dca_page_clear_attr *attr = param;
- attr->shrink_mems++; + if (dca_page_is_attached(state, attr->buf_id)) { + set_dca_page_to_free(state); + attr->clear_pages++; }
- if (attr->shrink_mems > 1) + if (attr->clear_pages >= attr->max_pages) return DCA_MEM_STOP_ITERATE; else - return DCA_MEM_NEXT_ITERATE; + return 0; }
-static void shrink_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_ucontext *uctx, u64 reserved_size, - struct hns_dca_shrink_resp *resp) +static void clear_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); - struct dca_mem_shrink_attr attr = {}; - unsigned long flags; - bool need_shink; - - spin_lock_irqsave(&ctx->pool_lock, flags); - need_shink = ctx->free_mems > 0 && ctx->free_size > reserved_size; - spin_unlock_irqrestore(&ctx->pool_lock, flags); - if (!need_shink) - return; + struct dca_page_clear_attr attr = {};
- travel_dca_pages(ctx, &attr, shrink_dca_page_proc); - resp->free_mems = attr.shrink_mems; - resp->free_key = attr.shrink_key; + attr.buf_id = buf_id; + attr.max_pages = count; + travel_dca_pages(ctx, &attr, clear_dca_pages_proc); }
-static void init_dca_context(struct hns_roce_dca_ctx *ctx) +struct dca_page_assign_attr { + u32 buf_id; + int unit; + int total; + int max; +}; + +static bool dca_page_is_allocable(struct hns_dca_page_state *state, bool head) { - INIT_LIST_HEAD(&ctx->pool); - spin_lock_init(&ctx->pool_lock); - ctx->total_size = 0; + bool is_free = dca_page_is_free(state) || dca_page_is_inactive(state); + + return head ? is_free : is_free && !state->head; }
-static void cleanup_dca_context(struct hns_roce_dev *hr_dev, - struct hns_roce_dca_ctx *ctx) +static int assign_dca_pages_proc(struct dca_mem *mem, int index, void *param) { - struct dca_mem *mem, *tmp; - unsigned long flags; + struct dca_page_assign_attr *attr = param; + struct hns_dca_page_state *state; + int checked_pages = 0; + int start_index = 0; + int free_pages = 0; + int i;
- spin_lock_irqsave(&ctx->pool_lock, flags); - list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { - list_del(&mem->list); - mem->flags = 0; - spin_unlock_irqrestore(&ctx->pool_lock, flags); + /* Check the continuous pages count is not smaller than unit count */ + for (i = index; free_pages < attr->unit && i < mem->page_count; i++) { + checked_pages++; + state = &mem->states[i]; + if (dca_page_is_allocable(state, free_pages == 0)) { + if (free_pages == 0) + start_index = i;
- kfree(mem->states); - ib_umem_release(mem->pages); - kfree(mem); + free_pages++; + } else { + free_pages = 0; + } + }
- spin_lock_irqsave(&ctx->pool_lock, flags); + if (free_pages < attr->unit) + return DCA_MEM_NEXT_ITERATE; + + for (i = 0; i < free_pages; i++) { + state = &mem->states[start_index + i]; + lock_dca_page_to_attach(state, attr->buf_id); + attr->total++; } - ctx->total_size = 0; - spin_unlock_irqrestore(&ctx->pool_lock, flags); -}
-void hns_roce_register_udca(struct hns_roce_dev *hr_dev, - struct hns_roce_ucontext *uctx) -{ - if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) - return; + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE;
- init_dca_context(&uctx->dca_ctx); + return checked_pages; }
-void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, - struct hns_roce_ucontext *uctx) +static u32 assign_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count, + u32 unit) { - if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) - return; + struct dca_page_assign_attr attr = {};
- cleanup_dca_context(hr_dev, &uctx->dca_ctx); + attr.buf_id = buf_id; + attr.unit = unit; + attr.max = count; + travel_dca_pages(ctx, &attr, assign_dca_pages_proc); + return attr.total; }
-static struct dca_mem *alloc_dca_mem(struct hns_roce_dca_ctx *ctx) -{ - struct dca_mem *mem, *tmp, *found = NULL; - unsigned long flags; +struct dca_page_active_attr { + u32 buf_id; + u32 max_pages; + u32 alloc_pages; + u32 dirty_mems; +};
- spin_lock_irqsave(&ctx->pool_lock, flags); - list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { - spin_lock(&mem->lock); - if (!mem->flags) { - found = mem; - mem->flags |= DCA_MEM_FLAGS_ALLOCED; - spin_unlock(&mem->lock); - break; +static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_page_active_attr *attr = param; + struct hns_dca_page_state *state; + bool changed = false; + bool stop = false; + int i, free_pages; + + free_pages = 0; + for (i = 0; !stop && i < mem->page_count; i++) { + state = &mem->states[i]; + if (dca_page_is_free(state)) { + free_pages++; + } else if (dca_page_is_allocated(state, attr->buf_id)) { + free_pages++; + /* Change matched pages state */ + unlock_dca_page_to_active(state, attr->buf_id); + changed = true; + attr->alloc_pages++; + if (attr->alloc_pages == attr->max_pages) + stop = true; } - spin_unlock(&mem->lock); } - spin_unlock_irqrestore(&ctx->pool_lock, flags);
- if (found) - return found; + for (; changed && i < mem->page_count; i++) + if (dca_page_is_free(state)) + free_pages++;
- mem = kzalloc(sizeof(*mem), GFP_NOWAIT); - if (!mem) - return NULL; + /* Clean mem changed to dirty */ + if (changed && free_pages == mem->page_count) + attr->dirty_mems++;
- spin_lock_init(&mem->lock); - INIT_LIST_HEAD(&mem->list); + return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; +}
- mem->flags |= DCA_MEM_FLAGS_ALLOCED; +static u32 active_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +{ + struct dca_page_active_attr attr = {}; + unsigned long flags; + + attr.buf_id = buf_id; + attr.max_pages = count; + travel_dca_pages(ctx, &attr, active_dca_pages_proc);
+ /* Update free size */ spin_lock_irqsave(&ctx->pool_lock, flags); - list_add(&mem->list, &ctx->pool); + ctx->free_mems -= attr.dirty_mems; + ctx->free_size -= attr.alloc_pages << HNS_HW_PAGE_SHIFT; spin_unlock_irqrestore(&ctx->pool_lock, flags);
- return mem; -} - -static void free_dca_mem(struct dca_mem *mem) -{ - /* We cannot hold the whole pool's lock during the DCA is working - * until cleanup the context in cleanup_dca_context(), so we just - * set the DCA mem state as free when destroying DCA mem object. - */ - spin_lock(&mem->lock); - mem->flags = 0; - spin_unlock(&mem->lock); -} - -static inline struct hns_roce_dca_ctx *hr_qp_to_dca_ctx(struct hns_roce_qp *qp) -{ - return to_hr_dca_ctx(to_hr_ucontext(qp->ibqp.pd->uobject->context)); + return attr.alloc_pages; }
-struct dca_page_clear_attr { +struct dca_page_query_active_attr { u32 buf_id; - u32 max_pages; - u32 clear_pages; + u32 curr_index; + u32 start_index; + u32 page_index; + u32 page_count; + u64 mem_key; };
-static int clear_dca_pages_proc(struct dca_mem *mem, int index, void *param) +static int query_dca_active_pages_proc(struct dca_mem *mem, int index, + void *param) { struct hns_dca_page_state *state = &mem->states[index]; - struct dca_page_clear_attr *attr = param; + struct dca_page_query_active_attr *attr = param;
- if (dca_page_is_attached(state, attr->buf_id)) { - set_dca_page_to_free(state); - attr->clear_pages++; - } + if (!dca_page_is_active(state, attr->buf_id)) + return 0;
- if (attr->clear_pages >= attr->max_pages) - return DCA_MEM_STOP_ITERATE; - else + if (attr->curr_index < attr->start_index) { + attr->curr_index++; return 0; + } else if (attr->curr_index > attr->start_index) { + return DCA_MEM_STOP_ITERATE; + } + + /* Search first page in DCA mem */ + attr->page_index = index; + attr->mem_key = mem->key; + /* Search active pages in continuous addresses */ + while (index < mem->page_count) { + state = &mem->states[index]; + if (!dca_page_is_active(state, attr->buf_id)) + break; + + index++; + attr->page_count++; + } + + return DCA_MEM_STOP_ITERATE; }
-static void clear_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +static int sync_dca_buf_offset(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr) { - struct dca_page_clear_attr attr = {}; + struct ib_device *ibdev = &hr_dev->ib_dev;
- attr.buf_id = buf_id; - attr.max_pages = count; - travel_dca_pages(ctx, &attr, clear_dca_pages_proc); + if (hr_qp->sq.wqe_cnt > 0) { + if (attr->sq_offset >= hr_qp->sge.offset) { + ibdev_err(ibdev, "failed to check SQ offset = %u\n", + attr->sq_offset); + return -EINVAL; + } + hr_qp->sq.wqe_offset = hr_qp->sq.offset + attr->sq_offset; + } + + if (hr_qp->sge.sge_cnt > 0) { + if (attr->sge_offset >= hr_qp->rq.offset) { + ibdev_err(ibdev, "failed to check exSGE offset = %u\n", + attr->sge_offset); + return -EINVAL; + } + hr_qp->sge.wqe_offset = hr_qp->sge.offset + attr->sge_offset; + } + + if (hr_qp->rq.wqe_cnt > 0) { + if (attr->rq_offset >= hr_qp->buff_size) { + ibdev_err(ibdev, "failed to check RQ offset = %u\n", + attr->rq_offset); + return -EINVAL; + } + hr_qp->rq.wqe_offset = hr_qp->rq.offset + attr->rq_offset; + } + + return 0; }
-struct dca_page_assign_attr { +static u32 alloc_buf_from_dca_mem(struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx) +{ + u32 buf_pages, unit_pages, alloc_pages; u32 buf_id; - int unit; - int total; - int max; -};
-static bool dca_page_is_allocable(struct hns_dca_page_state *state, bool head) -{ - bool is_free = dca_page_is_free(state) || dca_page_is_inactive(state); + buf_pages = hr_qp->dca_cfg.npages; + /* Gen new buf id */ + buf_id = HNS_DCA_TO_BUF_ID(hr_qp->qpn, hr_qp->dca_cfg.attach_count);
- return head ? is_free : is_free && !state->head; + /* Assign pages from free pages */ + unit_pages = hr_qp->mtr.hem_cfg.is_direct ? buf_pages : 1; + alloc_pages = assign_dca_pages(ctx, buf_id, buf_pages, unit_pages); + if (buf_pages != alloc_pages) { + if (alloc_pages > 0) + clear_dca_pages(ctx, buf_id, alloc_pages); + return HNS_DCA_INVALID_BUF_ID; + } + return buf_id; }
-static int assign_dca_pages_proc(struct dca_mem *mem, int index, void *param) +static int active_alloced_buf(struct hns_roce_qp *hr_qp, + struct hns_roce_dca_ctx *ctx, + struct hns_dca_attach_attr *attr, u32 buf_id) { - struct dca_page_assign_attr *attr = param; - struct hns_dca_page_state *state; - int checked_pages = 0; - int start_index = 0; - int free_pages = 0; - int i; - - /* Check the continuous pages count is not smaller than unit count */ - for (i = index; free_pages < attr->unit && i < mem->page_count; i++) { - checked_pages++; - state = &mem->states[i]; - if (dca_page_is_allocable(state, free_pages == 0)) { - if (free_pages == 0) - start_index = i; + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 active_pages, alloc_pages; + int ret;
- free_pages++; - } else { - free_pages = 0; - } + alloc_pages = hr_qp->dca_cfg.npages; + ret = sync_dca_buf_offset(hr_dev, hr_qp, attr); + if (ret) { + ibdev_err(ibdev, "failed to sync DCA offset, ret = %d\n", ret); + goto active_fail; }
- if (free_pages < attr->unit) - return DCA_MEM_NEXT_ITERATE; + ret = setup_dca_buf_to_hw(hr_dev, hr_qp, ctx, buf_id, alloc_pages); + if (ret) { + ibdev_err(ibdev, "failed to setup DCA buf, ret = %d.\n", ret); + goto active_fail; + }
- for (i = 0; i < free_pages; i++) { - state = &mem->states[start_index + i]; - lock_dca_page_to_attach(state, attr->buf_id); - attr->total++; + active_pages = active_dca_pages(ctx, buf_id, alloc_pages); + if (active_pages != alloc_pages) { + ibdev_err(ibdev, "failed to active DCA pages, %u != %u.\n", + active_pages, alloc_pages); + ret = -ENOBUFS; + goto active_fail; }
- if (attr->total >= attr->max) - return DCA_MEM_STOP_ITERATE; + return 0;
- return checked_pages; +active_fail: + clear_dca_pages(ctx, buf_id, alloc_pages); + return ret; }
-static u32 assign_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count, - u32 unit) +static int attach_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr, + struct hns_dca_attach_resp *resp) { - struct dca_page_assign_attr attr = {}; + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + u32 buf_id; + int ret;
- attr.buf_id = buf_id; - attr.unit = unit; - attr.max = count; - travel_dca_pages(ctx, &attr, assign_dca_pages_proc); - return attr.total; + /* Stop DCA mem ageing worker */ + cancel_delayed_work(&cfg->dwork); + resp->alloc_flags = 0; + + spin_lock(&cfg->lock); + buf_id = cfg->buf_id; + /* Already attached */ + if (buf_id != HNS_DCA_INVALID_BUF_ID) { + resp->alloc_pages = cfg->npages; + spin_unlock(&cfg->lock); + return 0; + } + + /* Start to new attach */ + resp->alloc_pages = 0; + buf_id = alloc_buf_from_dca_mem(hr_qp, ctx); + if (buf_id == HNS_DCA_INVALID_BUF_ID) { + spin_unlock(&cfg->lock); + /* No report fail, need try again after the pool increased */ + return 0; + } + + ret = active_alloced_buf(hr_qp, ctx, attr, buf_id); + if (ret) { + spin_unlock(&cfg->lock); + ibdev_err(&hr_dev->ib_dev, + "failed to active DCA buf for QP-%lu, ret = %d.\n", + hr_qp->qpn, ret); + return ret; + } + + /* Attach ok */ + cfg->buf_id = buf_id; + cfg->attach_count++; + spin_unlock(&cfg->lock); + + resp->alloc_flags |= HNS_DCA_ATTACH_FLAGS_NEW_BUFFER; + resp->alloc_pages = cfg->npages; + + return 0; }
-struct dca_page_active_attr { +struct dca_page_free_buf_attr { u32 buf_id; u32 max_pages; - u32 alloc_pages; - u32 dirty_mems; + u32 free_pages; + u32 clean_mems; };
-static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) +static int free_buffer_pages_proc(struct dca_mem *mem, int index, void *param) { - struct dca_page_active_attr *attr = param; + struct dca_page_free_buf_attr *attr = param; struct hns_dca_page_state *state; bool changed = false; bool stop = false; @@ -531,360 +800,453 @@ static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) free_pages = 0; for (i = 0; !stop && i < mem->page_count; i++) { state = &mem->states[i]; - if (dca_page_is_free(state)) { - free_pages++; - } else if (dca_page_is_allocated(state, attr->buf_id)) { - free_pages++; - /* Change matched pages state */ - unlock_dca_page_to_active(state, attr->buf_id); + /* Change matched pages state */ + if (dca_page_is_attached(state, attr->buf_id)) { + set_dca_page_to_free(state); changed = true; - attr->alloc_pages++; - if (attr->alloc_pages == attr->max_pages) + attr->free_pages++; + if (attr->free_pages == attr->max_pages) stop = true; } + + if (dca_page_is_free(state)) + free_pages++; }
for (; changed && i < mem->page_count; i++) if (dca_page_is_free(state)) free_pages++;
- /* Clean mem changed to dirty */ if (changed && free_pages == mem->page_count) - attr->dirty_mems++; + attr->clean_mems++;
return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; }
-static u32 active_dca_pages(struct hns_roce_dca_ctx *ctx, u32 buf_id, u32 count) +static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, + struct hns_roce_dca_cfg *cfg) { - struct dca_page_active_attr attr = {}; + struct dca_page_free_buf_attr attr = {}; unsigned long flags; + u32 buf_id; + + spin_lock(&cfg->lock); + buf_id = cfg->buf_id; + cfg->buf_id = HNS_DCA_INVALID_BUF_ID; + spin_unlock(&cfg->lock); + if (buf_id == HNS_DCA_INVALID_BUF_ID) + return;
attr.buf_id = buf_id; - attr.max_pages = count; - travel_dca_pages(ctx, &attr, active_dca_pages_proc); + attr.max_pages = cfg->npages; + travel_dca_pages(ctx, &attr, free_buffer_pages_proc);
/* Update free size */ spin_lock_irqsave(&ctx->pool_lock, flags); - ctx->free_mems -= attr.dirty_mems; - ctx->free_size -= attr.alloc_pages << HNS_HW_PAGE_SHIFT; + ctx->free_mems += attr.clean_mems; + ctx->free_size += attr.free_pages << HNS_HW_PAGE_SHIFT; spin_unlock_irqrestore(&ctx->pool_lock, flags); +}
- return attr.alloc_pages; +static void detach_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr) +{ + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + + /* Start an ageing worker to free buffer */ + cancel_delayed_work(&cfg->dwork); + spin_lock(&cfg->lock); + cfg->sq_idx = attr->sq_idx; + queue_delayed_work(hr_dev->irq_workq, &cfg->dwork, + msecs_to_jiffies(DCA_MEM_AGEING_MSES)); + spin_unlock(&cfg->lock); }
-struct dca_get_alloced_pages_attr { - u32 buf_id; - dma_addr_t *pages; - u32 total; - u32 max; +struct dca_mem_shrink_attr { + u64 shrink_key; + u32 shrink_mems; };
-static int get_alloced_umem_proc(struct dca_mem *mem, int index, void *param) - +static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param) { - struct dca_get_alloced_pages_attr *attr = param; - struct hns_dca_page_state *states = mem->states; - struct ib_umem *umem = mem->pages; - struct ib_block_iter biter; - u32 i = 0; + struct dca_mem_shrink_attr *attr = param; + struct hns_dca_page_state *state; + int i, free_pages;
- rdma_for_each_block(umem->sgt_append.sgt.sgl, &biter, - umem->sgt_append.sgt.nents, HNS_HW_PAGE_SIZE) { - if (dca_page_is_allocated(&states[i], attr->buf_id)) { - attr->pages[attr->total++] = - rdma_block_iter_dma_address(&biter); - if (attr->total >= attr->max) - return DCA_MEM_STOP_ITERATE; + free_pages = 0; + for (i = 0; i < mem->page_count; i++) { + state = &mem->states[i]; + if (dca_page_is_free(state)) + free_pages++; + } + + /* No any page be used */ + if (free_pages == mem->page_count) { + /* unregister first empty DCA mem */ + if (!attr->shrink_mems) { + mem->flags &= ~DCA_MEM_FLAGS_REGISTERED; + attr->shrink_key = mem->key; } - i++; + + attr->shrink_mems++; }
- return DCA_MEM_NEXT_ITERATE; + if (attr->shrink_mems > 1) + return DCA_MEM_STOP_ITERATE; + else + return DCA_MEM_NEXT_ITERATE; }
-static int apply_dca_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct hns_dca_attach_attr *attach_attr) +struct hns_dca_shrink_resp { + u64 free_key; + u32 free_mems; +}; + +static void shrink_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx, u64 reserved_size, + struct hns_dca_shrink_resp *resp) { - struct hns_roce_dca_attr attr; + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + struct dca_mem_shrink_attr attr = {}; + unsigned long flags; + bool need_shink;
- if (hr_dev->hw->set_dca_buf) { - attr.sq_offset = attach_attr->sq_offset; - attr.sge_offset = attach_attr->sge_offset; - attr.rq_offset = attach_attr->rq_offset; - return hr_dev->hw->set_dca_buf(hr_dev, hr_qp, &attr); - } + spin_lock_irqsave(&ctx->pool_lock, flags); + need_shink = ctx->free_mems > 0 && ctx->free_size > reserved_size; + spin_unlock_irqrestore(&ctx->pool_lock, flags); + if (!need_shink) + return;
- return 0; + travel_dca_pages(ctx, &attr, shrink_dca_page_proc); + resp->free_mems = attr.shrink_mems; + resp->free_key = attr.shrink_key; }
-static int setup_dca_buf_to_hw(struct hns_roce_dca_ctx *ctx, - struct hns_roce_qp *hr_qp, u32 buf_id, - struct hns_dca_attach_attr *attach_attr) +static void init_dca_context(struct hns_roce_dca_ctx *ctx) { - struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct dca_get_alloced_pages_attr attr = {}; - struct ib_device *ibdev = &hr_dev->ib_dev; - u32 count = hr_qp->dca_cfg.npages; - dma_addr_t *pages; - int ret; + INIT_LIST_HEAD(&ctx->pool); + spin_lock_init(&ctx->pool_lock); + ctx->total_size = 0; +}
- /* Alloc a tmp array to store buffer's dma address */ - pages = kvcalloc(count, sizeof(dma_addr_t), GFP_NOWAIT); - if (!pages) - return -ENOMEM; +static void cleanup_dca_context(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx) +{ + struct dca_mem *mem, *tmp; + unsigned long flags; + bool is_user;
- attr.buf_id = buf_id; - attr.pages = pages; - attr.max = count; + is_user = (ctx != &hr_dev->dca_ctx); + spin_lock_irqsave(&ctx->pool_lock, flags); + list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { + list_del(&mem->list); + spin_lock(&mem->lock); + mem->flags = 0; + spin_unlock(&mem->lock); + spin_unlock_irqrestore(&ctx->pool_lock, flags);
- travel_dca_pages(ctx, &attr, get_alloced_umem_proc); - if (attr.total != count) { - ibdev_err(ibdev, "failed to get DCA page %u != %u.\n", - attr.total, count); - ret = -ENOMEM; - goto done; - } + kfree(mem->states); + free_dca_pages(hr_dev, is_user, mem->pages); + kfree(mem);
- /* Update MTT for ROCEE addressing */ - ret = hns_roce_mtr_map(hr_dev, &hr_qp->mtr, pages, count); - if (ret) { - ibdev_err(ibdev, "failed to map DCA pages, ret = %d.\n", ret); - goto done; + spin_lock_irqsave(&ctx->pool_lock, flags); } + ctx->total_size = 0; + spin_unlock_irqrestore(&ctx->pool_lock, flags); +}
- /* Apply the changes for WQE address */ - ret = apply_dca_cfg(hr_dev, hr_qp, attach_attr); - if (ret) - ibdev_err(ibdev, "failed to apply DCA cfg, ret = %d.\n", ret); +#define DCA_MAX_MEM_SIZE ~0UL
-done: - /* Drop tmp array */ - kvfree(pages); - return ret; +static uint dca_unit_size; +static ulong dca_min_size = DCA_MAX_MEM_SIZE; +static ulong dca_max_size = DCA_MAX_MEM_SIZE; + +static void config_kdca_context(struct hns_roce_dca_ctx *ctx) +{ + unsigned int unit_size; + + unit_size = ALIGN(dca_unit_size, PAGE_SIZE); + ctx->unit_size = unit_size; + if (!unit_size) + return; + + if (dca_max_size == DCA_MAX_MEM_SIZE || dca_max_size == 0) + ctx->max_size = DCA_MAX_MEM_SIZE; + else + ctx->max_size = roundup(dca_max_size, unit_size); + + if (dca_min_size == DCA_MAX_MEM_SIZE) + ctx->min_size = ctx->max_size; + else + ctx->min_size = roundup(dca_min_size, unit_size); }
-static u32 alloc_buf_from_dca_mem(struct hns_roce_qp *hr_qp, - struct hns_roce_dca_ctx *ctx) +void hns_roce_init_dca(struct hns_roce_dev *hr_dev) { - u32 buf_pages, unit_pages, alloc_pages; - u32 buf_id; + init_dca_context(&hr_dev->dca_ctx);
- buf_pages = hr_qp->dca_cfg.npages; - /* Gen new buf id */ - buf_id = HNS_DCA_TO_BUF_ID(hr_qp->qpn, hr_qp->dca_cfg.attach_count); + config_kdca_context(&hr_dev->dca_ctx); +} + +void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev) +{ + cleanup_dca_context(hr_dev, &hr_dev->dca_ctx); +} + +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) + return; + + init_dca_context(&uctx->dca_ctx); +} + +void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) + return; + + cleanup_dca_context(hr_dev, &uctx->dca_ctx); +} + +static struct dca_mem *key_to_dca_mem(struct list_head *head, u64 key) +{ + struct dca_mem *mem; + + list_for_each_entry(mem, head, list) + if (mem->key == key) + return mem; + + return NULL; +} + +static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, u32 alloc_size) +{ + unsigned long flags; + bool enable; + + spin_lock_irqsave(&ctx->pool_lock, flags); + + /* Pool size no limit */ + if (ctx->max_size == DCA_MAX_MEM_SIZE) + enable = true; + else /* Pool size not exceed max size */ + enable = (ctx->total_size + alloc_size) < ctx->max_size; + + spin_unlock_irqrestore(&ctx->pool_lock, flags); + + return enable; +} + +static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) +{ + unsigned long flags; + bool enable;
- /* Assign pages from free pages */ - unit_pages = hr_qp->mtr.hem_cfg.is_direct ? buf_pages : 1; - alloc_pages = assign_dca_pages(ctx, buf_id, buf_pages, unit_pages); - if (buf_pages != alloc_pages) { - if (alloc_pages > 0) - clear_dca_pages(ctx, buf_id, alloc_pages); - return HNS_DCA_INVALID_BUF_ID; - } + spin_lock_irqsave(&ctx->pool_lock, flags); + enable = ctx->total_size > 0 && ctx->min_size < ctx->max_size; + spin_unlock_irqrestore(&ctx->pool_lock, flags);
- return buf_id; + return enable; }
-static int active_alloced_buf(struct hns_roce_qp *hr_qp, - struct hns_roce_dca_ctx *ctx, - struct hns_dca_attach_attr *attr, u32 buf_id) +static struct dca_mem *alloc_dca_mem(struct hns_roce_dca_ctx *ctx) { - struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct ib_device *ibdev = &hr_dev->ib_dev; - u32 active_pages, alloc_pages; - int ret; + struct dca_mem *mem, *tmp, *found = NULL; + unsigned long flags;
- ret = setup_dca_buf_to_hw(ctx, hr_qp, buf_id, attr); - if (ret) { - ibdev_err(ibdev, "failed to setup DCA buf, ret = %d.\n", ret); - goto active_fail; + spin_lock_irqsave(&ctx->pool_lock, flags); + list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { + spin_lock(&mem->lock); + if (!mem->flags) { + found = mem; + mem->flags |= DCA_MEM_FLAGS_ALLOCED; + spin_unlock(&mem->lock); + break; + } + spin_unlock(&mem->lock); } + spin_unlock_irqrestore(&ctx->pool_lock, flags);
- alloc_pages = hr_qp->dca_cfg.npages; - active_pages = active_dca_pages(ctx, buf_id, alloc_pages); - if (active_pages != alloc_pages) { - ibdev_err(ibdev, "failed to active DCA pages, %u != %u.\n", - active_pages, alloc_pages); - ret = -ENOBUFS; - goto active_fail; - } + if (found) + return found;
- return 0; + mem = kzalloc(sizeof(*mem), GFP_ATOMIC); + if (!mem) + return NULL;
-active_fail: - clear_dca_pages(ctx, buf_id, alloc_pages); - return ret; + spin_lock_init(&mem->lock); + INIT_LIST_HEAD(&mem->list); + + mem->flags |= DCA_MEM_FLAGS_ALLOCED; + + spin_lock_irqsave(&ctx->pool_lock, flags); + list_add(&mem->list, &ctx->pool); + spin_unlock_irqrestore(&ctx->pool_lock, flags); + return mem; }
-static int attach_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_dca_attach_attr *attr, - struct hns_dca_attach_resp *resp) +static void free_dca_mem(struct dca_mem *mem) { - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); - struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; - u32 buf_id; - int ret; + /* travel_dca_pages() iterates all DCA mems without holding the pool's + * lock, so while DCA is working we only mark the DCA mem as free here; + * it is finally released when the DCA context is cleaned up in + * hns_roce_cleanup_dca(). + */ + spin_lock(&mem->lock); + mem->flags = 0; + spin_unlock(&mem->lock); +}
- /* Stop DCA mem ageing worker */ - cancel_delayed_work(&cfg->dwork); - resp->alloc_flags = 0; +static int add_dca_mem(struct hns_roce_dev *hr_dev, u32 new_size) +{ + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, NULL); + struct dca_mem_attr attr = {}; + struct dca_mem *mem = NULL; + int ret;
- spin_lock(&cfg->lock); - buf_id = cfg->buf_id; - /* Already attached */ - if (buf_id != HNS_DCA_INVALID_BUF_ID) { - resp->alloc_pages = cfg->npages; - spin_unlock(&cfg->lock); - return 0; - } + if (!add_dca_mem_enabled(ctx, new_size)) + return -ENOMEM;
- /* Start to new attach */ - resp->alloc_pages = 0; - buf_id = alloc_buf_from_dca_mem(hr_qp, ctx); - if (buf_id == HNS_DCA_INVALID_BUF_ID) { - spin_unlock(&cfg->lock); - /* No report fail, need try again after the pool increased */ - return 0; - } + /* Add new DCA mem */ + mem = alloc_dca_mem(ctx); + if (!mem) + return -ENOMEM;
- ret = active_alloced_buf(hr_qp, ctx, attr, buf_id); + attr.key = (u64)mem; + attr.size = roundup(new_size, ctx->unit_size); + ret = register_dca_mem(hr_dev, NULL, mem, &attr); if (ret) { - spin_unlock(&cfg->lock); + free_dca_mem(mem); ibdev_err(&hr_dev->ib_dev, - "failed to active DCA buf for QP-%lu, ret = %d.\n", - hr_qp->qpn, ret); - return ret; + "failed to register DCA mem, ret = %d.\n", ret); }
- /* Attach ok */ - cfg->buf_id = buf_id; - cfg->attach_count++; - spin_unlock(&cfg->lock); - - resp->alloc_flags |= HNS_IB_ATTACH_FLAGS_NEW_BUFFER; - resp->alloc_pages = cfg->npages; - - return 0; + return ret; }
-struct dca_page_query_active_attr { +struct dca_page_get_active_buf_attr { u32 buf_id; - u32 curr_index; - u32 start_index; - u32 page_index; - u32 page_count; - u64 mem_key; + void **buf_list; + u32 total; + u32 max; };
-static int query_dca_active_pages_proc(struct dca_mem *mem, int index, - void *param) +static int get_active_kbuf_proc(struct dca_mem *mem, int index, void *param) { - struct hns_dca_page_state *state = &mem->states[index]; - struct dca_page_query_active_attr *attr = param; - - if (!dca_page_is_active(state, attr->buf_id)) - return 0; - - if (attr->curr_index < attr->start_index) { - attr->curr_index++; - return 0; - } else if (attr->curr_index > attr->start_index) { - return DCA_MEM_STOP_ITERATE; + struct dca_page_get_active_buf_attr *attr = param; + struct hns_dca_page_state *states = mem->states; + struct hns_roce_buf *kmem = mem->pages; + void *buf; + u32 i; + + for (i = 0; i < kmem->npages; i++) { + if (!dca_page_is_active(&states[i], attr->buf_id)) + continue; + + buf = hns_roce_buf_offset(kmem, i << HNS_HW_PAGE_SHIFT); + attr->buf_list[attr->total++] = buf; + if (attr->total >= attr->max) + return DCA_MEM_STOP_ITERATE; }
- /* Search first page in DCA mem */ - attr->page_index = index; - attr->mem_key = mem->key; - /* Search active pages in continuous addresses */ - while (index < mem->page_count) { - state = &mem->states[index]; - if (!dca_page_is_active(state, attr->buf_id)) - break; + return DCA_MEM_NEXT_ITERATE; +}
- index++; - attr->page_count++; - } +static int setup_dca_buf_list(struct hns_roce_dca_ctx *ctx, + struct hns_roce_dca_cfg *cfg) +{ + struct dca_page_get_active_buf_attr attr = {};
- return DCA_MEM_STOP_ITERATE; -} + attr.buf_id = cfg->buf_id; + attr.buf_list = cfg->buf_list; + attr.max = cfg->npages; + travel_dca_pages(ctx, &attr, get_active_kbuf_proc);
-struct dca_page_free_buf_attr { - u32 buf_id; - u32 max_pages; - u32 free_pages; - u32 clean_mems; -}; + return attr.total == attr.max ? 0 : -ENOMEM; +}
-static int free_buffer_pages_proc(struct dca_mem *mem, int index, void *param) +#define DCA_EXPAND_MEM_TRY_TIMES 3 +int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr) { - struct dca_page_free_buf_attr *attr = param; - struct hns_dca_page_state *state; - bool changed = false; - bool stop = false; - int i, free_pages; + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + struct hns_dca_attach_resp resp = {}; + bool is_new_buf = true; + int try_times = 0; + int ret;
- free_pages = 0; - for (i = 0; !stop && i < mem->page_count; i++) { - state = &mem->states[i]; - /* Change matched pages state */ - if (dca_page_is_attached(state, attr->buf_id)) { - set_dca_page_to_free(state); - changed = true; - attr->free_pages++; - if (attr->free_pages == attr->max_pages) - stop = true; + do { + resp.alloc_flags = 0; + ret = attach_dca_mem(hr_dev, hr_qp, attr, &resp); + if (ret) + break; + + if (resp.alloc_pages >= cfg->npages) { + is_new_buf = !!(resp.alloc_flags & + HNS_DCA_ATTACH_FLAGS_NEW_BUFFER); + break; }
- if (dca_page_is_free(state)) - free_pages++; - } + ret = add_dca_mem(hr_dev, hr_qp->buff_size); + if (ret) + break; + } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
- for (; changed && i < mem->page_count; i++) - if (dca_page_is_free(state)) - free_pages++; + if (ret || resp.alloc_pages < cfg->npages) { + ibdev_err(&hr_dev->ib_dev, + "failed to attach buf %u != %u, try %d, ret = %d.\n", + cfg->npages, resp.alloc_pages, try_times, ret); + return -ENOMEM; + }
- if (changed && free_pages == mem->page_count) - attr->clean_mems++; + /* DCA config not changed */ + if (!is_new_buf && cfg->buf_list[0]) + return 0;
- return stop ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; + return setup_dca_buf_list(hr_qp_to_dca_ctx(hr_dev, hr_qp), cfg); }
-static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, - struct hns_roce_dca_cfg *cfg) +static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev) { - struct dca_page_free_buf_attr attr = {}; + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, NULL); + struct hns_dca_shrink_resp resp = {}; + struct dca_mem *mem; unsigned long flags; - u32 buf_id; - - spin_lock(&cfg->lock); - buf_id = cfg->buf_id; - cfg->buf_id = HNS_DCA_INVALID_BUF_ID; - spin_unlock(&cfg->lock); - if (buf_id == HNS_DCA_INVALID_BUF_ID) - return;
- attr.buf_id = buf_id; - attr.max_pages = cfg->npages; - travel_dca_pages(ctx, &attr, free_buffer_pages_proc); - - /* Update free size */ - spin_lock_irqsave(&ctx->pool_lock, flags); - ctx->free_mems += attr.clean_mems; - ctx->free_size += attr.free_pages << HNS_HW_PAGE_SHIFT; - spin_unlock_irqrestore(&ctx->pool_lock, flags); + while (shrink_dca_mem_enabled(ctx)) { + resp.free_mems = 0; + shrink_dca_mem(hr_dev, NULL, ctx->min_size, &resp); + if (resp.free_mems < 1) + break; + spin_lock_irqsave(&ctx->pool_lock, flags); + mem = key_to_dca_mem(&ctx->pool, resp.free_key); + spin_unlock_irqrestore(&ctx->pool_lock, flags); + if (!mem) + break; + unregister_dca_mem(hr_dev, NULL, mem); + free_dca_mem(mem); + /* No more free memory */ + if (resp.free_mems <= 1) + break; + } }
static void kick_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_dca_cfg *cfg, struct hns_roce_ucontext *uctx) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);
/* Stop ageing worker and free DCA buffer from pool */ cancel_delayed_work_sync(&cfg->dwork); free_buf_from_dca_mem(ctx, cfg); + + /* Shrink kernel DCA mem */ + if (!uctx) + remove_unused_dca_mem(hr_dev); }
static void dca_mem_ageing_work(struct work_struct *work) @@ -892,41 +1254,36 @@ static void dca_mem_ageing_work(struct work_struct *work) struct hns_roce_qp *hr_qp = container_of(work, struct hns_roce_qp, dca_cfg.dwork.work); struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); bool hw_is_inactive;
hw_is_inactive = hr_dev->hw->chk_dca_buf_inactive && hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp); if (hw_is_inactive) free_buf_from_dca_mem(ctx, &hr_qp->dca_cfg); + + /* Shrink kernel DCA mem */ + if (!hr_qp->ibqp.uobject) + remove_unused_dca_mem(hr_dev); }
-void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr) { - struct hns_roce_ucontext *uctx; - - if (hr_qp->ibqp.uobject && hr_qp->ibqp.pd->uobject) { - uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); - kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx); - } + detach_dca_mem(hr_dev, hr_qp, attr); }
-static void detach_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_dca_detach_attr *attr) +void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) { - struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext);
- /* Start an ageing worker to free buffer */ - cancel_delayed_work(&cfg->dwork); - spin_lock(&cfg->lock); - cfg->sq_idx = attr->sq_idx; - queue_delayed_work(hr_dev->irq_workq, &cfg->dwork, - msecs_to_jiffies(DCA_MEM_AGEING_MSES)); - spin_unlock(&cfg->lock); + kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx); }
-void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) { struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
@@ -934,6 +1291,16 @@ void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) INIT_DELAYED_WORK(&cfg->dwork, dca_mem_ageing_work); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT; + + /* DCA page list for kernel QP */ + if (!udata && cfg->npages) { + cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), + GFP_KERNEL); + if (!cfg->buf_list) + return -ENOMEM; + } + + return 0; }
void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, @@ -944,7 +1311,12 @@ void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
kick_dca_mem(hr_dev, cfg, uctx); - cfg->buf_id = HNS_DCA_INVALID_BUF_ID; + + /* Free kernel DCA buffer list */ + if (!udata && cfg->buf_list) { + kfree(cfg->buf_list); + cfg->buf_list = NULL; + } }
static inline struct hns_roce_ucontext * @@ -981,7 +1353,7 @@ static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)(
init_attr.size = hr_hw_page_align(init_attr.size);
- mem = alloc_dca_mem(to_hr_dca_ctx(uctx)); + mem = alloc_dca_mem(to_hr_dca_ctx(hr_dev, uctx)); if (!mem) return -ENOMEM;
@@ -1010,7 +1382,7 @@ static int dca_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why, return 0;
mem = uobject->object; - unregister_dca_mem(uctx, mem); + unregister_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx, mem); free_dca_mem(mem);
return 0; @@ -1073,27 +1445,27 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u32), UA_MANDATORY));
static inline struct hns_roce_qp * -uverbs_attr_to_hr_qp(struct uverbs_attr_bundle *attrs) +uverbs_attr_to_hr_qp(struct uverbs_attr_bundle *attrs, u16 idx) { - struct ib_uobject *uobj = - uverbs_attr_get_uobject(attrs, 1U << UVERBS_ID_NS_SHIFT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, idx);
- if (uobj_get_object_id(uobj) == UVERBS_OBJECT_QP) - return to_hr_qp(uobj->object); + if (IS_ERR(uobj)) + return ERR_CAST(uobj);
- return NULL; + return to_hr_qp(uobj->object); }
static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_ATTACH)( struct uverbs_attr_bundle *attrs) { - struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs); + struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs, + HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE); struct hns_dca_attach_attr attr = {}; struct hns_dca_attach_resp resp = {}; int ret;
- if (!hr_qp) - return -EINVAL; + if (IS_ERR(hr_qp)) + return PTR_ERR(hr_qp);
ret = uverbs_copy_from(&attr.sq_offset, attrs, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET); @@ -1140,12 +1512,13 @@ DECLARE_UVERBS_NAMED_METHOD( static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_DETACH)( struct uverbs_attr_bundle *attrs) { - struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs); + struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs, + HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE); struct hns_dca_detach_attr attr = {}; int ret;
- if (!hr_qp) - return -EINVAL; + if (IS_ERR(hr_qp)) + return PTR_ERR(hr_qp);
ret = uverbs_copy_from(&attr.sq_idx, attrs, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX); @@ -1167,14 +1540,21 @@ DECLARE_UVERBS_NAMED_METHOD( static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_QUERY)( struct uverbs_attr_bundle *attrs) { - struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs); - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_qp); + struct hns_roce_qp *hr_qp = uverbs_attr_to_hr_qp(attrs, + HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE); struct dca_page_query_active_attr active_attr = {}; + struct hns_roce_dca_ctx *ctx = NULL; + struct hns_roce_dev *hr_dev = NULL; u32 page_idx, page_ofs; int ret;
- if (!hr_qp) - return -EINVAL; + if (IS_ERR(hr_qp)) + return PTR_ERR(hr_qp); + + hr_dev = to_hr_dev(hr_qp->ibqp.device); + ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); + if (!ctx) + return -ENOENT;
ret = uverbs_copy_from(&page_idx, attrs, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX); @@ -1239,3 +1619,7 @@ const struct uapi_definition hns_roce_dca_uapi_defs[] = { UAPI_DEF_IS_OBJ_SUPPORTED(dca_is_supported)), {} }; + +module_param(dca_unit_size, uint, 0444); +module_param(dca_max_size, ulong, 0444); +module_param(dca_min_size, ulong, 0444); diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index fdc3aaa4b..f37810277 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -18,11 +18,6 @@ struct hns_dca_page_state {
extern const struct uapi_definition hns_roce_dca_uapi_defs[];
-struct hns_dca_shrink_resp { - u64 free_key; /* free buffer's key which registered by the user */ - u32 free_mems; /* free buffer count which no any QP be using */ -}; - #define HNS_DCA_INVALID_BUF_ID 0UL
/* @@ -46,6 +41,7 @@ struct hns_dca_attach_attr { };
struct hns_dca_attach_resp { +#define HNS_DCA_ATTACH_FLAGS_NEW_BUFFER BIT(0) u32 alloc_flags; u32 alloc_pages; }; @@ -54,14 +50,27 @@ struct hns_dca_detach_attr { u32 sq_idx; };
+typedef int (*hns_dca_enum_callback)(struct hns_dca_page_state *, u32, void *); + +void hns_roce_init_dca(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev); + void hns_roce_register_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx); void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx);
-void hns_roce_enable_dca(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp); +int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata); void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata); -void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); + +int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_attach_attr *attr); +void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr); + +void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata); + #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a76d6d153..584698eb0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -134,6 +134,15 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17, };
+/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. + * + * These flags are intended for internal use by the hns driver, and they + * rely on the range reserved for that use in the ib_qp_create_flags enum. + */ +enum hns_roce_qp_create_flags { + HNS_ROCE_QP_CREATE_DCA_EN = IB_QP_CREATE_RESERVED_START, +}; + enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), @@ -214,6 +223,9 @@ struct hns_roce_dca_ctx { unsigned int free_mems; /* free mem num in pool */ size_t free_size; /* free mem size in pool */ size_t total_size; /* total size in pool */ + size_t max_size; /* max size the pool can expand to */ + size_t min_size; /* shrink if @free_size > @min_size */ + unsigned int unit_size; /* unit size per DCA mem */ };
struct hns_roce_ucontext { @@ -324,20 +336,15 @@ struct hns_roce_mtr { struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ };
+/* DCA config */ struct hns_roce_dca_cfg { - spinlock_t lock; - u32 buf_id; - u16 attach_count; - u32 npages; - u32 sq_idx; - struct delayed_work dwork; -}; - -/* DCA attr for setting WQE buffer */ -struct hns_roce_dca_attr { - u32 sq_offset; - u32 sge_offset; - u32 rq_offset; + spinlock_t lock; + u32 buf_id; + u16 attach_count; + void **buf_list; + u32 npages; + u32 sq_idx; + struct delayed_work dwork; };
struct hns_roce_mw { @@ -377,6 +384,7 @@ struct hns_roce_wq { u32 max_gs; u32 rsv_sge; u32 offset; + int wqe_offset; u32 wqe_shift; /* WQE size */ u32 head; u32 tail; @@ -388,6 +396,7 @@ struct hns_roce_sge { unsigned int sge_cnt; /* SGE num */ u32 offset; u32 sge_shift; /* SGE size */ + int wqe_offset; };
struct hns_roce_buf_list { @@ -980,8 +989,7 @@ struct hns_roce_hw { struct hns_roce_hem_table *table, int obj, u32 step_idx); int (*set_dca_buf)(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_dca_attr *attr); + struct hns_roce_qp *hr_qp); bool (*chk_dca_buf_inactive)(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, @@ -1027,6 +1035,10 @@ struct hns_roce_dev { struct ib_device ib_dev; struct pci_dev *pci_dev; struct device *dev; + + struct list_head uctx_list; /* list of all uctx on this dev */ + spinlock_t uctx_list_lock; /* protect @uctx_list */ + struct hns_roce_uar priv_uar; const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; spinlock_t sm_lock; @@ -1049,6 +1061,8 @@ struct hns_roce_dev { struct hns_roce_caps caps; struct xarray qp_table_xa;
+ struct hns_roce_dca_ctx dca_ctx; + unsigned char dev_addr[HNS_ROCE_MAX_PORTS][ETH_ALEN]; u64 sys_image_guid; u32 vendor_id; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 5cdb19d81..44eba2b0f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -376,10 +376,63 @@ static inline bool check_qp_dca_enable(struct hns_roce_qp *hr_qp) return !!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA); }
+static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_dca_attach_attr attr = {}; + unsigned long flags_sq, flags_rq; + u32 idx; + + spin_lock_irqsave(&hr_qp->sq.lock, flags_sq); + spin_lock_irqsave(&hr_qp->rq.lock, flags_rq); + + if (hr_qp->sq.wqe_cnt > 0) { + idx = hr_qp->sq.head & (hr_qp->sq.wqe_cnt - 1); + attr.sq_offset = idx << hr_qp->sq.wqe_shift; + } + + if (hr_qp->sge.sge_cnt > 0) { + idx = hr_qp->next_sge & (hr_qp->sge.sge_cnt - 1); + attr.sge_offset = idx << hr_qp->sge.sge_shift; + } + + if (hr_qp->rq.wqe_cnt > 0) { + idx = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); + attr.rq_offset = idx << hr_qp->rq.wqe_shift; + } + + spin_unlock_irqrestore(&hr_qp->rq.lock, flags_rq); + spin_unlock_irqrestore(&hr_qp->sq.lock, flags_sq); + + return hns_roce_dca_attach(hr_dev, hr_qp, &attr); +} + +static void dca_detach_qp_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_dca_detach_attr attr = {}; + unsigned long flags_sq, flags_rq; + bool is_empty; + + spin_lock_irqsave(&hr_qp->sq.lock, flags_sq); + spin_lock_irqsave(&hr_qp->rq.lock, flags_rq); + is_empty = hr_qp->sq.head == hr_qp->sq.tail && + hr_qp->rq.head == hr_qp->rq.tail; + if (is_empty && hr_qp->sq.wqe_cnt > 0) + attr.sq_idx = hr_qp->sq.head & (hr_qp->sq.wqe_cnt - 1); + + spin_unlock_irqrestore(&hr_qp->rq.lock, flags_rq); + spin_unlock_irqrestore(&hr_qp->sq.lock, flags_sq); + + if (is_empty) + hns_roce_dca_detach(hr_dev, hr_qp, &attr); +} + static int check_send_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct ib_device *ibdev = &hr_dev->ib_dev; + int ret;
if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || @@ -393,6 +446,16 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, return -EIO; }
+ if (check_qp_dca_enable(hr_qp)) { + ret = dca_attach_qp_buf(hr_dev, hr_qp); + if (unlikely(ret)) { + ibdev_err(ibdev, + "failed to attach DCA for QP-%ld send!\n", + hr_qp->qpn); + return ret; + } + } + return 0; }
@@ -572,6 +635,14 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, return ret; }
+static inline void fill_dca_fields(struct hns_roce_qp *hr_qp, + struct hns_roce_v2_rc_send_wqe *wqe) +{ + hr_reg_write(wqe, RC_SEND_WQE_SQPN_L, hr_qp->qpn); + hr_reg_write(wqe, RC_SEND_WQE_SQPN_H, + hr_qp->qpn >> V2_RC_SEND_WQE_BYTE_4_SQPN_L_W); +} + static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -608,6 +679,9 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge);
+ if (qp->en_flags & HNS_ROCE_QP_CAP_DCA) + fill_dca_fields(qp, rc_sq_wqe); + /* * The pipeline can sequentially post all valid WQEs into WQ buffer, * including new WQEs waiting for the doorbell to update the PI again. @@ -692,12 +766,26 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, hns_roce_write512(hr_dev, wqe, qp->sq.db_reg); }
+static int check_sq_enabled(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + const struct ib_send_wr *wr, int nreq) +{ + if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) + return -ENOMEM; + + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + ibdev_err(&hr_dev->ib_dev, "num_sge=%d > qp->sq.max_gs=%u\n", + wr->num_sge, qp->sq.max_gs); + return -EINVAL; + } + + return 0; +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); unsigned long flags = 0; unsigned int owner_bit; @@ -707,34 +795,25 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, u32 nreq; int ret;
- spin_lock_irqsave(&qp->sq.lock, flags);
ret = check_send_valid(hr_dev, qp); if (unlikely(ret)) { *bad_wr = wr; - nreq = 0; - goto out; + return ret; }
+ spin_lock_irqsave(&qp->sq.lock, flags); sge_idx = qp->next_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { - ret = -ENOMEM; + ret = check_sq_enabled(hr_dev, qp, wr, nreq); + if (unlikely(ret)) { *bad_wr = wr; goto out; }
wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
- if (unlikely(wr->num_sge > qp->sq.max_gs)) { - ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", - wr->num_sge, qp->sq.max_gs); - ret = -EINVAL; - *bad_wr = wr; - goto out; - } - wqe = hns_roce_get_send_wqe(qp, wqe_idx); qp->sq.wrid[wqe_idx] = wr->wr_id; owner_bit = @@ -772,12 +851,23 @@ out: static int check_recv_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + int ret; if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO;
if (hr_qp->state == IB_QPS_RESET) return -EINVAL;
+ if (check_qp_dca_enable(hr_qp)) { + ret = dca_attach_qp_buf(hr_dev, hr_qp); + if (unlikely(ret)) { + ibdev_err(&hr_dev->ib_dev, + "failed to attach DCA for QP-%lu recv!\n", + hr_qp->qpn); + return ret; + } + } + return 0; }
@@ -828,15 +918,15 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, unsigned long flags; int ret;
- spin_lock_irqsave(&hr_qp->rq.lock, flags);
ret = check_recv_valid(hr_dev, hr_qp); if (unlikely(ret)) { *bad_wr = wr; - nreq = 0; - goto out; + return ret; }
+ spin_lock_irqsave(&hr_qp->rq.lock, flags); + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, @@ -4083,6 +4173,7 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, struct hns_roce_qp *cur_qp = NULL; unsigned long flags; int npolled; + int ret;
spin_lock_irqsave(&hr_cq->lock, flags);
@@ -4099,7 +4190,10 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, }
for (npolled = 0; npolled < num_entries; ++npolled) { - if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled)) + ret = hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled); + if (cur_qp && check_qp_dca_enable(cur_qp)) + dca_detach_qp_buf(hr_dev, cur_qp); + if (ret) break; }
@@ -4463,15 +4557,14 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask, - struct hns_roce_dca_attr *dca_attr) + struct hns_roce_v2_qp_context *qpc_mask) { u64 mtts[MTT_MIN_COUNT] = { 0 }; u64 wqe_sge_ba; int ret;
/* Search qp buf's mtts */ - ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, dca_attr->rq_offset, mtts, + ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.wqe_offset, mtts, ARRAY_SIZE(mtts)); if (hr_qp->rq.wqe_cnt && ret) { ibdev_err(&hr_dev->ib_dev, @@ -4541,8 +4634,7 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, - struct hns_roce_v2_qp_context *qpc_mask, - struct hns_roce_dca_attr *dca_attr) + struct hns_roce_v2_qp_context *qpc_mask) { struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; @@ -4550,7 +4642,7 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, int ret;
/* search qp buf's mtts */ - ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, dca_attr->sq_offset, + ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.wqe_offset, &sq_cur_blk, 1); if (ret) { ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n", @@ -4559,7 +4651,7 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, } if (hr_qp->sge.sge_cnt > 0) { ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - dca_attr->sge_offset, &sge_cur_blk, 1); + hr_qp->sge.wqe_offset, &sge_cur_blk, 1); if (ret) { ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n", hr_qp->qpn, ret); @@ -4617,7 +4709,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_dca_attr dca_attr = {}; dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu ib_mtu; @@ -4629,8 +4720,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int mtu; int ret;
- dca_attr.rq_offset = hr_qp->rq.offset; - ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask, &dca_attr); + hr_qp->rq.wqe_offset = hr_qp->rq.offset; + ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { ibdev_err(ibdev, "failed to config rq buf, ret = %d.\n", ret); return ret; @@ -4774,7 +4865,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_dca_attr dca_attr = {}; int ret;
/* Not support alternate path and path migration */ @@ -4783,9 +4873,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return -EINVAL; }
- dca_attr.sq_offset = hr_qp->sq.offset; - dca_attr.sge_offset = hr_qp->sge.offset; - ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask, &dca_attr); + hr_qp->sq.wqe_offset = hr_qp->sq.offset; + hr_qp->sge.wqe_offset = hr_qp->sge.offset; + ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; @@ -5442,83 +5532,38 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
if (check_qp_dca_enable(hr_qp) && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) - hns_roce_dca_kick(hr_dev, hr_qp); + hns_roce_dca_kick(hr_dev, hr_qp, udata);
out: return ret; }
-static int init_dca_buf_attr(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_dca_attr *init_attr, - struct hns_roce_dca_attr *dca_attr) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - - if (hr_qp->sq.wqe_cnt > 0) { - dca_attr->sq_offset = hr_qp->sq.offset + init_attr->sq_offset; - if (dca_attr->sq_offset >= hr_qp->sge.offset) { - ibdev_err(ibdev, "failed to check SQ offset = %u\n", - init_attr->sq_offset); - return -EINVAL; - } - } - - if (hr_qp->sge.sge_cnt > 0) { - dca_attr->sge_offset = hr_qp->sge.offset + init_attr->sge_offset; - if (dca_attr->sge_offset >= hr_qp->rq.offset) { - ibdev_err(ibdev, "failed to check exSGE offset = %u\n", - init_attr->sge_offset); - return -EINVAL; - } - } - - if (hr_qp->rq.wqe_cnt > 0) { - dca_attr->rq_offset = hr_qp->rq.offset + init_attr->rq_offset; - if (dca_attr->rq_offset >= hr_qp->buff_size) { - ibdev_err(ibdev, "failed to check RQ offset = %u\n", - init_attr->rq_offset); - return -EINVAL; - } - } - - return 0; -} - static int hns_roce_v2_set_dca_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_dca_attr *init_attr) + struct hns_roce_qp *hr_qp) { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v2_qp_context *qpc, *msk; - struct hns_roce_dca_attr dca_attr = {}; struct hns_roce_mbox_msg mbox_msg = {}; dma_addr_t dma_handle; int qpc_sz; int ret;
- ret = init_dca_buf_attr(hr_dev, hr_qp, init_attr, &dca_attr); - if (ret) { - ibdev_err(ibdev, "failed to init DCA attr, ret = %d.\n", ret); - return ret; - } - qpc_sz = hr_dev->caps.qpc_sz; WARN_ON(2 * qpc_sz > HNS_ROCE_MAILBOX_SIZE); - qpc = dma_pool_alloc(hr_dev->cmd.pool, GFP_NOWAIT, &dma_handle); + qpc = dma_pool_alloc(hr_dev->cmd.pool, GFP_ATOMIC, &dma_handle); if (!qpc) return -ENOMEM;
msk = (struct hns_roce_v2_qp_context *)((void *)qpc + qpc_sz); memset(msk, 0xff, qpc_sz);
- ret = config_qp_rq_buf(hr_dev, hr_qp, qpc, msk, &dca_attr); + ret = config_qp_rq_buf(hr_dev, hr_qp, qpc, msk); if (ret) { ibdev_err(ibdev, "failed to config rq qpc, ret = %d.\n", ret); goto done; }
- ret = config_qp_sq_buf(hr_dev, hr_qp, qpc, msk, &dca_attr); + ret = config_qp_sq_buf(hr_dev, hr_qp, qpc, msk); if (ret) { ibdev_err(ibdev, "failed to config sq qpc, ret = %d.\n", ret); goto done; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 6ab49dd18..5adb3c1cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -905,6 +905,8 @@ struct hns_roce_v2_rc_send_wqe { #define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0) #define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5) #define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13) +#define RC_SEND_WQE_SQPN_L RC_SEND_WQE_FIELD_LOC(6, 5) +#define RC_SEND_WQE_SQPN_H RC_SEND_WQE_FIELD_LOC(30, 13) #define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) #define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) #define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) @@ -917,6 +919,8 @@ struct hns_roce_v2_rc_send_wqe { #define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128) #define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
+#define V2_RC_SEND_WQE_BYTE_4_SQPN_L_W 2 + struct hns_roce_wqe_frmr_seg { __le32 pbl_size; __le32 byte_40; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b5ec62ed3..05f91647c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1143,6 +1143,14 @@ err_unmap_dmpt: return ret; }
+static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) +{ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) + hns_roce_cleanup_dca(hr_dev); + + hns_roce_cleanup_bitmap(hr_dev); +} + /** * hns_roce_setup_hca - setup host channel adapter * @hr_dev: pointer to hns roce device @@ -1155,6 +1163,14 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
spin_lock_init(&hr_dev->sm_lock);
+ INIT_LIST_HEAD(&hr_dev->qp_list); + spin_lock_init(&hr_dev->qp_list_lock); + INIT_LIST_HEAD(&hr_dev->dip_list); + spin_lock_init(&hr_dev->dip_list_lock); + + INIT_LIST_HEAD(&hr_dev->uctx_list); + spin_lock_init(&hr_dev->uctx_list_lock); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { INIT_LIST_HEAD(&hr_dev->pgdir_list); @@ -1187,6 +1203,9 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_init_srq_table(hr_dev);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) + hns_roce_init_dca(hr_dev); + return 0;
err_uar_table_free: @@ -1211,7 +1230,7 @@ static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) { - struct hns_roce_qp *hr_qp; + struct hns_roce_qp *hr_qp, *hr_qp_next; struct hns_roce_cq *hr_cq; struct list_head cq_list; unsigned long flags_qp; @@ -1220,7 +1239,7 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) INIT_LIST_HEAD(&cq_list);
spin_lock_irqsave(&hr_dev->qp_list_lock, flags); - list_for_each_entry(hr_qp, &hr_dev->qp_list, node) { + list_for_each_entry_safe(hr_qp, hr_qp_next, &hr_dev->qp_list, node) { spin_lock_irqsave(&hr_qp->sq.lock, flags_qp); if (hr_qp->sq.tail != hr_qp->sq.head) check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq); @@ -1318,11 +1337,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) } }
- INIT_LIST_HEAD(&hr_dev->qp_list); - spin_lock_init(&hr_dev->qp_list_lock); - INIT_LIST_HEAD(&hr_dev->dip_list); - spin_lock_init(&hr_dev->dip_list_lock); - ret = hns_roce_register_device(hr_dev); if (ret) goto error_failed_register_device; @@ -1337,7 +1351,7 @@ error_failed_register_device: hr_dev->hw->hw_exit(hr_dev);
error_failed_engine_init: - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev);
error_failed_setup_hca: hns_roce_cleanup_hem(hr_dev); @@ -1368,7 +1382,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); - hns_roce_cleanup_bitmap(hr_dev); + hns_roce_teardown_hca(hr_dev); hns_roce_cleanup_hem(hr_dev);
if (hr_dev->cmd_mod) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 9e05b57a2..15382fb89 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -669,16 +669,12 @@ static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) { /* release user buffers */ - if (mtr->umem) { - ib_umem_release(mtr->umem); - mtr->umem = NULL; - } + ib_umem_release(mtr->umem); + mtr->umem = NULL;
/* release kernel buffers */ - if (mtr->kmem) { - hns_roce_buf_free(hr_dev, mtr->kmem); - mtr->kmem = NULL; - } + hns_roce_buf_free(hr_dev, mtr->kmem); + mtr->kmem = NULL; }
static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 45eb75bb4..88d71fc1d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -641,7 +641,9 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return 0; }
-static bool check_dca_is_enable(struct hns_roce_dev *hr_dev, bool is_user, +static bool check_dca_is_enable(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, bool is_user, unsigned long addr) { if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE)) @@ -651,6 +653,12 @@ static bool check_dca_is_enable(struct hns_roce_dev *hr_dev, bool is_user, if (is_user) return !addr;
+ /* Only RC and XRC support DCA for kernel QP */ + if (hr_dev->dca_ctx.max_size > 0 && + (init_attr->qp_type == IB_QPT_RC || + init_attr->qp_type == IB_QPT_XRC_INI)) + return !!(init_attr->create_flags & HNS_ROCE_QP_CREATE_DCA_EN); + return false; }
@@ -771,8 +779,14 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, int ret;
if (dca_en) { - /* DCA must be enabled after the buffer size is configured. */ - hns_roce_enable_dca(hr_dev, hr_qp); + /* DCA must be enabled after the buffer attr is configured. */ + ret = hns_roce_enable_dca(hr_dev, hr_qp, udata); + if (ret) { + ibdev_err(ibdev, "failed to enable DCA, ret = %d.\n", + ret); + return ret; + } + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DCA; } else { /* @@ -820,7 +834,8 @@ static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (uctx && (uctx->config & HNS_ROCE_UCTX_DYN_QP_PGSZ)) page_shift = ucmd->pageshift;
- dca_en = check_dca_is_enable(hr_dev, !!udata, ucmd->buf_addr); + dca_en = check_dca_is_enable(hr_dev, hr_qp, init_attr, !!udata, + ucmd->buf_addr); ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, page_shift, &buf_attr); if (ret) { ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); @@ -1198,9 +1213,6 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0;
- if (init_attr->create_flags) - return -EOPNOTSUPP; - ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); @@ -1564,9 +1576,18 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, } }
+static inline void *dca_buf_offset(struct hns_roce_dca_cfg *dca_cfg, u32 offset) +{ + return (char *)(dca_cfg->buf_list[offset >> HNS_HW_PAGE_SHIFT]) + + (offset & ((1 << HNS_HW_PAGE_SHIFT) - 1)); +} + static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset) { - return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) + return dca_buf_offset(&hr_qp->dca_cfg, offset); + else + return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); }
void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 99e7c3a82..31c9c3b43 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -179,6 +179,7 @@ enum hns_ib_dca_mem_methods {
enum hns_ib_dca_mem_reg_attrs { HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_REG_FLAGS, HNS_IB_ATTR_DCA_MEM_REG_LEN, HNS_IB_ATTR_DCA_MEM_REG_ADDR, HNS_IB_ATTR_DCA_MEM_REG_KEY, @@ -195,8 +196,6 @@ enum hns_ib_dca_mem_shrink_attrs { HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, };
-#define HNS_IB_ATTACH_FLAGS_NEW_BUFFER 1U - enum hns_ib_dca_mem_attach_attrs { HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
This patch synchronizes the DCA code from CI and is based on RFC v2 from the community, including DCA kernel support and debugfs support.
Add a group of debugfs files for DCA memory pool statistics.
The debugfs entries for DCA memory statistics include:
hns_roce/<ibdev_name>/dca/qp       : show all DCA QPs for each device.
hns_roce/<ibdev_name>/dca/pool     : show all DCA mem for each device.
hns_roce/<ibdev_name>/<pid>/qp     : show all active DCA QPs for one process.
hns_roce/<ibdev_name>/<pid>/mstats : show DCA mem info for one process.
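
For reference, the sketch below shows how a single seq_file-backed entry such as dca/pool can be wired up with the stock debugfs helpers. It is only a minimal illustration under assumptions: the example_* names and the placeholder show callback are invented here, and the driver itself goes through its own hns_debugfs_seqfile/init_debugfs_seqfile() wrapper instead (see the diff below).

#include <linux/debugfs.h>
#include <linux/seq_file.h>

/* Placeholder show callback: a real one would walk the DCA pool and print
 * one line per context, as dca_stats_dev_pool_in_seqfile() does.
 */
static int example_dca_pool_show(struct seq_file *file, void *offset)
{
        seq_printf(file, "%-10s %-16s %-16s %-16s %-s\n", "PID", "Total(kB)",
                   "Free(kB)", "Clean(BLK)", "Loading");
        return 0;
}
DEFINE_SHOW_ATTRIBUTE(example_dca_pool);

static void example_create_dca_debugfs(struct dentry *dev_root, void *drv_data)
{
        /* Creates <debugfs>/.../dca/pool; drv_data travels through the
         * inode's i_private and shows up as file->private in the callback.
         */
        struct dentry *dca_root = debugfs_create_dir("dca", dev_root);

        debugfs_create_file("pool", 0400, dca_root, drv_data,
                            &example_dca_pool_fops);
}

Reading the resulting file (for example hns_roce/<ibdev_name>/dca/pool under the debugfs mount point) runs the show callback once per open and prints the same column header used by dca_stats_dev_pool_in_seqfile() in this patch.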
Signed-off-by: Chengchang Tang tangchengchang@huawei.com Reviewed-by: Yangyang Li liyangyang20@huawei.com Reviewed-by: YueHaibing yuehaibing@huawei.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 26 ++ drivers/infiniband/hw/hns/hns_roce_dca.h | 2 + drivers/infiniband/hw/hns/hns_roce_debugfs.c | 405 +++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_debugfs.h | 19 + drivers/infiniband/hw/hns/hns_roce_device.h | 5 +- drivers/infiniband/hw/hns/hns_roce_main.c | 18 +- 6 files changed, 473 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 5ec307faf..273913d95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -1620,6 +1620,32 @@ const struct uapi_definition hns_roce_dca_uapi_defs[] = { {} };
+/* enum DCA pool */ +struct dca_mem_enum_attr { + void *param; + hns_dca_enum_callback enum_fn; +}; + +static int enum_dca_pool_proc(struct dca_mem *mem, int index, void *param) +{ + struct dca_mem_enum_attr *attr = param; + int ret; + + ret = attr->enum_fn(mem->states, mem->page_count, attr->param); + + return ret ? DCA_MEM_STOP_ITERATE : DCA_MEM_NEXT_ITERATE; +} + +void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, + hns_dca_enum_callback cb) +{ + struct dca_mem_enum_attr attr; + + attr.enum_fn = cb; + attr.param = param; + travel_dca_pages(dca_ctx, &attr, enum_dca_pool_proc); +} + module_param(dca_unit_size, uint, 0444); module_param(dca_max_size, ulong, 0444); module_param(dca_min_size, ulong, 0444); diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index f37810277..11bade706 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -73,4 +73,6 @@ void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata);
+void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, + hns_dca_enum_callback cb); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index e8febb40f..a649d5081 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -7,9 +7,13 @@ #include <linux/device.h>
#include "hns_roce_device.h" +#include "hns_roce_common.h" +#include "hns_roce_dca.h"
static struct dentry *hns_roce_dbgfs_root;
+#define KB 1024 + static int hns_debugfs_seqfile_open(struct inode *inode, struct file *f) { struct hns_debugfs_seqfile *seqfile = inode->i_private; @@ -81,6 +85,404 @@ static void create_sw_stat_debugfs(struct hns_roce_dev *hr_dev, sw_stat_debugfs_show, hr_dev); }
+struct dca_mem_stats { + unsigned int total_mems; + unsigned int clean_mems; + size_t free_size; + size_t total_size; + size_t active_size; + size_t locked_size; +}; + +#define DCA_CTX_PID_LEN 10 + +#define DCA_CTX_STATE_LEN 22 + +#define LOADING_PERCENT_SCALE 100 + +#define LOADING_PERCENT_SHIFT 2 + +static int stats_dca_pool_proc(struct hns_dca_page_state *states, u32 count, + void *param) +{ + struct dca_mem_stats *stats = param; + struct hns_dca_page_state *s; + int i, free_pages; + + free_pages = 0; + for (i = 0; i < count; i++) { + s = &states[i]; + if (s->buf_id == HNS_DCA_INVALID_BUF_ID) { + free_pages++; + stats->free_size += HNS_HW_PAGE_SIZE; + } else { + if (s->lock) + stats->locked_size += HNS_HW_PAGE_SIZE; + + if (s->active) + stats->active_size += HNS_HW_PAGE_SIZE; + } + } + + stats->total_size += (count * HNS_HW_PAGE_SIZE); + stats->total_mems++; + if (free_pages == count) + stats->clean_mems++; + + return 0; +} + +/* stats QPs in DCA pool */ +struct dca_stats_qp_attr { + unsigned long *qpn_bitmap; + unsigned int qpn_max; +}; + +static int stats_dca_qp_proc(struct hns_dca_page_state *states, u32 count, + void *param) +{ + struct dca_stats_qp_attr *attr = param; + struct hns_dca_page_state *s; + u32 qpn; + int i; + + for (i = 0; i < count; i++) { + s = &states[i]; + if (s->buf_id == HNS_DCA_INVALID_BUF_ID || s->lock || + !s->active) + continue; + + qpn = HNS_DCA_BUF_ID_TO_QPN(s->buf_id); + if (qpn < attr->qpn_max) + set_bit(qpn, attr->qpn_bitmap); + } + + return 0; +} + +static void dca_ctx_stats_qp(struct hns_roce_dca_ctx *ctx, + unsigned long *qpn_bitmap, unsigned int qpn_max) +{ + struct dca_stats_qp_attr attr; + + attr.qpn_bitmap = qpn_bitmap; + attr.qpn_max = qpn_max; + hns_roce_enum_dca_pool(ctx, &attr, stats_dca_qp_proc); +} + +static void dca_ctx_stats_mem(struct hns_roce_dca_ctx *ctx, + struct dca_mem_stats *stats) +{ + hns_roce_enum_dca_pool(ctx, stats, stats_dca_pool_proc); +} + +static void dca_setup_pool_name(pid_t pid, bool is_kdca, char *name, int size) +{ + if (is_kdca) + snprintf(name, size, "kernel"); + else + snprintf(name, size, "%d", pid); +} + +static u64 calc_loading_percent(size_t total, size_t free, u32 *out_rem) +{ + u32 all_pages, used_pages, free_pages, scale; + u64 percent = 0; + u32 rem = 0; + + all_pages = total >> HNS_HW_PAGE_SHIFT; + free_pages = free >> HNS_HW_PAGE_SHIFT; + if (all_pages >= free_pages) { + used_pages = all_pages - free_pages; + scale = LOADING_PERCENT_SCALE * LOADING_PERCENT_SCALE; + percent = (used_pages * scale) / all_pages; + percent = div_u64_rem(percent, LOADING_PERCENT_SCALE, &rem); + } + + if (out_rem) + *out_rem = rem; + + return percent; +} + +static void dca_print_pool_stats(struct hns_roce_dca_ctx *ctx, pid_t pid, + bool is_kdca, struct seq_file *file) +{ + char name[DCA_CTX_PID_LEN]; + u64 percent; + u32 rem = 0; + + percent = calc_loading_percent(ctx->total_size, ctx->free_size, &rem); + dca_setup_pool_name(pid, is_kdca, name, sizeof(name)); + seq_printf(file, "%-10s %-16ld %-16ld %-16u %llu.%0*u\n", name, + ctx->total_size / KB, ctx->free_size / KB, ctx->free_mems, + percent, LOADING_PERCENT_SHIFT, rem); +} + +static void dca_stats_dev_pool_in_seqfile(struct hns_roce_dev *hr_dev, + struct seq_file *file) +{ + struct hns_roce_ucontext *uctx, *tmp; + + seq_printf(file, "%-10s %-16s %-16s %-16s %-s\n", "PID", "Total(kB)", + "Free(kB)", "Clean(BLK)", "Loading"); + + /* Write kernel DCA pool stats */ + dca_print_pool_stats(&hr_dev->dca_ctx, 0, true, file); + /* Write user DCA pool stats */ + 
mutex_lock(&hr_dev->uctx_list_mutex); + list_for_each_entry_safe(uctx, tmp, &hr_dev->uctx_list, list) { + dca_print_pool_stats(&uctx->dca_ctx, uctx->pid, false, file); + } + mutex_unlock(&hr_dev->uctx_list_mutex); +} + +struct dca_qp_stats { + char name[DCA_CTX_PID_LEN]; + char state[DCA_CTX_STATE_LEN]; + u32 qpn; + u32 total_size; + u32 sq_size; + u32 rq_size; + u32 sge_size; +}; + +static void dca_setup_qp_state(struct hns_roce_qp *hr_qp, char *buf, int size) +{ + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg; + + if (cfg->buf_id == HNS_DCA_INVALID_BUF_ID) + snprintf(buf, size, "detached"); + else if (hr_qp->rq.wqe_cnt > 0) + snprintf(buf, size, "stable"); + else + snprintf(buf, size, "attached-%-u", cfg->attach_count); +} + +static void dca_setup_qp_stats(struct hns_roce_qp *hr_qp, + struct dca_qp_stats *stats) +{ + struct hns_roce_ucontext *uctx = NULL; + + if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) || !hr_qp->ibqp.pd) + return; + + if (hr_qp->ibqp.pd->uobject) + uctx = to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); + + dca_setup_pool_name(uctx ? uctx->pid : 0, !uctx, stats->name, + sizeof(stats->name)); + stats->qpn = (u32)hr_qp->qpn; + stats->total_size = hr_qp->buff_size; + + stats->sq_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt, + hr_qp->sq.wqe_shift); + stats->sge_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift); + stats->rq_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt, + hr_qp->rq.wqe_shift); + + dca_setup_qp_state(hr_qp, stats->state, sizeof(stats->state)); +} + +static void dca_stats_dev_qp_in_seqfile(struct hns_roce_dev *hr_dev, + struct seq_file *file) +{ + struct dca_qp_stats stats; + struct hns_roce_qp *hr_qp; + unsigned long id; + + seq_printf(file, "%-10s %-10s %-10s %s\n", "QPN", "Size(kB)", "PID", + "State"); + + xa_lock_irq(&hr_dev->qp_table_xa); + xa_for_each(&hr_dev->qp_table_xa, id, hr_qp) { + stats.total_size = 0; + dca_setup_qp_stats(hr_qp, &stats); + if (!stats.total_size) + continue; + + xa_unlock_irq(&hr_dev->qp_table_xa); + seq_printf(file, "%-10u %-10u %-10s %-s\n", stats.qpn, + stats.total_size / KB, stats.name, stats.state); + xa_lock_irq(&hr_dev->qp_table_xa); + } + xa_unlock_irq(&hr_dev->qp_table_xa); +} + +static void dca_stats_ctx_qp_in_seqfile(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx, + struct seq_file *file) +{ + struct dca_qp_stats stats; + struct hns_roce_qp *hr_qp; + unsigned int qpn, nbits; + unsigned long *bitmap; + + nbits = hr_dev->caps.num_qps; + if (nbits < 1) + return; + + bitmap = bitmap_zalloc(nbits, GFP_ATOMIC); + if (!bitmap) + return; + + seq_printf(file, "%-10s %-10s %-10s %-10s %-10s\n", "QPN", "Total(kB)", + "SQ(kB)", "SGE(kB)", "RQ(kB)"); + + dca_ctx_stats_qp(ctx, bitmap, nbits); + for_each_set_bit(qpn, bitmap, nbits) { + stats.total_size = 0; + xa_lock_irq(&hr_dev->qp_table_xa); + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (hr_qp) + dca_setup_qp_stats(hr_qp, &stats); + xa_unlock_irq(&hr_dev->qp_table_xa); + if (!stats.total_size) + continue; + + seq_printf(file, "%-10u %-10u %-10u %-10u %-10u\n", + stats.qpn, stats.total_size / KB, stats.sq_size / KB, + stats.sge_size / KB, stats.rq_size / KB); + } + bitmap_free(bitmap); +} + +static void dca_stats_ctx_mem_in_seqfile(struct hns_roce_dca_ctx *ctx, + bool is_kdca, struct seq_file *file) +{ + struct dca_mem_stats stats = {}; + u64 percent; + u32 rem = 0; + +#define DCA_STAT_NAME_FMT "%-22s " +#define dca_ctx_print_mem_size(f, n, fmt, v) \ + seq_printf(f, DCA_STAT_NAME_FMT fmt "\n", n, v) + +#define dca_ctx_print_mem_kb(f, 
n, v) \ + dca_ctx_print_mem_size(f, n, "%-u kB", (u32)((v) / KB)) + + dca_ctx_stats_mem(ctx, &stats); + percent = calc_loading_percent(stats.total_size, stats.free_size, &rem); + seq_printf(file, DCA_STAT_NAME_FMT "%llu.%0*u\n", "Loading:", percent, + LOADING_PERCENT_SHIFT, rem); + dca_ctx_print_mem_kb(file, "Total:", stats.total_size); + dca_ctx_print_mem_kb(file, "Free:", stats.free_size); + dca_ctx_print_mem_kb(file, "Active:", stats.active_size); + dca_ctx_print_mem_kb(file, "Locked:", stats.locked_size); + dca_ctx_print_mem_size(file, "Dirty:", "%-u Blocks", + stats.total_mems - stats.clean_mems); + dca_ctx_print_mem_size(file, "Clean:", "%-u Blocks", stats.clean_mems); + if (is_kdca) { + dca_ctx_print_mem_size(file, "Unit:", "%-u", ctx->unit_size); + dca_ctx_print_mem_size(file, "Max:", "%-zu", ctx->max_size); + dca_ctx_print_mem_size(file, "Min:", "%-zu", ctx->min_size); + } +} + +static int dca_debugfs_pool_show(struct seq_file *file, void *offset) +{ + struct hns_roce_dev *hr_dev = file->private; + + dca_stats_dev_pool_in_seqfile(hr_dev, file); + return 0; +} + +static int dca_debugfs_qp_show(struct seq_file *file, void *offset) +{ + struct hns_roce_dev *hr_dev = file->private; + + dca_stats_dev_qp_in_seqfile(hr_dev, file); + return 0; +} + +static int dca_debugfs_kctx_qp_stats_show(struct seq_file *file, void *offset) +{ + struct hns_roce_dev *hr_dev = file->private; + + dca_stats_ctx_qp_in_seqfile(hr_dev, &hr_dev->dca_ctx, file); + return 0; +} + +static int dca_debugfs_uctx_qp_stats_show(struct seq_file *file, void *offset) +{ + struct hns_roce_ucontext *uctx = file->private; + + dca_stats_ctx_qp_in_seqfile(to_hr_dev(uctx->ibucontext.device), + &uctx->dca_ctx, file); + return 0; +} + +static int dca_debugfs_kctx_mem_stats_show(struct seq_file *file, void *offset) +{ + struct hns_roce_dev *hr_dev = file->private; + + dca_stats_ctx_mem_in_seqfile(&hr_dev->dca_ctx, true, file); + return 0; +} + +static int dca_debugfs_uctx_mem_stats_show(struct seq_file *file, void *offset) +{ + struct hns_roce_ucontext *uctx = file->private; + + dca_stats_ctx_mem_in_seqfile(&uctx->dca_ctx, false, file); + return 0; +} + +static void init_dca_ctx_debugfs(struct hns_dca_ctx_debugfs *dbgfs, + struct dentry *parent, + struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + char name[DCA_CTX_PID_LEN]; + + dca_setup_pool_name(uctx ? 
uctx->pid : 0, !uctx, name, sizeof(name)); + dbgfs->root = debugfs_create_dir(name, parent); + + if (uctx) { + init_debugfs_seqfile(&dbgfs->mem, "mstats", dbgfs->root, + dca_debugfs_uctx_mem_stats_show, uctx); + init_debugfs_seqfile(&dbgfs->qp, "qp", dbgfs->root, + dca_debugfs_uctx_qp_stats_show, uctx); + } else { + init_debugfs_seqfile(&dbgfs->mem, "mstats", dbgfs->root, + dca_debugfs_kctx_mem_stats_show, hr_dev); + init_debugfs_seqfile(&dbgfs->qp, "qp", dbgfs->root, + dca_debugfs_kctx_qp_stats_show, hr_dev); + } +} + +static void create_dca_debugfs(struct hns_roce_dev *hr_dev, + struct dentry *parent) +{ + struct hns_dca_debugfs *dbgfs = &hr_dev->dbgfs.dca_root; + + dbgfs->root = debugfs_create_dir("dca", parent); + + init_debugfs_seqfile(&dbgfs->pool, "pool", dbgfs->root, + dca_debugfs_pool_show, hr_dev); + init_debugfs_seqfile(&dbgfs->qp, "qp", dbgfs->root, + dca_debugfs_qp_show, hr_dev); + + init_dca_ctx_debugfs(&dbgfs->kctx, dbgfs->root, hr_dev, NULL); +} + +/* debugfs for ucontext */ +void hns_roce_register_uctx_debugfs(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx) +{ + struct hns_dca_debugfs *dca_dbgfs = &hr_dev->dbgfs.dca_root; + + if (uctx->config & HNS_ROCE_UCTX_CONFIG_DCA) + init_dca_ctx_debugfs(&uctx->dca_dbgfs, dca_dbgfs->root, + hr_dev, uctx); +} + +void hns_roce_unregister_uctx_debugfs(struct hns_roce_ucontext *uctx) +{ + debugfs_remove_recursive(uctx->dca_dbgfs.root); +} + /* debugfs for device */ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) { @@ -89,6 +491,9 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev), hns_roce_dbgfs_root);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) + create_dca_debugfs(hr_dev, dbgfs->root); + create_sw_stat_debugfs(hr_dev, dbgfs->root); }
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.h b/drivers/infiniband/hw/hns/hns_roce_debugfs.h index 98e87bd31..7fff3aa98 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.h +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.h @@ -17,17 +17,36 @@ struct hns_sw_stat_debugfs { struct hns_debugfs_seqfile sw_stat; };
+/* DCA debugfs */ +struct hns_dca_ctx_debugfs { + struct dentry *root; /* pool debugfs entry */ + struct hns_debugfs_seqfile mem; /* mems in pool */ + struct hns_debugfs_seqfile qp; /* QPs stats in pool */ +}; + +struct hns_dca_debugfs { + struct dentry *root; /* dev debugfs entry */ + struct hns_debugfs_seqfile pool; /* pools stats on device */ + struct hns_debugfs_seqfile qp; /* QPs stats on device */ + struct hns_dca_ctx_debugfs kctx; /* kDCA context */ +}; + /* Debugfs for device */ struct hns_roce_dev_debugfs { struct dentry *root; struct hns_sw_stat_debugfs sw_stat_root; + struct hns_dca_debugfs dca_root; };
struct hns_roce_dev; +struct hns_roce_ucontext;
void hns_roce_init_debugfs(void); void hns_roce_cleanup_debugfs(void); void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev); void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev); +void hns_roce_register_uctx_debugfs(struct hns_roce_dev *hr_dev, + struct hns_roce_ucontext *uctx); +void hns_roce_unregister_uctx_debugfs(struct hns_roce_ucontext *uctx);
#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 584698eb0..c9bbbe4b6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -237,6 +237,9 @@ struct hns_roce_ucontext { struct hns_user_mmap_entry *reset_mmap_entry; u32 config; struct hns_roce_dca_ctx dca_ctx; + struct list_head list; /* link all uctx to uctx_list on hr_dev */ + pid_t pid; /* process id to which the uctx belongs */ + struct hns_dca_ctx_debugfs dca_dbgfs; };
struct hns_roce_pd { @@ -1037,7 +1040,7 @@ struct hns_roce_dev { struct device *dev;
struct list_head uctx_list; /* list of all uctx on this dev */ - spinlock_t uctx_list_lock; /* protect @uctx_list */ + struct mutex uctx_list_mutex; /* protect @uctx_list */
struct hns_roce_uar priv_uar; const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 05f91647c..b6d10435c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -521,6 +521,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, if (!hr_dev->active) goto error_out;
+ context->pid = current->pid; + INIT_LIST_HEAD(&context->list); + ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) @@ -554,6 +557,12 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, if (ret) goto error_fail_copy_to_udata;
+ mutex_lock(&hr_dev->uctx_list_mutex); + list_add(&context->list, &hr_dev->uctx_list); + mutex_unlock(&hr_dev->uctx_list_mutex); + + hns_roce_register_uctx_debugfs(hr_dev, context); + return 0;
error_fail_copy_to_udata: @@ -577,6 +586,12 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
+ mutex_lock(&hr_dev->uctx_list_mutex); + list_del(&context->list); + mutex_unlock(&hr_dev->uctx_list_mutex); + + hns_roce_unregister_uctx_debugfs(context); + hns_roce_unregister_udca(hr_dev, context);
hns_roce_dealloc_uar_entry(context); @@ -1149,6 +1164,7 @@ static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) hns_roce_cleanup_dca(hr_dev);
hns_roce_cleanup_bitmap(hr_dev); + mutex_destroy(&hr_dev->uctx_list_mutex); }
/** @@ -1169,7 +1185,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_dev->dip_list_lock);
INIT_LIST_HEAD(&hr_dev->uctx_list); - spin_lock_init(&hr_dev->uctx_list_lock); + mutex_init(&hr_dev->uctx_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
Use shared memory to store the DCA status. The size of the shared region is derived from the max QP number passed in the ucontext alloc parameters, and userspace maps this region so it can check each QP's buffer state directly.
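For orientation, here is a minimal user-space sketch of how the shared status region could be consumed. The dca_mmap_key/dca_mmap_size fields, the per-QP dcan and the one-bit-per-QP layout (buf_status in the first half of the region, sync_status in the second) follow the kernel changes below; the helper names, the cmd_fd handling and the simple bit test are assumptions for illustration, not the real rdma-core provider code.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

#define STATUS_BITS_PER_LONG (sizeof(unsigned long) * 8)

struct udca_status {
	unsigned long *buf_status;   /* "WQE buffer attached" bit per dcan */
	unsigned long *sync_status;  /* "kernel is freeing" bit per dcan */
	size_t size;
};

/* Hypothetical helper: map the region advertised by dca_mmap_key and
 * dca_mmap_size in struct hns_roce_ib_alloc_ucontext_resp through the
 * uverbs command fd.
 */
static int udca_map_status(struct udca_status *st, int cmd_fd,
			   uint64_t dca_mmap_key, size_t dca_mmap_size)
{
	void *addr = mmap(NULL, dca_mmap_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, cmd_fd, (off_t)dca_mmap_key);

	if (addr == MAP_FAILED)
		return -1;

	st->size = dca_mmap_size;
	st->buf_status = addr;
	st->sync_status = (unsigned long *)((char *)addr + dca_mmap_size / 2);
	return 0;
}

/* Check whether the WQE buffer of a DCA QP (identified by the dcan
 * returned in struct hns_roce_ib_modify_qp_resp) is currently attached.
 */
static bool udca_buf_attached(const struct udca_status *st, uint32_t dcan)
{
	unsigned long word =
		__atomic_load_n(&st->buf_status[dcan / STATUS_BITS_PER_LONG],
				__ATOMIC_ACQUIRE);

	return word & (1UL << (dcan % STATUS_BITS_PER_LONG));
}

A provider might call udca_buf_attached() in its post_send path to skip the attach call while the buffer is still held by the hardware.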
Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 336 +++++++++++++++---- drivers/infiniband/hw/hns/hns_roce_dca.h | 8 +- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 3 +- drivers/infiniband/hw/hns/hns_roce_device.h | 24 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 30 +- drivers/infiniband/hw/hns/hns_roce_main.c | 66 +++- drivers/infiniband/hw/hns/hns_roce_qp.c | 59 ++-- include/uapi/rdma/hns-abi.h | 15 +- 8 files changed, 433 insertions(+), 108 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 273913d95..4d3e52dd5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -143,7 +143,7 @@ static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, bool is_user, }
mem->page_count = kmem->npages; - /* Override the attr->size by actually alloced size */ + /* Overwrite the attr->size by actually alloced size */ attr->size = kmem->ntrunks << kmem->trunk_shift; return kmem;
@@ -730,6 +730,72 @@ active_fail: return ret; }
+#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS) +#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n) +static bool start_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan) +{ + unsigned long *st = ctx->sync_status; + + if (st && dcan < ctx->max_qps) + return !test_and_set_bit_lock(DCAN_TO_SYNC_BIT(dcan), st); + + return true; +} + +static void stop_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan) +{ + unsigned long *st = ctx->sync_status; + + if (st && dcan < ctx->max_qps) + clear_bit_unlock(DCAN_TO_SYNC_BIT(dcan), st); +} + +static void update_dca_buf_status(struct hns_roce_dca_ctx *ctx, u32 dcan, + bool en) +{ + unsigned long *st = ctx->buf_status; + + if (st && dcan < ctx->max_qps) { + if (en) + set_bit(DCAN_TO_STAT_BIT(dcan), st); + else + clear_bit(DCAN_TO_STAT_BIT(dcan), st); + + /* sync status with user-space rdma */ + smp_mb__after_atomic(); + } +} + +static void restart_aging_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx) +{ + spin_lock(&ctx->aging_lock); + ctx->exit_aging = false; + if (!list_empty(&ctx->aging_new_list)) + queue_delayed_work(hr_dev->irq_workq, &ctx->aging_dwork, + msecs_to_jiffies(DCA_MEM_AGEING_MSES)); + + spin_unlock(&ctx->aging_lock); +} + +static void stop_aging_dca_mem(struct hns_roce_dca_ctx *ctx, + struct hns_roce_dca_cfg *cfg, bool stop_worker) +{ + spin_lock(&ctx->aging_lock); + if (stop_worker) { + ctx->exit_aging = true; + cancel_delayed_work(&ctx->aging_dwork); + } + + spin_lock(&cfg->lock); + + if (!list_empty(&cfg->aging_node)) + list_del_init(&cfg->aging_node); + + spin_unlock(&cfg->lock); + spin_unlock(&ctx->aging_lock); +} + static int attach_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr, @@ -740,8 +806,8 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev, u32 buf_id; int ret;
- /* Stop DCA mem ageing worker */ - cancel_delayed_work(&cfg->dwork); + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH) + stop_aging_dca_mem(ctx, cfg, false); resp->alloc_flags = 0;
spin_lock(&cfg->lock); @@ -778,6 +844,7 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev,
resp->alloc_flags |= HNS_DCA_ATTACH_FLAGS_NEW_BUFFER; resp->alloc_pages = cfg->npages; + update_dca_buf_status(ctx, cfg->dcan, true);
return 0; } @@ -830,6 +897,7 @@ static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, unsigned long flags; u32 buf_id;
+ update_dca_buf_status(ctx, cfg->dcan, false); spin_lock(&cfg->lock); buf_id = cfg->buf_id; cfg->buf_id = HNS_DCA_INVALID_BUF_ID; @@ -848,19 +916,22 @@ static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, spin_unlock_irqrestore(&ctx->pool_lock, flags); }
-static void detach_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_dca_detach_attr *attr) +void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr) { + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
- /* Start an ageing worker to free buffer */ - cancel_delayed_work(&cfg->dwork); + stop_aging_dca_mem(ctx, cfg, true); + + spin_lock(&ctx->aging_lock); spin_lock(&cfg->lock); cfg->sq_idx = attr->sq_idx; - queue_delayed_work(hr_dev->irq_workq, &cfg->dwork, - msecs_to_jiffies(DCA_MEM_AGEING_MSES)); + list_add_tail(&cfg->aging_node, &ctx->aging_new_list); spin_unlock(&cfg->lock); + spin_unlock(&ctx->aging_lock); + + restart_aging_dca_mem(hr_dev, ctx); }
struct dca_mem_shrink_attr { @@ -923,11 +994,87 @@ static void shrink_dca_mem(struct hns_roce_dev *hr_dev, resp->free_key = attr.shrink_key; }
-static void init_dca_context(struct hns_roce_dca_ctx *ctx) +static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx) +{ + struct hns_roce_dca_cfg *cfg, *tmp_cfg; + struct hns_roce_qp *hr_qp; + + spin_lock(&ctx->aging_lock); + list_for_each_entry_safe(cfg, tmp_cfg, &ctx->aging_new_list, aging_node) + list_move(&cfg->aging_node, &ctx->aging_proc_list); + + while (!ctx->exit_aging && !list_empty(&ctx->aging_proc_list)) { + cfg = list_first_entry(&ctx->aging_proc_list, + struct hns_roce_dca_cfg, aging_node); + list_del_init_careful(&cfg->aging_node); + hr_qp = container_of(cfg, struct hns_roce_qp, dca_cfg); + spin_unlock(&ctx->aging_lock); + + if (start_free_dca_buf(ctx, cfg->dcan)) { + if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) + free_buf_from_dca_mem(ctx, cfg); + + stop_free_dca_buf(ctx, cfg->dcan); + } + + spin_lock(&ctx->aging_lock); + + spin_lock(&cfg->lock); + + if (cfg->buf_id != HNS_DCA_INVALID_BUF_ID) + list_move(&cfg->aging_node, &ctx->aging_new_list); + + spin_unlock(&cfg->lock); + } + spin_unlock(&ctx->aging_lock); +} + +static void udca_mem_aging_work(struct work_struct *work) +{ + struct hns_roce_dca_ctx *ctx = container_of(work, + struct hns_roce_dca_ctx, aging_dwork.work); + struct hns_roce_ucontext *uctx = container_of(ctx, + struct hns_roce_ucontext, dca_ctx); + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + + cancel_delayed_work(&ctx->aging_dwork); + process_aging_dca_mem(hr_dev, ctx); + if (!ctx->exit_aging) + restart_aging_dca_mem(hr_dev, ctx); +} + +static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev); + +static void kdca_mem_aging_work(struct work_struct *work) +{ + struct hns_roce_dca_ctx *ctx = container_of(work, + struct hns_roce_dca_ctx, aging_dwork.work); + struct hns_roce_dev *hr_dev = container_of(ctx, struct hns_roce_dev, + dca_ctx); + + cancel_delayed_work(&ctx->aging_dwork); + process_aging_dca_mem(hr_dev, ctx); + remove_unused_dca_mem(hr_dev); + if (!ctx->exit_aging) + restart_aging_dca_mem(hr_dev, ctx); +} + +static void init_dca_context(struct hns_roce_dca_ctx *ctx, bool is_user) { INIT_LIST_HEAD(&ctx->pool); spin_lock_init(&ctx->pool_lock); ctx->total_size = 0; + + ida_init(&ctx->ida); + INIT_LIST_HEAD(&ctx->aging_new_list); + INIT_LIST_HEAD(&ctx->aging_proc_list); + spin_lock_init(&ctx->aging_lock); + ctx->exit_aging = false; + if (is_user) + INIT_DELAYED_WORK(&ctx->aging_dwork, udca_mem_aging_work); + else + INIT_DELAYED_WORK(&ctx->aging_dwork, kdca_mem_aging_work); }
static void cleanup_dca_context(struct hns_roce_dev *hr_dev, @@ -937,6 +1084,8 @@ static void cleanup_dca_context(struct hns_roce_dev *hr_dev, unsigned long flags; bool is_user;
+ cancel_delayed_work_sync(&ctx->aging_dwork); + is_user = (ctx != &hr_dev->dca_ctx); spin_lock_irqsave(&ctx->pool_lock, flags); list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { @@ -962,7 +1111,7 @@ static uint dca_unit_size; static ulong dca_min_size = DCA_MAX_MEM_SIZE; static ulong dca_max_size = DCA_MAX_MEM_SIZE;
-static void config_kdca_context(struct hns_roce_dca_ctx *ctx) +static void load_kdca_param(struct hns_roce_dca_ctx *ctx) { unsigned int unit_size;
@@ -984,9 +1133,8 @@ static void config_kdca_context(struct hns_roce_dca_ctx *ctx)
void hns_roce_init_dca(struct hns_roce_dev *hr_dev) { - init_dca_context(&hr_dev->dca_ctx); - - config_kdca_context(&hr_dev->dca_ctx); + load_kdca_param(&hr_dev->dca_ctx); + init_dca_context(&hr_dev->dca_ctx, false); }
void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev) @@ -994,22 +1142,68 @@ void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev) cleanup_dca_context(hr_dev, &hr_dev->dca_ctx); }
-void hns_roce_register_udca(struct hns_roce_dev *hr_dev, +static void init_udca_status(struct hns_roce_ucontext *uctx, int udca_max_qps, + unsigned int dev_max_qps) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + struct ib_ucontext *ib_uctx = &uctx->ibucontext; + void *kaddr; + size_t size; + + size = BITS_TO_BYTES(udca_max_qps * bits_per_qp); + ctx->status_npage = DIV_ROUND_UP(size, PAGE_SIZE); + + size = ctx->status_npage * PAGE_SIZE; + ctx->max_qps = min_t(unsigned int, dev_max_qps, + size * BITS_PER_BYTE / bits_per_qp); + + kaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (!kaddr) + return; + + ctx->dca_mmap_entry = hns_roce_user_mmap_entry_insert(ib_uctx, + (u64)kaddr, size, HNS_ROCE_MMAP_TYPE_DCA); + if (!ctx->dca_mmap_entry) { + free_pages_exact(kaddr, size); + return; + } + + ctx->buf_status = (unsigned long *)kaddr; + ctx->sync_status = (unsigned long *)(kaddr + size / 2); +} + +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, int max_qps, struct hns_roce_ucontext *uctx) { + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) return;
- init_dca_context(&uctx->dca_ctx); + init_dca_context(ctx, true); + if (max_qps > 0) + init_udca_status(uctx, max_qps, hr_dev->caps.num_qps); }
void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx) { + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) return;
- cleanup_dca_context(hr_dev, &uctx->dca_ctx); + cleanup_dca_context(hr_dev, ctx); + + if (ctx->buf_status) { + free_pages_exact(ctx->buf_status, + ctx->status_npage * PAGE_SIZE); + ctx->buf_status = NULL; + } + + ida_destroy(&ctx->ida); }
static struct dca_mem *key_to_dca_mem(struct list_head *head, u64 key) @@ -1226,6 +1420,7 @@ static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&ctx->pool_lock, flags); if (!mem) break; + unregister_dca_mem(hr_dev, NULL, mem); free_dca_mem(mem); /* No more free memory */ @@ -1234,52 +1429,56 @@ static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev) } }
-static void kick_dca_mem(struct hns_roce_dev *hr_dev, +static void kick_dca_buf(struct hns_roce_dev *hr_dev, struct hns_roce_dca_cfg *cfg, - struct hns_roce_ucontext *uctx) + struct hns_roce_dca_ctx *ctx) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); - - /* Stop ageing worker and free DCA buffer from pool */ - cancel_delayed_work_sync(&cfg->dwork); + stop_aging_dca_mem(ctx, cfg, true); free_buf_from_dca_mem(ctx, cfg); + restart_aging_dca_mem(hr_dev, ctx);
/* Shrink kenrel DCA mem */ - if (!uctx) + if (ctx == &hr_dev->dca_ctx) remove_unused_dca_mem(hr_dev); }
-static void dca_mem_ageing_work(struct work_struct *work) +static u32 alloc_dca_num(struct hns_roce_dca_ctx *ctx) { - struct hns_roce_qp *hr_qp = container_of(work, struct hns_roce_qp, - dca_cfg.dwork.work); - struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); - bool hw_is_inactive; + int ret;
- hw_is_inactive = hr_dev->hw->chk_dca_buf_inactive && - hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp); - if (hw_is_inactive) - free_buf_from_dca_mem(ctx, &hr_qp->dca_cfg); + ret = ida_alloc_max(&ctx->ida, ctx->max_qps - 1, GFP_KERNEL); + if (ret < 0) + return HNS_DCA_INVALID_DCA_NUM;
- /* Shrink kenrel DCA mem */ - if (!hr_qp->ibqp.uobject) - remove_unused_dca_mem(hr_dev); + stop_free_dca_buf(ctx, ret); + update_dca_buf_status(ctx, ret, false); + return ret; }
-void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct hns_dca_detach_attr *attr) +static void free_dca_num(u32 dcan, struct hns_roce_dca_ctx *ctx) { - detach_dca_mem(hr_dev, hr_qp, attr); + if (dcan == HNS_DCA_INVALID_DCA_NUM) + return; + + ida_free(&ctx->ida, dcan); }
-void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct ib_udata *udata) +static int setup_kdca(struct hns_roce_dca_cfg *cfg) { - struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, - struct hns_roce_ucontext, ibucontext); + if (!cfg->npages) + return -EINVAL; + + cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), GFP_KERNEL); + if (!cfg->buf_list) + return -ENOMEM;
- kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx); + return 0; +} + +static void teardown_kdca(struct hns_roce_dca_cfg *cfg) +{ + kfree(cfg->buf_list); + cfg->buf_list = NULL; }
int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, @@ -1288,17 +1487,16 @@ int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
spin_lock_init(&cfg->lock); - INIT_DELAYED_WORK(&cfg->dwork, dca_mem_ageing_work); + INIT_LIST_HEAD(&cfg->aging_node); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT; + cfg->dcan = HNS_DCA_INVALID_DCA_NUM; + /* Cannot support dynamic detach when rq is not empty */ + if (!hr_qp->rq.wqe_cnt) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH;
- /* DCA page list for kernel QP */ - if (!udata && cfg->npages) { - cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), - GFP_KERNEL); - if (!cfg->buf_list) - return -ENOMEM; - } + if (!udata) + return setup_kdca(cfg);
return 0; } @@ -1308,14 +1506,32 @@ void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, { struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
- kick_dca_mem(hr_dev, cfg, uctx); + kick_dca_buf(hr_dev, cfg, ctx); + free_dca_num(cfg->dcan, ctx); + cfg->dcan = HNS_DCA_INVALID_DCA_NUM; + + if (!udata) + teardown_kdca(&hr_qp->dca_cfg); +} + +void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
- /* Free kenrel DCA buffer list */ - if (!udata && cfg->buf_list) { - kfree(cfg->buf_list); - cfg->buf_list = NULL; + if (hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_ERR) { + kick_dca_buf(hr_dev, cfg, ctx); + free_dca_num(cfg->dcan, ctx); + cfg->dcan = HNS_DCA_INVALID_DCA_NUM; + } else if (hr_qp->state == IB_QPS_RTR) { + free_dca_num(cfg->dcan, ctx); + cfg->dcan = alloc_dca_num(ctx); } }
@@ -1525,7 +1741,7 @@ static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_DETACH)( if (ret) return ret;
- detach_dca_mem(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr); + hns_roce_dca_detach(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr);
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index 11bade706..7733887ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -19,6 +19,7 @@ struct hns_dca_page_state { extern const struct uapi_definition hns_roce_dca_uapi_defs[];
#define HNS_DCA_INVALID_BUF_ID 0UL +#define HNS_DCA_INVALID_DCA_NUM ~0U
/* * buffer id(29b) = tag(7b) + owner(22b) @@ -55,7 +56,7 @@ typedef int (*hns_dca_enum_callback)(struct hns_dca_page_state *, u32, void *); void hns_roce_init_dca(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev);
-void hns_roce_register_udca(struct hns_roce_dev *hr_dev, +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, int max_qps, struct hns_roce_ucontext *uctx); void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx); @@ -69,9 +70,8 @@ int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr); void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_detach_attr *attr); - -void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct ib_udata *udata); +void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata);
void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, hns_dca_enum_callback cb); diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index a649d5081..2e4b3503f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -264,7 +264,8 @@ static void dca_setup_qp_stats(struct hns_roce_qp *hr_qp, { struct hns_roce_ucontext *uctx = NULL;
- if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) || !hr_qp->ibqp.pd) + if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) || + !hr_qp->ibqp.pd) return;
if (hr_qp->ibqp.pd->uobject) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c9bbbe4b6..851e397dc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -208,7 +208,8 @@ struct hns_roce_uar { enum hns_roce_mmap_type { HNS_ROCE_MMAP_TYPE_DB = 1, HNS_ROCE_MMAP_TYPE_DWQE, - HNS_ROCE_MMAP_TYPE_RESET = 4, + HNS_ROCE_MMAP_TYPE_DCA, + HNS_ROCE_MMAP_TYPE_RESET, };
struct hns_user_mmap_entry { @@ -226,6 +227,21 @@ struct hns_roce_dca_ctx { size_t max_size; /* max size the pool can expand to */ size_t min_size; /* shrink if @free_size > @min_size */ unsigned int unit_size; /* unit size per DCA mem */ + + unsigned int max_qps; + unsigned int status_npage; + struct ida ida; + +#define HNS_DCA_BITS_PER_STATUS 1 + unsigned long *buf_status; + unsigned long *sync_status; + + bool exit_aging; + struct list_head aging_proc_list; + struct list_head aging_new_list; + spinlock_t aging_lock; + struct delayed_work aging_dwork; + struct hns_user_mmap_entry *dca_mmap_entry; };
struct hns_roce_ucontext { @@ -342,12 +358,14 @@ struct hns_roce_mtr { /* DCA config */ struct hns_roce_dca_cfg { spinlock_t lock; - u32 buf_id; u16 attach_count; + u32 buf_id; + u32 dcan; void **buf_list; u32 npages; u32 sq_idx; - struct delayed_work dwork; + bool aging_enable; + struct list_head aging_node; };
struct hns_roce_mw { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 44eba2b0f..36e707e48 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -371,9 +371,9 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, return 0; }
-static inline bool check_qp_dca_enable(struct hns_roce_qp *hr_qp) +static bool check_dca_attach_enable(struct hns_roce_qp *hr_qp) { - return !!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA); + return hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH; }
static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev, @@ -407,6 +407,11 @@ static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev, return hns_roce_dca_attach(hr_dev, hr_qp, &attr); }
+static bool check_dca_detach_enable(struct hns_roce_qp *hr_qp) +{ + return hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH; +} + static void dca_detach_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { @@ -446,7 +451,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, return -EIO; }
- if (check_qp_dca_enable(hr_qp)) { + if (check_dca_attach_enable(hr_qp)) { ret = dca_attach_qp_buf(hr_dev, hr_qp); if (unlikely(ret)) { ibdev_err(ibdev, @@ -679,7 +684,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge);
- if (qp->en_flags & HNS_ROCE_QP_CAP_DCA) + if (qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) fill_dca_fields(qp, rc_sq_wqe);
/* @@ -858,7 +863,7 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, if (hr_qp->state == IB_QPS_RESET) return -EINVAL;
- if (check_qp_dca_enable(hr_qp)) { + if (check_dca_attach_enable(hr_qp)) { ret = dca_attach_qp_buf(hr_dev, hr_qp); if (unlikely(ret)) { ibdev_err(&hr_dev->ib_dev, @@ -4191,7 +4196,7 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
for (npolled = 0; npolled < num_entries; ++npolled) { ret = hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled); - if (cur_qp && check_qp_dca_enable(cur_qp)) + if (cur_qp && check_dca_detach_enable(cur_qp)) dca_detach_qp_buf(hr_dev, cur_qp); if (ret) break; @@ -4757,7 +4762,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) { + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) { hr_reg_enable(context, QPC_DCA_MODE); hr_reg_clear(qpc_mask, QPC_DCA_MODE); } @@ -5530,9 +5535,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (new_state == IB_QPS_RESET && !ibqp->uobject) clear_qp(hr_qp);
- if (check_qp_dca_enable(hr_qp) && - (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) - hns_roce_dca_kick(hr_dev, hr_qp, udata); + if (check_dca_attach_enable(hr_qp)) + hns_roce_modify_dca(hr_dev, hr_qp, udata);
out: return ret; @@ -5803,12 +5807,6 @@ static bool hns_roce_v2_chk_dca_buf_inactive(struct hns_roce_dev *hr_dev, if (state == HNS_ROCE_QP_ST_ERR || state == HNS_ROCE_QP_ST_RST) return true;
- /* If RQ is not empty, the buffer is always active until the QP stops - * working. - */ - if (hr_qp->rq.wqe_cnt > 0) - return false; - if (hr_qp->sq.wqe_cnt > 0) { tmp = (u32)hr_reg_read(&context, QPC_RETRY_MSG_MSN); sq_idx = tmp & (hr_qp->sq.wqe_cnt - 1); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b6d10435c..9dc50ec62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -36,6 +36,7 @@ #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> +#include <rdma/uverbs_ioctl.h>
#include "hnae3.h" #include "hns_roce_common.h" @@ -394,6 +395,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, break; case HNS_ROCE_MMAP_TYPE_DWQE: case HNS_ROCE_MMAP_TYPE_RESET: + case HNS_ROCE_MMAP_TYPE_DCA: ret = rdma_user_mmap_entry_insert_range( ucontext, &entry->rdma_entry, length, 1, U32_MAX); @@ -416,6 +418,9 @@ static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context) if (context->db_mmap_entry) rdma_user_mmap_entry_remove( &context->db_mmap_entry->rdma_entry); + if (context->dca_ctx.dca_mmap_entry) + rdma_user_mmap_entry_remove( + &context->dca_ctx.dca_mmap_entry->rdma_entry); }
static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) @@ -487,6 +492,29 @@ static void ucontext_set_resp(struct ib_ucontext *uctx,
if (context->config & HNS_ROCE_UCTX_CONFIG_DCA) resp->config |= HNS_ROCE_UCTX_RSP_DCA_FLAGS; + + if (context->dca_ctx.dca_mmap_entry) { + resp->dca_qps = context->dca_ctx.max_qps; + resp->dca_mmap_size = PAGE_SIZE * context->dca_ctx.status_npage; + rdma_entry = &context->dca_ctx.dca_mmap_entry->rdma_entry; + resp->dca_mmap_key = rdma_user_mmap_get_offset(rdma_entry); + } +} + +static u32 get_udca_max_qps(struct hns_roce_dev *hr_dev, + struct hns_roce_ib_alloc_ucontext *ucmd) +{ + u32 qp_num; + + if (ucmd->comp & HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS) { + qp_num = ucmd->dca_max_qps; + if (!qp_num) + qp_num = hr_dev->caps.num_qps; + } else { + qp_num = 0; + } + + return qp_num; }
static void hns_roce_get_uctx_config(struct hns_roce_dev *hr_dev, @@ -545,12 +573,13 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_init(&context->page_mutex); }
- hns_roce_register_udca(hr_dev, context); - ret = hns_roce_alloc_reset_entry(uctx); if (ret) goto error_fail_reset_entry;
+ hns_roce_register_udca(hr_dev, get_udca_max_qps(hr_dev, &ucmd), + context); + ucontext_set_resp(uctx, &resp); ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); @@ -600,6 +629,36 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); }
+static int mmap_dca(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct hns_roce_ucontext *uctx = to_hr_ucontext(context); + struct hns_roce_dca_ctx *ctx = &uctx->dca_ctx; + struct page **pages; + unsigned long num; + int ret; + + if ((vma->vm_end - vma->vm_start != (ctx->status_npage * PAGE_SIZE) || + !(vma->vm_flags & VM_SHARED))) + return -EINVAL; + + if (!(vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_EXEC)) + return -EPERM; + + if (!ctx->buf_status) + return -EOPNOTSUPP; + + pages = kcalloc(ctx->status_npage, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (num = 0; num < ctx->status_npage; num++) + pages[num] = virt_to_page(ctx->buf_status + num * PAGE_SIZE); + + ret = vm_insert_pages(vma, vma->vm_start, pages, &num); + kfree(pages); + return ret; +} + static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) { struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); @@ -623,6 +682,9 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) case HNS_ROCE_MMAP_TYPE_DWQE: prot = pgprot_device(vma->vm_page_prot); break; + case HNS_ROCE_MMAP_TYPE_DCA: + ret = mmap_dca(uctx, vma); + goto out; case HNS_ROCE_MMAP_TYPE_RESET: if (vma->vm_flags & (VM_WRITE | VM_EXEC)) { ret = -EINVAL; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 88d71fc1d..cc92a54d0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -787,7 +787,7 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, return ret; }
- hr_qp->en_flags |= HNS_ROCE_QP_CAP_DCA; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH; } else { /* * Because DCA and DWQE share the same fileds in RCWQE buffer, @@ -814,7 +814,7 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, { hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
- if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) hns_roce_disable_dca(hr_dev, hr_qp, udata); }
@@ -1460,22 +1460,17 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; }
-int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) +static int check_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_ib_modify_qp_resp resp = {}; struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - enum ib_qp_state cur_state, new_state; - int ret = -EINVAL; - - mutex_lock(&hr_qp->mutex); - - if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) - goto out; + int ret;
- cur_state = hr_qp->state; - new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) { + ibdev_err(ibqp->device, "failed to check modify curr state\n"); + return -EINVAL; + }
if (ibqp->uobject && (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { @@ -1485,19 +1480,42 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { - ibdev_warn(&hr_dev->ib_dev, + ibdev_warn(ibqp->device, "flush cqe is not supported in userspace!\n"); - goto out; + return -EINVAL; } }
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { - ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n"); - goto out; + ibdev_err(ibqp->device, "failed to check modify qp state\n"); + return -EINVAL; }
ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask); + if (ret) { + ibdev_err(ibqp->device, "failed to check modify qp attr\n"); + return ret; + } + + return 0; +} + +int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_ib_modify_qp_resp resp = {}; + enum ib_qp_state cur_state, new_state; + int ret; + + mutex_lock(&hr_qp->mutex); + + cur_state = hr_qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + ret = check_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); if (ret) goto out;
@@ -1512,6 +1530,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (udata && udata->outlen) { resp.tc_mode = hr_qp->tc_mode; resp.priority = hr_qp->sl; + resp.dcan = hr_qp->dca_cfg.dcan; ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (ret) @@ -1584,7 +1603,7 @@ static inline void *dca_buf_offset(struct hns_roce_dca_cfg *dca_cfg, u32 offset)
static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset) { - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) + if (unlikely(hr_qp->dca_cfg.buf_list)) return dca_buf_offset(&hr_qp->dca_cfg, offset); else return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 31c9c3b43..c73ff42c4 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -102,8 +102,9 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, - HNS_ROCE_QP_CAP_DCA = 1 << 4, + HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4, HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, + HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, };
struct hns_roce_ib_create_qp_resp { @@ -115,6 +116,8 @@ struct hns_roce_ib_modify_qp_resp { __u8 tc_mode; __u8 priority; __u8 reserved[6]; + __u32 dcan; + __u32 rsv2; };
enum { @@ -142,13 +145,21 @@ struct hns_roce_ib_alloc_ucontext_resp { __u32 max_inline_data; __u8 congest_type; __u8 reserved0[7]; - __aligned_u64 rsv_for_dca[2]; + __u32 dca_qps; + __u32 dca_mmap_size; + __aligned_u64 dca_mmap_key; __aligned_u64 reset_mmap_key; };
+enum hns_roce_uctx_comp_mask { + HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0, +}; + struct hns_roce_ib_alloc_ucontext { __u32 config; __u32 reserved; + __u32 comp; /* use hns_roce_uctx_comp_mask */ + __u32 dca_max_qps; };
struct hns_roce_ib_alloc_pd_resp {
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
read_poll_timeout() in the MBX path may sleep, and the probability of sleeping becomes higher at reset. In other words, it is not safe to use the MBX in an atomic context.
To ensure the atomicity of QPC setup, DCA uses locks to protect the QPC setup operation in the DCA ATTACH_MEM phase (i.e. post_send/post_recv). This triggers the above-mentioned problem at reset.
Replace read_poll_timeout() with read_poll_timeout_atomic() so that the MBX operation no longer sleeps in atomic context.
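To make the difference concrete: the two macros from <linux/iopoll.h> differ only in how they wait between reads. read_poll_timeout() calls usleep_range() and may therefore sleep, while read_poll_timeout_atomic() busy-waits with udelay() and is safe under a spinlock. The helpers and the readiness bit below are invented for this sketch; only the macros and their signatures are real.

#include <linux/bits.h>
#include <linux/io.h>
#include <linux/iopoll.h>

/* Must only be called from process context: sleeps up to ~1ms per poll. */
static int wait_hw_ready_sleeping(void __iomem *reg)
{
	u32 val;

	return read_poll_timeout(readl, val, val & BIT(0),
				 1000, 1000000, false, reg);
}

/* Safe in atomic context: busy-waits 1us between reads instead. */
static int wait_hw_ready_atomic(void __iomem *reg)
{
	u32 val;

	return read_poll_timeout_atomic(readl, val, val & BIT(0),
					1, 1000000, false, reg);
}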
Fixes: 306b8c76257b ("RDMA/hns: Do not destroy QP resources in the hw resetting phase") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 36e707e48..f06079352 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1130,7 +1130,7 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, unsigned long reset_stage) { #define HW_RESET_TIMEOUT_US 1000000 -#define HW_RESET_SLEEP_US 1000 +#define HW_RESET_DELAY_US 1
struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; @@ -1149,8 +1149,8 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, */ hr_dev->dis_db = true;
- ret = read_poll_timeout(ops->ae_dev_reset_cnt, val, - val > hr_dev->reset_cnt, HW_RESET_SLEEP_US, + ret = read_poll_timeout_atomic(ops->ae_dev_reset_cnt, val, + val > hr_dev->reset_cnt, HW_RESET_DELAY_US, HW_RESET_TIMEOUT_US, false, handle); if (!ret) hr_dev->is_reset = true;
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9CK0O
--------------------------------------------------------------------------
Currently, the DCA code has a hard build-time dependency on ib_uverbs. If ib_uverbs is not configured, the whole hns-roce-hw-v2 build fails.
This patch fixes the problem by isolating the parts of the DCA code that rely on ib_uverbs behind CONFIG_INFINIBAND_USER_ACCESS.
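The guard applied in the hunk below, shown in isolation (foo_uapi_defs is a placeholder name standing in for hns_roce_dca_uapi_defs): the uverbs handlers and method tables are compiled only when CONFIG_INFINIBAND_USER_ACCESS is enabled, while an empty table keeps the symbol defined so the rest of the driver still links against it.

#include <rdma/uverbs_ioctl.h>

#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)

/* Handlers using UVERBS_HANDLER()/DECLARE_UVERBS_NAMED_METHOD() go here. */

const struct uapi_definition foo_uapi_defs[] = {
	/* UVERBS_OBJECT() entries referencing the handlers above */
	{}
};

#else

const struct uapi_definition foo_uapi_defs[] = {};

#endif

Keeping an (empty) definition, instead of hiding the symbol entirely, means the code that registers the uapi table does not need its own #ifdef.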
Fixes: bca9ff271249 ("RDMA/hns: Add method for shrinking DCA memory pool") Fixes: d8cca476a8d2 ("RDMA/hns: Add method for attaching WQE buffer") Fixes: 0273952c5e6e ("RDMA/hns: Add method to detach WQE buffer") Fixes: f0384ddcf1ee ("RDMA/hns: Add method to query WQE buffer's address") Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 72 +++++++++++++----------- 1 file changed, 40 insertions(+), 32 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 4d3e52dd5..49adf4326 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -604,38 +604,6 @@ struct dca_page_query_active_attr { u64 mem_key; };
-static int query_dca_active_pages_proc(struct dca_mem *mem, int index, - void *param) -{ - struct hns_dca_page_state *state = &mem->states[index]; - struct dca_page_query_active_attr *attr = param; - - if (!dca_page_is_active(state, attr->buf_id)) - return 0; - - if (attr->curr_index < attr->start_index) { - attr->curr_index++; - return 0; - } else if (attr->curr_index > attr->start_index) { - return DCA_MEM_STOP_ITERATE; - } - - /* Search first page in DCA mem */ - attr->page_index = index; - attr->mem_key = mem->key; - /* Search active pages in continuous addresses */ - while (index < mem->page_count) { - state = &mem->states[index]; - if (!dca_page_is_active(state, attr->buf_id)) - break; - - index++; - attr->page_count++; - } - - return DCA_MEM_STOP_ITERATE; -} - static int sync_dca_buf_offset(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr) @@ -1542,6 +1510,7 @@ uverbs_attr_to_hr_uctx(struct uverbs_attr_bundle *attrs) struct hns_roce_ucontext, ibucontext); }
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)( struct uverbs_attr_bundle *attrs) { @@ -1753,6 +1722,38 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, UVERBS_ATTR_TYPE(u32), UA_MANDATORY));
+static int query_dca_active_pages_proc(struct dca_mem *mem, int index, + void *param) +{ + struct hns_dca_page_state *state = &mem->states[index]; + struct dca_page_query_active_attr *attr = param; + + if (!dca_page_is_active(state, attr->buf_id)) + return 0; + + if (attr->curr_index < attr->start_index) { + attr->curr_index++; + return 0; + } else if (attr->curr_index > attr->start_index) { + return DCA_MEM_STOP_ITERATE; + } + + /* Search first page in DCA mem */ + attr->page_index = index; + attr->mem_key = mem->key; + /* Search active pages in continuous addresses */ + while (index < mem->page_count) { + state = &mem->states[index]; + if (!dca_page_is_active(state, attr->buf_id)) + break; + + index++; + attr->page_count++; + } + + return DCA_MEM_STOP_ITERATE; +} + static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_QUERY)( struct uverbs_attr_bundle *attrs) { @@ -1836,6 +1837,13 @@ const struct uapi_definition hns_roce_dca_uapi_defs[] = { {} };
+#else + +const struct uapi_definition hns_roce_dca_uapi_defs[] = { +}; + +#endif + /* enum DCA pool */ struct dca_mem_enum_attr { void *param;