From: Chengchang Tang <tangchengchang@huawei.com>
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I63KVU
----------------------------------------------------------
Use shared memory to store the DCA status. The size of the shared area is derived from the max QP number passed in the ucontext alloc parameter, and the area is mmapped to user space so that the DCA buffer state of each QP can be queried and synchronized between kernel and user space.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
Reviewed-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_dca.c     | 338 +++++++++++++++----
 drivers/infiniband/hw/hns/hns_roce_dca.h     |   8 +-
 drivers/infiniband/hw/hns/hns_roce_debugfs.c |   3 +-
 drivers/infiniband/hw/hns/hns_roce_device.h  |  22 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c   |  30 +-
 drivers/infiniband/hw/hns/hns_roce_main.c    |  66 +++-
 drivers/infiniband/hw/hns/hns_roce_qp.c      |  59 ++--
 include/uapi/rdma/hns-abi.h                  |  14 +-
 8 files changed, 435 insertions(+), 105 deletions(-)
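Note for reviewers: the snippet below is an illustrative userspace consumer of the shared DCA status area — a sketch under stated assumptions, not the rdma-core provider implementation. The ABI fields (dca_mmap_key, dca_mmap_size, dca_qps in struct hns_roce_ib_alloc_ucontext_resp, and dcan in struct hns_roce_ib_modify_qp_resp) and the one-bit-per-QP layout come from this patch; the helper and struct names (hns_dca_status, hns_dca_status_map, hns_dca_buf_attached) are hypothetical. A 64-bit little-endian host is assumed so the kernel's unsigned-long bitmaps can be read as uint64_t words.

#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>

#define HNS_DCA_BITS_PER_STATUS 1U

struct hns_dca_status {
	uint64_t *buf_status;	/* first half: per-QP "buffer attached" bits */
	uint64_t *sync_status;	/* second half: per-QP free-vs-use lock bits */
	uint32_t max_qps;	/* resp.dca_qps */
};

/* Map the status area exported through HNS_ROCE_MMAP_TYPE_DCA. */
static int hns_dca_status_map(struct hns_dca_status *st, int cmd_fd,
			      uint64_t dca_mmap_key, uint32_t dca_mmap_size,
			      uint32_t dca_qps)
{
	void *addr = mmap(NULL, dca_mmap_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, cmd_fd, dca_mmap_key);

	if (addr == MAP_FAILED)
		return -1;

	st->buf_status = addr;
	st->sync_status = (uint64_t *)((uint8_t *)addr + dca_mmap_size / 2);
	st->max_qps = dca_qps;
	return 0;
}

/* "dcan" is the DCA number returned in hns_roce_ib_modify_qp_resp.dcan. */
static bool hns_dca_buf_attached(const struct hns_dca_status *st, uint32_t dcan)
{
	uint32_t bit = dcan * HNS_DCA_BITS_PER_STATUS;

	if (dcan >= st->max_qps)
		return false;

	return st->buf_status[bit / 64] & (1ULL << (bit % 64));
}

The second half of the same mapping (sync_status) acts as a per-QP lock: the kernel's ageing worker takes a QP's bit with test_and_set_bit_lock() before freeing its buffer, so a provider that holds that bit while building WQEs can keep the buffer from being reclaimed underneath it.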
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 2c13c619ee32..d79c90ef33ed 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -143,7 +143,7 @@ static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, bool is_user, }
mem->page_count = kmem->npages; - /* Override the attr->size by actually alloced size */ + /* Overwrite attr->size with the actually allocated size */ attr->size = kmem->ntrunks << kmem->trunk_shift; return kmem;
@@ -731,6 +731,72 @@ static int active_alloced_buf(struct hns_roce_qp *hr_qp, return ret; }
+#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS) +#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n) +static bool start_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan) +{ + unsigned long *st = ctx->sync_status; + + if (st && dcan < ctx->max_qps) + return !test_and_set_bit_lock(DCAN_TO_SYNC_BIT(dcan), st); + + return true; +} + +static void stop_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan) +{ + unsigned long *st = ctx->sync_status; + + if (st && dcan < ctx->max_qps) + clear_bit_unlock(DCAN_TO_SYNC_BIT(dcan), st); +} + +static void update_dca_buf_status(struct hns_roce_dca_ctx *ctx, u32 dcan, + bool en) +{ + unsigned long *st = ctx->buf_status; + + if (st && dcan < ctx->max_qps) { + if (en) + set_bit(DCAN_TO_STAT_BIT(dcan), st); + else + clear_bit(DCAN_TO_STAT_BIT(dcan), st); + + /* sync status with user-space rdma */ + smp_mb__after_atomic(); + } +} + +static void restart_aging_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx) +{ + spin_lock(&ctx->aging_lock); + ctx->exit_aging = false; + if (!list_empty(&ctx->aging_new_list)) + queue_delayed_work(hr_dev->irq_workq, &ctx->aging_dwork, + msecs_to_jiffies(DCA_MEM_AGEING_MSES)); + + spin_unlock(&ctx->aging_lock); +} + +static void stop_aging_dca_mem(struct hns_roce_dca_ctx *ctx, + struct hns_roce_dca_cfg *cfg, bool stop_worker) +{ + spin_lock(&ctx->aging_lock); + if (stop_worker) { + ctx->exit_aging = true; + cancel_delayed_work(&ctx->aging_dwork); + } + + spin_lock(&cfg->lock); + + if (!list_empty(&cfg->aging_node)) + list_del_init(&cfg->aging_node); + + spin_unlock(&cfg->lock); + spin_unlock(&ctx->aging_lock); +} + static int attach_dca_mem(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr, @@ -741,8 +807,8 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev, u32 buf_id; int ret;
- /* Stop DCA mem ageing worker */ - cancel_delayed_work(&cfg->dwork); + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH) + stop_aging_dca_mem(ctx, cfg, false); resp->alloc_flags = 0;
spin_lock(&cfg->lock); @@ -779,6 +845,7 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev,
resp->alloc_flags |= HNS_DCA_ATTACH_FLAGS_NEW_BUFFER; resp->alloc_pages = cfg->npages; + update_dca_buf_status(ctx, cfg->dcan, true);
return 0; } @@ -831,6 +898,7 @@ static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, unsigned long flags; u32 buf_id;
+ update_dca_buf_status(ctx, cfg->dcan, false); spin_lock(&cfg->lock); buf_id = cfg->buf_id; cfg->buf_id = HNS_DCA_INVALID_BUF_ID; @@ -849,19 +917,22 @@ static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx, spin_unlock_irqrestore(&ctx->pool_lock, flags); }
-static void detach_dca_mem(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_dca_detach_attr *attr) +void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_dca_detach_attr *attr) { + struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
- /* Start an ageing worker to free buffer */ - cancel_delayed_work(&cfg->dwork); + stop_aging_dca_mem(ctx, cfg, true); + + spin_lock(&ctx->aging_lock); spin_lock(&cfg->lock); cfg->sq_idx = attr->sq_idx; - queue_delayed_work(hr_dev->irq_workq, &cfg->dwork, - msecs_to_jiffies(DCA_MEM_AGEING_MSES)); + list_add_tail(&cfg->aging_node, &ctx->aging_new_list); spin_unlock(&cfg->lock); + spin_unlock(&ctx->aging_lock); + + restart_aging_dca_mem(hr_dev, ctx); }
struct dca_mem_shrink_attr { @@ -924,11 +995,87 @@ static void shrink_dca_mem(struct hns_roce_dev *hr_dev, resp->free_key = attr.shrink_key; }
-static void init_dca_context(struct hns_roce_dca_ctx *ctx) +static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, + struct hns_roce_dca_ctx *ctx) +{ + struct hns_roce_dca_cfg *cfg, *tmp_cfg; + struct hns_roce_qp *hr_qp; + + spin_lock(&ctx->aging_lock); + list_for_each_entry_safe(cfg, tmp_cfg, &ctx->aging_new_list, aging_node) + list_move(&cfg->aging_node, &ctx->aging_proc_list); + + while (!ctx->exit_aging && !list_empty(&ctx->aging_proc_list)) { + cfg = list_first_entry(&ctx->aging_proc_list, + struct hns_roce_dca_cfg, aging_node); + list_del_init_careful(&cfg->aging_node); + hr_qp = container_of(cfg, struct hns_roce_qp, dca_cfg); + spin_unlock(&ctx->aging_lock); + + if (start_free_dca_buf(ctx, cfg->dcan)) { + if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) + free_buf_from_dca_mem(ctx, cfg); + + stop_free_dca_buf(ctx, cfg->dcan); + } + + spin_lock(&ctx->aging_lock); + + spin_lock(&cfg->lock); + + if (cfg->buf_id != HNS_DCA_INVALID_BUF_ID) + list_move(&cfg->aging_node, &ctx->aging_new_list); + + spin_unlock(&cfg->lock); + } + spin_unlock(&ctx->aging_lock); +} + +static void udca_mem_aging_work(struct work_struct *work) +{ + struct hns_roce_dca_ctx *ctx = container_of(work, + struct hns_roce_dca_ctx, aging_dwork.work); + struct hns_roce_ucontext *uctx = container_of(ctx, + struct hns_roce_ucontext, dca_ctx); + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + + cancel_delayed_work(&ctx->aging_dwork); + process_aging_dca_mem(hr_dev, ctx); + if (!ctx->exit_aging) + restart_aging_dca_mem(hr_dev, ctx); +} + +static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev); + +static void kdca_mem_aging_work(struct work_struct *work) +{ + struct hns_roce_dca_ctx *ctx = container_of(work, + struct hns_roce_dca_ctx, aging_dwork.work); + struct hns_roce_dev *hr_dev = container_of(ctx, struct hns_roce_dev, + dca_ctx); + + cancel_delayed_work(&ctx->aging_dwork); + process_aging_dca_mem(hr_dev, ctx); + remove_unused_dca_mem(hr_dev); + if (!ctx->exit_aging) + restart_aging_dca_mem(hr_dev, ctx); +} + +static void init_dca_context(struct hns_roce_dca_ctx *ctx, bool is_user) { INIT_LIST_HEAD(&ctx->pool); spin_lock_init(&ctx->pool_lock); ctx->total_size = 0; + + ida_init(&ctx->ida); + INIT_LIST_HEAD(&ctx->aging_new_list); + INIT_LIST_HEAD(&ctx->aging_proc_list); + spin_lock_init(&ctx->aging_lock); + ctx->exit_aging = false; + if (is_user) + INIT_DELAYED_WORK(&ctx->aging_dwork, udca_mem_aging_work); + else + INIT_DELAYED_WORK(&ctx->aging_dwork, kdca_mem_aging_work); }
static void cleanup_dca_context(struct hns_roce_dev *hr_dev, @@ -938,6 +1085,10 @@ static void cleanup_dca_context(struct hns_roce_dev *hr_dev, unsigned long flags; bool is_user;
+ spin_lock(&ctx->aging_lock); + cancel_delayed_work_sync(&ctx->aging_dwork); + spin_unlock(&ctx->aging_lock); + is_user = (ctx != &hr_dev->dca_ctx); spin_lock_irqsave(&ctx->pool_lock, flags); list_for_each_entry_safe(mem, tmp, &ctx->pool, list) { @@ -963,7 +1114,7 @@ static uint dca_unit_size; static ulong dca_min_size = DCA_MAX_MEM_SIZE; static ulong dca_max_size = DCA_MAX_MEM_SIZE;
-static void config_kdca_context(struct hns_roce_dca_ctx *ctx) +static void load_kdca_param(struct hns_roce_dca_ctx *ctx) { unsigned int unit_size;
@@ -985,9 +1136,8 @@ static void config_kdca_context(struct hns_roce_dca_ctx *ctx)
void hns_roce_init_dca(struct hns_roce_dev *hr_dev) { - init_dca_context(&hr_dev->dca_ctx); - - config_kdca_context(&hr_dev->dca_ctx); + load_kdca_param(&hr_dev->dca_ctx); + init_dca_context(&hr_dev->dca_ctx, false); }
void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev) @@ -995,22 +1145,68 @@ void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev) cleanup_dca_context(hr_dev, &hr_dev->dca_ctx); }
-void hns_roce_register_udca(struct hns_roce_dev *hr_dev, +static void init_udca_status(struct hns_roce_ucontext *uctx, int udca_max_qps, + unsigned int dev_max_qps) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + struct ib_ucontext *ib_uctx = &uctx->ibucontext; + void *kaddr; + size_t size; + + size = BITS_TO_BYTES(udca_max_qps * bits_per_qp); + ctx->status_npage = DIV_ROUND_UP(size, PAGE_SIZE); + + size = ctx->status_npage * PAGE_SIZE; + ctx->max_qps = min_t(unsigned int, dev_max_qps, + size * BITS_PER_BYTE / bits_per_qp); + + kaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (!kaddr) + return; + + ctx->dca_mmap_entry = hns_roce_user_mmap_entry_insert(ib_uctx, + (u64)kaddr, size, HNS_ROCE_MMAP_TYPE_DCA); + if (!ctx->dca_mmap_entry) { + free_pages_exact(kaddr, size); + return; + } + + ctx->buf_status = (unsigned long *)kaddr; + ctx->sync_status = (unsigned long *)(kaddr + size / 2); +} + +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, int max_qps, struct hns_roce_ucontext *uctx) { + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) return;
- init_dca_context(&uctx->dca_ctx); + init_dca_context(ctx, true); + if (max_qps > 0) + init_udca_status(uctx, max_qps, hr_dev->caps.num_qps); }
void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx) { + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA)) return;
- cleanup_dca_context(hr_dev, &uctx->dca_ctx); + cleanup_dca_context(hr_dev, ctx); + + if (ctx->buf_status) { + free_pages_exact(ctx->buf_status, + ctx->status_npage * PAGE_SIZE); + ctx->buf_status = NULL; + } + + ida_destroy(&ctx->ida); }
static struct dca_mem *key_to_dca_mem(struct list_head *head, u64 key) @@ -1227,6 +1423,7 @@ static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&ctx->pool_lock, flags); if (!mem) break; + unregister_dca_mem(hr_dev, NULL, mem); free_dca_mem(mem); /* No more free memory */ @@ -1235,52 +1432,56 @@ static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev) } }
-static void kick_dca_mem(struct hns_roce_dev *hr_dev, +static void kick_dca_buf(struct hns_roce_dev *hr_dev, struct hns_roce_dca_cfg *cfg, - struct hns_roce_ucontext *uctx) + struct hns_roce_dca_ctx *ctx) { - struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); - - /* Stop ageing worker and free DCA buffer from pool */ - cancel_delayed_work_sync(&cfg->dwork); + stop_aging_dca_mem(ctx, cfg, true); free_buf_from_dca_mem(ctx, cfg); + restart_aging_dca_mem(hr_dev, ctx);
/* Shrink kenrel DCA mem */ - if (!uctx) + if (ctx == &hr_dev->dca_ctx) remove_unused_dca_mem(hr_dev); }
-static void dca_mem_ageing_work(struct work_struct *work) +static u32 alloc_dca_num(struct hns_roce_dca_ctx *ctx) { - struct hns_roce_qp *hr_qp = container_of(work, struct hns_roce_qp, - dca_cfg.dwork.work); - struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp); - bool hw_is_inactive; + int ret;
- hw_is_inactive = hr_dev->hw->chk_dca_buf_inactive && - hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp); - if (hw_is_inactive) - free_buf_from_dca_mem(ctx, &hr_qp->dca_cfg); + ret = ida_alloc_max(&ctx->ida, ctx->max_qps - 1, GFP_KERNEL); + if (ret < 0) + return HNS_DCA_INVALID_DCA_NUM;
- /* Shrink kenrel DCA mem */ - if (!hr_qp->ibqp.uobject) - remove_unused_dca_mem(hr_dev); + stop_free_dca_buf(ctx, ret); + update_dca_buf_status(ctx, ret, false); + return ret; }
-void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct hns_dca_detach_attr *attr) +static void free_dca_num(u32 dcan, struct hns_roce_dca_ctx *ctx) { - detach_dca_mem(hr_dev, hr_qp, attr); + if (dcan == HNS_DCA_INVALID_DCA_NUM) + return; + + ida_free(&ctx->ida, dcan); }
-void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct ib_udata *udata) +static int setup_kdca(struct hns_roce_dca_cfg *cfg) { - struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, - struct hns_roce_ucontext, ibucontext); + if (!cfg->npages) + return -EINVAL; + + cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), GFP_KERNEL); + if (!cfg->buf_list) + return -ENOMEM;
- kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx); + return 0; +} + +static void teardown_kdca(struct hns_roce_dca_cfg *cfg) +{ + kfree(cfg->buf_list); + cfg->buf_list = NULL; }
int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, @@ -1289,17 +1490,16 @@ int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
spin_lock_init(&cfg->lock); - INIT_DELAYED_WORK(&cfg->dwork, dca_mem_ageing_work); + INIT_LIST_HEAD(&cfg->aging_node); cfg->buf_id = HNS_DCA_INVALID_BUF_ID; cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT; + cfg->dcan = HNS_DCA_INVALID_DCA_NUM; + /* Cannot support dynamic detach when rq is not empty */ + if (!hr_qp->rq.wqe_cnt) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH;
- /* DCA page list for kernel QP */ - if (!udata && cfg->npages) { - cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), - GFP_KERNEL); - if (!cfg->buf_list) - return -ENOMEM; - } + if (!udata) + return setup_kdca(cfg);
return 0; } @@ -1309,14 +1509,32 @@ void hns_roce_disable_dca(struct hns_roce_dev *hr_dev, { struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
- kick_dca_mem(hr_dev, cfg, uctx); + kick_dca_buf(hr_dev, cfg, ctx); + free_dca_num(cfg->dcan, ctx); + cfg->dcan = HNS_DCA_INVALID_DCA_NUM; + + if (!udata) + teardown_kdca(&hr_qp->dca_cfg); +} + +void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext); + struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx); + struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;
- /* Free kenrel DCA buffer list */ - if (!udata && cfg->buf_list) { - kfree(cfg->buf_list); - cfg->buf_list = NULL; + if (hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_ERR) { + kick_dca_buf(hr_dev, cfg, ctx); + free_dca_num(cfg->dcan, ctx); + cfg->dcan = HNS_DCA_INVALID_DCA_NUM; + } else if (hr_qp->state == IB_QPS_RTR) { + free_dca_num(cfg->dcan, ctx); + cfg->dcan = alloc_dca_num(ctx); } }
@@ -1520,7 +1738,7 @@ static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_DETACH)( if (ret) return ret;
- detach_dca_mem(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr); + hns_roce_dca_detach(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr);
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index 11bade706bd7..7733887ce5e1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -19,6 +19,7 @@ struct hns_dca_page_state { extern const struct uapi_definition hns_roce_dca_uapi_defs[];
#define HNS_DCA_INVALID_BUF_ID 0UL +#define HNS_DCA_INVALID_DCA_NUM ~0U
/* * buffer id(29b) = tag(7b) + owner(22b) @@ -55,7 +56,7 @@ typedef int (*hns_dca_enum_callback)(struct hns_dca_page_state *, u32, void *); void hns_roce_init_dca(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev);
-void hns_roce_register_udca(struct hns_roce_dev *hr_dev, +void hns_roce_register_udca(struct hns_roce_dev *hr_dev, int max_qps, struct hns_roce_ucontext *uctx); void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *uctx); @@ -69,9 +70,8 @@ int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_attach_attr *attr); void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_dca_detach_attr *attr); - -void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct ib_udata *udata); +void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata);
void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, hns_dca_enum_callback cb); diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index eedb24ee103e..cacdeb4d9fad 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -259,7 +259,8 @@ static void dca_setup_qp_stats(struct hns_roce_qp *hr_qp, { struct hns_roce_ucontext *uctx = NULL;
- if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) || !hr_qp->ibqp.pd) + if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) || + !hr_qp->ibqp.pd) return;
if (hr_qp->ibqp.pd->uobject) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e8ba256a1885..5b5f6c5920f1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -205,6 +205,7 @@ struct hns_roce_uar { enum hns_roce_mmap_type { HNS_ROCE_MMAP_TYPE_DB = 1, HNS_ROCE_MMAP_TYPE_DWQE, + HNS_ROCE_MMAP_TYPE_DCA, };
struct hns_user_mmap_entry { @@ -222,6 +223,21 @@ struct hns_roce_dca_ctx { size_t max_size; /* max size the pool can expand to */ size_t min_size; /* shrink if @free_size > @min_size */ unsigned int unit_size; /* unit size per DCA mem */ + + unsigned int max_qps; + unsigned int status_npage; + struct ida ida; + +#define HNS_DCA_BITS_PER_STATUS 1 + unsigned long *buf_status; + unsigned long *sync_status; + + bool exit_aging; + struct list_head aging_proc_list; + struct list_head aging_new_list; + spinlock_t aging_lock; + struct delayed_work aging_dwork; + struct hns_user_mmap_entry *dca_mmap_entry; };
struct hns_roce_ucontext { @@ -332,12 +348,14 @@ struct hns_roce_mtr { /* DCA config */ struct hns_roce_dca_cfg { spinlock_t lock; - u32 buf_id; u16 attach_count; + u32 buf_id; + u32 dcan; void **buf_list; u32 npages; u32 sq_idx; - struct delayed_work dwork; + bool aging_enable; + struct list_head aging_node; };
struct hns_roce_mw { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index dfdbcaf6508c..728f860ffc99 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -372,9 +372,9 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, return 0; }
-static inline bool check_qp_dca_enable(struct hns_roce_qp *hr_qp) +static bool check_dca_attach_enable(struct hns_roce_qp *hr_qp) { - return !!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA); + return hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH; }
static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev, @@ -408,6 +408,11 @@ static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev, return hns_roce_dca_attach(hr_dev, hr_qp, &attr); }
+static bool check_dca_detach_enable(struct hns_roce_qp *hr_qp) +{ + return hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH; +} + static void dca_detach_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { @@ -454,7 +459,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, return -EIO; }
- if (check_qp_dca_enable(hr_qp)) { + if (check_dca_attach_enable(hr_qp)) { ret = dca_attach_qp_buf(hr_dev, hr_qp); if (unlikely(ret)) { ibdev_err(&hr_dev->ib_dev, @@ -694,7 +699,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge);
- if (qp->en_flags & HNS_ROCE_QP_CAP_DCA) + if (qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) fill_dca_fields(qp, rc_sq_wqe);
/* @@ -883,7 +888,7 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, if (hr_qp->state == IB_QPS_RESET) return -EINVAL;
- if (check_qp_dca_enable(hr_qp)) { + if (check_dca_attach_enable(hr_qp)) { ret = dca_attach_qp_buf(hr_dev, hr_qp); if (unlikely(ret)) { ibdev_err(ibdev, @@ -4272,7 +4277,7 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
for (npolled = 0; npolled < num_entries; ++npolled) { ret = hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled); - if (cur_qp && check_qp_dca_enable(cur_qp)) + if (cur_qp && check_dca_detach_enable(cur_qp)) dca_detach_qp_buf(hr_dev, cur_qp); if (ret) break; @@ -4842,7 +4847,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) { + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) { hr_reg_enable(context, QPC_DCA_MODE); hr_reg_clear(qpc_mask, QPC_DCA_MODE); } @@ -5620,9 +5625,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (new_state == IB_QPS_RESET && !ibqp->uobject) clear_qp(hr_qp);
- if (check_qp_dca_enable(hr_qp) && - (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) - hns_roce_dca_kick(hr_dev, hr_qp, udata); + if (check_dca_attach_enable(hr_qp)) + hns_roce_modify_dca(hr_dev, hr_qp, udata);
out: return ret; @@ -5831,12 +5835,6 @@ static bool hns_roce_v2_chk_dca_buf_inactive(struct hns_roce_dev *hr_dev, if (state == HNS_ROCE_QP_ST_ERR || state == HNS_ROCE_QP_ST_RST) return true;
- /* If RQ is not empty, the buffer is always active until the QP stops - * working. - */ - if (hr_qp->rq.wqe_cnt > 0) - return false; - if (hr_qp->sq.wqe_cnt > 0) { tmp = (u32)hr_reg_read(&context, QPC_RETRY_MSG_MSN); sq_idx = tmp & (hr_qp->sq.wqe_cnt - 1); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index fe4ad13654cf..cdfcefb1f660 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -37,6 +37,7 @@ #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> +#include <rdma/uverbs_ioctl.h>
#include "hnae3.h" #include "hns_roce_common.h" @@ -341,6 +342,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, ucontext, &entry->rdma_entry, length, 0); break; case HNS_ROCE_MMAP_TYPE_DWQE: + case HNS_ROCE_MMAP_TYPE_DCA: ret = rdma_user_mmap_entry_insert_range( ucontext, &entry->rdma_entry, length, 1, U32_MAX); @@ -363,6 +365,9 @@ static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context) if (context->db_mmap_entry) rdma_user_mmap_entry_remove( &context->db_mmap_entry->rdma_entry); + if (context->dca_ctx.dca_mmap_entry) + rdma_user_mmap_entry_remove( + &context->dca_ctx.dca_mmap_entry->rdma_entry); }
static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) @@ -382,12 +387,36 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) static void ucontext_set_resp(struct ib_ucontext *uctx, struct hns_roce_ib_alloc_ucontext_resp *resp) { + struct hns_roce_ucontext *context = to_hr_ucontext(uctx); struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); + struct rdma_user_mmap_entry *rdma_entry;
resp->qp_tab_size = hr_dev->caps.num_qps; resp->srq_tab_size = hr_dev->caps.num_srqs; resp->cqe_size = hr_dev->caps.cqe_sz; resp->mac_type = hr_dev->mac_type; + if (context->dca_ctx.dca_mmap_entry) { + resp->dca_qps = context->dca_ctx.max_qps; + resp->dca_mmap_size = PAGE_SIZE * context->dca_ctx.status_npage; + rdma_entry = &context->dca_ctx.dca_mmap_entry->rdma_entry; + resp->dca_mmap_key = rdma_user_mmap_get_offset(rdma_entry); + } +} + +static u32 get_udca_max_qps(struct hns_roce_dev *hr_dev, + struct hns_roce_ib_alloc_ucontext *ucmd) +{ + u32 qp_num; + + if (ucmd->comp & HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS) { + qp_num = ucmd->dca_max_qps; + if (!qp_num) + qp_num = hr_dev->caps.num_qps; + } else { + qp_num = 0; + } + + return qp_num; }
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, @@ -447,7 +476,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_init(&context->page_mutex); }
- hns_roce_register_udca(hr_dev, context); + hns_roce_register_udca(hr_dev, get_udca_max_qps(hr_dev, &ucmd), + context);
ucontext_set_resp(uctx, &resp); ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); @@ -492,6 +522,36 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); }
+static int mmap_dca(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct hns_roce_ucontext *uctx = to_hr_ucontext(context); + struct hns_roce_dca_ctx *ctx = &uctx->dca_ctx; + struct page **pages; + unsigned long num; + int ret; + + if ((vma->vm_end - vma->vm_start != (ctx->status_npage * PAGE_SIZE) || + !(vma->vm_flags & VM_SHARED))) + return -EINVAL; + + if (!(vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_EXEC)) + return -EPERM; + + if (!ctx->buf_status) + return -EOPNOTSUPP; + + pages = kcalloc(ctx->status_npage, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (num = 0; num < ctx->status_npage; num++) + pages[num] = virt_to_page(ctx->buf_status + num * PAGE_SIZE); + + ret = vm_insert_pages(vma, vma->vm_start, pages, &num); + kfree(pages); + return ret; +} + static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) { struct rdma_user_mmap_entry *rdma_entry; @@ -512,6 +572,9 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) case HNS_ROCE_MMAP_TYPE_DWQE: prot = pgprot_device(vma->vm_page_prot); break; + case HNS_ROCE_MMAP_TYPE_DCA: + ret = mmap_dca(uctx, vma); + goto out; default: return -EINVAL; } @@ -519,6 +582,7 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE, prot, rdma_entry);
+out: rdma_user_mmap_entry_put(rdma_entry);
return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 302f2ea75749..de70c8637333 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -788,7 +788,7 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, return ret; }
- hr_qp->en_flags |= HNS_ROCE_QP_CAP_DCA; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH; } else { /* * Because DCA and DWQE share the same fileds in RCWQE buffer, @@ -815,7 +815,7 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, { hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
- if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) hns_roce_disable_dca(hr_dev, hr_qp, udata); }
@@ -1408,22 +1408,17 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; }
-int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) +static int check_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_ib_modify_qp_resp resp = {}; - enum ib_qp_state cur_state, new_state; - int ret = -EINVAL; - - mutex_lock(&hr_qp->mutex); - - if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) - goto out; + int ret;
- cur_state = hr_qp->state; - new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) { + ibdev_err(ibqp->device, "failed to check modify curr state\n"); + return -EINVAL; + }
if (ibqp->uobject && (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { @@ -1433,19 +1428,42 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { - ibdev_warn(&hr_dev->ib_dev, + ibdev_warn(ibqp->device, "flush cqe is not supported in userspace!\n"); - goto out; + return -EINVAL; } }
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { - ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n"); - goto out; + ibdev_err(ibqp->device, "failed to check modify qp state\n"); + return -EINVAL; }
ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask); + if (ret) { + ibdev_err(ibqp->device, "failed to check modify qp attr\n"); + return ret; + } + + return 0; +} + +int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_ib_modify_qp_resp resp = {}; + enum ib_qp_state cur_state, new_state; + int ret; + + mutex_lock(&hr_qp->mutex); + + cur_state = hr_qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + ret = check_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); if (ret) goto out;
@@ -1460,6 +1478,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (udata && udata->outlen) { resp.tc_mode = hr_qp->tc_mode; resp.priority = hr_qp->sl; + resp.dcan = hr_qp->dca_cfg.dcan; ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (ret) @@ -1530,7 +1549,7 @@ static inline void *dca_buf_offset(struct hns_roce_dca_cfg *dca_cfg, u32 offset)
static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset) { - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) + if (unlikely(hr_qp->dca_cfg.buf_list)) return dca_buf_offset(&hr_qp->dca_cfg, offset); else return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 1faa11b8060b..69508419d3a0 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -77,8 +77,9 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, - HNS_ROCE_QP_CAP_DCA = 1 << 4, + HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4, HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, + HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, };
struct hns_roce_ib_create_qp_resp { @@ -96,6 +97,8 @@ struct hns_roce_ib_modify_qp_resp { __u8 tc_mode; __u8 priority; __u8 reserved[6]; + __u32 dcan; + __u32 rsv2; };
enum { @@ -121,10 +124,19 @@ struct hns_roce_ib_alloc_ucontext_resp { __u32 max_inline_data; __u8 mac_type; __u8 rsv1[7]; + __u32 dca_qps; + __u32 dca_mmap_size; + __aligned_u64 dca_mmap_key; +}; + +enum hns_roce_uctx_comp_mask { + HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0, };
struct hns_roce_ib_alloc_ucontext { __u32 config; + __u32 comp; /* use hns_roce_uctx_comp_mask */ + __u32 dca_max_qps; __u32 reserved; };
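For reference, the sizing that init_udca_status() performs on the dca_max_qps hint can be reproduced standalone. A minimal sketch, assuming 4 KiB pages and a sample request of 1024 QPs (the kernel additionally clamps the result to hr_dev->caps.num_qps):

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define BITS_PER_BYTE		8UL
#define HNS_DCA_BITS_PER_STATUS	1UL

int main(void)
{
	unsigned long udca_max_qps = 1024;	/* e.g. ucmd->dca_max_qps */
	unsigned long bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
	unsigned long size, status_npage, max_qps;

	/* Each DCA QP needs one buffer-status bit plus one sync bit. */
	size = (udca_max_qps * bits_per_qp + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
	/* Round the bitmap up to whole pages, then derive the usable QP count. */
	status_npage = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	size = status_npage * PAGE_SIZE;
	max_qps = size * BITS_PER_BYTE / bits_per_qp;

	/* 1024 requested QPs -> 1 page mapped, room for 16384 QP states */
	printf("status_npage=%lu dca_mmap_size=%lu max_qps=%lu\n",
	       status_npage, size, max_qps);
	return 0;
}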