
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBV8UW ---------------------------------------------------------------------- Kernel dbs are not processed by the delayed-destruction mechanism. This may lead to the HW UAF (hardware use-after-free) described in the Fixes commit. Extend hns_roce_umem_node into hns_roce_db_pg_node by adding kernel db information. This struct is now used by both userspace and kernel db pages. Fixes: 04c5d76e4f15 ("RDMA/hns: Fix simultaneous reset and resource deregistration") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com> --- drivers/infiniband/hw/hns/hns_roce_cq.c | 3 +- drivers/infiniband/hw/hns/hns_roce_db.c | 82 +++++++++++++-------- drivers/infiniband/hw/hns/hns_roce_device.h | 35 +++++---- drivers/infiniband/hw/hns/hns_roce_main.c | 10 +-- drivers/infiniband/hw/hns/hns_roce_mr.c | 30 ++++---- drivers/infiniband/hw/hns/hns_roce_qp.c | 3 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 7 files changed, 97 insertions(+), 68 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index b0fdf073519c..a18d379d401c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -338,7 +338,8 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, hns_roce_db_unmap_user(uctx, &hr_cq->db, hr_cq->delayed_destroy_flag); } else { - hns_roce_free_db(hr_dev, &hr_cq->db); + hns_roce_free_db(hr_dev, &hr_cq->db, + hr_cq->delayed_destroy_flag); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 6e276a45b3ea..d9b217891b93 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -12,6 +12,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, { unsigned long page_addr = virt & PAGE_MASK; struct hns_roce_user_db_page *page; + struct ib_umem 
*umem; unsigned int offset; int ret = 0; @@ -29,32 +30,33 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, refcount_set(&page->refcount, 1); page->user_virt = page_addr; - page->umem = ib_umem_get(context->ibucontext.device, page_addr, - PAGE_SIZE, 0); - if (IS_ERR(page->umem)) { - ret = PTR_ERR(page->umem); + page->db_node = kvzalloc(sizeof(*page->db_node), GFP_KERNEL); + if (!page->db_node) { + ret = -ENOMEM; goto err_page; } - page->umem_node = kvmalloc(sizeof(*page->umem_node), GFP_KERNEL); - if (!page->umem_node) { - ret = -ENOMEM; - goto err_umem; + + umem = ib_umem_get(context->ibucontext.device, page_addr, PAGE_SIZE, 0); + if (IS_ERR(umem)) { + ret = PTR_ERR(umem); + goto err_dbnode; } + page->db_node->umem = umem; list_add(&page->list, &context->page_list); found: offset = virt - page_addr; - db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + offset; - db->virt_addr = sg_virt(page->umem->sgt_append.sgt.sgl) + offset; + db->dma = sg_dma_address(page->db_node->umem->sgt_append.sgt.sgl) + offset; + db->virt_addr = sg_virt(page->db_node->umem->sgt_append.sgt.sgl) + offset; db->u.user_page = page; refcount_inc(&page->refcount); mutex_unlock(&context->page_mutex); return 0; -err_umem: - ib_umem_release(page->umem); +err_dbnode: + kvfree(page->db_node); err_page: kfree(page); err_out: @@ -68,20 +70,20 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, bool delayed_unmap_flag) { struct hns_roce_dev *hr_dev = to_hr_dev(context->ibucontext.device); - struct hns_roce_umem_node *umem_node = db->u.user_page->umem_node; + struct hns_roce_db_pg_node *db_node = db->u.user_page->db_node; mutex_lock(&context->page_mutex); - umem_node->delayed_unmap_flag |= delayed_unmap_flag; + db_node->delayed_unmap_flag |= delayed_unmap_flag; refcount_dec(&db->u.user_page->refcount); if (refcount_dec_if_one(&db->u.user_page->refcount)) { list_del(&db->u.user_page->list); - if (umem_node->delayed_unmap_flag) { - 
hns_roce_add_unfree_umem(db->u.user_page, hr_dev); + if (db_node->delayed_unmap_flag) { + hns_roce_add_unfree_db(db_node, hr_dev); } else { - ib_umem_release(db->u.user_page->umem); - kvfree(umem_node); + ib_umem_release(db_node->umem); + kvfree(db_node); } kfree(db->u.user_page); } @@ -93,6 +95,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( struct device *dma_device) { struct hns_roce_db_pgdir *pgdir; + dma_addr_t db_dma; + u32 *page; pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); if (!pgdir) @@ -102,14 +106,24 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT); pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; - pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, GFP_KERNEL); - if (!pgdir->page) { - kfree(pgdir); - return NULL; - } + pgdir->db_node = kvzalloc(sizeof(*pgdir->db_node), GFP_KERNEL); + if (!pgdir->db_node) + goto err_node; + + page = dma_alloc_coherent(dma_device, PAGE_SIZE, &db_dma, GFP_KERNEL); + if (!page) + goto err_dma; + + pgdir->db_node->kdb.page = page; + pgdir->db_node->kdb.db_dma = db_dma; return pgdir; + +err_dma: + kvfree(pgdir->db_node); +err_node: + kfree(pgdir); + return NULL; } static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, @@ -136,8 +150,8 @@ static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, db->u.pgdir = pgdir; db->index = i; - db->db_record = pgdir->page + db->index; - db->dma = pgdir->db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE; + db->db_record = pgdir->db_node->kdb.page + db->index; + db->dma = pgdir->db_node->kdb.db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE; db->order = order; return 0; @@ -172,13 +186,17 @@ int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db, return ret; } -void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) +void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db, + bool delayed_unmap_flag) { + 
struct hns_roce_db_pg_node *db_node = db->u.pgdir->db_node; unsigned long o; unsigned long i; mutex_lock(&hr_dev->pgdir_mutex); + db_node->delayed_unmap_flag |= delayed_unmap_flag; + o = db->order; i = db->index; @@ -192,9 +210,15 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) if (bitmap_full(db->u.pgdir->order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT)) { - dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page, - db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); + if (db_node->delayed_unmap_flag) { + hns_roce_add_unfree_db(db_node, hr_dev); + } else { + dma_free_coherent(hr_dev->dev, PAGE_SIZE, + db_node->kdb.page, + db_node->kdb.db_dma); + kvfree(db_node); + } kfree(db->u.pgdir); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 3a58fd1b1364..9b81f2974c82 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -465,27 +465,29 @@ struct hns_roce_buf { unsigned int page_shift; }; +struct hns_roce_db_pg_node { + struct list_head list; + struct ib_umem *umem; + struct { + u32 *page; + dma_addr_t db_dma; + } kdb; + bool delayed_unmap_flag; +}; + struct hns_roce_db_pgdir { struct list_head list; DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE); DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT); unsigned long *bits[HNS_ROCE_DB_TYPE_COUNT]; - u32 *page; - dma_addr_t db_dma; -}; - -struct hns_roce_umem_node { - struct ib_umem *umem; - struct list_head list; - bool delayed_unmap_flag; + struct hns_roce_db_pg_node *db_node; }; struct hns_roce_user_db_page { struct list_head list; - struct ib_umem *umem; unsigned long user_virt; refcount_t refcount; - struct hns_roce_umem_node *umem_node; + struct hns_roce_db_pg_node *db_node; }; struct hns_roce_db { @@ -1154,8 +1156,8 @@ struct hns_roce_dev { struct list_head mtr_unfree_list; /* list of unfree mtr on this dev */ struct mutex mtr_unfree_list_mutex; /* 
protect mtr_unfree_list */ - struct list_head umem_unfree_list; /* list of unfree umem on this dev */ - struct mutex umem_unfree_list_mutex; /* protect umem_unfree_list */ + struct list_head db_unfree_list; /* list of unfree db on this dev */ + struct mutex db_unfree_list_mutex; /* protect db_unfree_list */ void *dca_safe_buf; dma_addr_t dca_safe_page; @@ -1438,7 +1440,8 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, bool delayed_unmap_flag); int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db, int order); -void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db); +void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db, + bool delayed_unmap_flag); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); @@ -1461,9 +1464,9 @@ struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, enum hns_roce_mmap_type mmap_type); -void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page, - struct hns_roce_dev *hr_dev); -void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev); +void hns_roce_add_unfree_db(struct hns_roce_db_pg_node *db_node, + struct hns_roce_dev *hr_dev); +void hns_roce_free_unfree_db(struct hns_roce_dev *hr_dev); void hns_roce_add_unfree_mtr(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8f110e64e601..a0b9b9f1ff2c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1235,7 +1235,7 @@ static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) hns_roce_cleanup_dca(hr_dev); hns_roce_cleanup_bitmap(hr_dev); - mutex_destroy(&hr_dev->umem_unfree_list_mutex); + 
mutex_destroy(&hr_dev->db_unfree_list_mutex); mutex_destroy(&hr_dev->mtr_unfree_list_mutex); mutex_destroy(&hr_dev->uctx_list_mutex); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || @@ -1264,8 +1264,8 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) INIT_LIST_HEAD(&hr_dev->mtr_unfree_list); mutex_init(&hr_dev->mtr_unfree_list_mutex); - INIT_LIST_HEAD(&hr_dev->umem_unfree_list); - mutex_init(&hr_dev->umem_unfree_list_mutex); + INIT_LIST_HEAD(&hr_dev->db_unfree_list); + mutex_init(&hr_dev->db_unfree_list_mutex); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { @@ -1309,7 +1309,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&hr_dev->pgdir_mutex); - mutex_destroy(&hr_dev->umem_unfree_list_mutex); + mutex_destroy(&hr_dev->db_unfree_list_mutex); mutex_destroy(&hr_dev->mtr_unfree_list_mutex); mutex_destroy(&hr_dev->uctx_list_mutex); @@ -1503,7 +1503,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); - hns_roce_free_unfree_umem(hr_dev); + hns_roce_free_unfree_db(hr_dev); hns_roce_free_unfree_mtr(hr_dev); hns_roce_teardown_hca(hr_dev); hns_roce_cleanup_hem(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 737a7c28acab..b3a1e5b4cd8d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1246,27 +1246,27 @@ void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev) mutex_unlock(&hr_dev->mtr_unfree_list_mutex); } -void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page, - struct hns_roce_dev *hr_dev) +void hns_roce_add_unfree_db(struct hns_roce_db_pg_node *db_node, + struct hns_roce_dev *hr_dev) { - struct hns_roce_umem_node *pos = user_page->umem_node; - 
- pos->umem = user_page->umem; - - mutex_lock(&hr_dev->umem_unfree_list_mutex); - list_add_tail(&pos->list, &hr_dev->umem_unfree_list); - mutex_unlock(&hr_dev->umem_unfree_list_mutex); + mutex_lock(&hr_dev->db_unfree_list_mutex); + list_add_tail(&db_node->list, &hr_dev->db_unfree_list); + mutex_unlock(&hr_dev->db_unfree_list_mutex); } -void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev) +void hns_roce_free_unfree_db(struct hns_roce_dev *hr_dev) { - struct hns_roce_umem_node *pos, *next; + struct hns_roce_db_pg_node *pos, *next; - mutex_lock(&hr_dev->umem_unfree_list_mutex); - list_for_each_entry_safe(pos, next, &hr_dev->umem_unfree_list, list) { + mutex_lock(&hr_dev->db_unfree_list_mutex); + list_for_each_entry_safe(pos, next, &hr_dev->db_unfree_list, list) { list_del(&pos->list); - ib_umem_release(pos->umem); + if (pos->umem) + ib_umem_release(pos->umem); + else + dma_free_coherent(hr_dev->dev, PAGE_SIZE, + pos->kdb.page, pos->kdb.db_dma); kvfree(pos); } - mutex_unlock(&hr_dev->umem_unfree_list_mutex); + mutex_unlock(&hr_dev->db_unfree_list_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 828bf9e59e27..b888955ec0b3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1131,7 +1131,8 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, qp_user_mmap_entry_remove(hr_qp); } else { if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_qp->rdb); + hns_roce_free_db(hr_dev, &hr_qp->rdb, + hr_qp->delayed_destroy_flag); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 965ed2d682ad..1262e9535c22 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -431,7 +431,7 @@ static void free_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hns_roce_db_unmap_user(uctx, &srq->rdb, 
srq->delayed_destroy_flag); } else { - hns_roce_free_db(hr_dev, &srq->rdb); + hns_roce_free_db(hr_dev, &srq->rdb, srq->delayed_destroy_flag); } } -- 2.33.0