From: Xinghai Cen cenxinghai@h-partners.com
Some bugfix patches for the hns RoCE driver.
Chengchang Tang (4):
  RDMA/hns: Fix 1bit-ECC recovery address in non-4K OS
  RDMA/hns: Fix possible RAS when DCA is not attached
  RDMA/hns: Fix a meaningless loop in active_dca_pages_proc()
  RDMA/hns: Fix list_*_careful() not being used in pairs
Junxian Huang (1): RDMA/hns: Fix VF triggering PF reset in abnormal interrupt handler
drivers/infiniband/hw/hns/hns_roce_dca.c | 60 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_dca.h | 2 + drivers/infiniband/hw/hns/hns_roce_device.h | 3 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 12 +++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 23 ++++++++ 6 files changed, 102 insertions(+), 7 deletions(-)
-- 2.33.0
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAMO4D
----------------------------------------------------------------------
In the abnormal interrupt handler, a PF reset is triggered even if the device is a VF. A VF reset should be triggered instead.
Fixes: 2b9acb9a97fe ("RDMA/hns: Add the process of AEQ overflow for hip08") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 33af1fc89892..ba483b49e2e0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6927,6 +6927,7 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev, struct pci_dev *pdev = hr_dev->pci_dev; struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); const struct hnae3_ae_ops *ops = ae_dev->ops; + enum hnae3_reset_type reset_type; irqreturn_t int_work = IRQ_NONE; u32 int_en;
@@ -6938,10 +6939,12 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
+ reset_type = hr_dev->is_vf ? + HNAE3_VF_FUNC_RESET : HNAE3_FUNC_RESET; + /* Set reset level for reset_event() */ if (ops->set_default_reset_request) - ops->set_default_reset_request(ae_dev, - HNAE3_FUNC_RESET); + ops->set_default_reset_request(ae_dev, reset_type); if (ops->reset_event) ops->reset_event(pdev, NULL);
-- 2.33.0
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAMO4D
----------------------------------------------------------------------
The 1bit-ECC recovery address read from HW only contains bits 64:12, so it should be left-shifted by a fixed 12 bits (HNS_HW_PAGE_SHIFT) when used, rather than by PAGE_SHIFT.
Fixes: 1a8c6fa4adfb ("RDMA/hns: Recover 1bit-ECC error of RAM on chip") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ba483b49e2e0..0812f654323b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7014,7 +7014,7 @@ static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data) res_type == ECC_RESOURCE_SCCC) return le64_to_cpu(*data);
- return le64_to_cpu(*data) << PAGE_SHIFT; + return le64_to_cpu(*data) << HNS_HW_PAGE_SHIFT; }
static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type, -- 2.33.0
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAMO4D
----------------------------------------------------------------------
A RAS error may occur if the DCA buffer is not attached and the doorbell (DB) is rung.
This patch adds a safe page for DCA. The safe page is attached to the QP whenever no DCA buffer is attached, so that the HW does not access illegal addresses.
Fixes: d8cca476a8d2 ("RDMA/hns: Add method for attaching WQE buffer") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 56 ++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_dca.h | 2 + drivers/infiniband/hw/hns/hns_roce_device.h | 3 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 12 +++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 23 +++++++++ 5 files changed, 94 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index ddc21f10b923..882f27766848 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -306,6 +306,33 @@ hr_qp_to_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) return to_hr_dca_ctx(hr_dev, uctx); }
+int hns_roce_map_dca_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + unsigned int page_count = hr_qp->dca_cfg.npages; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; + unsigned int i; + int ret; + + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (IS_ERR_OR_NULL(pages)) { + ibdev_err(ibdev, "failed to alloc DCA safe page array.\n"); + return IS_ERR(pages) ? PTR_ERR(pages) : -ENOMEM; + } + + for (i = 0; i < page_count; i++) + pages[i] = hr_dev->dca_safe_page; + + ret = hns_roce_mtr_map(hr_dev, &hr_qp->mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map safe page for DCA, ret = %d.\n", + ret); + + kvfree(pages); + return ret; +} + static int config_dca_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, dma_addr_t *pages, int page_count) @@ -332,6 +359,29 @@ static int config_dca_qpc(struct hns_roce_dev *hr_dev, return 0; }
+static int config_dca_qpc_to_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + unsigned int page_count = hr_qp->dca_cfg.npages; + dma_addr_t *pages; + unsigned int i; + int ret; + + might_sleep(); + + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (IS_ERR_OR_NULL(pages)) + return -ENOMEM; + + for (i = 0; i < page_count; i++) + pages[i] = hr_dev->dca_safe_page; + + ret = config_dca_qpc(hr_dev, hr_qp, pages, page_count); + + kvfree(pages); + return ret; +} + static int setup_dca_buf_to_hw(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_dca_ctx *ctx, u32 buf_id, @@ -977,8 +1027,10 @@ static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, spin_unlock(&ctx->aging_lock);
if (start_free_dca_buf(ctx, cfg->dcan)) { - if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) - free_buf_from_dca_mem(ctx, cfg); + if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) { + if (!config_dca_qpc_to_safe_page(hr_dev, hr_qp)) + free_buf_from_dca_mem(ctx, cfg); + }
stop_free_dca_buf(ctx, cfg->dcan); } diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index 68f46f8688ae..16e6e0a03431 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -74,4 +74,6 @@ void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, hns_dca_enum_callback cb); +int hns_roce_map_dca_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 92a9dfc04a80..67ceeb42d366 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1234,6 +1234,9 @@ struct hns_roce_dev { struct mutex mtr_unfree_list_mutex; /* protect mtr_unfree_list */ struct list_head umem_unfree_list; /* list of unfree umem on this dev */ struct mutex umem_unfree_list_mutex; /* protect umem_unfree_list */ + + void *dca_safe_buf; + dma_addr_t dca_safe_page; };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f203ffdfd4b3..406663897a2a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1489,6 +1489,17 @@ static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev) kvfree(hr_dev->dfx_cnt); }
+static void hns_roce_free_dca_safe_buf(struct hns_roce_dev *hr_dev) +{ + if (!hr_dev->dca_safe_buf) + return; + + dma_free_coherent(hr_dev->dev, PAGE_SIZE, hr_dev->dca_safe_buf, + hr_dev->dca_safe_page); + hr_dev->dca_safe_page = 0; + hr_dev->dca_safe_buf = NULL; +} + int hns_roce_init(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; @@ -1599,6 +1610,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) hns_roce_unregister_device(hr_dev, bond_cleanup); hns_roce_unregister_debugfs(hr_dev); hns_roce_unregister_poe_ch(hr_dev); + hns_roce_free_dca_safe_buf(hr_dev);
if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 13705d3de9a7..ec26398c9d9c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -825,6 +825,8 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_disable_dca(hr_dev, hr_qp, udata); kvfree(hr_qp->mtr_node); hr_qp->mtr_node = NULL; + } else if (dca_en) { + ret = hns_roce_map_dca_safe_page(hr_dev, hr_qp); }
return ret; @@ -845,6 +847,21 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_disable_dca(hr_dev, hr_qp, udata); }
+static int alloc_dca_safe_page(struct hns_roce_dev *hr_dev) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + + hr_dev->dca_safe_buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE, + &hr_dev->dca_safe_page, + GFP_KERNEL); + if (!hr_dev->dca_safe_buf) { + ibdev_err(ibdev, "failed to alloc dca safe page.\n"); + return -ENOMEM; + } + + return 0; +} + static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, @@ -862,6 +879,12 @@ static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, page_shift = ucmd->pageshift;
dca_en = check_dca_is_enable(hr_dev, init_attr, !!udata, ucmd->buf_addr); + if (dca_en && !hr_dev->dca_safe_buf) { + ret = alloc_dca_safe_page(hr_dev); + if (ret) + return ret; + } + ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, page_shift, &buf_attr); if (ret) { ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); -- 2.33.0
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAMO4D
----------------------------------------------------------------------
The iterated element does not change, making the loop in active_dca_pages_proc() meaningless.
Fixes: 12aa71f83089 ("RDMA/hns: Add DCA support for kernel space") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 882f27766848..d017a986589f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -614,7 +614,7 @@ static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) }
for (; changed && i < mem->page_count; i++) - if (dca_page_is_free(state)) + if (dca_page_is_free(&mem->states[i])) free_pages++;
/* Clean mem changed to dirty */ -- 2.33.0
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAMO4D
----------------------------------------------------------------------
list_del_init_careful() is designed to be used together with list_empty_careful().
Fixes: d8cca476a8d2 ("RDMA/hns: Add method for attaching WQE buffer") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index d017a986589f..91b6b65c83df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -1019,7 +1019,7 @@ static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, list_for_each_entry_safe(cfg, tmp_cfg, &ctx->aging_new_list, aging_node) list_move(&cfg->aging_node, &ctx->aging_proc_list);
- while (!ctx->exit_aging && !list_empty(&ctx->aging_proc_list)) { + while (!ctx->exit_aging && !list_empty_careful(&ctx->aging_proc_list)) { cfg = list_first_entry(&ctx->aging_proc_list, struct hns_roce_dca_cfg, aging_node); list_del_init_careful(&cfg->aging_node); -- 2.33.0
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/11187 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/D...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/11187 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/D...