driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9FIHP
----------------------------------------------------------------------
A hardware reset is required to stop traffic injection, currently this is done by obtaining a notification from the kernel space driver and stop ringing the doorbell in user space. This notification is implemented through a shared memory. If concurrency scenarios are involved, the shared memory mechanism needs barriers to ensure reliability, but barriers will severely affect performance.
This patch uses a new scheme to solve this problem. Before resetting, the kernel-mode driver will zap all the shared memory between user-mode driver and kernel-mode driver, and point these VMAs to a zero page, so that user-mode can no longer access any hardware address during reset, thus achieving flow stop.
Fixes: 9f651379c548 ("RDMA/hns: Kernel notify usr space to stop ring db") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Juan Zhou zhoujuan51@h-partners.com --- drivers/infiniband/core/ib_core_uverbs.c | 85 ++++++++++++++++++++++ drivers/infiniband/core/rdma_core.h | 1 - drivers/infiniband/core/uverbs_main.c | 64 ---------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 14 ++-- include/rdma/ib_verbs.h | 2 + 5 files changed, 94 insertions(+), 72 deletions(-)
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index b51bd7087..4e27389a7 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -5,6 +5,7 @@ * Copyright 2019 Marvell. All rights reserved. */ #include <linux/xarray.h> +#include <linux/sched/mm.h> #include "uverbs.h" #include "core_priv.h"
@@ -365,3 +366,87 @@ int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, U32_MAX); } EXPORT_SYMBOL(rdma_user_mmap_entry_insert); + +void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) +{ + struct rdma_umap_priv *priv, *next_priv; + + lockdep_assert_held(&ufile->hw_destroy_rwsem); + + while (1) { + struct mm_struct *mm = NULL; + + /* Get an arbitrary mm pointer that hasn't been cleaned yet */ + mutex_lock(&ufile->umap_lock); + while (!list_empty(&ufile->umaps)) { + int ret; + + priv = list_first_entry(&ufile->umaps, + struct rdma_umap_priv, list); + mm = priv->vma->vm_mm; + ret = mmget_not_zero(mm); + if (!ret) { + list_del_init(&priv->list); + if (priv->entry) { + rdma_user_mmap_entry_put(priv->entry); + priv->entry = NULL; + } + mm = NULL; + continue; + } + break; + } + mutex_unlock(&ufile->umap_lock); + if (!mm) + return; + + /* + * The umap_lock is nested under mmap_lock since it used within + * the vma_ops callbacks, so we have to clean the list one mm + * at a time to get the lock ordering right. Typically there + * will only be one mm, so no big deal. + */ + mmap_read_lock(mm); + mutex_lock(&ufile->umap_lock); + list_for_each_entry_safe(priv, next_priv, &ufile->umaps, list) { + struct vm_area_struct *vma = priv->vma; + + if (vma->vm_mm != mm) + continue; + list_del_init(&priv->list); + + zap_vma_ptes(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + + if (priv->entry) { + rdma_user_mmap_entry_put(priv->entry); + priv->entry = NULL; + } + } + mutex_unlock(&ufile->umap_lock); + mmap_read_unlock(mm); + mmput(mm); + } +} +EXPORT_SYMBOL(uverbs_user_mmap_disassociate); + +/** + * rdma_user_mmap_disassociate() - disassociate the mmap from the ucontext. + * + * @ucontext: associated user context. + * + * This function should be called by drivers that need to disable mmap for + * some ucontexts. + */ +void rdma_user_mmap_disassociate(struct ib_ucontext *ucontext) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + + /* Racing with uverbs_destroy_ufile_hw */ + if (!down_read_trylock(&ufile->hw_destroy_rwsem)) + return; + + uverbs_user_mmap_disassociate(ufile); + up_read(&ufile->hw_destroy_rwsem); +} +EXPORT_SYMBOL(rdma_user_mmap_disassociate); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 33706dad6..ad01fbd52 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -149,7 +149,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi); void uverbs_destroy_api(struct uverbs_api *uapi); void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, unsigned int num_attrs); -void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
extern const struct uapi_definition uverbs_def_obj_async_fd[]; extern const struct uapi_definition uverbs_def_obj_counters[]; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 495d5a5d0..f1db48c2c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -45,7 +45,6 @@ #include <linux/cdev.h> #include <linux/anon_inodes.h> #include <linux/slab.h> -#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@@ -817,69 +816,6 @@ static const struct vm_operations_struct rdma_umap_ops = { .fault = rdma_umap_fault, };
-void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) -{ - struct rdma_umap_priv *priv, *next_priv; - - lockdep_assert_held(&ufile->hw_destroy_rwsem); - - while (1) { - struct mm_struct *mm = NULL; - - /* Get an arbitrary mm pointer that hasn't been cleaned yet */ - mutex_lock(&ufile->umap_lock); - while (!list_empty(&ufile->umaps)) { - int ret; - - priv = list_first_entry(&ufile->umaps, - struct rdma_umap_priv, list); - mm = priv->vma->vm_mm; - ret = mmget_not_zero(mm); - if (!ret) { - list_del_init(&priv->list); - if (priv->entry) { - rdma_user_mmap_entry_put(priv->entry); - priv->entry = NULL; - } - mm = NULL; - continue; - } - break; - } - mutex_unlock(&ufile->umap_lock); - if (!mm) - return; - - /* - * The umap_lock is nested under mmap_lock since it used within - * the vma_ops callbacks, so we have to clean the list one mm - * at a time to get the lock ordering right. Typically there - * will only be one mm, so no big deal. - */ - mmap_read_lock(mm); - mutex_lock(&ufile->umap_lock); - list_for_each_entry_safe (priv, next_priv, &ufile->umaps, - list) { - struct vm_area_struct *vma = priv->vma; - - if (vma->vm_mm != mm) - continue; - list_del_init(&priv->list); - - zap_vma_ptes(vma, vma->vm_start, - vma->vm_end - vma->vm_start); - - if (priv->entry) { - rdma_user_mmap_entry_put(priv->entry); - priv->entry = NULL; - } - } - mutex_unlock(&ufile->umap_lock); - mmap_read_unlock(mm); - mmput(mm); - } -} - /* * ib_uverbs_open() does not need the BKL: * diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 9815853f5..9774ddd4e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -41,6 +41,7 @@ #include <rdma/ib_cache.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> +#include <rdma/ib_verbs.h>
#include "hnae3.h" #include "hclge_main.h" @@ -7438,14 +7439,13 @@ int hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_reset_state *state; - - state = (struct hns_roce_v2_reset_state *)hr_dev->reset_kaddr; + struct hns_roce_ucontext *uctx, *tmp;
- state->reset_state = HNS_ROCE_IS_RESETTING; - state->hw_ready = 0; - /* Ensure reset state was flushed in memory */ - wmb(); + mutex_lock(&hr_dev->uctx_list_mutex); + list_for_each_entry_safe(uctx, tmp, &hr_dev->uctx_list, list) { + rdma_user_mmap_disassociate(&uctx->ibucontext); + } + mutex_unlock(&hr_dev->uctx_list_mutex); }
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1930dfbf1..7e602a7e6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2949,6 +2949,7 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, size_t length, u32 min_pgoff, u32 max_pgoff); +void rdma_user_mmap_disassociate(struct ib_ucontext *ucontext);
static inline int rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, @@ -4728,6 +4729,7 @@ void rdma_roce_rescan_device(struct ib_device *ibdev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name,