From: Ding Tianhong <dingtianhong@huawei.com>
ascend inclusion
category: feature
bugzilla: NA
CVE: NA
-------------------------------------------------
The sp_mutex is used to protect all critical paths of the share pool.
It seriously hurts the performance of the memory allocation and
release interfaces when many processes are in the same memory group,
and it severely limits the scalability of the system. Add a new
read-write semaphore and take it instead of the big lock on the
allocation and release critical paths.

The scalability is greatly improved by this modification.
Test result:

  number of processes    alloc 4M avg time
  Before the patch:
                     1              32us
                     3              96us
                    10             330us
  After the patch:
                     1              32us
                     3              40us
                    10              60us
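For reference, the locking pattern introduced here looks roughly like
the sketch below. It is illustrative only: the struct and function
names are placeholders, not the actual share pool code in the diff
(see mm/share_pool.c).

    #include <linux/rwsem.h>
    #include <linux/list.h>

    struct demo_group {
            struct list_head procs;         /* member processes */
            struct rw_semaphore rw_lock;    /* protects group internals;
                                             * init_rwsem() at group creation */
    };

    /* Allocation/free fast paths run concurrently under the read lock. */
    static void demo_alloc_path(struct demo_group *grp)
    {
            down_read(&grp->rw_lock);
            /* ... map the shared area into every member mm ... */
            up_read(&grp->rw_lock);
    }

    /* Membership changes still take the write lock exclusively. */
    static void demo_add_member(struct demo_group *grp, struct list_head *node)
    {
            down_write(&grp->rw_lock);
            list_add_tail(node, &grp->procs);
            up_write(&grp->rw_lock);
    }

With this split, multiple tasks in the same group can allocate or free
in parallel, while sp_group_add_task() and process exit remain
serialized against them.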
v2: fix some conflicts and clean up some code.
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
include/linux/share_pool.h | 5 ++
kernel/fork.c | 4 +-
mm/share_pool.c | 170 ++++++++++++++++++++-----------------
3 files changed, 100 insertions(+), 79 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 70b841d0eb8e..f2d17cb85fa5 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -93,6 +93,8 @@ struct sp_group {
unsigned long dvpp_va_start;
unsigned long dvpp_size;
atomic_t use_count;
+ /* protect the group internal elements */
+ struct rw_semaphore rw_lock;
};
struct sp_walk_data {
@@ -238,6 +240,8 @@ extern void *vmalloc_hugepage_user(unsigned long size);
extern void *buff_vzalloc_user(unsigned long size);
extern void *buff_vzalloc_hugepage_user(unsigned long size);
+void sp_exit_mm(struct mm_struct *mm);
+
#else
static inline int sp_group_add_task(int pid, int spg_id)
@@ -400,6 +404,7 @@ static inline void *buff_vzalloc_hugepage_user(unsigned long size)
{
return NULL;
}
+
#endif
#endif /* LINUX_SHARE_POOL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index d1d8ac083c80..61496b70cfb8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1055,8 +1055,6 @@ static inline void __mmput(struct mm_struct *mm)
{
VM_BUG_ON(atomic_read(&mm->mm_users));
- sp_group_exit(mm);
-
uprobe_clear_state(mm);
exit_aio(mm);
ksm_exit(mm);
@@ -1084,6 +1082,8 @@ void mmput(struct mm_struct *mm)
{
might_sleep();
+ sp_group_exit(mm);
+
if (atomic_dec_and_test(&mm->mm_users))
__mmput(mm);
}
diff --git a/mm/share_pool.c b/mm/share_pool.c
index e326c95104da..27792a641401 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -197,6 +197,16 @@ static bool host_svm_sp_enable = false;
int sysctl_share_pool_hugepage_enable = 1;
+static void free_sp_group(struct sp_group *spg);
+
+static bool sp_group_get(struct sp_group *spg)
+{
+ if (spg_valid(spg) && atomic_inc_not_zero(&spg->use_count))
+ return true;
+
+ return false;
+}
+
static unsigned long spa_size(struct sp_area *spa)
{
return spa->real_size;
@@ -337,7 +347,9 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id)
put_task_struct(tsk);
} else {
+ mutex_lock(&sp_mutex);
spg = idr_find(&sp_group_idr, spg_id);
+ mutex_unlock(&sp_mutex);
}
return spg;
@@ -392,6 +404,8 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id)
INIT_LIST_HEAD(&spg->procs);
INIT_LIST_HEAD(&spg->spa_list);
+ init_rwsem(&spg->rw_lock);
+
ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id+1,
GFP_KERNEL);
if (ret < 0) {
@@ -422,9 +436,8 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id)
goto out_fput;
}
} else {
- if (!spg_valid(spg))
+ if (!sp_group_get(spg))
return ERR_PTR(-ENODEV);
- atomic_inc(&spg->use_count);
}
return spg;
@@ -607,6 +620,8 @@ int sp_group_add_task(int pid, int spg_id)
}
mm->sp_group = spg;
+
+ down_write(&spg->rw_lock);
/* We reactive the spg even the spg exists already. */
spg->is_alive = true;
list_add_tail(&mm->sp_node, &spg->procs);
@@ -675,11 +690,14 @@ int sp_group_add_task(int pid, int spg_id)
mm->sp_group = NULL;
}
+ up_write(&spg->rw_lock);
out_drop_group:
if (unlikely(ret))
__sp_group_drop_locked(spg);
out_put_mm:
- mmput(mm);
+ /* No need to put the mm if adding it to the sp group succeeded. */
+ if (unlikely(ret))
+ mmput(mm);
out_put_task:
put_task_struct(tsk);
out_unlock:
@@ -712,44 +730,12 @@ static void spg_exit_unlock(bool unlock)
mutex_unlock(&sp_mutex);
}
-/*
- * Do cleanup when a process exits.
- */
-void sp_group_exit(struct mm_struct *mm)
-{
- bool is_alive = true;
- bool unlock;
-
- /*
- * Nothing to do if this thread group doesn't belong to any sp_group.
- * No need to protect this check with lock because we can add a task
- * to a group if !PF_EXITING.
- */
- if (!mm->sp_group)
- return;
-
- spg_exit_lock(&unlock);
- if (list_is_singular(&mm->sp_group->procs))
- is_alive = mm->sp_group->is_alive = false;
- list_del(&mm->sp_node);
- spg_exit_unlock(unlock);
-
- /*
- * To avoid calling this with sp_mutex held, we first mark the
- * sp_group as dead and then send the notification and then do
- * the real cleanup in sp_group_post_exit().
- */
- if (!is_alive)
- blocking_notifier_call_chain(&sp_notifier_chain, 0,
- mm->sp_group);
-}
-
void sp_group_post_exit(struct mm_struct *mm)
{
struct sp_proc_stat *stat;
bool unlock;
- if (!mm->sp_group)
+ if (!enable_ascend_share_pool || !mm->sp_group)
return;
spg_exit_lock(&unlock);
@@ -1139,8 +1125,6 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr,
{
int err;
- if (!mmget_not_zero(mm))
- return;
down_write(&mm->mmap_sem);
err = do_munmap(mm, addr, size, NULL);
@@ -1150,7 +1134,6 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr,
}
up_write(&mm->mmap_sem);
- mmput(mm);
}
/* The caller must hold sp_mutex. */
@@ -1183,8 +1166,6 @@ int sp_free(unsigned long addr)
check_interrupt_context();
- mutex_lock(&sp_mutex);
-
/*
* Access control: a share pool addr can only be freed by another task
* in the same spg or a kthread (such as buff_module_guard_work)
@@ -1217,6 +1198,8 @@ int sp_free(unsigned long addr)
sp_dump_stack();
+ down_read(&spa->spg->rw_lock);
+
__sp_free(spa->spg, spa->va_start, spa_size(spa), NULL);
/* Free the memory of the backing shmem or hugetlbfs */
@@ -1226,6 +1209,9 @@ int sp_free(unsigned long addr)
if (ret)
pr_err("share pool: sp free fallocate failed: %d\n", ret);
+ up_read(&spa->spg->rw_lock);
+
+ mutex_lock(&sp_mutex);
/* pointer stat may be invalid because of kthread buff_module_guard_work */
if (current->mm == NULL) {
kthread_stat.alloc_size -= spa->real_size;
@@ -1236,12 +1222,11 @@ int sp_free(unsigned long addr)
else
BUG();
}
+ mutex_unlock(&sp_mutex);
drop_spa:
__sp_area_drop(spa);
out:
- mutex_unlock(&sp_mutex);
-
sp_try_to_compact();
return ret;
}
@@ -1317,9 +1302,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
if (sp_flags & SP_HUGEPAGE_ONLY)
sp_flags |= SP_HUGEPAGE;
- mutex_lock(&sp_mutex);
spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT);
- mutex_unlock(&sp_mutex);
if (!spg) { /* DVPP pass through scene: first call sp_alloc() */
/* mdc scene hack */
if (enable_mdc_default_group)
@@ -1336,14 +1319,16 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
ret);
return ERR_PTR(ret);
}
- mutex_lock(&sp_mutex);
spg = current->mm->sp_group;
} else { /* other scenes */
- mutex_lock(&sp_mutex);
if (spg_id != SPG_ID_DEFAULT) {
+ mutex_lock(&sp_mutex);
/* the caller should be a member of the sp group */
- if (spg != idr_find(&sp_group_idr, spg_id))
+ if (spg != idr_find(&sp_group_idr, spg_id)) {
+ mutex_unlock(&sp_mutex);
goto out;
+ }
+ mutex_unlock(&sp_mutex);
}
}
@@ -1352,6 +1337,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
goto out;
}
+ down_read(&spg->rw_lock);
if (sp_flags & SP_HUGEPAGE) {
file = spg->file_hugetlb;
size_aligned = ALIGN(size, PMD_SIZE);
@@ -1376,31 +1362,25 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
unsigned long populate = 0;
struct vm_area_struct *vma;
- if (!mmget_not_zero(mm))
- continue;
-
down_write(&mm->mmap_sem);
mmap_addr = sp_mmap(mm, file, spa, &populate);
if (IS_ERR_VALUE(mmap_addr)) {
up_write(&mm->mmap_sem);
p = (void *)mmap_addr;
__sp_free(spg, sp_addr, size_aligned, mm);
- mmput(mm);
pr_err("share pool: allocation sp mmap failed, ret %ld\n", mmap_addr);
goto out;
}
- p =(void *)mmap_addr; /* success */
+ p = (void *)mmap_addr; /* success */
if (populate == 0) {
up_write(&mm->mmap_sem);
- mmput(mm);
continue;
}
vma = find_vma(mm, sp_addr);
if (unlikely(!vma)) {
up_write(&mm->mmap_sem);
- mmput(mm);
pr_err("share pool: allocation failed due to find %pK vma failure\n",
(void *)sp_addr);
p = ERR_PTR(-EINVAL);
@@ -1461,24 +1441,22 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
size_aligned = ALIGN(size, PAGE_SIZE);
sp_flags &= ~SP_HUGEPAGE;
__sp_area_drop(spa);
- mmput(mm);
goto try_again;
}
}
-
- mmput(mm);
break;
}
- mmput(mm);
}
+out:
+ up_read(&spg->rw_lock);
+
+ mutex_lock(&sp_mutex);
if (!IS_ERR(p)) {
stat = idr_find(&sp_stat_idr, current->mm->sp_stat_id);
if (stat)
stat->alloc_size += size_aligned;
}
-
-out:
mutex_unlock(&sp_mutex);
/* this will free spa if mmap failed */
@@ -1556,10 +1534,6 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
}
}
- if (!mmget_not_zero(mm)) {
- ret_addr = -ESPGMMEXIT;
- goto put_file;
- }
down_write(&mm->mmap_sem);
ret_addr = sp_mmap(mm, file, spa, &populate);
@@ -1604,8 +1578,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
put_mm:
up_write(&mm->mmap_sem);
- mmput(mm);
-put_file:
+
if (!spa->spg && file)
fput(file);
@@ -1769,10 +1742,12 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
*/
stat = sp_init_proc_stat(tsk, mm);
if (IS_ERR(stat)) {
+ mutex_unlock(&sp_mutex);
uva = stat;
pr_err("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat));
goto out_unlock;
}
+ mutex_unlock(&sp_mutex);
spg = __sp_find_spg(pid, SPG_ID_DEFAULT);
if (spg == NULL) {
@@ -1794,6 +1769,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
}
if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) {
+ up_read(&spg->rw_lock);
pr_err("share pool: %s: the kva %pK is not valid\n", __func__, (void *)kva_aligned);
goto out_drop_spa;
}
@@ -1808,12 +1784,14 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
goto out_unlock;
}
+ down_read(&spg->rw_lock);
if (enable_share_k2u_spg)
spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG);
else
spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK);
if (IS_ERR(spa)) {
+ up_read(&spg->rw_lock);
if (printk_ratelimit())
pr_err("share pool: k2u(spg) failed due to alloc spa failure "
"(potential no enough virtual memory when -75): %ld\n",
@@ -1831,14 +1809,18 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
uva = sp_make_share_kva_to_spg(kva_aligned, spa, spg);
else
uva = sp_make_share_kva_to_task(kva_aligned, spa, mm);
+
+ up_read(&spg->rw_lock);
} else {
/* group is dead, return -ENODEV */
pr_err("share pool: failed to make k2u, sp group is dead\n");
}
if (!IS_ERR(uva)) {
+ mutex_lock(&sp_mutex);
uva = uva + (kva - kva_aligned);
stat->k2u_size += size_aligned;
+ mutex_unlock(&sp_mutex);
} else {
/* associate vma and spa */
if (!vmalloc_area_clr_flag(spa, kva_aligned, VM_SHAREPOOL))
@@ -1849,7 +1831,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
out_drop_spa:
__sp_area_drop(spa);
out_unlock:
- mutex_unlock(&sp_mutex);
mmput(mm);
out_put_task:
put_task_struct(tsk);
@@ -2144,7 +2125,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
unsigned int page_size;
struct sp_proc_stat *stat;
- mutex_lock(&sp_mutex);
/*
* at first we guess it's a hugepage addr
* we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u
@@ -2157,7 +2137,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
if (printk_ratelimit())
pr_err("share pool: invalid input uva %pK in unshare uva\n",
(void *)uva);
- goto out_unlock;
+ goto out;
}
}
@@ -2259,10 +2239,14 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
goto out_drop_area;
}
+ down_read(&spa->spg->rw_lock);
__sp_free(spa->spg, uva_aligned, size_aligned, NULL);
+ up_read(&spa->spg->rw_lock);
}
sp_dump_stack();
+
+ mutex_lock(&sp_mutex);
/* pointer stat may be invalid because of kthread buff_module_guard_work */
if (current->mm == NULL) {
kthread_stat.k2u_size -= spa->real_size;
@@ -2273,6 +2257,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
else
WARN(1, "share_pool: %s: null process stat\n", __func__);
}
+ mutex_unlock(&sp_mutex);
out_clr_flag:
/* deassociate vma and spa */
@@ -2281,8 +2266,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
out_drop_area:
__sp_area_drop(spa);
-out_unlock:
- mutex_unlock(&sp_mutex);
+out:
return ret;
}
@@ -2446,7 +2430,7 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
check_interrupt_context();
if (device_id < 0 || device_id >= MAX_DEVID || pid < 0 || size <= 0 ||
- size> MMAP_SHARE_POOL_16G_SIZE)
+ size > MMAP_SHARE_POOL_16G_SIZE)
return false;
mutex_lock(&sp_mutex);
@@ -2468,9 +2452,10 @@ EXPORT_SYMBOL_GPL(sp_config_dvpp_range);
/* Check whether the address belongs to the share pool. */
bool is_sharepool_addr(unsigned long addr)
{
- if (host_svm_sp_enable == false)
- return addr >= MMAP_SHARE_POOL_START && addr < (MMAP_SHARE_POOL_16G_START + MMAP_SHARE_POOL_16G_SIZE);
- return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END;
+ if (host_svm_sp_enable == false)
+ return addr >= MMAP_SHARE_POOL_START && addr < (MMAP_SHARE_POOL_16G_START + MMAP_SHARE_POOL_16G_SIZE);
+
+ return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END;
}
EXPORT_SYMBOL_GPL(is_sharepool_addr);
@@ -2515,7 +2500,8 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
-static void rb_spa_stat_show(struct seq_file *seq) {
+static void rb_spa_stat_show(struct seq_file *seq)
+{
struct rb_node *node;
struct sp_area *spa;
@@ -2814,6 +2800,36 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
}
EXPORT_SYMBOL(sharepool_no_page);
+#define MM_WOULD_FREE 2
+
+void sp_group_exit(struct mm_struct *mm)
+{
+ struct sp_group *spg = NULL;
+ bool is_alive = true, unlock;
+
+ if (!enable_ascend_share_pool)
+ return;
+
+ spg = mm->sp_group;
+
+ /* If mm_users is 2, the mm is ready to be freed because the
+  * last owner of this mm is exiting.
+  */
+ if (spg_valid(spg) && atomic_read(&mm->mm_users) == MM_WOULD_FREE) {
+ spg_exit_lock(&unlock);
+ down_write(&spg->rw_lock);
+ if (list_is_singular(&spg->procs))
+ is_alive = spg->is_alive = false;
+ list_del(&mm->sp_node);
+ up_write(&spg->rw_lock);
+ if (!is_alive)
+ blocking_notifier_call_chain(&sp_notifier_chain, 0,
+ mm->sp_group);
+ atomic_dec(&mm->mm_users);
+ spg_exit_unlock(unlock);
+ }
+}
+
struct page *sp_alloc_pages(struct vm_struct *area, gfp_t mask,
unsigned int page_order, int node)
{
--
2.25.1