From: Zhang Ming <154842638@qq.com>
openEuler inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I3D58V
CVE: NA
----------------------------------
mpam_devices_lock is not unlocked before the error-path return statement in mpam_enable(), which may lead to a deadlock.
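After the fix, the error path in mpam_enable() releases the lock before returning (sketch of the resulting code, matching the hunk below):

    mutex_lock(&mpam_devices_lock);
    mpam_enable_squash_features();
    err = mpam_allocate_config();
    if (err) {
        /* release the lock on the error path as well, otherwise the
         * next mutex_lock(&mpam_devices_lock) would block forever */
        mutex_unlock(&mpam_devices_lock);
        return;
    }
    mutex_unlock(&mpam_devices_lock);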
Signed-off-by: Zhang Ming <154842638@qq.com>
Reported-by: Cheng Jian <cj.chengjian@huawei.com>
Suggested-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Wang ShaoBo <bobo.shaobowang@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/kernel/mpam/mpam_device.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/mpam/mpam_device.c b/arch/arm64/kernel/mpam/mpam_device.c index fc7aa1ae0b825..f8840274b902f 100644 --- a/arch/arm64/kernel/mpam/mpam_device.c +++ b/arch/arm64/kernel/mpam/mpam_device.c @@ -560,8 +560,10 @@ static void __init mpam_enable(struct work_struct *work) mutex_lock(&mpam_devices_lock); mpam_enable_squash_features(); err = mpam_allocate_config(); - if (err) + if (err) { + mutex_unlock(&mpam_devices_lock); return; + } mutex_unlock(&mpam_devices_lock);
mpam_enable_irqs();
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: bugfix
bugzilla: 50615
CVE: NA
-------------------------------------------------
sysctl_share_pool_hugepage_enable is no longer used.
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sysctl.c | 10 ----------
 mm/share_pool.c | 2 --
 2 files changed, 12 deletions(-)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 90f71ecdec636..7594564194da1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1745,16 +1745,6 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one, }, - { - /* 0: disable, 1: enable */ - .procname = "share_pool_hugepage_enable", - .data = &sysctl_share_pool_hugepage_enable, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, - }, { /* 0: map_unlock, 1: map_lock */ .procname = "share_pool_map_lock_enable", diff --git a/mm/share_pool.c b/mm/share_pool.c index 6ba479887f0da..0d2dee3c100b6 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -220,8 +220,6 @@ static DEFINE_SPINLOCK(sp_area_lock); static struct rb_root sp_area_root = RB_ROOT; static bool host_svm_sp_enable = false;
-int sysctl_share_pool_hugepage_enable = 1; - static unsigned long spa_size(struct sp_area *spa) { return spa->real_size;
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: perf
bugzilla: 50615
CVE: NA
-------------------------------------------------
sp_group_exit() and sp_group_post_exit() should be placed together, and the same goes for free_sp_group() and sp_group_drop().
This makes the code more readable and improves the cache hit ratio.
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 94 ++++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 47 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 0d2dee3c100b6..ec1e92a32b8ab 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -331,6 +331,12 @@ static void free_sp_group(struct sp_group *spg) kfree(spg); }
+static void sp_group_drop(struct sp_group *spg) +{ + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group(spg); +} + /* user must call sp_group_drop() after use */ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) { @@ -386,12 +392,6 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) return spg; }
-static void sp_group_drop(struct sp_group *spg) -{ - if (atomic_dec_and_test(&spg->use_count)) - free_sp_group(spg); -} - int sp_group_id_by_pid(int pid) { struct sp_group *spg; @@ -771,47 +771,6 @@ int sp_group_add_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_group_add_task);
-void sp_group_post_exit(struct mm_struct *mm) -{ - struct sp_proc_stat *stat; - struct sp_group *spg = mm->sp_group; - long alloc_size, k2u_size; - - if (!spg || !enable_ascend_share_pool) - return; - - stat = sp_get_proc_stat(mm->sp_stat_id); - /* - * There are two basic scenarios when a process in the share pool is - * exiting but its share pool memory usage is not 0. - * 1. Process A called sp_alloc(), but it terminates without calling - * sp_free(). Then its share pool memory usage is a positive number. - * 2. Process A never called sp_alloc(), and process B in the same spg - * called sp_alloc() to get an addr u. Then A gets u somehow and - * called sp_free(u). Now A's share pool memory usage is a negative - * number. Notice B's memory usage will be a positive number. - * - * We decide to print a info when seeing both of the scenarios. - */ - if (stat) { - alloc_size = atomic64_read(&stat->alloc_size); - k2u_size = atomic64_read(&stat->k2u_size); - if (alloc_size != 0 || k2u_size != 0) - pr_info("share pool: process %s(%d) of sp group %d exits. " - "It applied %ld aligned KB, k2u shared %ld aligned KB\n", - stat->comm, mm->sp_stat_id, mm->sp_group->id, - byte2kb(alloc_size), byte2kb(k2u_size)); - - /* match with sp_get_proc_stat in THIS function */ - sp_proc_stat_drop(stat); - /* match with sp_init_proc_stat, we expect stat is released after this call */ - sp_proc_stat_drop(stat); - } - - /* match with sp_group_add_task -> find_or_alloc_sp_group */ - sp_group_drop(spg); -} - /* the caller must hold sp_area_lock */ static void __insert_sp_area(struct sp_area *spa) { @@ -3006,6 +2965,47 @@ void sp_group_exit(struct mm_struct *mm) up_write(&spg->rw_lock); }
+void sp_group_post_exit(struct mm_struct *mm) +{ + struct sp_proc_stat *stat; + struct sp_group *spg = mm->sp_group; + long alloc_size, k2u_size; + + if (!spg || !enable_ascend_share_pool) + return; + + stat = sp_get_proc_stat(mm->sp_stat_id); + /* + * There are two basic scenarios when a process in the share pool is + * exiting but its share pool memory usage is not 0. + * 1. Process A called sp_alloc(), but it terminates without calling + * sp_free(). Then its share pool memory usage is a positive number. + * 2. Process A never called sp_alloc(), and process B in the same spg + * called sp_alloc() to get an addr u. Then A gets u somehow and + * called sp_free(u). Now A's share pool memory usage is a negative + * number. Notice B's memory usage will be a positive number. + * + * We decide to print a info when seeing both of the scenarios. + */ + if (stat) { + alloc_size = atomic64_read(&stat->alloc_size); + k2u_size = atomic64_read(&stat->k2u_size); + if (alloc_size != 0 || k2u_size != 0) + pr_info("share pool: process %s(%d) of sp group %d exits. " + "It applied %ld aligned KB, k2u shared %ld aligned KB\n", + stat->comm, mm->sp_stat_id, mm->sp_group->id, + byte2kb(alloc_size), byte2kb(k2u_size)); + + /* match with sp_get_proc_stat in THIS function */ + sp_proc_stat_drop(stat); + /* match with sp_init_proc_stat, we expect stat is released after this call */ + sp_proc_stat_drop(stat); + } + + /* match with sp_group_add_task -> find_or_alloc_sp_group */ + sp_group_drop(spg); +} + struct page *sp_alloc_pages(struct vm_struct *area, gfp_t mask, unsigned int page_order, int node) {
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: bugfix
bugzilla: 50615
CVE: NA
-------------------------------------------------
In the Linux implementation, RSS statistics may deviate by up to 64 pages (256KB), whereas the share pool statistics are always precise. As a result, the calculated values may be negative and confuse people.
We decide to show zero when a result is negative. It is still imprecise, but arguably better.
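The resulting clamping in idr_proc_stat_cb() looks like this (sketch based on the hunk below):

    total_rss = anon + file + shmem;
    /*
     * RSS statistics may deviate by up to 64 pages (256KB), see
     * check_sync_rss_stat(), so the subtraction can go negative.
     */
    non_sp_res = page2kb(total_rss) - sp_alloc_nsize;
    non_sp_res = non_sp_res < 0 ? 0 : non_sp_res;
    non_sp_shm = page2kb(shmem) - sp_alloc_nsize;
    non_sp_shm = non_sp_shm < 0 ? 0 : non_sp_shm;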
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index ec1e92a32b8ab..d25dae2833b4b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2788,9 +2788,14 @@ static int idr_proc_stat_cb(int id, void *p, void *data) file = get_mm_counter(mm, MM_FILEPAGES); shmem = get_mm_counter(mm, MM_SHMEMPAGES); total_rss = anon + file + shmem; + /* + * Statistics of RSS has a maximum 64 pages deviation (256KB). + * Please check_sync_rss_stat(). + */ non_sp_res = page2kb(total_rss) - sp_alloc_nsize; + non_sp_res = non_sp_res < 0 ? 0 : non_sp_res; non_sp_shm = page2kb(shmem) - sp_alloc_nsize; - non_sp_shm = non_sp_shm < 0 ? 0 : non_sp_shm; /* to be investigated */ + non_sp_shm = non_sp_shm < 0 ? 0 : non_sp_shm;
seq_printf(seq, "%-8d ", id); if (spg_id == 0)
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: perf
bugzilla: 50615
CVE: NA
-------------------------------------------------
According to the kernel comments, printk_ratelimited is not recommended; we should use pr_<level>_ratelimited instead.
This also helps to reduce cyclomatic complexity.
In addition, %pK is not useful when we want to know the actual virtual address. We decide to use pr_debug() with %lx instead; only root has permission to toggle /sys/kernel/debug/dynamic_debug/control, which meets the security requirements.
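The conversion pattern applied throughout the file is (sketch):

    /* before */
    if (printk_ratelimit())
        pr_err("share pool: alloc spa failed due to lack of memory\n");

    /* after */
    pr_err_ratelimited("share pool: alloc spa failed due to lack of memory\n");

    /* addresses: pr_debug() with %lx instead of pr_err() with %pK */
    pr_debug("share pool: sp free invalid input addr %lx\n", (unsigned long)addr);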
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 212 ++++++++++++++++--------------------------
 1 file changed, 72 insertions(+), 140 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index d25dae2833b4b..3037b51a25e36 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -129,7 +129,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, return stat; } else { /* if enter this branch, that's our mistake */ - pr_err("share pool: sp_init_proc_stat invalid id %d\n", id); + pr_err_ratelimited("share pool: proc stat invalid id %d\n", id); return ERR_PTR(-EBUSY); } } @@ -137,8 +137,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, stat = kzalloc(sizeof(*stat), GFP_KERNEL); if (stat == NULL) { up_write(&sp_stat_sem); - if (printk_ratelimit()) - pr_err("share pool: alloc proc stat failed due to lack of memory\n"); + pr_err_ratelimited("share pool: alloc proc stat failed due to lack of memory\n"); return ERR_PTR(-ENOMEM); }
@@ -153,8 +152,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, ret = idr_alloc(&sp_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); if (ret < 0) { up_write(&sp_stat_sem); - if (printk_ratelimit()) - pr_err("share pool: proc stat idr alloc failed %d\n", ret); + pr_err_ratelimited("share pool: proc stat idr alloc failed %d\n", ret); kfree(stat); return ERR_PTR(ret); } @@ -428,16 +426,14 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id)
spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) { - if (printk_ratelimit()) - pr_err("share pool: alloc spg failed due to lack of memory\n"); + pr_err_ratelimited("share pool: alloc spg failed due to lack of memory\n"); return ERR_PTR(-ENOMEM); } ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id + 1, GFP_KERNEL); up_write(&sp_group_sem); if (ret < 0) { - if (printk_ratelimit()) - pr_err("share pool: create group idr alloc failed\n"); + pr_err_ratelimited("share pool: create group idr alloc failed\n"); goto out_kfree; }
@@ -461,9 +457,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id) spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE, VM_NORESERVE); if (IS_ERR(spg->file)) { - if (printk_ratelimit()) - pr_err("share pool: file setup for small page failed %ld\n", - PTR_ERR(spg->file)); + pr_err("share pool: file setup for small page failed %ld\n", PTR_ERR(spg->file)); ret = PTR_ERR(spg->file); goto out_idr; } @@ -472,9 +466,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id) VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, hsize_log); if (IS_ERR(spg->file_hugetlb)) { - if (printk_ratelimit()) - pr_err("share pool: file setup for hugepage failed %ld\n", - PTR_ERR(spg->file_hugetlb)); + pr_err("share pool: file setup for hugepage failed %ld\n", PTR_ERR(spg->file_hugetlb)); ret = PTR_ERR(spg->file_hugetlb); goto out_fput; } @@ -566,8 +558,7 @@ int sp_group_add_task(int pid, int spg_id)
if ((spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) && spg_id != SPG_ID_DVPP_PASS_THROUGH) { - if (printk_ratelimit()) - pr_err("share pool: task add group failed due to invalid group id %d\n", spg_id); + pr_err_ratelimited("share pool: task add group failed, invalid group id %d\n", spg_id); return -EINVAL; }
@@ -575,17 +566,14 @@ int sp_group_add_task(int pid, int spg_id) spg = __sp_find_spg(pid, spg_id);
if (!spg) { - if (printk_ratelimit()) - pr_err("share pool: spg %d hasn't been created\n", spg_id); + pr_err_ratelimited("share pool: spg %d hasn't been created\n", spg_id); return -EINVAL; }
down_read(&spg->rw_lock); if (!spg_valid(spg)) { up_read(&spg->rw_lock); - if (printk_ratelimit()) - pr_err("share pool: task add group failed because group id %d " - "is dead\n", spg_id); + pr_err_ratelimited("share pool: task add group failed, group id %d is dead\n", spg_id); sp_group_drop(spg); return -EINVAL; } @@ -598,9 +586,7 @@ int sp_group_add_task(int pid, int spg_id) spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_AUTO_MIN, SPG_ID_AUTO_MAX, GFP_ATOMIC); if (spg_id < 0) { - if (printk_ratelimit()) - pr_err("share pool: task add group failed when automatically " - "generate group id failed\n"); + pr_err_ratelimited("share pool: task add group failed, auto generate group id failed\n"); return spg_id; } id_newly_generated = true; @@ -611,9 +597,7 @@ int sp_group_add_task(int pid, int spg_id) SPG_ID_DVPP_PASS_THROUGH_MIN, SPG_ID_DVPP_PASS_THROUGH_MAX, GFP_ATOMIC); if (spg_id < 0) { - if (printk_ratelimit()) - pr_err("share pool: task add group failed when automatically " - "generate group id failed in DVPP pass through\n"); + pr_err_ratelimited("share pool: task add group failed, DVPP auto generate group id failed\n"); return spg_id; } id_newly_generated = true; @@ -677,7 +661,7 @@ int sp_group_add_task(int pid, int spg_id) stat = sp_init_proc_stat(tsk, mm); if (IS_ERR(stat)) { ret = PTR_ERR(stat); - pr_err("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat)); + pr_err_ratelimited("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat)); goto out_drop_group; }
@@ -726,11 +710,9 @@ int sp_group_add_task(int pid, int spg_id) if (populate) { ret = do_mm_populate(mm, spa->va_start, populate, 0); if (ret) { - if (printk_ratelimit()) { - pr_warn("share pool: task add group failed when mm populate " - "failed (potential no enough memory): %d " - "spa type is %d\n", ret, spa->type); - } + pr_warn_ratelimited("share pool: task add group failed, mm populate failed " + "(potential no enough memory when -12): %d, spa type is %d\n", + ret, spa->type); down_write(&mm->mmap_sem); sp_munmap_task_areas(mm, spa->link.next); up_write(&mm->mmap_sem); @@ -821,8 +803,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, vend = MMAP_SHARE_POOL_16G_START + MMAP_SHARE_POOL_16G_SIZE; } else { if (!spg) { - if (printk_ratelimit()) - pr_err("share pool: don't allow k2u(task) in host svm multiprocess scene\n"); + pr_err_ratelimited("share pool: don't allow k2u(task) in host svm multiprocess scene\n"); return ERR_PTR(-EINVAL); } vstart = spg->dvpp_va_start; @@ -832,8 +813,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
spa = kmalloc(sizeof(struct sp_area), GFP_KERNEL); if (unlikely(!spa)) { - if (printk_ratelimit()) - pr_err("share pool: alloc spa failed due to lack of memory\n"); + pr_err_ratelimited("share pool: alloc spa failed due to lack of memory\n"); return ERR_PTR(-ENOMEM); }
@@ -1183,16 +1163,13 @@ int sp_free(unsigned long addr) } } else { /* spa == NULL */ ret = -EINVAL; - if (printk_ratelimit()) - pr_err("share pool: sp free invalid input addr %pK\n", (void *)addr); + pr_debug("share pool: sp free invalid input addr %lx\n", (unsigned long)addr); goto out; }
if (spa->type != SPA_TYPE_ALLOC) { ret = -EINVAL; - if (printk_ratelimit()) - pr_err("share pool: sp free failed, addr %pK is not from sp_alloc\n", - (void *)addr); + pr_debug("share pool: sp free failed, addr %lx is not from sp alloc\n", (unsigned long)addr); goto drop_spa; }
@@ -1296,14 +1273,12 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) spg_id = mdc_default_group_id;
if (spg_id != SPG_ID_DEFAULT && spg_id < SPG_ID_MIN) { - if (printk_ratelimit()) - pr_err("share pool: allocation failed due to invalid group id %d\n", spg_id); + pr_err_ratelimited("share pool: allocation failed, invalid group id %d\n", spg_id); return ERR_PTR(-EINVAL); }
if (sp_flags & ~(SP_HUGEPAGE_ONLY | SP_HUGEPAGE | SP_DVPP)) { - if (printk_ratelimit()) - pr_err("share pool: allocation failed due to invalid flag %lu\n", sp_flags); + pr_err_ratelimited("share pool: allocation failed, invalid flag %lx\n", sp_flags); return ERR_PTR(-EINVAL); }
@@ -1323,8 +1298,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) * The judgment is added to prevent exit in this case. */ if (ret < 0 && (ret != -EEXIST)) { - pr_err("share pool: allocation failed due to add group error %d in DVPP pass through scenario", - ret); + pr_err_ratelimited("share pool: allocation failed, add group error %d in DVPP pass through\n", ret); return ERR_PTR(ret); } spg = current->mm->sp_group; @@ -1350,7 +1324,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) if (!spg_valid(spg)) { up_read(&spg->rw_lock); sp_group_drop(spg); - pr_err("share pool: sp alloc failed, spg is dead\n"); + pr_err_ratelimited("share pool: sp alloc failed, spg is dead\n"); return ERR_PTR(-ENODEV); }
@@ -1364,10 +1338,8 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) try_again: spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_ALLOC); if (IS_ERR(spa)) { - if (printk_ratelimit()) - pr_err("share pool: allocation failed due to alloc spa failure " - "(potential no enough virtual memory when -75): %ld\n", - PTR_ERR(spa)); + pr_err_ratelimited("share pool: allocation failed due to alloc spa failure " + "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); p = spa; goto out; } @@ -1397,8 +1369,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) vma = find_vma(mm, sp_addr); if (unlikely(!vma)) { up_write(&mm->mmap_sem); - pr_err("share pool: allocation failed due to find %pK vma failure\n", - (void *)sp_addr); + pr_debug("share pool: allocation failed due to find %lx vma failure\n", (unsigned long)sp_addr); p = ERR_PTR(-EINVAL); goto out; } @@ -1435,10 +1406,8 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) if (ret) { __sp_free(spg, sp_addr, size_aligned, list_next_entry(mm, sp_node)); - - if (printk_ratelimit()) - pr_warn("share pool: allocation failed due to mm populate failed" - "(potential no enough memory when -12): %d\n", ret); + pr_warn_ratelimited("share pool: allocation failed due to mm populate failed" + "(potential no enough memory when -12): %d\n", ret); p = ERR_PTR(ret);
mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; @@ -1496,15 +1465,13 @@ static int is_vmap_hugepage(unsigned long addr) struct vm_struct *area;
if (unlikely(!addr)) { - if (printk_ratelimit()) - pr_err("share pool: null pointer when judge vmap addr\n"); + pr_err_ratelimited("share pool: null pointer when judge vmap addr\n"); return -EINVAL; }
area = find_vm_area((void *)addr); if (unlikely(!area)) { - if (printk_ratelimit()) - pr_err("share pool: failed to find vm area(%lx)\n", addr); + pr_err_ratelimited("share pool: failed to find vm area(%lx)\n", addr); return -EINVAL; }
@@ -1570,8 +1537,8 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); if (ret) { do_munmap(mm, ret_addr, spa_size(spa), NULL); - pr_err("share pool: remap vmalloc hugepage failed, " - "ret %d, kva is %pK\n", ret, (void *)kva); + pr_debug("share pool: remap vmalloc hugepage failed, " + "ret %d, kva is %lx\n", ret, (unsigned long)kva); ret_addr = ret; goto put_mm; } @@ -1711,8 +1678,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, check_interrupt_context();
if (sp_flags & ~SP_DVPP) { - if (printk_ratelimit()) - pr_err("share pool: k2u sp_flags %lu error\n", sp_flags); + pr_err_ratelimited("share pool: k2u sp_flags %lx error\n", sp_flags); return ERR_PTR(-EINVAL); }
@@ -1723,7 +1689,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } else if (is_hugepage == 0) { /* do nothing */ } else { - pr_err("share pool: k2u kva not vmalloc address\n"); + pr_err_ratelimited("share pool: k2u kva not vmalloc address\n"); return ERR_PTR(is_hugepage); }
@@ -1755,7 +1721,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, stat = sp_init_proc_stat(tsk, mm); if (IS_ERR(stat)) { uva = stat; - pr_err("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat)); + pr_err_ratelimited("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat)); goto out_put_mm; }
@@ -1763,24 +1729,21 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, if (spg == NULL) { /* k2u to task */ if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { - if (printk_ratelimit()) - pr_err("share pool: k2task invalid spg id %d\n", spg_id); + pr_err_ratelimited("share pool: k2task invalid spg id %d\n", spg_id); uva = ERR_PTR(-EINVAL); goto out_drop_proc_stat; } spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK); if (IS_ERR(spa)) { - if (printk_ratelimit()) - pr_err("share pool: k2u(task) failed due to alloc spa failure " - "(potential no enough virtual memory when -75): %ld\n", - PTR_ERR(spa)); + pr_err_ratelimited("share pool: k2u(task) failed due to alloc spa failure " + "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); uva = spa; goto out_drop_proc_stat; }
if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) { up_read(&spg->rw_lock); - pr_err("share pool: %s: the kva %pK is not valid\n", __func__, (void *)kva_aligned); + pr_debug("share pool: %s: the kva %lx is not valid\n", __func__, (unsigned long)kva_aligned); goto out_drop_spa; }
@@ -1793,8 +1756,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, /* k2u to group */ if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) { up_read(&spg->rw_lock); - if (printk_ratelimit()) - pr_err("share pool: k2spg invalid spg id %d\n", spg_id); + pr_err_ratelimited("share pool: k2spg invalid spg id %d\n", spg_id); uva = ERR_PTR(-EINVAL); goto out_drop_spg; } @@ -1806,17 +1768,15 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
if (IS_ERR(spa)) { up_read(&spg->rw_lock); - if (printk_ratelimit()) - pr_err("share pool: k2u(spg) failed due to alloc spa failure " - "(potential no enough virtual memory when -75): %ld\n", - PTR_ERR(spa)); + pr_err_ratelimited("share pool: k2u(spg) failed due to alloc spa failure " + "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); uva = spa; goto out_drop_spg; }
if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) { up_read(&spg->rw_lock); - pr_err("share pool: %s: the kva %pK is not valid\n", __func__, (void *)kva_aligned); + pr_err("share pool: %s: the kva %lx is not valid\n", __func__, (unsigned long)kva_aligned); goto out_drop_spa; }
@@ -1827,7 +1787,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
} else { /* group is dead, return -ENODEV */ - pr_err("share pool: failed to make k2u, sp group is dead\n"); + pr_err_ratelimited("share pool: failed to make k2u, sp group is dead\n"); uva = ERR_PTR(-ENODEV); } up_read(&spg->rw_lock); @@ -1839,8 +1799,8 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } else { /* associate vma and spa */ if (!vmalloc_area_clr_flag(spa, kva_aligned, VM_SHAREPOOL)) - pr_warn("share pool: %s: the kva %pK is not valid\n", - __func__, (void *)kva_aligned); + pr_warn("share pool: %s: the kva %lx is not valid\n", + __func__, (unsigned long)kva_aligned); }
out_drop_spa: @@ -1867,8 +1827,7 @@ static int sp_pte_entry(pte_t *pte, unsigned long addr, struct sp_walk_data *sp_walk_data;
if (unlikely(!pte_present(*pte))) { - if (printk_ratelimit()) - pr_err("share pool: the page of addr %pK unexpectedly not in RAM\n", (void *)addr); + pr_debug("share pool: the page of addr %lx unexpectedly not in RAM\n", (unsigned long)addr); return -EFAULT; }
@@ -1894,9 +1853,7 @@ static int sp_test_walk(unsigned long addr, unsigned long next, static int sp_pte_hole(unsigned long start, unsigned long end, struct mm_walk *walk) { - if (printk_ratelimit()) - pr_err("share pool: hole [%pK, %pK) appeared unexpectedly\n", - (void *)start, (void *)end); + pr_debug("share pool: hole [%lx, %lx) appeared unexpectedly\n", (unsigned long)start, (unsigned long)end); return -EFAULT; }
@@ -1909,9 +1866,7 @@ static int sp_hugetlb_entry(pte_t *ptep, unsigned long hmask, struct sp_walk_data *sp_walk_data;
if (unlikely(!pte_present(pte))) { - if (printk_ratelimit()) - pr_err("share pool: the page of addr %pK unexpectedly " - "not in RAM\n", (void *)addr); + pr_err_ratelimited("share pool: the page of addr %lx unexpectedly not in RAM\n", (unsigned long)addr); return -EFAULT; }
@@ -1967,8 +1922,7 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, */ vma = find_vma(mm, uva); if (!vma) { - if (printk_ratelimit()) - pr_err("share pool: u2k input uva %pK is invalid\n", (void *)uva); + pr_debug("share pool: u2k input uva %lx is invalid\n", (unsigned long)uva); return -EINVAL; } if ((is_vm_hugetlb_page(vma)) || is_vm_huge_special(vma)) @@ -1995,16 +1949,14 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, size_aligned = ALIGN(uva + size, page_size) - uva_aligned;
if (uva_aligned + size_aligned < uva_aligned) { - if (printk_ratelimit()) - pr_err("share pool: overflow happened in walk page range\n"); + pr_err_ratelimited("share pool: overflow happened in walk page range\n"); return -EINVAL; }
page_nr = size_aligned / page_size; pages = kvmalloc(page_nr * sizeof(struct page *), GFP_KERNEL); if (!pages) { - if (printk_ratelimit()) - pr_err("share pool: alloc page array failed in walk page range\n"); + pr_err_ratelimited("share pool: alloc page array failed in walk page range\n"); return -ENOMEM; } sp_walk_data->pages = pages; @@ -2076,7 +2028,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) down_write(&mm->mmap_sem); ret = __sp_walk_page_range(uva, size, mm, &sp_walk_data); if (ret) { - pr_err("share pool: walk page range failed, ret %d\n", ret); + pr_err_ratelimited("share pool: walk page range failed, ret %d\n", ret); up_write(&mm->mmap_sem); mmput(mm); p = ERR_PTR(ret); @@ -2093,8 +2045,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) mmput(mm);
if (!p) { - if (printk_ratelimit()) - pr_err("share pool: vmap(huge) in u2k failed\n"); + pr_err("share pool: vmap(huge) in u2k failed\n"); __sp_walk_page_free(&sp_walk_data); p = ERR_PTR(-ENOMEM); goto out_put_task; @@ -2154,15 +2105,13 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp spa = __find_sp_area(ALIGN_DOWN(uva, PAGE_SIZE)); if (!spa) { ret = -EINVAL; - if (printk_ratelimit()) - pr_err("share pool: invalid input uva %pK in unshare uva\n", - (void *)uva); + pr_debug("share pool: invalid input uva %lx in unshare uva\n", (unsigned long)uva); goto out; } }
if (spa->type != SPA_TYPE_K2TASK && spa->type != SPA_TYPE_K2SPG) { - pr_err("share pool: this spa should not be unshare here\n"); + pr_err_ratelimited("share pool: this spa should not be unshare here\n"); ret = -EINVAL; goto out_drop_area; } @@ -2178,25 +2127,19 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
if (size_aligned < ALIGN(size, page_size)) { ret = -EINVAL; - if (printk_ratelimit()) - pr_err("share pool: unshare uva failed due to invalid parameter size %lu\n", - size); + pr_err_ratelimited("share pool: unshare uva failed due to invalid parameter size %lu\n", size); goto out_drop_area; }
if (spa->type == SPA_TYPE_K2TASK) { if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { - if (printk_ratelimit()) - pr_err("share pool: unshare uva(to task) failed, " - "invalid spg id %d\n", spg_id); + pr_err_ratelimited("share pool: unshare uva(to task) failed, invalid spg id %d\n", spg_id); ret = -EINVAL; goto out_drop_area; }
if (!spa->mm) { - if (printk_ratelimit()) - pr_err("share pool: unshare uva(to task) failed, " - "none spa owner\n"); + pr_err_ratelimited("share pool: unshare uva(to task) failed, none spa owner\n"); ret = -EINVAL; goto out_drop_area; } @@ -2210,16 +2153,13 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp */ mm = get_task_mm(current->group_leader); if (!mm) { - if (printk_ratelimit()) - pr_info("share pool: no need to unshare uva(to task), " - "target process mm is exiting\n"); + pr_info_ratelimited("share pool: no need to unshare uva(to task), " + "target process mm is exiting\n"); goto out_clr_flag; }
if (spa->mm != mm) { - if (printk_ratelimit()) - pr_err("share pool: unshare uva(to task) failed, " - "spa not belong to the task\n"); + pr_err_ratelimited("share pool: unshare uva(to task) failed, spa not belong to the task\n"); ret = -EINVAL; mmput(mm); goto out_drop_area; @@ -2236,9 +2176,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp } } else if (spa->type == SPA_TYPE_K2SPG) { if (spg_id < 0) { - if (printk_ratelimit()) - pr_err("share pool: unshare uva(to group) failed, " - "invalid spg id %d\n", spg_id); + pr_err_ratelimited("share pool: unshare uva(to group) failed, invalid spg id %d\n", spg_id); ret = -EINVAL; goto out_drop_area; } @@ -2252,18 +2190,16 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp down_read(&spa->spg->rw_lock); if (!spg_valid(spa->spg)) { up_read(&spa->spg->rw_lock); - if (printk_ratelimit()) - pr_info("share pool: no need to unshare uva(to group), " - "sp group of spa is dead\n"); + pr_info_ratelimited("share pool: no need to unshare uva(to group), " + "sp group of spa is dead\n"); goto out_clr_flag; } up_read(&spa->spg->rw_lock);
/* alway allow kthread and dvpp channel destroy procedure */ if (current->mm && current->mm->sp_group != spa->spg) { - if (printk_ratelimit()) - pr_err("share pool: unshare uva(to group) failed, " - "caller process doesn't belong to target group\n"); + pr_err_ratelimited("share pool: unshare uva(to group) failed, " + "caller process doesn't belong to target group\n"); ret = -EINVAL; goto out_drop_area; } @@ -2318,14 +2254,12 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) step = PAGE_SIZE; is_hugepage = false; } else { - if (printk_ratelimit()) - pr_err("share pool: check vmap hugepage failed, ret %d\n", ret); + pr_err_ratelimited("share pool: check vmap hugepage failed, ret %d\n", ret); return -EINVAL; }
if (kva_aligned + size_aligned < kva_aligned) { - if (printk_ratelimit()) - pr_err("share pool: overflow happened in unshare kva\n"); + pr_err_ratelimited("share pool: overflow happened in unshare kva\n"); return -EINVAL; }
@@ -2371,8 +2305,7 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) ret = sp_unshare_kva(va, size); } else { /* regard user and kernel address ranges as bad address */ - if (printk_ratelimit()) - pr_err("share pool: unshare addr %pK is not a user or kernel addr", (void *)va); + pr_debug("share pool: unshare addr %lx is not a user or kernel addr\n", (unsigned long)va); ret = -EFAULT; }
@@ -2393,8 +2326,7 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, check_interrupt_context();
if (unlikely(!sp_walk_data)) { - if (printk_ratelimit()) - pr_err("share pool: null pointer when walk page range\n"); + pr_err_ratelimited("share pool: null pointer when walk page range\n"); return -EINVAL; } if (!tsk || (tsk->flags & PF_EXITING))
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: bugfix
bugzilla: 50615
CVE: NA
-------------------------------------------------
We encountered the following call trace:
[ 243.545984] Call trace:
[ 243.545984] find_vma+0x90/0xa0
[ 243.545985] find_extend_vma+0x2c/0xd0
[ 243.545985] __get_user_pages+0x94/0x378
[ 243.545985] get_dump_page+0x50/0x80
[ 243.545986] elf_core_dump+0x560/0x8d8
[ 243.545986] do_coredump+0x508/0xe40
[ 243.545986] get_signal+0x130/0x788
[ 243.545987] do_signal+0x1d4/0x290
[ 243.545987] do_notify_resume+0x150/0x1c0
[ 243.545988] work_pending+0x8/0x10
elf_core_dump() doesn't hold mmap_sem because the other threads in the same thread group have been killed and are blocked in exit_mm(), waiting to call coredump_finish().
However, share pool operations can modify the mm of any process in the same share group, which leads to concurrency problems when a coredump happens.
Solution: in share pool operations, check whether a coredump is in progress by testing mm->core_state.
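The check added at each mmap_sem write-lock site follows this pattern (sketch, taken from the sp_munmap() hunk below):

    down_write(&mm->mmap_sem);
    if (unlikely(mm->core_state)) {
        /* a coredump is in progress; leave this mm alone */
        up_write(&mm->mmap_sem);
        pr_info("share pool: munmap: encountered coredump\n");
        return;
    }
    err = do_munmap(mm, addr, size, NULL);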
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 52 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 5 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 3037b51a25e36..3761bf4c3bed5 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -696,6 +696,15 @@ int sp_group_add_task(int pid, int spg_id) }
down_write(&mm->mmap_sem); + if (unlikely(mm->core_state)) { + sp_munmap_task_areas(mm, &spa->link); + up_write(&mm->mmap_sem); + ret = -EBUSY; + pr_err("share pool: task add group: encountered coredump, abort\n"); + spin_lock(&sp_area_lock); + break; + } + addr = sp_mmap(mm, file, spa, &populate); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, &spa->link); @@ -1110,6 +1119,11 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr, int err;
down_write(&mm->mmap_sem); + if (unlikely(mm->core_state)) { + up_write(&mm->mmap_sem); + pr_info("share pool: munmap: encoutered coredump\n"); + return; + }
err = do_munmap(mm, addr, size, NULL); if (err) { @@ -1351,6 +1365,12 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) struct vm_area_struct *vma;
down_write(&mm->mmap_sem); + if (unlikely(mm->core_state)) { + up_write(&mm->mmap_sem); + pr_info("share pool: allocation encountered coredump\n"); + continue; + } + mmap_addr = sp_mmap(mm, file, spa, &populate); if (IS_ERR_VALUE(mmap_addr)) { up_write(&mm->mmap_sem); @@ -1521,6 +1541,11 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, }
down_write(&mm->mmap_sem); + if (unlikely(mm->core_state)) { + pr_err("share pool: k2u mmap: encountered coredump, abort\n"); + ret_addr = -EBUSY; + goto put_mm; + }
ret_addr = sp_mmap(mm, file, spa, &populate); if (IS_ERR_VALUE(ret_addr)) { @@ -2002,7 +2027,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) int ret = 0; struct task_struct *tsk; struct mm_struct *mm; - void *p = ERR_PTR(-ENODEV); + void *p = ERR_PTR(-ESRCH); struct sp_walk_data sp_walk_data = { .page_count = 0, }; @@ -2017,15 +2042,20 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) else get_task_struct(tsk); rcu_read_unlock(); - if (ret) { - p = ERR_PTR(ret); + if (ret) goto out; - }
mm = get_task_mm(tsk); if (mm == NULL) goto out_put_task; down_write(&mm->mmap_sem); + if (unlikely(mm->core_state)) { + up_write(&mm->mmap_sem); + pr_err("share pool: u2k: encountered coredump, abort\n"); + mmput(mm); + goto out_put_task; + } + ret = __sp_walk_page_range(uva, size, mm, &sp_walk_data); if (ret) { pr_err_ratelimited("share pool: walk page range failed, ret %d\n", ret); @@ -2166,6 +2196,13 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp }
down_write(&mm->mmap_sem); + if (unlikely(mm->core_state)) { + ret = 0; + up_write(&mm->mmap_sem); + mmput(mm); + goto out_drop_area; + } + ret = do_munmap(mm, uva_aligned, size_aligned, NULL); up_write(&mm->mmap_sem); mmput(mm); @@ -2341,7 +2378,12 @@ int sp_walk_page_range(unsigned long uva, unsigned long size,
sp_walk_data->page_count = 0; down_write(&mm->mmap_sem); - ret = __sp_walk_page_range(uva, size, mm, sp_walk_data); + if (likely(!mm->core_state)) + ret = __sp_walk_page_range(uva, size, mm, sp_walk_data); + else { + pr_err("share pool: walk page range: encoutered coredump\n"); + ret = -ESRCH; + } up_write(&mm->mmap_sem);
mmput(mm);
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: feature
bugzilla: 50615
CVE: NA
-------------------------------------------------
/proc/sharepool/spa_stat can now show the pid of the applier process, which helps with debugging and checking for memory leaks.
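The applier's tgid is recorded when the sp_area is allocated and printed per spa in spa_stat (sketch of the key pieces from the hunks below):

    /* new member in struct sp_area */
    pid_t applier;    /* the original applier process */

    /* recorded at allocation time ... */
    spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_ALLOC, current->tgid);

    /* ... and shown in /proc/sharepool/spa_stat */
    seq_printf(seq, "%-8d ", spa->applier);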
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 3761bf4c3bed5..ab7244de8850f 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -213,6 +213,7 @@ struct sp_area { enum spa_type type; /* where spa born from */ struct mm_struct *mm; /* owner of k2u(task) */ unsigned long kva; /* shared kva */ + pid_t applier; /* the original applier process */ }; static DEFINE_SPINLOCK(sp_area_lock); static struct rb_root sp_area_root = RB_ROOT; @@ -797,7 +798,8 @@ static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_r * Return NULL if fail. */ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, - struct sp_group *spg, enum spa_type type) + struct sp_group *spg, enum spa_type type, + pid_t applier) { struct sp_area *spa, *first, *err; struct rb_node *n; @@ -914,6 +916,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, spa->type = type; spa->mm = NULL; spa->kva = 0; /* NULL pointer */ + spa->applier = applier;
if (spa_inc_usage(type, size, (flags & SP_DVPP))) { err = ERR_PTR(-EINVAL); @@ -1350,7 +1353,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) size_aligned = ALIGN(size, PAGE_SIZE); } try_again: - spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_ALLOC); + spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_ALLOC, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("share pool: allocation failed due to alloc spa failure " "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -1758,7 +1761,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, uva = ERR_PTR(-EINVAL); goto out_drop_proc_stat; } - spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK); + spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK, tsk->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("share pool: k2u(task) failed due to alloc spa failure " "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -1787,9 +1790,9 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, }
if (enable_share_k2u_spg) - spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG); + spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG, tsk->tgid); else - spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK); + spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK, tsk->tgid);
if (IS_ERR(spa)) { up_read(&spg->rw_lock); @@ -2577,7 +2580,7 @@ static void rb_spa_stat_show(struct seq_file *seq) up_read(&spa->spg->rw_lock); }
- seq_printf(seq, "%2s%-14lx %2s%-14lx %-13ld ", + seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start, "0x", spa->va_end, byte2kb(spa->real_size)); @@ -2602,7 +2605,8 @@ static void rb_spa_stat_show(struct seq_file *seq) else seq_printf(seq, "%-5s ", "N");
- seq_printf(seq, "%-10d\n", atomic_read(&spa->use_count)); + seq_printf(seq, "%-8d ", spa->applier); + seq_printf(seq, "%-8d\n", atomic_read(&spa->use_count));
spin_lock(&sp_area_lock); } @@ -2712,8 +2716,8 @@ static int spa_stat_show(struct seq_file *seq, void *offset) spg_overview_show(seq); spa_overview_show(seq); /* print the file header */ - seq_printf(seq, "%-10s %-16s %-16s %-13s %-7s %-5s %-10s\n", - "Group ID", "va_start", "va_end", "Aligned KB", "Type", "Huge", "Ref"); + seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", + "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); rb_spa_stat_show(seq); return 0; }
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: feature
bugzilla: 50615
CVE: NA
-------------------------------------------------
Make the function names more general: rename buff_vzalloc_user to vzalloc_user_account, and buff_vzalloc_hugepage_user to vzalloc_hugepage_user_account.
To support NUMA configuration, we also introduce a parameter *node*, which is the NUMA node id.
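Hypothetical usage after the rename (NUMA_NO_NODE keeps the old node-agnostic behaviour):

    /* zeroed, user-mappable buffer, charged via __GFP_ACCOUNT */
    void *buf = vzalloc_user_account(size, NUMA_NO_NODE);

    /* hugepage-backed variant, placed on the caller's NUMA node */
    void *hbuf = vzalloc_hugepage_user_account(size, numa_node_id());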
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/share_pool.h | 8 ++++----
 mm/share_pool.c | 18 ++++++++++--------
 2 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 859efd3525f35..b3041654084d6 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -253,8 +253,8 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
extern void *vmalloc_hugepage(unsigned long size); extern void *vmalloc_hugepage_user(unsigned long size); -extern void *buff_vzalloc_user(unsigned long size); -extern void *buff_vzalloc_hugepage_user(unsigned long size); +extern void *vzalloc_user_account(unsigned long size, int node); +extern void *vzalloc_hugepage_user_account(unsigned long size, int node);
void sp_exit_mm(struct mm_struct *mm);
@@ -456,12 +456,12 @@ static inline void *vmalloc_hugepage_user(unsigned long size) return NULL; }
-static inline void *buff_vzalloc_user(unsigned long size) +static inline void *vzalloc_user_account(unsigned long size, int node) { return NULL; }
-static inline void *buff_vzalloc_hugepage_user(unsigned long size) +static inline void *vzalloc_hugepage_user_account(unsigned long size, int node) { return NULL; } diff --git a/mm/share_pool.c b/mm/share_pool.c index ab7244de8850f..c85101434792e 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3043,9 +3043,10 @@ void *vmalloc_hugepage_user(unsigned long size) EXPORT_SYMBOL(vmalloc_hugepage_user);
/** - * buff_vzalloc_user - allocate zeroed virtually contiguous memory + * vzalloc_user_account - allocate zeroed virtually contiguous memory * for userspace * @size: allocation size + * @node: NUMA node id * * The resulting memory area is zeroed so it can be mapped to userspace * without leaking data. @@ -3053,19 +3054,20 @@ EXPORT_SYMBOL(vmalloc_hugepage_user); * Compare to vmalloc_user(), this is a customized function because * __GFP_ACCOUNT is used to limit memory usage. */ -void *buff_vzalloc_user(unsigned long size) +void *vzalloc_user_account(unsigned long size, int node) { return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, PAGE_KERNEL, - VM_USERMAP, NUMA_NO_NODE, + VM_USERMAP, node, __builtin_return_address(0)); } -EXPORT_SYMBOL(buff_vzalloc_user); +EXPORT_SYMBOL(vzalloc_user_account);
/** - * buff_vzalloc_hugepage_user - allocate virtually contiguous hugetlb memory + * vzalloc_hugepage_user_account - allocate virtually contiguous hugetlb memory * for userspace * @size: allocation size + * @node: NUMA node id * * Allocate enough huge pages to cover @size and map them into * contiguous kernel virtual space. The resulting memory area @@ -3076,17 +3078,17 @@ EXPORT_SYMBOL(buff_vzalloc_user); * Compare to vmalloc_hugepage_user(), this is a customized function because * __GFP_ACCOUNT is used to limit memory usage. */ -void *buff_vzalloc_hugepage_user(unsigned long size) +void *vzalloc_hugepage_user_account(unsigned long size, int node) { /* PMD hugepage aligned */ size = PMD_ALIGN(size);
return __vmalloc_node_range(size, PMD_SIZE, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, PAGE_KERNEL, - VM_HUGE_PAGES | VM_USERMAP, NUMA_NO_NODE, + VM_HUGE_PAGES | VM_USERMAP, node, __builtin_return_address(0)); } -EXPORT_SYMBOL(buff_vzalloc_hugepage_user); +EXPORT_SYMBOL(vzalloc_hugepage_user_account);
int enable_ascend_share_pool;
From: Ding Tianhong <dingtianhong@huawei.com>
ascend inclusion
category: perf
bugzilla: 50615
CVE: NA
-------------------------------------------------
We decide to remove sp_mutex entirely and use sp_group_sem instead.
Moreover, we use down_read(&spg->rw_lock) instead of down_write(), which increases the parallelism of sp_group_add_task(), especially when there are many normal pages in the target sp_group.
Test: first, a process is added into sp_group 1 and calls sp_alloc() to get 3.5G of normal page memory and 5.5G of hugepage memory. Then 30 processes are created and added to sp_group 1 concurrently. The results are as follows.
without this patch: 15.0s
with this patch: 4.5s
gain: 70%
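The key change in sp_group_add_task() is that mapping existing spas into the new mm now runs under the read side of spg->rw_lock, while the list modification takes the write lock separately (sketch):

    /* before: everything under the write lock */
    down_write(&spg->rw_lock);
    list_add_tail(&mm->sp_node, &spg->procs);
    /* map and populate every existing spa into the new mm */
    up_write(&spg->rw_lock);

    /* after: the mapping work only needs the read lock */
    down_read(&spg->rw_lock);
    /* map and populate every existing spa into the new mm */
    up_read(&spg->rw_lock);

    down_write(&spg->rw_lock);
    list_add_tail(&mm->sp_node, &spg->procs);
    up_write(&spg->rw_lock);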
Tested-by: Tang Yizhou <tangyizhou@huawei.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index c85101434792e..f6603e4c87e5d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -71,9 +71,6 @@ int sysctl_sp_debug_mode;
int sysctl_share_pool_map_lock_enable;
-/* for inter-group operations */ -static DEFINE_MUTEX(sp_mutex); - /* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); /* rw semaphore for sp_group_idr */ @@ -412,13 +409,13 @@ int sp_group_id_by_pid(int pid) } EXPORT_SYMBOL_GPL(sp_group_id_by_pid);
+/* the caller must hold sp_group_sem */ static struct sp_group *find_or_alloc_sp_group(int spg_id) { struct sp_group *spg; int ret; char name[20];
- down_write(&sp_group_sem); spg = __sp_find_spg_locked(current->pid, spg_id);
if (!spg) { @@ -432,7 +429,6 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id) } ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id + 1, GFP_KERNEL); - up_write(&sp_group_sem); if (ret < 0) { pr_err_ratelimited("share pool: create group idr alloc failed\n"); goto out_kfree; @@ -472,7 +468,6 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id) goto out_fput; } } else { - up_write(&sp_group_sem); down_read(&spg->rw_lock); if (!spg_valid(spg)) { up_read(&spg->rw_lock); @@ -488,9 +483,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id) out_fput: fput(spg->file); out_idr: - down_write(&sp_group_sem); idr_remove(&sp_group_idr, spg_id); - up_write(&sp_group_sem); out_kfree: kfree(spg); return ERR_PTR(ret); @@ -604,7 +597,7 @@ int sp_group_add_task(int pid, int spg_id) id_newly_generated = true; }
- mutex_lock(&sp_mutex); + down_write(&sp_group_sem);
rcu_read_lock();
@@ -618,6 +611,7 @@ int sp_group_add_task(int pid, int spg_id) if (ret) { if (id_newly_generated) free_sp_group_id((unsigned int)spg_id); + up_write(&sp_group_sem); goto out_unlock; }
@@ -636,9 +630,11 @@ int sp_group_add_task(int pid, int spg_id) mm = get_task_mm(tsk->group_leader); if (!mm) { ret = -ESRCH; + up_write(&sp_group_sem); goto out_put_task; } else if (mm->sp_group) { ret = -EEXIST; + up_write(&sp_group_sem); goto out_put_mm; }
@@ -647,6 +643,7 @@ int sp_group_add_task(int pid, int spg_id) ret = PTR_ERR(spg); if (id_newly_generated) free_sp_group_id((unsigned int)spg_id); + up_write(&sp_group_sem); goto out_put_mm; }
@@ -654,10 +651,14 @@ int sp_group_add_task(int pid, int spg_id) if (sysctl_ac_mode == AC_SINGLE_OWNER) { if (spg->owner != current->group_leader) { ret = -EPERM; + up_write(&sp_group_sem); goto out_drop_group; } }
+ mm->sp_group = spg; + up_write(&sp_group_sem); + /* per process statistics initialization */ stat = sp_init_proc_stat(tsk, mm); if (IS_ERR(stat)) { @@ -666,10 +667,7 @@ int sp_group_add_task(int pid, int spg_id) goto out_drop_group; }
- mm->sp_group = spg; - - down_write(&spg->rw_lock); - list_add_tail(&mm->sp_node, &spg->procs); + down_read(&spg->rw_lock); /* * create mappings of existing shared memory segments into this * new process' page table. @@ -735,22 +733,25 @@ int sp_group_add_task(int pid, int spg_id) } __sp_area_drop_locked(prev); spin_unlock(&sp_area_lock); + up_read(&spg->rw_lock);
- if (unlikely(ret)) { - /* spg->procs is modified, spg->rw_lock should be put below */ - list_del(&mm->sp_node); - mm->sp_group = NULL; - } - up_write(&spg->rw_lock); + sp_proc_stat_drop(stat); /* match with sp_init_proc_stat */
/* double drop when fail: ensure release stat */ if (unlikely(ret)) sp_proc_stat_drop(stat); - sp_proc_stat_drop(stat); /* match with sp_init_proc_stat */
out_drop_group: - if (unlikely(ret)) + if (unlikely(ret)) { + down_write(&sp_group_sem); + mm->sp_group = NULL; + up_write(&sp_group_sem); sp_group_drop(spg); + } else { + down_write(&spg->rw_lock); + list_add_tail(&mm->sp_node, &spg->procs); + up_write(&spg->rw_lock); + } out_put_mm: /* No need to put the mm if the sp group adds this mm successfully */ if (unlikely(ret)) @@ -758,7 +759,6 @@ int sp_group_add_task(int pid, int spg_id) out_put_task: put_task_struct(tsk); out_unlock: - mutex_unlock(&sp_mutex); return ret == 0 ? spg_id : ret; } EXPORT_SYMBOL_GPL(sp_group_add_task);
From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: feature
bugzilla: 50615
CVE: NA
-------------------------------------------------
We found that two different errnos can be returned when do_mm_populate() is called while the current thread is being killed, which may confuse log readers.
1. do_mm_populate -> populate_vma_page_range -> __get_user_pages: if fatal_signal_pending() is true, -ERESTARTSYS (-512) is returned.
2. __get_user_pages -> follow_hugetlb_page: if fatal_signal_pending() is true, -EFAULT (-14) is returned.
So we call fatal_signal_pending() after do_mm_populate() fails in the share pool and log that the thread is being killed.
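The resulting error handling in sp_group_add_task() (sketch based on the hunk below):

    ret = do_mm_populate(mm, spa->va_start, populate, 0);
    if (ret) {
        /* distinguish a pending fatal signal from a genuine OOM */
        if (unlikely(fatal_signal_pending(current)))
            pr_warn_ratelimited("share pool: task add group failed, current thread is killed\n");
        else
            pr_warn_ratelimited("share pool: task add group failed, mm populate failed "
                                "(potential no enough memory when -12): %d, spa type is %d\n",
                                ret, spa->type);
    }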
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index f6603e4c87e5d..28fc074791833 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -718,8 +718,11 @@ int sp_group_add_task(int pid, int spg_id) if (populate) { ret = do_mm_populate(mm, spa->va_start, populate, 0); if (ret) { - pr_warn_ratelimited("share pool: task add group failed, mm populate failed " - "(potential no enough memory when -12): %d, spa type is %d\n", + if (unlikely(fatal_signal_pending(current))) + pr_warn_ratelimited("share pool: task add group failed, current thread is killed\n"); + else + pr_warn_ratelimited("share pool: task add group failed, mm populate failed " + "(potential no enough memory when -12): %d, spa type is %d\n", ret, spa->type); down_write(&mm->mmap_sem); sp_munmap_task_areas(mm, spa->link.next); @@ -1427,10 +1430,12 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) sp_add_work_compact(); } if (ret) { - __sp_free(spg, sp_addr, size_aligned, - list_next_entry(mm, sp_node)); - pr_warn_ratelimited("share pool: allocation failed due to mm populate failed" - "(potential no enough memory when -12): %d\n", ret); + __sp_free(spg, sp_addr, size_aligned, list_next_entry(mm, sp_node)); + if (unlikely(fatal_signal_pending(current))) + pr_warn_ratelimited("share pool: allocation failed, current thread is killed\n"); + else + pr_warn_ratelimited("share pool: allocation failed due to mm populate failed" + "(potential no enough memory when -12): %d\n", ret); p = ERR_PTR(ret);
mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE;
From: Liu Shixin <liushixin2@huawei.com>
hulk inclusion
category: bugfix
bugzilla: 47240
CVE: NA
-------------------------------------------------
Patch a222f3415868 ("mm: generalize putback scan functions") combined move_active_pages_to_lru() and putback_inactive_pages() into a single move_pages_to_lru(). But we didn't backport that patch, so move_active_pages_to_lru() still exists. When we moved mem_cgroup_uncharge() in 7ae88534cdd9 ("mm: move mem_cgroup_uncharge out of __page_cache_release()"), move_active_pages_to_lru() should have been changed as well.
Fixes: 7ae88534cdd9 ("mm: move mem_cgroup_uncharge out of __page_cache_release()")
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/vmscan.c | 1 -
 1 file changed, 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c index 92be608b467b6..6f3c655fc8879 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2068,7 +2068,6 @@ static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
if (unlikely(PageCompound(page))) { spin_unlock_irq(&pgdat->lru_lock); - mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); spin_lock_irq(&pgdat->lru_lock); } else