Wang Wensheng (5):
  mm/mmap: Don't merge vma from sharepool
  mm/sharepool: Use mmap_write_[un]lock helper
  mm/sharepool: Return -ENOMEM when allocate hugepage failed
  mm/sharepool: Protect the va reserved for sharepool
  mm/sharepool: Mmap for the current process at first

 include/linux/share_pool.h | 31 +++++++++++++--------
 mm/mmap.c                  | 17 +++++++++---
 mm/mremap.c                |  4 +++
 mm/share_pool.c            | 56 ++++++++++++++++++++++++--------------
 4 files changed, 73 insertions(+), 35 deletions(-)
ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R
---------------------------------------------
A vma mapped from sharepool cannot be merged because the underlying sp_area cannot be merged. Check this in is_mergeable_vma() instead of vma_merge().
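For context, sp_check_vm_share_pool() boils down to a vm_flags test; a minimal sketch, assuming the VM_SHARE_POOL flag used by the share pool code (see include/linux/share_pool.h):

static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
{
	/* true when the mapping was created through the share pool */
	return sp_is_enabled() && (vm_flags & VM_SHARE_POOL);
}

Since is_mergeable_vma() is the common predicate behind both merge directions, rejecting sharepool vmas there covers every path through vma_merge().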
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 mm/mmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 7d04e54494b6..1e5a20d81aa9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -827,6 +827,10 @@ static inline bool is_mergeable_vma(struct vm_area_struct *vma,
 		return false;
 	if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
 		return false;
+	/* don't merge this kind of vma as sp_area couldn't be merged */
+	if (sp_check_vm_share_pool(vm_flags))
+		return false;
+
 	return true;
 }
@@ -976,10 +980,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
-	/* don't merge this kind of vma as sp_area couldn't be merged */
-	if (sp_check_vm_share_pool(vm_flags))
-		return NULL;
-
 	/* Does the input range span an existing VMA? (cases 5 - 8) */
 	curr = find_vma_intersection(mm, prev ? prev->vm_end : 0, end);
ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R
---------------------------------------------
Use the mmap_write_[un]lock helpers. Since the PER_VMA_LOCK series was merged, mmap_write_unlock() is no longer equivalent to a bare up_write(&mm->mmap_lock), so replace the open-coded locking with the helpers.
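For reference, a simplified sketch of the helpers as they look after the PER_VMA_LOCK series (based on include/linux/mmap_lock.h, tracepoint hooks omitted):

static inline void mmap_write_lock(struct mm_struct *mm)
{
	down_write(&mm->mmap_lock);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	/* end all per-VMA write sequences before dropping the rwsem */
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

Open-coding up_write(&mm->mmap_lock) skips vma_end_write_all(), so vmas write-locked during the critical section would still be considered locked afterwards.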
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 mm/share_pool.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 1ca5b867147d..66f14fd38d25 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -1777,9 +1777,9 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr,
 {
 	int err;
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	if (unlikely(!mmget_not_zero(mm))) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		pr_warn("munmap: target mm is exiting\n");
 		return;
 	}
@@ -1789,7 +1789,7 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr,
 	if (err)
 		pr_err("failed to unmap VA %pK when sp munmap, %d\n",
 		       (void *)addr, err);
-	up_write(&mm->mmap_lock);
+	mmap_write_unlock(mm);
 	mmput_async(mm);
 }
@@ -2090,9 +2090,9 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	unsigned long mmap_addr;
 	unsigned long populate = 0;
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	if (unlikely(!mmget_not_zero(mm))) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		pr_warn("sp_map: target mm is exiting\n");
 		return SP_SKIP_ERR;
 	}
@@ -2100,19 +2100,19 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	/* when success, mmap_addr == spa->va_start */
 	mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot);
 	if (IS_ERR_VALUE(mmap_addr)) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		mmput_async(mm);
 		pr_err("%s, sp mmap failed %ld\n", str, mmap_addr);
 		return (int)mmap_addr;
 	}
 
 	if (spa->type == SPA_TYPE_ALLOC) {
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		ret = sp_alloc_populate(mm, spa, populate, ac);
 		if (ret) {
-			down_write(&mm->mmap_lock);
+			mmap_write_lock(mm);
 			do_munmap(mm, mmap_addr, spa_size(spa), NULL);
-			up_write(&mm->mmap_lock);
+			mmap_write_unlock(mm);
 		}
 	} else {
 		ret = sp_k2u_populate(mm, spa);
@@ -2120,7 +2120,7 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 			do_munmap(mm, mmap_addr, spa_size(spa), NULL);
 			pr_info("k2u populate failed, %d\n", ret);
 		}
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 	}
 	mmput_async(mm);
@@ -2735,11 +2735,11 @@ void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid)
 		return ERR_PTR(-EPERM);
 	}
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	ret = __sp_walk_page_range(uva, size, mm, &sp_walk_data);
 	if (ret) {
 		pr_err_ratelimited("walk page range failed %d\n", ret);
-		up_write(&mm->mmap_lock);
+		mmap_write_unlock(mm);
 		return ERR_PTR(ret);
 	}
@@ -2749,7 +2749,7 @@ void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid)
 	else
 		p = vmap(sp_walk_data.pages, sp_walk_data.page_count, VM_MAP,
 			 PAGE_KERNEL);
-	up_write(&mm->mmap_lock);
+	mmap_write_unlock(mm);
 
 	if (!p) {
 		pr_err("vmap(huge) in u2k failed\n");
@@ -2892,9 +2892,9 @@ int mg_sp_walk_page_range(unsigned long uva, unsigned long size,
 		return -ESRCH;
 	}
 
-	down_write(&mm->mmap_lock);
+	mmap_write_lock(mm);
 	ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
-	up_write(&mm->mmap_lock);
+	mmap_write_unlock(mm);
 
 	mmput(mm);
 	put_task_struct(tsk);
ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R
---------------------------------------------
Return -ENOMEM when hugepage allocation fails. do_mm_populate() reports the failure as -EFAULT, so convert it to -ENOMEM in that case to let the caller fall back to normal pages.
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 mm/share_pool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/share_pool.c b/mm/share_pool.c
index 66f14fd38d25..1b9498fc0187 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -2064,6 +2064,8 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa,
 	 * depends on this feature (and MAP_LOCKED) to work correctly.
 	 */
 	ret = do_mm_populate(mm, spa->va_start, populate, 0);
+	if (ac && (ac->sp_flags & SP_HUGEPAGE) && unlikely(ret == -EFAULT))
+		ret = -ENOMEM;
 	if (ret) {
 		if (unlikely(fatal_signal_pending(current)))
 			pr_warn("allocation failed, current thread is killed\n");
ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R
---------------------------------------------
Add protection for the VA range reserved for sharepool: forbid munmap() and mremap() from operating on that range, and reject mmap() address hints inside it unless MAP_SHARE_POOL is specified.
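The userspace-visible effect looks roughly like this (illustrative only; sp_addr/len are assumed to lie inside the sharepool reserved range):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>

static void poke_sharepool_range(void *sp_addr, size_t len)
{
	/* munmap() on a sharepool address is now rejected */
	if (munmap(sp_addr, len))
		perror("munmap");	/* fails with EINVAL */

	/* so is mremap(), for both the old and the new address */
	if (mremap(sp_addr, len, 2 * len, MREMAP_MAYMOVE) == MAP_FAILED)
		perror("mremap");	/* fails with EINVAL */
}

An mmap() hint that falls inside the range is likewise refused with -EINVAL unless MAP_SHARE_POOL is passed.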
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 include/linux/share_pool.h | 31 ++++++++++++++++++++-----------
 mm/mmap.c                  |  9 +++++++++
 mm/mremap.c                |  4 ++++
 3 files changed, 33 insertions(+), 11 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index b0711eea4b73..693ceea5999b 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -158,6 +158,23 @@ static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
 	return false;
 }
 
+static inline bool sp_check_addr(unsigned long addr)
+{
+	if (sp_is_enabled() && mg_is_sharepool_addr(addr))
+		return true;
+	else
+		return false;
+}
+
+static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags)
+{
+	if (sp_is_enabled() && mg_is_sharepool_addr(addr) &&
+	    !(flags & MAP_SHARE_POOL))
+		return true;
+	else
+		return false;
+}
+
 #else /* CONFIG_SHARE_POOL */
 
 static inline int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
@@ -233,14 +250,6 @@ static inline bool mg_is_sharepool_addr(unsigned long addr)
 	return false;
 }
 
-static inline void spa_overview_show(struct seq_file *seq)
-{
-}
-
-static inline void spg_overview_show(struct seq_file *seq)
-{
-}
-
 static inline bool sp_is_enabled(void)
 {
 	return false;
@@ -255,14 +264,14 @@ static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
 	return false;
 }
 
-static inline bool is_vm_huge_special(struct vm_area_struct *vma)
+static inline bool sp_check_addr(unsigned long addr)
 {
 	return false;
 }
 
-static inline int sp_node_id(struct vm_area_struct *vma)
+static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags)
 {
-	return numa_node_id();
+	return false;
 }
 
 #endif /* !CONFIG_SHARE_POOL */
diff --git a/mm/mmap.c b/mm/mmap.c
index 1e5a20d81aa9..eb24efdba25d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1765,6 +1765,9 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr,
 	if (len > mmap_end - mmap_min_addr)
 		return -ENOMEM;
 
+	if (sp_check_mmap_addr(addr, flags))
+		return -EINVAL;
+
 	if (flags & MAP_FIXED)
 		return addr;
 
@@ -1814,6 +1817,9 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 	if (len > mmap_end - mmap_min_addr)
 		return -ENOMEM;
 
+	if (sp_check_mmap_addr(addr, flags))
+		return -EINVAL;
+
 	if (flags & MAP_FIXED)
 		return addr;
 
@@ -3083,6 +3089,9 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 	LIST_HEAD(uf);
 	VMA_ITERATOR(vmi, mm, start);
 
+	if (sp_check_addr(start))
+		return -EINVAL;
+
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
diff --git a/mm/mremap.c b/mm/mremap.c
index b11ce6c92099..add907c0a9af 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -25,6 +25,7 @@
 #include <linux/uaccess.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/mempolicy.h>
+#include <linux/share_pool.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
@@ -936,6 +937,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	if (offset_in_page(addr))
 		return ret;
 
+	if (sp_check_addr(addr) || sp_check_addr(new_addr))
+		return ret;
+
 	old_len = PAGE_ALIGN(old_len);
 	new_len = PAGE_ALIGN(new_len);
 
ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YF5R
---------------------------------------------
When a user calls mg_sp_alloc(), the physical memory is allocated in do_mm_populate() for the first process in the group, and the pages are charged to the memcg of that process.

This is unreasonable and may drive the memcg of the first process into OOM. We should start with the current process, so that the pages are charged to the memcg of the current process.
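For background (not introduced by this patch): the memcg charge follows the mm being populated, so whichever mm is populated first pays for the whole shared area. A rough sketch of that assumption, in terms of the mainline charging helper:

/*
 * do_mm_populate(mm, ...) faults pages into the target mm, and the fault
 * path charges them via the generic helper
 *
 *	int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
 *
 * passing the mm that owns the vma.  With the old ordering that was the
 * first process in the group list; mapping current->mm first moves the
 * charge to the caller of mg_sp_alloc().
 */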
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 mm/share_pool.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 1b9498fc0187..99a46d9f05a1 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -2129,16 +2129,28 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	return ret;
 }
 
-static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context *ac)
+static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context *ac,
+				  struct sp_group_node *spg_node)
 {
-	int ret = -EINVAL;
+	int ret = 0;
 	int mmap_ret = 0;
 	struct mm_struct *mm;
-	struct sp_group_node *spg_node;
+	bool reach_current = false;
+
+	mmap_ret = sp_map_spa_to_mm(current->mm, spa, spg_node->prot, ac, "sp_alloc");
+	if (mmap_ret) {
+		/* Don't skip error for current process */
+		mmap_ret = (mmap_ret == SP_SKIP_ERR) ? -EINVAL : mmap_ret;
+		goto fallocate;
+	}
 
 	/* create mapping for each process in the group */
 	list_for_each_entry(spg_node, &spa->spg->proc_head, proc_node) {
 		mm = spg_node->master->mm;
+		if (mm == current->mm) {
+			reach_current = true;
+			continue;
+		}
 		mmap_ret = sp_map_spa_to_mm(mm, spa, spg_node->prot, ac, "sp_alloc");
 		if (mmap_ret) {
 			/*
@@ -2158,7 +2170,9 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context *
 
 unmap:
 	__sp_free(spa, mm);
-
+	if (!reach_current)
+		sp_munmap(current->mm, spa->va_start, spa_size(spa));
+fallocate:
 	/*
 	 * Sometimes do_mm_populate() allocates some memory and then failed to
 	 * allocate more. (e.g. memory use reaches cgroup limit.)
@@ -2212,7 +2226,7 @@ static void *__mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags,
 		goto out;
 	}
 
-	ret = sp_alloc_mmap_populate(spa, &ac);
+	ret = sp_alloc_mmap_populate(spa, &ac, spg_node);
 	if (ret == -ENOMEM && sp_alloc_fallback(spa, &ac))
 		goto try_again;
 
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/2142 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/N...