From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
--------------------------------------------------
This is not about THP, but the user page table looks just like THP: the user allocates hugepages via a special driver and the resulting vma is not marked with VM_HUGETLB. This commit allows such vmas to be shared to the kernel.
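For illustration, a hedged usage sketch of the interface this enables (not part of the patch; uva and size are placeholders for a driver-provided hugepage-backed mapping):

    /*
     * Sketch: share a driver-allocated, hugepage-backed user buffer
     * (its vma is not marked VM_HUGETLB) back into the kernel.  The
     * page walk changed below detects the huge PMDs on its own.
     */
    void *kva = sp_make_share_u2k(uva, size, current->tgid);
    if (IS_ERR(kva))
        pr_err("u2k share failed: %ld\n", PTR_ERR(kva));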
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 1 + mm/share_pool.c | 44 +++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 6f294911c6af..d95084b8f624 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -178,6 +178,7 @@ struct sp_walk_data { unsigned long uva_aligned; unsigned long page_size; bool is_hugepage; + bool is_page_type_set; pmd_t *pmd; };
diff --git a/mm/share_pool.c b/mm/share_pool.c index 76088952d0a5..60ad48e238c4 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2994,9 +2994,40 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { + struct page *page; struct sp_walk_data *sp_walk_data = walk->private;
+ /* + * There exist a scene in DVPP where the pagetable is huge page but its + * vma doesn't record it, something like THP. + * So we cannot make out whether it is a hugepage map until we access the + * pmd here. If mixed size of pages appear, just return an error. + */ + if (pmd_huge(*pmd)) { + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = true; + } else if (!sp_walk_data->is_hugepage) + return -EFAULT; + + /* To skip pte level walk */ + walk->action = ACTION_CONTINUE; + + page = pmd_page(*pmd); + get_page(page); + sp_walk_data->pages[sp_walk_data->page_count++] = page; + + return 0; + } + + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = false; + } else if (sp_walk_data->is_hugepage) + return -EFAULT; + sp_walk_data->pmd = pmd; + return 0; }
@@ -3140,6 +3171,8 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, sp_walk.pmd_entry = sp_pmd_entry; }
+ sp_walk_data->is_page_type_set = false; + sp_walk_data->page_count = 0; sp_walk_data->page_size = page_size; uva_aligned = ALIGN_DOWN(uva, page_size); sp_walk_data->uva_aligned = uva_aligned; @@ -3164,8 +3197,12 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size,
ret = walk_page_range(mm, uva_aligned, uva_aligned + size_aligned, &sp_walk, sp_walk_data); - if (ret) + if (ret) { + while (sp_walk_data->page_count--) + put_page(pages[sp_walk_data->page_count]); kvfree(pages); + sp_walk_data->pages = NULL; + }
return ret; } @@ -3201,9 +3238,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) int ret = 0; struct mm_struct *mm = current->mm; void *p = ERR_PTR(-ESRCH); - struct sp_walk_data sp_walk_data = { - .page_count = 0, - }; + struct sp_walk_data sp_walk_data; struct vm_struct *area;
check_interrupt_context(); @@ -3544,7 +3579,6 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, return -ESRCH; }
- sp_walk_data->page_count = 0; down_write(&mm->mmap_lock); if (likely(!mm->core_state)) ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
From: Guo Mengqi guomengqi3@huawei.com
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
Fix the following situation: the last process in a group exits while a second process is trying to add itself to that group.

The second process may get an invalid spg, yet the group's use_count has already been increased by 1, so the first process fails to free the group when it exits. The second process then calls sp_group_drop --> free_sp_group, causing a double request of the rwsem.
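For illustration, a minimal sketch of the locking pattern after this fix: a caller that already holds sp_group_sem drops its reference and frees through the new *_locked variant, while lock-free callers keep using sp_group_drop(), whose free_sp_group() takes the semaphore itself.

    /* Sketch: dropping a group reference while sp_group_sem is held. */
    down_write(&sp_group_sem);
    if (atomic_dec_and_test(&spg->use_count))
        free_sp_group_locked(spg);    /* must not retake sp_group_sem */
    up_write(&sp_group_sem);

    /* Sketch: dropping a reference without the lock held. */
    sp_group_drop(spg);               /* free_sp_group() locks internally */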
Signed-off-by: Guo Mengqi guomengqi3@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 60ad48e238c4..a8266a4a7e79 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -698,20 +698,25 @@ static void free_new_spg_id(bool new, int spg_id) free_sp_group_id(spg_id); }
-static void free_sp_group(struct sp_group *spg) +static void free_sp_group_locked(struct sp_group *spg) { fput(spg->file); fput(spg->file_hugetlb); free_spg_stat(spg->id); - down_write(&sp_group_sem); idr_remove(&sp_group_idr, spg->id); - up_write(&sp_group_sem); free_sp_group_id((unsigned int)spg->id); kfree(spg); system_group_count--; WARN(system_group_count < 0, "unexpected group count\n"); }
+static void free_sp_group(struct sp_group *spg) +{ + down_write(&sp_group_sem); + free_sp_group_locked(spg); + up_write(&sp_group_sem); +} + static void sp_group_drop(struct sp_group *spg) { if (atomic_dec_and_test(&spg->use_count)) @@ -4473,14 +4478,15 @@ void sp_group_post_exit(struct mm_struct *mm) sp_proc_stat_drop(stat); }
- /* lockless traverse */ + down_write(&sp_group_sem); list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { spg = spg_node->spg; /* match with refcount inc in sp_group_add_task */ - sp_group_drop(spg); + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group_locked(spg); kfree(spg_node); } - + up_write(&sp_group_sem); kfree(master); }
From: Wang Wensheng wangwensheng4@huawei.com
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
Use the device_id encoded in sp_flags to select the correct DVPP vspace range when the SP_DVPP flag is specified.
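A hedged usage sketch of what the relaxed flag check permits (kva, size, tgt_pid and tgt_spg_id are placeholders; the device id is assumed to be encoded as (id << DEVICE_ID_SHIFT) per the flag layout in share_pool.h):

    /* Sketch: share a kernel buffer into device 1's DVPP vspace range. */
    unsigned long flags = SP_DVPP | (1UL << DEVICE_ID_SHIFT);
    void *uva = sp_make_share_k2u(kva, size, flags, tgt_pid, tgt_spg_id);

    if (IS_ERR(uva))
        pr_err("k2u failed: %ld\n", PTR_ERR(uva));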
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index a8266a4a7e79..087e8f8cbfb3 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2852,10 +2852,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size,
trace_sp_k2u_begin(kc);
- if (sp_flags & ~SP_DVPP) { + if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; } + sp_flags &= ~SP_HUGEPAGE;
if (!current->mm) { pr_err_ratelimited("k2u: kthread is not allowed\n");
From: Guo Mengqi guomengqi3@huawei.com
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-----------------------------------
In sp_mmap(), if the offset is computed as va - MMAP_BASE (or va - DVPP_BASE), a normal sp_alloc pgoff may take the same value as a DVPP pgoff, so DVPP and sp_alloc mappings unexpectedly overlap in the backing file.

To fix the problem, pass the VA itself as the mmap offset: in this scenario, VA values within one task's address space are never the same.
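For illustration, a sketch of the collision with hypothetical addresses:

    /*
     * Old scheme (sketch):
     *   off_normal = va_n - MMAP_SHARE_POOL_START;
     *   off_dvpp   = va_d - sp_dev_va_start[dev];
     * Different VAs can compute to the same offset, so both mappings
     * back onto the same range of spa_file.  With this patch the
     * offset is simply the VA, unique within one task's address space:
     */
    #define addr_offset(spa)    ((spa)->va_start)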
Signed-off-by: Guo Mengqi guomengqi3@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 087e8f8cbfb3..29bbe0732781 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -58,6 +58,11 @@
#define spg_valid(spg) ((spg)->is_alive == true)
+/* Use spa va address as mmap offset. This can work because spa_file + * is setup with 64-bit address space. So va shall be well covered. + */ +#define addr_offset(spa) ((spa)->va_start) + #define byte2kb(size) ((size) >> 10) #define byte2mb(size) ((size) >> 20) #define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10)) @@ -931,22 +936,6 @@ static bool is_device_addr(unsigned long addr) return false; }
-static loff_t addr_offset(struct sp_area *spa) -{ - unsigned long addr; - - if (unlikely(!spa)) { - WARN(1, "invalid spa when calculate addr offset\n"); - return 0; - } - addr = spa->va_start; - - if (!is_device_addr(addr)) - return (loff_t)(addr - MMAP_SHARE_POOL_START); - - return (loff_t)(addr - sp_dev_va_start[spa->device_id]); -} - static struct sp_group *create_spg(int spg_id) { int ret;
From: Zhou Guanghui zhouguanghui1@huawei.com
ascend inclusion
category: Bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
--------------------------------
When a driver uses shared pool memory to share data with user space, user space must not be allowed to modify the area. This prevents users from damaging sensitive data.

When sp_alloc and k2u apply for private memory, the memory can now be requested read-only.
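A hedged usage sketch (size and group id are placeholders): passing SP_PROT_RO makes the user mapping read-only, and clearing VM_MAYWRITE keeps user space from making it writable again with mprotect().

    /* Sketch: allocate share-pool memory that user space may only read. */
    void *addr = sp_alloc(SZ_2M, SP_HUGEPAGE | SP_PROT_RO, spg_id);

    if (IS_ERR_OR_NULL(addr))
        pr_err("read-only sp_alloc failed\n");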
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 3 ++- mm/share_pool.c | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index d95084b8f624..022e61bb6ce4 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -15,6 +15,7 @@ #define SP_HUGEPAGE_ONLY (1 << 1) #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) +#define SP_PROT_RO (1 << 16)
#define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -24,7 +25,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS)
#define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | \ + SP_SPEC_NODE_ID | SP_PROT_RO | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT))
diff --git a/mm/share_pool.c b/mm/share_pool.c index 29bbe0732781..8be3f75d0449 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2325,6 +2325,9 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, if (spg_node) prot = spg_node->prot;
+ if (ac->sp_flags & SP_PROT_RO) + prot = PROT_READ; + /* when success, mmap_addr == spa->va_start */ mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(mmap_addr)) { @@ -2349,6 +2352,10 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, ret = -EINVAL; goto unmap; } + + if (ac->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + /* clean PTE_RDONLY flags or trigger SMMU event */ if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); @@ -2644,6 +2651,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto put_mm; }
+ if (kc && kc->sp_flags & SP_PROT_RO) + prot = PROT_READ; + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); @@ -2656,6 +2666,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
+ if (kc && kc->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + if (is_vm_hugetlb_page(vma)) { ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); if (ret) { @@ -2707,6 +2720,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; + struct sp_k2u_context kc;
down_write(&sp_group_sem); stat = sp_init_process_stat(current, current->mm, spg_none); @@ -2725,8 +2739,8 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un }
spa->kva = kva; - - uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, NULL); + kc.sp_flags = sp_flags; + uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc); __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva));
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
Add the missing initialization of kc.sp_flags in sp_make_share_kva_to_spg(); otherwise an uninitialized (random) value would be used in sp_remap_kva_to_vma().
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 8be3f75d0449..edba445d4bbf 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2781,7 +2781,7 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size, }
spa->kva = kva; - + kc.sp_flags = sp_flags; list_for_each_entry(spg_node, &spg->procs, proc_node) { mm = spg_node->master->mm; kc.state = K2U_NORMAL;
From: Yuan Can yuancan@huawei.com
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
------------------------------------------------------
create_spg_node() may fail with a NULL pointer returned, and in the out_drop_spg_node path that NULL pointer would be dereferenced in free_spg_node().
Signed-off-by: Yuan Can yuancan@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index edba445d4bbf..caf3e89b41c4 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1283,7 +1283,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { ret = PTR_ERR(node); - goto out_drop_spg_node; + goto out_drop_group; }
/* per process statistics initialization */
From: Zhou Guanghui zhouguanghui1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
The single-group mode has no application scenario, so the related branches are deleted.

The boot option "enable_sp_multi_group_mode" no longer takes effect and is removed.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 137 +++++++++--------------------------------------- 1 file changed, 25 insertions(+), 112 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index caf3e89b41c4..076243713f83 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -67,9 +67,6 @@ #define byte2mb(size) ((size) >> 20) #define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10))
-#define SINGLE_GROUP_MODE 1 -#define MULTI_GROUP_MODE 2 - #define MAX_GROUP_FOR_SYSTEM 50000 #define MAX_GROUP_FOR_TASK 3000 #define MAX_PROC_PER_GROUP 1024 @@ -98,8 +95,6 @@ int sysctl_share_pool_map_lock_enable; int sysctl_sp_perf_k2u; int sysctl_sp_perf_alloc;
-static int share_pool_group_mode = SINGLE_GROUP_MODE; - static int system_group_count;
static unsigned int sp_device_number; @@ -1068,12 +1063,6 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) struct sp_group_master *master = mm->sp_group_master; bool exist = false;
- if (share_pool_group_mode == SINGLE_GROUP_MODE && master && - master->count == 1) { - pr_err_ratelimited("at most one sp group for a task is allowed in single mode\n"); - return -EEXIST; - } - master = sp_init_group_master_locked(mm, &exist); if (IS_ERR(master)) return PTR_ERR(master); @@ -2211,72 +2200,30 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (sp_flags & SP_HUGEPAGE_ONLY) sp_flags |= SP_HUGEPAGE;
- if (share_pool_group_mode == SINGLE_GROUP_MODE) { - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - if (spg) { - if (spg_id != SPG_ID_DEFAULT && spg->id != spg_id) { - sp_group_drop(spg); - return -ENODEV; - } - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - if (enable_mdc_default_group) { - int ret = 0; - - ret = sp_group_add_task(current->tgid, spg_id); - if (ret < 0) { - pr_err_ratelimited("add group failed in pass through\n"); - return ret; - } - - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("pass through allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { - spg = spg_none; - } + if (spg_id != SPG_ID_DEFAULT) { + spg = __sp_find_spg(current->pid, spg_id); + if (!spg) { + pr_err_ratelimited("allocation failed, can't find group\n"); + return -ENODEV; } - } else { - if (spg_id != SPG_ID_DEFAULT) { - spg = __sp_find_spg(current->pid, spg_id); - if (!spg) { - pr_err_ratelimited("allocation failed, can't find group\n"); - return -ENODEV; - }
- /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, spg is dead\n"); + return -ENODEV; + }
- if (!is_process_in_group(spg, current->mm)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, task not in group\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - spg = spg_none; + if (!is_process_in_group(spg, current->mm)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, task not in group\n"); + return -ENODEV; } + } else { /* alocation pass through scene */ + spg = spg_none; }
if (sp_flags & SP_HUGEPAGE) { @@ -2892,33 +2839,12 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, kc->size_aligned = size_aligned; kc->sp_flags = sp_flags; kc->spg_id = spg_id; - kc->to_task = false; - return 0; -} - -static int sp_check_k2task(struct sp_k2u_context *kc) -{ - int ret = 0; - int spg_id = kc->spg_id; - - if (share_pool_group_mode == SINGLE_GROUP_MODE) { - struct sp_group *spg = get_first_group(current->mm); + if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) + kc->to_task = true; + else + kc->to_task = false;
- if (!spg) { - if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) - ret = -EINVAL; - else - kc->to_task = true; - } else { - if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) - ret = -EINVAL; - sp_group_drop(spg); - } - } else { - if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) - kc->to_task = true; - } - return ret; + return 0; }
static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) @@ -2963,12 +2889,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, if (ret) return ERR_PTR(ret);
- ret = sp_check_k2task(&kc); - if (ret) { - uva = ERR_PTR(ret); - goto out; - } - if (kc.to_task) uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags); else { @@ -3735,13 +3655,6 @@ static int __init enable_share_k2u_to_group(char *s) } __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group);
-static int __init enable_sp_multi_group_mode(char *s) -{ - share_pool_group_mode = MULTI_GROUP_MODE; - return 1; -} -__setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode); - /*** Statistical and maintenance functions ***/
static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat)
From: Zhou Guanghui zhouguanghui1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
struct sp_mapping is used to manage an address space of the shared pool. During shared pool initialization, the normal address space is created; memory of the current shared pool is allocated from it.
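For illustration, a minimal sketch (assuming only the fields introduced above) of how the per-device ranges of an sp_mapping could be queried; this helper is hypothetical and not part of the patch:

    /* Hypothetical helper: does addr fall in spm's range for device dev? */
    static bool spm_contains(struct sp_mapping *spm, int dev, unsigned long addr)
    {
        return addr >= spm->start[dev] && addr < spm->end[dev];
    }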
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 18 +++++++++++++ mm/share_pool.c | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 022e61bb6ce4..654dc8cc2922 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -101,6 +101,17 @@ struct sp_proc_stat { atomic64_t k2u_size; };
+/* + * address space management + */ +struct sp_mapping { + unsigned long flag; + atomic_t user; + unsigned long start[MAX_DEVID]; + unsigned long end[MAX_DEVID]; + struct rb_root area_root; +}; + /* Processes in the same sp_group can share memory. * Memory layout for share pool: * @@ -142,6 +153,8 @@ struct sp_group { atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; + struct sp_mapping *dvpp; + struct sp_mapping *normal; };
/* a per-process(per mm) struct which manages a sp_group_node list */ @@ -155,6 +168,11 @@ struct sp_group_master { struct list_head node_list; struct mm_struct *mm; struct sp_proc_stat *stat; + /* + * Used to apply for the shared pool memory of the current process. + * For example, sp_alloc non-share memory or k2task. + */ + struct sp_group *local; };
/* diff --git a/mm/share_pool.c b/mm/share_pool.c index 076243713f83..6c70ff72b7af 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -130,6 +130,48 @@ static DECLARE_RWSEM(sp_spg_stat_sem); /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat;
+#define SP_MAPPING_DVPP 0x1 +#define SP_MAPPING_NORMAL 0x2 +static struct sp_mapping *sp_mapping_normal; + +static void sp_mapping_range_init(struct sp_mapping *spm) +{ + int i; + + for (i = 0; i < MAX_DEVID; i++) { + if (spm->flag & SP_MAPPING_NORMAL) { + spm->start[i] = MMAP_SHARE_POOL_START; + spm->end[i] = MMAP_SHARE_POOL_16G_START; + continue; + } + + if (!is_sp_dev_addr_enabled(i)) { + spm->start[i] = MMAP_SHARE_POOL_16G_START + + i * MMAP_SHARE_POOL_16G_START; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_START; + } else { + spm->start[i] = sp_dev_va_start[i]; + spm->end[i] = spm->start[i] + sp_dev_va_size[i]; + } + } +} + +static struct sp_mapping *sp_mapping_create(unsigned long flag) +{ + struct sp_mapping *spm; + + spm = kzalloc(sizeof(struct sp_mapping), GFP_KERNEL); + if (!spm) + return ERR_PTR(-ENOMEM); + + spm->flag = flag; + sp_mapping_range_init(spm); + atomic_set(&spm->user, 0); + spm->area_root = RB_ROOT; + + return spm; +} + /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) @@ -4432,12 +4474,22 @@ static void __init sp_device_number_detect(void)
static int __init share_pool_init(void) { + if (!sp_is_enabled()) + return 0; + /* lockless, as init kthread has no sp operation else */ spg_none = create_spg(GROUP_NONE); /* without free spg_none, not a serious problem */ if (IS_ERR(spg_none) || !spg_none) goto fail;
+ sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); + if (IS_ERR(sp_mapping_normal)) { + sp_group_drop(spg_none); + goto fail; + } + atomic_inc(&sp_mapping_normal->user); + sp_device_number_detect(); proc_sharepool_init();
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
The sp_group_master structure is used only in the sharepool subsystem; no other drivers use it.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 654dc8cc2922..b1b81947fa3f 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -10,6 +10,7 @@ #include <linux/hashtable.h> #include <linux/numa.h> #include <linux/jump_label.h> +#include <linux/kabi.h>
#define SP_HUGEPAGE (1 << 0) #define SP_HUGEPAGE_ONLY (1 << 1) @@ -172,7 +173,7 @@ struct sp_group_master { * Used to apply for the shared pool memory of the current process. * For example, sp_alloc non-share memory or k2task. */ - struct sp_group *local; + KABI_EXTEND(struct sp_group *local) };
/*
From: Zhou Guanghui zhouguanghui1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
Manage the normal and DVPP address spaces of an sp_group separately, and set up the normal and DVPP address spaces of the corresponding groups when a task is added to a group, in sp_alloc, and in k2task.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 6 + mm/share_pool.c | 296 +++++++++++++++++++++++++++++-------- 2 files changed, 237 insertions(+), 65 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index b1b81947fa3f..289877dde2fb 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -40,6 +40,8 @@ #define SPG_ID_AUTO_MIN 100000 #define SPG_ID_AUTO_MAX 199999 #define SPG_ID_AUTO 200000 /* generate group id automatically */ +#define SPG_ID_LOCAL_MIN 200001 +#define SPG_ID_LOCAL_MAX 299999
#define MAX_DEVID 8 /* the max num of Da-vinci devices */
@@ -111,6 +113,10 @@ struct sp_mapping { unsigned long start[MAX_DEVID]; unsigned long end[MAX_DEVID]; struct rb_root area_root; + + struct rb_node *free_area_cache; + unsigned long cached_hole_size; + unsigned long cached_vstart; };
/* Processes in the same sp_group can share memory. diff --git a/mm/share_pool.c b/mm/share_pool.c index 6c70ff72b7af..c3634d62e1fa 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -147,8 +147,8 @@ static void sp_mapping_range_init(struct sp_mapping *spm)
if (!is_sp_dev_addr_enabled(i)) { spm->start[i] = MMAP_SHARE_POOL_16G_START + - i * MMAP_SHARE_POOL_16G_START; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_START; + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } else { spm->start[i] = sp_dev_va_start[i]; spm->end[i] = spm->start[i] + sp_dev_va_size[i]; @@ -172,10 +172,91 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) return spm; }
+static void sp_mapping_destroy(struct sp_mapping *spm) +{ + kfree(spm); +} + +static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) +{ + atomic_inc(&spm->user); + if (spm->flag & SP_MAPPING_DVPP) + spg->dvpp = spm; + else if (spm->flag & SP_MAPPING_NORMAL) + spg->normal = spm; +} + +static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) +{ + if (spm && atomic_dec_and_test(&spm->user)) + sp_mapping_destroy(spm); +} + +/* + * When you set the address space of a group, the normal address space + * is globally unified. When processing the DVPP address space, consider + * the following situations: + * 1. If a process is added to a non-new group, the DVPP address space + * must have been created. If the local group of the process also + * contains the DVPP address space and they are different, this + * scenario is not allowed to avoid address conflict. + * 2. If the DVPP address space does not exist in the local group of the + * process, attach the local group of the process to the DVPP address + * space of the group. + * 3. Add a new group. If the process has applied for the dvpp address + * space (sp_alloc or k2u), attach the new group to the dvpp address + * space of the current process. + * 4. If the process has not applied for the DVPP address space, attach + * the new group and the local group of the current process to the + * newly created DVPP address space. + * + * the caller must hold sp_group_sem + */ +static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_master *master = mm->sp_group_master; + struct sp_group *local = master->local; + struct sp_mapping *spm; + + if (!list_empty(&spg->procs)) { + /* 1 */ + if (local->dvpp && local->dvpp != spg->dvpp) { + pr_info_ratelimited("Duplicate address space, id=%d\n", + spg->id); + return 0; + } + + /* 2 */ + if (!local->dvpp) { + sp_mapping_attach(local, spg->dvpp); + sp_mapping_attach(local, spg->normal); + } + } else { + /* 4 */ + if (!local->dvpp) { + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) + return PTR_ERR(spm); + sp_mapping_attach(local, spm); + sp_mapping_attach(local, sp_mapping_normal); + } + + /* 3 */ + sp_mapping_attach(spg, local->dvpp); + sp_mapping_attach(spg, sp_mapping_normal); + } + + return 0; +} + +static struct sp_group *create_spg(int spg_id); +static void free_new_spg_id(bool new, int spg_id); /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) { + int spg_id; + struct sp_group *spg; struct sp_group_master *master = mm->sp_group_master;
if (master) { @@ -187,16 +268,92 @@ static struct sp_group_master *sp_init_group_master_locked( if (master == NULL) return ERR_PTR(-ENOMEM);
+ spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, + SPG_ID_LOCAL_MAX, GFP_ATOMIC); + if (spg_id < 0) { + kfree(master); + pr_err_ratelimited("generate local group id failed %d\n", spg_id); + return ERR_PTR(spg_id); + } + + spg = create_spg(spg_id); + if (IS_ERR(spg)) { + free_new_spg_id(true, spg_id); + kfree(master); + return (struct sp_group_master *)spg; + } + INIT_LIST_HEAD(&master->node_list); master->count = 0; master->stat = NULL; master->mm = mm; + master->local = spg; mm->sp_group_master = master;
*exist = false; return master; }
+static inline bool is_local_group(int spg_id) +{ + return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; +} + +/* + * If the process is added to a group first, the address space of the local + * group of the process must have been set. If the process is not added to + * a group, directly create or attach the process to the corresponding DVPP + * and normal address space. + */ +static int sp_mapping_group_setup_local(struct mm_struct *mm) +{ + struct sp_group_master *master; + struct sp_mapping *spm; + bool exist = false; + + master = sp_init_group_master_locked(mm, &exist); + if (IS_ERR(master)) + return PTR_ERR(master); + + if (master->local->dvpp) + return 0; + + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) + return PTR_ERR(spm); + sp_mapping_attach(master->local, spm); + sp_mapping_attach(master->local, sp_mapping_normal); + + return 0; +} + +static struct sp_group *sp_get_local_group(struct mm_struct *mm) +{ + int ret; + struct sp_group_master *master; + + down_read(&sp_group_sem); + master = mm->sp_group_master; + if (master && master->local) { + atomic_inc(&master->local->use_count); + up_read(&sp_group_sem); + return master->local; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(mm); + if (ret) { + up_write(&sp_group_sem); + return ERR_PTR(ret); + } + master = mm->sp_group_master; + atomic_inc(&master->local->use_count); + up_write(&sp_group_sem); + + return master->local; +} + static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm) { struct sp_proc_stat *stat; @@ -580,7 +737,7 @@ static void spa_inc_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num += 1; spa_stat.k2u_task_size += size; - update_spg_stat_k2u(size, true, spg_none->stat); + update_spg_stat_k2u(size, true, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num += 1; @@ -603,7 +760,7 @@ static void spa_inc_usage(struct sp_area *spa) spa_stat.total_num += 1; spa_stat.total_size += size;
- if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_inc(&sp_overall_stat.spa_total_num); atomic64_add(size, &sp_overall_stat.spa_total_size); } @@ -626,7 +783,7 @@ static void spa_dec_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num -= 1; spa_stat.k2u_task_size -= size; - update_spg_stat_k2u(size, false, spg_none->stat); + update_spg_stat_k2u(size, false, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num -= 1; @@ -645,7 +802,7 @@ static void spa_dec_usage(struct sp_area *spa) spa_stat.total_num -= 1; spa_stat.total_size -= size;
- if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_dec(&sp_overall_stat.spa_total_num); atomic64_sub(spa->real_size, &sp_overall_stat.spa_total_size); } @@ -730,7 +887,8 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, static void free_sp_group_id(int spg_id) { /* ida operation is protected by an internal spin_lock */ - if (spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) + if ((spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) || + (spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX)) ida_free(&sp_group_id_ida, spg_id); }
@@ -747,8 +905,11 @@ static void free_sp_group_locked(struct sp_group *spg) free_spg_stat(spg->id); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); + sp_mapping_detach(spg, spg->dvpp); + sp_mapping_detach(spg, spg->normal); + if (!is_local_group(spg->id)) + system_group_count--; kfree(spg); - system_group_count--; WARN(system_group_count < 0, "unexpected group count\n"); }
@@ -981,7 +1142,8 @@ static struct sp_group *create_spg(int spg_id) struct user_struct *user = NULL; int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
- if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) { + if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM && + !is_local_group(spg_id))) { pr_err_ratelimited("reach system max group num\n"); return ERR_PTR(-ENOSPC); } @@ -1028,7 +1190,8 @@ static struct sp_group *create_spg(int spg_id) if (ret < 0) goto out_fput_all;
- system_group_count++; + if (!is_local_group(spg_id)) + system_group_count++; return spg;
out_fput_all: @@ -1311,6 +1474,10 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) if (ret) goto out_drop_group;
+ ret = sp_mapping_group_setup(mm, spg); + if (ret) + goto out_drop_group; + node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { ret = PTR_ERR(node); @@ -1592,7 +1759,6 @@ static void __insert_sp_area(struct sp_area *spa)
/* The sp_area cache globals are protected by sp_area_lock */ static struct rb_node *free_sp_area_cache; -static unsigned long cached_hole_size; static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_range() */
/** @@ -1611,11 +1777,12 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, { struct sp_area *spa, *first, *err; struct rb_node *n; - unsigned long vstart = MMAP_SHARE_POOL_START; - unsigned long vend = MMAP_SHARE_POOL_16G_START; + unsigned long vstart; + unsigned long vend; unsigned long addr; unsigned long size_align = ALIGN(size, PMD_SIZE); /* va aligned to 2M */ int device_id, node_id; + struct sp_mapping *mapping;
device_id = sp_flags_device_id(flags); node_id = flags & SP_SPEC_NODE_ID ? sp_flags_node_id(flags) : device_id; @@ -1625,17 +1792,13 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, return ERR_PTR(-EINVAL); }
- if ((flags & SP_DVPP)) { - if (!is_sp_dev_addr_enabled(device_id)) { - vstart = MMAP_SHARE_POOL_16G_START + - device_id * MMAP_SHARE_POOL_16G_SIZE; - vend = vstart + MMAP_SHARE_POOL_16G_SIZE; - } else { - vstart = sp_dev_va_start[device_id]; - vend = vstart + sp_dev_va_size[device_id]; - } - } + if (flags & SP_DVPP) + mapping = spg->dvpp; + else + mapping = spg->normal;
+ vstart = mapping->start[device_id]; + vend = mapping->end[device_id]; spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id); if (unlikely(!spa)) return ERR_PTR(-ENOMEM); @@ -1651,18 +1814,18 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, * Note that sp_free_area may update free_sp_area_cache * without updating cached_hole_size. */ - if (!free_sp_area_cache || size_align < cached_hole_size || - vstart != cached_vstart) { - cached_hole_size = 0; - free_sp_area_cache = NULL; + if (!mapping->free_area_cache || size_align < mapping->cached_hole_size || + vstart != mapping->cached_vstart) { + mapping->cached_hole_size = 0; + mapping->free_area_cache = NULL; }
/* record if we encounter less permissive parameters */ - cached_vstart = vstart; + mapping->cached_vstart = vstart;
/* find starting point for our search */ - if (free_sp_area_cache) { - first = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + if (mapping->free_area_cache) { + first = rb_entry(mapping->free_area_cache, struct sp_area, rb_node); addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1675,7 +1838,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, goto error; }
- n = sp_area_root.rb_node; + n = mapping->area_root.rb_node; first = NULL;
while (n) { @@ -1697,8 +1860,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
/* from the starting point, traverse areas until a suitable hole is found */ while (addr + size_align > first->va_start && addr + size_align <= vend) { - if (addr + cached_hole_size < first->va_start) - cached_hole_size = first->va_start - addr; + if (addr + mapping->cached_hole_size < first->va_start) + mapping->cached_hole_size = first->va_start - addr; addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1736,9 +1899,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
spa_inc_usage(spa); __insert_sp_area(spa); - free_sp_area_cache = &spa->rb_node; - if (spa->spg != spg_none) - list_add_tail(&spa->link, &spg->spa_list); + mapping->free_area_cache = &spa->rb_node; + list_add_tail(&spa->link, &spg->spa_list);
spin_unlock(&sp_area_lock);
@@ -1829,8 +1991,7 @@ static void sp_free_area(struct sp_area *spa) pr_debug("clear spa->kva %ld is not valid\n", spa->kva);
spa_dec_usage(spa); - if (spa->spg != spg_none) - list_del(&spa->link); + list_del(&spa->link);
rb_erase(&spa->rb_node, &sp_area_root); RB_CLEAR_NODE(&spa->rb_node); @@ -1990,7 +2151,7 @@ static void sp_fallocate(struct sp_area *spa)
static void sp_free_unmap_fallocate(struct sp_area *spa) { - if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { down_read(&spa->spg->rw_lock); __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); sp_fallocate(spa); @@ -2195,7 +2356,6 @@ static void trace_sp_alloc_begin(struct sp_alloc_context *ac) static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) { unsigned long cost; - bool is_pass_through = ac->spg == spg_none ? true : false;
if (!sysctl_sp_perf_alloc) return; @@ -2207,7 +2367,8 @@ static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) if (cost >= (unsigned long)sysctl_sp_perf_alloc) { pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", current->comm, current->tgid, current->pid, - va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, is_pass_through); + va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, + is_local_group(ac->spg->id)); } }
@@ -2265,7 +2426,9 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, return -ENODEV; } } else { /* alocation pass through scene */ - spg = spg_none; + spg = sp_get_local_group(current->mm); + if (IS_ERR(spg)) + return PTR_ERR(spg); }
if (sp_flags & SP_HUGEPAGE) { @@ -2288,7 +2451,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - if (spa->spg != spg_none) + if (!is_local_group(spa->spg->id)) __sp_free(spa->spg, spa->va_start, spa->real_size, mm); }
@@ -2353,7 +2516,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret;
unmap: - if (spa->spg != spg_none) + if (!is_local_group(spa->spg->id)) sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); else sp_munmap(mm, spa->va_start, spa->real_size); @@ -2456,7 +2619,7 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - if (spa->spg != spg_none) + if (!is_local_group(spa->spg->id)) sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); else sp_munmap(mm, spa->va_start, spa->real_size); @@ -2482,7 +2645,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node;
- if (spa->spg == spg_none) { + if (is_local_group(spa->spg->id)) { ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); } else { /* create mapping for each process in the group */ @@ -2506,10 +2669,9 @@ static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_alloc_context *ac) { struct sp_group *spg = ac->spg; - bool is_pass_through = spg == spg_none ? true : false;
- /* match sp_alloc_check_prepare */ - if (!is_pass_through) + /* match sp_alloc_prepare */ + if (!is_local_group(spg->id)) up_read(&spg->rw_lock);
if (!result) @@ -2521,9 +2683,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, trace_sp_alloc_finish(ac, spa->va_start); }
- if (!is_pass_through) - sp_group_drop(spg); - + sp_group_drop(spg); sp_dump_stack(); sp_try_to_compact(); } @@ -2705,22 +2865,33 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, */ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags) { + int ret; void *uva; struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; struct sp_k2u_context kc; + struct sp_group *spg;
down_write(&sp_group_sem); - stat = sp_init_process_stat(current, current->mm, spg_none); - up_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(current->mm); + if (ret) { + up_write(&sp_group_sem); + pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret); + return ERR_PTR(ret); + } + + spg = current->mm->sp_group_master->local; + stat = sp_init_process_stat(current, current->mm, spg); if (IS_ERR(stat)) { + up_write(&sp_group_sem); pr_err_ratelimited("k2u_task init process stat failed %lx\n", PTR_ERR(stat)); return stat; } + up_write(&sp_group_sem);
- spa = sp_alloc_area(size, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); + spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in k2u_task (potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -3916,7 +4087,7 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- if (spa->spg == spg_none) /* k2u to task */ + if (is_local_group(spa->spg->id)) /* k2u to task */ seq_printf(seq, "%-10s ", "None"); else { down_read(&spa->spg->rw_lock); @@ -4446,6 +4617,9 @@ void sp_group_post_exit(struct mm_struct *mm) kfree(spg_node); } up_write(&sp_group_sem); + + if (master->local) + sp_group_drop(master->local); kfree(master); }
@@ -4477,17 +4651,9 @@ static int __init share_pool_init(void) if (!sp_is_enabled()) return 0;
- /* lockless, as init kthread has no sp operation else */ - spg_none = create_spg(GROUP_NONE); - /* without free spg_none, not a serious problem */ - if (IS_ERR(spg_none) || !spg_none) - goto fail; - sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); - if (IS_ERR(sp_mapping_normal)) { - sp_group_drop(spg_none); + if (IS_ERR(sp_mapping_normal)) goto fail; - } atomic_inc(&sp_mapping_normal->user);
sp_device_number_detect();
From: Zhou Guanghui zhouguanghui1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
The DVPP address space is per process or per sharing group. During sp_free and unshare, we need to know which address space the given address belongs to.
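A hedged usage sketch of the new interface (error handling is illustrative): the returned id identifies the caller's local group and, with the following patch, can be passed to the free/unshare interfaces that take an address-space id.

    /* Sketch: look up the id of the current task's local group. */
    int id = mg_sp_id_of_current();
    if (id < 0)
        pr_err("no local share-pool group: %d\n", id);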
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 12 ++++++++++++ mm/share_pool.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 289877dde2fb..077816a6fb3b 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -284,6 +284,9 @@ extern bool mg_is_sharepool_addr(unsigned long addr); extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); extern int sp_group_add_task(int pid, int spg_id);
+extern int sp_id_of_current(void); +extern int mg_sp_id_of_current(void); + extern void sp_area_drop(struct vm_area_struct *vma); extern int sp_group_exit(struct mm_struct *mm); extern void sp_group_post_exit(struct mm_struct *mm); @@ -431,6 +434,15 @@ static inline int mg_sp_unshare(unsigned long va, unsigned long size) return -EPERM; }
+static inline int sp_id_of_current(void) +{ + return -EPERM; +} + +static inline int mg_sp_id_of_current(void) +{ + return -EPERM; +}
static inline void sp_init_mm(struct mm_struct *mm) { diff --git a/mm/share_pool.c b/mm/share_pool.c index c3634d62e1fa..39ecde90c01b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1734,6 +1734,43 @@ int sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_group_del_task);
+int sp_id_of_current(void) +{ + int ret, spg_id; + struct sp_group_master *master; + + if (current->flags & PF_KTHREAD || !current->mm) + return -EINVAL; + + down_read(&sp_group_sem); + master = current->mm->sp_group_master; + if (master && master->local) { + spg_id = master->local->id; + up_read(&sp_group_sem); + return spg_id; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(current->mm); + if (ret) { + up_write(&sp_group_sem); + return ret; + } + master = current->mm->sp_group_master; + spg_id = master->local->id; + up_write(&sp_group_sem); + + return spg_id; +} +EXPORT_SYMBOL_GPL(sp_id_of_current); + +int mg_sp_id_of_current(void) +{ + return sp_id_of_current(); +} +EXPORT_SYMBOL_GPL(mg_sp_id_of_current); + /* the caller must hold sp_area_lock */ static void __insert_sp_area(struct sp_area *spa) {
From: Zhou Guanghui zhouguanghui1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
CVE: NA
-------------------------------------------------
The DVPP address space is managed per group. When releasing shared pool memory, the corresponding address space must be found based on the group id.
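A hedged end-to-end sketch combining the updated interfaces (size, flags and error handling are placeholders): pass-through memory is allocated, the caller's local group id is looked up, and the same id is handed back to sp_free() so the right address space can be searched.

    /* Sketch: allocate pass-through memory, then free it by (addr, id). */
    int id = mg_sp_id_of_current();
    void *addr = sp_alloc(SZ_4K, 0, SPG_ID_DEFAULT);

    if (!IS_ERR_OR_NULL(addr) && id > 0)
        sp_free((unsigned long)addr, id);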
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 12 +- mm/share_pool.c | 235 ++++++++++++++++++------------------- 2 files changed, 122 insertions(+), 125 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 077816a6fb3b..8eb964230fd4 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -250,8 +250,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id);
-extern int sp_free(unsigned long addr); -extern int mg_sp_free(unsigned long addr); +extern int sp_free(unsigned long addr, int id); +extern int mg_sp_free(unsigned long addr, int id);
extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); @@ -262,7 +262,7 @@ extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid);
extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); -extern int mg_sp_unshare(unsigned long va, unsigned long size); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int id);
extern int sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data); @@ -392,12 +392,12 @@ static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int return NULL; }
-static inline int sp_free(unsigned long addr) +static inline int sp_free(unsigned long addr, int id) { return -EPERM; }
-static inline int mg_sp_free(unsigned long addr) +static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; } @@ -429,7 +429,7 @@ static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int return -EPERM; }
-static inline int mg_sp_unshare(unsigned long va, unsigned long size) +static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; } diff --git a/mm/share_pool.c b/mm/share_pool.c index 39ecde90c01b..45cdb77a4299 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -642,12 +642,6 @@ static void free_spg_stat(int spg_id) kfree(stat); }
-/* - * Group '0' for k2u_task and pass through. No process will be actually - * added to. - */ -static struct sp_group *spg_none; - /* statistics of all sp area, protected by sp_area_lock */ struct sp_spa_stat { unsigned int total_num; @@ -944,26 +938,6 @@ static int get_task(int pid, struct task_struct **task) return 0; }
-static struct sp_group *get_first_group(struct mm_struct *mm) -{ - struct sp_group *spg = NULL; - struct sp_group_master *master = mm->sp_group_master; - - if (master && master->count >= 1) { - struct sp_group_node *spg_node = NULL; - - spg_node = list_first_entry(&master->node_list, - struct sp_group_node, group_node); - spg = spg_node->spg; - - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - spg = NULL; - } - - return spg; -} - /* * the caller must: * 1. hold spg->rw_lock @@ -988,35 +962,27 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) struct task_struct *tsk = NULL; int ret = 0;
- ret = get_task(pid, &tsk); - if (ret) - return NULL; - if (spg_id == SPG_ID_DEFAULT) { - /* - * Once we encounter a concurrency problem here. - * To fix it, we believe get_task_mm() and mmput() is too - * heavy because we just get the pointer of sp_group. - */ + ret = get_task(pid, &tsk); + if (ret) + return NULL; + task_lock(tsk); if (tsk->mm == NULL) spg = NULL; - else - spg = get_first_group(tsk->mm); + else if (tsk->mm->sp_group_master) + spg = tsk->mm->sp_group_master->local; task_unlock(tsk); + + put_task_struct(tsk); } else { spg = idr_find(&sp_group_idr, spg_id); - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - goto fail; }
- put_task_struct(tsk); - return spg; + if (!spg || !atomic_inc_not_zero(&spg->use_count)) + return NULL;
-fail: - put_task_struct(tsk); - return NULL; + return spg; }
static struct sp_group *__sp_find_spg(int pid, int spg_id) @@ -1772,9 +1738,9 @@ int mg_sp_id_of_current(void) EXPORT_SYMBOL_GPL(mg_sp_id_of_current);
/* the caller must hold sp_area_lock */ -static void __insert_sp_area(struct sp_area *spa) +static void __insert_sp_area(struct sp_mapping *spm, struct sp_area *spa) { - struct rb_node **p = &sp_area_root.rb_node; + struct rb_node **p = &spm->area_root.rb_node; struct rb_node *parent = NULL;
while (*p) { @@ -1791,13 +1757,9 @@ static void __insert_sp_area(struct sp_area *spa) }
rb_link_node(&spa->rb_node, parent, p); - rb_insert_color(&spa->rb_node, &sp_area_root); + rb_insert_color(&spa->rb_node, &spm->area_root); }
-/* The sp_area cache globals are protected by sp_area_lock */ -static struct rb_node *free_sp_area_cache; -static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_range() */ - /** * sp_alloc_area() - Allocate a region of VA from the share pool. * @size: the size of VA to allocate. @@ -1845,10 +1807,10 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, /* * Invalidate cache if we have more permissive parameters. * cached_hole_size notes the largest hole noticed _below_ - * the sp_area cached in free_sp_area_cache: if size fits + * the sp_area cached in free_area_cache: if size fits * into that hole, we want to scan from vstart to reuse - * the hole instead of allocating above free_sp_area_cache. - * Note that sp_free_area may update free_sp_area_cache + * the hole instead of allocating above free_area_cache. + * Note that sp_free_area may update free_area_cache * without updating cached_hole_size. */ if (!mapping->free_area_cache || size_align < mapping->cached_hole_size || @@ -1935,7 +1897,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, spa->device_id = device_id;
spa_inc_usage(spa); - __insert_sp_area(spa); + __insert_sp_area(mapping, spa); mapping->free_area_cache = &spa->rb_node; list_add_tail(&spa->link, &spg->spa_list);
@@ -1950,9 +1912,15 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, }
/* the caller should hold sp_area_lock */ -static struct sp_area *__find_sp_area_locked(unsigned long addr) +static struct sp_area *__find_sp_area_locked(struct sp_group *spg, + unsigned long addr) { - struct rb_node *n = sp_area_root.rb_node; + struct rb_node *n; + + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + n = spg->normal->area_root.rb_node; + else + n = spg->dvpp->area_root.rb_node;
while (n) { struct sp_area *spa; @@ -1970,12 +1938,12 @@ static struct sp_area *__find_sp_area_locked(unsigned long addr) return NULL; }
-static struct sp_area *__find_sp_area(unsigned long addr) +static struct sp_area *__find_sp_area(struct sp_group *spg, unsigned long addr) { struct sp_area *n;
spin_lock(&sp_area_lock); - n = __find_sp_area_locked(addr); + n = __find_sp_area_locked(spg, addr); if (n) atomic_inc(&n->use_count); spin_unlock(&sp_area_lock); @@ -2000,22 +1968,30 @@ static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) */ static void sp_free_area(struct sp_area *spa) { + unsigned long addr = spa->va_start; + struct sp_mapping *spm; + lockdep_assert_held(&sp_area_lock);
- if (free_sp_area_cache) { + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + spm = spa->spg->normal; + else + spm = spa->spg->dvpp; + + if (spm->free_area_cache) { struct sp_area *cache;
- cache = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); if (spa->va_start <= cache->va_start) { - free_sp_area_cache = rb_prev(&spa->rb_node); + spm->free_area_cache = rb_prev(&spa->rb_node); /* * the new cache node may be changed to another region, * i.e. from DVPP region to normal region */ - if (free_sp_area_cache) { - cache = rb_entry(free_sp_area_cache, + if (spm->free_area_cache) { + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); - cached_vstart = cache->region_vstart; + spm->cached_vstart = cache->region_vstart; } /* * We don't try to update cached_hole_size, @@ -2030,7 +2006,7 @@ static void sp_free_area(struct sp_area *spa) spa_dec_usage(spa); list_del(&spa->link);
- rb_erase(&spa->rb_node, &sp_area_root); + rb_erase(&spa->rb_node, &spm->area_root); RB_CLEAR_NODE(&spa->rb_node); kfree(spa); } @@ -2072,7 +2048,7 @@ void sp_area_drop(struct vm_area_struct *vma) * an atomic operation. */ spin_lock(&sp_area_lock); - spa = __find_sp_area_locked(vma->vm_start); + spa = __find_sp_area_locked(vma->vm_mm->sp_group_master->local, vma->vm_start); __sp_area_drop_locked(spa); spin_unlock(&sp_area_lock); } @@ -2204,7 +2180,7 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm int ret = 0;
down_read(&spg->rw_lock); - if (!is_process_in_group(spg, mm)) + if (!is_local_group(spg->id) && !is_process_in_group(spg, mm)) ret = -EPERM; up_read(&spg->rw_lock); return ret; @@ -2217,6 +2193,7 @@ struct sp_free_context { unsigned long addr; struct sp_area *spa; int state; + int spg_id; };
/* when success, __sp_area_drop(spa) should be used */ @@ -2225,10 +2202,18 @@ static int sp_free_get_spa(struct sp_free_context *fc) int ret = 0; unsigned long addr = fc->addr; struct sp_area *spa; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, fc->spg_id); + if (!spg) { + pr_debug("sp free get group failed %d\n", fc->spg_id); + return -EINVAL; + }
fc->state = FREE_CONT;
- spa = __find_sp_area(addr); + spa = __find_sp_area(spg, addr); + sp_group_drop(spg); if (!spa) { pr_debug("sp free invalid input addr %lx\n", addr); return -EINVAL; @@ -2241,46 +2226,37 @@ static int sp_free_get_spa(struct sp_free_context *fc) } fc->spa = spa;
- if (spa->spg != spg_none) { - /* - * Access control: an sp addr can only be freed by - * 1. another task in the same spg - * 2. a kthread - * - * a passthrough addr can only be freed by the applier process - */ - if (!current->mm) - goto check_spa; + if (!current->mm) + goto check_spa;
- ret = sp_check_caller_permission(spa->spg, current->mm); - if (ret < 0) - goto drop_spa; + ret = sp_check_caller_permission(spa->spg, current->mm); + if (ret < 0) + goto drop_spa;
check_spa: - down_write(&spa->spg->rw_lock); - if (!spg_valid(spa->spg)) { - fc->state = FREE_END; - up_write(&spa->spg->rw_lock); - goto drop_spa; - /* we must return success(0) in this situation */ - } - /* the life cycle of spa has a direct relation with sp group */ - if (unlikely(spa->is_dead)) { - up_write(&spa->spg->rw_lock); - pr_err_ratelimited("unexpected double sp free\n"); - dump_stack(); - ret = -EINVAL; - goto drop_spa; - } - spa->is_dead = true; - up_write(&spa->spg->rw_lock); + if (is_local_group(spa->spg->id) && (current->tgid != spa->applier)) { + ret = -EPERM; + goto drop_spa; + }
- } else { - if (current->tgid != spa->applier) { - ret = -EPERM; - goto drop_spa; - } + down_write(&spa->spg->rw_lock); + if (!spg_valid(spa->spg)) { + fc->state = FREE_END; + up_write(&spa->spg->rw_lock); + goto drop_spa; + /* we must return success(0) in this situation */ + } + /* the life cycle of spa has a direct relation with sp group */ + if (unlikely(spa->is_dead)) { + up_write(&spa->spg->rw_lock); + pr_err_ratelimited("unexpected double sp free\n"); + dump_stack(); + ret = -EINVAL; + goto drop_spa; } + spa->is_dead = true; + up_write(&spa->spg->rw_lock); + return 0;
drop_spa: @@ -2291,21 +2267,26 @@ static int sp_free_get_spa(struct sp_free_context *fc) /** * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. + * @id: Address space identifier, which is used to distinguish the addr. * * Return: * * 0 - success. * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int sp_free(unsigned long addr) +int sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { .addr = addr, + .spg_id = id, };
check_interrupt_context();
+ if (current->flags & PF_KTHREAD) + return -EINVAL; + ret = sp_free_get_spa(&fc); if (ret || fc.state == FREE_END) goto out; @@ -2326,9 +2307,9 @@ int sp_free(unsigned long addr) } EXPORT_SYMBOL_GPL(sp_free);
-int mg_sp_free(unsigned long addr) +int mg_sp_free(unsigned long addr, int id) { - return sp_free(addr); + return sp_free(addr, id); } EXPORT_SYMBOL_GPL(mg_sp_free);
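For illustration only (not part of the patch): a minimal caller of the updated interface, assuming spg_id holds a valid group id obtained earlier and that sp_alloc() returns an ERR_PTR on failure as elsewhere in this file. The same id must be passed back so sp_free() can locate the address in the right group's address space.

	void *va = sp_alloc(SZ_2M, SP_HUGEPAGE, spg_id);
	if (!IS_ERR(va)) {
		/* ... use the shared memory ... */
		sp_free((unsigned long)va, spg_id);	/* id tells sp_free which address space to search */
	}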
@@ -2422,6 +2403,11 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (enable_mdc_default_group) spg_id = mdc_default_group_id;
+ if (current->flags & PF_KTHREAD) { + pr_err_ratelimited("allocation failed, task is kthread\n"); + return -EINVAL; + } + if (unlikely(!size || (size >> PAGE_SHIFT) > totalram_pages())) { pr_err_ratelimited("allocation failed, invalid size %lu\n", size); return -EINVAL; @@ -2462,7 +2448,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, pr_err_ratelimited("allocation failed, task not in group\n"); return -ENODEV; } - } else { /* alocation pass through scene */ + } else { /* allocation pass through scene */ spg = sp_get_local_group(current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); @@ -3493,7 +3479,7 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); * * This also means we must trust DVPP channel destroy and guard worker code. */ -static int sp_unshare_uva(unsigned long uva, unsigned long size) +static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) { int ret = 0; struct mm_struct *mm; @@ -3501,14 +3487,21 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) unsigned long uva_aligned; unsigned long size_aligned; unsigned int page_size; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, group_id); + if (!spg) { + pr_debug("sp unshare find group failed %d\n", group_id); + return -EINVAL; + }
/* * at first we guess it's a hugepage addr * we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u */ - spa = __find_sp_area(ALIGN_DOWN(uva, PMD_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE)); if (!spa) { - spa = __find_sp_area(ALIGN_DOWN(uva, PAGE_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE)); if (!spa) { ret = -EINVAL; pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva); @@ -3639,6 +3632,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) out_drop_area: __sp_area_drop(spa); out: + sp_group_drop(spg); return ret; }
@@ -3702,9 +3696,12 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
check_interrupt_context();
+ if (current->flags & PF_KTHREAD) + return -EINVAL; + if (va < TASK_SIZE) { /* user address */ - ret = sp_unshare_uva(va, size); + ret = sp_unshare_uva(va, size, spg_id); } else if (va >= PAGE_OFFSET) { /* kernel address */ ret = sp_unshare_kva(va, size); @@ -3718,9 +3715,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_unshare);
-int mg_sp_unshare(unsigned long va, unsigned long size) +int mg_sp_unshare(unsigned long va, unsigned long size, int id) { - return sp_unshare(va, size, 0, 0); + return sp_unshare(va, size, 0, id); } EXPORT_SYMBOL_GPL(mg_sp_unshare);
@@ -3880,8 +3877,8 @@ int sp_node_id(struct vm_area_struct *vma) if (!sp_is_enabled()) return node_id;
- if (vma) { - spa = __find_sp_area(vma->vm_start); + if (vma && vma->vm_flags & VM_SHARE_POOL) { + spa = __find_sp_area(vma->vm_mm->sp_group_master->local, vma->vm_start); if (spa) { node_id = spa->node_id; __sp_area_drop(spa); @@ -4047,7 +4044,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot) seq_puts(seq, "R"); else if (prot == (PROT_READ | PROT_WRITE)) seq_puts(seq, "RW"); - else /* e.g. spg_none */ + else seq_puts(seq, "-"); }
@@ -4448,7 +4445,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int node_id; struct sp_area *spa;
- spa = __find_sp_area(vma->vm_start); + spa = __find_sp_area(mm->sp_group_master->local, vma->vm_start); if (!spa) { pr_err("share pool: vma is invalid, not from sp mmap\n"); return ret;
From: Zhou Guanghui zhouguanghui1@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
-------------------------------------------------
The management of the address space has been adjusted, so the statistical data processing of the shared pool needs to be adapted accordingly.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Signed-off-by: Zhang Jian zhangjian210@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 69 ++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 29 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 45cdb77a4299..e673b05bda5a 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -699,7 +699,6 @@ struct sp_area { int device_id; }; static DEFINE_SPINLOCK(sp_area_lock); -static struct rb_root sp_area_root = RB_ROOT;
static unsigned long spa_size(struct sp_area *spa) { @@ -4106,14 +4105,13 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, return 0; }
-static void rb_spa_stat_show(struct seq_file *seq) +static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *spm) { struct rb_node *node; struct sp_area *spa, *prev = NULL;
spin_lock(&sp_area_lock); - - for (node = rb_first(&sp_area_root); node; node = rb_next(node)) { + for (node = rb_first(&spm->area_root); node; node = rb_next(node)) { __sp_area_drop_locked(prev);
spa = rb_entry(node, struct sp_area, rb_node); @@ -4121,16 +4119,12 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- if (is_local_group(spa->spg->id)) /* k2u to task */ - seq_printf(seq, "%-10s ", "None"); - else { - down_read(&spa->spg->rw_lock); - if (spg_valid(spa->spg)) /* k2u to group */ - seq_printf(seq, "%-10d ", spa->spg->id); - else /* spg is dead */ - seq_printf(seq, "%-10s ", "Dead"); - up_read(&spa->spg->rw_lock); - } + down_read(&spa->spg->rw_lock); + if (spg_valid(spa->spg)) /* k2u to group */ + seq_printf(seq, "%-10d ", spa->spg->id); + else /* spg is dead */ + seq_printf(seq, "%-10s ", "Dead"); + up_read(&spa->spg->rw_lock);
seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start, @@ -4166,6 +4160,30 @@ static void rb_spa_stat_show(struct seq_file *seq) spin_unlock(&sp_area_lock); }
+static void spa_normal_stat_show(struct seq_file *seq) +{ + spa_stat_of_mapping_show(seq, sp_mapping_normal); +} + +static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data) +{ + struct sp_group *spg = p; + struct seq_file *seq = data; + + if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1) + spa_stat_of_mapping_show(seq, spg->dvpp); + + return 0; +} + +static void spa_dvpp_stat_show(struct seq_file *seq) +{ + down_read(&sp_group_sem); + idr_for_each(&sp_group_idr, idr_spg_dvpp_stat_show_cb, seq); + up_read(&sp_group_sem); +} + + void spa_overview_show(struct seq_file *seq) { unsigned int total_num, alloc_num, k2u_task_num, k2u_spg_num; @@ -4219,12 +4237,11 @@ static int idr_spg_stat_cb(int id, void *p, void *data) struct sp_spg_stat *s = p; struct seq_file *seq = data;
- if (seq != NULL) { - if (id == 0) - seq_puts(seq, "Non Group "); - else - seq_printf(seq, "Group %6d ", id); + if (is_local_group(id) && atomic64_read(&s->size) == 0) + return 0;
+ if (seq != NULL) { + seq_printf(seq, "Group %6d ", id); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4232,11 +4249,7 @@ static int idr_spg_stat_cb(int id, void *p, void *data) byte2kb(atomic64_read(&s->alloc_nsize)), byte2kb(atomic64_read(&s->alloc_hsize))); } else { - if (id == 0) - pr_info("Non Group "); - else - pr_info("Group %6d ", id); - + pr_info("Group %6d ", id); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4280,7 +4293,8 @@ static int spa_stat_show(struct seq_file *seq, void *offset) /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); - rb_spa_stat_show(seq); + spa_normal_stat_show(seq); + spa_dvpp_stat_show(seq); return 0; }
@@ -4317,10 +4331,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data) prot = get_process_prot_locked(id, mm);
seq_printf(seq, "%-8d ", tgid); - if (id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-8d ", id); seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", get_spg_proc_alloc(spg_proc_stat), get_spg_proc_k2u(spg_proc_stat),
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
-------------------------------------------------
When we destroy a vma, we currently look up the spa by vma->vm_start, which requires holding sp_area_lock. If we instead store the spa in the vma, we can get it directly. We need not worry about whether the spa still exists or is about to be freed, since we increased the spa's refcount when it was mapped into the vma.
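A condensed sketch of the idea (adapted from the diff below, not a standalone compilable unit; do_sp_mmap() is a hypothetical stand-in for the unchanged mmap body): sp_mmap() caches the spa in vma->vm_private_data, so sp_area_drop() can drop the reference without an rbtree lookup.

	static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
				     struct sp_area *spa, unsigned long *populate,
				     unsigned long prot, struct vm_area_struct **pvma)
	{
		unsigned long addr;
		struct vm_area_struct *vma;

		addr = do_sp_mmap(mm, file, spa, populate, prot); /* hypothetical helper: the old mmap body */
		if (IS_ERR_VALUE(addr))
			return addr;

		vma = find_vma(mm, addr);
		vma->vm_private_data = spa;	/* remember the spa in the vma */
		if (pvma)
			*pvma = vma;		/* hand the vma back to callers that need it */
		return addr;
	}

	void sp_area_drop(struct vm_area_struct *vma)
	{
		if (!(vma->vm_flags & VM_SHARE_POOL))
			return;

		spin_lock(&sp_area_lock);
		__sp_area_drop_locked(vma->vm_private_data);	/* no __find_sp_area_locked() walk */
		spin_unlock(&sp_area_lock);
	}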
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index e673b05bda5a..c7b185739b61 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -855,7 +855,7 @@ static inline bool check_aoscore_process(struct task_struct *tsk)
static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot); + unsigned long prot, struct vm_area_struct **pvma); static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size);
#define K2U_NORMAL 0 @@ -1504,7 +1504,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) break; }
- addr = sp_mmap(mm, file, spa, &populate, prot); + addr = sp_mmap(mm, file, spa, &populate, prot, NULL); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_lock); @@ -2034,8 +2034,6 @@ static void __sp_area_drop(struct sp_area *spa)
void sp_area_drop(struct vm_area_struct *vma) { - struct sp_area *spa; - if (!(vma->vm_flags & VM_SHARE_POOL)) return;
@@ -2047,8 +2045,7 @@ void sp_area_drop(struct vm_area_struct *vma) * an atomic operation. */ spin_lock(&sp_area_lock); - spa = __find_sp_area_locked(vma->vm_mm->sp_group_master->local, vma->vm_start); - __sp_area_drop_locked(spa); + __sp_area_drop_locked(vma->vm_private_data); spin_unlock(&sp_area_lock); }
@@ -2315,7 +2312,7 @@ EXPORT_SYMBOL_GPL(mg_sp_free); /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot) + unsigned long prot, struct vm_area_struct **pvma) { unsigned long addr = spa->va_start; unsigned long size = spa_size(spa); @@ -2323,6 +2320,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, MAP_SHARE_POOL; unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY; unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; + struct vm_area_struct *vma;
/* Mark the mapped region to be locked. After the MAP_LOCKED is enable, * multiple tasks will preempt resources, causing performance loss. @@ -2338,8 +2336,13 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, pr_err("do_mmap fails %ld\n", addr); } else { BUG_ON(addr != spa->va_start); + vma = find_vma(mm, addr); + vma->vm_private_data = spa; + if (pvma) + *pvma = vma; }
+ return addr; }
@@ -2484,7 +2487,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned long mmap_addr; /* pass through default permission */ unsigned long prot = PROT_READ | PROT_WRITE; - unsigned long sp_addr = spa->va_start; unsigned long populate = 0; struct vm_area_struct *vma;
@@ -2503,7 +2505,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, prot = PROT_READ;
/* when success, mmap_addr == spa->va_start */ - mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(mmap_addr)) { up_write(&mm->mmap_lock); sp_alloc_unmap(mm, spa, spg_node); @@ -2519,14 +2521,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, } ac->populate = populate;
- vma = find_vma(mm, sp_addr); - if (unlikely(!vma)) { - up_write(&mm->mmap_lock); - WARN(1, "allocation failed, can't find %lx vma\n", sp_addr); - ret = -EINVAL; - goto unmap; - } - if (ac->sp_flags & SP_PROT_RO) vma->vm_flags &= ~VM_MAYWRITE;
@@ -2825,15 +2819,12 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, if (kc && kc->sp_flags & SP_PROT_RO) prot = PROT_READ;
- ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); goto put_mm; } - BUG_ON(ret_addr != spa->va_start);
- vma = find_vma(mm, ret_addr); - BUG_ON(vma == NULL); if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
@@ -3876,12 +3867,9 @@ int sp_node_id(struct vm_area_struct *vma) if (!sp_is_enabled()) return node_id;
- if (vma && vma->vm_flags & VM_SHARE_POOL) { - spa = __find_sp_area(vma->vm_mm->sp_group_master->local, vma->vm_start); - if (spa) { - node_id = spa->node_id; - __sp_area_drop(spa); - } + if (vma && vma->vm_flags & VM_SHARE_POOL && vma->vm_private_data) { + spa = vma->vm_private_data; + node_id = spa->node_id; }
return node_id; @@ -4456,13 +4444,12 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int node_id; struct sp_area *spa;
- spa = __find_sp_area(mm->sp_group_master->local, vma->vm_start); + spa = vma->vm_private_data; if (!spa) { pr_err("share pool: vma is invalid, not from sp mmap\n"); return ret; } node_id = spa->node_id; - __sp_area_drop(spa);
retry: page = find_lock_page(mapping, idx);
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
-------------------------------------------------
There are two types of memory allocated from the sharepool: passthrough memory for DVPP and shared memory. Currently we branch to different routines depending on the memory type, both in the allocation and in the free process. Since we have already created a local group for passthrough memory, one more step lets us drop the redundant branches in the allocation and free processes, and in all the fallback paths taken when an error occurs.
Here is the content of this patch:
1. Add every process to its local group when initializing its group_master.
2. Avoid returning the local group in find_sp_group_id_by_pid().
3. Delete the redundant branches in the allocation and free processes.
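A condensed sketch of the resulting code paths (adapted from the diff below, not standalone; the per-process statistics setup is omitted for brevity): once every process sits in its local group, passthrough memory goes through exactly the same group-based free path as shared memory.

	/* each process joins its local group while its group_master is initialized */
	static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg)
	{
		struct sp_group_node *node;

		node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg);
		if (IS_ERR(node))
			return PTR_ERR(node);

		insert_spg_node(spg, node);
		mmget(mm);
		return 0;
	}

	/* ...so the free path no longer branches on "local group or not" */
	static void sp_free_unmap_fallocate(struct sp_area *spa)
	{
		down_read(&spa->spg->rw_lock);
		__sp_free(spa->spg, spa->va_start, spa_size(spa), NULL);
		sp_fallocate(spa);
		up_read(&spa->spg->rw_lock);
	}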
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 139 ++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 52 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index c7b185739b61..b627c9347f78 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -249,13 +249,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; }
+static void free_sp_group_locked(struct sp_group *spg); +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); static struct sp_group *create_spg(int spg_id); static void free_new_spg_id(bool new, int spg_id); /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) { - int spg_id; + int spg_id, ret; struct sp_group *spg; struct sp_group_master *master = mm->sp_group_master;
@@ -271,16 +273,15 @@ static struct sp_group_master *sp_init_group_master_locked( spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, SPG_ID_LOCAL_MAX, GFP_ATOMIC); if (spg_id < 0) { - kfree(master); pr_err_ratelimited("generate local group id failed %d\n", spg_id); - return ERR_PTR(spg_id); + ret = spg_id; + goto free_master; }
spg = create_spg(spg_id); if (IS_ERR(spg)) { - free_new_spg_id(true, spg_id); - kfree(master); - return (struct sp_group_master *)spg; + ret = PTR_ERR(spg); + goto free_spg_id; }
INIT_LIST_HEAD(&master->node_list); @@ -290,8 +291,20 @@ static struct sp_group_master *sp_init_group_master_locked( master->local = spg; mm->sp_group_master = master;
+ ret = local_group_add_task(mm, spg); + if (ret < 0) + goto free_spg; + *exist = false; return master; + +free_spg: + free_sp_group_locked(spg); +free_spg_id: + free_new_spg_id(true, spg_id); +free_master: + kfree(master); + return ERR_PTR(ret); }
static inline bool is_local_group(int spg_id) @@ -670,6 +683,8 @@ static struct sp_overall_stat sp_overall_stat;
enum spa_type { SPA_TYPE_ALLOC = 1, + /* NOTE: reorganize after the statisical structure is reconstructed. */ + SPA_TYPE_ALLOC_PRIVATE = SPA_TYPE_ALLOC, SPA_TYPE_K2TASK, SPA_TYPE_K2SPG, }; @@ -1037,7 +1052,7 @@ EXPORT_SYMBOL_GPL(sp_group_id_by_pid); */ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) { - int ret = 0; + int ret = 0, real_count; struct sp_group_node *node; struct sp_group_master *master = NULL; struct task_struct *tsk; @@ -1062,18 +1077,28 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) goto out_up_read; }
- if (!master->count) { + /* + * There is a local group for each process which is used for + * passthrough allocation. The local group is a internal + * implementation for convenience and is not attempt to bother + * the user. + */ + real_count = master->count - 1; + if (real_count <= 0) { ret = -ENODEV; goto out_up_read; } - if ((unsigned int)*num < master->count) { + if ((unsigned int)*num < real_count) { ret = -E2BIG; goto out_up_read; } - *num = master->count; + *num = real_count;
- list_for_each_entry(node, &master->node_list, group_node) + list_for_each_entry(node, &master->node_list, group_node) { + if (is_local_group(node->spg->id)) + continue; *(spg_ids++) = node->spg->id; + }
out_up_read: up_read(&sp_group_sem); @@ -1245,7 +1270,7 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) return -EEXIST; }
- if (master->count + 1 == MAX_GROUP_FOR_TASK) { + if (master->count == MAX_GROUP_FOR_TASK) { pr_err("task reaches max group num\n"); return -ENOSPC; } @@ -1289,6 +1314,29 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) return 0; }
+static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_node *node; + struct spg_proc_stat *stat; + + node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); + if (IS_ERR(node)) + return PTR_ERR(node); + + /* use current just to avoid compile error, rebuild in following patch */ + stat = sp_init_process_stat(current, mm, spg); + if (IS_ERR(stat)) { + free_sp_group_locked(spg); + pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); + return PTR_ERR(stat); + } + + insert_spg_node(spg, node); + mmget(mm); + + return 0; +} + /* the caller must down_write(&spg->rw_lock) */ static void delete_spg_node(struct sp_group *spg, struct sp_group_node *node) { @@ -2160,15 +2208,10 @@ static void sp_fallocate(struct sp_area *spa)
static void sp_free_unmap_fallocate(struct sp_area *spa) { - if (!is_local_group(spa->spg->id)) { - down_read(&spa->spg->rw_lock); - __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); - sp_fallocate(spa); - up_read(&spa->spg->rw_lock); - } else { - sp_munmap(current->mm, spa->va_start, spa_size(spa)); - sp_fallocate(spa); - } + down_read(&spa->spg->rw_lock); + __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); + sp_fallocate(spa); + up_read(&spa->spg->rw_lock); }
static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm) @@ -2176,9 +2219,10 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm int ret = 0;
down_read(&spg->rw_lock); - if (!is_local_group(spg->id) && !is_process_in_group(spg, mm)) + if (!is_process_in_group(spg, mm)) ret = -EPERM; up_read(&spg->rw_lock); + return ret; }
@@ -2363,6 +2407,7 @@ struct sp_alloc_context { struct timespec64 start; struct timespec64 end; bool have_mbind; + enum spa_type type; };
static void trace_sp_alloc_begin(struct sp_alloc_context *ac) @@ -2450,10 +2495,13 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, pr_err_ratelimited("allocation failed, task not in group\n"); return -ENODEV; } + ac->type = SPA_TYPE_ALLOC; } else { /* allocation pass through scene */ spg = sp_get_local_group(current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); + down_read(&spg->rw_lock); + ac->type = SPA_TYPE_ALLOC_PRIVATE; }
if (sp_flags & SP_HUGEPAGE) { @@ -2476,8 +2524,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - if (!is_local_group(spa->spg->id)) - __sp_free(spa->spg, spa->va_start, spa->real_size, mm); + __sp_free(spa->spg, spa->va_start, spa->real_size, mm); }
static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, @@ -2532,10 +2579,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret;
unmap: - if (!is_local_group(spa->spg->id)) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); return ret; }
@@ -2635,10 +2679,7 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - if (!is_local_group(spa->spg->id)) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
if (unlikely(fatal_signal_pending(current))) pr_warn_ratelimited("allocation failed, current thread is killed\n"); @@ -2661,34 +2702,30 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node;
- if (is_local_group(spa->spg->id)) { - ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); - } else { - /* create mapping for each process in the group */ - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - mm = spg_node->master->mm; - mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); - if (mmap_ret) { - if (ac->state != ALLOC_COREDUMP) - return mmap_ret; - ac->state = ALLOC_NORMAL; - continue; - } - ret = mmap_ret; + /* create mapping for each process in the group */ + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + mm = spg_node->master->mm; + mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); + if (mmap_ret) { + if (ac->state != ALLOC_COREDUMP) + return mmap_ret; + ac->state = ALLOC_NORMAL; + continue; } + ret = mmap_ret; } + return ret; }
/* spa maybe an error pointer, so introduce variable spg */ static void sp_alloc_finish(int result, struct sp_area *spa, - struct sp_alloc_context *ac) + struct sp_alloc_context *ac) { struct sp_group *spg = ac->spg;
/* match sp_alloc_prepare */ - if (!is_local_group(spg->id)) - up_read(&spg->rw_lock); + up_read(&spg->rw_lock);
if (!result) sp_update_process_stat(current, true, spa); @@ -2728,7 +2765,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
try_again: spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg, - SPA_TYPE_ALLOC, current->tgid); + ac.type, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in allocation(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -4650,8 +4687,6 @@ void sp_group_post_exit(struct mm_struct *mm) } up_write(&sp_group_sem);
- if (master->local) - sp_group_drop(master->local); kfree(master); }
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
-------------------------------------------------
A few structures must already have been created by the time a process enters the sharepool subsystem, whether by allocating sharepool memory, being added into an spg, doing k2u, and so on.

Currently we create those structures just before we actually need them. For example, we find or create an sp_spa_stat after a successful memory allocation and before updating the statistical structure. The creation of a new structure may fail due to OOM, in which case we have to reclaim the memory already allocated and revert everything done before, or we simply forget to do that and a potential memory leak occurs. This design makes it hard to tell when a structure is actually created, and we always worry about potential memory leaks when changing the code around it.

A better solution is to initialize all those structures at the same time, when a process joins the sharepool subsystem. In the future, the unnecessary statistical structures will be removed.
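A condensed sketch of the new single entry point (adapted from the diff below, not standalone): all per-process structures are created together, with one unwind path on failure.

	/* The caller must hold sp_group_sem */
	static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm)
	{
		int ret;
		struct sp_group_master *master;

		if (mm->sp_group_master)
			return 0;		/* already initialized */

		master = kmalloc(sizeof(*master), GFP_KERNEL);
		if (!master)
			return -ENOMEM;

		INIT_LIST_HEAD(&master->node_list);
		master->count = 0;
		master->mm = mm;
		mm->sp_group_master = master;

		ret = sp_init_proc_stat(mm, tsk);	/* per-process statistics */
		if (ret)
			goto free_master;

		ret = init_local_group(mm);		/* local group plus dvpp/normal mappings */
		if (ret)
			goto put_stat;

		return 0;

	put_stat:
		sp_proc_stat_drop(master->stat);
	free_master:
		mm->sp_group_master = NULL;
		kfree(master);
		return ret;
	}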
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 4 - mm/share_pool.c | 278 ++++++++++++++++--------------------- 2 files changed, 116 insertions(+), 166 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 8eb964230fd4..51710669b1a7 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -507,10 +507,6 @@ static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) return NULL; }
-static inline void sp_proc_stat_drop(struct sp_proc_stat *stat) -{ -} - static inline void spa_overview_show(struct seq_file *seq) { } diff --git a/mm/share_pool.c b/mm/share_pool.c index b627c9347f78..75339e9d130e 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -249,33 +249,22 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; }
-static void free_sp_group_locked(struct sp_group *spg); -static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); static struct sp_group *create_spg(int spg_id); static void free_new_spg_id(bool new, int spg_id); -/* The caller must hold sp_group_sem */ -static struct sp_group_master *sp_init_group_master_locked( - struct mm_struct *mm, bool *exist) +static void free_sp_group_locked(struct sp_group *spg); +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); +static int init_local_group(struct mm_struct *mm) { int spg_id, ret; struct sp_group *spg; + struct sp_mapping *spm; struct sp_group_master *master = mm->sp_group_master;
- if (master) { - *exist = true; - return master; - } - - master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); - if (master == NULL) - return ERR_PTR(-ENOMEM); - spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, SPG_ID_LOCAL_MAX, GFP_ATOMIC); if (spg_id < 0) { pr_err_ratelimited("generate local group id failed %d\n", spg_id); - ret = spg_id; - goto free_master; + return spg_id; }
spg = create_spg(spg_id); @@ -284,60 +273,73 @@ static struct sp_group_master *sp_init_group_master_locked( goto free_spg_id; }
- INIT_LIST_HEAD(&master->node_list); - master->count = 0; - master->stat = NULL; - master->mm = mm; master->local = spg; - mm->sp_group_master = master; + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) { + ret = PTR_ERR(spm); + goto free_spg; + } + sp_mapping_attach(master->local, spm); + sp_mapping_attach(master->local, sp_mapping_normal);
ret = local_group_add_task(mm, spg); if (ret < 0) + /* The spm would be released while destroying the spg*/ goto free_spg;
- *exist = false; - return master; + return 0;
free_spg: free_sp_group_locked(spg); + master->local = NULL; free_spg_id: free_new_spg_id(true, spg_id); -free_master: - kfree(master); - return ERR_PTR(ret); -}
-static inline bool is_local_group(int spg_id) -{ - return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; + return ret; }
-/* - * If the process is added to a group first, the address space of the local - * group of the process must have been set. If the process is not added to - * a group, directly create or attach the process to the corresponding DVPP - * and normal address space. - */ -static int sp_mapping_group_setup_local(struct mm_struct *mm) +static void sp_proc_stat_drop(struct sp_proc_stat *stat); +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk); +/* The caller must hold sp_group_sem */ +static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm) { + int ret; struct sp_group_master *master; - struct sp_mapping *spm; - bool exist = false; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return PTR_ERR(master);
- if (master->local->dvpp) + if (mm->sp_group_master) return 0;
- spm = sp_mapping_create(SP_MAPPING_DVPP); - if (IS_ERR(spm)) - return PTR_ERR(spm); - sp_mapping_attach(master->local, spm); - sp_mapping_attach(master->local, sp_mapping_normal); + master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + INIT_LIST_HEAD(&master->node_list); + master->count = 0; + master->mm = mm; + mm->sp_group_master = master; + + ret = sp_init_proc_stat(mm, tsk); + if (ret) + goto free_master; + + ret = init_local_group(mm); + if (ret) + goto put_stat;
return 0; + +put_stat: + sp_proc_stat_drop(master->stat); +free_master: + mm->sp_group_master = NULL; + kfree(master); + + return ret; +} + +static inline bool is_local_group(int spg_id) +{ + return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; }
static struct sp_group *sp_get_local_group(struct mm_struct *mm) @@ -355,7 +357,7 @@ static struct sp_group *sp_get_local_group(struct mm_struct *mm) up_read(&sp_group_sem);
down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(mm); + ret = sp_init_group_master_locked(current, mm); if (ret) { up_write(&sp_group_sem); return ERR_PTR(ret); @@ -403,37 +405,29 @@ static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm, return stat; }
-static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, - struct mm_struct *mm, struct task_struct *tsk) +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk) { struct sp_proc_stat *stat; int alloc_id, tgid = tsk->tgid; - - down_write(&sp_proc_stat_sem); - stat = master->stat; - if (stat) { - up_write(&sp_proc_stat_sem); - return stat; - } + struct sp_group_master *master = mm->sp_group_master;
stat = create_proc_stat(mm, tsk); - if (IS_ERR(stat)) { - up_write(&sp_proc_stat_sem); - return stat; - } + if (IS_ERR(stat)) + return PTR_ERR(stat);
+ down_write(&sp_proc_stat_sem); alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); if (alloc_id < 0) { up_write(&sp_proc_stat_sem); pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id); kfree(stat); - return ERR_PTR(alloc_id); + return alloc_id; }
master->stat = stat; up_write(&sp_proc_stat_sem);
- return stat; + return 0; }
static void update_spg_stat_alloc(unsigned long size, bool inc, @@ -547,18 +541,14 @@ static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id) return stat; }
-static struct spg_proc_stat *sp_init_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, struct sp_group *spg) +static struct spg_proc_stat *sp_init_spg_proc_stat(struct sp_proc_stat *proc_stat, + struct sp_group *spg) { struct spg_proc_stat *stat; int spg_id = spg->id; /* visit spg id locklessly */ struct sp_spg_stat *spg_stat = spg->stat;
- stat = find_spg_proc_stat(proc_stat, tgid, spg_id); - if (stat) - return stat; - - stat = create_spg_proc_stat(tgid, spg_id); + stat = create_spg_proc_stat(proc_stat->tgid, spg_id); if (IS_ERR(stat)) return stat;
@@ -575,31 +565,6 @@ static struct spg_proc_stat *sp_init_spg_proc_stat( return stat; }
-/* - * The caller must - * 1. ensure no concurrency problem for task_struct and mm_struct. - * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) - */ -static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk, - struct mm_struct *mm, struct sp_group *spg) -{ - struct sp_group_master *master; - bool exist; - struct sp_proc_stat *proc_stat; - struct spg_proc_stat *spg_proc_stat; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return (struct spg_proc_stat *)master; - - proc_stat = sp_init_proc_stat(master, mm, tsk); - if (IS_ERR(proc_stat)) - return (struct spg_proc_stat *)proc_stat; - - spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg); - return spg_proc_stat; -} - static struct sp_spg_stat *create_spg_stat(int spg_id) { struct sp_spg_stat *stat; @@ -846,9 +811,9 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, enum spa_type type = spa->type;
down_write(&sp_group_sem); - stat = sp_init_process_stat(tsk, tsk->mm, spa->spg); + stat = find_spg_proc_stat(tsk->mm->sp_group_master->stat, tsk->tgid, spa->spg->id); up_write(&sp_group_sem); - if (unlikely(IS_ERR(stat))) + if (!stat) return;
update_spg_proc_stat(size, inc, stat, type); @@ -1253,26 +1218,27 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str }
/* the caller must hold sp_group_sem */ -static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) +static int mm_add_group_init(struct task_struct *tsk, struct mm_struct *mm, + struct sp_group *spg) { - struct sp_group_master *master = mm->sp_group_master; - bool exist = false; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return PTR_ERR(master); - - if (!exist) - return 0; + int ret; + struct sp_group_master *master;
- if (is_process_in_group(spg, mm)) { - pr_err_ratelimited("task already in target group, id=%d\n", spg->id); - return -EEXIST; - } + if (!mm->sp_group_master) { + ret = sp_init_group_master_locked(tsk, mm); + if (ret) + return ret; + } else { + if (is_process_in_group(spg, mm)) { + pr_err_ratelimited("task already in target group, id=%d\n", spg->id); + return -EEXIST; + }
- if (master->count == MAX_GROUP_FOR_TASK) { - pr_err("task reaches max group num\n"); - return -ENOSPC; + master = mm->sp_group_master; + if (master->count == MAX_GROUP_FOR_TASK) { + pr_err("task reaches max group num\n"); + return -ENOSPC; + } }
return 0; @@ -1311,29 +1277,13 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node)
spg->proc_num++; list_add_tail(&node->proc_node, &spg->procs); - return 0; -} - -static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) -{ - struct sp_group_node *node; - struct spg_proc_stat *stat; - - node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); - if (IS_ERR(node)) - return PTR_ERR(node); - - /* use current just to avoid compile error, rebuild in following patch */ - stat = sp_init_process_stat(current, mm, spg); - if (IS_ERR(stat)) { - free_sp_group_locked(spg); - pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - return PTR_ERR(stat); - } - - insert_spg_node(spg, node); - mmget(mm);
+ /* + * The only way where sp_init_spg_proc_stat got failed is that there is no + * memory for sp_spg_stat. We will avoid this failure when we put sp_spg_stat + * into sp_group_node later. + */ + sp_init_spg_proc_stat(node->master->stat, spg); return 0; }
@@ -1356,6 +1306,20 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg, kfree(spg_node); }
+static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_node *node; + + node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); + if (IS_ERR(node)) + return PTR_ERR(node); + + insert_spg_node(spg, node); + mmget(mm); + + return 0; +} + /** * sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. @@ -1380,7 +1344,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) int ret = 0; bool id_newly_generated = false; struct sp_area *spa, *prev = NULL; - struct spg_proc_stat *stat;
check_interrupt_context();
@@ -1483,29 +1446,26 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } }
- ret = mm_add_group_init(mm, spg); - if (ret) + down_write(&spg->rw_lock); + ret = mm_add_group_init(tsk, mm, spg); + if (ret) { + up_write(&spg->rw_lock); goto out_drop_group; + }
ret = sp_mapping_group_setup(mm, spg); - if (ret) + if (ret) { + up_write(&spg->rw_lock); goto out_drop_group; + }
node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { + up_write(&spg->rw_lock); ret = PTR_ERR(node); goto out_drop_group; }
- /* per process statistics initialization */ - stat = sp_init_process_stat(tsk, mm, spg); - if (IS_ERR(stat)) { - ret = PTR_ERR(stat); - pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - goto out_drop_spg_node; - } - - down_write(&spg->rw_lock); ret = insert_spg_node(spg, node); if (unlikely(ret)) { up_write(&spg->rw_lock); @@ -1757,7 +1717,7 @@ int sp_id_of_current(void)
down_read(&sp_group_sem); master = current->mm->sp_group_master; - if (master && master->local) { + if (master) { spg_id = master->local->id; up_read(&sp_group_sem); return spg_id; @@ -1765,7 +1725,7 @@ int sp_id_of_current(void) up_read(&sp_group_sem);
down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(current->mm); + ret = sp_init_group_master_locked(current, current->mm); if (ret) { up_write(&sp_group_sem); return ret; @@ -2924,7 +2884,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un struct sp_group *spg;
down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(current->mm); + ret = sp_init_group_master_locked(current, current->mm); if (ret) { up_write(&sp_group_sem); pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret); @@ -2932,13 +2892,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un }
spg = current->mm->sp_group_master->local; - stat = sp_init_process_stat(current, current->mm, spg); - if (IS_ERR(stat)) { - up_write(&sp_group_sem); - pr_err_ratelimited("k2u_task init process stat failed %lx\n", - PTR_ERR(stat)); - return stat; - } + stat = find_spg_proc_stat(current->mm->sp_group_master->stat, current->tgid, spg->id); up_write(&sp_group_sem);
spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); @@ -3959,7 +3913,7 @@ static void free_sp_proc_stat(struct sp_proc_stat *stat) }
/* the caller make sure stat is not NULL */ -void sp_proc_stat_drop(struct sp_proc_stat *stat) +static void sp_proc_stat_drop(struct sp_proc_stat *stat) { if (atomic_dec_and_test(&stat->use_count)) free_sp_proc_stat(stat);
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
-------------------------------------------------
1. Add a list to sp_mapping that records all the sp_groups attached to it.
2. Initialize the sp_mapping for the local group when it is created, so that when we add a task to a group we merge the dvpp mapping of its local group.
3. The dvpp mappings of two groups can be merged if and only if at least one of them is empty; the empty mapping is then dropped and the other mapping is attached to both groups. This requires traversing all the groups attached to the mapping being dropped.
4. A mapping is considered empty when no spa has been allocated from it and its address range is still the default.
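The merge rule, condensed from the diff below (not standalone): a mapping is only mergeable while empty, and merging re-attaches every group that pointed at the dropped mapping.

	static bool is_mapping_empty(struct sp_mapping *spm)
	{
		/* no spa has been allocated from this mapping yet */
		return RB_EMPTY_ROOT(&spm->area_root);
	}

	/* merge old into new; old is destroyed afterwards */
	static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old)
	{
		struct sp_group *spg, *tmp;

		if (new == old)
			return;

		/* every group attached to the old mapping is re-pointed at the new one */
		list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) {
			list_move_tail(&spg->mnode, &new->group_head);
			spg->dvpp = new;
		}

		atomic_add(atomic_read(&old->user), &new->user);
		sp_mapping_destroy(old);
	}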
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 9 ++++-- mm/share_pool.c | 65 +++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 51710669b1a7..f002370ab5f8 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -117,6 +117,9 @@ struct sp_mapping { struct rb_node *free_area_cache; unsigned long cached_hole_size; unsigned long cached_vstart; + + /* list head for all groups attached to this mapping, dvpp mapping only */ + struct list_head group_head; };
/* Processes in the same sp_group can share memory. @@ -160,8 +163,10 @@ struct sp_group { atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; - struct sp_mapping *dvpp; - struct sp_mapping *normal; + /* list node for dvpp mapping */ + struct list_head mnode; + struct sp_mapping *dvpp; + struct sp_mapping *normal; };
/* a per-process(per mm) struct which manages a sp_group_node list */ diff --git a/mm/share_pool.c b/mm/share_pool.c index 75339e9d130e..e38436b25efc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -168,6 +168,7 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) sp_mapping_range_init(spm); atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; + INIT_LIST_HEAD(&spm->group_head);
return spm; } @@ -180,18 +181,45 @@ static void sp_mapping_destroy(struct sp_mapping *spm) static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { atomic_inc(&spm->user); - if (spm->flag & SP_MAPPING_DVPP) + if (spm->flag & SP_MAPPING_DVPP) { spg->dvpp = spm; - else if (spm->flag & SP_MAPPING_NORMAL) + list_add_tail(&spg->mnode, &spm->group_head); + } else if (spm->flag & SP_MAPPING_NORMAL) spg->normal = spm; }
static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { - if (spm && atomic_dec_and_test(&spm->user)) + if (!spm) + return; + if (spm->flag & SP_MAPPING_DVPP) + list_del(&spg->mnode); + if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm); }
+/* merge old mapping to new, and the old mapping would be destroyed */ +static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old) +{ + struct sp_group *spg, *tmp; + + if (new == old) + return; + + list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { + list_move_tail(&spg->mnode, &new->group_head); + spg->dvpp = new; + } + + atomic_add(atomic_read(&old->user), &new->user); + sp_mapping_destroy(old); +} + +static bool is_mapping_empty(struct sp_mapping *spm) +{ + return RB_EMPTY_ROOT(&spm->area_root); +} + /* * When you set the address space of a group, the normal address space * is globally unified. When processing the DVPP address space, consider @@ -216,32 +244,18 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; struct sp_group *local = master->local; - struct sp_mapping *spm;
if (!list_empty(&spg->procs)) { - /* 1 */ - if (local->dvpp && local->dvpp != spg->dvpp) { - pr_info_ratelimited("Duplicate address space, id=%d\n", - spg->id); - return 0; - } - - /* 2 */ - if (!local->dvpp) { - sp_mapping_attach(local, spg->dvpp); - sp_mapping_attach(local, spg->normal); + if (is_mapping_empty(local->dvpp)) + sp_mapping_merge(spg->dvpp, local->dvpp); + else if (is_mapping_empty(spg->dvpp)) + sp_mapping_merge(local->dvpp, spg->dvpp); + else { + pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id); + return -EINVAL; } } else { - /* 4 */ - if (!local->dvpp) { - spm = sp_mapping_create(SP_MAPPING_DVPP); - if (IS_ERR(spm)) - return PTR_ERR(spm); - sp_mapping_attach(local, spm); - sp_mapping_attach(local, sp_mapping_normal); - } - - /* 3 */ + /* the mapping of local group is always set */ sp_mapping_attach(spg, local->dvpp); sp_mapping_attach(spg, sp_mapping_normal); } @@ -1121,6 +1135,7 @@ static struct sp_group *create_spg(int spg_id) atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); INIT_LIST_HEAD(&spg->spa_list); + INIT_LIST_HEAD(&spg->mnode); init_rwsem(&spg->rw_lock);
sprintf(name, "sp_group_%d", spg_id);
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
-------------------------------------------------
When SPG_FLAG_NON_DVPP is specified to sp_group_add_task, we don't create a DVPP mapping for the newly created sp_group, and the new group cannot allocate DVPP memory.
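A condensed sketch of the flag's effect for a newly created group (adapted from the diff below, not standalone; the merge path for already-populated groups is unchanged and omitted here):

	static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg)
	{
		struct sp_group *local = mm->sp_group_master->local;

		if (!(spg->flag & SPG_FLAG_NON_DVPP))
			/* the mapping of the local group is always set */
			sp_mapping_attach(spg, local->dvpp);
		if (!spg->normal)
			sp_mapping_attach(spg, sp_mapping_normal);

		return 0;
	}

A later DVPP allocation from such a group finds spg->dvpp unset, so sp_alloc_area() rejects it with -EINVAL ("non DVPP spg").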
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/share_pool.h | 7 +++-- mm/share_pool.c | 60 ++++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 31 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index f002370ab5f8..1911cd35843b 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -43,6 +43,9 @@ #define SPG_ID_LOCAL_MIN 200001 #define SPG_ID_LOCAL_MAX 299999
+#define SPG_FLAG_NON_DVPP (1 << 0) +#define SPG_FLAG_MASK (SPG_FLAG_NON_DVPP) + #define MAX_DEVID 8 /* the max num of Da-vinci devices */
extern int sysctl_share_pool_hugepage_enable; @@ -146,6 +149,7 @@ struct sp_mapping { */ struct sp_group { int id; + unsigned long flag; struct file *file; struct file *file_hugetlb; /* number of process in this group */ @@ -286,9 +290,6 @@ extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, in extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr);
-extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); -extern int sp_group_add_task(int pid, int spg_id); - extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void);
diff --git a/mm/share_pool.c b/mm/share_pool.c index e38436b25efc..8297635c6dac 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -221,31 +221,20 @@ static bool is_mapping_empty(struct sp_mapping *spm) }
/* - * When you set the address space of a group, the normal address space - * is globally unified. When processing the DVPP address space, consider - * the following situations: - * 1. If a process is added to a non-new group, the DVPP address space - * must have been created. If the local group of the process also - * contains the DVPP address space and they are different, this - * scenario is not allowed to avoid address conflict. - * 2. If the DVPP address space does not exist in the local group of the - * process, attach the local group of the process to the DVPP address - * space of the group. - * 3. Add a new group. If the process has applied for the dvpp address - * space (sp_alloc or k2u), attach the new group to the dvpp address - * space of the current process. - * 4. If the process has not applied for the DVPP address space, attach - * the new group and the local group of the current process to the - * newly created DVPP address space. - * + * 1. The mappings of local group is set on creating. + * 2. This is used to setup the mapping for groups created during add_task. + * 3. The normal mapping exists for all groups. + * 4. The dvpp mappings for the new group and local group can merge _iff_ at + * least one of the mapping is empty. * the caller must hold sp_group_sem + * NOTE: undo the mergeing when the later process failed. */ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; struct sp_group *local = master->local;
- if (!list_empty(&spg->procs)) { + if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { if (is_mapping_empty(local->dvpp)) sp_mapping_merge(spg->dvpp, local->dvpp); else if (is_mapping_empty(spg->dvpp)) @@ -255,15 +244,17 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return -EINVAL; } } else { - /* the mapping of local group is always set */ - sp_mapping_attach(spg, local->dvpp); - sp_mapping_attach(spg, sp_mapping_normal); + if (!(spg->flag & SPG_FLAG_NON_DVPP)) + /* the mapping of local group is always set */ + sp_mapping_attach(spg, local->dvpp); + if (!spg->normal) + sp_mapping_attach(spg, sp_mapping_normal); }
return 0; }
-static struct sp_group *create_spg(int spg_id); +static struct sp_group *create_spg(int spg_id, unsigned long flag); static void free_new_spg_id(bool new, int spg_id); static void free_sp_group_locked(struct sp_group *spg); static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); @@ -281,7 +272,7 @@ static int init_local_group(struct mm_struct *mm) return spg_id; }
- spg = create_spg(spg_id); + spg = create_spg(spg_id, 0); if (IS_ERR(spg)) { ret = PTR_ERR(spg); goto free_spg_id; @@ -1103,7 +1094,7 @@ static bool is_device_addr(unsigned long addr) return false; }
-static struct sp_group *create_spg(int spg_id) +static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; struct sp_group *spg; @@ -1117,6 +1108,11 @@ static struct sp_group *create_spg(int spg_id) return ERR_PTR(-ENOSPC); }
+ if (flag & ~SPG_FLAG_MASK) { + pr_err_ratelimited("invalid flag:%#lx\n", flag); + return ERR_PTR(-EINVAL); + } + spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) return ERR_PTR(-ENOMEM); @@ -1129,6 +1125,7 @@ static struct sp_group *create_spg(int spg_id) }
spg->id = spg_id; + spg->flag = flag; spg->is_alive = true; spg->proc_num = 0; spg->owner = current->group_leader; @@ -1176,14 +1173,14 @@ static struct sp_group *create_spg(int spg_id) }
/* the caller must hold sp_group_sem */ -static struct sp_group *find_or_alloc_sp_group(int spg_id) +static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag) { struct sp_group *spg;
spg = __sp_find_spg_locked(current->pid, spg_id);
if (!spg) { - spg = create_spg(spg_id); + spg = create_spg(spg_id, flag); } else { down_read(&spg->rw_lock); if (!spg_valid(spg)) { @@ -1336,10 +1333,11 @@ static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) }
/** - * sp_group_add_task() - Add a process to an share group (sp_group). + * mg_sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. * @prot: the prot of task for this spg. * @spg_id: the ID of the sp_group. + * @flag: to give some special message. * * A process can't be added to more than one sp_group in single group mode * and can in multiple group mode. @@ -1352,6 +1350,7 @@ static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) */ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) { + unsigned long flag = 0; struct task_struct *tsk; struct mm_struct *mm; struct sp_group *spg; @@ -1445,7 +1444,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_task; }
- spg = find_or_alloc_sp_group(spg_id); + spg = find_or_alloc_sp_group(spg_id, flag); if (IS_ERR(spg)) { up_write(&sp_group_sem); ret = PTR_ERR(spg); @@ -1818,6 +1817,11 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, else mapping = spg->normal;
+ if (!mapping) { + pr_err_ratelimited("non DVPP spg, id %d\n", spg->id); + return ERR_PTR(-EINVAL); + } + vstart = mapping->start[device_id]; vend = mapping->end[device_id]; spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id);
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
-------------------------------------------------
Currently the dvpp range is global for each device, which is unreasonable after the reconstruction that made the DVPP mappings private to each process or group.

This patch allows the dvpp range to be configured per process. As in the old version, the dvpp range of each dvpp mapping can only be configured once.
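A condensed sketch of the per-process configuration (adapted from the diff below, not standalone; argument validation is omitted): the range is written into the dvpp mapping of the target process's local group and may only deviate from the default once.

	bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
	{
		bool err = false;
		struct task_struct *tsk;
		struct mm_struct *mm;
		struct sp_group *spg;
		struct sp_mapping *spm;
		unsigned long default_start;

		if (get_task(pid, &tsk))
			return false;
		mm = get_task_mm(tsk->group_leader);
		if (!mm)
			goto put_task;

		spg = sp_get_local_group(mm);	/* the range lives in this process's own dvpp mapping */
		if (IS_ERR(spg))
			goto put_mm;

		spm = spg->dvpp;
		default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE;
		if (spm->start[device_id] != default_start)	/* already configured once */
			goto put_spg;

		spm->start[device_id] = start;
		spm->end[device_id] = start + size;
		err = true;

	put_spg:
		sp_group_drop(spg);
	put_mm:
		mmput(mm);
	put_task:
		put_task_struct(tsk);
		return err;
	}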
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 66 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 8 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 8297635c6dac..1c10b136f20f 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -220,6 +220,17 @@ static bool is_mapping_empty(struct sp_mapping *spm) return RB_EMPTY_ROOT(&spm->area_root); }
+static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) +{ + int i; + + for (i = 0; i < sp_device_number; i++) + if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i]) + return false; + + return true; +} + /* * 1. The mappings of local group is set on creating. * 2. This is used to setup the mapping for groups created during add_task. @@ -235,6 +246,11 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) struct sp_group *local = master->local;
if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { + if (!can_mappings_merge(local->dvpp, spg->dvpp)) { + pr_info_ratelimited("address space conflict, id=%d\n", spg->id); + return -EINVAL; + } + if (is_mapping_empty(local->dvpp)) sp_mapping_merge(spg->dvpp, local->dvpp); else if (is_mapping_empty(spg->dvpp)) @@ -3825,16 +3841,50 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); */ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { - if (pid < 0 || - size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= sp_device_number || - !is_online_node_id(device_id) || - is_sp_dev_addr_enabled(device_id)) + int ret; + bool err = false; + struct task_struct *tsk; + struct mm_struct *mm; + struct sp_group *spg; + struct sp_mapping *spm; + unsigned long default_start; + + /* NOTE: check the start address */ + if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || + device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) return false;
- sp_dev_va_start[device_id] = start; - sp_dev_va_size[device_id] = size; - return true; + ret = get_task(pid, &tsk); + if (ret) + return false; + + mm = get_task_mm(tsk->group_leader); + if (!mm) + goto put_task; + + spg = sp_get_local_group(mm); + if (IS_ERR(spg)) + goto put_mm; + + spm = spg->dvpp; + default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE; + /* The dvpp range of each group can be configured only once */ + if (spm->start[device_id] != default_start) + goto put_spg; + + spm->start[device_id] = start; + spm->end[device_id] = start + size; + + err = true; + +put_spg: + sp_group_drop(spg); +put_mm: + mmput(mm); +put_task: + put_task_struct(tsk); + + return err; } EXPORT_SYMBOL_GPL(sp_config_dvpp_range);
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
--------------------------------------------------
The user doesn't care about the start address of the dvpp range; what matters is that the virtual space tagged as DVPP lies within a 16G range. So we can safely drop a dvpp address space during the merging process as long as it is empty.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 1c10b136f20f..b14152acbee8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -246,16 +246,23 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) struct sp_group *local = master->local;
if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { - if (!can_mappings_merge(local->dvpp, spg->dvpp)) { - pr_info_ratelimited("address space conflict, id=%d\n", spg->id); - return -EINVAL; - } + /* + * Don't return an error when the mappings' address ranges conflict. + * As long as the mapping is unused, we can drop the empty mapping. + * This may change the address range for the task or group implicitly, + * so give a warning for it. + */ + bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp);
- if (is_mapping_empty(local->dvpp)) + if (is_mapping_empty(local->dvpp)) { sp_mapping_merge(spg->dvpp, local->dvpp); - else if (is_mapping_empty(spg->dvpp)) + if (is_conflict) + pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id); + } else if (is_mapping_empty(spg->dvpp)) { sp_mapping_merge(local->dvpp, spg->dvpp); - else { + if (is_conflict) + pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id); + } else { pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id); return -EINVAL; }
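Since the hunk above is hard to scan, here is a readable restatement of the new merge decision, using the helpers already defined in mm/share_pool.c; the wrapper name is made up for illustration only.

static int example_merge_dvpp(struct sp_group *local, struct sp_group *spg)
{
        bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp);

        if (is_mapping_empty(local->dvpp)) {
                /* local side unused: adopt the group's range, warn if they differed */
                sp_mapping_merge(spg->dvpp, local->dvpp);
                if (is_conflict)
                        pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id);
        } else if (is_mapping_empty(spg->dvpp)) {
                /* group side unused: adopt the local range, warn if they differed */
                sp_mapping_merge(local->dvpp, spg->dvpp);
                if (is_conflict)
                        pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id);
        } else {
                /* both sides carry live mappings: refuse to merge */
                pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id);
                return -EINVAL;
        }

        return 0;
}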
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
--------------------------------------------------
sp_get_local_group() could be invoked from a kthread, where the current process isn't the process we want. Add a task parameter and let the caller pass the intended task to avoid this problem.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index b14152acbee8..a39bece9af96 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -370,7 +370,7 @@ static inline bool is_local_group(int spg_id) return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; }
-static struct sp_group *sp_get_local_group(struct mm_struct *mm) +static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_struct *mm) { int ret; struct sp_group_master *master; @@ -385,7 +385,7 @@ static struct sp_group *sp_get_local_group(struct mm_struct *mm) up_read(&sp_group_sem);
down_write(&sp_group_sem); - ret = sp_init_group_master_locked(current, mm); + ret = sp_init_group_master_locked(tsk, mm); if (ret) { up_write(&sp_group_sem); return ERR_PTR(ret); @@ -2499,7 +2499,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, } ac->type = SPA_TYPE_ALLOC; } else { /* allocation pass through scene */ - spg = sp_get_local_group(current->mm); + spg = sp_get_local_group(current, current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); down_read(&spg->rw_lock); @@ -3869,7 +3869,7 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) if (!mm) goto put_task;
- spg = sp_get_local_group(mm); + spg = sp_get_local_group(tsk, mm); if (IS_ERR(spg)) goto put_mm;
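A minimal sketch of how a kthread-context caller can now target another task explicitly; the helper below is hypothetical and not part of this series. The caller is expected to release the group with sp_group_drop(), as sp_config_dvpp_range() does above.

static struct sp_group *example_group_of_task(struct task_struct *tsk)
{
        struct mm_struct *mm = get_task_mm(tsk);
        struct sp_group *spg;

        if (!mm)
                return ERR_PTR(-ESRCH);

        spg = sp_get_local_group(tsk, mm);  /* was: sp_get_local_group(current->mm) */
        mmput(mm);

        return spg;                         /* release later with sp_group_drop() */
}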
From: Wang Wensheng wangwensheng4@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S CVE: NA
--------------------------------------------------
Forbid the use of the sharepool interfaces when sharepool is not enabled; otherwise undefined behaviour could panic the kernel.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/share_pool.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index a39bece9af96..750524f1afc2 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1015,6 +1015,9 @@ int sp_group_id_by_pid(int pid) struct sp_group *spg; int spg_id = -ENODEV;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
spg = __sp_find_spg(pid, SPG_ID_DEFAULT); @@ -1050,6 +1053,9 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) struct sp_group_master *master = NULL; struct task_struct *tsk;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (!spg_ids || num <= 0) @@ -1382,6 +1388,9 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) bool id_newly_generated = false; struct sp_area *spa, *prev = NULL;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
/* only allow READ, READ | WRITE */ @@ -1658,6 +1667,9 @@ int mg_sp_group_del_task(int pid, int spg_id) struct mm_struct *mm = NULL; bool is_alive = true;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("del from group failed, invalid group id %d\n", spg_id); return -EINVAL; @@ -1749,6 +1761,9 @@ int sp_id_of_current(void) int ret, spg_id; struct sp_group_master *master;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + if (current->flags & PF_KTHREAD || !current->mm) return -EINVAL;
@@ -2324,6 +2339,9 @@ int sp_free(unsigned long addr, int id) .spg_id = id, };
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (current->flags & PF_KTHREAD) @@ -2761,6 +2779,9 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) int ret = 0; struct sp_alloc_context ac;
+ if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); if (ret) return ERR_PTR(ret); @@ -3142,6 +3163,9 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, int ret; struct sp_k2u_context kc;
+ if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context();
ret = sp_k2u_prepare(kva, size, sp_flags, spg_id, &kc); @@ -3429,6 +3453,9 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) struct sp_walk_data sp_walk_data; struct vm_struct *area;
+ if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context();
if (mm == NULL) { @@ -3717,6 +3744,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { int ret = 0;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (current->flags & PF_KTHREAD) @@ -3762,6 +3792,9 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, struct mm_struct *mm; int ret = 0;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (unlikely(!sp_walk_data)) { @@ -3807,6 +3840,9 @@ EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); */ void sp_walk_page_free(struct sp_walk_data *sp_walk_data) { + if (!sp_is_enabled()) + return; + check_interrupt_context();
if (!sp_walk_data) @@ -3856,6 +3892,9 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) struct sp_mapping *spm; unsigned long default_start;
+ if (!sp_is_enabled()) + return false; + /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) @@ -3916,7 +3955,8 @@ static bool is_sp_normal_addr(unsigned long addr) */ bool is_sharepool_addr(unsigned long addr) { - return is_sp_normal_addr(addr) || is_device_addr(addr); + return sp_is_enabled() && + (is_sp_normal_addr(addr) || is_device_addr(addr)); } EXPORT_SYMBOL_GPL(is_sharepool_addr);
@@ -4113,6 +4153,9 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, unsigned long anon, file, shmem, total_rss, prot; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
+ if (!sp_is_enabled()) + return 0; + if (!mm) return 0;
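For reference, a sketch of the guard pattern this patch applies to every exported entry point; the function below is made up for illustration. The check runs before any share-pool state is touched, and pointer-returning interfaces return ERR_PTR(-EOPNOTSUPP) instead of an error code.

int example_sp_interface(int pid)
{
        if (!sp_is_enabled())
                return -EOPNOTSUPP;     /* feature disabled: refuse before touching any state */

        check_interrupt_context();

        /* ... normal handling, reached only when share pool is enabled ... */
        return 0;
}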