From: Zhou Guanghui <zhouguanghui1@huawei.com>
Offering: HULK
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
--------------------------------
When a driver uses shared pool memory to share data with user space, user space must not be allowed to modify this area. This prevents users from corrupting sensitive data.
When sp_alloc and k2u apply for private memory, read-only memory can now be requested.
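For illustration only, a minimal sketch of how a caller might pass the new flag (kva, size, pid and spg_id below are placeholders, not values from this patch); sp_alloc() accepts SP_PROT_RO in the same way:

	/* Illustrative sketch only: share a kernel buffer with user space read-only. */
	static void *share_buf_readonly(unsigned long kva, unsigned long size,
					int pid, int spg_id)
	{
		void *uva = sp_make_share_k2u(kva, size, SP_PROT_RO, pid, spg_id);

		if (IS_ERR(uva))
			return uva;

		/* the vma is mapped PROT_READ and VM_MAYWRITE is cleared, so user space cannot write */
		return uva;
	}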
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
---
 include/linux/share_pool.h |  3 ++-
 mm/share_pool.c            | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 6f294911c6af..5539a17da7a9 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -15,6 +15,7 @@ #define SP_HUGEPAGE_ONLY (1 << 1) #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) +#define SP_PROT_RO (1 << 16)
#define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -24,7 +25,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS)
#define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | \ + SP_SPEC_NODE_ID | SP_PROT_RO | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT))
diff --git a/mm/share_pool.c b/mm/share_pool.c index 76088952d0a5..99f25a551afc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2331,6 +2331,9 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, if (spg_node) prot = spg_node->prot;
+ if (ac->sp_flags & SP_PROT_RO) + prot = PROT_READ; + /* when success, mmap_addr == spa->va_start */ mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(mmap_addr)) { @@ -2355,6 +2358,10 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, ret = -EINVAL; goto unmap; } + + if (ac->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + /* clean PTE_RDONLY flags or trigger SMMU event */ if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); @@ -2650,6 +2657,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto put_mm; }
+ if (kc && kc->sp_flags & SP_PROT_RO) + prot = PROT_READ; + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); @@ -2662,6 +2672,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
+ if (kc && kc->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + if (is_vm_hugetlb_page(vma)) { ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); if (ret) { @@ -2713,6 +2726,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; + struct sp_k2u_context kc;
down_write(&sp_group_sem); stat = sp_init_process_stat(current, current->mm, spg_none); @@ -2731,8 +2745,8 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un }
spa->kva = kva; - - uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, NULL); + kc.sp_flags = sp_flags; + uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc); __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva));
From: Wang Wensheng <wangwensheng4@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
Add the missing initialization of kc.sp_flags in sp_make_share_kva_to_spg(); otherwise an uninitialized (random) value would be used in sp_remap_kva_to_vma().
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 mm/share_pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 99f25a551afc..db6ab098d403 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2787,7 +2787,7 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size, }
spa->kva = kva; - + kc.sp_flags = sp_flags; list_for_each_entry(spg_node, &spg->procs, proc_node) { mm = spg_node->master->mm; kc.state = K2U_NORMAL;
From: Guo Mengqi <guomengqi3@huawei.com>
Offering: HULK
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
Fix the following situation: the last process in a group exits while a second process tries to add itself to this group. The second process may get an invalid spg, but the group's use_count has already been increased by 1, so the first process fails to free the group when it exits. The second process then calls sp_group_drop --> free_sp_group, which requests the rwsem a second time and deadlocks.
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index db6ab098d403..35edab122509 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -698,20 +698,25 @@ static void free_new_spg_id(bool new, int spg_id) free_sp_group_id(spg_id); }
-static void free_sp_group(struct sp_group *spg) +static void free_sp_group_locked(struct sp_group *spg) { fput(spg->file); fput(spg->file_hugetlb); free_spg_stat(spg->id); - down_write(&sp_group_sem); idr_remove(&sp_group_idr, spg->id); - up_write(&sp_group_sem); free_sp_group_id((unsigned int)spg->id); kfree(spg); system_group_count--; WARN(system_group_count < 0, "unexpected group count\n"); }
+static void free_sp_group(struct sp_group *spg) +{ + down_write(&sp_group_sem); + free_sp_group_locked(spg); + up_write(&sp_group_sem); +} + static void sp_group_drop(struct sp_group *spg) { if (atomic_dec_and_test(&spg->use_count)) @@ -4453,14 +4458,15 @@ void sp_group_post_exit(struct mm_struct *mm) sp_proc_stat_drop(stat); }
- /* lockless traverse */ + down_write(&sp_group_sem); list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { spg = spg_node->spg; /* match with refcount inc in sp_group_add_task */ - sp_group_drop(spg); + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group_locked(spg); kfree(spg_node); } - + up_write(&sp_group_sem); kfree(master); }
From: Wang Wensheng <wangwensheng4@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
--------------------------------------------------
This is not used for THP, but the user page table looks just like THP: user space allocates huge pages via a special driver, and the resulting vma is not marked with VM_HUGETLB. This commit allows such vmas to be shared to the kernel.
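As a hedged illustration of the intended use (uva, size and pid are placeholders): once the page walker accepts these hugepage-backed vmas, a buffer allocated by the special driver can be shared to the kernel through the existing u2k interface.

	/* Illustrative sketch only: share a driver-allocated, hugepage-backed user buffer to the kernel. */
	static void *map_driver_hugebuf_to_kernel(unsigned long uva, unsigned long size, int pid)
	{
		void *kva = sp_make_share_u2k(uva, size, pid);

		if (IS_ERR(kva))
			return kva;

		/* kva now addresses the same hugepage-backed buffer from kernel space */
		return kva;
	}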
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 include/linux/share_pool.h |  1 +
 mm/share_pool.c            | 44 +++++++++++++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 5539a17da7a9..022e61bb6ce4 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -179,6 +179,7 @@ struct sp_walk_data { unsigned long uva_aligned; unsigned long page_size; bool is_hugepage; + bool is_page_type_set; pmd_t *pmd; };
diff --git a/mm/share_pool.c b/mm/share_pool.c index 35edab122509..115200a1ee0d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3013,9 +3013,40 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { + struct page *page; struct sp_walk_data *sp_walk_data = walk->private;
+ /* + * There exist a scene in DVPP where the pagetable is huge page but its + * vma doesn't record it, something like THP. + * So we cannot make out whether it is a hugepage map until we access the + * pmd here. If mixed size of pages appear, just return an error. + */ + if (pmd_huge(*pmd)) { + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = true; + } else if (!sp_walk_data->is_hugepage) + return -EFAULT; + + /* To skip pte level walk */ + walk->action = ACTION_CONTINUE; + + page = pmd_page(*pmd); + get_page(page); + sp_walk_data->pages[sp_walk_data->page_count++] = page; + + return 0; + } + + if (!sp_walk_data->is_page_type_set) { + sp_walk_data->is_page_type_set = true; + sp_walk_data->is_hugepage = false; + } else if (sp_walk_data->is_hugepage) + return -EFAULT; + sp_walk_data->pmd = pmd; + return 0; }
@@ -3159,6 +3190,8 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, sp_walk.pmd_entry = sp_pmd_entry; }
+ sp_walk_data->is_page_type_set = false; + sp_walk_data->page_count = 0; sp_walk_data->page_size = page_size; uva_aligned = ALIGN_DOWN(uva, page_size); sp_walk_data->uva_aligned = uva_aligned; @@ -3183,8 +3216,12 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size,
ret = walk_page_range(mm, uva_aligned, uva_aligned + size_aligned, &sp_walk, sp_walk_data); - if (ret) + if (ret) { + while (sp_walk_data->page_count--) + put_page(pages[sp_walk_data->page_count]); kvfree(pages); + sp_walk_data->pages = NULL; + }
return ret; } @@ -3220,9 +3257,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) int ret = 0; struct mm_struct *mm = current->mm; void *p = ERR_PTR(-ESRCH); - struct sp_walk_data sp_walk_data = { - .page_count = 0, - }; + struct sp_walk_data sp_walk_data; struct vm_struct *area;
check_interrupt_context(); @@ -3563,7 +3598,6 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, return -ESRCH; }
- sp_walk_data->page_count = 0; down_write(&mm->mmap_lock); if (likely(!mm->core_state)) ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
From: Zhou Guanghui <zhouguanghui1@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
The single-group mode has no application scenario, so the related branches are deleted.
The boot option "enable_sp_multi_group_mode" no longer takes effect.
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
---
 mm/share_pool.c | 137 +++++++++---------------------------------------
 1 file changed, 25 insertions(+), 112 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 115200a1ee0d..06c699dd6d3d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -62,9 +62,6 @@ #define byte2mb(size) ((size) >> 20) #define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10))
-#define SINGLE_GROUP_MODE 1 -#define MULTI_GROUP_MODE 2 - #define MAX_GROUP_FOR_SYSTEM 50000 #define MAX_GROUP_FOR_TASK 3000 #define MAX_PROC_PER_GROUP 1024 @@ -93,8 +90,6 @@ int sysctl_share_pool_map_lock_enable; int sysctl_sp_perf_k2u; int sysctl_sp_perf_alloc;
-static int share_pool_group_mode = SINGLE_GROUP_MODE; - static int system_group_count;
static unsigned int sp_device_number; @@ -1079,12 +1074,6 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) struct sp_group_master *master = mm->sp_group_master; bool exist = false;
- if (share_pool_group_mode == SINGLE_GROUP_MODE && master && - master->count == 1) { - pr_err_ratelimited("at most one sp group for a task is allowed in single mode\n"); - return -EEXIST; - } - master = sp_init_group_master_locked(mm, &exist); if (IS_ERR(master)) return PTR_ERR(master); @@ -2222,72 +2211,30 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (sp_flags & SP_HUGEPAGE_ONLY) sp_flags |= SP_HUGEPAGE;
- if (share_pool_group_mode == SINGLE_GROUP_MODE) { - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - if (spg) { - if (spg_id != SPG_ID_DEFAULT && spg->id != spg_id) { - sp_group_drop(spg); - return -ENODEV; - } - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - if (enable_mdc_default_group) { - int ret = 0; - - ret = sp_group_add_task(current->tgid, spg_id); - if (ret < 0) { - pr_err_ratelimited("add group failed in pass through\n"); - return ret; - } - - spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - - /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("pass through allocation failed, spg is dead\n"); - return -ENODEV; - } - } else { - spg = spg_none; - } + if (spg_id != SPG_ID_DEFAULT) { + spg = __sp_find_spg(current->pid, spg_id); + if (!spg) { + pr_err_ratelimited("allocation failed, can't find group\n"); + return -ENODEV; } - } else { - if (spg_id != SPG_ID_DEFAULT) { - spg = __sp_find_spg(current->pid, spg_id); - if (!spg) { - pr_err_ratelimited("allocation failed, can't find group\n"); - return -ENODEV; - }
- /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; - } + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, spg is dead\n"); + return -ENODEV; + }
- if (!is_process_in_group(spg, current->mm)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, task not in group\n"); - return -ENODEV; - } - } else { /* alocation pass through scene */ - spg = spg_none; + if (!is_process_in_group(spg, current->mm)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, task not in group\n"); + return -ENODEV; } + } else { /* alocation pass through scene */ + spg = spg_none; }
if (sp_flags & SP_HUGEPAGE) { @@ -2902,33 +2849,12 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, kc->size_aligned = size_aligned; kc->sp_flags = sp_flags; kc->spg_id = spg_id; - kc->to_task = false; - return 0; -} - -static int sp_check_k2task(struct sp_k2u_context *kc) -{ - int ret = 0; - int spg_id = kc->spg_id; - - if (share_pool_group_mode == SINGLE_GROUP_MODE) { - struct sp_group *spg = get_first_group(current->mm); + if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) + kc->to_task = true; + else + kc->to_task = false;
- if (!spg) { - if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) - ret = -EINVAL; - else - kc->to_task = true; - } else { - if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) - ret = -EINVAL; - sp_group_drop(spg); - } - } else { - if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) - kc->to_task = true; - } - return ret; + return 0; }
static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) @@ -2973,12 +2899,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, if (ret) return ERR_PTR(ret);
- ret = sp_check_k2task(&kc); - if (ret) { - uva = ERR_PTR(ret); - goto out; - } - if (kc.to_task) uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags); else { @@ -3745,13 +3665,6 @@ static int __init enable_share_k2u_to_group(char *s) } __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group);
-static int __init enable_sp_multi_group_mode(char *s) -{ - share_pool_group_mode = MULTI_GROUP_MODE; - return 1; -} -__setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode); - /*** Statistical and maintenance functions ***/
static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat)
From: Zhou Guanghui <zhouguanghui1@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
struct sp_mapping manages the address space of a shared pool. During share pool initialization, a normal address space is created, from which memory of the current shared pool is allocated.
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
---
 include/linux/share_pool.h | 18 +++++++++++++
 mm/share_pool.c            | 52 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 022e61bb6ce4..654dc8cc2922 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -101,6 +101,17 @@ struct sp_proc_stat { atomic64_t k2u_size; };
+/* + * address space management + */ +struct sp_mapping { + unsigned long flag; + atomic_t user; + unsigned long start[MAX_DEVID]; + unsigned long end[MAX_DEVID]; + struct rb_root area_root; +}; + /* Processes in the same sp_group can share memory. * Memory layout for share pool: * @@ -142,6 +153,8 @@ struct sp_group { atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; + struct sp_mapping *dvpp; + struct sp_mapping *normal; };
/* a per-process(per mm) struct which manages a sp_group_node list */ @@ -155,6 +168,11 @@ struct sp_group_master { struct list_head node_list; struct mm_struct *mm; struct sp_proc_stat *stat; + /* + * Used to apply for the shared pool memory of the current process. + * For example, sp_alloc non-share memory or k2task. + */ + struct sp_group *local; };
/* diff --git a/mm/share_pool.c b/mm/share_pool.c index 06c699dd6d3d..2589dab17096 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -125,6 +125,48 @@ static DECLARE_RWSEM(sp_spg_stat_sem); /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat;
+#define SP_MAPPING_DVPP 0x1 +#define SP_MAPPING_NORMAL 0x2 +static struct sp_mapping *sp_mapping_normal; + +static void sp_mapping_range_init(struct sp_mapping *spm) +{ + int i; + + for (i = 0; i < MAX_DEVID; i++) { + if (spm->flag & SP_MAPPING_NORMAL) { + spm->start[i] = MMAP_SHARE_POOL_START; + spm->end[i] = MMAP_SHARE_POOL_16G_START; + continue; + } + + if (!is_sp_dev_addr_enabled(i)) { + spm->start[i] = MMAP_SHARE_POOL_16G_START + + i * MMAP_SHARE_POOL_16G_START; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_START; + } else { + spm->start[i] = sp_dev_va_start[i]; + spm->end[i] = spm->start[i] + sp_dev_va_size[i]; + } + } +} + +static struct sp_mapping *sp_mapping_create(unsigned long flag) +{ + struct sp_mapping *spm; + + spm = kzalloc(sizeof(struct sp_mapping), GFP_KERNEL); + if (!spm) + return ERR_PTR(-ENOMEM); + + spm->flag = flag; + sp_mapping_range_init(spm); + atomic_set(&spm->user, 0); + spm->area_root = RB_ROOT; + + return spm; +} + /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) @@ -4442,12 +4484,22 @@ static void __init sp_device_number_detect(void)
static int __init share_pool_init(void) { + if (!sp_is_enabled()) + return 0; + /* lockless, as init kthread has no sp operation else */ spg_none = create_spg(GROUP_NONE); /* without free spg_none, not a serious problem */ if (IS_ERR(spg_none) || !spg_none) goto fail;
+ sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); + if (IS_ERR(sp_mapping_normal)) { + sp_group_drop(spg_none); + goto fail; + } + atomic_inc(&sp_mapping_normal->user); + sp_device_number_detect(); proc_sharepool_init();
From: Zhou Guanghui <zhouguanghui1@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
Manage the normal and DVPP address spaces of an sp_group separately, and set up the normal and DVPP address spaces of the corresponding groups when adding a task to a group, in sp_alloc, and in k2task.
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
---
 include/linux/share_pool.h |   6 +
 mm/share_pool.c            | 299 +++++++++++++++++++++++++++++--------
 2 files changed, 239 insertions(+), 66 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 654dc8cc2922..92cc1ffa3946 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -39,6 +39,8 @@ #define SPG_ID_AUTO_MIN 100000 #define SPG_ID_AUTO_MAX 199999 #define SPG_ID_AUTO 200000 /* generate group id automatically */ +#define SPG_ID_LOCAL_MIN 200001 +#define SPG_ID_LOCAL_MAX 299999
#define MAX_DEVID 8 /* the max num of Da-vinci devices */
@@ -110,6 +112,10 @@ struct sp_mapping { unsigned long start[MAX_DEVID]; unsigned long end[MAX_DEVID]; struct rb_root area_root; + + struct rb_node *free_area_cache; + unsigned long cached_hole_size; + unsigned long cached_vstart; };
/* Processes in the same sp_group can share memory. diff --git a/mm/share_pool.c b/mm/share_pool.c index 2589dab17096..bff066611ade 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -142,8 +142,8 @@ static void sp_mapping_range_init(struct sp_mapping *spm)
if (!is_sp_dev_addr_enabled(i)) { spm->start[i] = MMAP_SHARE_POOL_16G_START + - i * MMAP_SHARE_POOL_16G_START; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_START; + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } else { spm->start[i] = sp_dev_va_start[i]; spm->end[i] = spm->start[i] + sp_dev_va_size[i]; @@ -167,10 +167,91 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) return spm; }
+static void sp_mapping_destroy(struct sp_mapping *spm) +{ + kfree(spm); +} + +static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) +{ + atomic_inc(&spm->user); + if (spm->flag & SP_MAPPING_DVPP) + spg->dvpp = spm; + else if (spm->flag & SP_MAPPING_NORMAL) + spg->normal = spm; +} + +static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) +{ + if (spm && atomic_dec_and_test(&spm->user)) + sp_mapping_destroy(spm); +} + +/* + * When you set the address space of a group, the normal address space + * is globally unified. When processing the DVPP address space, consider + * the following situations: + * 1. If a process is added to a non-new group, the DVPP address space + * must have been created. If the local group of the process also + * contains the DVPP address space and they are different, this + * scenario is not allowed to avoid address conflict. + * 2. If the DVPP address space does not exist in the local group of the + * process, attach the local group of the process to the DVPP address + * space of the group. + * 3. Add a new group. If the process has applied for the dvpp address + * space (sp_alloc or k2u), attach the new group to the dvpp address + * space of the current process. + * 4. If the process has not applied for the DVPP address space, attach + * the new group and the local group of the current process to the + * newly created DVPP address space. + * + * the caller must hold sp_group_sem + */ +static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_master *master = mm->sp_group_master; + struct sp_group *local = master->local; + struct sp_mapping *spm; + + if (!list_empty(&spg->procs)) { + /* 1 */ + if (local->dvpp && local->dvpp != spg->dvpp) { + pr_info_ratelimited("Duplicate address space, id=%d\n", + spg->id); + return 0; + } + + /* 2 */ + if (!local->dvpp) { + sp_mapping_attach(local, spg->dvpp); + sp_mapping_attach(local, spg->normal); + } + } else { + /* 4 */ + if (!local->dvpp) { + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) + return PTR_ERR(spm); + sp_mapping_attach(local, spm); + sp_mapping_attach(local, sp_mapping_normal); + } + + /* 3 */ + sp_mapping_attach(spg, local->dvpp); + sp_mapping_attach(spg, sp_mapping_normal); + } + + return 0; +} + +static struct sp_group *create_spg(int spg_id); +static void free_new_spg_id(bool new, int spg_id); /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) { + int spg_id; + struct sp_group *spg; struct sp_group_master *master = mm->sp_group_master;
if (master) { @@ -182,16 +263,92 @@ static struct sp_group_master *sp_init_group_master_locked( if (master == NULL) return ERR_PTR(-ENOMEM);
+ spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, + SPG_ID_LOCAL_MAX, GFP_ATOMIC); + if (spg_id < 0) { + kfree(master); + pr_err_ratelimited("generate local group id failed %d\n", spg_id); + return ERR_PTR(spg_id); + } + + spg = create_spg(spg_id); + if (IS_ERR(spg)) { + free_new_spg_id(true, spg_id); + kfree(master); + return (struct sp_group_master *)spg; + } + INIT_LIST_HEAD(&master->node_list); master->count = 0; master->stat = NULL; master->mm = mm; + master->local = spg; mm->sp_group_master = master;
*exist = false; return master; }
+static inline bool is_local_group(int spg_id) +{ + return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; +} + +/* + * If the process is added to a group first, the address space of the local + * group of the process must have been set. If the process is not added to + * a group, directly create or attach the process to the corresponding DVPP + * and normal address space. + */ +static int sp_mapping_group_setup_local(struct mm_struct *mm) +{ + struct sp_group_master *master; + struct sp_mapping *spm; + bool exist = false; + + master = sp_init_group_master_locked(mm, &exist); + if (IS_ERR(master)) + return PTR_ERR(master); + + if (master->local->dvpp) + return 0; + + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) + return PTR_ERR(spm); + sp_mapping_attach(master->local, spm); + sp_mapping_attach(master->local, sp_mapping_normal); + + return 0; +} + +static struct sp_group *sp_get_local_group(struct mm_struct *mm) +{ + int ret; + struct sp_group_master *master; + + down_read(&sp_group_sem); + master = mm->sp_group_master; + if (master && master->local) { + atomic_inc(&master->local->use_count); + up_read(&sp_group_sem); + return master->local; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(mm); + if (ret) { + up_write(&sp_group_sem); + return ERR_PTR(ret); + } + master = mm->sp_group_master; + atomic_inc(&master->local->use_count); + up_write(&sp_group_sem); + + return master->local; +} + static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm) { struct sp_proc_stat *stat; @@ -575,7 +732,7 @@ static void spa_inc_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num += 1; spa_stat.k2u_task_size += size; - update_spg_stat_k2u(size, true, spg_none->stat); + update_spg_stat_k2u(size, true, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num += 1; @@ -598,7 +755,7 @@ static void spa_inc_usage(struct sp_area *spa) spa_stat.total_num += 1; spa_stat.total_size += size;
- if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_inc(&sp_overall_stat.spa_total_num); atomic64_add(size, &sp_overall_stat.spa_total_size); } @@ -621,7 +778,7 @@ static void spa_dec_usage(struct sp_area *spa) case SPA_TYPE_K2TASK: spa_stat.k2u_task_num -= 1; spa_stat.k2u_task_size -= size; - update_spg_stat_k2u(size, false, spg_none->stat); + update_spg_stat_k2u(size, false, spa->spg->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num -= 1; @@ -640,7 +797,7 @@ static void spa_dec_usage(struct sp_area *spa) spa_stat.total_num -= 1; spa_stat.total_size -= size;
- if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { atomic_dec(&sp_overall_stat.spa_total_num); atomic64_sub(spa->real_size, &sp_overall_stat.spa_total_size); } @@ -725,7 +882,8 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, static void free_sp_group_id(int spg_id) { /* ida operation is protected by an internal spin_lock */ - if (spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) + if ((spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) || + (spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX)) ida_free(&sp_group_id_ida, spg_id); }
@@ -742,8 +900,11 @@ static void free_sp_group_locked(struct sp_group *spg) free_spg_stat(spg->id); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); + sp_mapping_detach(spg, spg->dvpp); + sp_mapping_detach(spg, spg->normal); + if (!is_local_group(spg->id)) + system_group_count--; kfree(spg); - system_group_count--; WARN(system_group_count < 0, "unexpected group count\n"); }
@@ -992,7 +1153,8 @@ static struct sp_group *create_spg(int spg_id) struct user_struct *user = NULL; int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
- if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) { + if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM && + !is_local_group(spg_id))) { pr_err_ratelimited("reach system max group num\n"); return ERR_PTR(-ENOSPC); } @@ -1039,7 +1201,8 @@ static struct sp_group *create_spg(int spg_id) if (ret < 0) goto out_fput_all;
- system_group_count++; + if (!is_local_group(spg_id)) + system_group_count++; return spg;
out_fput_all: @@ -1322,6 +1485,10 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) if (ret) goto out_drop_group;
+ ret = sp_mapping_group_setup(mm, spg); + if (ret) + goto out_drop_group; + node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { ret = PTR_ERR(node); @@ -1603,7 +1770,6 @@ static void __insert_sp_area(struct sp_area *spa)
/* The sp_area cache globals are protected by sp_area_lock */ static struct rb_node *free_sp_area_cache; -static unsigned long cached_hole_size; static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_range() */
/** @@ -1622,11 +1788,12 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, { struct sp_area *spa, *first, *err; struct rb_node *n; - unsigned long vstart = MMAP_SHARE_POOL_START; - unsigned long vend = MMAP_SHARE_POOL_16G_START; + unsigned long vstart; + unsigned long vend; unsigned long addr; unsigned long size_align = ALIGN(size, PMD_SIZE); /* va aligned to 2M */ int device_id, node_id; + struct sp_mapping *mapping;
device_id = sp_flags_device_id(flags); node_id = flags & SP_SPEC_NODE_ID ? sp_flags_node_id(flags) : device_id; @@ -1636,17 +1803,13 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, return ERR_PTR(-EINVAL); }
- if ((flags & SP_DVPP)) { - if (!is_sp_dev_addr_enabled(device_id)) { - vstart = MMAP_SHARE_POOL_16G_START + - device_id * MMAP_SHARE_POOL_16G_SIZE; - vend = vstart + MMAP_SHARE_POOL_16G_SIZE; - } else { - vstart = sp_dev_va_start[device_id]; - vend = vstart + sp_dev_va_size[device_id]; - } - } + if (flags & SP_DVPP) + mapping = spg->dvpp; + else + mapping = spg->normal;
+ vstart = mapping->start[device_id]; + vend = mapping->end[device_id]; spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id); if (unlikely(!spa)) return ERR_PTR(-ENOMEM); @@ -1662,18 +1825,18 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, * Note that sp_free_area may update free_sp_area_cache * without updating cached_hole_size. */ - if (!free_sp_area_cache || size_align < cached_hole_size || - vstart != cached_vstart) { - cached_hole_size = 0; - free_sp_area_cache = NULL; + if (!mapping->free_area_cache || size_align < mapping->cached_hole_size || + vstart != mapping->cached_vstart) { + mapping->cached_hole_size = 0; + mapping->free_area_cache = NULL; }
/* record if we encounter less permissive parameters */ - cached_vstart = vstart; + mapping->cached_vstart = vstart;
/* find starting point for our search */ - if (free_sp_area_cache) { - first = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + if (mapping->free_area_cache) { + first = rb_entry(mapping->free_area_cache, struct sp_area, rb_node); addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1686,7 +1849,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, goto error; }
- n = sp_area_root.rb_node; + n = mapping->area_root.rb_node; first = NULL;
while (n) { @@ -1708,8 +1871,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
/* from the starting point, traverse areas until a suitable hole is found */ while (addr + size_align > first->va_start && addr + size_align <= vend) { - if (addr + cached_hole_size < first->va_start) - cached_hole_size = first->va_start - addr; + if (addr + mapping->cached_hole_size < first->va_start) + mapping->cached_hole_size = first->va_start - addr; addr = first->va_end; if (addr + size_align < addr) { err = ERR_PTR(-EOVERFLOW); @@ -1747,9 +1910,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
spa_inc_usage(spa); __insert_sp_area(spa); - free_sp_area_cache = &spa->rb_node; - if (spa->spg != spg_none) - list_add_tail(&spa->link, &spg->spa_list); + mapping->free_area_cache = &spa->rb_node; + list_add_tail(&spa->link, &spg->spa_list);
spin_unlock(&sp_area_lock);
@@ -1840,8 +2002,7 @@ static void sp_free_area(struct sp_area *spa) pr_debug("clear spa->kva %ld is not valid\n", spa->kva);
spa_dec_usage(spa); - if (spa->spg != spg_none) - list_del(&spa->link); + list_del(&spa->link);
rb_erase(&spa->rb_node, &sp_area_root); RB_CLEAR_NODE(&spa->rb_node); @@ -2001,7 +2162,7 @@ static void sp_fallocate(struct sp_area *spa)
static void sp_free_unmap_fallocate(struct sp_area *spa) { - if (spa->spg != spg_none) { + if (!is_local_group(spa->spg->id)) { down_read(&spa->spg->rw_lock); __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); sp_fallocate(spa); @@ -2206,7 +2367,6 @@ static void trace_sp_alloc_begin(struct sp_alloc_context *ac) static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) { unsigned long cost; - bool is_pass_through = ac->spg == spg_none ? true : false;
if (!sysctl_sp_perf_alloc) return; @@ -2218,7 +2378,8 @@ static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) if (cost >= (unsigned long)sysctl_sp_perf_alloc) { pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", current->comm, current->tgid, current->pid, - va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, is_pass_through); + va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, + is_local_group(ac->spg->id)); } }
@@ -2276,7 +2437,9 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, return -ENODEV; } } else { /* alocation pass through scene */ - spg = spg_none; + spg = sp_get_local_group(current->mm); + if (IS_ERR(spg)) + return PTR_ERR(spg); }
if (sp_flags & SP_HUGEPAGE) { @@ -2299,7 +2462,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - if (spa->spg != spg_none) + if (!is_local_group(spa->spg->id)) __sp_free(spa->spg, spa->va_start, spa->real_size, mm); }
@@ -2364,7 +2527,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret;
unmap: - if (spa->spg != spg_none) + if (!is_local_group(spa->spg->id)) sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); else sp_munmap(mm, spa->va_start, spa->real_size); @@ -2467,8 +2630,9 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - if (spa->spg != spg_none) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + if (!is_local_group(spa->spg->id)) + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, + spg_node); else sp_munmap(mm, spa->va_start, spa->real_size);
@@ -2493,7 +2657,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node;
- if (spa->spg == spg_none) { + if (is_local_group(spa->spg->id)) { ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); } else { /* create mapping for each process in the group */ @@ -2517,10 +2681,9 @@ static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_alloc_context *ac) { struct sp_group *spg = ac->spg; - bool is_pass_through = spg == spg_none ? true : false;
- /* match sp_alloc_check_prepare */ - if (!is_pass_through) + /* match sp_alloc_prepare */ + if (!is_local_group(spg->id)) up_read(&spg->rw_lock);
if (!result) @@ -2532,9 +2695,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, trace_sp_alloc_finish(ac, spa->va_start); }
- if (!is_pass_through) - sp_group_drop(spg); - + sp_group_drop(spg); sp_dump_stack(); sp_try_to_compact(); } @@ -2716,22 +2877,33 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, */ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags) { + int ret; void *uva; struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; struct sp_k2u_context kc; + struct sp_group *spg;
down_write(&sp_group_sem); - stat = sp_init_process_stat(current, current->mm, spg_none); - up_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(current->mm); + if (ret) { + up_write(&sp_group_sem); + pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret); + return ERR_PTR(ret); + } + + spg = current->mm->sp_group_master->local; + stat = sp_init_process_stat(current, current->mm, spg); if (IS_ERR(stat)) { + up_write(&sp_group_sem); pr_err_ratelimited("k2u_task init process stat failed %lx\n", PTR_ERR(stat)); return stat; } + up_write(&sp_group_sem);
- spa = sp_alloc_area(size, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); + spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in k2u_task (potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -3926,7 +4098,7 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- if (spa->spg == spg_none) /* k2u to task */ + if (is_local_group(spa->spg->id)) /* k2u to task */ seq_printf(seq, "%-10s ", "None"); else { down_read(&spa->spg->rw_lock); @@ -4456,6 +4628,9 @@ void sp_group_post_exit(struct mm_struct *mm) kfree(spg_node); } up_write(&sp_group_sem); + + if (master->local) + sp_group_drop(master->local); kfree(master); }
@@ -4487,17 +4662,9 @@ static int __init share_pool_init(void) if (!sp_is_enabled()) return 0;
- /* lockless, as init kthread has no sp operation else */ - spg_none = create_spg(GROUP_NONE); - /* without free spg_none, not a serious problem */ - if (IS_ERR(spg_none) || !spg_none) - goto fail; - sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); - if (IS_ERR(sp_mapping_normal)) { - sp_group_drop(spg_none); + if (IS_ERR(sp_mapping_normal)) goto fail; - } atomic_inc(&sp_mapping_normal->user);
sp_device_number_detect();
From: Zhou Guanghui <zhouguanghui1@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
The DVPP address space is per process or per sharing group. During sp_free and unshare, we need to know which address space a given address belongs to.
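A minimal usage sketch of the new interface (illustrative only):

	/* Illustrative sketch: query the id of the current task's local group. */
	static int query_local_group_id(void)
	{
		int spg_id = mg_sp_id_of_current();

		if (spg_id < 0)		/* e.g. -EINVAL for kernel threads or tasks without an mm */
			return spg_id;

		pr_info("local share pool group id: %d\n", spg_id);
		return spg_id;
	}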
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
---
 include/linux/share_pool.h | 12 ++++++++++++
 mm/share_pool.c            | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 92cc1ffa3946..4e282b4122a3 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -283,6 +283,9 @@ extern bool mg_is_sharepool_addr(unsigned long addr); extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); extern int sp_group_add_task(int pid, int spg_id);
+extern int sp_id_of_current(void); +extern int mg_sp_id_of_current(void); + extern void sp_area_drop(struct vm_area_struct *vma); extern int sp_group_exit(struct mm_struct *mm); extern void sp_group_post_exit(struct mm_struct *mm); @@ -430,6 +433,15 @@ static inline int mg_sp_unshare(unsigned long va, unsigned long size) return -EPERM; }
+static inline int sp_id_of_current(void) +{ + return -EPERM; +} + +static inline int mg_sp_id_of_current(void) +{ + return -EPERM; +}
static inline void sp_init_mm(struct mm_struct *mm) { diff --git a/mm/share_pool.c b/mm/share_pool.c index bff066611ade..403e86e29ecb 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1745,6 +1745,43 @@ int sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_group_del_task);
+int sp_id_of_current(void) +{ + int ret, spg_id; + struct sp_group_master *master; + + if (current->flags & PF_KTHREAD || !current->mm) + return -EINVAL; + + down_read(&sp_group_sem); + master = current->mm->sp_group_master; + if (master && master->local) { + spg_id = master->local->id; + up_read(&sp_group_sem); + return spg_id; + } + up_read(&sp_group_sem); + + down_write(&sp_group_sem); + ret = sp_mapping_group_setup_local(current->mm); + if (ret) { + up_write(&sp_group_sem); + return ret; + } + master = current->mm->sp_group_master; + spg_id = master->local->id; + up_write(&sp_group_sem); + + return spg_id; +} +EXPORT_SYMBOL_GPL(sp_id_of_current); + +int mg_sp_id_of_current(void) +{ + return sp_id_of_current(); +} +EXPORT_SYMBOL_GPL(mg_sp_id_of_current); + /* the caller must hold sp_area_lock */ static void __insert_sp_area(struct sp_area *spa) {
From: Zhou Guanghui <zhouguanghui1@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
The DVPP address space is managed per group. When releasing shared pool memory, the corresponding address space must be found based on the group ID.
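For illustration, a hedged sketch of how callers are expected to pass the group id to the release paths after this change (addr, uva, size and spg_id are placeholders):

	/* Illustrative sketch only: both release paths now take the id that selects the address space. */
	static int example_release(unsigned long addr, unsigned long uva,
				   unsigned long size, int spg_id)
	{
		int ret;

		ret = mg_sp_free(addr, spg_id);		/* previously mg_sp_free(addr) */
		if (ret)
			return ret;

		return mg_sp_unshare(uva, size, spg_id);	/* previously mg_sp_unshare(uva, size) */
	}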
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
---
 include/linux/share_pool.h |  12 +-
 mm/share_pool.c            | 235 ++++++++++++++++++-------------------
 2 files changed, 122 insertions(+), 125 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 4e282b4122a3..7b536b30907d 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -249,8 +249,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id);
-extern int sp_free(unsigned long addr); -extern int mg_sp_free(unsigned long addr); +extern int sp_free(unsigned long addr, int id); +extern int mg_sp_free(unsigned long addr, int id);
extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); @@ -261,7 +261,7 @@ extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid);
extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); -extern int mg_sp_unshare(unsigned long va, unsigned long size); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int id);
extern int sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data); @@ -391,12 +391,12 @@ static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int return NULL; }
-static inline int sp_free(unsigned long addr) +static inline int sp_free(unsigned long addr, int id) { return -EPERM; }
-static inline int mg_sp_free(unsigned long addr) +static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; } @@ -428,7 +428,7 @@ static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int return -EPERM; }
-static inline int mg_sp_unshare(unsigned long va, unsigned long size) +static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; } diff --git a/mm/share_pool.c b/mm/share_pool.c index 403e86e29ecb..57a004674970 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -637,12 +637,6 @@ static void free_spg_stat(int spg_id) kfree(stat); }
-/* - * Group '0' for k2u_task and pass through. No process will be actually - * added to. - */ -static struct sp_group *spg_none; - /* statistics of all sp area, protected by sp_area_lock */ struct sp_spa_stat { unsigned int total_num; @@ -939,26 +933,6 @@ static int get_task(int pid, struct task_struct **task) return 0; }
-static struct sp_group *get_first_group(struct mm_struct *mm) -{ - struct sp_group *spg = NULL; - struct sp_group_master *master = mm->sp_group_master; - - if (master && master->count >= 1) { - struct sp_group_node *spg_node = NULL; - - spg_node = list_first_entry(&master->node_list, - struct sp_group_node, group_node); - spg = spg_node->spg; - - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - spg = NULL; - } - - return spg; -} - /* * the caller must: * 1. hold spg->rw_lock @@ -983,35 +957,27 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) struct task_struct *tsk = NULL; int ret = 0;
- ret = get_task(pid, &tsk); - if (ret) - return NULL; - if (spg_id == SPG_ID_DEFAULT) { - /* - * Once we encounter a concurrency problem here. - * To fix it, we believe get_task_mm() and mmput() is too - * heavy because we just get the pointer of sp_group. - */ + ret = get_task(pid, &tsk); + if (ret) + return NULL; + task_lock(tsk); if (tsk->mm == NULL) spg = NULL; - else - spg = get_first_group(tsk->mm); + else if (tsk->mm->sp_group_master) + spg = tsk->mm->sp_group_master->local; task_unlock(tsk); + + put_task_struct(tsk); } else { spg = idr_find(&sp_group_idr, spg_id); - /* don't revive a dead group */ - if (!spg || !atomic_inc_not_zero(&spg->use_count)) - goto fail; }
- put_task_struct(tsk); - return spg; + if (!spg || !atomic_inc_not_zero(&spg->use_count)) + return NULL;
-fail: - put_task_struct(tsk); - return NULL; + return spg; }
static struct sp_group *__sp_find_spg(int pid, int spg_id) @@ -1783,9 +1749,9 @@ int mg_sp_id_of_current(void) EXPORT_SYMBOL_GPL(mg_sp_id_of_current);
/* the caller must hold sp_area_lock */ -static void __insert_sp_area(struct sp_area *spa) +static void __insert_sp_area(struct sp_mapping *spm, struct sp_area *spa) { - struct rb_node **p = &sp_area_root.rb_node; + struct rb_node **p = &spm->area_root.rb_node; struct rb_node *parent = NULL;
while (*p) { @@ -1802,13 +1768,9 @@ static void __insert_sp_area(struct sp_area *spa) }
rb_link_node(&spa->rb_node, parent, p); - rb_insert_color(&spa->rb_node, &sp_area_root); + rb_insert_color(&spa->rb_node, &spm->area_root); }
-/* The sp_area cache globals are protected by sp_area_lock */ -static struct rb_node *free_sp_area_cache; -static unsigned long cached_vstart; /* affected by SP_DVPP and sp_config_dvpp_range() */ - /** * sp_alloc_area() - Allocate a region of VA from the share pool. * @size: the size of VA to allocate. @@ -1856,10 +1818,10 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, /* * Invalidate cache if we have more permissive parameters. * cached_hole_size notes the largest hole noticed _below_ - * the sp_area cached in free_sp_area_cache: if size fits + * the sp_area cached in free_area_cache: if size fits * into that hole, we want to scan from vstart to reuse - * the hole instead of allocating above free_sp_area_cache. - * Note that sp_free_area may update free_sp_area_cache + * the hole instead of allocating above free_area_cache. + * Note that sp_free_area may update free_area_cache * without updating cached_hole_size. */ if (!mapping->free_area_cache || size_align < mapping->cached_hole_size || @@ -1946,7 +1908,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, spa->device_id = device_id;
spa_inc_usage(spa); - __insert_sp_area(spa); + __insert_sp_area(mapping, spa); mapping->free_area_cache = &spa->rb_node; list_add_tail(&spa->link, &spg->spa_list);
@@ -1961,9 +1923,15 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, }
/* the caller should hold sp_area_lock */ -static struct sp_area *__find_sp_area_locked(unsigned long addr) +static struct sp_area *__find_sp_area_locked(struct sp_group *spg, + unsigned long addr) { - struct rb_node *n = sp_area_root.rb_node; + struct rb_node *n; + + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + n = spg->normal->area_root.rb_node; + else + n = spg->dvpp->area_root.rb_node;
while (n) { struct sp_area *spa; @@ -1981,12 +1949,12 @@ static struct sp_area *__find_sp_area_locked(unsigned long addr) return NULL; }
-static struct sp_area *__find_sp_area(unsigned long addr) +static struct sp_area *__find_sp_area(struct sp_group *spg, unsigned long addr) { struct sp_area *n;
spin_lock(&sp_area_lock); - n = __find_sp_area_locked(addr); + n = __find_sp_area_locked(spg, addr); if (n) atomic_inc(&n->use_count); spin_unlock(&sp_area_lock); @@ -2011,22 +1979,30 @@ static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) */ static void sp_free_area(struct sp_area *spa) { + unsigned long addr = spa->va_start; + struct sp_mapping *spm; + lockdep_assert_held(&sp_area_lock);
- if (free_sp_area_cache) { + if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + spm = spa->spg->normal; + else + spm = spa->spg->dvpp; + + if (spm->free_area_cache) { struct sp_area *cache;
- cache = rb_entry(free_sp_area_cache, struct sp_area, rb_node); + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); if (spa->va_start <= cache->va_start) { - free_sp_area_cache = rb_prev(&spa->rb_node); + spm->free_area_cache = rb_prev(&spa->rb_node); /* * the new cache node may be changed to another region, * i.e. from DVPP region to normal region */ - if (free_sp_area_cache) { - cache = rb_entry(free_sp_area_cache, + if (spm->free_area_cache) { + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); - cached_vstart = cache->region_vstart; + spm->cached_vstart = cache->region_vstart; } /* * We don't try to update cached_hole_size, @@ -2041,7 +2017,7 @@ static void sp_free_area(struct sp_area *spa) spa_dec_usage(spa); list_del(&spa->link);
- rb_erase(&spa->rb_node, &sp_area_root); + rb_erase(&spa->rb_node, &spm->area_root); RB_CLEAR_NODE(&spa->rb_node); kfree(spa); } @@ -2083,7 +2059,7 @@ void sp_area_drop(struct vm_area_struct *vma) * an atomic operation. */ spin_lock(&sp_area_lock); - spa = __find_sp_area_locked(vma->vm_start); + spa = __find_sp_area_locked(vma->vm_mm->sp_group_master->local, vma->vm_start); __sp_area_drop_locked(spa); spin_unlock(&sp_area_lock); } @@ -2215,7 +2191,7 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm int ret = 0;
down_read(&spg->rw_lock); - if (!is_process_in_group(spg, mm)) + if (!is_local_group(spg->id) && !is_process_in_group(spg, mm)) ret = -EPERM; up_read(&spg->rw_lock); return ret; @@ -2228,6 +2204,7 @@ struct sp_free_context { unsigned long addr; struct sp_area *spa; int state; + int spg_id; };
/* when success, __sp_area_drop(spa) should be used */ @@ -2236,10 +2213,18 @@ static int sp_free_get_spa(struct sp_free_context *fc) int ret = 0; unsigned long addr = fc->addr; struct sp_area *spa; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, fc->spg_id); + if (!spg) { + pr_debug("sp free get group failed %d\n", fc->spg_id); + return -EINVAL; + }
fc->state = FREE_CONT;
- spa = __find_sp_area(addr); + spa = __find_sp_area(spg, addr); + sp_group_drop(spg); if (!spa) { pr_debug("sp free invalid input addr %lx\n", addr); return -EINVAL; @@ -2252,46 +2237,37 @@ static int sp_free_get_spa(struct sp_free_context *fc) } fc->spa = spa;
- if (spa->spg != spg_none) { - /* - * Access control: an sp addr can only be freed by - * 1. another task in the same spg - * 2. a kthread - * - * a passthrough addr can only be freed by the applier process - */ - if (!current->mm) - goto check_spa; + if (!current->mm) + goto check_spa;
- ret = sp_check_caller_permission(spa->spg, current->mm); - if (ret < 0) - goto drop_spa; + ret = sp_check_caller_permission(spa->spg, current->mm); + if (ret < 0) + goto drop_spa;
check_spa: - down_write(&spa->spg->rw_lock); - if (!spg_valid(spa->spg)) { - fc->state = FREE_END; - up_write(&spa->spg->rw_lock); - goto drop_spa; - /* we must return success(0) in this situation */ - } - /* the life cycle of spa has a direct relation with sp group */ - if (unlikely(spa->is_dead)) { - up_write(&spa->spg->rw_lock); - pr_err_ratelimited("unexpected double sp free\n"); - dump_stack(); - ret = -EINVAL; - goto drop_spa; - } - spa->is_dead = true; - up_write(&spa->spg->rw_lock); + if (is_local_group(spa->spg->id) && (current->tgid != spa->applier)) { + ret = -EPERM; + goto drop_spa; + }
- } else { - if (current->tgid != spa->applier) { - ret = -EPERM; - goto drop_spa; - } + down_write(&spa->spg->rw_lock); + if (!spg_valid(spa->spg)) { + fc->state = FREE_END; + up_write(&spa->spg->rw_lock); + goto drop_spa; + /* we must return success(0) in this situation */ + } + /* the life cycle of spa has a direct relation with sp group */ + if (unlikely(spa->is_dead)) { + up_write(&spa->spg->rw_lock); + pr_err_ratelimited("unexpected double sp free\n"); + dump_stack(); + ret = -EINVAL; + goto drop_spa; } + spa->is_dead = true; + up_write(&spa->spg->rw_lock); + return 0;
drop_spa: @@ -2302,21 +2278,26 @@ static int sp_free_get_spa(struct sp_free_context *fc) /** * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. + * @id: Address space identifier, which is used to distinguish the addr. * * Return: * * 0 - success. * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int sp_free(unsigned long addr) +int sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { .addr = addr, + .spg_id = id, };
check_interrupt_context();
+ if (current->flags & PF_KTHREAD) + return -EINVAL; + ret = sp_free_get_spa(&fc); if (ret || fc.state == FREE_END) goto out; @@ -2337,9 +2318,9 @@ int sp_free(unsigned long addr) } EXPORT_SYMBOL_GPL(sp_free);
-int mg_sp_free(unsigned long addr) +int mg_sp_free(unsigned long addr, int id) { - return sp_free(addr); + return sp_free(addr, id); } EXPORT_SYMBOL_GPL(mg_sp_free);
@@ -2433,6 +2414,11 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (enable_mdc_default_group) spg_id = mdc_default_group_id;
+ if (current->flags & PF_KTHREAD) { + pr_err_ratelimited("allocation failed, task is kthread\n"); + return -EINVAL; + } + if (unlikely(!size || (size >> PAGE_SHIFT) > totalram_pages())) { pr_err_ratelimited("allocation failed, invalid size %lu\n", size); return -EINVAL; @@ -2473,7 +2459,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, pr_err_ratelimited("allocation failed, task not in group\n"); return -ENODEV; } - } else { /* alocation pass through scene */ + } else { /* allocation pass through scene */ spg = sp_get_local_group(current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); @@ -3504,7 +3490,7 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); * * This also means we must trust DVPP channel destroy and guard worker code. */ -static int sp_unshare_uva(unsigned long uva, unsigned long size) +static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) { int ret = 0; struct mm_struct *mm; @@ -3512,14 +3498,21 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) unsigned long uva_aligned; unsigned long size_aligned; unsigned int page_size; + struct sp_group *spg; + + spg = __sp_find_spg(current->tgid, group_id); + if (!spg) { + pr_debug("sp unshare find group failed %d\n", group_id); + return -EINVAL; + }
/* * at first we guess it's a hugepage addr * we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u */ - spa = __find_sp_area(ALIGN_DOWN(uva, PMD_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE)); if (!spa) { - spa = __find_sp_area(ALIGN_DOWN(uva, PAGE_SIZE)); + spa = __find_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE)); if (!spa) { ret = -EINVAL; pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva); @@ -3650,6 +3643,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) out_drop_area: __sp_area_drop(spa); out: + sp_group_drop(spg); return ret; }
@@ -3713,9 +3707,12 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
check_interrupt_context();
+ if (current->flags & PF_KTHREAD) + return -EINVAL; + if (va < TASK_SIZE) { /* user address */ - ret = sp_unshare_uva(va, size); + ret = sp_unshare_uva(va, size, spg_id); } else if (va >= PAGE_OFFSET) { /* kernel address */ ret = sp_unshare_kva(va, size); @@ -3729,9 +3726,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_unshare);
-int mg_sp_unshare(unsigned long va, unsigned long size) +int mg_sp_unshare(unsigned long va, unsigned long size, int id) { - return sp_unshare(va, size, 0, 0); + return sp_unshare(va, size, 0, id); } EXPORT_SYMBOL_GPL(mg_sp_unshare);
@@ -3891,8 +3888,8 @@ int sp_node_id(struct vm_area_struct *vma) if (!sp_is_enabled()) return node_id;
- if (vma) { - spa = __find_sp_area(vma->vm_start); + if (vma && vma->vm_flags & VM_SHARE_POOL) { + spa = __find_sp_area(vma->vm_mm->sp_group_master->local, vma->vm_start); if (spa) { node_id = spa->node_id; __sp_area_drop(spa); @@ -4058,7 +4055,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot) seq_puts(seq, "R"); else if (prot == (PROT_READ | PROT_WRITE)) seq_puts(seq, "RW"); - else /* e.g. spg_none */ + else seq_puts(seq, "-"); }
@@ -4459,7 +4456,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int node_id; struct sp_area *spa;
- spa = __find_sp_area(vma->vm_start); + spa = __find_sp_area(mm->sp_group_master->local, vma->vm_start); if (!spa) { pr_err("share pool: vma is invalid, not from sp mmap\n"); return ret;
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
The management of the address space is adjusted, and the statistical data processing of the shared pool needs to be adapted.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Signed-off-by: Zhang Jian zhangjian210@huawei.com --- mm/share_pool.c | 69 ++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 29 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 57a004674970..de9abb9ca97d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -694,7 +694,6 @@ struct sp_area { int device_id; }; static DEFINE_SPINLOCK(sp_area_lock); -static struct rb_root sp_area_root = RB_ROOT;
static unsigned long spa_size(struct sp_area *spa) { @@ -4117,14 +4116,13 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, return 0; }
-static void rb_spa_stat_show(struct seq_file *seq) +static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *spm) { struct rb_node *node; struct sp_area *spa, *prev = NULL;
spin_lock(&sp_area_lock); - - for (node = rb_first(&sp_area_root); node; node = rb_next(node)) { + for (node = rb_first(&spm->area_root); node; node = rb_next(node)) { __sp_area_drop_locked(prev);
spa = rb_entry(node, struct sp_area, rb_node); @@ -4132,16 +4130,12 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- if (is_local_group(spa->spg->id)) /* k2u to task */ - seq_printf(seq, "%-10s ", "None"); - else { - down_read(&spa->spg->rw_lock); - if (spg_valid(spa->spg)) /* k2u to group */ - seq_printf(seq, "%-10d ", spa->spg->id); - else /* spg is dead */ - seq_printf(seq, "%-10s ", "Dead"); - up_read(&spa->spg->rw_lock); - } + down_read(&spa->spg->rw_lock); + if (spg_valid(spa->spg)) /* k2u to group */ + seq_printf(seq, "%-10d ", spa->spg->id); + else /* spg is dead */ + seq_printf(seq, "%-10s ", "Dead"); + up_read(&spa->spg->rw_lock);
seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start, @@ -4177,6 +4171,30 @@ static void rb_spa_stat_show(struct seq_file *seq) spin_unlock(&sp_area_lock); }
+static void spa_normal_stat_show(struct seq_file *seq) +{ + spa_stat_of_mapping_show(seq, sp_mapping_normal); +} + +static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data) +{ + struct sp_group *spg = p; + struct seq_file *seq = data; + + if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1) + spa_stat_of_mapping_show(seq, spg->dvpp); + + return 0; +} + +static void spa_dvpp_stat_show(struct seq_file *seq) +{ + down_read(&sp_group_sem); + idr_for_each(&sp_group_idr, idr_spg_dvpp_stat_show_cb, seq); + up_read(&sp_group_sem); +} + + void spa_overview_show(struct seq_file *seq) { unsigned int total_num, alloc_num, k2u_task_num, k2u_spg_num; @@ -4230,12 +4248,11 @@ static int idr_spg_stat_cb(int id, void *p, void *data) struct sp_spg_stat *s = p; struct seq_file *seq = data;
- if (seq != NULL) { - if (id == 0) - seq_puts(seq, "Non Group "); - else - seq_printf(seq, "Group %6d ", id); + if (is_local_group(id) && atomic64_read(&s->size) == 0) + return 0;
+ if (seq != NULL) { + seq_printf(seq, "Group %6d ", id); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4243,11 +4260,7 @@ static int idr_spg_stat_cb(int id, void *p, void *data) byte2kb(atomic64_read(&s->alloc_nsize)), byte2kb(atomic64_read(&s->alloc_hsize))); } else { - if (id == 0) - pr_info("Non Group "); - else - pr_info("Group %6d ", id); - + pr_info("Group %6d ", id); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", byte2kb(atomic64_read(&s->size)), atomic_read(&s->spa_num), @@ -4291,7 +4304,8 @@ static int spa_stat_show(struct seq_file *seq, void *offset) /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); - rb_spa_stat_show(seq); + spa_normal_stat_show(seq); + spa_dvpp_stat_show(seq); return 0; }
@@ -4328,10 +4342,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data) prot = get_process_prot_locked(id, mm);
seq_printf(seq, "%-8d ", tgid); - if (id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-8d ", id); seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", get_spg_proc_alloc(spg_proc_stat), get_spg_proc_k2u(spg_proc_stat),
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
When we destroy a vma, we first look up the spa by vma->vm_start, which requires holding sp_area_lock. Now that we store the spa in the vma, we can get the spa directly. There is no need to worry about whether the spa still exists or is about to be freed, since we increased the refcount of the spa when it was mapped into the vma.
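For illustration only (not part of the patch): a minimal user-space sketch of the idea, with stand-in structures in place of the real struct sp_area and struct vm_area_struct. The spa pointer cached at mmap time is dropped directly on teardown, so no locked rb-tree lookup is needed.

    #include <stdio.h>

    /* Stand-ins for the kernel structures; names and fields are illustrative. */
    struct spa { int use_count; unsigned long va_start; };
    struct vma { unsigned long vm_start; void *vm_private_data; };

    /* Old path: walk the rb-tree under sp_area_lock to find the spa by vm_start.
     * New path: use the pointer stored in the vma when it was mapped. */
    static void sp_area_drop_sketch(struct vma *v)
    {
        struct spa *s = v->vm_private_data;   /* cached at mmap time */
        if (s && --s->use_count == 0)
            printf("last reference dropped, free spa at %#lx\n", s->va_start);
    }

    int main(void)
    {
        struct spa s = { .use_count = 1, .va_start = 0x100000 };
        struct vma v = { .vm_start = 0x100000, .vm_private_data = &s };
        sp_area_drop_sketch(&v);              /* no tree walk, no lock contention */
        return 0;
    }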
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index de9abb9ca97d..45d967a7d142 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -850,7 +850,7 @@ static inline bool check_aoscore_process(struct task_struct *tsk)
static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot); + unsigned long prot, struct vm_area_struct **pvma); static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size);
#define K2U_NORMAL 0 @@ -1515,7 +1515,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) break; }
- addr = sp_mmap(mm, file, spa, &populate, prot); + addr = sp_mmap(mm, file, spa, &populate, prot, NULL); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_lock); @@ -2045,8 +2045,6 @@ static void __sp_area_drop(struct sp_area *spa)
void sp_area_drop(struct vm_area_struct *vma) { - struct sp_area *spa; - if (!(vma->vm_flags & VM_SHARE_POOL)) return;
@@ -2058,8 +2056,7 @@ void sp_area_drop(struct vm_area_struct *vma) * an atomic operation. */ spin_lock(&sp_area_lock); - spa = __find_sp_area_locked(vma->vm_mm->sp_group_master->local, vma->vm_start); - __sp_area_drop_locked(spa); + __sp_area_drop_locked(vma->vm_private_data); spin_unlock(&sp_area_lock); }
@@ -2326,7 +2323,7 @@ EXPORT_SYMBOL_GPL(mg_sp_free); /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, - unsigned long prot) + unsigned long prot, struct vm_area_struct **pvma) { unsigned long addr = spa->va_start; unsigned long size = spa_size(spa); @@ -2334,6 +2331,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, MAP_SHARE_POOL; unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY; unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; + struct vm_area_struct *vma;
/* Mark the mapped region to be locked. After the MAP_LOCKED is enable, * multiple tasks will preempt resources, causing performance loss. @@ -2349,8 +2347,13 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, pr_err("do_mmap fails %ld\n", addr); } else { BUG_ON(addr != spa->va_start); + vma = find_vma(mm, addr); + vma->vm_private_data = spa; + if (pvma) + *pvma = vma; }
+ return addr; }
@@ -2495,7 +2498,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned long mmap_addr; /* pass through default permission */ unsigned long prot = PROT_READ | PROT_WRITE; - unsigned long sp_addr = spa->va_start; unsigned long populate = 0; struct vm_area_struct *vma;
@@ -2514,7 +2516,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, prot = PROT_READ;
/* when success, mmap_addr == spa->va_start */ - mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(mmap_addr)) { up_write(&mm->mmap_lock); sp_alloc_unmap(mm, spa, spg_node); @@ -2530,14 +2532,6 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, } ac->populate = populate;
- vma = find_vma(mm, sp_addr); - if (unlikely(!vma)) { - up_write(&mm->mmap_lock); - WARN(1, "allocation failed, can't find %lx vma\n", sp_addr); - ret = -EINVAL; - goto unmap; - } - if (ac->sp_flags & SP_PROT_RO) vma->vm_flags &= ~VM_MAYWRITE;
@@ -2837,15 +2831,12 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, if (kc && kc->sp_flags & SP_PROT_RO) prot = PROT_READ;
- ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); goto put_mm; } - BUG_ON(ret_addr != spa->va_start);
- vma = find_vma(mm, ret_addr); - BUG_ON(vma == NULL); if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
@@ -3887,12 +3878,9 @@ int sp_node_id(struct vm_area_struct *vma) if (!sp_is_enabled()) return node_id;
- if (vma && vma->vm_flags & VM_SHARE_POOL) { - spa = __find_sp_area(vma->vm_mm->sp_group_master->local, vma->vm_start); - if (spa) { - node_id = spa->node_id; - __sp_area_drop(spa); - } + if (vma && vma->vm_flags & VM_SHARE_POOL && vma->vm_private_data) { + spa = vma->vm_private_data; + node_id = spa->node_id; }
return node_id; @@ -4467,13 +4455,12 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int node_id; struct sp_area *spa;
- spa = __find_sp_area(mm->sp_group_master->local, vma->vm_start); + spa = vma->vm_private_data; if (!spa) { pr_err("share pool: vma is invalid, not from sp mmap\n"); return ret; } node_id = spa->node_id; - __sp_area_drop(spa);
retry: page = find_lock_page(mapping, idx);
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
There are two types of memory allocated from the sharepool: passthrough memory for DVPP and shared memory. Currently we branch into different routines depending on the memory type, both in the allocation and in the free process. Since we have already created a local group for passthrough memory, with just one more step we can drop the redundant branches in the allocation and free processes, as well as in all the fallback paths taken when an error occurs.
Here is the content of this patch: 1. Add every process to its local group when initializing its group_master. 2. Avoid returning the local group in find_sp_group_id_by_pid(). 3. Delete the redundant branches in the allocation and free processes.
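As a rough illustration (simplified stand-in types, not the kernel code): once every process is a member of its own local group, the passthrough case is just a group with a single member, so one loop over the group's processes covers both scenes and the is_local_group() branches disappear.

    #include <stdio.h>

    /* Illustrative stand-ins; the real code walks spg->procs with list_for_each_entry. */
    struct proc { const char *name; struct proc *next; };
    struct group { struct proc *procs; };

    static void map_spa_for_group(struct group *g)
    {
        for (struct proc *p = g->procs; p; p = p->next)
            printf("mmap spa into %s\n", p->name);
    }

    int main(void)
    {
        struct proc self = { "current", NULL };
        struct group local = { &self };   /* passthrough: local group with one member */
        map_spa_for_group(&local);        /* same path as a multi-process shared group */
        return 0;
    }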
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 140 ++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 53 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 45d967a7d142..970fe76b4972 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -244,13 +244,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; }
+static void free_sp_group_locked(struct sp_group *spg); +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); static struct sp_group *create_spg(int spg_id); static void free_new_spg_id(bool new, int spg_id); /* The caller must hold sp_group_sem */ static struct sp_group_master *sp_init_group_master_locked( struct mm_struct *mm, bool *exist) { - int spg_id; + int spg_id, ret; struct sp_group *spg; struct sp_group_master *master = mm->sp_group_master;
@@ -266,16 +268,15 @@ static struct sp_group_master *sp_init_group_master_locked( spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, SPG_ID_LOCAL_MAX, GFP_ATOMIC); if (spg_id < 0) { - kfree(master); pr_err_ratelimited("generate local group id failed %d\n", spg_id); - return ERR_PTR(spg_id); + ret = spg_id; + goto free_master; }
spg = create_spg(spg_id); if (IS_ERR(spg)) { - free_new_spg_id(true, spg_id); - kfree(master); - return (struct sp_group_master *)spg; + ret = PTR_ERR(spg); + goto free_spg_id; }
INIT_LIST_HEAD(&master->node_list); @@ -285,8 +286,20 @@ static struct sp_group_master *sp_init_group_master_locked( master->local = spg; mm->sp_group_master = master;
+ ret = local_group_add_task(mm, spg); + if (ret < 0) + goto free_spg; + *exist = false; return master; + +free_spg: + free_sp_group_locked(spg); +free_spg_id: + free_new_spg_id(true, spg_id); +free_master: + kfree(master); + return ERR_PTR(ret); }
static inline bool is_local_group(int spg_id) @@ -665,6 +678,8 @@ static struct sp_overall_stat sp_overall_stat;
enum spa_type { SPA_TYPE_ALLOC = 1, + /* NOTE: reorganize after the statisical structure is reconstructed. */ + SPA_TYPE_ALLOC_PRIVATE = SPA_TYPE_ALLOC, SPA_TYPE_K2TASK, SPA_TYPE_K2SPG, }; @@ -1032,7 +1047,7 @@ EXPORT_SYMBOL_GPL(sp_group_id_by_pid); */ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) { - int ret = 0; + int ret = 0, real_count; struct sp_group_node *node; struct sp_group_master *master = NULL; struct task_struct *tsk; @@ -1057,18 +1072,28 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) goto out_up_read; }
- if (!master->count) { + /* + * There is a local group for each process which is used for + * passthrough allocation. The local group is a internal + * implementation for convenience and is not attempt to bother + * the user. + */ + real_count = master->count - 1; + if (real_count <= 0) { ret = -ENODEV; goto out_up_read; } - if ((unsigned int)*num < master->count) { + if ((unsigned int)*num < real_count) { ret = -E2BIG; goto out_up_read; } - *num = master->count; + *num = real_count;
- list_for_each_entry(node, &master->node_list, group_node) + list_for_each_entry(node, &master->node_list, group_node) { + if (is_local_group(node->spg->id)) + continue; *(spg_ids++) = node->spg->id; + }
out_up_read: up_read(&sp_group_sem); @@ -1256,7 +1281,7 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) return -EEXIST; }
- if (master->count + 1 == MAX_GROUP_FOR_TASK) { + if (master->count == MAX_GROUP_FOR_TASK) { pr_err("task reaches max group num\n"); return -ENOSPC; } @@ -1300,6 +1325,29 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) return 0; }
+static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_node *node; + struct spg_proc_stat *stat; + + node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); + if (IS_ERR(node)) + return PTR_ERR(node); + + /* use current just to avoid compile error, rebuild in following patch */ + stat = sp_init_process_stat(current, mm, spg); + if (IS_ERR(stat)) { + free_sp_group_locked(spg); + pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); + return PTR_ERR(stat); + } + + insert_spg_node(spg, node); + mmget(mm); + + return 0; +} + /* the caller must down_write(&spg->rw_lock) */ static void delete_spg_node(struct sp_group *spg, struct sp_group_node *node) { @@ -2171,15 +2219,10 @@ static void sp_fallocate(struct sp_area *spa)
static void sp_free_unmap_fallocate(struct sp_area *spa) { - if (!is_local_group(spa->spg->id)) { - down_read(&spa->spg->rw_lock); - __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); - sp_fallocate(spa); - up_read(&spa->spg->rw_lock); - } else { - sp_munmap(current->mm, spa->va_start, spa_size(spa)); - sp_fallocate(spa); - } + down_read(&spa->spg->rw_lock); + __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); + sp_fallocate(spa); + up_read(&spa->spg->rw_lock); }
static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm) @@ -2187,9 +2230,10 @@ static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm int ret = 0;
down_read(&spg->rw_lock); - if (!is_local_group(spg->id) && !is_process_in_group(spg, mm)) + if (!is_process_in_group(spg, mm)) ret = -EPERM; up_read(&spg->rw_lock); + return ret; }
@@ -2374,6 +2418,7 @@ struct sp_alloc_context { struct timespec64 start; struct timespec64 end; bool have_mbind; + enum spa_type type; };
static void trace_sp_alloc_begin(struct sp_alloc_context *ac) @@ -2461,10 +2506,13 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, pr_err_ratelimited("allocation failed, task not in group\n"); return -ENODEV; } + ac->type = SPA_TYPE_ALLOC; } else { /* allocation pass through scene */ spg = sp_get_local_group(current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); + down_read(&spg->rw_lock); + ac->type = SPA_TYPE_ALLOC_PRIVATE; }
if (sp_flags & SP_HUGEPAGE) { @@ -2487,8 +2535,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - if (!is_local_group(spa->spg->id)) - __sp_free(spa->spg, spa->va_start, spa->real_size, mm); + __sp_free(spa->spg, spa->va_start, spa->real_size, mm); }
static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, @@ -2543,10 +2590,7 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret;
unmap: - if (!is_local_group(spa->spg->id)) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); return ret; }
@@ -2646,11 +2690,7 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - if (!is_local_group(spa->spg->id)) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, - spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
if (unlikely(fatal_signal_pending(current))) pr_warn_ratelimited("allocation failed, current thread is killed\n"); @@ -2673,34 +2713,30 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node;
- if (is_local_group(spa->spg->id)) { - ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); - } else { - /* create mapping for each process in the group */ - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - mm = spg_node->master->mm; - mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); - if (mmap_ret) { - if (ac->state != ALLOC_COREDUMP) - return mmap_ret; - ac->state = ALLOC_NORMAL; - continue; - } - ret = mmap_ret; + /* create mapping for each process in the group */ + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + mm = spg_node->master->mm; + mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); + if (mmap_ret) { + if (ac->state != ALLOC_COREDUMP) + return mmap_ret; + ac->state = ALLOC_NORMAL; + continue; } + ret = mmap_ret; } + return ret; }
/* spa maybe an error pointer, so introduce variable spg */ static void sp_alloc_finish(int result, struct sp_area *spa, - struct sp_alloc_context *ac) + struct sp_alloc_context *ac) { struct sp_group *spg = ac->spg;
/* match sp_alloc_prepare */ - if (!is_local_group(spg->id)) - up_read(&spg->rw_lock); + up_read(&spg->rw_lock);
if (!result) sp_update_process_stat(current, true, spa); @@ -2740,7 +2776,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
try_again: spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg, - SPA_TYPE_ALLOC, current->tgid); + ac.type, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in allocation(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -4661,8 +4697,6 @@ void sp_group_post_exit(struct mm_struct *mm) } up_write(&sp_group_sem);
- if (master->local) - sp_group_drop(master->local); kfree(master); }
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
A few structures must already have been created by the time a process enters the sharepool subsystem, whether by allocating sharepool memory, being added into an spg, doing k2u, and so on.
Currently we create those structures just before we actually need them. For example, we find or create an sp_spa_stat after a successful memory allocation and before updating the statistical structure. The creation of a new structure may fail due to OOM, in which case we have to reclaim the memory already allocated and revert all the previous steps, or we simply forget to do so and a potential memory leak occurs. This design makes it confusing to tell when a structure is actually created, and we always have to worry about potential memory leaks when changing the code around it.
A better solution is to initialize all those structures at the same time, when a process joins the sharepool subsystem. In the future, we will remove the unnecessary statistical structures.
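A minimal sketch of the pattern this moves towards (hypothetical helper names, not the actual functions): allocate every per-process structure in one place when the process joins, and unwind in reverse order on the first failure so nothing leaks.

    #include <stdlib.h>

    /* Hypothetical initialisers standing in for master/proc-stat/local-group setup. */
    static void *init_master(void)      { return malloc(16); }
    static void *init_proc_stat(void)   { return malloc(16); }
    static void *init_local_group(void) { return malloc(16); }

    static int join_sharepool(void)
    {
        void *master = init_master();
        if (!master)
            return -1;

        void *stat = init_proc_stat();
        if (!stat)
            goto free_master;

        void *local = init_local_group();
        if (!local)
            goto free_stat;

        return 0;          /* kept for the process lifetime in the real code */

    free_stat:
        free(stat);
    free_master:
        free(master);
        return -1;
    }

    int main(void) { return join_sharepool() ? 1 : 0; }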
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/share_pool.h | 4 - mm/share_pool.c | 279 ++++++++++++++++--------------------- 2 files changed, 117 insertions(+), 166 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 7b536b30907d..64ea7f688de9 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -506,10 +506,6 @@ static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) return NULL; }
-static inline void sp_proc_stat_drop(struct sp_proc_stat *stat) -{ -} - static inline void spa_overview_show(struct seq_file *seq) { } diff --git a/mm/share_pool.c b/mm/share_pool.c index 970fe76b4972..8f5ecea44f08 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -244,33 +244,22 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; }
-static void free_sp_group_locked(struct sp_group *spg); -static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); static struct sp_group *create_spg(int spg_id); static void free_new_spg_id(bool new, int spg_id); -/* The caller must hold sp_group_sem */ -static struct sp_group_master *sp_init_group_master_locked( - struct mm_struct *mm, bool *exist) +static void free_sp_group_locked(struct sp_group *spg); +static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); +static int init_local_group(struct mm_struct *mm) { int spg_id, ret; struct sp_group *spg; + struct sp_mapping *spm; struct sp_group_master *master = mm->sp_group_master;
- if (master) { - *exist = true; - return master; - } - - master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); - if (master == NULL) - return ERR_PTR(-ENOMEM); - spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN, SPG_ID_LOCAL_MAX, GFP_ATOMIC); if (spg_id < 0) { pr_err_ratelimited("generate local group id failed %d\n", spg_id); - ret = spg_id; - goto free_master; + return spg_id; }
spg = create_spg(spg_id); @@ -279,60 +268,73 @@ static struct sp_group_master *sp_init_group_master_locked( goto free_spg_id; }
- INIT_LIST_HEAD(&master->node_list); - master->count = 0; - master->stat = NULL; - master->mm = mm; master->local = spg; - mm->sp_group_master = master; + spm = sp_mapping_create(SP_MAPPING_DVPP); + if (IS_ERR(spm)) { + ret = PTR_ERR(spm); + goto free_spg; + } + sp_mapping_attach(master->local, spm); + sp_mapping_attach(master->local, sp_mapping_normal);
ret = local_group_add_task(mm, spg); if (ret < 0) + /* The spm would be released while destroying the spg*/ goto free_spg;
- *exist = false; - return master; + return 0;
free_spg: free_sp_group_locked(spg); + master->local = NULL; free_spg_id: free_new_spg_id(true, spg_id); -free_master: - kfree(master); - return ERR_PTR(ret); -}
-static inline bool is_local_group(int spg_id) -{ - return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; + return ret; }
-/* - * If the process is added to a group first, the address space of the local - * group of the process must have been set. If the process is not added to - * a group, directly create or attach the process to the corresponding DVPP - * and normal address space. - */ -static int sp_mapping_group_setup_local(struct mm_struct *mm) +static void sp_proc_stat_drop(struct sp_proc_stat *stat); +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk); +/* The caller must hold sp_group_sem */ +static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm) { + int ret; struct sp_group_master *master; - struct sp_mapping *spm; - bool exist = false; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return PTR_ERR(master);
- if (master->local->dvpp) + if (mm->sp_group_master) return 0;
- spm = sp_mapping_create(SP_MAPPING_DVPP); - if (IS_ERR(spm)) - return PTR_ERR(spm); - sp_mapping_attach(master->local, spm); - sp_mapping_attach(master->local, sp_mapping_normal); + master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + INIT_LIST_HEAD(&master->node_list); + master->count = 0; + master->mm = mm; + mm->sp_group_master = master; + + ret = sp_init_proc_stat(mm, tsk); + if (ret) + goto free_master; + + ret = init_local_group(mm); + if (ret) + goto put_stat;
return 0; + +put_stat: + sp_proc_stat_drop(master->stat); +free_master: + mm->sp_group_master = NULL; + kfree(master); + + return ret; +} + +static inline bool is_local_group(int spg_id) +{ + return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; }
static struct sp_group *sp_get_local_group(struct mm_struct *mm) @@ -350,7 +352,7 @@ static struct sp_group *sp_get_local_group(struct mm_struct *mm) up_read(&sp_group_sem);
down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(mm); + ret = sp_init_group_master_locked(current, mm); if (ret) { up_write(&sp_group_sem); return ERR_PTR(ret); @@ -398,37 +400,29 @@ static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm, return stat; }
-static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, - struct mm_struct *mm, struct task_struct *tsk) +static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk) { struct sp_proc_stat *stat; int alloc_id, tgid = tsk->tgid; - - down_write(&sp_proc_stat_sem); - stat = master->stat; - if (stat) { - up_write(&sp_proc_stat_sem); - return stat; - } + struct sp_group_master *master = mm->sp_group_master;
stat = create_proc_stat(mm, tsk); - if (IS_ERR(stat)) { - up_write(&sp_proc_stat_sem); - return stat; - } + if (IS_ERR(stat)) + return PTR_ERR(stat);
+ down_write(&sp_proc_stat_sem); alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); if (alloc_id < 0) { up_write(&sp_proc_stat_sem); pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id); kfree(stat); - return ERR_PTR(alloc_id); + return alloc_id; }
master->stat = stat; up_write(&sp_proc_stat_sem);
- return stat; + return 0; }
static void update_spg_stat_alloc(unsigned long size, bool inc, @@ -542,18 +536,14 @@ static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id) return stat; }
-static struct spg_proc_stat *sp_init_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, struct sp_group *spg) +static struct spg_proc_stat *sp_init_spg_proc_stat(struct sp_proc_stat *proc_stat, + struct sp_group *spg) { struct spg_proc_stat *stat; int spg_id = spg->id; /* visit spg id locklessly */ struct sp_spg_stat *spg_stat = spg->stat;
- stat = find_spg_proc_stat(proc_stat, tgid, spg_id); - if (stat) - return stat; - - stat = create_spg_proc_stat(tgid, spg_id); + stat = create_spg_proc_stat(proc_stat->tgid, spg_id); if (IS_ERR(stat)) return stat;
@@ -570,31 +560,6 @@ static struct spg_proc_stat *sp_init_spg_proc_stat( return stat; }
-/* - * The caller must - * 1. ensure no concurrency problem for task_struct and mm_struct. - * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) - */ -static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk, - struct mm_struct *mm, struct sp_group *spg) -{ - struct sp_group_master *master; - bool exist; - struct sp_proc_stat *proc_stat; - struct spg_proc_stat *spg_proc_stat; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return (struct spg_proc_stat *)master; - - proc_stat = sp_init_proc_stat(master, mm, tsk); - if (IS_ERR(proc_stat)) - return (struct spg_proc_stat *)proc_stat; - - spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg); - return spg_proc_stat; -} - static struct sp_spg_stat *create_spg_stat(int spg_id) { struct sp_spg_stat *stat; @@ -841,9 +806,9 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, enum spa_type type = spa->type;
down_write(&sp_group_sem); - stat = sp_init_process_stat(tsk, tsk->mm, spa->spg); + stat = find_spg_proc_stat(tsk->mm->sp_group_master->stat, tsk->tgid, spa->spg->id); up_write(&sp_group_sem); - if (unlikely(IS_ERR(stat))) + if (!stat) return;
update_spg_proc_stat(size, inc, stat, type); @@ -1264,26 +1229,27 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str }
/* the caller must hold sp_group_sem */ -static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) +static int mm_add_group_init(struct task_struct *tsk, struct mm_struct *mm, + struct sp_group *spg) { - struct sp_group_master *master = mm->sp_group_master; - bool exist = false; - - master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return PTR_ERR(master); - - if (!exist) - return 0; + int ret; + struct sp_group_master *master;
- if (is_process_in_group(spg, mm)) { - pr_err_ratelimited("task already in target group, id=%d\n", spg->id); - return -EEXIST; - } + if (!mm->sp_group_master) { + ret = sp_init_group_master_locked(tsk, mm); + if (ret) + return ret; + } else { + if (is_process_in_group(spg, mm)) { + pr_err_ratelimited("task already in target group, id=%d\n", spg->id); + return -EEXIST; + }
- if (master->count == MAX_GROUP_FOR_TASK) { - pr_err("task reaches max group num\n"); - return -ENOSPC; + master = mm->sp_group_master; + if (master->count == MAX_GROUP_FOR_TASK) { + pr_err("task reaches max group num\n"); + return -ENOSPC; + } }
return 0; @@ -1322,29 +1288,13 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node)
spg->proc_num++; list_add_tail(&node->proc_node, &spg->procs); - return 0; -} - -static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) -{ - struct sp_group_node *node; - struct spg_proc_stat *stat; - - node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); - if (IS_ERR(node)) - return PTR_ERR(node); - - /* use current just to avoid compile error, rebuild in following patch */ - stat = sp_init_process_stat(current, mm, spg); - if (IS_ERR(stat)) { - free_sp_group_locked(spg); - pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - return PTR_ERR(stat); - } - - insert_spg_node(spg, node); - mmget(mm);
+ /* + * The only way where sp_init_spg_proc_stat got failed is that there is no + * memory for sp_spg_stat. We will avoid this failure when we put sp_spg_stat + * into sp_group_node later. + */ + sp_init_spg_proc_stat(node->master->stat, spg); return 0; }
@@ -1367,6 +1317,20 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg, kfree(spg_node); }
+static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) +{ + struct sp_group_node *node; + + node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg); + if (IS_ERR(node)) + return PTR_ERR(node); + + insert_spg_node(spg, node); + mmget(mm); + + return 0; +} + /** * sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. @@ -1391,7 +1355,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) int ret = 0; bool id_newly_generated = false; struct sp_area *spa, *prev = NULL; - struct spg_proc_stat *stat;
check_interrupt_context();
@@ -1494,29 +1457,27 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } }
- ret = mm_add_group_init(mm, spg); - if (ret) + + down_write(&spg->rw_lock); + ret = mm_add_group_init(tsk, mm, spg); + if (ret) { + up_write(&spg->rw_lock); goto out_drop_group; + }
ret = sp_mapping_group_setup(mm, spg); - if (ret) + if (ret) { + up_write(&spg->rw_lock); goto out_drop_group; + }
node = create_spg_node(mm, prot, spg); if (unlikely(IS_ERR(node))) { + up_write(&spg->rw_lock); ret = PTR_ERR(node); goto out_drop_spg_node; }
- /* per process statistics initialization */ - stat = sp_init_process_stat(tsk, mm, spg); - if (IS_ERR(stat)) { - ret = PTR_ERR(stat); - pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - goto out_drop_spg_node; - } - - down_write(&spg->rw_lock); ret = insert_spg_node(spg, node); if (unlikely(ret)) { up_write(&spg->rw_lock); @@ -1768,7 +1729,7 @@ int sp_id_of_current(void)
down_read(&sp_group_sem); master = current->mm->sp_group_master; - if (master && master->local) { + if (master) { spg_id = master->local->id; up_read(&sp_group_sem); return spg_id; @@ -1776,7 +1737,7 @@ int sp_id_of_current(void) up_read(&sp_group_sem);
down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(current->mm); + ret = sp_init_group_master_locked(current, current->mm); if (ret) { up_write(&sp_group_sem); return ret; @@ -2935,7 +2896,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un struct sp_group *spg;
down_write(&sp_group_sem); - ret = sp_mapping_group_setup_local(current->mm); + ret = sp_init_group_master_locked(current, current->mm); if (ret) { up_write(&sp_group_sem); pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret); @@ -2943,13 +2904,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un }
spg = current->mm->sp_group_master->local; - stat = sp_init_process_stat(current, current->mm, spg); - if (IS_ERR(stat)) { - up_write(&sp_group_sem); - pr_err_ratelimited("k2u_task init process stat failed %lx\n", - PTR_ERR(stat)); - return stat; - } + stat = find_spg_proc_stat(current->mm->sp_group_master->stat, current->tgid, spg->id); up_write(&sp_group_sem);
spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); @@ -3969,7 +3924,7 @@ static void free_sp_proc_stat(struct sp_proc_stat *stat) }
/* the caller make sure stat is not NULL */ -void sp_proc_stat_drop(struct sp_proc_stat *stat) +static void sp_proc_stat_drop(struct sp_proc_stat *stat) { if (atomic_dec_and_test(&stat->use_count)) free_sp_proc_stat(stat);
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
1. Add a list to sp_mapping to record all the sp_groups attached to it. 2. Initialize the sp_mapping for the local group when it is created, so when we add a task to a group we have to merge the dvpp mapping of the local group. 3. Two groups can be merged if and only if at least one of their mappings is empty; the empty mapping is then dropped and the other mapping is attached to both groups. This needs to traverse all the groups attached to the dropped mapping. 4. A mapping is considered empty when no spa is allocated from it and its address space is the default one.
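A simplified model of rules 3 and 4 (stand-in types, illustrative only): merging succeeds only when at least one of the two dvpp mappings is empty, and the groups attached to the dropped mapping are re-pointed at the surviving one.

    #include <stdbool.h>
    #include <stdio.h>

    struct mapping { int spa_count; int groups_attached; };

    static bool is_mapping_empty(const struct mapping *m) { return m->spa_count == 0; }

    /* Move every group from src to dst; src would then be destroyed. */
    static void merge(struct mapping *dst, struct mapping *src)
    {
        dst->groups_attached += src->groups_attached;
        src->groups_attached = 0;
    }

    static bool setup(struct mapping *local, struct mapping *spg)
    {
        if (is_mapping_empty(local))
            merge(spg, local);        /* the local group adopts the spg mapping */
        else if (is_mapping_empty(spg))
            merge(local, spg);        /* the spg adopts the local mapping */
        else
            return false;             /* both hold live spas: address conflict */
        return true;
    }

    int main(void)
    {
        struct mapping local = { .spa_count = 0, .groups_attached = 1 };
        struct mapping spg   = { .spa_count = 2, .groups_attached = 3 };
        printf("merge %s\n", setup(&local, &spg) ? "ok" : "rejected");
        return 0;
    }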
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/share_pool.h | 9 ++++-- mm/share_pool.c | 65 +++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 64ea7f688de9..2ae1b75e5aba 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -116,6 +116,9 @@ struct sp_mapping { struct rb_node *free_area_cache; unsigned long cached_hole_size; unsigned long cached_vstart; + + /* list head for all groups attached to this mapping, dvpp mapping only */ + struct list_head group_head; };
/* Processes in the same sp_group can share memory. @@ -159,8 +162,10 @@ struct sp_group { atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; - struct sp_mapping *dvpp; - struct sp_mapping *normal; + /* list node for dvpp mapping */ + struct list_head mnode; + struct sp_mapping *dvpp; + struct sp_mapping *normal; };
/* a per-process(per mm) struct which manages a sp_group_node list */ diff --git a/mm/share_pool.c b/mm/share_pool.c index 8f5ecea44f08..778f2658b191 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -163,6 +163,7 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) sp_mapping_range_init(spm); atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; + INIT_LIST_HEAD(&spm->group_head);
return spm; } @@ -175,18 +176,45 @@ static void sp_mapping_destroy(struct sp_mapping *spm) static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { atomic_inc(&spm->user); - if (spm->flag & SP_MAPPING_DVPP) + if (spm->flag & SP_MAPPING_DVPP) { spg->dvpp = spm; - else if (spm->flag & SP_MAPPING_NORMAL) + list_add_tail(&spg->mnode, &spm->group_head); + } else if (spm->flag & SP_MAPPING_NORMAL) spg->normal = spm; }
static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { - if (spm && atomic_dec_and_test(&spm->user)) + if (!spm) + return; + if (spm->flag & SP_MAPPING_DVPP) + list_del(&spg->mnode); + if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm); }
+/* merge old mapping to new, and the old mapping would be destroyed */ +static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old) +{ + struct sp_group *spg, *tmp; + + if (new == old) + return; + + list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { + list_move_tail(&spg->mnode, &new->group_head); + spg->dvpp = new; + } + + atomic_add(atomic_read(&old->user), &new->user); + sp_mapping_destroy(old); +} + +static bool is_mapping_empty(struct sp_mapping *spm) +{ + return RB_EMPTY_ROOT(&spm->area_root); +} + /* * When you set the address space of a group, the normal address space * is globally unified. When processing the DVPP address space, consider @@ -211,32 +239,18 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; struct sp_group *local = master->local; - struct sp_mapping *spm;
if (!list_empty(&spg->procs)) { - /* 1 */ - if (local->dvpp && local->dvpp != spg->dvpp) { - pr_info_ratelimited("Duplicate address space, id=%d\n", - spg->id); - return 0; - } - - /* 2 */ - if (!local->dvpp) { - sp_mapping_attach(local, spg->dvpp); - sp_mapping_attach(local, spg->normal); + if (is_mapping_empty(local->dvpp)) + sp_mapping_merge(spg->dvpp, local->dvpp); + else if (is_mapping_empty(spg->dvpp)) + sp_mapping_merge(local->dvpp, spg->dvpp); + else { + pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id); + return -EINVAL; } } else { - /* 4 */ - if (!local->dvpp) { - spm = sp_mapping_create(SP_MAPPING_DVPP); - if (IS_ERR(spm)) - return PTR_ERR(spm); - sp_mapping_attach(local, spm); - sp_mapping_attach(local, sp_mapping_normal); - } - - /* 3 */ + /* the mapping of local group is always set */ sp_mapping_attach(spg, local->dvpp); sp_mapping_attach(spg, sp_mapping_normal); } @@ -1132,6 +1146,7 @@ static struct sp_group *create_spg(int spg_id) atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); INIT_LIST_HEAD(&spg->spa_list); + INIT_LIST_HEAD(&spg->mnode); init_rwsem(&spg->rw_lock);
sprintf(name, "sp_group_%d", spg_id);
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
When SPG_FLAG_NON_DVPP is specified to sp_group_add_task, we don't create a DVPP mapping for the newly created sp_group, so the new group cannot allocate DVPP memory.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/share_pool.h | 7 +++-- mm/share_pool.c | 60 ++++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 31 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 2ae1b75e5aba..25b84d995619 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -42,6 +42,9 @@ #define SPG_ID_LOCAL_MIN 200001 #define SPG_ID_LOCAL_MAX 299999
+#define SPG_FLAG_NON_DVPP (1 << 0) +#define SPG_FLAG_MASK (SPG_FLAG_NON_DVPP) + #define MAX_DEVID 8 /* the max num of Da-vinci devices */
extern int sysctl_share_pool_hugepage_enable; @@ -145,6 +148,7 @@ struct sp_mapping { */ struct sp_group { int id; + unsigned long flag; struct file *file; struct file *file_hugetlb; /* number of process in this group */ @@ -285,9 +289,6 @@ extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, in extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr);
-extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); -extern int sp_group_add_task(int pid, int spg_id); - extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void);
diff --git a/mm/share_pool.c b/mm/share_pool.c index 778f2658b191..3a1d99b8a515 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -216,31 +216,20 @@ static bool is_mapping_empty(struct sp_mapping *spm) }
/* - * When you set the address space of a group, the normal address space - * is globally unified. When processing the DVPP address space, consider - * the following situations: - * 1. If a process is added to a non-new group, the DVPP address space - * must have been created. If the local group of the process also - * contains the DVPP address space and they are different, this - * scenario is not allowed to avoid address conflict. - * 2. If the DVPP address space does not exist in the local group of the - * process, attach the local group of the process to the DVPP address - * space of the group. - * 3. Add a new group. If the process has applied for the dvpp address - * space (sp_alloc or k2u), attach the new group to the dvpp address - * space of the current process. - * 4. If the process has not applied for the DVPP address space, attach - * the new group and the local group of the current process to the - * newly created DVPP address space. - * + * 1. The mappings of local group is set on creating. + * 2. This is used to setup the mapping for groups created during add_task. + * 3. The normal mapping exists for all groups. + * 4. The dvpp mappings for the new group and local group can merge _iff_ at + * least one of the mapping is empty. * the caller must hold sp_group_sem + * NOTE: undo the mergeing when the later process failed. */ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; struct sp_group *local = master->local;
- if (!list_empty(&spg->procs)) { + if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { if (is_mapping_empty(local->dvpp)) sp_mapping_merge(spg->dvpp, local->dvpp); else if (is_mapping_empty(spg->dvpp)) @@ -250,15 +239,17 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return -EINVAL; } } else { - /* the mapping of local group is always set */ - sp_mapping_attach(spg, local->dvpp); - sp_mapping_attach(spg, sp_mapping_normal); + if (!(spg->flag & SPG_FLAG_NON_DVPP)) + /* the mapping of local group is always set */ + sp_mapping_attach(spg, local->dvpp); + if (!spg->normal) + sp_mapping_attach(spg, sp_mapping_normal); }
return 0; }
-static struct sp_group *create_spg(int spg_id); +static struct sp_group *create_spg(int spg_id, unsigned long flag); static void free_new_spg_id(bool new, int spg_id); static void free_sp_group_locked(struct sp_group *spg); static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg); @@ -276,7 +267,7 @@ static int init_local_group(struct mm_struct *mm) return spg_id; }
- spg = create_spg(spg_id); + spg = create_spg(spg_id, 0); if (IS_ERR(spg)) { ret = PTR_ERR(spg); goto free_spg_id; @@ -1114,7 +1105,7 @@ static loff_t addr_offset(struct sp_area *spa) return (loff_t)(addr - sp_dev_va_start[spa->device_id]); }
-static struct sp_group *create_spg(int spg_id) +static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; struct sp_group *spg; @@ -1128,6 +1119,11 @@ static struct sp_group *create_spg(int spg_id) return ERR_PTR(-ENOSPC); }
+ if (flag & ~SPG_FLAG_MASK) { + pr_err_ratelimited("invalid flag:%#lx\n", flag); + return ERR_PTR(-EINVAL); + } + spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) return ERR_PTR(-ENOMEM); @@ -1140,6 +1136,7 @@ static struct sp_group *create_spg(int spg_id) }
spg->id = spg_id; + spg->flag = flag; spg->is_alive = true; spg->proc_num = 0; spg->owner = current->group_leader; @@ -1187,14 +1184,14 @@ static struct sp_group *create_spg(int spg_id) }
/* the caller must hold sp_group_sem */ -static struct sp_group *find_or_alloc_sp_group(int spg_id) +static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag) { struct sp_group *spg;
spg = __sp_find_spg_locked(current->pid, spg_id);
if (!spg) { - spg = create_spg(spg_id); + spg = create_spg(spg_id, flag); } else { down_read(&spg->rw_lock); if (!spg_valid(spg)) { @@ -1347,10 +1344,11 @@ static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) }
/** - * sp_group_add_task() - Add a process to an share group (sp_group). + * mg_sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. * @prot: the prot of task for this spg. * @spg_id: the ID of the sp_group. + * @flag: to give some special message. * * A process can't be added to more than one sp_group in single group mode * and can in multiple group mode. @@ -1363,6 +1361,7 @@ static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg) */ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) { + unsigned long flag = 0; struct task_struct *tsk; struct mm_struct *mm; struct sp_group *spg; @@ -1456,7 +1455,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_task; }
- spg = find_or_alloc_sp_group(spg_id); + spg = find_or_alloc_sp_group(spg_id, flag); if (IS_ERR(spg)) { up_write(&sp_group_sem); ret = PTR_ERR(spg); @@ -1830,6 +1829,11 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, else mapping = spg->normal;
+ if (!mapping) { + pr_err_ratelimited("non DVPP spg, id %d\n", spg->id); + return ERR_PTR(-EINVAL); + } + vstart = mapping->start[device_id]; vend = mapping->end[device_id]; spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id);
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
Currently the dvpp range is global for each device. This is unreasonable after the reconstruction, which makes the DVPP mappings private to each process or group.
This allows the dvpp range to be configured for each process. The dvpp range of each dvpp mapping can only be configured once, just as in the old version.
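For illustration (simplified types and made-up default addresses, not the kernel's values): the range can only be set while it still holds the per-device default, so a second configuration attempt on the same mapping is refused.

    #include <stdbool.h>
    #include <stdio.h>

    #define DEVICES 8
    /* Illustrative default; the real value derives from MMAP_SHARE_POOL_16G_START. */
    #define DEFAULT_START(dev) (0x1000000UL + (dev) * 0x4000000UL)

    struct mapping { unsigned long start[DEVICES], end[DEVICES]; };

    static bool config_dvpp_range(struct mapping *m, int dev,
                                  unsigned long start, unsigned long size)
    {
        if (m->start[dev] != DEFAULT_START(dev))
            return false;                     /* already configured once */
        m->start[dev] = start;
        m->end[dev] = start + size;
        return true;
    }

    int main(void)
    {
        struct mapping m = { .start = { DEFAULT_START(0) } };
        printf("%d\n", config_dvpp_range(&m, 0, 0x2000000UL, 0x400000UL));  /* 1: first time */
        printf("%d\n", config_dvpp_range(&m, 0, 0x3000000UL, 0x400000UL));  /* 0: rejected   */
        return 0;
    }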
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 66 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 8 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 3a1d99b8a515..66d02929741c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -215,6 +215,17 @@ static bool is_mapping_empty(struct sp_mapping *spm) return RB_EMPTY_ROOT(&spm->area_root); }
+static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) +{ + int i; + + for (i = 0; i < sp_device_number; i++) + if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i]) + return false; + + return true; +} + /* * 1. The mappings of local group is set on creating. * 2. This is used to setup the mapping for groups created during add_task. @@ -230,6 +241,11 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) struct sp_group *local = master->local;
if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { + if (!can_mappings_merge(local->dvpp, spg->dvpp)) { + pr_info_ratelimited("address space conflict, id=%d\n", spg->id); + return -EINVAL; + } + if (is_mapping_empty(local->dvpp)) sp_mapping_merge(spg->dvpp, local->dvpp); else if (is_mapping_empty(spg->dvpp)) @@ -3836,16 +3852,50 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); */ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { - if (pid < 0 || - size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= sp_device_number || - !is_online_node_id(device_id) || - is_sp_dev_addr_enabled(device_id)) + int ret; + bool err = false; + struct task_struct *tsk; + struct mm_struct *mm; + struct sp_group *spg; + struct sp_mapping *spm; + unsigned long default_start; + + /* NOTE: check the start address */ + if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || + device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) return false;
- sp_dev_va_start[device_id] = start; - sp_dev_va_size[device_id] = size; - return true; + ret = get_task(pid, &tsk); + if (ret) + return false; + + mm = get_task_mm(tsk->group_leader); + if (!mm) + goto put_task; + + spg = sp_get_local_group(mm); + if (IS_ERR(spg)) + goto put_mm; + + spm = spg->dvpp; + default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE; + /* The dvpp range of each group can be configured only once */ + if (spm->start[device_id] != default_start) + goto put_spg; + + spm->start[device_id] = start; + spm->end[device_id] = start + size; + + err = true; + +put_spg: + sp_group_drop(spg); +put_mm: + mmput(mm); +put_task: + put_task_struct(tsk); + + return err; } EXPORT_SYMBOL_GPL(sp_config_dvpp_range);
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
The user doesn't care about the start address of the dvpp range; what matters is that the virtual space tagged DVPP lies within a 16G range. So during the merging process we can safely drop a dvpp address space as long as it's empty.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 66d02929741c..e1d16143cdd9 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -241,16 +241,25 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) struct sp_group *local = master->local;
if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { - if (!can_mappings_merge(local->dvpp, spg->dvpp)) { - pr_info_ratelimited("address space conflict, id=%d\n", spg->id); - return -EINVAL; - } + /* + * Don't return an error when the mappings' address range conflict. + * As long as the mapping is unused, we can drop the empty mapping. + * This may change the address range for the task or group implicitly, + * give a warn for it. + */ + bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp);
- if (is_mapping_empty(local->dvpp)) + if (is_mapping_empty(local->dvpp)) { sp_mapping_merge(spg->dvpp, local->dvpp); - else if (is_mapping_empty(spg->dvpp)) + if (is_conflict) + pr_warn_ratelimited("task address space conflict, spg_id=%d\n", + spg->id); + } else if (is_mapping_empty(spg->dvpp)) { sp_mapping_merge(local->dvpp, spg->dvpp); - else { + if (is_conflict) + pr_warn_ratelimited("group address space conflict, spg_id=%d\n", + spg->id); + } else { pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id); return -EINVAL; }
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
sp_get_local_group() could be invoked from a kthread, where the current process isn't the process we want. Add a parameter and let the caller pass in the intended task to avoid this problem.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index e1d16143cdd9..c1a00a5ac948 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -367,7 +367,7 @@ static inline bool is_local_group(int spg_id) return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; }
-static struct sp_group *sp_get_local_group(struct mm_struct *mm) +static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_struct *mm) { int ret; struct sp_group_master *master; @@ -382,7 +382,7 @@ static struct sp_group *sp_get_local_group(struct mm_struct *mm) up_read(&sp_group_sem);
down_write(&sp_group_sem); - ret = sp_init_group_master_locked(current, mm); + ret = sp_init_group_master_locked(tsk, mm); if (ret) { up_write(&sp_group_sem); return ERR_PTR(ret); @@ -2513,7 +2513,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, } ac->type = SPA_TYPE_ALLOC; } else { /* allocation pass through scene */ - spg = sp_get_local_group(current->mm); + spg = sp_get_local_group(current, current->mm); if (IS_ERR(spg)) return PTR_ERR(spg); down_read(&spg->rw_lock); @@ -3882,7 +3882,7 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) if (!mm) goto put_task;
- spg = sp_get_local_group(mm); + spg = sp_get_local_group(tsk, mm); if (IS_ERR(spg)) goto put_mm;
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
--------------------------------------------------
We should forbid the use of the sharepool interfaces when sharepool is not enabled; otherwise undefined behaviour could panic the kernel.
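A minimal sketch of the guard pattern (user-space model; the enable switch and interface name are stand-ins): every exported entry point returns -EOPNOTSUPP up front when the feature is off, instead of touching state that was never initialised.

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool sp_enabled;   /* stand-in for the kernel's enable switch */

    static int sp_some_interface(void)
    {
        if (!sp_enabled)
            return -EOPNOTSUPP;   /* well-defined error instead of a crash */
        /* ... normal work ... */
        return 0;
    }

    int main(void)
    {
        printf("disabled -> %d\n", sp_some_interface());
        sp_enabled = true;
        printf("enabled  -> %d\n", sp_some_interface());
        return 0;
    }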
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index c1a00a5ac948..db6b8237c7f9 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1012,6 +1012,9 @@ int sp_group_id_by_pid(int pid) struct sp_group *spg; int spg_id = -ENODEV;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
spg = __sp_find_spg(pid, SPG_ID_DEFAULT); @@ -1047,6 +1050,9 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) struct sp_group_master *master = NULL; struct task_struct *tsk;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (!spg_ids || num <= 0) @@ -1395,6 +1401,9 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) bool id_newly_generated = false; struct sp_area *spa, *prev = NULL;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
/* only allow READ, READ | WRITE */ @@ -1672,6 +1681,9 @@ int mg_sp_group_del_task(int pid, int spg_id) struct mm_struct *mm = NULL; bool is_alive = true;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("del from group failed, invalid group id %d\n", spg_id); return -EINVAL; @@ -1763,6 +1775,9 @@ int sp_id_of_current(void) int ret, spg_id; struct sp_group_master *master;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + if (current->flags & PF_KTHREAD || !current->mm) return -EINVAL;
@@ -2338,6 +2353,9 @@ int sp_free(unsigned long addr, int id) .spg_id = id, };
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (current->flags & PF_KTHREAD) @@ -2775,6 +2793,9 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) int ret = 0; struct sp_alloc_context ac;
+ if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); if (ret) return ERR_PTR(ret); @@ -3155,6 +3176,9 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, int ret; struct sp_k2u_context kc;
+ if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context();
ret = sp_k2u_prepare(kva, size, sp_flags, spg_id, &kc); @@ -3442,6 +3466,9 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) struct sp_walk_data sp_walk_data; struct vm_struct *area;
+ if (!sp_is_enabled()) + return ERR_PTR(-EOPNOTSUPP); + check_interrupt_context();
if (mm == NULL) { @@ -3730,6 +3757,9 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { int ret = 0;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (current->flags & PF_KTHREAD) @@ -3775,6 +3805,9 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, struct mm_struct *mm; int ret = 0;
+ if (!sp_is_enabled()) + return -EOPNOTSUPP; + check_interrupt_context();
if (unlikely(!sp_walk_data)) { @@ -3820,6 +3853,9 @@ EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); */ void sp_walk_page_free(struct sp_walk_data *sp_walk_data) { + if (!sp_is_enabled()) + return; + check_interrupt_context();
if (!sp_walk_data) @@ -3869,6 +3905,9 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) struct sp_mapping *spm; unsigned long default_start;
+ if (!sp_is_enabled()) + return false; + /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) @@ -3929,7 +3968,8 @@ static bool is_sp_normal_addr(unsigned long addr) */ bool is_sharepool_addr(unsigned long addr) { - return is_sp_normal_addr(addr) || is_device_addr(addr); + return sp_is_enabled() && + (is_sp_normal_addr(addr) || is_device_addr(addr)); } EXPORT_SYMBOL_GPL(is_sharepool_addr);
@@ -4126,6 +4166,9 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, unsigned long anon, file, shmem, total_rss, prot; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
+ if (!sp_is_enabled()) + return 0; + if (!mm) return 0;
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
We use device_id to select the correct dvpp vspace range when the SP_DVPP flag is specified.
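A condensed sketch of the resulting flag handling; the helper name is invented, the real check sits inline in sp_k2u_prepare() as shown in the diff below:

static int sp_k2u_check_flags(unsigned long *sp_flags)
{
	/* accept every defined flag bit, including the device_id field,
	 * instead of rejecting everything other than SP_DVPP
	 */
	if (*sp_flags & ~SP_FLAG_MASK)
		return -EINVAL;

	/* k2u never maps hugepages, so drop the hint up front */
	*sp_flags &= ~SP_HUGEPAGE;
	return 0;
}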
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index db6b8237c7f9..9c5b8e7fd6b7 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3096,10 +3096,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size,
trace_sp_k2u_begin(kc);
- if (sp_flags & ~SP_DVPP) { + if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; } + sp_flags &= ~SP_HUGEPAGE;
if (!current->mm) { pr_err_ratelimited("k2u: kthread is not allowed\n");
From: Yuan Can yuancan@huawei.com
Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
------------------------------------------------------
create_spg_node() may fail and return a NULL pointer; in that case the out_drop_spg_node error path would dereference the NULL pointer in free_spg_node().
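The rule the one-line fix restores, shown with a hypothetical example (none of these names are sharepool code): an error label must only undo steps that have already succeeded.

#include <linux/err.h>

struct res;
extern struct res *acquire_group(void);
extern struct res *acquire_node(void);
extern void release_node(struct res *n);
extern void release_group(struct res *g);

static int demo_setup(void)
{
	struct res *grp, *node;
	int ret;

	grp = acquire_group();
	if (IS_ERR(grp))
		return PTR_ERR(grp);

	node = acquire_node();
	if (IS_ERR(node)) {
		ret = PTR_ERR(node);
		goto out_drop_group;	/* node was never created: skip its cleanup */
	}

	release_node(node);
	ret = 0;

out_drop_group:
	release_group(grp);
	return ret;
}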
Signed-off-by: Yuan Can yuancan@huawei.com --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 9c5b8e7fd6b7..e14753da004d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1523,7 +1523,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) if (unlikely(IS_ERR(node))) { up_write(&spg->rw_lock); ret = PTR_ERR(node); - goto out_drop_spg_node; + goto out_drop_group; }
ret = insert_spg_node(spg, node);
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-----------------------------------
In sp_mmap(), if the offset is computed as va - MMAP_BASE (or va - DVPP_BASE for DVPP areas), a normal sp_alloc pgoff can take the same value as a DVPP pgoff, so DVPP and sp_alloc mappings unexpectedly overlap in the backing file.
To fix the problem, pass the VA itself as the mmap offset: within a single task's address space, VA values are unique, so the offsets cannot collide.
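A tiny userspace illustration of the collision; the base addresses are invented for the example and only the arithmetic matters:

#include <stdio.h>

/* invented bases standing in for MMAP_SHARE_POOL_START and a DVPP base */
#define NORMAL_BASE 0x100000000000UL
#define DVPP_BASE   0x200000000000UL

int main(void)
{
	unsigned long normal_va = NORMAL_BASE + 0x1000;	/* from sp_alloc */
	unsigned long dvpp_va = DVPP_BASE + 0x1000;	/* from a DVPP group */

	/* old scheme: offsets relative to different bases collide */
	printf("old offsets: %#lx vs %#lx\n",
	       normal_va - NORMAL_BASE, dvpp_va - DVPP_BASE);

	/* new scheme: the VA itself is the offset, unique within one mm */
	printf("new offsets: %#lx vs %#lx\n", normal_va, dvpp_va);
	return 0;
}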
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index e14753da004d..2c29edae135c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -58,6 +58,11 @@
#define spg_valid(spg) ((spg)->is_alive == true)
+/* Use spa va address as mmap offset. This can work because spa_file + * is setup with 64-bit address space. So va shall be well covered. + */ +#define addr_offset(spa) ((spa)->va_start) + #define byte2kb(size) ((size) >> 10) #define byte2mb(size) ((size) >> 20) #define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10)) @@ -1120,22 +1125,6 @@ static bool is_device_addr(unsigned long addr) return false; }
-static loff_t addr_offset(struct sp_area *spa) -{ - unsigned long addr; - - if (unlikely(!spa)) { - WARN(1, "invalid spa when calculate addr offset\n"); - return 0; - } - addr = spa->va_start; - - if (!is_device_addr(addr)) - return (loff_t)(addr - MMAP_SHARE_POOL_START); - - return (loff_t)(addr - sp_dev_va_start[spa->device_id]); -} - static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret;
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
-------------------------------------------------
The sp_group_master structure is used only inside the sharepool subsystem; no other driver uses it.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/share_pool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 25b84d995619..1911cd35843b 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -10,6 +10,7 @@ #include <linux/hashtable.h> #include <linux/numa.h> #include <linux/jump_label.h> +#include <linux/kabi.h>
#define SP_HUGEPAGE (1 << 0) #define SP_HUGEPAGE_ONLY (1 << 1) @@ -187,7 +188,7 @@ struct sp_group_master { * Used to apply for the shared pool memory of the current process. * For example, sp_alloc non-share memory or k2task. */ - struct sp_group *local; + KABI_EXTEND(struct sp_group *local) };
/*
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5J0YW
--------------------------------
Sharepool has a statistics system that lets users check memory usage easily. The statistics code is largely independent of the core functionality, yet its implementation closely mirrors it, which doubles the lock usage and causes lock-nesting problems.
Thus we remove the separate statistics system and embed all the counters into the core data structures as built-in statistics. The user API does not change. This greatly reduces the locking complexity and removes hundreds of lines of redundant code.
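A minimal sketch of the built-in statistics shape with illustrative names; the real structures and update helpers appear in the diff below:

#include <linux/atomic.h>
#include <linux/types.h>

/* counters live inside the object they describe, so no separate idr/hash
 * table (and no extra lock to protect it) is needed to look them up
 */
struct demo_stat {
	atomic64_t alloc_nsize;	/* bytes allocated as normal pages */
	atomic64_t alloc_hsize;	/* bytes allocated as hugepages */
};

static void demo_update_alloc(struct demo_stat *stat, unsigned long size,
			      bool inc, bool huge)
{
	atomic64_t *counter = huge ? &stat->alloc_hsize : &stat->alloc_nsize;

	if (inc)
		atomic64_add(size, counter);
	else
		atomic64_sub(size, counter);
}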
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- include/linux/share_pool.h | 7 +- mm/share_pool.c | 843 ++++++++++++++++--------------------- 2 files changed, 375 insertions(+), 475 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 1911cd35843b..c2ef26661a4f 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -65,6 +65,7 @@ extern int sysctl_sp_perf_alloc;
extern int sysctl_sp_perf_k2u;
+#ifdef __GENKSYMS__ /* we estimate an sp-group ususally contains at most 64 sp-group */ #define SP_SPG_HASH_BITS 6
@@ -206,6 +207,7 @@ struct sp_group_node { struct sp_group *spg; unsigned long prot; }; +#endif
struct sp_walk_data { struct page **pages; @@ -508,11 +510,6 @@ static inline bool mg_is_sharepool_addr(unsigned long addr) return false; }
-static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) -{ - return NULL; -} - static inline void spa_overview_show(struct seq_file *seq) { } diff --git a/mm/share_pool.c b/mm/share_pool.c index 2c29edae135c..aef3cec4eca8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -117,19 +117,175 @@ static DEFINE_IDA(sp_group_id_ida);
/*** Statistical and maintenance tools ***/
-/* idr of all sp_proc_stats */ -static DEFINE_IDR(sp_proc_stat_idr); -/* rw semaphore for sp_proc_stat_idr */ -static DECLARE_RWSEM(sp_proc_stat_sem); - -/* idr of all sp_spg_stats */ -static DEFINE_IDR(sp_spg_stat_idr); -/* rw semaphore for sp_spg_stat_idr */ -static DECLARE_RWSEM(sp_spg_stat_sem); +/* list of all sp_group_masters */ +static LIST_HEAD(master_list); +/* mutex to protect insert/delete ops from master_list */ +static DEFINE_MUTEX(master_list_lock);
/* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat;
+#ifndef __GENKSYMS__ +struct sp_spg_stat { + int spg_id; + /* record the number of hugepage allocation failures */ + atomic_t hugepage_failures; + /* number of sp_area */ + atomic_t spa_num; + /* total size of all sp_area from sp_alloc and k2u */ + atomic64_t size; + /* total size of all sp_area from sp_alloc 0-order page */ + atomic64_t alloc_nsize; + /* total size of all sp_area from sp_alloc hugepage */ + atomic64_t alloc_hsize; + /* total size of all sp_area from ap_alloc */ + atomic64_t alloc_size; + /* total size of all sp_area from sp_k2u */ + atomic64_t k2u_size; +}; + +/* per process memory usage statistics indexed by tgid */ +struct sp_proc_stat { + int tgid; + struct mm_struct *mm; + char comm[TASK_COMM_LEN]; + /* + * alloc amount minus free amount, may be negative when freed by + * another task in the same sp group. + */ + atomic64_t alloc_size; + atomic64_t alloc_nsize; + atomic64_t alloc_hsize; + atomic64_t k2u_size; +}; + +/* per process/sp-group memory usage statistics */ +struct spg_proc_stat { + int tgid; + int spg_id; /* 0 for non-group data, such as k2u_task */ + /* + * alloc amount minus free amount, may be negative when freed by + * another task in the same sp group. + */ + atomic64_t alloc_size; + atomic64_t alloc_nsize; + atomic64_t alloc_hsize; + atomic64_t k2u_size; +}; + +/* + * address space management + */ +struct sp_mapping { + unsigned long flag; + atomic_t user; + unsigned long start[MAX_DEVID]; + unsigned long end[MAX_DEVID]; + struct rb_root area_root; + + struct rb_node *free_area_cache; + unsigned long cached_hole_size; + unsigned long cached_vstart; + + /* list head for all groups attached to this mapping, dvpp mapping only */ + struct list_head group_head; +}; + +/* Processes in the same sp_group can share memory. + * Memory layout for share pool: + * + * |-------------------- 8T -------------------|---|------ 8T ------------| + * | Device 0 | Device 1 |...| | + * |----------------------------------------------------------------------| + * |------------- 16G -------------| 16G | | | + * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp normal memory | + * | sp | sp | | | | | + * |----------------------------------------------------------------------| + * + * The host SVM feature reserves 8T virtual memory by mmap, and due to the + * restriction of DVPP, while SVM and share pool will both allocate memory + * for DVPP, the memory have to be in the same 32G range. + * + * Share pool reserves 16T memory, with 8T for normal uses and 8T for DVPP. + * Within this 8T DVPP memory, SVM will call sp_config_dvpp_range() to + * tell us which 16G memory range is reserved for share pool . + * + * In some scenarios where there is no host SVM feature, share pool uses + * the default 8G memory setting for DVPP. + */ +struct sp_group { + int id; + unsigned long flag; + struct file *file; + struct file *file_hugetlb; + /* number of process in this group */ + int proc_num; + /* list head of processes (sp_group_node, each represents a process) */ + struct list_head procs; + /* list head of sp_area. 
it is protected by spin_lock sp_area_lock */ + struct list_head spa_list; + /* group statistics */ + struct sp_spg_stat instat; + /* we define the creator process of a sp_group as owner */ + struct task_struct *owner; + /* is_alive == false means it's being destroyed */ + bool is_alive; + atomic_t use_count; + /* protect the group internal elements, except spa_list */ + struct rw_semaphore rw_lock; + /* list node for dvpp mapping */ + struct list_head mnode; + struct sp_mapping *dvpp; + struct sp_mapping *normal; +}; + +/* a per-process(per mm) struct which manages a sp_group_node list */ +struct sp_group_master { + /* + * number of sp groups the process belongs to, + * a.k.a the number of sp_node in node_list + */ + unsigned int count; + /* list head of sp_node */ + struct list_head node_list; + struct mm_struct *mm; + /* + * Used to apply for the shared pool memory of the current process. + * For example, sp_alloc non-share memory or k2task. + */ + struct sp_group *local; + struct sp_proc_stat instat; + struct list_head list_node; +}; + +/* + * each instance represents an sp group the process belongs to + * sp_group_master : sp_group_node = 1 : N + * sp_group_node->spg : sp_group = 1 : 1 + * sp_group_node : sp_group->procs = N : 1 + */ +struct sp_group_node { + /* list node in sp_group->procs */ + struct list_head proc_node; + /* list node in sp_group_maseter->node_list */ + struct list_head group_node; + struct sp_group_master *master; + struct sp_group *spg; + unsigned long prot; + struct spg_proc_stat instat; +}; +#endif + +/* The caller should hold mmap_sem to protect master (TBD) */ +static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat *stat) +{ + atomic64_set(&stat->alloc_nsize, 0); + atomic64_set(&stat->alloc_hsize, 0); + atomic64_set(&stat->k2u_size, 0); + stat->mm = mm; + get_task_comm(stat->comm, current); +} + #define SP_MAPPING_DVPP 0x1 #define SP_MAPPING_NORMAL 0x2 static struct sp_mapping *sp_mapping_normal; @@ -328,8 +484,6 @@ static int init_local_group(struct mm_struct *mm) return ret; }
-static void sp_proc_stat_drop(struct sp_proc_stat *stat); -static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk); /* The caller must hold sp_group_sem */ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct *mm) { @@ -346,20 +500,19 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct INIT_LIST_HEAD(&master->node_list); master->count = 0; master->mm = mm; + sp_init_group_master_stat(mm, &master->instat); mm->sp_group_master = master;
- ret = sp_init_proc_stat(mm, tsk); - if (ret) - goto free_master; + mutex_lock(&master_list_lock); + list_add_tail(&master->list_node, &master_list); + mutex_unlock(&master_list_lock);
ret = init_local_group(mm); if (ret) - goto put_stat; + goto free_master;
return 0;
-put_stat: - sp_proc_stat_drop(master->stat); free_master: mm->sp_group_master = NULL; kfree(master); @@ -399,67 +552,6 @@ static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_st return master->local; }
-static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm) -{ - struct sp_proc_stat *stat; - - if (!mm->sp_group_master) - return NULL; - - down_read(&sp_proc_stat_sem); - stat = mm->sp_group_master->stat; - up_read(&sp_proc_stat_sem); - - /* maybe NULL or not, we always return it */ - return stat; -} - -static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm, - struct task_struct *tsk) -{ - struct sp_proc_stat *stat; - - stat = kmalloc(sizeof(*stat), GFP_KERNEL); - if (stat == NULL) - return ERR_PTR(-ENOMEM); - - atomic_set(&stat->use_count, 1); - atomic64_set(&stat->alloc_size, 0); - atomic64_set(&stat->k2u_size, 0); - stat->tgid = tsk->tgid; - stat->mm = mm; - mutex_init(&stat->lock); - hash_init(stat->hash); - get_task_comm(stat->comm, tsk); - - return stat; -} - -static int sp_init_proc_stat(struct mm_struct *mm, struct task_struct *tsk) -{ - struct sp_proc_stat *stat; - int alloc_id, tgid = tsk->tgid; - struct sp_group_master *master = mm->sp_group_master; - - stat = create_proc_stat(mm, tsk); - if (IS_ERR(stat)) - return PTR_ERR(stat); - - down_write(&sp_proc_stat_sem); - alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); - if (alloc_id < 0) { - up_write(&sp_proc_stat_sem); - pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id); - kfree(stat); - return alloc_id; - } - - master->stat = stat; - up_write(&sp_proc_stat_sem); - - return 0; -} - static void update_spg_stat_alloc(unsigned long size, bool inc, bool huge, struct sp_spg_stat *stat) { @@ -496,158 +588,64 @@ static void update_spg_stat_k2u(unsigned long size, bool inc, } }
-/* per process/sp-group memory usage statistics */ -struct spg_proc_stat { - int tgid; - int spg_id; /* 0 for non-group data, such as k2u_task */ - struct hlist_node pnode; /* hlist node in sp_proc_stat->hash */ - struct hlist_node gnode; /* hlist node in sp_spg_stat->hash */ - struct sp_proc_stat *proc_stat; - struct sp_spg_stat *spg_stat; - /* - * alloc amount minus free amount, may be negative when freed by - * another task in the same sp group. - */ - atomic64_t alloc_size; - atomic64_t k2u_size; -}; - -static void update_spg_proc_stat_alloc(unsigned long size, bool inc, - struct spg_proc_stat *stat) +static void update_mem_usage_alloc(unsigned long size, bool inc, + bool is_hugepage, struct sp_group_node *spg_node) { - struct sp_proc_stat *proc_stat = stat->proc_stat; + struct sp_proc_stat *proc_stat = &spg_node->master->instat;
if (inc) { - atomic64_add(size, &stat->alloc_size); - atomic64_add(size, &proc_stat->alloc_size); - } else { - atomic64_sub(size, &stat->alloc_size); - atomic64_sub(size, &proc_stat->alloc_size); + if (is_hugepage) { + atomic64_add(size, &spg_node->instat.alloc_hsize); + atomic64_add(size, &proc_stat->alloc_hsize); + return; + } + atomic64_add(size, &spg_node->instat.alloc_nsize); + atomic64_add(size, &proc_stat->alloc_nsize); + return; } + + if (is_hugepage) { + atomic64_sub(size, &spg_node->instat.alloc_hsize); + atomic64_sub(size, &proc_stat->alloc_hsize); + return; + } + atomic64_sub(size, &spg_node->instat.alloc_nsize); + atomic64_sub(size, &proc_stat->alloc_nsize); + return; }
-static void update_spg_proc_stat_k2u(unsigned long size, bool inc, - struct spg_proc_stat *stat) +static void update_mem_usage_k2u(unsigned long size, bool inc, + struct sp_group_node *spg_node) { - struct sp_proc_stat *proc_stat = stat->proc_stat; + struct sp_proc_stat *proc_stat = &spg_node->master->instat;
if (inc) { - atomic64_add(size, &stat->k2u_size); + atomic64_add(size, &spg_node->instat.k2u_size); atomic64_add(size, &proc_stat->k2u_size); } else { - atomic64_sub(size, &stat->k2u_size); + atomic64_sub(size, &spg_node->instat.k2u_size); atomic64_sub(size, &proc_stat->k2u_size); } }
-static struct spg_proc_stat *find_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, int spg_id) -{ - struct spg_proc_stat *stat = NULL; - - mutex_lock(&proc_stat->lock); - hash_for_each_possible(proc_stat->hash, stat, pnode, spg_id) { - if (stat->spg_id == spg_id) - break; - } - mutex_unlock(&proc_stat->lock); - - return stat; -} - -static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id) +static void sp_init_spg_proc_stat(struct spg_proc_stat *stat, int spg_id) { - struct spg_proc_stat *stat; - - stat = kmalloc(sizeof(struct spg_proc_stat), GFP_KERNEL); - if (stat == NULL) - return ERR_PTR(-ENOMEM); - - stat->tgid = tgid; + stat->tgid = current->tgid; stat->spg_id = spg_id; - atomic64_set(&stat->alloc_size, 0); + atomic64_set(&stat->alloc_nsize, 0); + atomic64_set(&stat->alloc_hsize, 0); atomic64_set(&stat->k2u_size, 0); - - return stat; -} - -static struct spg_proc_stat *sp_init_spg_proc_stat(struct sp_proc_stat *proc_stat, - struct sp_group *spg) -{ - struct spg_proc_stat *stat; - int spg_id = spg->id; /* visit spg id locklessly */ - struct sp_spg_stat *spg_stat = spg->stat; - - stat = create_spg_proc_stat(proc_stat->tgid, spg_id); - if (IS_ERR(stat)) - return stat; - - stat->proc_stat = proc_stat; - stat->spg_stat = spg_stat; - - mutex_lock(&proc_stat->lock); - hash_add(proc_stat->hash, &stat->pnode, stat->spg_id); - mutex_unlock(&proc_stat->lock); - - mutex_lock(&spg_stat->lock); - hash_add(spg_stat->hash, &stat->gnode, stat->tgid); - mutex_unlock(&spg_stat->lock); - return stat; }
-static struct sp_spg_stat *create_spg_stat(int spg_id) +static void sp_init_group_stat(struct sp_spg_stat *stat) { - struct sp_spg_stat *stat; - - stat = kmalloc(sizeof(*stat), GFP_KERNEL); - if (stat == NULL) - return ERR_PTR(-ENOMEM); - - stat->spg_id = spg_id; atomic_set(&stat->hugepage_failures, 0); atomic_set(&stat->spa_num, 0); atomic64_set(&stat->size, 0); atomic64_set(&stat->alloc_nsize, 0); atomic64_set(&stat->alloc_hsize, 0); atomic64_set(&stat->alloc_size, 0); - mutex_init(&stat->lock); - hash_init(stat->hash); - - return stat; -} - -static int sp_init_spg_stat(struct sp_group *spg) -{ - struct sp_spg_stat *stat; - int ret, spg_id = spg->id; - - stat = create_spg_stat(spg_id); - if (IS_ERR(stat)) - return PTR_ERR(stat); - - down_write(&sp_spg_stat_sem); - ret = idr_alloc(&sp_spg_stat_idr, stat, spg_id, spg_id + 1, - GFP_KERNEL); - up_write(&sp_spg_stat_sem); - if (ret < 0) { - pr_err_ratelimited("group %d idr alloc failed, ret %d\n", - spg_id, ret); - kfree(stat); - } - - spg->stat = stat; - return ret; -} - -static void free_spg_stat(int spg_id) -{ - struct sp_spg_stat *stat; - - down_write(&sp_spg_stat_sem); - stat = idr_remove(&sp_spg_stat_idr, spg_id); - up_write(&sp_spg_stat_sem); - WARN_ON(!stat); - kfree(stat); + atomic64_set(&stat->k2u_size, 0); }
/* statistics of all sp area, protected by sp_area_lock */ @@ -735,17 +733,17 @@ static void spa_inc_usage(struct sp_area *spa) case SPA_TYPE_ALLOC: spa_stat.alloc_num += 1; spa_stat.alloc_size += size; - update_spg_stat_alloc(size, true, is_huge, spa->spg->stat); + update_spg_stat_alloc(size, true, is_huge, &spa->spg->instat); break; case SPA_TYPE_K2TASK: spa_stat.k2u_task_num += 1; spa_stat.k2u_task_size += size; - update_spg_stat_k2u(size, true, spa->spg->stat); + update_spg_stat_k2u(size, true, &spa->spg->instat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num += 1; spa_stat.k2u_spg_size += size; - update_spg_stat_k2u(size, true, spa->spg->stat); + update_spg_stat_k2u(size, true, &spa->spg->instat); break; default: WARN(1, "invalid spa type"); @@ -781,17 +779,17 @@ static void spa_dec_usage(struct sp_area *spa) case SPA_TYPE_ALLOC: spa_stat.alloc_num -= 1; spa_stat.alloc_size -= size; - update_spg_stat_alloc(size, false, is_huge, spa->spg->stat); + update_spg_stat_alloc(size, false, is_huge, &spa->spg->instat); break; case SPA_TYPE_K2TASK: spa_stat.k2u_task_num -= 1; spa_stat.k2u_task_size -= size; - update_spg_stat_k2u(size, false, spa->spg->stat); + update_spg_stat_k2u(size, false, &spa->spg->instat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num -= 1; spa_stat.k2u_spg_size -= size; - update_spg_stat_k2u(size, false, spa->spg->stat); + update_spg_stat_k2u(size, false, &spa->spg->instat); break; default: WARN(1, "invalid spa type"); @@ -811,42 +809,52 @@ static void spa_dec_usage(struct sp_area *spa) } }
-static void update_spg_proc_stat(unsigned long size, bool inc, - struct spg_proc_stat *stat, enum spa_type type) +static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, + struct sp_group_node *spg_node, enum spa_type type) { - if (unlikely(!stat)) { + if (unlikely(!spg_node)) { sp_dump_stack(); - WARN(1, "null process stat\n"); + WARN(1, "null sp group node\n"); return; }
switch (type) { case SPA_TYPE_ALLOC: - update_spg_proc_stat_alloc(size, inc, stat); + update_mem_usage_alloc(size, inc, is_hugepage, spg_node); break; case SPA_TYPE_K2TASK: case SPA_TYPE_K2SPG: - update_spg_proc_stat_k2u(size, inc, stat); + update_mem_usage_k2u(size, inc, spg_node); break; default: WARN(1, "invalid stat type\n"); } }
+struct sp_group_node *find_spg_node_by_spg(struct mm_struct *mm, + struct sp_group *spg) +{ + struct sp_group_node *spg_node; + + list_for_each_entry(spg_node, &mm->sp_group_master->node_list, group_node) { + if (spg_node->spg == spg) + return spg_node; + } + return NULL; +} + static void sp_update_process_stat(struct task_struct *tsk, bool inc, struct sp_area *spa) { - struct spg_proc_stat *stat; + struct sp_group_node *spg_node; unsigned long size = spa->real_size; enum spa_type type = spa->type;
- down_write(&sp_group_sem); - stat = find_spg_proc_stat(tsk->mm->sp_group_master->stat, tsk->tgid, spa->spg->id); - up_write(&sp_group_sem); - if (!stat) - return; - - update_spg_proc_stat(size, inc, stat, type); + spg_node = find_spg_node_by_spg(tsk->mm, spa->spg); + if (!spg_node) + pr_err("share pool: spg node not found!\n"); + else + update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); }
static inline void check_interrupt_context(void) @@ -905,7 +913,6 @@ static void free_sp_group_locked(struct sp_group *spg) { fput(spg->file); fput(spg->file_hugetlb); - free_spg_stat(spg->id); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); sp_mapping_detach(spg, spg->dvpp); @@ -1165,6 +1172,7 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) INIT_LIST_HEAD(&spg->spa_list); INIT_LIST_HEAD(&spg->mnode); init_rwsem(&spg->rw_lock); + sp_init_group_stat(&spg->instat);
sprintf(name, "sp_group_%d", spg_id); spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE, @@ -1184,16 +1192,10 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) goto out_fput; }
- ret = sp_init_spg_stat(spg); - if (ret < 0) - goto out_fput_all; - if (!is_local_group(spg_id)) system_group_count++; return spg;
-out_fput_all: - fput(spg->file_hugetlb); out_fput: fput(spg->file); out_idr: @@ -1303,6 +1305,7 @@ static struct sp_group_node *create_spg_node(struct mm_struct *mm, spg_node->spg = spg; spg_node->master = master; spg_node->prot = prot; + sp_init_spg_proc_stat(&spg_node->instat, spg->id);
list_add_tail(&spg_node->group_node, &master->node_list); master->count++; @@ -1321,12 +1324,6 @@ static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) spg->proc_num++; list_add_tail(&node->proc_node, &spg->procs);
- /* - * The only way where sp_init_spg_proc_stat got failed is that there is no - * memory for sp_spg_stat. We will avoid this failure when we put sp_spg_stat - * into sp_group_node later. - */ - sp_init_spg_proc_stat(node->master->stat, spg); return 0; }
@@ -1501,6 +1498,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&spg->rw_lock); goto out_drop_group; } + mm->sp_group_master->instat.tgid = tsk->tgid;
ret = sp_mapping_group_setup(mm, spg); if (ret) { @@ -1628,27 +1626,6 @@ int sp_group_add_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_group_add_task);
-static void free_spg_proc_stat(struct mm_struct *mm, int spg_id) -{ - int i; - struct sp_proc_stat *proc_stat = sp_get_proc_stat(mm); - struct spg_proc_stat *stat; - struct sp_spg_stat *spg_stat; - struct hlist_node *tmp; - - hash_for_each_safe(proc_stat->hash, i, tmp, stat, pnode) { - if (stat->spg_id == spg_id) { - spg_stat = stat->spg_stat; - mutex_lock(&spg_stat->lock); - hash_del(&stat->gnode); - mutex_unlock(&spg_stat->lock); - hash_del(&stat->pnode); - kfree(stat); - break; - } - } -} - /** * mg_sp_group_del_task() - delete a process from a sp group. * @pid: the pid of the task to be deleted @@ -1740,7 +1717,6 @@ int mg_sp_group_del_task(int pid, int spg_id) return -EINVAL; }
- free_spg_proc_stat(mm, spg_id); up_write(&sp_group_sem);
out_put_mm: @@ -2608,14 +2584,12 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa,
static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac) { - struct sp_spg_stat *stat = ac->spg->stat; - if (ac->file == ac->spg->file) { ac->state = ALLOC_NOMEM; return; }
- atomic_inc(&stat->hugepage_failures); + atomic_inc(&ac->spg->instat.hugepage_failures); if (!(ac->sp_flags & SP_HUGEPAGE_ONLY)) { ac->file = ac->spg->file; ac->size_aligned = ALIGN(ac->size, PAGE_SIZE); @@ -2944,7 +2918,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un int ret; void *uva; struct sp_area *spa; - struct spg_proc_stat *stat; + struct sp_group_node *spg_node; unsigned long prot = PROT_READ | PROT_WRITE; struct sp_k2u_context kc; struct sp_group *spg; @@ -2958,7 +2932,6 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un }
spg = current->mm->sp_group_master->local; - stat = find_spg_proc_stat(current->mm->sp_group_master->stat, current->tgid, spg->id); up_write(&sp_group_sem);
spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid); @@ -2975,7 +2948,11 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { - update_spg_proc_stat(size, true, stat, SPA_TYPE_K2TASK); + spg_node = find_spg_node_by_spg(current->mm, spa->spg); + if (!spg_node) + pr_err("spg_node is null\n"); + else + update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; }
@@ -4001,43 +3978,6 @@ __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group);
/*** Statistical and maintenance functions ***/
-static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat) -{ - int i; - struct spg_proc_stat *stat; - struct hlist_node *tmp; - struct sp_spg_stat *spg_stat; - - /* traverse proc_stat->hash locklessly as process is exiting */ - hash_for_each_safe(proc_stat->hash, i, tmp, stat, pnode) { - spg_stat = stat->spg_stat; - mutex_lock(&spg_stat->lock); - hash_del(&stat->gnode); - mutex_unlock(&spg_stat->lock); - - hash_del(&stat->pnode); - kfree(stat); - } -} - -static void free_sp_proc_stat(struct sp_proc_stat *stat) -{ - free_process_spg_proc_stat(stat); - - down_write(&sp_proc_stat_sem); - stat->mm->sp_group_master->stat = NULL; - idr_remove(&sp_proc_stat_idr, stat->tgid); - up_write(&sp_proc_stat_sem); - kfree(stat); -} - -/* the caller make sure stat is not NULL */ -static void sp_proc_stat_drop(struct sp_proc_stat *stat) -{ - if (atomic_dec_and_test(&stat->use_count)) - free_sp_proc_stat(stat); -} - static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, unsigned long *file, unsigned long *shmem, unsigned long *total_rss) { @@ -4047,54 +3987,29 @@ static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, *total_rss = *anon + *file + *shmem; }
-static long get_proc_alloc(struct sp_proc_stat *stat) -{ - return byte2kb(atomic64_read(&stat->alloc_size)); -} - static long get_proc_k2u(struct sp_proc_stat *stat) { return byte2kb(atomic64_read(&stat->k2u_size)); }
-static long get_spg_alloc(struct sp_spg_stat *stat) -{ - return byte2kb(atomic64_read(&stat->alloc_size)); -} - -static long get_spg_alloc_nsize(struct sp_spg_stat *stat) +static long get_proc_alloc(struct sp_proc_stat *stat) { - return byte2kb(atomic64_read(&stat->alloc_nsize)); + return byte2kb(atomic64_read(&stat->alloc_nsize) + + atomic64_read(&stat->alloc_hsize)); }
-static long get_spg_proc_alloc(struct spg_proc_stat *stat) +static void get_process_sp_res(struct sp_proc_stat *stat, + long *sp_res_out, long *sp_res_nsize_out) { - return byte2kb(atomic64_read(&stat->alloc_size)); + *sp_res_out = byte2kb(atomic64_read(&stat->alloc_nsize) + + atomic64_read(&stat->alloc_hsize)); + *sp_res_nsize_out = byte2kb(atomic64_read(&stat->alloc_nsize)); }
-static long get_spg_proc_k2u(struct spg_proc_stat *stat) +static long get_sp_res_by_spg_proc(struct sp_group_node *spg_node) { - return byte2kb(atomic64_read(&stat->k2u_size)); -} - -static void get_process_sp_res(struct sp_proc_stat *stat, - long *sp_res_out, long *sp_res_nsize_out) -{ - int i; - struct spg_proc_stat *spg_proc_stat; - struct sp_spg_stat *spg_stat; - long sp_res = 0, sp_res_nsize = 0; - - mutex_lock(&stat->lock); - hash_for_each(stat->hash, i, spg_proc_stat, pnode) { - spg_stat = spg_proc_stat->spg_stat; - sp_res += get_spg_alloc(spg_stat); - sp_res_nsize += get_spg_alloc_nsize(spg_stat); - } - mutex_unlock(&stat->lock); - - *sp_res_out = sp_res; - *sp_res_nsize_out = sp_res_nsize; + return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + + atomic64_read(&spg_node->instat.alloc_hsize)); }
/* @@ -4115,24 +4030,15 @@ static void get_process_non_sp_res(unsigned long total_rss, unsigned long shmem, *non_sp_shm_out = non_sp_shm; }
-static long get_sp_res_by_spg_proc(struct spg_proc_stat *stat) +static long get_spg_proc_alloc(struct sp_group_node *spg_node) { - return byte2kb(atomic64_read(&stat->spg_stat->alloc_size)); + return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + + atomic64_read(&spg_node->instat.alloc_hsize)); }
-static unsigned long get_process_prot_locked(int spg_id, struct mm_struct *mm) +static long get_spg_proc_k2u(struct sp_group_node *spg_node) { - unsigned long prot = 0; - struct sp_group_node *spg_node; - struct sp_group_master *master = mm->sp_group_master; - - list_for_each_entry(spg_node, &master->node_list, group_node) { - if (spg_node->spg->id == spg_id) { - prot = spg_node->prot; - break; - } - } - return prot; + return byte2kb(atomic64_read(&spg_node->instat.k2u_size)); }
static void print_process_prot(struct seq_file *seq, unsigned long prot) @@ -4151,9 +4057,8 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct mm_struct *mm = task->mm; struct sp_group_master *master; struct sp_proc_stat *proc_stat; - struct spg_proc_stat *spg_proc_stat; - int i; - unsigned long anon, file, shmem, total_rss, prot; + struct sp_group_node *spg_node; + unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
if (!sp_is_enabled()) @@ -4162,12 +4067,13 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, if (!mm) return 0;
+ down_read(&mm->mmap_lock); master = mm->sp_group_master; if (!master) return 0;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - proc_stat = master->stat; + proc_stat = &master->instat; get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); @@ -4185,24 +4091,18 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
seq_puts(m, "\n\nProcess in Each SP Group\n\n"); seq_printf(m, "%-8s %-9s %-9s %-9s %-4s\n", - "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT"); + "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT");
- /* to prevent ABBA deadlock, first hold sp_group_sem */ - down_read(&sp_group_sem); - mutex_lock(&proc_stat->lock); - hash_for_each(proc_stat->hash, i, spg_proc_stat, pnode) { - prot = get_process_prot_locked(spg_proc_stat->spg_id, mm); + list_for_each_entry(spg_node, &master->node_list, proc_node) { seq_printf(m, "%-8d %-9ld %-9ld %-9ld ", - spg_proc_stat->spg_id, - get_spg_proc_alloc(spg_proc_stat), - get_spg_proc_k2u(spg_proc_stat), - get_sp_res_by_spg_proc(spg_proc_stat)); - print_process_prot(m, prot); + spg_node->spg->id, + get_spg_proc_alloc(spg_node), + get_spg_proc_k2u(spg_node), + get_sp_res_by_spg_proc(spg_node)); + print_process_prot(m, spg_node->prot); seq_putc(m, '\n'); } - mutex_unlock(&proc_stat->lock); - up_read(&sp_group_sem); - + up_read(&mm->mmap_lock); return 0; }
@@ -4332,31 +4232,42 @@ void spa_overview_show(struct seq_file *seq) } }
-/* the caller must hold sp_group_sem */ -static int idr_spg_stat_cb(int id, void *p, void *data) +static int spg_info_show(int id, void *p, void *data) { - struct sp_spg_stat *s = p; + struct sp_group *spg = p; struct seq_file *seq = data;
- if (is_local_group(id) && atomic64_read(&s->size) == 0) + if (id >= SPG_ID_LOCAL_MIN && id <= SPG_ID_LOCAL_MAX) return 0;
if (seq != NULL) { - seq_printf(seq, "Group %6d ", id); + if (id == 0) + seq_puts(seq, "Non Group "); + else + seq_printf(seq, "Group %6d ", id); + + down_read(&spg->rw_lock); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", - byte2kb(atomic64_read(&s->size)), - atomic_read(&s->spa_num), - byte2kb(atomic64_read(&s->alloc_size)), - byte2kb(atomic64_read(&s->alloc_nsize)), - byte2kb(atomic64_read(&s->alloc_hsize))); + byte2kb(atomic64_read(&spg->instat.size)), + atomic_read(&spg->instat.spa_num), + byte2kb(atomic64_read(&spg->instat.alloc_size)), + byte2kb(atomic64_read(&spg->instat.alloc_nsize)), + byte2kb(atomic64_read(&spg->instat.alloc_hsize))); + up_read(&spg->rw_lock); } else { - pr_info("Group %6d ", id); + if (id == 0) + pr_info("Non Group "); + else + pr_info("Group %6d ", id); + + down_read(&spg->rw_lock); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", - byte2kb(atomic64_read(&s->size)), - atomic_read(&s->spa_num), - byte2kb(atomic64_read(&s->alloc_size)), - byte2kb(atomic64_read(&s->alloc_nsize)), - byte2kb(atomic64_read(&s->alloc_hsize))); + byte2kb(atomic64_read(&spg->instat.size)), + atomic_read(&spg->instat.spa_num), + byte2kb(atomic64_read(&spg->instat.alloc_size)), + byte2kb(atomic64_read(&spg->instat.alloc_nsize)), + byte2kb(atomic64_read(&spg->instat.alloc_hsize))); + up_read(&spg->rw_lock); }
return 0; @@ -4369,17 +4280,17 @@ void spg_overview_show(struct seq_file *seq)
if (seq != NULL) { seq_printf(seq, "Share pool total size: %lld KB, spa total num: %d.\n", - byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), - atomic_read(&sp_overall_stat.spa_total_num)); + byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), + atomic_read(&sp_overall_stat.spa_total_num)); } else { pr_info("Share pool total size: %lld KB, spa total num: %d.\n", - byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), - atomic_read(&sp_overall_stat.spa_total_num)); + byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), + atomic_read(&sp_overall_stat.spa_total_num)); }
- down_read(&sp_spg_stat_sem); - idr_for_each(&sp_spg_stat_idr, idr_spg_stat_cb, seq); - up_read(&sp_spg_stat_sem); + down_read(&sp_group_sem); + idr_for_each(&sp_group_idr, spg_info_show, seq); + up_read(&sp_group_sem);
if (seq != NULL) seq_puts(seq, "\n"); @@ -4393,118 +4304,109 @@ static int spa_stat_show(struct seq_file *seq, void *offset) spa_overview_show(seq); /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", - "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); + "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); spa_normal_stat_show(seq); spa_dvpp_stat_show(seq); return 0; }
-static int idr_proc_stat_cb(int id, void *p, void *data) +static int proc_usage_by_group(int id, void *p, void *data) { - struct sp_spg_stat *spg_stat = p; + struct sp_group *spg = p; struct seq_file *seq = data; - int i, tgid; - struct sp_proc_stat *proc_stat; - struct spg_proc_stat *spg_proc_stat; - + struct sp_group_node *spg_node; struct mm_struct *mm; - unsigned long anon, file, shmem, total_rss, prot; - /* - * non_sp_res: resident memory size excluding share pool memory - * sp_res: resident memory size of share pool, including normal - * page and hugepage memory - * non_sp_shm: resident shared memory size excluding share pool - * memory - */ + struct sp_group_master *master; + int tgid; + unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
- /* to prevent ABBA deadlock, first hold sp_group_sem */ - mutex_lock(&spg_stat->lock); - hash_for_each(spg_stat->hash, i, spg_proc_stat, gnode) { - proc_stat = spg_proc_stat->proc_stat; - tgid = proc_stat->tgid; - mm = proc_stat->mm; + down_read(&spg->rw_lock); + list_for_each_entry(spg_node, &spg->procs, proc_node) { + + master = spg_node->master; + if (!master) { + pr_info("master is NULL! process %d, group %d\n", + spg_node->instat.tgid, id); + continue; + } + mm = master->mm; + tgid = master->instat.tgid;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, - &non_sp_res, &non_sp_shm); - prot = get_process_prot_locked(id, mm); + &non_sp_res, &non_sp_shm);
seq_printf(seq, "%-8d ", tgid); - seq_printf(seq, "%-8d ", id); + if (id == 0) + seq_printf(seq, "%-8c ", '-'); + else + seq_printf(seq, "%-8d ", id); seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", - get_spg_proc_alloc(spg_proc_stat), - get_spg_proc_k2u(spg_proc_stat), - get_sp_res_by_spg_proc(spg_proc_stat), - sp_res, non_sp_res, - page2kb(mm->total_vm), page2kb(total_rss), - page2kb(shmem), non_sp_shm); - print_process_prot(seq, prot); + get_spg_proc_alloc(spg_node), + get_spg_proc_k2u(spg_node), + get_sp_res_by_spg_proc(spg_node), + sp_res, non_sp_res, + page2kb(mm->total_vm), page2kb(total_rss), + page2kb(shmem), non_sp_shm); + print_process_prot(seq, spg_node->prot); seq_putc(seq, '\n'); } - mutex_unlock(&spg_stat->lock); + up_read(&spg->rw_lock); + return 0; }
-static int proc_stat_show(struct seq_file *seq, void *offset) +static int proc_group_usage_show(struct seq_file *seq, void *offset) { spg_overview_show(seq); spa_overview_show(seq); + /* print the file header */ seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s %-4s\n", - "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", - "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); + "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", + "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); /* print kthread buff_module_guard_work */ seq_printf(seq, "%-8s %-8s %-9lld %-9lld\n", - "guard", "-", - byte2kb(atomic64_read(&kthread_stat.alloc_size)), - byte2kb(atomic64_read(&kthread_stat.k2u_size))); + "guard", "-", + byte2kb(atomic64_read(&kthread_stat.alloc_size)), + byte2kb(atomic64_read(&kthread_stat.k2u_size)));
- /* - * This ugly code is just for fixing the ABBA deadlock against - * sp_group_add_task. - */ down_read(&sp_group_sem); - down_read(&sp_spg_stat_sem); - idr_for_each(&sp_spg_stat_idr, idr_proc_stat_cb, seq); - up_read(&sp_spg_stat_sem); + idr_for_each(&sp_group_idr, proc_usage_by_group, seq); up_read(&sp_group_sem);
return 0; }
-static int idr_proc_overview_cb(int id, void *p, void *data) +static int proc_usage_show(struct seq_file *seq, void *offset) { - struct sp_proc_stat *proc_stat = p; - struct seq_file *seq = data; - struct mm_struct *mm = proc_stat->mm; + struct sp_group_master *master = NULL; unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; + struct sp_proc_stat *proc_stat;
- get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); - get_process_non_sp_res(total_rss, shmem, sp_res_nsize, - &non_sp_res, &non_sp_shm); - - seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", - id, proc_stat->comm, - get_proc_alloc(proc_stat), - get_proc_k2u(proc_stat), - sp_res, non_sp_res, non_sp_shm, - page2kb(mm->total_vm)); - return 0; -} - -static int proc_overview_show(struct seq_file *seq, void *offset) -{ seq_printf(seq, "%-8s %-16s %-9s %-9s %-9s %-10s %-10s %-8s\n", - "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", - "Non-SP_Shm", "VIRT"); + "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", + "Non-SP_Shm", "VIRT"); + + mutex_lock(&master_list_lock); + list_for_each_entry(master, &master_list, list_node) { + proc_stat = &master->instat; + get_mm_rss_info(master->mm, &anon, &file, &shmem, &total_rss); + get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_non_sp_res(total_rss, shmem, sp_res_nsize, + &non_sp_res, &non_sp_shm); + seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", + proc_stat->tgid, proc_stat->comm, + get_proc_alloc(proc_stat), + get_proc_k2u(proc_stat), + sp_res, non_sp_res, non_sp_shm, + page2kb(master->mm->total_vm)); + } + mutex_unlock(&master_list_lock);
- down_read(&sp_proc_stat_sem); - idr_for_each(&sp_proc_stat_idr, idr_proc_overview_cb, seq); - up_read(&sp_proc_stat_sem); return 0; }
@@ -4513,9 +4415,9 @@ static void __init proc_sharepool_init(void) if (!proc_mkdir("sharepool", NULL)) return;
- proc_create_single_data("sharepool/proc_stat", 0400, NULL, proc_stat_show, NULL); proc_create_single_data("sharepool/spa_stat", 0400, NULL, spa_stat_show, NULL); - proc_create_single_data("sharepool/proc_overview", 0400, NULL, proc_overview_show, NULL); + proc_create_single_data("sharepool/proc_stat", 0400, NULL, proc_group_usage_show, NULL); + proc_create_single_data("sharepool/proc_overview", 0400, NULL, proc_usage_show, NULL); }
/*** End of tatistical and maintenance functions ***/ @@ -4739,18 +4641,15 @@ void sp_group_post_exit(struct mm_struct *mm) * A process not in an sp group doesn't need to print because there * wont't be any memory which is not freed. */ - stat = sp_get_proc_stat(mm); + stat = &master->instat; if (stat) { - alloc_size = atomic64_read(&stat->alloc_size); + alloc_size = atomic64_read(&stat->alloc_nsize) + atomic64_read(&stat->alloc_hsize); k2u_size = atomic64_read(&stat->k2u_size);
if (alloc_size != 0 || k2u_size != 0) pr_info("process %s(%d) exits. It applied %ld aligned KB, k2u shared %ld aligned KB\n", stat->comm, stat->tgid, byte2kb(alloc_size), byte2kb(k2u_size)); - - /* match with sp_init_proc_stat, we expect stat is released after this call */ - sp_proc_stat_drop(stat); }
down_write(&sp_group_sem); @@ -4763,6 +4662,10 @@ void sp_group_post_exit(struct mm_struct *mm) } up_write(&sp_group_sem);
+ mutex_lock(&master_list_lock); + list_del(&master->list_node); + mutex_unlock(&master_list_lock); + kfree(master); }
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5J0Z9
--------------------------------
When there is only one mm in a group allocating memory and the process is killed, the error path in sp_alloc_mmap_populate tries to access the next spg_node->master->mm in the group's proc list. In this case, however, the next spg_node in the proc list is the list head, whose spg_node->master is NULL, which leads to the log below:
[file:test_sp_alloc.c, func:alloc_large_repeat, line:437] start to alloc...
[ 264.699086][ T1772] share pool: gonna sp_alloc_unmap...
[ 264.699939][ T1772] share pool: list_next_entry(spg_node, proc_node) is ffff0004c4907028
[ 264.700380][ T1772] share pool: master is 0
[ 264.701240][ T1772] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000018
...
[ 264.704764][ T1772] Internal error: Oops: 96000006 [#1] SMP
[ 264.705166][ T1772] Modules linked in: sharepool_dev(OE)
[ 264.705823][ T1772] CPU: 3 PID: 1772 Comm: test_sp_alloc Tainted: G OE 5.10.0+ #23
...
[ 264.712513][ T1772] Call trace:
[ 264.713057][ T1772] sp_alloc+0x528/0xa88
[ 264.713740][ T1772] dev_ioctl+0x6ec/0x1d00 [sharepool_dev]
[ 264.714035][ T1772] __arm64_sys_ioctl+0xb0/0xe8
[ 264.714267][ T1772] el0_svc_common.constprop.0+0x88/0x268
[ 264.714539][ T1772] do_el0_svc+0x34/0xb8
[ 264.714743][ T1772] el0_svc+0x1c/0x28
[ 264.714938][ T1772] el0_sync_handler+0x8c/0xb0
[ 264.715163][ T1772] el0_sync+0x168/0x180
[ 264.715737][ T1772] Code: 9425adba f94012a1 d0006780 911c8000 (f9400c21)
[ 264.716891][ T1772] ---[ end trace 1587677032f666c6 ]---
[ 264.717457][ T1772] Kernel panic - not syncing: Oops: Fatal exception
[ 264.717961][ T1772] SMP: stopping secondary CPUs
[ 264.718787][ T1772] Kernel Offset: disabled
[ 264.719244][ T1772] CPU features: 0x0004,00200022,61006082
[ 264.719718][ T1772] Memory Limit: none
[ 264.720333][ T1772] ---[ end Kernel panic - not syncing: Oops: Fatal exception ]---
Add a list_is_last check to avoid this null pointer access.
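The essence of the fix restated as a sketch; the wrapper name is made up, the real change is inline in sp_alloc_mmap_populate() as shown in the diff below:

static void demo_unmap_up_to(struct sp_area *spa, struct sp_group_node *spg_node)
{
	struct mm_struct *end_mm = NULL;

	/* only take the next master's mm when spg_node is not the list tail;
	 * otherwise the "next" entry is the list head and has no master
	 */
	if (!list_is_last(&spg_node->proc_node, &spa->spg->procs))
		end_mm = list_next_entry(spg_node, proc_node)->master->mm;

	sp_alloc_unmap(end_mm, spa, spg_node);
}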
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index aef3cec4eca8..2ea1c3a4149a 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2634,6 +2634,7 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, if (ret) sp_add_work_compact(); } + return ret; }
@@ -2654,14 +2655,8 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, int ret;
ret = sp_alloc_mmap(mm, spa, spg_node, ac); - if (ret < 0) { - if (ac->need_fallocate) { - /* e.g. second sp_mmap fail */ - sp_fallocate(spa); - ac->need_fallocate = false; - } + if (ret < 0) return ret; - }
if (!ac->have_mbind) { ret = sp_mbind(mm, spa->va_start, spa->real_size, spa->node_id); @@ -2676,18 +2671,13 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ret = sp_alloc_populate(mm, spa, ac); if (ret) { err: - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - if (unlikely(fatal_signal_pending(current))) pr_warn_ratelimited("allocation failed, current thread is killed\n"); else pr_warn_ratelimited("allocation failed due to mm populate failed(potential no enough memory when -12): %d\n", - ret); - sp_fallocate(spa); /* need this, otherwise memleak */ - sp_alloc_fallback(spa, ac); + ret); } else ac->need_fallocate = true; - return ret; }
@@ -2696,7 +2686,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, { int ret = -EINVAL; int mmap_ret = 0; - struct mm_struct *mm; + struct mm_struct *mm, *end_mm = NULL; struct sp_group_node *spg_node;
/* create mapping for each process in the group */ @@ -2705,7 +2695,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); if (mmap_ret) { if (ac->state != ALLOC_COREDUMP) - return mmap_ret; + goto unmap; ac->state = ALLOC_NORMAL; continue; } @@ -2713,6 +2703,24 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, }
return ret; + +unmap: + /* use the next mm in proc list as end mark */ + if (!list_is_last(&spg_node->proc_node, &spa->spg->procs)) + end_mm = list_next_entry(spg_node, proc_node)->master->mm; + sp_alloc_unmap(end_mm, spa, spg_node); + + /* only fallocate spa if physical memory had been allocated */ + if (ac->need_fallocate) { + sp_fallocate(spa); + ac->need_fallocate = false; + } + + /* if hugepage allocation fails, this will transfer to normal page + * and try again. (only if SP_HUGEPAGE_ONLY is not flagged */ + sp_alloc_fallback(spa, ac); + + return mmap_ret; }
/* spa maybe an error pointer, so introduce variable spg */
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5KC7C
--------------------------------
Most interfaces starting with "sp_" are deprecated; remove them and keep only the "mg_sp_" variants.
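For out-of-tree callers the migration is a rename only; a hypothetical call site (all variable names invented):

static int demo_use_share_pool(unsigned long size, unsigned long sp_flags, int spg_id)
{
	void *buf = mg_sp_alloc(size, sp_flags, spg_id);	/* was sp_alloc() */

	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* ...use the shared buffer... */

	return mg_sp_free((unsigned long)buf, spg_id);	/* was sp_free() */
}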
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- include/linux/share_pool.h | 90 +--------------------- mm/share_pool.c | 148 ++++++------------------------------- 2 files changed, 22 insertions(+), 216 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index c2ef26661a4f..e8bc9a368e34 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -246,53 +246,31 @@ static inline void sp_init_mm(struct mm_struct *mm) * Those interfaces are exported for modules */ extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); -extern int sp_group_add_task(int pid, int spg_id); - extern int mg_sp_group_del_task(int pid, int spg_id); -extern int sp_group_del_task(int pid, int spg_id); - extern int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num); -extern int sp_group_id_by_pid(int pid); - -extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)); extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task);
-extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); - -extern int sp_free(unsigned long addr, int id); extern int mg_sp_free(unsigned long addr, int id);
-extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id); extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); - -extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id);
-extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); -extern int mg_sp_unshare(unsigned long va, unsigned long size, int id); - -extern int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data); extern int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data);
-extern void sp_walk_page_free(struct sp_walk_data *sp_walk_data); extern void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data);
extern int sp_register_notifier(struct notifier_block *nb); extern int sp_unregister_notifier(struct notifier_block *nb);
-extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid);
-extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr);
-extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void);
extern void sp_area_drop(struct vm_area_struct *vma); @@ -350,21 +328,11 @@ static inline int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EPERM; }
-static inline int sp_group_add_task(int pid, int spg_id) -{ - return -EPERM; -} - static inline int mg_sp_group_del_task(int pid, int spg_id) { return -EPERM; }
-static inline int sp_group_del_task(int pid, int spg_id) -{ - return -EPERM; -} - static inline int sp_group_exit(struct mm_struct *mm) { return 0; @@ -379,74 +347,38 @@ static inline int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) return -EPERM; }
-static inline int sp_group_id_by_pid(int pid) -{ - return -EPERM; -} - static inline int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return -EPERM; }
-static inline void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id) -{ - return NULL; -} - static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { return NULL; }
-static inline int sp_free(unsigned long addr, int id) -{ - return -EPERM; -} - static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; }
-static inline void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) -{ - return NULL; -} - static inline void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { return NULL; }
-static inline void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) -{ - return NULL; -} - static inline void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { return NULL; }
-static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) -{ - return -EPERM; -} - static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; }
-static inline int sp_id_of_current(void) -{ - return -EPERM; -} - static inline int mg_sp_id_of_current(void) { return -EPERM; @@ -460,22 +392,12 @@ static inline void sp_area_drop(struct vm_area_struct *vma) { }
-static inline int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data) -{ - return 0; -} - static inline int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { return 0; }
-static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data) -{ -} - static inline void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { } @@ -490,21 +412,11 @@ static inline int sp_unregister_notifier(struct notifier_block *nb) return -EPERM; }
-static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) -{ - return false; -} - static inline bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { return false; }
-static inline bool is_sharepool_addr(unsigned long addr) -{ - return false; -} - static inline bool mg_is_sharepool_addr(unsigned long addr) { return false; diff --git a/mm/share_pool.c b/mm/share_pool.c index 2ea1c3a4149a..65c78ef24bdc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1011,38 +1011,6 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) return spg; }
-/** - * sp_group_id_by_pid() - Get the sp_group ID of a process. - * @pid: pid of target process. - * - * Return: - * 0 the sp_group ID. - * -ENODEV target process doesn't belong to any sp_group. - */ -int sp_group_id_by_pid(int pid) -{ - struct sp_group *spg; - int spg_id = -ENODEV; - - if (!sp_is_enabled()) - return -EOPNOTSUPP; - - check_interrupt_context(); - - spg = __sp_find_spg(pid, SPG_ID_DEFAULT); - if (!spg) - return -ENODEV; - - down_read(&spg->rw_lock); - if (spg_valid(spg)) - spg_id = spg->id; - up_read(&spg->rw_lock); - - sp_group_drop(spg); - return spg_id; -} -EXPORT_SYMBOL_GPL(sp_group_id_by_pid); - /** * mp_sp_group_id_by_pid() - Get the sp_group ID array of a process. * @pid: pid of target process. @@ -1620,12 +1588,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_add_task);
-int sp_group_add_task(int pid, int spg_id) -{ - return mg_sp_group_add_task(pid, PROT_READ | PROT_WRITE, spg_id); -} -EXPORT_SYMBOL_GPL(sp_group_add_task); - /** * mg_sp_group_del_task() - delete a process from a sp group. * @pid: the pid of the task to be deleted @@ -1729,13 +1691,7 @@ int mg_sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_del_task);
-int sp_group_del_task(int pid, int spg_id) -{ - return mg_sp_group_del_task(pid, spg_id); -} -EXPORT_SYMBOL_GPL(sp_group_del_task); - -int sp_id_of_current(void) +int mg_sp_id_of_current(void) { int ret, spg_id; struct sp_group_master *master; @@ -1767,12 +1723,6 @@ int sp_id_of_current(void)
return spg_id; } -EXPORT_SYMBOL_GPL(sp_id_of_current); - -int mg_sp_id_of_current(void) -{ - return sp_id_of_current(); -} EXPORT_SYMBOL_GPL(mg_sp_id_of_current);
/* the caller must hold sp_area_lock */ @@ -2301,7 +2251,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) }
/** - * sp_free() - Free the memory allocated by sp_alloc(). + * mg_sp_free() - Free the memory allocated by mg_sp_alloc(). * @addr: the starting VA of the memory. * @id: Address space identifier, which is used to distinguish the addr. * @@ -2310,7 +2260,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int sp_free(unsigned long addr, int id) +int mg_sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { @@ -2344,12 +2294,6 @@ int sp_free(unsigned long addr, int id) sp_try_to_compact(); return ret; } -EXPORT_SYMBOL_GPL(sp_free); - -int mg_sp_free(unsigned long addr, int id) -{ - return sp_free(addr, id); -} EXPORT_SYMBOL_GPL(mg_sp_free);
/* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ @@ -2747,7 +2691,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, }
/** - * sp_alloc() - Allocate shared memory for all the processes in a sp_group. + * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group. * @size: the size of memory to allocate. * @sp_flags: how to allocate the memory. * @spg_id: the share group that the memory is allocated to. @@ -2758,7 +2702,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, * * if succeed, return the starting address of the shared memory. * * if fail, return the pointer of -errno. */ -void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { struct sp_area *spa = NULL; int ret = 0; @@ -2792,12 +2736,6 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) else return (void *)(spa->va_start); } -EXPORT_SYMBOL_GPL(sp_alloc); - -void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) -{ - return sp_alloc(size, sp_flags, spg_id); -} EXPORT_SYMBOL_GPL(mg_sp_alloc);
/** @@ -3128,7 +3066,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) }
/** - * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. + * mg_sp_make_share_k2u() - Share kernel memory to current process or an sp_group. * @kva: the VA of shared kernel memory. * @size: the size of shared kernel memory. * @sp_flags: how to allocate the memory. We only support SP_DVPP. @@ -3144,7 +3082,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) * * if succeed, return the shared user address to start at. * * if fail, return the pointer of -errno. */ -void *sp_make_share_k2u(unsigned long kva, unsigned long size, +void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { void *uva; @@ -3182,13 +3120,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, out: return sp_k2u_finish(uva, &kc); } -EXPORT_SYMBOL_GPL(sp_make_share_k2u); - -void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) -{ - return sp_make_share_k2u(kva, size, sp_flags, pid, spg_id); -} EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u);
static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -3424,7 +3355,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) }
/** - * sp_make_share_u2k() - Share user memory of a specified process to kernel. + * mg_sp_make_share_u2k() - Share user memory of a specified process to kernel. * @uva: the VA of shared user memory * @size: the size of shared user memory * @pid: the pid of the specified process(Not currently in use) @@ -3433,7 +3364,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) * * if success, return the starting kernel address of the shared memory. * * if failed, return the pointer of -errno. */ -void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { int ret = 0; struct mm_struct *mm = current->mm; @@ -3492,12 +3423,6 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) kvfree(sp_walk_data.pages); return p; } -EXPORT_SYMBOL_GPL(sp_make_share_u2k); - -void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) -{ - return sp_make_share_u2k(uva, size, pid); -} EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k);
/* @@ -3719,7 +3644,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) }
/** - * sp_unshare() - Unshare the kernel or user memory which shared by calling + * mg_sp_unshare() - Unshare the kernel or user memory which shared by calling * sp_make_share_{k2u,u2k}(). * @va: the specified virtual address of memory * @size: the size of unshared memory @@ -3728,7 +3653,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) * * Return: 0 for success, -errno on failure. */ -int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) +int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id) { int ret = 0;
@@ -3754,16 +3679,10 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
return ret; } -EXPORT_SYMBOL_GPL(sp_unshare); - -int mg_sp_unshare(unsigned long va, unsigned long size, int id) -{ - return sp_unshare(va, size, 0, id); -} EXPORT_SYMBOL_GPL(mg_sp_unshare);
/** - * sp_walk_page_range() - Walk page table with caller specific callbacks. + * mg_sp_walk_page_range() - Walk page table with caller specific callbacks. * @uva: the start VA of user memory. * @size: the size of user memory. * @tsk: task struct of the target task. @@ -3774,7 +3693,7 @@ EXPORT_SYMBOL_GPL(mg_sp_unshare); * When return 0, sp_walk_data describing [uva, uva+size) can be used. * When return -errno, information in sp_walk_data is useless. */ -int sp_walk_page_range(unsigned long uva, unsigned long size, +int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { struct mm_struct *mm; @@ -3813,20 +3732,13 @@ int sp_walk_page_range(unsigned long uva, unsigned long size,
return ret; } -EXPORT_SYMBOL_GPL(sp_walk_page_range); - -int mg_sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data) -{ - return sp_walk_page_range(uva, size, tsk, sp_walk_data); -} EXPORT_SYMBOL_GPL(mg_sp_walk_page_range);
/** - * sp_walk_page_free() - Free the sp_walk_data structure. + * mg_sp_walk_page_free() - Free the sp_walk_data structure. * @sp_walk_data: a structure of a page pointer array to be freed. */ -void sp_walk_page_free(struct sp_walk_data *sp_walk_data) +void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { if (!sp_is_enabled()) return; @@ -3838,12 +3750,6 @@ void sp_walk_page_free(struct sp_walk_data *sp_walk_data)
__sp_walk_page_free(sp_walk_data); } -EXPORT_SYMBOL_GPL(sp_walk_page_free); - -void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) -{ - sp_walk_page_free(sp_walk_data); -} EXPORT_SYMBOL_GPL(mg_sp_walk_page_free);
int sp_register_notifier(struct notifier_block *nb) @@ -3859,7 +3765,7 @@ int sp_unregister_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(sp_unregister_notifier);
/** - * sp_config_dvpp_range() - User can config the share pool start address + * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. * @start: the value of share pool start * @size: the value of share pool @@ -3870,7 +3776,7 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); * Return false if parameter invalid or has been set up. * This functuon has no concurrent problem. */ -bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { int ret; bool err = false; @@ -3920,12 +3826,6 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
return err; } -EXPORT_SYMBOL_GPL(sp_config_dvpp_range); - -bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) -{ - return sp_config_dvpp_range(start, size, device_id, pid); -} EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range);
static bool is_sp_normal_addr(unsigned long addr) @@ -3936,22 +3836,16 @@ static bool is_sp_normal_addr(unsigned long addr) }
/** - * is_sharepool_addr() - Check if a user memory address belongs to share pool. + * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. * * Return true if addr belongs to share pool, or false vice versa. */ -bool is_sharepool_addr(unsigned long addr) +bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && (is_sp_normal_addr(addr) || is_device_addr(addr)); } -EXPORT_SYMBOL_GPL(is_sharepool_addr); - -bool mg_is_sharepool_addr(unsigned long addr) -{ - return is_sharepool_addr(addr); -} EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
int sp_node_id(struct vm_area_struct *vma) @@ -4432,7 +4326,7 @@ static void __init proc_sharepool_init(void)
bool sp_check_addr(unsigned long addr) { - if (sp_is_enabled() && is_sharepool_addr(addr) && + if (sp_is_enabled() && mg_is_sharepool_addr(addr) && !check_aoscore_process(current)) { sp_dump_stack(); return true; @@ -4442,7 +4336,7 @@ bool sp_check_addr(unsigned long addr)
bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { - if (sp_is_enabled() && is_sharepool_addr(addr) && + if (sp_is_enabled() && mg_is_sharepool_addr(addr) && !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) { sp_dump_stack(); return true;
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5K3MH
--------------------------------
After refactoring, cat /proc/pid_xx/sp_group causes a kernel panic because the group master's node_list is walked with the wrong list member. Fix this error.
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 65c78ef24bdc..73e941636ba7 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3995,7 +3995,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "%-8s %-9s %-9s %-9s %-4s\n", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT");
- list_for_each_entry(spg_node, &master->node_list, proc_node) { + list_for_each_entry(spg_node, &master->node_list, group_node) { seq_printf(m, "%-8d %-9ld %-9ld %-9ld ", spg_node->spg->id, get_spg_proc_alloc(spg_node),
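For illustration, a minimal sketch of the list_for_each_entry() rule the one-line fix above relies on; the struct below is a simplified stand-in, not the real sharepool definitions:

  #include <linux/list.h>
  #include <linux/printk.h>

  /*
   * Simplified stand-in: a node that sits on two lists at once. The third
   * argument of list_for_each_entry() must name the list_head member that
   * chains the list being walked; otherwise container_of() computes the
   * enclosing struct from the wrong offset and the walk dereferences
   * garbage, which is consistent with the panic the patch fixes.
   */
  struct demo_group_node {
  	struct list_head proc_node;	/* chains into the group's procs list */
  	struct list_head group_node;	/* chains into the master's node_list */
  };

  static void walk_master_node_list(struct list_head *node_list)
  {
  	struct demo_group_node *node;

  	list_for_each_entry(node, node_list, group_node)	/* not proc_node */
  		pr_info("node %p\n", node);
  }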
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5KSDH
--------------------------------
Fix redundant /proc/sharepool/spa_stat prints that occur when multiple groups are attached to the same sp_mapping.
Traverse all dvpp-mappings rather than all groups.
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 73e941636ba7..94c2aa02472c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -122,6 +122,11 @@ static LIST_HEAD(master_list); /* mutex to protect insert/delete ops from master_list */ static DEFINE_MUTEX(master_list_lock);
+/* list of all spm-dvpp */ +static LIST_HEAD(spm_dvpp_list); +/* mutex to protect insert/delete ops from master_list */ +static DEFINE_MUTEX(spm_list_lock); + /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat;
@@ -189,6 +194,7 @@ struct sp_mapping {
/* list head for all groups attached to this mapping, dvpp mapping only */ struct list_head group_head; + struct list_head spm_node; };
/* Processes in the same sp_group can share memory. @@ -290,6 +296,22 @@ static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat #define SP_MAPPING_NORMAL 0x2 static struct sp_mapping *sp_mapping_normal;
+static void sp_mapping_add_to_list(struct sp_mapping *spm) +{ + mutex_lock(&spm_list_lock); + if (spm->flag & SP_MAPPING_DVPP) + list_add_tail(&spm->spm_node, &spm_dvpp_list); + mutex_unlock(&spm_list_lock); +} + +static void sp_mapping_remove_from_list(struct sp_mapping *spm) +{ + mutex_lock(&spm_list_lock); + if (spm->flag & SP_MAPPING_DVPP) + list_del(&spm->spm_node); + mutex_unlock(&spm_list_lock); +} + static void sp_mapping_range_init(struct sp_mapping *spm) { int i; @@ -325,12 +347,14 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; INIT_LIST_HEAD(&spm->group_head); + sp_mapping_add_to_list(spm);
return spm; }
static void sp_mapping_destroy(struct sp_mapping *spm) { + sp_mapping_remove_from_list(spm); kfree(spm); }
@@ -4068,22 +4092,14 @@ static void spa_normal_stat_show(struct seq_file *seq) spa_stat_of_mapping_show(seq, sp_mapping_normal); }
-static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data) -{ - struct sp_group *spg = p; - struct seq_file *seq = data; - - if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1) - spa_stat_of_mapping_show(seq, spg->dvpp); - - return 0; -} - static void spa_dvpp_stat_show(struct seq_file *seq) { - down_read(&sp_group_sem); - idr_for_each(&sp_group_idr, idr_spg_dvpp_stat_show_cb, seq); - up_read(&sp_group_sem); + struct sp_mapping *spm; + + mutex_lock(&spm_list_lock); + list_for_each_entry(spm, &spm_dvpp_list, spm_node) + spa_stat_of_mapping_show(seq, spm); + mutex_unlock(&spm_list_lock); }
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LHGZ
--------------------------------
Delete the unused sysctl interfaces from the sharepool feature.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/share_pool.h | 21 ---- kernel/sysctl.c | 67 ------------- mm/share_pool.c | 192 ++----------------------------------- 3 files changed, 6 insertions(+), 274 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index e8bc9a368e34..4860e4b00e57 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -48,23 +48,8 @@
#define MAX_DEVID 8 /* the max num of Da-vinci devices */
-extern int sysctl_share_pool_hugepage_enable; - -extern int sysctl_ac_mode; - -extern int sysctl_sp_debug_mode; - extern struct static_key_false share_pool_enabled_key;
-extern int sysctl_share_pool_map_lock_enable; - -extern int sysctl_sp_compact_enable; -extern unsigned long sysctl_sp_compact_interval; -extern unsigned long sysctl_sp_compact_interval_max; -extern int sysctl_sp_perf_alloc; - -extern int sysctl_sp_perf_k2u; - #ifdef __GENKSYMS__ /* we estimate an sp-group ususally contains at most 64 sp-group */ #define SP_SPG_HASH_BITS 6 @@ -307,12 +292,6 @@ static inline bool sp_check_vm_share_pool(unsigned long vm_flags) return false; }
-static inline void sp_dump_stack(void) -{ - if (sysctl_sp_debug_mode) - dump_stack(); -} - static inline bool is_vmalloc_sharepool(unsigned long vm_flags) { if (sp_is_enabled() && (vm_flags & VM_SHAREPOOL)) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 91812d673c6b..5fab117d7f52 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -71,7 +71,6 @@ #include <linux/coredump.h> #include <linux/latencytop.h> #include <linux/pid.h> -#include <linux/share_pool.h>
#include "../lib/kstrtox.h"
@@ -3238,72 +3237,6 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, }, #endif -#ifdef CONFIG_ASCEND_SHARE_POOL - { - .procname = "sharepool_debug_mode", - .data = &sysctl_sp_debug_mode, - .maxlen = sizeof(sysctl_sp_debug_mode), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_compact_enable", - .data = &sysctl_sp_compact_enable, - .maxlen = sizeof(sysctl_sp_compact_enable), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_compact_interval", - .data = &sysctl_sp_compact_interval, - .maxlen = sizeof(sysctl_sp_compact_interval), - .mode = 0600, - .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero_ul, - .extra2 = &sysctl_sp_compact_interval_max, - }, - { - /* 0: map_unlock, 1: map_lock */ - .procname = "share_pool_map_lock_enable", - .data = &sysctl_share_pool_map_lock_enable, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_perf_k2u", - .data = &sysctl_sp_perf_k2u, - .maxlen = sizeof(sysctl_sp_perf_k2u), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ten_thousand, - }, - { - .procname = "sharepool_perf_alloc", - .data = &sysctl_sp_perf_alloc, - .maxlen = sizeof(sysctl_sp_perf_alloc), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ten_thousand, - }, - { - .procname = "sharepool_ac_mode", - .data = &sysctl_ac_mode, - .maxlen = sizeof(sysctl_ac_mode), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_HUGETLBFS { .procname = "hugepage_mig_noalloc", diff --git a/mm/share_pool.c b/mm/share_pool.c index 94c2aa02472c..80a5e01ca62d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -44,7 +44,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/rmap.h> -#include <linux/compaction.h> #include <linux/preempt.h> #include <linux/swapops.h> #include <linux/mmzone.h> @@ -52,10 +51,6 @@ #include <linux/time64.h> #include <linux/pagewalk.h>
-/* access control mode macros */ -#define AC_NONE 0 -#define AC_SINGLE_OWNER 1 - #define spg_valid(spg) ((spg)->is_alive == true)
/* Use spa va address as mmap offset. This can work because spa_file @@ -82,19 +77,6 @@ static int __read_mostly enable_mdc_default_group; static const int mdc_default_group_id = 1;
-/* share the uva to the whole group */ -static int __read_mostly enable_share_k2u_spg = 1; - -/* access control mode */ -int sysctl_ac_mode = AC_NONE; -/* debug mode */ -int sysctl_sp_debug_mode; - -int sysctl_share_pool_map_lock_enable; - -int sysctl_sp_perf_k2u; -int sysctl_sp_perf_alloc; - static int system_group_count;
static unsigned int sp_device_number; @@ -232,8 +214,6 @@ struct sp_group { struct list_head spa_list; /* group statistics */ struct sp_spg_stat instat; - /* we define the creator process of a sp_group as owner */ - struct task_struct *owner; /* is_alive == false means it's being destroyed */ bool is_alive; atomic_t use_count; @@ -837,7 +817,6 @@ static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, struct sp_group_node *spg_node, enum spa_type type) { if (unlikely(!spg_node)) { - sp_dump_stack(); WARN(1, "null sp group node\n"); return; } @@ -912,8 +891,6 @@ struct sp_k2u_context { int state; int spg_id; bool to_task; - struct timespec64 start; - struct timespec64 end; };
static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, @@ -1158,7 +1135,6 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) spg->flag = flag; spg->is_alive = true; spg->proc_num = 0; - spg->owner = current->group_leader; atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); INIT_LIST_HEAD(&spg->spa_list); @@ -1443,7 +1419,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&sp_group_sem); ret = -EACCES; free_new_spg_id(id_newly_generated, spg_id); - sp_dump_stack(); goto out_put_task; }
@@ -1475,15 +1450,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_mm; }
- /* access control permission check */ - if (sysctl_ac_mode == AC_SINGLE_OWNER) { - if (spg->owner != current->group_leader) { - ret = -EPERM; - goto out_drop_group; - } - } - - down_write(&spg->rw_lock); ret = mm_add_group_init(tsk, mm, spg); if (ret) { @@ -2067,63 +2033,6 @@ void sp_area_drop(struct vm_area_struct *vma) spin_unlock(&sp_area_lock); }
-int sysctl_sp_compact_enable; -unsigned long sysctl_sp_compact_interval = 30UL; -unsigned long sysctl_sp_compact_interval_max = 1000UL; -static unsigned long compact_last_jiffies; -static unsigned long compact_daemon_status; -#define COMPACT_START 1 -#define COMPACT_STOP 0 - -static void sp_compact_nodes(struct work_struct *work) -{ - sysctl_compaction_handler(NULL, 1, NULL, NULL, NULL); - - kfree(work); - - compact_last_jiffies = jiffies; - cmpxchg(&compact_daemon_status, COMPACT_START, COMPACT_STOP); -} - -static void sp_add_work_compact(void) -{ - struct work_struct *compact_work; - - if (!sysctl_sp_compact_enable) - return; - - /* experimental compaction time: 4GB->1.7s, 8GB->3.4s */ - if (!time_after(jiffies, - compact_last_jiffies + sysctl_sp_compact_interval * HZ)) - return; - - if (cmpxchg(&compact_daemon_status, COMPACT_STOP, COMPACT_START) == - COMPACT_START) - return; - - compact_work = kzalloc(sizeof(*compact_work), GFP_KERNEL); - if (!compact_work) - return; - - INIT_WORK(compact_work, sp_compact_nodes); - schedule_work(compact_work); -} - -static void sp_try_to_compact(void) -{ - unsigned long totalram; - unsigned long freeram; - - totalram = totalram_pages(); - freeram = global_zone_page_state(NR_FREE_PAGES); - - /* free < total / 3 */ - if ((freeram + (freeram << 1)) > totalram) - return; - - sp_add_work_compact(); -} - /* * The function calls of do_munmap() won't change any non-atomic member * of struct sp_group. Please review the following chain: @@ -2314,8 +2223,6 @@ int mg_sp_free(unsigned long addr, int id)
__sp_area_drop(fc.spa); /* match __find_sp_area in sp_free_get_spa */ out: - sp_dump_stack(); - sp_try_to_compact(); return ret; } EXPORT_SYMBOL_GPL(mg_sp_free); @@ -2333,12 +2240,6 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; struct vm_area_struct *vma;
- /* Mark the mapped region to be locked. After the MAP_LOCKED is enable, - * multiple tasks will preempt resources, causing performance loss. - */ - if (sysctl_share_pool_map_lock_enable) - flags |= MAP_LOCKED; - atomic_inc(&spa->use_count); addr = __do_mmap_mm(mm, file, addr, size, prot, flags, vm_flags, pgoff, populate, NULL); @@ -2353,7 +2254,6 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, *pvma = vma; }
- return addr; }
@@ -2371,39 +2271,10 @@ struct sp_alloc_context { unsigned long populate; int state; bool need_fallocate; - struct timespec64 start; - struct timespec64 end; bool have_mbind; enum spa_type type; };
-static void trace_sp_alloc_begin(struct sp_alloc_context *ac) -{ - if (!sysctl_sp_perf_alloc) - return; - - ktime_get_ts64(&ac->start); -} - -static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) -{ - unsigned long cost; - - if (!sysctl_sp_perf_alloc) - return; - - ktime_get_ts64(&ac->end); - - cost = SEC2US(ac->end.tv_sec - ac->start.tv_sec) + - NS2US(ac->end.tv_nsec - ac->start.tv_nsec); - if (cost >= (unsigned long)sysctl_sp_perf_alloc) { - pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", - current->comm, current->tgid, current->pid, - va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, - is_local_group(ac->spg->id)); - } -} - static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, int spg_id, struct sp_alloc_context *ac) { @@ -2411,8 +2282,6 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
check_interrupt_context();
- trace_sp_alloc_begin(ac); - /* mdc scene hack */ if (enable_mdc_default_group) spg_id = mdc_default_group_id; @@ -2597,11 +2466,8 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, * depends on this feature (and MAP_LOCKED) to work correctly. */ ret = do_mm_populate(mm, sp_addr, ac->populate, 0); - if (spa->is_hugepage) { + if (spa->is_hugepage) memalloc_noreclaim_restore(noreclaim_flag); - if (ret) - sp_add_work_compact(); - }
return ret; } @@ -2704,14 +2570,10 @@ static void sp_alloc_finish(int result, struct sp_area *spa, sp_update_process_stat(current, true, spa);
/* this will free spa if mmap failed */ - if (spa && !IS_ERR(spa)) { + if (spa && !IS_ERR(spa)) __sp_area_drop(spa); - trace_sp_alloc_finish(ac, spa->va_start); - }
sp_group_drop(spg); - sp_dump_stack(); - sp_try_to_compact(); }
/** @@ -2996,33 +2858,6 @@ static bool vmalloc_area_set_flag(unsigned long kva, unsigned long flags) return false; }
-static void trace_sp_k2u_begin(struct sp_k2u_context *kc) -{ - if (!sysctl_sp_perf_k2u) - return; - - ktime_get_ts64(&kc->start); -} - -static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva) -{ - unsigned long cost; - - if (!sysctl_sp_perf_k2u) - return; - - ktime_get_ts64(&kc->end); - - cost = SEC2US(kc->end.tv_sec - kc->start.tv_sec) + - NS2US(kc->end.tv_nsec - kc->start.tv_nsec); - if (cost >= (unsigned long)sysctl_sp_perf_k2u) { - pr_err("Task %s(%d/%d) sp_k2u returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, to_task is %d\n", - current->comm, current->tgid, current->pid, - (unsigned long)uva, cost, byte2kb(kc->size), byte2kb(kc->size_aligned), - kc->sp_flags, kc->to_task); - } -} - static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned long sp_flags, int spg_id, struct sp_k2u_context *kc) { @@ -3030,8 +2865,6 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; unsigned long kva_aligned, size_aligned;
- trace_sp_k2u_begin(kc); - if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; @@ -3084,8 +2917,6 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) else uva = uva + (kc->kva - kc->kva_aligned);
- trace_sp_k2u_finish(kc, uva); - sp_dump_stack(); return uva; }
@@ -3609,8 +3440,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) WARN(1, "unshare uva invalid spa type"); }
- sp_dump_stack(); - out_clr_flag: if (!vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL)) pr_debug("clear spa->kva %ld is not valid\n", spa->kva); @@ -3895,13 +3724,6 @@ static int __init mdc_default_group(char *s) } __setup("enable_mdc_default_group", mdc_default_group);
-static int __init enable_share_k2u_to_group(char *s) -{ - enable_share_k2u_spg = 1; - return 1; -} -__setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group); - /*** Statistical and maintenance functions ***/
static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, @@ -4343,20 +4165,18 @@ static void __init proc_sharepool_init(void) bool sp_check_addr(unsigned long addr) { if (sp_is_enabled() && mg_is_sharepool_addr(addr) && - !check_aoscore_process(current)) { - sp_dump_stack(); + !check_aoscore_process(current)) return true; - } else + else return false; }
bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { if (sp_is_enabled() && mg_is_sharepool_addr(addr) && - !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) { - sp_dump_stack(); + !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) return true; - } else + else return false; }
From: Zhang Zekun zhangzekun11@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY5K
-----------------------------------
Remove the unused sp_dev_va_start and sp_dev_va_size arrays along with the related code.
Add a DVPP address check to mg_is_sharepool_addr() for the current process.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- mm/share_pool.c | 62 +++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 28 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 80a5e01ca62d..4a485f706007 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -80,13 +80,6 @@ static const int mdc_default_group_id = 1; static int system_group_count;
static unsigned int sp_device_number; -static unsigned long sp_dev_va_start[MAX_DEVID]; -static unsigned long sp_dev_va_size[MAX_DEVID]; - -static bool is_sp_dev_addr_enabled(int device_id) -{ - return sp_dev_va_size[device_id]; -}
/* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); @@ -303,14 +296,9 @@ static void sp_mapping_range_init(struct sp_mapping *spm) continue; }
- if (!is_sp_dev_addr_enabled(i)) { - spm->start[i] = MMAP_SHARE_POOL_16G_START + - i * MMAP_SHARE_POOL_16G_SIZE; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; - } else { - spm->start[i] = sp_dev_va_start[i]; - spm->end[i] = spm->start[i] + sp_dev_va_size[i]; - } + spm->start[i] = MMAP_SHARE_POOL_16G_START + + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } }
@@ -1089,18 +1077,6 @@ static bool is_online_node_id(int node_id) return node_id >= 0 && node_id < MAX_NUMNODES && node_online(node_id); }
-static bool is_device_addr(unsigned long addr) -{ - int i; - - for (i = 0; i < sp_device_number; i++) { - if (addr >= sp_dev_va_start[i] && - addr < sp_dev_va_start[i] + sp_dev_va_size[i]) - return true; - } - return false; -} - static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; @@ -3688,6 +3664,36 @@ static bool is_sp_normal_addr(unsigned long addr) sp_device_number * MMAP_SHARE_POOL_16G_SIZE; }
+static bool is_sp_dvpp_addr(unsigned long addr) +{ + int i; + struct mm_struct *mm; + struct sp_group_master *master; + struct sp_mapping *spm_dvpp; + + mm = current->mm; + if (!mm) + return false; + + down_read(&sp_group_sem); + master = mm->sp_group_master; + if (!master) { + up_read(&sp_group_sem); + return false; + } + + /* master->local and master->local->dvpp won't be NULL*/ + spm_dvpp = master->local->dvpp; + for (i = 0; i < MAX_DEVID; i++) { + if (addr >= spm_dvpp->start[i] && addr < spm_dvpp->end[i]) { + up_read(&sp_group_sem); + return true; + } + } + up_read(&sp_group_sem); + return false; +} + /** * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. @@ -3697,7 +3703,7 @@ static bool is_sp_normal_addr(unsigned long addr) bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && - (is_sp_normal_addr(addr) || is_device_addr(addr)); + ((is_sp_normal_addr(addr) || is_sp_dvpp_addr(addr))); } EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
From: Zhang Zekun zhangzekun11@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY4H
-----------------------------------------
Remove sp_device_number and its detection code, which are no longer needed. Use the macro 'MAX_DEVID' in its place.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- mm/share_pool.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 4a485f706007..a2ee3fe5eebc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -79,8 +79,6 @@ static const int mdc_default_group_id = 1;
static int system_group_count;
-static unsigned int sp_device_number; - /* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); /* rw semaphore for sp_group_idr and mm->sp_group_master */ @@ -372,7 +370,7 @@ static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) { int i;
- for (i = 0; i < sp_device_number; i++) + for (i = 0; i < MAX_DEVID; i++) if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i]) return false;
@@ -3620,7 +3618,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
/* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) + device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id)) return false;
ret = get_task(pid, &tsk); @@ -3661,7 +3659,7 @@ static bool is_sp_normal_addr(unsigned long addr) { return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START + - sp_device_number * MMAP_SHARE_POOL_16G_SIZE; + MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; }
static bool is_sp_dvpp_addr(unsigned long addr) @@ -4424,18 +4422,6 @@ static int __init enable_share_pool(char *s) } __setup("enable_ascend_share_pool", enable_share_pool);
-static void __init sp_device_number_detect(void) -{ - /* NOTE: TO BE COMPLETED */ - sp_device_number = 4; - - if (sp_device_number > MAX_DEVID) { - pr_warn("sp_device_number %d exceed, truncate it to %d\n", - sp_device_number, MAX_DEVID); - sp_device_number = MAX_DEVID; - } -} - static int __init share_pool_init(void) { if (!sp_is_enabled()) @@ -4446,7 +4432,6 @@ static int __init share_pool_init(void) goto fail; atomic_inc(&sp_mapping_normal->user);
- sp_device_number_detect(); proc_sharepool_init();
return 0;
From: Zhang Zekun zhangzekun11@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY51
----------------------------------------------
The variable enable_mdc_default_group has been deprecated, so remove it and the corresponding code. The return value of is_process_in_group() can be ambiguous, so change its return type to bool.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- mm/share_pool.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index a2ee3fe5eebc..826201284503 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,10 +73,6 @@
#define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
-/* mdc scene hack */ -static int __read_mostly enable_mdc_default_group; -static const int mdc_default_group_id = 1; - static int system_group_count;
/* idr of all sp_groups */ @@ -946,16 +942,16 @@ static int get_task(int pid, struct task_struct **task) * 1. hold spg->rw_lock * 2. ensure no concurrency problem for mm_struct */ -static struct sp_group_node *is_process_in_group(struct sp_group *spg, +static bool is_process_in_group(struct sp_group *spg, struct mm_struct *mm) { struct sp_group_node *spg_node;
list_for_each_entry(spg_node, &spg->procs, proc_node) if (spg_node->master->mm == mm) - return spg_node; + return true;
- return NULL; + return false; }
/* user must call sp_group_drop() after use */ @@ -1341,10 +1337,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EINVAL; }
- /* mdc scene hack */ - if (enable_mdc_default_group) - spg_id = mdc_default_group_id; - if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("add group failed, invalid group id %d\n", spg_id); return -EINVAL; @@ -1616,7 +1608,7 @@ int mg_sp_group_del_task(int pid, int spg_id) goto out_put_task; }
- spg_node = is_process_in_group(spg, mm); + spg_node = find_spg_node_by_spg(mm, spg); if (!spg_node) { up_write(&sp_group_sem); pr_err_ratelimited("process not in group"); @@ -2256,10 +2248,6 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
check_interrupt_context();
- /* mdc scene hack */ - if (enable_mdc_default_group) - spg_id = mdc_default_group_id; - if (current->flags & PF_KTHREAD) { pr_err_ratelimited("allocation failed, task is kthread\n"); return -EINVAL; @@ -3721,13 +3709,6 @@ int sp_node_id(struct vm_area_struct *vma) return node_id; }
-static int __init mdc_default_group(char *s) -{ - enable_mdc_default_group = 1; - return 1; -} -__setup("enable_mdc_default_group", mdc_default_group); - /*** Statistical and maintenance functions ***/
static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon,
From: Zhang Zekun zhangzekun11@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY2R
-------------------------------------------
Remove the meaningless comment in mg_sp_free() and fix the bug in the mg_sp_group_id_by_pid() parameter check path.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- mm/share_pool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 826201284503..cce68c468851 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1018,7 +1018,7 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num)
check_interrupt_context();
- if (!spg_ids || num <= 0) + if (!spg_ids || !num || *num <= 0) return -EINVAL;
ret = get_task(pid, &tsk); @@ -2181,7 +2181,6 @@ int mg_sp_free(unsigned long addr, int id)
sp_free_unmap_fallocate(fc.spa);
- /* current->mm == NULL: allow kthread */ if (current->mm == NULL) atomic64_sub(fc.spa->real_size, &kthread_stat.alloc_size); else
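A simplified illustration of the parameter-check bug fixed above: 'num' is an output pointer, so the old test compared the pointer itself rather than the caller-supplied count. The function below is illustrative only, not the real mg_sp_group_id_by_pid() body:

  #include <linux/errno.h>

  static int check_query_args(int *spg_ids, int *num)
  {
  	/* validate the pointer first, then the value it points to */
  	if (!spg_ids || !num || *num <= 0)
  		return -EINVAL;
  	return 0;
  }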
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5M3PS
--------------------------------
- Fix the incorrect SP_RES value.
- Fix the incorrect SP_RES_T value.
- Fix the uninitialized pid field in the pass-through scenario.
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index cce68c468851..099c359d2640 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -250,12 +250,14 @@ struct sp_group_node { #endif
/* The caller should hold mmap_sem to protect master (TBD) */ -static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat *stat) +static void sp_init_group_master_stat(int tgid, struct mm_struct *mm, + struct sp_proc_stat *stat) { atomic64_set(&stat->alloc_nsize, 0); atomic64_set(&stat->alloc_hsize, 0); atomic64_set(&stat->k2u_size, 0); stat->mm = mm; + stat->tgid = tgid; get_task_comm(stat->comm, current); }
@@ -486,7 +488,7 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct INIT_LIST_HEAD(&master->node_list); master->count = 0; master->mm = mm; - sp_init_group_master_stat(mm, &master->instat); + sp_init_group_master_stat(tsk->tgid, mm, &master->instat); mm->sp_group_master = master;
mutex_lock(&master_list_lock); @@ -1422,7 +1424,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&spg->rw_lock); goto out_drop_group; } - mm->sp_group_master->instat.tgid = tsk->tgid;
ret = sp_mapping_group_setup(mm, spg); if (ret) { @@ -3730,18 +3731,27 @@ static long get_proc_alloc(struct sp_proc_stat *stat) atomic64_read(&stat->alloc_hsize)); }
-static void get_process_sp_res(struct sp_proc_stat *stat, +static void get_process_sp_res(struct sp_group_master *master, long *sp_res_out, long *sp_res_nsize_out) { - *sp_res_out = byte2kb(atomic64_read(&stat->alloc_nsize) + - atomic64_read(&stat->alloc_hsize)); - *sp_res_nsize_out = byte2kb(atomic64_read(&stat->alloc_nsize)); + struct sp_group *spg; + struct sp_group_node *spg_node; + + *sp_res_out = 0; + *sp_res_nsize_out = 0; + + list_for_each_entry(spg_node, &master->node_list, group_node) { + spg = spg_node->spg; + *sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize)); + *sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_hsize)); + *sp_res_nsize_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize)); + } }
static long get_sp_res_by_spg_proc(struct sp_group_node *spg_node) { - return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + - atomic64_read(&spg_node->instat.alloc_hsize)); + return byte2kb(atomic64_read(&spg_node->spg->instat.alloc_nsize) + + atomic64_read(&spg_node->spg->instat.alloc_hsize)); }
/* @@ -3806,7 +3816,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat; - get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm);
@@ -4058,7 +4068,7 @@ static int proc_usage_by_group(int id, void *p, void *data) tgid = master->instat.tgid;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm);
@@ -4119,7 +4129,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset) list_for_each_entry(master, &master_list, list_node) { proc_stat = &master->instat; get_mm_rss_info(master->mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n",
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5MS48
--------------------------------
Fix two bugs revealed by static check:
- Release mm->mmap_lock when mm->sp_group_master has not been initialized.
- Do not leave the mm on the master list if adding the process to the group failed.
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 099c359d2640..ceed27360fd4 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -502,6 +502,7 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct return 0;
free_master: + list_del(&master->list_node); mm->sp_group_master = NULL; kfree(master);
@@ -3811,8 +3812,10 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
down_read(&mm->mmap_lock); master = mm->sp_group_master; - if (!master) + if (!master) { + up_read(&mm->mmap_lock); return 0; + }
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat;
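The list_del() added above restores the usual publish/unpublish symmetry on error paths. A generic sketch of the idiom, with illustrative identifiers only:

  #include <linux/list.h>

  /*
   * Anything published on a shared list before a failure point must be
   * unlinked again before the object is freed, or later list walkers
   * will touch freed memory.
   */
  static int register_entry(struct list_head *all, struct list_head *node,
  			  int (*setup)(void))
  {
  	int err;

  	list_add_tail(node, all);
  	err = setup();
  	if (err) {
  		list_del(node);		/* unpublish before the caller frees */
  		return err;
  	}
  	return 0;
  }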
From: Zhang Zekun zhangzekun11@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: 187524
-----------------------------------------------
In get_process_sp_res(), spg_node can be freed by another process, so accessing spg_node->spg can cause a kernel panic. Add a pair of sp_group_sem read locks to fix this problem. Fix the same problem in proc_sp_group_state().
Fixes: 3d37f8717287 ("[Huawei] mm: sharepool: use built-in-statistics") Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- mm/share_pool.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index ceed27360fd4..bd6b3fbf381e 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3810,10 +3810,12 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, if (!mm) return 0;
+ down_read(&sp_group_sem); down_read(&mm->mmap_lock); master = mm->sp_group_master; if (!master) { up_read(&mm->mmap_lock); + up_read(&sp_group_sem); return 0; }
@@ -3848,6 +3850,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, seq_putc(m, '\n'); } up_read(&mm->mmap_lock); + up_read(&sp_group_sem); return 0; }
@@ -4128,6 +4131,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset) "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", "Non-SP_Shm", "VIRT");
+ down_read(&sp_group_sem); mutex_lock(&master_list_lock); list_for_each_entry(master, &master_list, list_node) { proc_stat = &master->instat; @@ -4143,6 +4147,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset) page2kb(master->mm->total_vm)); } mutex_unlock(&master_list_lock); + up_read(&sp_group_sem);
return 0; }
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5O5RQ
--------------------------------
In sp_unshare_uva(), comparing current->tgid with spa->applier is sufficient for the authentication check; there is no need to also check current->mm against spa->mm.
Other redundant cases:
- find_spg_node_by_spg() never returns NULL in the current use context;
- spg_info_show() never encounters a group with id 0.
Therefore, delete these redundant paths.
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- include/linux/share_pool.h | 1 - mm/share_pool.c | 43 ++++---------------------------------- 2 files changed, 4 insertions(+), 40 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 4860e4b00e57..ebf4b10a0965 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -44,7 +44,6 @@ #define SPG_ID_LOCAL_MAX 299999
#define SPG_FLAG_NON_DVPP (1 << 0) -#define SPG_FLAG_MASK (SPG_FLAG_NON_DVPP)
#define MAX_DEVID 8 /* the max num of Da-vinci devices */
diff --git a/mm/share_pool.c b/mm/share_pool.c index bd6b3fbf381e..f2db4101eb09 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -801,11 +801,6 @@ static void spa_dec_usage(struct sp_area *spa) static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, struct sp_group_node *spg_node, enum spa_type type) { - if (unlikely(!spg_node)) { - WARN(1, "null sp group node\n"); - return; - } - switch (type) { case SPA_TYPE_ALLOC: update_mem_usage_alloc(size, inc, is_hugepage, spg_node); @@ -839,10 +834,7 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, enum spa_type type = spa->type;
spg_node = find_spg_node_by_spg(tsk->mm, spa->spg); - if (!spg_node) - pr_err("share pool: spg node not found!\n"); - else - update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); + update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); }
static inline void check_interrupt_context(void) @@ -1088,11 +1080,6 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) return ERR_PTR(-ENOSPC); }
- if (flag & ~SPG_FLAG_MASK) { - pr_err_ratelimited("invalid flag:%#lx\n", flag); - return ERR_PTR(-EINVAL); - } - spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) return ERR_PTR(-ENOMEM); @@ -2744,10 +2731,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { spg_node = find_spg_node_by_spg(current->mm, spa->spg); - if (!spg_node) - pr_err("spg_node is null\n"); - else - update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); + update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; }
@@ -3318,12 +3302,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) goto out_drop_area; }
- if (!spa->mm) { - pr_err_ratelimited("unshare uva(to task) failed, none spa owner\n"); - ret = -EINVAL; - goto out_drop_area; - } - /* * current thread may be exiting in a multithread process * @@ -3337,13 +3315,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) goto out_clr_flag; }
- if (spa->mm != mm) { - pr_err_ratelimited("unshare uva(to task) failed, spa not belong to the task\n"); - ret = -EINVAL; - mmput(mm); - goto out_drop_area; - } - down_write(&mm->mmap_lock); if (unlikely(mm->core_state)) { ret = 0; @@ -3981,10 +3952,7 @@ static int spg_info_show(int id, void *p, void *data) return 0;
if (seq != NULL) { - if (id == 0) - seq_puts(seq, "Non Group "); - else - seq_printf(seq, "Group %6d ", id); + seq_printf(seq, "Group %6d ", id);
down_read(&spg->rw_lock); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", @@ -3995,10 +3963,7 @@ static int spg_info_show(int id, void *p, void *data) byte2kb(atomic64_read(&spg->instat.alloc_hsize))); up_read(&spg->rw_lock); } else { - if (id == 0) - pr_info("Non Group "); - else - pr_info("Group %6d ", id); + pr_info("Group %6d ", id);
down_read(&spg->rw_lock); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n",
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5ODCT
--------------------------------
When there are a large number of groups in the system, or a large number of processes in each group, "cat /proc/sharepool/proc_stat" hits a soft lockup before all output is printed. This is because the callback function loops too many times. Remove one of the loops to reduce the time cost and add a cond_resched() to avoid the lockup.
root@buildroot:~/install# cat /proc/sharepool/proc_stat [ 1250.647469] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cat:309] [ 1250.648610] Modules linked in: sharepool_dev(OE) [ 1250.650795] CPU: 0 PID: 309 Comm: cat Tainted: G OE 5.10.0+ #43 [ 1250.651216] Hardware name: linux,dummy-virt (DT) [ 1250.651721] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) [ 1250.652426] pc : get_process_sp_res+0x40/0x90 [ 1250.652747] lr : proc_usage_by_group+0x158/0x218 ... [ 1250.657903] Call trace: [ 1250.658376] get_process_sp_res+0x40/0x90 [ 1250.658602] proc_usage_by_group+0x158/0x218 [ 1250.658838] idr_for_each+0x6c/0xf0 [ 1250.659027] proc_group_usage_show+0x104/0x120 [ 1250.659263] seq_read_iter+0xe0/0x498 [ 1250.659462] proc_reg_read_iter+0xa8/0xe0 [ 1250.659660] generic_file_splice_read+0xf0/0x1b0 [ 1250.659865] do_splice_to+0x7c/0xd0 [ 1250.660029] splice_direct_to_actor+0xe0/0x2a8 [ 1250.660353] do_splice_direct+0xa4/0xf8 [ 1250.660902] do_sendfile+0x1bc/0x420 [ 1250.661079] __arm64_sys_sendfile64+0x170/0x178 [ 1250.661298] el0_svc_common.constprop.0+0x88/0x268 [ 1250.661505] do_el0_svc+0x34/0xb8 [ 1250.661686] el0_svc+0x1c/0x28 [ 1250.661836] el0_sync_handler+0x8c/0xb0 [ 1250.662033] el0_sync+0x168/0x180
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index f2db4101eb09..8cc37699149f 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4024,7 +4024,6 @@ static int proc_usage_by_group(int id, void *p, void *data) struct sp_group_master *master; int tgid; unsigned long anon, file, shmem, total_rss; - long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
down_read(&spg->rw_lock); list_for_each_entry(spg_node, &spg->procs, proc_node) { @@ -4039,26 +4038,20 @@ static int proc_usage_by_group(int id, void *p, void *data) tgid = master->instat.tgid;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(master, &sp_res, &sp_res_nsize); - get_process_non_sp_res(total_rss, shmem, sp_res_nsize, - &non_sp_res, &non_sp_shm);
seq_printf(seq, "%-8d ", tgid); - if (id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", id); - seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", + seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-9ld %-9ld %-9ld %-8ld %-7ld %-7ld ", get_spg_proc_alloc(spg_node), get_spg_proc_k2u(spg_node), get_sp_res_by_spg_proc(spg_node), - sp_res, non_sp_res, page2kb(mm->total_vm), page2kb(total_rss), - page2kb(shmem), non_sp_shm); + page2kb(shmem)); print_process_prot(seq, spg_node->prot); seq_putc(seq, '\n'); } up_read(&spg->rw_lock); + cond_resched();
return 0; } @@ -4069,9 +4062,9 @@ static int proc_group_usage_show(struct seq_file *seq, void *offset) spa_overview_show(seq);
/* print the file header */ - seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s %-4s\n", - "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", - "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); + seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-8s %-7s %-7s %-4s\n", + "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", + "VIRT", "RES", "Shm", "PROT"); /* print kthread buff_module_guard_work */ seq_printf(seq, "%-8s %-8s %-9lld %-9lld\n", "guard", "-",
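The cond_resched() added above follows the common pattern for long-running /proc show loops. A minimal illustrative idr_for_each() callback (not the sharepool one) looks like:

  #include <linux/sched.h>
  #include <linux/seq_file.h>

  /*
   * Print one entry per invocation and yield between entries so a very
   * long listing does not trip the soft-lockup watchdog on
   * non-preemptible kernels.
   */
  static int demo_group_show(int id, void *p, void *data)
  {
  	struct seq_file *seq = data;

  	seq_printf(seq, "group %d\n", id);
  	cond_resched();		/* give other runnable tasks a chance */
  	return 0;
  }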
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5OE1J
--------------------------------
The mutex protecting spm_dvpp_list has an ABBA deadlock with spg->rw_lock. Adding a process to a sharepool group while running cat /proc/sharepool/spa_stat reproduces the problem.
Drop the spg->rw_lock acquisition in the spa_stat path to avoid this.
[ 1101.013480]INFO: task test:3567 blocked for more than 30 seconds. [ 1101.014378] Tainted: G OE 5.10.0+ #45 [ 1101.015707]task:test state:D stack: 0 pid: 3567 [ 1101.016464]Call trace: [ 1101.016736] __switch_to+0xc0/0x128 [ 1101.017082] __schedule+0x3fc/0x898 [ 1101.017626] schedule+0x48/0xd8 [ 1101.017981] schedule_preempt_disabled+0x14/0x20 [ 1101.018519] __mutex_lock.isra.1+0x160/0x638 [ 1101.018899] __mutex_lock_slowpath+0x24/0x30 [ 1101.019291] mutex_lock+0x5c/0x68 [ 1101.019607] sp_mapping_create+0x118/0x1b0 [ 1101.019963] sp_init_group_master_locked.part.9+0x10c/0x288 [ 1101.020356] mg_sp_group_add_task.part.16+0x7dc/0xcd0 [ 1101.020750] mg_sp_group_add_task+0x54/0xd0 [ 1101.021120] dev_ioctl+0x360/0x1e20 [sharepool_dev] [ 1101.022171] __arm64_sys_ioctl+0xb0/0xe8 [ 1101.022695] el0_svc_common.constprop.0+0x88/0x268 [ 1101.023143] do_el0_svc+0x34/0xb8 [ 1101.023487] el0_svc+0x1c/0x28 [ 1101.023775] el0_sync_handler+0x8c/0xb0 [ 1101.024120] el0_sync+0x168/0x180
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 8cc37699149f..6fc24591e9b0 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3839,12 +3839,10 @@ static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *sp atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- down_read(&spa->spg->rw_lock); if (spg_valid(spa->spg)) /* k2u to group */ seq_printf(seq, "%-10d ", spa->spg->id); else /* spg is dead */ seq_printf(seq, "%-10s ", "Dead"); - up_read(&spa->spg->rw_lock);
seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start,
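For readers unfamiliar with the term, the ABBA pattern named above is the classic two-lock inversion. A generic sketch, deliberately unrelated to the actual sharepool call chains:

  #include <linux/mutex.h>

  static DEFINE_MUTEX(lock_a);
  static DEFINE_MUTEX(lock_b);

  /* Task one takes A then B... */
  static void task_one(void)
  {
  	mutex_lock(&lock_a);
  	mutex_lock(&lock_b);	/* sleeps if task_two() already holds B */
  	mutex_unlock(&lock_b);
  	mutex_unlock(&lock_a);
  }

  /*
   * ...task two takes B then A. Run concurrently, each task can end up
   * waiting forever for the lock the other already holds.
   */
  static void task_two(void)
  {
  	mutex_lock(&lock_b);
  	mutex_lock(&lock_a);	/* sleeps if task_one() already holds A */
  	mutex_unlock(&lock_a);
  	mutex_unlock(&lock_b);
  }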
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5OE1J
--------------------------------
Fix a deadlock indicated below:
[ 171.669844] Chain exists of: [ 171.669844] &mm->mmap_lock --> sp_group_sem --> &spg->rw_lock [ 171.669844] [ 171.671469] Possible unsafe locking scenario: [ 171.671469] [ 171.672121] CPU0 CPU1 [ 171.672415] ---- ---- [ 171.672706] lock(&spg->rw_lock); [ 171.673114] lock(sp_group_sem); [ 171.673706] lock(&spg->rw_lock); [ 171.674208] lock(&mm->mmap_lock); [ 171.674863] [ 171.674863] *** DEADLOCK ***
Sharepool takes its locks in the order: sp_group_sem --> &spg->rw_lock --> mm->mmap_lock. However, sp_check_mmap_addr() requests sp_group_sem while mm->mmap_lock is already held, i.e. mm->mmap_lock --> sp_group_sem. This causes an ABBA deadlock.
This happens in:
[ 171.642687] the existing dependency chain (in reverse order) is: [ 171.643745] [ 171.643745] -> #2 (&spg->rw_lock){++++}-{3:3}: [ 171.644639] __lock_acquire+0x6f4/0xc40 [ 171.645189] lock_acquire+0x2f0/0x3c8 [ 171.645631] down_read+0x64/0x2d8 [ 171.646075] proc_usage_by_group+0x50/0x258 (spg->rw_lock) [ 171.646542] idr_for_each+0x6c/0xf0 [ 171.647011] proc_group_usage_show+0x140/0x178 [ 171.647629] seq_read_iter+0xe4/0x498 [ 171.648217] proc_reg_read_iter+0xa8/0xe0 [ 171.648776] new_sync_read+0xfc/0x1a0 [ 171.649002] vfs_read+0x1ac/0x1c8 [ 171.649217] ksys_read+0x74/0xf8 [ 171.649596] __arm64_sys_read+0x24/0x30 [ 171.649934] el0_svc_common.constprop.0+0x8c/0x270 [ 171.650528] do_el0_svc+0x34/0xb8 [ 171.651069] el0_svc+0x1c/0x28 [ 171.651278] el0_sync_handler+0x8c/0xb0 [ 171.651636] el0_sync+0x168/0x180 [ 171.652118] [ 171.652118] -> #1 (sp_group_sem){++++}-{3:3}: [ 171.652692] __lock_acquire+0x6f4/0xc40 [ 171.653059] lock_acquire+0x2f0/0x3c8 [ 171.653303] down_read+0x64/0x2d8 [ 171.653704] mg_is_sharepool_addr+0x184/0x340 (&sp_group_sem) [ 171.654085] sp_check_mmap_addr+0x64/0x108 [ 171.654668] arch_get_unmapped_area_topdown+0x9c/0x528 [ 171.655370] thp_get_unmapped_area+0x54/0x68 [ 171.656170] get_unmapped_area+0x94/0x160 [ 171.656415] __do_mmap_mm+0xd4/0x540 [ 171.656629] do_mmap+0x98/0x648 [ 171.656838] vm_mmap_pgoff+0xc0/0x188 [ 171.657129] vm_mmap+0x6c/0x98 [ 171.657619] elf_map+0xe0/0x118 [ 171.657835] load_elf_binary+0x4ec/0xfd8 [ 171.658103] bprm_execve.part.9+0x3ec/0x840 [ 171.658448] bprm_execve+0x7c/0xb0 [ 171.658919] kernel_execve+0x18c/0x198 [ 171.659500] run_init_process+0xf0/0x108 [ 171.660073] try_to_run_init_process+0x20/0x58 [ 171.660558] kernel_init+0xcc/0x120 [ 171.660862] ret_from_fork+0x10/0x18 [ 171.661273] [ 171.661273] -> #0 (&mm->mmap_lock){++++}-{3:3}: [ 171.661885] check_prev_add+0xa4/0xbd8 [ 171.662229] validate_chain+0xf54/0x14b8 [ 171.662705] __lock_acquire+0x6f4/0xc40 [ 171.663310] lock_acquire+0x2f0/0x3c8 [ 171.663658] down_write+0x60/0x208 [ 171.664179] mg_sp_alloc+0x24c/0x1150 (mm->mmap_lock) [ 171.665245] dev_ioctl+0x1128/0x1fb8 [sharepool_dev] [ 171.665688] __arm64_sys_ioctl+0xb0/0xe8 [ 171.666250] el0_svc_common.constprop.0+0x8c/0x270 [ 171.667255] do_el0_svc+0x34/0xb8 [ 171.667806] el0_svc+0x1c/0x28 [ 171.668249] el0_sync_handler+0x8c/0xb0 [ 171.668661] el0_sync+0x168/0x180
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 6fc24591e9b0..28c18aef777d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,6 +73,9 @@
#define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
+#define MMAP_SHARE_POOL_DVPP_BASE 0x100000000000ULL +#define MMAP_SHARE_POOL_DVPP_END (MMAP_SHARE_POOL_DVPP_BASE + MMAP_SHARE_POOL_16G_SIZE * 64) + static int system_group_count;
/* idr of all sp_groups */ @@ -502,7 +505,9 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct return 0;
free_master: + mutex_lock(&master_list_lock); list_del(&master->list_node); + mutex_unlock(&master_list_lock); mm->sp_group_master = NULL; kfree(master);
@@ -3551,6 +3556,7 @@ int sp_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(sp_unregister_notifier);
+static bool is_sp_dvpp_addr(unsigned long addr); /** * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. @@ -3578,7 +3584,8 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
/* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id)) + device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id) + || !is_sp_dvpp_addr(start) || !is_sp_dvpp_addr(start + size)) return false;
ret = get_task(pid, &tsk); @@ -3622,34 +3629,19 @@ static bool is_sp_normal_addr(unsigned long addr) MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; }
+/* + * | 16G host | 16G device | ... | | + * ^ + * | + * MMAP_SHARE_POOL_DVPP_BASE + 16G * 64 + * We only check the device regions. + */ static bool is_sp_dvpp_addr(unsigned long addr) { - int i; - struct mm_struct *mm; - struct sp_group_master *master; - struct sp_mapping *spm_dvpp; - - mm = current->mm; - if (!mm) + if (addr < MMAP_SHARE_POOL_DVPP_BASE || addr >= MMAP_SHARE_POOL_DVPP_END) return false;
- down_read(&sp_group_sem); - master = mm->sp_group_master; - if (!master) { - up_read(&sp_group_sem); - return false; - } - - /* master->local and master->local->dvpp won't be NULL*/ - spm_dvpp = master->local->dvpp; - for (i = 0; i < MAX_DEVID; i++) { - if (addr >= spm_dvpp->start[i] && addr < spm_dvpp->end[i]) { - up_read(&sp_group_sem); - return true; - } - } - up_read(&sp_group_sem); - return false; + return (addr - MMAP_SHARE_POOL_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; }
/**
From: Wang Wensheng wangwensheng4@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PD4P
--------------------------------
[ 2058.802818][ T290] BUG: KASAN: use-after-free in get_process_sp_res+0x70/0x134 [ 2058.810194][ T290] Read of size 8 at addr ffff00088dc6ab28 by task test_debug_loop/290 [ 2058.820520][ T290] CPU: 5 PID: 290 Comm: test_debug_loop Tainted: G W OE 5.10.0+ #2 [ 2058.829377][ T290] Hardware name: EVB(EP) (DT) [ 2058.833982][ T290] Call trace: [ 2058.837217][ T290] dump_backtrace+0x0/0x30c [ 2058.841660][ T290] show_stack+0x20/0x30 [ 2058.845758][ T290] dump_stack+0x120/0x1b0 [ 2058.850028][ T290] print_address_description.constprop.0+0x2c/0x1fc [ 2058.856555][ T290] __kasan_report+0xfc/0x160 [ 2058.861086][ T290] kasan_report+0x44/0xb0 [ 2058.865356][ T290] __asan_load8+0x94/0xd0 [ 2058.869623][ T290] get_process_sp_res+0x70/0x134 [ 2058.874501][ T290] proc_usage_show+0x1ac/0x304 [ 2058.879208][ T290] seq_read_iter+0x254/0x750 [ 2058.883728][ T290] proc_reg_read_iter+0x100/0x140 [ 2058.888689][ T290] new_sync_read+0x1cc/0x2c0 [ 2058.893215][ T290] vfs_read+0x1f4/0x250 [ 2058.897304][ T290] ksys_read+0xcc/0x170 [ 2058.901399][ T290] __arm64_sys_read+0x4c/0x60 [ 2058.906016][ T290] el0_svc_common.constprop.0+0xb4/0x2a0 [ 2058.911584][ T290] do_el0_svc+0x8c/0xb0 [ 2058.915677][ T290] el0_svc+0x20/0x30 [ 2058.919503][ T290] el0_sync_handler+0xb0/0xbc [ 2058.924114][ T290] el0_sync+0x180/0x1c0 [ 2058.928190][ T290] [ 2058.930444][ T290] Allocated by task 2176: [ 2058.934714][ T290] kasan_save_stack+0x28/0x60 [ 2058.939328][ T290] __kasan_kmalloc.constprop.0+0xc8/0xf0 [ 2058.944909][ T290] kasan_kmalloc+0x10/0x20 [ 2058.949268][ T290] kmem_cache_alloc_trace+0x128/0xabc [ 2058.954577][ T290] create_spg_node+0x58/0x214 [ 2058.959188][ T290] local_group_add_task+0x30/0x14c [ 2058.964231][ T290] init_local_group+0xd0/0x1a0 [ 2058.968936][ T290] sp_init_group_master_locked.part.0+0x19c/0x290 [ 2058.975298][ T290] mg_sp_group_add_task+0x73c/0xdb0 [ 2058.980456][ T290] dev_sp_add_group+0x124/0x2dc [sharepool_dev] [ 2058.986647][ T290] dev_ioctl+0x21c/0x2ec [sharepool_dev] [ 2058.992222][ T290] __arm64_sys_ioctl+0xd8/0x120 [ 2058.997010][ T290] el0_svc_common.constprop.0+0xb4/0x2a0 [ 2059.002572][ T290] do_el0_svc+0x8c/0xb0 [ 2059.006662][ T290] el0_svc+0x20/0x30 [ 2059.010489][ T290] el0_sync_handler+0xb0/0xbc [ 2059.015101][ T290] el0_sync+0x180/0x1c0 [ 2059.019176][ T290] [ 2059.021427][ T290] Freed by task 4125: [ 2059.025343][ T290] kasan_save_stack+0x28/0x60 [ 2059.029949][ T290] kasan_set_track+0x28/0x40 [ 2059.034476][ T290] kasan_set_free_info+0x24/0x50 [ 2059.039347][ T290] __kasan_slab_free+0x104/0x1ac [ 2059.044227][ T290] kasan_slab_free+0x14/0x20 [ 2059.048744][ T290] kfree+0x164/0xb94 [ 2059.052576][ T290] sp_group_post_exit+0xf0/0x980 [ 2059.057448][ T290] mmput.part.0+0xb4/0x220 [ 2059.061790][ T290] mmput+0x2c/0x40 [ 2059.065450][ T290] exit_mm+0x27c/0x3a0 [ 2059.069450][ T290] do_exit+0x2a0/0x790 [ 2059.073448][ T290] do_group_exit+0x64/0x100 [ 2059.077884][ T290] get_signal+0x1fc/0x9fc [ 2059.082144][ T290] do_signal+0x110/0x2cc [ 2059.086320][ T290] do_notify_resume+0x158/0x2b0 [ 2059.091108][ T290] work_pending+0xc/0x6d4 [ 2059.095358][ T290]
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 28c18aef777d..23756fe9cbfc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4346,6 +4346,7 @@ void sp_group_post_exit(struct mm_struct *mm) /* match with refcount inc in sp_group_add_task */ if (atomic_dec_and_test(&spg->use_count)) free_sp_group_locked(spg); + list_del(&spg_node->group_node); kfree(spg_node); } up_write(&sp_group_sem);
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA2
--------------------------------
When a process is added to a group, mm->mm_users is increased by one; when a process is deleted from a group, mm->mm_users is decreased by one. The count cannot drop to zero here because this function is preceded by get_task_mm(), which holds an extra reference.
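A minimal sketch of the caller pattern that makes the plain decrement safe (illustrative only; the group-removal code and the error value are assumptions, not the exact function body):

    struct mm_struct *mm = get_task_mm(tsk);  /* extra reference: mm_users >= 2 here */

    if (!mm)
            return -ESRCH;
    /* ... remove the task from the group ... */
    atomic_dec(&mm->mm_users);                /* drop the reference taken at group-add time */
    mmput(mm);                                /* drop the get_task_mm() reference */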
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 23756fe9cbfc..0f87eb6782c3 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1623,11 +1623,7 @@ int mg_sp_group_del_task(int pid, int spg_id) list_del(&spg_node->group_node); mm->sp_group_master->count--; kfree(spg_node); - if (atomic_sub_and_test(1, &mm->mm_users)) { - up_write(&sp_group_sem); - WARN(1, "Invalid user counting\n"); - return -EINVAL; - } + atomic_dec(&mm->mm_users);
up_write(&sp_group_sem);
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA0
--------------------------------
The spa is still used during update_mem_usage(). Under concurrency with mg_sp_unshare(), the spa may already have been released at that point, so drop the spa reference only after its last use.
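A short sketch of the corrected ordering (names as in the diff below; allocation and error handling elided): the spa reference is dropped only after its last access, so a concurrent mg_sp_unshare() cannot free the spa while it is still being read.

    /* spa reference is held by the caller at this point */
    uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc);
    if (!IS_ERR(uva))
            update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK);
    __sp_area_drop(spa);    /* drop only after the last access to spa */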
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 0f87eb6782c3..de4e59e3fbbc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2727,7 +2727,6 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un spa->kva = kva; kc.sp_flags = sp_flags; uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc); - __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { @@ -2735,6 +2734,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; } + __sp_area_drop(spa);
return uva; } @@ -2786,9 +2786,9 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size,
out: up_read(&spg->rw_lock); - __sp_area_drop(spa); if (!IS_ERR(uva)) sp_update_process_stat(current, true, spa); + __sp_area_drop(spa);
return uva; }
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA4
--------------------------------
Check the maximum value of spg_id to ensure that it is within the valid range: either SPG_ID_DEFAULT or [SPG_ID_MIN, SPG_ID_AUTO).
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index de4e59e3fbbc..e490af418a33 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2247,7 +2247,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, return -EINVAL; }
- if (spg_id != SPG_ID_DEFAULT && spg_id < SPG_ID_MIN) { + if (spg_id != SPG_ID_DEFAULT && (spg_id < SPG_ID_MIN || spg_id >= SPG_ID_AUTO)) { pr_err_ratelimited("allocation failed, invalid group id %d\n", spg_id); return -EINVAL; }
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA6
--------------------------------
Use get_task_mm() to prevent the mm from being released while the information in mm_struct is being used.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index e490af418a33..1c862ca41736 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3756,7 +3756,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot) int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; struct sp_group_master *master; struct sp_proc_stat *proc_stat; struct sp_group_node *spg_node; @@ -3766,17 +3766,15 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, if (!sp_is_enabled()) return 0;
+ mm = get_task_mm(task); if (!mm) return 0;
down_read(&sp_group_sem); down_read(&mm->mmap_lock); master = mm->sp_group_master; - if (!master) { - up_read(&mm->mmap_lock); - up_read(&sp_group_sem); - return 0; - } + if (!master) + goto out;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat; @@ -3808,8 +3806,11 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, print_process_prot(m, spg_node->prot); seq_putc(m, '\n'); } + +out: up_read(&mm->mmap_lock); up_read(&sp_group_sem); + mmput(mm); return 0; }
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q
--------------------------------
"TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE" is puzzling.
MMAP_SHARE_POOL_START = MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE and MMAP_SHARE_POOL_16G_START = MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE make the memory layout unintuitive. Rework the macros so that each region's start and end are derived step by step from MMAP_SHARE_POOL_END.
Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/share_pool.h | 18 +++++++++++++----- mm/share_pool.c | 33 ++++++++++++++------------------- 2 files changed, 27 insertions(+), 24 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index ebf4b10a0965..b5fa0d4d59e0 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -213,11 +213,19 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ #define MMAP_SHARE_POOL_16G_SIZE 0x400000000UL -#define MMAP_SHARE_POOL_SIZE (MMAP_SHARE_POOL_NORMAL_SIZE + MMAP_SHARE_POOL_DVPP_SIZE) -/* align to 2M hugepage size, and MMAP_SHARE_POOL_TOP_16G_START should be align to 16G */ -#define MMAP_SHARE_POOL_END ((TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE) & ~((1 << 21) - 1)) -#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE) -#define MMAP_SHARE_POOL_16G_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE) +/* skip 8T for stack */ +#define MMAP_SHARE_POOL_SKIP 0x80000000000UL +#define MMAP_SHARE_POOL_END (TASK_SIZE - MMAP_SHARE_POOL_SKIP) +#define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) +/* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ +#define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) +#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_NORMAL_START (MMAP_SHARE_POOL_NORMAL_END - MMAP_SHARE_POOL_NORMAL_SIZE) +#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_NORMAL_START) + +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE 0x100000000000ULL +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_END (MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + \ + MMAP_SHARE_POOL_16G_SIZE * 64)
#ifdef CONFIG_ASCEND_SHARE_POOL
diff --git a/mm/share_pool.c b/mm/share_pool.c index 1c862ca41736..93ce238c5ae8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,9 +73,6 @@
#define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
-#define MMAP_SHARE_POOL_DVPP_BASE 0x100000000000ULL -#define MMAP_SHARE_POOL_DVPP_END (MMAP_SHARE_POOL_DVPP_BASE + MMAP_SHARE_POOL_16G_SIZE * 64) - static int system_group_count;
/* idr of all sp_groups */ @@ -290,12 +287,12 @@ static void sp_mapping_range_init(struct sp_mapping *spm)
for (i = 0; i < MAX_DEVID; i++) { if (spm->flag & SP_MAPPING_NORMAL) { - spm->start[i] = MMAP_SHARE_POOL_START; - spm->end[i] = MMAP_SHARE_POOL_16G_START; + spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; + spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; continue; }
- spm->start[i] = MMAP_SHARE_POOL_16G_START + + spm->start[i] = MMAP_SHARE_POOL_DVPP_START + i * MMAP_SHARE_POOL_16G_SIZE; spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } @@ -1856,7 +1853,7 @@ static struct sp_area *__find_sp_area_locked(struct sp_group *spg, { struct rb_node *n;
- if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) n = spg->normal->area_root.rb_node; else n = spg->dvpp->area_root.rb_node; @@ -1912,7 +1909,7 @@ static void sp_free_area(struct sp_area *spa)
lockdep_assert_held(&sp_area_lock);
- if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) spm = spa->spg->normal; else spm = spa->spg->dvpp; @@ -3552,7 +3549,7 @@ int sp_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(sp_unregister_notifier);
-static bool is_sp_dvpp_addr(unsigned long addr); +static bool is_sp_dynamic_dvpp_addr(unsigned long addr); /** * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. @@ -3581,7 +3578,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id) - || !is_sp_dvpp_addr(start) || !is_sp_dvpp_addr(start + size)) + || !is_sp_dynamic_dvpp_addr(start) || !is_sp_dynamic_dvpp_addr(start + size)) return false;
ret = get_task(pid, &tsk); @@ -3597,7 +3594,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) goto put_mm;
spm = spg->dvpp; - default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE; + default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start) goto put_spg; @@ -3618,11 +3615,9 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) } EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range);
-static bool is_sp_normal_addr(unsigned long addr) +static bool is_sp_reserve_addr(unsigned long addr) { - return addr >= MMAP_SHARE_POOL_START && - addr < MMAP_SHARE_POOL_16G_START + - MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; + return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END; }
/* @@ -3632,12 +3627,12 @@ static bool is_sp_normal_addr(unsigned long addr) * MMAP_SHARE_POOL_DVPP_BASE + 16G * 64 * We only check the device regions. */ -static bool is_sp_dvpp_addr(unsigned long addr) +static bool is_sp_dynamic_dvpp_addr(unsigned long addr) { - if (addr < MMAP_SHARE_POOL_DVPP_BASE || addr >= MMAP_SHARE_POOL_DVPP_END) + if (addr < MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE || addr >= MMAP_SHARE_POOL_DYNAMIC_DVPP_END) return false;
- return (addr - MMAP_SHARE_POOL_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; + return (addr - MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; }
/** @@ -3649,7 +3644,7 @@ static bool is_sp_dvpp_addr(unsigned long addr) bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && - ((is_sp_normal_addr(addr) || is_sp_dvpp_addr(addr))); + ((is_sp_reserve_addr(addr) || is_sp_dynamic_dvpp_addr(addr))); } EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q
--------------------------------
Now sp_mapping.flag is only used to distinguish sp_mapping types, so 'type' is a more suitable name for the field.
Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/share_pool.c | 53 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 93ce238c5ae8..08790abbd0c8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -151,7 +151,7 @@ struct spg_proc_stat { * address space management */ struct sp_mapping { - unsigned long flag; + unsigned long type; atomic_t user; unsigned long start[MAX_DEVID]; unsigned long end[MAX_DEVID]; @@ -263,12 +263,23 @@ static void sp_init_group_master_stat(int tgid, struct mm_struct *mm,
#define SP_MAPPING_DVPP 0x1 #define SP_MAPPING_NORMAL 0x2 + +static unsigned long sp_mapping_type(struct sp_mapping *spm) +{ + return spm->type; +} + +static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) +{ + spm->type = type; +} + static struct sp_mapping *sp_mapping_normal;
static void sp_mapping_add_to_list(struct sp_mapping *spm) { mutex_lock(&spm_list_lock); - if (spm->flag & SP_MAPPING_DVPP) + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_add_tail(&spm->spm_node, &spm_dvpp_list); mutex_unlock(&spm_list_lock); } @@ -276,7 +287,7 @@ static void sp_mapping_add_to_list(struct sp_mapping *spm) static void sp_mapping_remove_from_list(struct sp_mapping *spm) { mutex_lock(&spm_list_lock); - if (spm->flag & SP_MAPPING_DVPP) + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_del(&spm->spm_node); mutex_unlock(&spm_list_lock); } @@ -286,19 +297,23 @@ static void sp_mapping_range_init(struct sp_mapping *spm) int i;
for (i = 0; i < MAX_DEVID; i++) { - if (spm->flag & SP_MAPPING_NORMAL) { + switch (sp_mapping_type(spm)) { + case SP_MAPPING_NORMAL: spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; - spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; - continue; + spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; + break; + case SP_MAPPING_DVPP: + spm->start[i] = MMAP_SHARE_POOL_DVPP_START + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; + break; + default: + pr_err("Invalid sp_mapping type [%lu]\n", sp_mapping_type(spm)); + break; } - - spm->start[i] = MMAP_SHARE_POOL_DVPP_START + - i * MMAP_SHARE_POOL_16G_SIZE; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } }
-static struct sp_mapping *sp_mapping_create(unsigned long flag) +static struct sp_mapping *sp_mapping_create(unsigned long type) { struct sp_mapping *spm;
@@ -306,7 +321,7 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) if (!spm) return ERR_PTR(-ENOMEM);
- spm->flag = flag; + sp_mapping_set_type(spm, type); sp_mapping_range_init(spm); atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; @@ -325,18 +340,26 @@ static void sp_mapping_destroy(struct sp_mapping *spm) static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { atomic_inc(&spm->user); - if (spm->flag & SP_MAPPING_DVPP) { + + switch (sp_mapping_type(spm)) { + case SP_MAPPING_DVPP: spg->dvpp = spm; list_add_tail(&spg->mnode, &spm->group_head); - } else if (spm->flag & SP_MAPPING_NORMAL) + break; + case SP_MAPPING_NORMAL: spg->normal = spm; + break; + default: + break; + } }
static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { if (!spm) return; - if (spm->flag & SP_MAPPING_DVPP) + + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_del(&spg->mnode); if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm);
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q
--------------------------------
spg->dvpp and spg->normal can be combined into one array.
Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/share_pool.c | 79 +++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 35 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 08790abbd0c8..3c970b090552 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -147,6 +147,13 @@ struct spg_proc_stat { atomic64_t k2u_size; };
+enum sp_mapping_type { + SP_MAPPING_START, + SP_MAPPING_DVPP = SP_MAPPING_START, + SP_MAPPING_NORMAL, + SP_MAPPING_END, +}; + /* * address space management */ @@ -208,8 +215,7 @@ struct sp_group { struct rw_semaphore rw_lock; /* list node for dvpp mapping */ struct list_head mnode; - struct sp_mapping *dvpp; - struct sp_mapping *normal; + struct sp_mapping *mapping[SP_MAPPING_END]; };
/* a per-process(per mm) struct which manages a sp_group_node list */ @@ -261,9 +267,6 @@ static void sp_init_group_master_stat(int tgid, struct mm_struct *mm, get_task_comm(stat->comm, current); }
-#define SP_MAPPING_DVPP 0x1 -#define SP_MAPPING_NORMAL 0x2 - static unsigned long sp_mapping_type(struct sp_mapping *spm) { return spm->type; @@ -339,30 +342,29 @@ static void sp_mapping_destroy(struct sp_mapping *spm)
static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { + unsigned long type = sp_mapping_type(spm); atomic_inc(&spm->user);
- switch (sp_mapping_type(spm)) { - case SP_MAPPING_DVPP: - spg->dvpp = spm; + spg->mapping[type] = spm; + if (type == SP_MAPPING_DVPP) list_add_tail(&spg->mnode, &spm->group_head); - break; - case SP_MAPPING_NORMAL: - spg->normal = spm; - break; - default: - break; - } }
static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { + unsigned long type; + if (!spm) return;
- if (sp_mapping_type(spm) == SP_MAPPING_DVPP) + type = sp_mapping_type(spm); + + if (type == SP_MAPPING_DVPP) list_del(&spg->mnode); if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm); + + spg->mapping[type] = NULL; }
/* merge old mapping to new, and the old mapping would be destroyed */ @@ -375,7 +377,7 @@ static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old)
list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { list_move_tail(&spg->mnode, &new->group_head); - spg->dvpp = new; + spg->mapping[SP_MAPPING_DVPP] = new; }
atomic_add(atomic_read(&old->user), &new->user); @@ -409,8 +411,10 @@ static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) */ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { - struct sp_group_master *master = mm->sp_group_master; - struct sp_group *local = master->local; + struct sp_mapping *local_dvpp_mapping, *spg_dvpp_mapping; + + local_dvpp_mapping = mm->sp_group_master->local->mapping[SP_MAPPING_DVPP]; + spg_dvpp_mapping = spg->mapping[SP_MAPPING_DVPP];
if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { /* @@ -419,15 +423,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) * This may change the address range for the task or group implicitly, * give a warn for it. */ - bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp); + bool is_conflict = !can_mappings_merge(local_dvpp_mapping, spg_dvpp_mapping);
- if (is_mapping_empty(local->dvpp)) { - sp_mapping_merge(spg->dvpp, local->dvpp); + if (is_mapping_empty(local_dvpp_mapping)) { + sp_mapping_merge(spg_dvpp_mapping, local_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id); - } else if (is_mapping_empty(spg->dvpp)) { - sp_mapping_merge(local->dvpp, spg->dvpp); + } else if (is_mapping_empty(spg_dvpp_mapping)) { + sp_mapping_merge(local_dvpp_mapping, spg_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id); @@ -438,8 +442,8 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) } else { if (!(spg->flag & SPG_FLAG_NON_DVPP)) /* the mapping of local group is always set */ - sp_mapping_attach(spg, local->dvpp); - if (!spg->normal) + sp_mapping_attach(spg, local_dvpp_mapping); + if (!spg->mapping[SP_MAPPING_NORMAL]) sp_mapping_attach(spg, sp_mapping_normal); }
@@ -914,14 +918,19 @@ static void free_new_spg_id(bool new, int spg_id)
static void free_sp_group_locked(struct sp_group *spg) { + int type; + fput(spg->file); fput(spg->file_hugetlb); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); - sp_mapping_detach(spg, spg->dvpp); - sp_mapping_detach(spg, spg->normal); + + for (type = SP_MAPPING_START; type < SP_MAPPING_END; type++) + sp_mapping_detach(spg, spg->mapping[type]); + if (!is_local_group(spg->id)) system_group_count--; + kfree(spg); WARN(system_group_count < 0, "unexpected group count\n"); } @@ -1746,9 +1755,9 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, }
if (flags & SP_DVPP) - mapping = spg->dvpp; + mapping = spg->mapping[SP_MAPPING_DVPP]; else - mapping = spg->normal; + mapping = spg->mapping[SP_MAPPING_NORMAL];
if (!mapping) { pr_err_ratelimited("non DVPP spg, id %d\n", spg->id); @@ -1877,9 +1886,9 @@ static struct sp_area *__find_sp_area_locked(struct sp_group *spg, struct rb_node *n;
if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - n = spg->normal->area_root.rb_node; + n = spg->mapping[SP_MAPPING_NORMAL]->area_root.rb_node; else - n = spg->dvpp->area_root.rb_node; + n = spg->mapping[SP_MAPPING_DVPP]->area_root.rb_node;
while (n) { struct sp_area *spa; @@ -1933,9 +1942,9 @@ static void sp_free_area(struct sp_area *spa) lockdep_assert_held(&sp_area_lock);
if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - spm = spa->spg->normal; + spm = spa->spg->mapping[SP_MAPPING_NORMAL]; else - spm = spa->spg->dvpp; + spm = spa->spg->mapping[SP_MAPPING_DVPP];
if (spm->free_area_cache) { struct sp_area *cache; @@ -3616,7 +3625,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) if (IS_ERR(spg)) goto put_mm;
- spm = spg->dvpp; + spm = spg->mapping[SP_MAPPING_DVPP]; default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start)
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q
--------------------------------
Extract the logic for looking up an sp_mapping by address into a helper function, sp_mapping_find().
Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/share_pool.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 3c970b090552..031956fbd844 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -450,6 +450,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; }
+static inline struct sp_mapping *sp_mapping_find(struct sp_group *spg, + unsigned long addr) +{ + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) + return spg->mapping[SP_MAPPING_NORMAL]; + + return spg->mapping[SP_MAPPING_DVPP]; +} + static struct sp_group *create_spg(int spg_id, unsigned long flag); static void free_new_spg_id(bool new, int spg_id); static void free_sp_group_locked(struct sp_group *spg); @@ -1883,13 +1892,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, static struct sp_area *__find_sp_area_locked(struct sp_group *spg, unsigned long addr) { - struct rb_node *n; - - if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - n = spg->mapping[SP_MAPPING_NORMAL]->area_root.rb_node; - else - n = spg->mapping[SP_MAPPING_DVPP]->area_root.rb_node; - + struct sp_mapping *spm = sp_mapping_find(spg, addr); + struct rb_node *n = spm->area_root.rb_node; while (n) { struct sp_area *spa;
@@ -1941,11 +1945,7 @@ static void sp_free_area(struct sp_area *spa)
lockdep_assert_held(&sp_area_lock);
- if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - spm = spa->spg->mapping[SP_MAPPING_NORMAL]; - else - spm = spa->spg->mapping[SP_MAPPING_DVPP]; - + spm = sp_mapping_find(spa->spg, addr); if (spm->free_area_cache) { struct sp_area *cache;
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q
--------------------------------
1. Split the sharepool normal area (8T) into a sharepool read-only area (64G) and a sharepool normal area (8T - 64G).
2. User programs cannot write to addresses in the sharepool read-only area.
3. Add SP_PROT_FOCUS for sp_alloc.
4. sp_alloc with SP_PROT_RO | SP_PROT_FOCUS returns a virtual address within the sharepool read-only area (see the usage sketch below).
5. Other user programs that are added to the group with write permission still cannot write to addresses in the sharepool read-only area.
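A hypothetical caller-side sketch of the new flag combination (the size SZ_2M, the spg_id variable and the error handling are illustrative assumptions, not part of this patch):

    void *buf = mg_sp_alloc(SZ_2M, SP_PROT_RO | SP_PROT_FOCUS, spg_id);

    if (IS_ERR(buf))
            return PTR_ERR(buf);    /* mg_sp_alloc() returns an -errno pointer on failure */
    /* buf lies inside the 64G read-only area; user-space mappings of it
     * never get PROT_WRITE / VM_MAYWRITE */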
Signed-off-by: Chen Jun chenjun102@huawei.com --- include/linux/share_pool.h | 36 +++++++++++++++++++++----------- mm/share_pool.c | 42 +++++++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index b5fa0d4d59e0..1432aaa08087 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -17,6 +17,11 @@ #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) #define SP_PROT_RO (1 << 16) +/* + * SP_PROT_FOCUS should used with SP_PROT_RO, + * to alloc a memory within sharepool ro memory. + */ +#define SP_PROT_FOCUS (1 << 17)
#define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -26,7 +31,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS)
#define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | SP_PROT_RO | \ + SP_SPEC_NODE_ID | SP_PROT_RO | SP_PROT_FOCUS | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT))
@@ -113,19 +118,22 @@ struct sp_mapping { /* Processes in the same sp_group can share memory. * Memory layout for share pool: * - * |-------------------- 8T -------------------|---|------ 8T ------------| - * | Device 0 | Device 1 |...| | - * |----------------------------------------------------------------------| - * |------------- 16G -------------| 16G | | | - * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp normal memory | - * | sp | sp | | | | | - * |----------------------------------------------------------------------| + * |-------------------- 8T -------------------|---|---64G---|----- 8T-64G -----| + * | Device 0 | Device 1 |...| | | + * |-----------------------------------------------|---------|------------------| + * |------------- 16G -------------| 16G | | | | + * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp ro | sp normal memory | + * | sp | sp | | | | | | + * |----------------------------------------------------------------------------| * * The host SVM feature reserves 8T virtual memory by mmap, and due to the * restriction of DVPP, while SVM and share pool will both allocate memory * for DVPP, the memory have to be in the same 32G range. * - * Share pool reserves 16T memory, with 8T for normal uses and 8T for DVPP. + * Share pool reserves 16T memory, 8T-64G for normal uses, 64G for ro memory + * and 8T for DVPP. + * Within this 64G ro memory, user application will never have write permission + * to this memory address. * Within this 8T DVPP memory, SVM will call sp_config_dvpp_range() to * tell us which 16G memory range is reserved for share pool . * @@ -207,8 +215,10 @@ struct sp_walk_data {
#define MMAP_TOP_4G_SIZE 0x100000000UL
-/* 8T size */ -#define MMAP_SHARE_POOL_NORMAL_SIZE 0x80000000000UL +/* 8T - 64G size */ +#define MMAP_SHARE_POOL_NORMAL_SIZE 0x7F000000000UL +/* 64G */ +#define MMAP_SHARE_POOL_RO_SIZE 0x1000000000UL /* 8T size*/ #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ @@ -219,7 +229,9 @@ struct sp_walk_data { #define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) /* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ #define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) -#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_RO_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_RO_START (MMAP_SHARE_POOL_RO_END - MMAP_SHARE_POOL_RO_SIZE) +#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_RO_START) #define MMAP_SHARE_POOL_NORMAL_START (MMAP_SHARE_POOL_NORMAL_END - MMAP_SHARE_POOL_NORMAL_SIZE) #define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_NORMAL_START)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 031956fbd844..7b78a75f1bf7 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -151,6 +151,7 @@ enum sp_mapping_type { SP_MAPPING_START, SP_MAPPING_DVPP = SP_MAPPING_START, SP_MAPPING_NORMAL, + SP_MAPPING_RO, SP_MAPPING_END, };
@@ -278,6 +279,7 @@ static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) }
static struct sp_mapping *sp_mapping_normal; +static struct sp_mapping *sp_mapping_ro;
static void sp_mapping_add_to_list(struct sp_mapping *spm) { @@ -301,6 +303,10 @@ static void sp_mapping_range_init(struct sp_mapping *spm)
for (i = 0; i < MAX_DEVID; i++) { switch (sp_mapping_type(spm)) { + case SP_MAPPING_RO: + spm->start[i] = MMAP_SHARE_POOL_RO_START; + spm->end[i] = MMAP_SHARE_POOL_RO_END; + break; case SP_MAPPING_NORMAL: spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; @@ -445,6 +451,8 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) sp_mapping_attach(spg, local_dvpp_mapping); if (!spg->mapping[SP_MAPPING_NORMAL]) sp_mapping_attach(spg, sp_mapping_normal); + if (!spg->mapping[SP_MAPPING_RO]) + sp_mapping_attach(spg, sp_mapping_ro); }
return 0; @@ -456,6 +464,9 @@ static inline struct sp_mapping *sp_mapping_find(struct sp_group *spg, if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) return spg->mapping[SP_MAPPING_NORMAL];
+ if (addr >= MMAP_SHARE_POOL_RO_START && addr < MMAP_SHARE_POOL_RO_END) + return spg->mapping[SP_MAPPING_RO]; + return spg->mapping[SP_MAPPING_DVPP]; }
@@ -491,6 +502,7 @@ static int init_local_group(struct mm_struct *mm) } sp_mapping_attach(master->local, spm); sp_mapping_attach(master->local, sp_mapping_normal); + sp_mapping_attach(master->local, sp_mapping_ro);
ret = local_group_add_task(mm, spg); if (ret < 0) @@ -1485,6 +1497,10 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) unsigned long populate = 0; struct file *file = spa_file(spa); unsigned long addr; + unsigned long __prot = prot; + + if ((spa->flags & (SP_PROT_RO | SP_PROT_FOCUS)) == (SP_PROT_RO | SP_PROT_FOCUS)) + __prot &= ~PROT_WRITE;
__sp_area_drop_locked(prev); prev = spa; @@ -1497,7 +1513,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) spin_unlock(&sp_area_lock);
if (spa->type == SPA_TYPE_K2SPG && spa->kva) { - addr = sp_remap_kva_to_vma(spa->kva, spa, mm, prot, NULL); + addr = sp_remap_kva_to_vma(spa->kva, spa, mm, __prot, NULL); if (IS_ERR_VALUE(addr)) pr_warn("add group remap k2u failed %ld\n", addr);
@@ -1515,7 +1531,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) break; }
- addr = sp_mmap(mm, file, spa, &populate, prot, NULL); + addr = sp_mmap(mm, file, spa, &populate, __prot, NULL); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_lock); @@ -1763,7 +1779,13 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, return ERR_PTR(-EINVAL); }
- if (flags & SP_DVPP) + if (flags & SP_PROT_FOCUS) { + if ((flags & (SP_DVPP | SP_PROT_RO)) != SP_PROT_RO) { + pr_err("invalid sp_flags [%lx]\n", flags); + return ERR_PTR(-EINVAL); + } + mapping = spg->mapping[SP_MAPPING_RO]; + } else if (flags & SP_DVPP) mapping = spg->mapping[SP_MAPPING_DVPP]; else mapping = spg->mapping[SP_MAPPING_NORMAL]; @@ -3894,6 +3916,11 @@ static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *sp spin_unlock(&sp_area_lock); }
+static void spa_ro_stat_show(struct seq_file *seq) +{ + spa_stat_of_mapping_show(seq, sp_mapping_ro); +} + static void spa_normal_stat_show(struct seq_file *seq) { spa_stat_of_mapping_show(seq, sp_mapping_normal); @@ -4024,6 +4051,7 @@ static int spa_stat_show(struct seq_file *seq, void *offset) /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); + spa_ro_stat_show(seq); spa_normal_stat_show(seq); spa_dvpp_stat_show(seq); return 0; @@ -4403,9 +4431,17 @@ static int __init share_pool_init(void) goto fail; atomic_inc(&sp_mapping_normal->user);
+ sp_mapping_ro = sp_mapping_create(SP_MAPPING_RO); + if (IS_ERR(sp_mapping_ro)) + goto free_normal; + atomic_inc(&sp_mapping_ro->user); + proc_sharepool_init();
return 0; + +free_normal: + kfree(sp_mapping_normal); fail: pr_err("Ascend share pool initialization failed\n"); static_branch_disable(&share_pool_enabled_key);
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PZDX
--------------------------------
We can only determine whether a userspace mapping is huge-mapped after walking its page table. So uva_aligned should be recalculated after the page-table walk if the mapping turns out to be huge-mapped.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 7b78a75f1bf7..1c0b2a0a6823 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3189,6 +3189,9 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, sp_walk_data->pages = NULL; }
+ if (sp_walk_data->is_hugepage) + sp_walk_data->uva_aligned = ALIGN_DOWN(uva, PMD_SIZE); + return ret; }
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5QETC
--------------------------------
sp_make_share_k2u() only supports vmalloc addresses now, so delete the fallback handling for non-vmalloc addresses.
Also, a master is guaranteed not to be freed until master->node_list has been emptied, so the NULL check on spg_node->master can be dropped.
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 1c0b2a0a6823..32678b3e2175 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2658,12 +2658,11 @@ static int is_vmap_hugepage(unsigned long addr)
static unsigned long __sp_remap_get_pfn(unsigned long kva) { - unsigned long pfn; + unsigned long pfn = -EINVAL;
+ /* sp_make_share_k2u only support vmalloc address */ if (is_vmalloc_addr((void *)kva)) pfn = vmalloc_to_pfn((void *)kva); - else - pfn = virt_to_pfn(kva);
return pfn; } @@ -4074,11 +4073,6 @@ static int proc_usage_by_group(int id, void *p, void *data) list_for_each_entry(spg_node, &spg->procs, proc_node) {
master = spg_node->master; - if (!master) { - pr_info("master is NULL! process %d, group %d\n", - spg_node->instat.tgid, id); - continue; - } mm = master->mm; tgid = master->instat.tgid;
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5R0X9
--------------------------------
Fix an AA deadlock caused by nested locking in mg_sp_group_add_task().
Deadlock path:
mg_sp_group_add_task()
  down_write(sp_group_sem)
  find_or_alloc_sp_group()
    !spg_valid()
    sp_group_drop()
      free_sp_group()
        down_write(sp_group_sem)   ---> AA deadlock
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 32678b3e2175..55c3eb6ff9b0 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -963,6 +963,14 @@ static void free_sp_group(struct sp_group *spg) up_write(&sp_group_sem); }
+static void sp_group_drop_locked(struct sp_group *spg) +{ + lockdep_assert_held_write(&sp_group_sem); + + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group_locked(spg); +} + static void sp_group_drop(struct sp_group *spg) { if (atomic_dec_and_test(&spg->use_count)) @@ -1201,7 +1209,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag) down_read(&spg->rw_lock); if (!spg_valid(spg)) { up_read(&spg->rw_lock); - sp_group_drop(spg); + sp_group_drop_locked(spg); return ERR_PTR(-ENODEV); } up_read(&spg->rw_lock);
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5QQPG
--------------------------------
Add a zero-size check in mg_sp_make_share_k2u() to avoid passing a zero-sized spa to __insert_sp_area().
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 55c3eb6ff9b0..f7258b27f056 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2871,6 +2871,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; unsigned long kva_aligned, size_aligned;
+ if (!size) { + pr_err_ratelimited("k2u input size is 0.\n"); + return -EINVAL; + } + if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL;
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5RO2H
--------------------------------
When nr_hugepages is configured, sharepool allocates hugepages first from the hugetlb pool, then from the buddy system once the pool has been used up. The current page release path treats buddy-system hugepages as hugetlb pool pages, which causes HugePages_Rsvd to increase improperly.
Add a check to the page release path: if the page is temporary (i.e. allocated from the buddy system), do not call hugetlb_unreserve_pages().
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- fs/hugetlbfs/inode.c | 19 +++++++++++++------ mm/share_pool.c | 3 +-- 2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6f2943465bff..d5b9b267005d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -596,11 +596,17 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, */ VM_BUG_ON(HPageRestoreReserve(page)); remove_huge_page(page); - freed++; - if (!truncate_op) { - if (unlikely(hugetlb_unreserve_pages(inode, - index, index + 1, 1))) - hugetlb_fix_reserve_counts(inode); + /* + * if the page is from buddy system, do not add to freed. + * because freed is used for hugetlbfs reservation accounting. + */ + if (!HPageTemporary(page)) { + freed++; + if (!truncate_op) { + if (unlikely(hugetlb_unreserve_pages(inode, + index, index + 1, 1))) + hugetlb_fix_reserve_counts(inode); + } }
unlock_page(page); @@ -1053,7 +1059,8 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping, pgoff_t index = page->index;
remove_huge_page(page); - if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) + if (!HPageTemporary(page) && + unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) hugetlb_fix_reserve_counts(inode);
return 0; diff --git a/mm/share_pool.c b/mm/share_pool.c index f7258b27f056..8021dc47c1f1 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4225,8 +4225,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { - page = alloc_huge_page_nodemask(hstate_file(vma->vm_file), - node_id, NULL, GFP_KERNEL); + page = hugetlb_alloc_hugepage(node_id, HUGETLB_ALLOC_BUDDY); if (!page) page = ERR_PTR(-ENOMEM); }
From: Wang Wensheng wangwensheng4@huawei.com
When we allocate memory using SP_HUGEPAGE, we fall back to normal pages if there are not enough hugepages. The specified NUMA node information gets lost in that fallback, so memory may end up being allocated from a NUMA node other than the one specified.
The solution is to rebind the node before retrying.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 8021dc47c1f1..bfed3ab4fe7f 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2622,8 +2622,15 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) }
ret = sp_alloc_mmap_populate(spa, &ac); - if (ret && ac.state == ALLOC_RETRY) + if (ret && ac.state == ALLOC_RETRY) { + /* + * The mempolicy for shared memory is located at backend file, which varies + * between normal pages and huge pages. So we should set the mbind policy again + * when we retry using normal pages. + */ + ac.have_mbind = false; goto try_again; + }
out: sp_alloc_finish(ret, spa, &ac);
From: Guo Mengqi guomengqi3@huawei.com
If PF_MEMALLOC is set in current->flags, memcg will not check current's allocations against the memory usage limit, which can cause the system to run out of memory.
According to https://lkml.indiana.edu/hypermail/linux/kernel/0911.2/00576.html, PF_MEMALLOC should only be used when the allocation is certain to result in more memory being freed.
Do not use PF_MEMALLOC; instead, remove __GFP_RECLAIM from the gfp_mask to ensure that no reclaim happens.
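For reference, a minimal sketch of the pattern this patch removes and why it was harmful (the populate call mirrors the hunk in the diff below; the memcg force-charge behaviour for PF_MEMALLOC tasks is the upstream behaviour described above):

    unsigned int noreclaim_flag;

    noreclaim_flag = memalloc_noreclaim_save();   /* sets PF_MEMALLOC on current */
    /* every allocation below, including hugepage faults during populate,
     * is force-charged past the memcg limit while PF_MEMALLOC is set */
    do_mm_populate(mm, spa->va_start, ac->populate, 0);
    memalloc_noreclaim_restore(noreclaim_flag);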
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- include/linux/hugetlb.h | 6 ++++-- mm/hugetlb.c | 3 +++ mm/share_pool.c | 27 +++------------------------ 3 files changed, 10 insertions(+), 26 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 634630ebc8a7..2537c1269a5b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -621,9 +621,11 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, #define HUGETLB_ALLOC_NONE 0x00 #define HUGETLB_ALLOC_NORMAL 0x01 /* normal hugepage */ #define HUGETLB_ALLOC_BUDDY 0x02 /* buddy hugepage */ -#define HUGETLB_ALLOC_MASK (HUGETLB_ALLOC_NONE | \ +#define HUGETLB_ALLOC_NORECLAIM 0x04 /* no reclaim */ +#define HUGETLB_ALLOC_MASK (HUGETLB_ALLOC_NONE | \ HUGETLB_ALLOC_NORMAL | \ - HUGETLB_ALLOC_BUDDY) + HUGETLB_ALLOC_BUDDY | \ + HUGETLB_ALLOC_NORECLAIM)
const struct hstate *hugetlb_get_hstate(void); struct page *hugetlb_alloc_hugepage(int nid, int flag); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 34f3dfba5e82..acdc56e593af 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6176,6 +6176,9 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag) if (enable_charge_mighp) gfp_mask |= __GFP_ACCOUNT;
+ if (flag & HUGETLB_ALLOC_NORECLAIM) + gfp_mask &= ~__GFP_RECLAIM; + if (flag & HUGETLB_ALLOC_NORMAL) page = hugetlb_alloc_hugepage_normal(h, gfp_mask, nid); else if (flag & HUGETLB_ALLOC_BUDDY) diff --git a/mm/share_pool.c b/mm/share_pool.c index bfed3ab4fe7f..ba3e32da6c0d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2451,35 +2451,13 @@ static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac) static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, struct sp_alloc_context *ac) { - int ret = 0; - unsigned long sp_addr = spa->va_start; - unsigned int noreclaim_flag = 0; - - /* - * The direct reclaim and compact may take a long - * time. As a result, sp mutex will be hold for too - * long time to casue the hung task problem. In this - * case, set the PF_MEMALLOC flag to prevent the - * direct reclaim and compact from being executed. - * Since direct reclaim and compact are not performed - * when the fragmentation is severe or the memory is - * insufficient, 2MB continuous physical pages fail - * to be allocated. This situation is allowed. - */ - if (spa->is_hugepage) - noreclaim_flag = memalloc_noreclaim_save(); - /* * We are not ignoring errors, so if we fail to allocate * physical memory we just return failure, so we won't encounter * page fault later on, and more importantly sp_make_share_u2k() * depends on this feature (and MAP_LOCKED) to work correctly. */ - ret = do_mm_populate(mm, sp_addr, ac->populate, 0); - if (spa->is_hugepage) - memalloc_noreclaim_restore(noreclaim_flag); - - return ret; + return do_mm_populate(mm, spa->va_start, ac->populate, 0); }
static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len, @@ -4232,7 +4210,8 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { - page = hugetlb_alloc_hugepage(node_id, HUGETLB_ALLOC_BUDDY); + page = hugetlb_alloc_hugepage(node_id, + HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM); if (!page) page = ERR_PTR(-ENOMEM); }
From: Wang Wensheng wangwensheng4@huawei.com
This reverts commit 730bfcab402f846d64906d3d07fcbac9e1a9239e. --- include/linux/share_pool.h | 90 ++++++++++++++++++++++- mm/share_pool.c | 144 ++++++++++++++++++++++++++++++++----- 2 files changed, 214 insertions(+), 20 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 1432aaa08087..406b59aa76ec 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -250,31 +250,53 @@ static inline void sp_init_mm(struct mm_struct *mm) * Those interfaces are exported for modules */ extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); +extern int sp_group_add_task(int pid, int spg_id); + extern int mg_sp_group_del_task(int pid, int spg_id); +extern int sp_group_del_task(int pid, int spg_id); + extern int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num); +extern int sp_group_id_by_pid(int pid); + +extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)); extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task);
+extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); + +extern int sp_free(unsigned long addr, int id); extern int mg_sp_free(unsigned long addr, int id);
+extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id); extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); + +extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); -extern int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id);
+extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int id); + +extern int sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data); extern int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data);
+extern void sp_walk_page_free(struct sp_walk_data *sp_walk_data); extern void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data);
extern int sp_register_notifier(struct notifier_block *nb); extern int sp_unregister_notifier(struct notifier_block *nb);
+extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid);
+extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr);
+extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void);
extern void sp_area_drop(struct vm_area_struct *vma); @@ -326,11 +348,21 @@ static inline int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EPERM; }
+static inline int sp_group_add_task(int pid, int spg_id) +{ + return -EPERM; +} + static inline int mg_sp_group_del_task(int pid, int spg_id) { return -EPERM; }
+static inline int sp_group_del_task(int pid, int spg_id) +{ + return -EPERM; +} + static inline int sp_group_exit(struct mm_struct *mm) { return 0; @@ -345,38 +377,74 @@ static inline int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) return -EPERM; }
+static inline int sp_group_id_by_pid(int pid) +{ + return -EPERM; +} + static inline int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return -EPERM; }
+static inline void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id) +{ + return NULL; +} + static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { return NULL; }
+static inline int sp_free(unsigned long addr, int id) +{ + return -EPERM; +} + static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; }
+static inline void *sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id) +{ + return NULL; +} + static inline void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { return NULL; }
+static inline void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +{ + return NULL; +} + static inline void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { return NULL; }
+static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) +{ + return -EPERM; +} + static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; }
+static inline int sp_id_of_current(void) +{ + return -EPERM; +} + static inline int mg_sp_id_of_current(void) { return -EPERM; @@ -390,12 +458,22 @@ static inline void sp_area_drop(struct vm_area_struct *vma) { }
+static inline int sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return 0; +} + static inline int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { return 0; }
+static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ +} + static inline void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { } @@ -410,11 +488,21 @@ static inline int sp_unregister_notifier(struct notifier_block *nb) return -EPERM; }
+static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +{ + return false; +} + static inline bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { return false; }
+static inline bool is_sharepool_addr(unsigned long addr) +{ + return false; +} + static inline bool mg_is_sharepool_addr(unsigned long addr) { return false; diff --git a/mm/share_pool.c b/mm/share_pool.c index ba3e32da6c0d..e787ececc355 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1052,6 +1052,38 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) return spg; }
+/** + * sp_group_id_by_pid() - Get the sp_group ID of a process. + * @pid: pid of target process. + * + * Return: + * 0 the sp_group ID. + * -ENODEV target process doesn't belong to any sp_group. + */ +int sp_group_id_by_pid(int pid) +{ + struct sp_group *spg; + int spg_id = -ENODEV; + + if (!sp_is_enabled()) + return -EOPNOTSUPP; + + check_interrupt_context(); + + spg = __sp_find_spg(pid, SPG_ID_DEFAULT); + if (!spg) + return -ENODEV; + + down_read(&spg->rw_lock); + if (spg_valid(spg)) + spg_id = spg->id; + up_read(&spg->rw_lock); + + sp_group_drop(spg); + return spg_id; +} +EXPORT_SYMBOL_GPL(sp_group_id_by_pid); + /** * mp_sp_group_id_by_pid() - Get the sp_group ID array of a process. * @pid: pid of target process. @@ -1600,6 +1632,12 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_add_task);
+int sp_group_add_task(int pid, int spg_id) +{ + return mg_sp_group_add_task(pid, PROT_READ | PROT_WRITE, spg_id); +} +EXPORT_SYMBOL_GPL(sp_group_add_task); + /** * mg_sp_group_del_task() - delete a process from a sp group. * @pid: the pid of the task to be deleted @@ -1699,7 +1737,13 @@ int mg_sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_del_task);
-int mg_sp_id_of_current(void) +int sp_group_del_task(int pid, int spg_id) +{ + return mg_sp_group_del_task(pid, spg_id); +} +EXPORT_SYMBOL_GPL(sp_group_del_task); + +int sp_id_of_current(void) { int ret, spg_id; struct sp_group_master *master; @@ -1731,6 +1775,12 @@ int mg_sp_id_of_current(void)
return spg_id; } +EXPORT_SYMBOL_GPL(sp_id_of_current); + +int mg_sp_id_of_current(void) +{ + return sp_id_of_current(); +} EXPORT_SYMBOL_GPL(mg_sp_id_of_current);
/* the caller must hold sp_area_lock */ @@ -2199,7 +2249,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) }
/** - * mg_sp_free() - Free the memory allocated by mg_sp_alloc(). + * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. * @id: Address space identifier, which is used to distinguish the addr. * @@ -2208,7 +2258,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int mg_sp_free(unsigned long addr, int id) +int sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { @@ -2239,6 +2289,12 @@ int mg_sp_free(unsigned long addr, int id) out: return ret; } +EXPORT_SYMBOL_GPL(sp_free); + +int mg_sp_free(unsigned long addr, int id) +{ + return sp_free(addr, id); +} EXPORT_SYMBOL_GPL(mg_sp_free);
/* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ @@ -2565,7 +2621,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, }
/** - * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group. + * sp_alloc() - Allocate shared memory for all the processes in a sp_group. * @size: the size of memory to allocate. * @sp_flags: how to allocate the memory. * @spg_id: the share group that the memory is allocated to. @@ -2576,7 +2632,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, * * if succeed, return the starting address of the shared memory. * * if fail, return the pointer of -errno. */ -void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { struct sp_area *spa = NULL; int ret = 0; @@ -2617,6 +2673,12 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) else return (void *)(spa->va_start); } +EXPORT_SYMBOL_GPL(sp_alloc); + +void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +{ + return sp_alloc(size, sp_flags, spg_id); +} EXPORT_SYMBOL_GPL(mg_sp_alloc);
/** @@ -2917,7 +2979,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) }
/** - * mg_sp_make_share_k2u() - Share kernel memory to current process or an sp_group. + * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. * @kva: the VA of shared kernel memory. * @size: the size of shared kernel memory. * @sp_flags: how to allocate the memory. We only support SP_DVPP. @@ -2933,7 +2995,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) * * if succeed, return the shared user address to start at. * * if fail, return the pointer of -errno. */ -void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, +void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { void *uva; @@ -2971,6 +3033,13 @@ void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, out: return sp_k2u_finish(uva, &kc); } +EXPORT_SYMBOL_GPL(sp_make_share_k2u); + +void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id) +{ + return sp_make_share_k2u(kva, size, sp_flags, pid, spg_id); +} EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u);
static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -3209,7 +3278,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) }
/** - * mg_sp_make_share_u2k() - Share user memory of a specified process to kernel. + * sp_make_share_u2k() - Share user memory of a specified process to kernel. * @uva: the VA of shared user memory * @size: the size of shared user memory * @pid: the pid of the specified process(Not currently in use) @@ -3218,7 +3287,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) * * if success, return the starting kernel address of the shared memory. * * if failed, return the pointer of -errno. */ -void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { int ret = 0; struct mm_struct *mm = current->mm; @@ -3277,6 +3346,12 @@ void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) kvfree(sp_walk_data.pages); return p; } +EXPORT_SYMBOL_GPL(sp_make_share_u2k); + +void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +{ + return sp_make_share_u2k(uva, size, pid); +} EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k);
/* @@ -3483,7 +3558,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) }
/** - * mg_sp_unshare() - Unshare the kernel or user memory which shared by calling + * sp_unshare() - Unshare the kernel or user memory which shared by calling * sp_make_share_{k2u,u2k}(). * @va: the specified virtual address of memory * @size: the size of unshared memory @@ -3492,7 +3567,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) * * Return: 0 for success, -errno on failure. */ -int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id) +int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { int ret = 0;
@@ -3518,10 +3593,16 @@ int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id)
return ret; } +EXPORT_SYMBOL_GPL(sp_unshare); + +int mg_sp_unshare(unsigned long va, unsigned long size, int id) +{ + return sp_unshare(va, size, 0, id); +} EXPORT_SYMBOL_GPL(mg_sp_unshare);
/** - * mg_sp_walk_page_range() - Walk page table with caller specific callbacks. + * sp_walk_page_range() - Walk page table with caller specific callbacks. * @uva: the start VA of user memory. * @size: the size of user memory. * @tsk: task struct of the target task. @@ -3532,7 +3613,7 @@ EXPORT_SYMBOL_GPL(mg_sp_unshare); * When return 0, sp_walk_data describing [uva, uva+size) can be used. * When return -errno, information in sp_walk_data is useless. */ -int mg_sp_walk_page_range(unsigned long uva, unsigned long size, +int sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { struct mm_struct *mm; @@ -3571,13 +3652,20 @@ int mg_sp_walk_page_range(unsigned long uva, unsigned long size,
return ret; } +EXPORT_SYMBOL_GPL(sp_walk_page_range); + +int mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return sp_walk_page_range(uva, size, tsk, sp_walk_data); +} EXPORT_SYMBOL_GPL(mg_sp_walk_page_range);
/** - * mg_sp_walk_page_free() - Free the sp_walk_data structure. + * sp_walk_page_free() - Free the sp_walk_data structure. * @sp_walk_data: a structure of a page pointer array to be freed. */ -void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +void sp_walk_page_free(struct sp_walk_data *sp_walk_data) { if (!sp_is_enabled()) return; @@ -3589,6 +3677,12 @@ void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data)
__sp_walk_page_free(sp_walk_data); } +EXPORT_SYMBOL_GPL(sp_walk_page_free); + +void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ + sp_walk_page_free(sp_walk_data); +} EXPORT_SYMBOL_GPL(mg_sp_walk_page_free);
int sp_register_notifier(struct notifier_block *nb) @@ -3605,7 +3699,7 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier);
static bool is_sp_dynamic_dvpp_addr(unsigned long addr); /** - * mg_sp_config_dvpp_range() - User can config the share pool start address + * sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. * @start: the value of share pool start * @size: the value of share pool @@ -3616,7 +3710,7 @@ static bool is_sp_dynamic_dvpp_addr(unsigned long addr); * Return false if parameter invalid or has been set up. * This functuon has no concurrent problem. */ -bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { int ret; bool err = false; @@ -3667,6 +3761,12 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
return err; } +EXPORT_SYMBOL_GPL(sp_config_dvpp_range); + +bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +{ + return sp_config_dvpp_range(start, size, device_id, pid); +} EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range);
static bool is_sp_reserve_addr(unsigned long addr) @@ -3690,16 +3790,22 @@ static bool is_sp_dynamic_dvpp_addr(unsigned long addr) }
/** - * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. + * is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. * * Return true if addr belongs to share pool, or false vice versa. */ -bool mg_is_sharepool_addr(unsigned long addr) +bool is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && ((is_sp_reserve_addr(addr) || is_sp_dynamic_dvpp_addr(addr))); } +EXPORT_SYMBOL_GPL(is_sharepool_addr); + +bool mg_is_sharepool_addr(unsigned long addr) +{ + return is_sharepool_addr(addr); +} EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
int sp_node_id(struct vm_area_struct *vma)
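For orientation, a minimal sketch of how a caller could use the re-added non-mg wrappers (a hypothetical helper, not part of the patch; the pid, size and fallback to SPG_ID_DEFAULT are assumptions, and the signatures are taken from the hunks above):

	#include <linux/err.h>
	#include <linux/share_pool.h>
	#include <linux/sizes.h>

	/* Hypothetical helper: allocate 2MB of shared memory for pid's group. */
	static int example_share_2m(int pid)
	{
		int spg_id = sp_group_id_by_pid(pid);
		void *va;

		if (spg_id < 0)
			spg_id = SPG_ID_DEFAULT;	/* assumption: fall back to the default group */

		va = sp_alloc(SZ_2M, 0, spg_id);	/* sp_flags == 0: no special allocation flags */
		if (IS_ERR(va))
			return PTR_ERR(va);		/* sp_alloc() returns -errno as a pointer on failure */

		/* ... hand the user VA to whoever needs it ... */

		return sp_free((unsigned long)va, spg_id);
	}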
From: Guo Mengqi guomengqi3@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5EOOG
Reference: https://gitee.com/openeuler/kernel/commit/496cbac7e54d80026325e5648a78f93aa9...
--------------------------------
This reverts commit da76349ca8776aa7f8b186010005fb563fb163bb. However, the iommu_fault_param and iommu_fault_event structure changes are kept, as deprecated fields, to avoid a KABI change.
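As a side note on how the KABI layout is preserved (illustrative only; the tree's real _KABI_DEPRECATE macro may be defined differently): the deprecated member keeps occupying a slot of the same type, so the structure size and the offsets of the following members do not change.

	#include <linux/list.h>
	#include <linux/mutex.h>
	#include <linux/timer.h>

	/* EXAMPLE_KABI_DEPRECATE stands in for the tree's own macro. */
	#define EXAMPLE_KABI_DEPRECATE(type, name)	type kabi_deprecated_##name

	struct example_fault_param {
		struct list_head faults;
		EXAMPLE_KABI_DEPRECATE(struct timer_list, timer);	/* was: struct timer_list timer; */
		struct mutex lock;
	};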
Signed-off-by: Guo Mengqi guomengqi3@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/iommu.c | 55 ------------------------------------------- include/linux/iommu.h | 4 ++-- 2 files changed, 2 insertions(+), 57 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9116c93945d0..97953fa27630 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1084,39 +1084,6 @@ int iommu_group_unregister_notifier(struct iommu_group *group, } EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
-static void iommu_dev_fault_timer_fn(struct timer_list *t) -{ - struct iommu_fault_param *fparam = from_timer(fparam, t, timer); - struct iommu_fault_event *evt; - struct iommu_fault_page_request *prm; - - u64 now; - - now = get_jiffies_64(); - - /* The goal is to ensure driver or guest page fault handler(via vfio) - * send page response on time. Otherwise, limited queue resources - * may be occupied by some irresponsive guests or drivers. - * When per device pending fault list is not empty, we periodically checks - * if any anticipated page response time has expired. - * - * TODO: - * We could do the following if response time expires: - * 1. send page response code FAILURE to all pending PRQ - * 2. inform device driver or vfio - * 3. drain in-flight page requests and responses for this device - * 4. clear pending fault list such that driver can unregister fault - * handler(otherwise blocked when pending faults are present). - */ - list_for_each_entry(evt, &fparam->faults, list) { - prm = &evt->fault.prm; - if (time_after64(now, evt->expire)) - pr_err("Page response time expired!, pasid %d gid %d exp %llu now %llu\n", - prm->pasid, prm->grpid, evt->expire, now); - } - mod_timer(t, now + prq_timeout); -} - /** * iommu_register_device_fault_handler() - Register a device fault handler * @dev: the device @@ -1164,9 +1131,6 @@ int iommu_register_device_fault_handler(struct device *dev, mutex_init(¶m->fault_param->lock); INIT_LIST_HEAD(¶m->fault_param->faults);
- if (prq_timeout) - timer_setup(¶m->fault_param->timer, iommu_dev_fault_timer_fn, - TIMER_DEFERRABLE); done_unlock: mutex_unlock(¶m->lock);
@@ -1306,9 +1270,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) struct dev_iommu *param = dev->iommu; struct iommu_fault_event *evt_pending = NULL; struct iommu_fault_param *fparam; - struct timer_list *tmr; int ret = 0; - u64 exp;
if (!param || !evt || WARN_ON_ONCE(!iommu_fault_valid(&evt->fault))) return -EINVAL; @@ -1329,17 +1291,7 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) ret = -ENOMEM; goto done_unlock; } - /* Keep track of response expiration time */ - exp = get_jiffies_64() + prq_timeout; - evt_pending->expire = exp; mutex_lock(&fparam->lock); - if (list_empty(&fparam->faults)) { - /* First pending event, start timer */ - tmr = &fparam->timer; - WARN_ON(timer_pending(tmr)); - mod_timer(tmr, exp); - } - list_add_tail(&evt_pending->list, &fparam->faults); mutex_unlock(&fparam->lock); } @@ -1417,13 +1369,6 @@ int iommu_page_response(struct device *dev, break; }
- /* stop response timer if no more pending request */ - if (list_empty(¶m->fault_param->faults) && - timer_pending(¶m->fault_param->timer)) { - pr_debug("no pending PRQ, stop timer\n"); - del_timer(¶m->fault_param->timer); - } - done_unlock: mutex_unlock(¶m->fault_param->lock); return ret; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8baf5ed66a84..092384b71ab2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -393,7 +393,7 @@ struct iommu_device { struct iommu_fault_event { struct iommu_fault fault; struct list_head list; - u64 expire; + _KABI_DEPRECATE(u64, expire); };
/** @@ -408,7 +408,7 @@ struct iommu_fault_param { iommu_dev_fault_handler_t handler; void *data; struct list_head faults; - struct timer_list timer; + _KABI_DEPRECATE(struct timer_list, timer); struct mutex lock; };
From: Yuan Can yuancan@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5PZDX
--------------------------------
Export oom_type_notifier_call, map_kernel_range and __get_vm_area_caller for ascend drivers. Export pm_autosleep_set_state and __vmalloc_node_range. Export alloc_workqueue_attrs, free_workqueue_attrs and apply_workqueue_attrs.
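As a rough illustration of what an out-of-tree module could do with the workqueue exports (a hypothetical helper; the function name and the nice value are not from this series):

	#include <linux/workqueue.h>

	static int example_tune_unbound_wq(struct workqueue_struct *wq)
	{
		struct workqueue_attrs *attrs;
		int ret;

		attrs = alloc_workqueue_attrs();
		if (!attrs)
			return -ENOMEM;

		attrs->nice = -5;			/* illustrative priority only */
		ret = apply_workqueue_attrs(wq, attrs);	/* wq must be an unbound workqueue */
		free_workqueue_attrs(attrs);
		return ret;
	}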
Signed-off-by: Yuan Can yuancan@huawei.com --- kernel/power/autosleep.c | 1 + kernel/workqueue.c | 3 +++ mm/oom_kill.c | 1 + mm/vmalloc.c | 3 +++ 4 files changed, 8 insertions(+)
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index 9af5a50d3489..6aee5077fbfa 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -113,6 +113,7 @@ int pm_autosleep_set_state(suspend_state_t state) mutex_unlock(&autosleep_lock); return 0; } +EXPORT_SYMBOL_GPL(pm_autosleep_set_state);
int __init pm_autosleep_init(void) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6bef482a152b..0725f94c2439 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3352,6 +3352,7 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs) kfree(attrs); } } +EXPORT_SYMBOL_GPL(free_workqueue_attrs);
/** * alloc_workqueue_attrs - allocate a workqueue_attrs @@ -3377,6 +3378,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void) free_workqueue_attrs(attrs); return NULL; } +EXPORT_SYMBOL_GPL(alloc_workqueue_attrs);
static void copy_workqueue_attrs(struct workqueue_attrs *to, const struct workqueue_attrs *from) @@ -4091,6 +4093,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
return ret; } +EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
/** * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ffbe8fe2bbf6..dd2b4f890403 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1119,6 +1119,7 @@ int oom_type_notifier_call(unsigned int type, struct oom_control *oc)
return blocking_notifier_call_chain(&oom_type_notify_list, type, NULL); } +EXPORT_SYMBOL_GPL(oom_type_notifier_call); #endif
/** diff --git a/mm/vmalloc.c b/mm/vmalloc.c index dadbea29241d..d7a68eb0db42 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -643,6 +643,7 @@ int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, flush_cache_vmap(start, start + size); return ret; } +EXPORT_SYMBOL(map_kernel_range);
int is_vmalloc_or_module_addr(const void *x) { @@ -2460,6 +2461,7 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end, NUMA_NO_NODE, GFP_KERNEL, caller); } +EXPORT_SYMBOL(__get_vm_area_caller);
/** * get_vm_area - reserve a contiguous kernel virtual area @@ -3058,6 +3060,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return NULL; } +EXPORT_SYMBOL_GPL(__vmalloc_node_range);
/** * __vmalloc_node - allocate virtually contiguous memory
From: Xu Qiang xuqiang36@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PZDX
--------------------------------
Fix an issue where the CPU value passed to its_inc_lpi_count()/its_dec_lpi_count() can exceed nr_cpu_ids.
Signed-off-by: Xu Qiang xuqiang36@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 6b46cfdcb402..45d75bccb7e6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1535,6 +1535,11 @@ static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu)
static void its_inc_lpi_count(struct irq_data *d, int cpu) { +#ifdef CONFIG_ASCEND_INIT_ALL_GICR + if (cpu >= nr_cpu_ids) + return; +#endif + if (irqd_affinity_is_managed(d)) atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed); else @@ -1543,6 +1548,11 @@ static void its_inc_lpi_count(struct irq_data *d, int cpu)
static void its_dec_lpi_count(struct irq_data *d, int cpu) { +#ifdef CONFIG_ASCEND_INIT_ALL_GICR + if (cpu >= nr_cpu_ids) + return; +#endif + if (irqd_affinity_is_managed(d)) atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed); else
From: Jing-Ting Wu Jing-Ting.Wu@mediatek.com
Root cause: rebind_subsystems() holds no lock while it moves a css object from list A to list B, so a concurrent list_for_each_entry_rcu() walker can follow the moved node and end up treating list B's head as a css node.
Solution: Add grace period before invalidating the removed rstat_css_node.
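For context, the reader that the grace period protects looks roughly like the rstat flush path below (simplified, not part of this patch); without a grace period between list_del_rcu() and the list_add_rcu() onto the destination cgroup, this walker can follow the moved node into the new list and dereference that list's head as if it were a css:

	rcu_read_lock();
	list_for_each_entry_rcu(css, &cgrp->rstat_css_list, rstat_css_node)
		css->ss->css_rstat_flush(css, cpu);
	rcu_read_unlock();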
Reported-by: Jing-Ting Wu jing-ting.wu@mediatek.com Suggested-by: Michal Koutný mkoutny@suse.com Signed-off-by: Jing-Ting Wu jing-ting.wu@mediatek.com Tested-by: Jing-Ting Wu jing-ting.wu@mediatek.com Link: https://lore.kernel.org/linux-arm-kernel/d8f0bc5e2fb6ed259f9334c83279b4c0112... Acked-by: Mukesh Ojha quic_mojha@quicinc.com Fixes: a7df69b81aac ("cgroup: rstat: support cgroup1") Cc: stable@vger.kernel.org # v5.13+ Signed-off-by: Tejun Heo tj@kernel.org --- kernel/cgroup/cgroup.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 57f4e19df8c6..46d5c120c626 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1781,6 +1781,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
if (ss->css_rstat_flush) { list_del_rcu(&css->rstat_css_node); + synchronize_rcu(); list_add_rcu(&css->rstat_css_node, &dcgrp->rstat_css_list); }
From: Zhang Zekun zhangzekun11@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I53VVE
------------------------------------------
Add a config option to control the feature tracked in the bugzilla entry, which exports the function collect_procs().
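A hedged sketch of a module-side call with CONFIG_EXPORT_COLLECT_PROCS=y (the entries placed on the list are the to_kill records owned by mm/memory-failure.c, so consuming them is outside this sketch):

	#include <linux/mm.h>

	LIST_HEAD(tokill);

	if (PageHWPoison(page))
		collect_procs(page, &tokill, true);	/* true: force early-kill semantics */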
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + include/linux/mm.h | 3 +++ mm/Kconfig | 7 +++++++ mm/memory-failure.c | 7 +++++++ 5 files changed, 19 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 78a63cbc3db6..8dd8e7d0c5f3 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1110,6 +1110,7 @@ CONFIG_ARCH_HAS_PTE_SPECIAL=y CONFIG_PIN_MEMORY=y CONFIG_PID_RESERVE=y CONFIG_MEMORY_RELIABLE=y +CONFIG_EXPORT_COLLECT_PROCS=y
# # Data Access Monitoring diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f9c94b618ad4..9da862afc6b8 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1059,6 +1059,7 @@ CONFIG_ARCH_HAS_PKEYS=y # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y CONFIG_MAPPING_DIRTY_HELPERS=y +CONFIG_EXPORT_COLLECT_PROCS=y
# # Data Access Monitoring diff --git a/include/linux/mm.h b/include/linux/mm.h index a886f48b6a0e..03dbae9a3007 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3109,8 +3109,11 @@ extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); + +#ifdef CONFIG_EXPORT_COLLECT_PROCS extern void collect_procs(struct page *page, struct list_head *tokill, int force_early); +#endif
/* * Error handlers for various types of pages. diff --git a/mm/Kconfig b/mm/Kconfig index 4475bd9f8762..a36deacdf480 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -969,6 +969,13 @@ config MEMORY_RELIABLE To enable this function, mirrored memory is needed and "kernelcore=reliable" need to be added in kernel parameters.
+config EXPORT_COLLECT_PROCS + bool "Export the function collect_procs()" + default n + help + Export this function to collect the processes who have the page + mapped via collect_procs(). + source "mm/damon/Kconfig"
endmenu diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 509fe34a0421..d2784bf013bb 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -541,8 +541,13 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, /* * Collect the processes who have the corrupted page mapped to kill. */ +#ifdef CONFIG_EXPORT_COLLECT_PROCS void collect_procs(struct page *page, struct list_head *tokill, int force_early) +#else +static void collect_procs(struct page *page, struct list_head *tokill, + int force_early) +#endif { if (!page->mapping) return; @@ -552,7 +557,9 @@ void collect_procs(struct page *page, struct list_head *tokill, else collect_procs_file(page, tokill, force_early); } +#ifdef CONFIG_EXPORT_COLLECT_PROCS EXPORT_SYMBOL_GPL(collect_procs); +#endif
static const char *action_name[] = { [MF_IGNORED] = "Ignored",
From: Lijun Fang fanglijun3@huawei.com
--- drivers/gpio/gpiolib.c | 45 +++++++++++++++++++++++++++++++++----- include/asm-generic/gpio.h | 13 +++++++++++ include/linux/kernel.h | 6 +++++ 3 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 00526fdd7691..12b005e24ae8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -176,11 +176,34 @@ struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) } EXPORT_SYMBOL_GPL(gpiod_to_chip);
+#ifdef CONFIG_ACPI +int ascend_resize_ngpio __read_mostly; + +static int __init ascend_enable_resize_ngpio(char *s) +{ + ascend_resize_ngpio = 1; + + pr_info("Ascend enable resize ngpio features\n"); + + return 1; +} + +__setup("enable_resize_ngpio", ascend_enable_resize_ngpio); +#endif + +static int set_gpio_base(int ngpio) +{ + if (ascend_resize_ngpio) + return RESIZE_NR_GPIOS - ngpio; + + return ARCH_NR_GPIOS - ngpio; +} + /* dynamic allocation of GPIOs, e.g. on a hotplugged device */ static int gpiochip_find_base(int ngpio) { struct gpio_device *gdev; - int base = ARCH_NR_GPIOS - ngpio; + int base = set_gpio_base(ngpio);
list_for_each_entry_reverse(gdev, &gpio_devices, list) { /* found a free space? */ @@ -191,12 +214,22 @@ static int gpiochip_find_base(int ngpio) base = gdev->base - ngpio; }
- if (gpio_is_valid(base)) { - pr_debug("%s: found new base at %d\n", __func__, base); - return base; + if (ascend_resize_ngpio) { + if (resize_gpio_is_valid(base)) { + pr_debug("%s: found resize new base at %d\n", __func__, base); + return base; + } else { + pr_err("%s: cannot find resize free range\n", __func__); + return -ENOSPC; + } } else { - pr_err("%s: cannot find free range\n", __func__); - return -ENOSPC; + if (gpio_is_valid(base)) { + pr_debug("%s: found new base at %d\n", __func__, base); + return base; + } else { + pr_err("%s: cannot find free range\n", __func__); + return -ENOSPC; + } } }
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index aea9aee1f3e9..5a9319dfe917 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -32,6 +32,8 @@ #endif #endif
+#define RESIZE_NR_GPIOS 1024 + /* * "valid" GPIO numbers are nonnegative and may be passed to * setup routines like gpio_request(). only some valid numbers @@ -46,6 +48,11 @@ static inline bool gpio_is_valid(int number) return number >= 0 && number < ARCH_NR_GPIOS; }
+static inline bool resize_gpio_is_valid(int number) +{ + return number >= 0 && number < RESIZE_NR_GPIOS; +} + struct device; struct gpio; struct seq_file; @@ -146,6 +153,12 @@ static inline bool gpio_is_valid(int number) return number >= 0; }
+static inline bool resize_gpio_is_valid(int number) +{ + /* only non-negative numbers are valid */ + return number >= 0; +} + /* platforms that don't directly support access to GPIOs through I2C, SPI, * or other blocking infrastructure can use these wrappers. */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 78a0907f0b04..d2c4a87c012a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -216,6 +216,12 @@ static __always_inline void might_resched(void)
#endif /* CONFIG_PREEMPT_* */
+#ifdef CONFIG_ACPI +extern int ascend_resize_ngpio; +#else +#define ascend_resize_ngpio 0 +#endif + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP extern void ___might_sleep(const char *file, int line, int preempt_offset); extern void __might_sleep(const char *file, int line, int preempt_offset);
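Usage note (assumed from the __setup() hook above; not part of the patch): the larger dynamic GPIO base is opt-in, and per the #ifdef it only exists when CONFIG_ACPI is set. It is enabled from the kernel command line, e.g.

	... enable_resize_ngpio ...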
From: Wang Wensheng wangwensheng4@huawei.com
This kernel parameter is intended for the Ascend scenario and enables all the required options at once.
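Usage sketch (assumption: the parameter takes no value, matching the early_param() handler in the diff below): appending it to the kernel command line enables everything at once, e.g.

	... ascend_enable_all ...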
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/mm/init.c | 38 ++++++++++++++++++++++++++++++++++ mm/hugetlb.c | 2 +- mm/memcontrol.c | 2 +- 4 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f5ce1e3a532f..159481996630 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1729,7 +1729,7 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) #endif /* CONFIG_ARM64_E0PD */
#ifdef CONFIG_ARM64_PSEUDO_NMI -static bool enable_pseudo_nmi; +bool enable_pseudo_nmi;
static int __init early_enable_pseudo_nmi(char *p) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f5bd046f9e19..ddce006b1b22 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -31,6 +31,8 @@ #include <linux/hugetlb.h> #include <linux/acpi_iort.h> #include <linux/pin_mem.h> +#include <linux/suspend.h> +#include <linux/nmi.h>
#include <asm/boot.h> #include <asm/fixmap.h> @@ -696,3 +698,39 @@ void dump_mem_limit(void) pr_emerg("Memory Limit: none\n"); } } + +void ascend_enable_all_features(void) +{ + if (IS_ENABLED(CONFIG_ASCEND_DVPP_MMAP)) + enable_mmap_dvpp = 1; + +#ifdef CONFIG_ASCEND_CHARGE_MIGRATE_HUGEPAGES + extern int enable_charge_mighp; + enable_charge_mighp = 1; +#endif + + if (IS_ENABLED(CONFIG_SUSPEND)) + mem_sleep_current = PM_SUSPEND_ON; + + if (IS_ENABLED(CONFIG_MEMCG_KMEM)) { + extern bool cgroup_memory_nokmem; + cgroup_memory_nokmem = false; + } + +#ifdef CONFIG_ARM64_PSEUDO_NMI + extern bool enable_pseudo_nmi; + enable_pseudo_nmi = true; +#endif + +#ifdef CONFIG_CORELOCKUP_DETECTOR + enable_corelockup_detector = true; +#endif +} + +static int __init ascend_enable_setup(char *__unused) +{ + ascend_enable_all_features(); + + return 0; +} +early_param("ascend_enable_all", ascend_enable_setup); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index acdc56e593af..2b938cb7347f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6133,7 +6133,7 @@ void __init hugetlb_cma_check(void) #endif /* CONFIG_CMA */
#ifdef CONFIG_ASCEND_FEATURES -static int enable_charge_mighp __read_mostly; +int enable_charge_mighp __read_mostly;
const struct hstate *hugetlb_get_hstate(void) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7061f9283a34..9509232a8dbd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -84,7 +84,7 @@ DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg); static bool cgroup_memory_nosocket;
/* Kernel memory accounting disabled */ -static bool cgroup_memory_nokmem = true; +bool cgroup_memory_nokmem = true;
/* Whether the swap controller is active */ #ifdef CONFIG_MEMCG_SWAP
From: Fang Lijun fanglijun3@huawei.com
Signed-off-by: Fang Lijun fanglijun3@huawei.com --- arch/arm64/Kconfig | 8 ++++++++ arch/arm64/mm/init.c | 3 +++ include/linux/init.h | 1 + kernel/panic.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 28b4e754e856..c6aa794901e1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2052,6 +2052,14 @@ config ASCEND_CLEAN_CDM the management structures for hbm nodes to the ddr nodes of the same partion to reduce the probability of kernel crashes.
+config ASCEND_FDM + bool "Enable support for fdm" + default n + help + Fdm write an interrupt to the register to activate HBM ECC check in panic + + This option only enabled in ascend910 now. + config ASCEND_OOM bool "Enable support for disable oom killer" default y diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ddce006b1b22..5c6da9ec499c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -717,6 +717,9 @@ void ascend_enable_all_features(void) cgroup_memory_nokmem = false; }
+ if (IS_ENABLED(CONFIG_ASCEND_FDM)) + ascend_fdm_enable = true; + #ifdef CONFIG_ARM64_PSEUDO_NMI extern bool enable_pseudo_nmi; enable_pseudo_nmi = true; diff --git a/include/linux/init.h b/include/linux/init.h index 7b53cb3092ee..885e9b2e7951 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -162,6 +162,7 @@ extern void (*late_time_init)(void);
extern bool initcall_debug;
+extern int ascend_fdm_enable; #endif
#ifndef MODULE diff --git a/kernel/panic.c b/kernel/panic.c index d991c3b1b559..e6c47ab9c4a6 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -58,6 +58,11 @@ bool panic_on_taint_nousertaint = false; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout);
+int ascend_fdm_enable; +char *fdm_base_reg; +#define FDM_BASE_ADDR 0x202010000 +#define FDM_SIZE 0x1000 + #define PANIC_PRINT_TASK_INFO 0x00000001 #define PANIC_PRINT_MEM_INFO 0x00000002 #define PANIC_PRINT_TIMER_INFO 0x00000004 @@ -166,6 +171,28 @@ static void panic_print_sys_info(void) ftrace_dump(DUMP_ALL); }
+static int remap_fdm_base(void) +{ + fdm_base_reg = ioremap(FDM_BASE_ADDR, FDM_SIZE); + if (!fdm_base_reg) + return -ENOMEM; + return 0; +} + +static void enable_fdm(void) +{ + u32 val; + + if (fdm_base_reg == NULL) + return; + val = readl(fdm_base_reg + 0x20); + writel(val, fdm_base_reg + 0x2C); + writel(0xFFFFFF00, fdm_base_reg + 0x04); + writel(0xFFFFFF00, fdm_base_reg + 0x24); + writel(0xFFFFFF00, fdm_base_reg + 0x14); + writel(0x1, fdm_base_reg + 0x18); +} + /** * panic - halt the system * @fmt: The text string to print @@ -183,6 +210,8 @@ void panic(const char *fmt, ...) int old_cpu, this_cpu; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
+ if (ascend_fdm_enable) + enable_fdm(); /* * Disable local interrupts. This will prevent panic_smp_self_stop * from deadlocking the first cpu that invokes the panic, since @@ -693,6 +722,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(clear_warn_once_fops, NULL, clear_warn_once_set,
static __init int register_warn_debugfs(void) { + if (remap_fdm_base()) + pr_err("remap fdm base failed!\n"); /* Don't care about failure */ debugfs_create_file_unsafe("clear_warn_once", 0200, NULL, NULL, &clear_warn_once_fops);
From: Wang Wensheng wangwensheng4@huawei.com
ascend inclusion category: feature bugzilla: NA CVE: NA
------------------------------------------------------------------
Normally, pages cannot be allocated from a CDM node, except for explicit allocation requests from the kernel or from a user process whose MPOL_BIND policy includes CDM nodes.
The following situation may then occur: within the memcg limit, the CDM nodes still have a large amount of free memory while the other nodes have no free memory left, so the kernel or a user process cannot obtain the memory it needs.
For example: size of CDM: A MB, size of non-CDM: B MB, memcg limit: C MB, with A, B < C < (A + B). If the apps (user apps and OS service apps) use up the non-CDM memory, a large amount of CDM memory may still be available; since an OS service app cannot get pages from CDM nodes, its page allocations fail. This is not what we expect: we want the memcg to restrict the memory used by certain user apps so that enough memory remains available for system services.
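To make the inequality concrete (illustrative numbers only): take A = 4096 MB of CDM, B = 4096 MB of non-CDM and a memcg limit C = 6144 MB, so that A, B < C < A + B. Because CDM usage is charged, C cannot be set below A without also blocking the CDM memory the user apps legitimately need; but with C > B those apps can also consume all 4096 MB of non-CDM memory, and an OS service app that may not allocate from CDM nodes then fails even though CDM memory is still free. If CDM usage is not charged, C only needs to cover the non-CDM share and can be set low enough to keep non-CDM memory in reserve for system services.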
Therefore, CDM memory does not need to be charged to the memcg; the total size of the CDM is already a limit in itself.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/memcontrol.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9509232a8dbd..93fa3cb7a269 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3053,6 +3053,11 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
objcg = get_obj_cgroup_from_current(); if (objcg) { + if (!mem_cgroup_is_root(objcg->memcg) && is_cdm_node(page_to_nid(page))) { + obj_cgroup_put(objcg); + return 0; + } + ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order); if (!ret) { page->memcg_data = (unsigned long)objcg | @@ -7009,6 +7014,9 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) if (!memcg) memcg = get_mem_cgroup_from_mm(mm);
+ if (!mem_cgroup_is_root(memcg) && is_cdm_node(page_to_nid(page))) + goto out; + ret = try_charge(memcg, gfp_mask, nr_pages); if (ret) goto out_put;
From: Chen Jun chenjun102@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I584X2
Reference: https://gitee.com/openeuler/kernel/commit/744d0990ad137b26241934ea5174791722...
--------------------------------
Add support for HiSilicon SoC L3T PMU
Signed-off-by: Chen Jun chenjun102@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com
Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/perf/hisilicon/Makefile | 3 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c | 403 +++++++++++++++++++ 3 files changed, 406 insertions(+), 2 deletions(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c
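Before the diff, a hedged usage sketch once the driver is loaded (the sccl/l3t instance numbers are examples; the PMU name format and event names come from the driver added below):

	# count L3T read/write pipeline events system-wide for one second
	perf stat -a -e hisi_sccl1_l3t0/rd_cpipe/ -e hisi_sccl1_l3t0/wr_cpipe/ sleep 1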
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 7643c9f93e36..18abcb612216 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o + hisi_uncore_pa_pmu.o \ + hisi_uncore_l3t_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 2c71a8971723..8b35f7cb4f38 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -608,7 +608,7 @@ static int __init hisi_l3c_pmu_module_init(void) int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, - "AP_PERF_ARM_HISI_L3_ONLINE", + "AP_PERF_ARM_HISI_L3T_ONLINE", hisi_uncore_pmu_online_cpu, hisi_uncore_pmu_offline_cpu); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c new file mode 100644 index 000000000000..f414dc1736aa --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC L3T uncore Hardware event counters support + * + * Copyright (C) 2017 Hisilicon Limited + * Author: Anurup M anurup.m@huawei.com + * Shaokun Zhang zhangshaokun@hisilicon.com + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. + */ +#include <linux/acpi.h> +#include <linux/bug.h> +#include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/list.h> +#include <linux/smp.h> + +#include "hisi_uncore_pmu.h" + +/* L3T register definition */ +#define L3T_PERF_CTRL 0x0408 +#define L3T_INT_MASK 0x0800 +#define L3T_INT_STATUS 0x0808 +#define L3T_INT_CLEAR 0x080c +#define L3T_EVENT_CTRL 0x1c00 +#define L3T_VERSION 0x1cf0 +#define L3T_EVENT_TYPE0 0x1d00 +/* + * If the HW version only supports a 48-bit counter, then + * bits [63:48] are reserved, which are Read-As-Zero and + * Writes-Ignored. + */ +#define L3T_CNTR0_LOWER 0x1e00 + +/* L3T has 8-counters */ +#define L3T_NR_COUNTERS 0x8 + +#define L3T_PERF_CTRL_EN 0x20000 +#define L3T_EVTYPE_NONE 0xff +#define L3T_NR_EVENTS 0x59 + +/* + * Select the counter register offset using the counter index + */ +static u32 hisi_l3t_pmu_get_counter_offset(int cntr_idx) +{ + return (L3T_CNTR0_LOWER + (cntr_idx * 8)); +} + +static u64 hisi_l3t_pmu_read_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + return readq(l3t_pmu->base + hisi_l3t_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_l3t_pmu_write_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, l3t_pmu->base + hisi_l3t_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_l3t_pmu_write_evtype(struct hisi_pmu *l3t_pmu, int idx, + u32 type) +{ + u32 reg, reg_idx, shift, val; + + /* + * Select the appropriate event select register(L3T_EVENT_TYPE0/1). + * There are 2 event select registers for the 8 hardware counters. + * Event code is 8-bits and for the former 4 hardware counters, + * L3T_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, + * L3T_EVENT_TYPE1 is chosen. + */ + reg = L3T_EVENT_TYPE0 + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + /* Write event code to L3T_EVENT_TYPEx Register */ + val = readl(l3t_pmu->base + reg); + val &= ~(L3T_EVTYPE_NONE << shift); + val |= (type << shift); + writel(val, l3t_pmu->base + reg); +} + +static void hisi_l3t_pmu_start_counters(struct hisi_pmu *l3t_pmu) +{ + u32 val; + + /* + * Set perf_enable bit in L3T_PERF_CTRL register to start counting + * for all enabled counters. + */ + val = readl(l3t_pmu->base + L3T_PERF_CTRL); + val |= L3T_PERF_CTRL_EN; + writel(val, l3t_pmu->base + L3T_PERF_CTRL); +} + +static void hisi_l3t_pmu_stop_counters(struct hisi_pmu *l3t_pmu) +{ + u32 val; + + /* + * Clear perf_enable bit in L3T_PERF_CTRL register to stop counting + * for all enabled counters. 
+ */ + val = readl(l3t_pmu->base + L3T_PERF_CTRL); + val &= ~(L3T_PERF_CTRL_EN); + writel(val, l3t_pmu->base + L3T_PERF_CTRL); +} + +static void hisi_l3t_pmu_enable_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index in L3T_EVENT_CTRL register */ + val = readl(l3t_pmu->base + L3T_EVENT_CTRL); + val |= (1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_EVENT_CTRL); +} + +static void hisi_l3t_pmu_disable_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index in L3T_EVENT_CTRL register */ + val = readl(l3t_pmu->base + L3T_EVENT_CTRL); + val &= ~(1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_EVENT_CTRL); +} + +static void hisi_l3t_pmu_enable_counter_int(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(l3t_pmu->base + L3T_INT_MASK); + /* Write 0 to enable interrupt */ + val &= ~(1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_INT_MASK); +} + +static void hisi_l3t_pmu_disable_counter_int(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(l3t_pmu->base + L3T_INT_MASK); + /* Write 1 to mask interrupt */ + val |= (1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_INT_MASK); +} + +static u32 hisi_l3t_pmu_get_int_status(struct hisi_pmu *l3t_pmu) +{ + return readl(l3t_pmu->base + L3T_INT_STATUS); +} + +static void hisi_l3t_pmu_clear_int_status(struct hisi_pmu *l3t_pmu, int idx) +{ + writel(1 << idx, l3t_pmu->base + L3T_INT_CLEAR); +} + +static const struct acpi_device_id hisi_l3t_pmu_acpi_match[] = { + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_l3t_pmu_acpi_match); + +static const struct of_device_id l3t_of_match[] = { + { .compatible = "hisilicon,l3t-pmu", }, + {}, +}; + +static int hisi_l3t_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *l3t_pmu) +{ + /* + * Use the SCCL_ID and CCL_ID to identify the L3T PMU, while + * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &l3t_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read l3t sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", + &l3t_pmu->ccl_id)) { + dev_err(&pdev->dev, "Can not read l3t ccl-id!\n"); + return -EINVAL; + } + + l3t_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(l3t_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for l3t_pmu resource\n"); + return PTR_ERR(l3t_pmu->base); + } + + l3t_pmu->identifier = readl(l3t_pmu->base + L3T_VERSION); + + return 0; +} + +static struct attribute *hisi_l3t_pmu_v1_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL, +}; + +static const struct attribute_group hisi_l3t_pmu_v1_format_group = { + .name = "format", + .attrs = hisi_l3t_pmu_v1_format_attr, +}; + +static struct attribute *hisi_l3t_pmu_v1_events_attr[] = { + HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), + HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), + HISI_PMU_EVENT_ATTR(rd_hit_cpipe, 0x02), + HISI_PMU_EVENT_ATTR(wr_hit_cpipe, 0x03), + HISI_PMU_EVENT_ATTR(victim_num, 0x04), + HISI_PMU_EVENT_ATTR(rd_spipe, 0x20), + HISI_PMU_EVENT_ATTR(wr_spipe, 0x21), + HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x22), + HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x23), + HISI_PMU_EVENT_ATTR(back_invalid, 0x29), + HISI_PMU_EVENT_ATTR(retry_cpu, 0x40), + HISI_PMU_EVENT_ATTR(retry_ring, 0x41), + HISI_PMU_EVENT_ATTR(prefetch_drop, 0x42), + NULL, +}; + +static const struct attribute_group hisi_l3t_pmu_v1_events_group = { + .name = "events", + .attrs = hisi_l3t_pmu_v1_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_l3t_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_l3t_pmu_cpumask_attr_group = { + .attrs = hisi_l3t_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_l3t_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_l3t_pmu_identifier_attrs[] = { + &hisi_l3t_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_l3t_pmu_identifier_group = { + .attrs = hisi_l3t_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_l3t_pmu_v1_attr_groups[] = { + &hisi_l3t_pmu_v1_format_group, + &hisi_l3t_pmu_v1_events_group, + &hisi_l3t_pmu_cpumask_attr_group, + &hisi_l3t_pmu_identifier_group, + NULL, +}; + +static const struct hisi_uncore_ops hisi_uncore_l3t_ops = { + .write_evtype = hisi_l3t_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_l3t_pmu_start_counters, + .stop_counters = hisi_l3t_pmu_stop_counters, + .enable_counter = hisi_l3t_pmu_enable_counter, + .disable_counter = hisi_l3t_pmu_disable_counter, + .enable_counter_int = hisi_l3t_pmu_enable_counter_int, + .disable_counter_int = hisi_l3t_pmu_disable_counter_int, + .write_counter = hisi_l3t_pmu_write_counter, + .read_counter = hisi_l3t_pmu_read_counter, + .get_int_status = hisi_l3t_pmu_get_int_status, + .clear_int_status = hisi_l3t_pmu_clear_int_status, +}; + +static int hisi_l3t_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *l3t_pmu) +{ + int ret; + + ret = hisi_l3t_pmu_init_data(pdev, l3t_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(l3t_pmu, pdev); + if (ret) + return ret; + + l3t_pmu->counter_bits = 48; + l3t_pmu->check_event = L3T_NR_EVENTS; + l3t_pmu->pmu_events.attr_groups = hisi_l3t_pmu_v1_attr_groups; + + 
l3t_pmu->num_counters = L3T_NR_COUNTERS; + l3t_pmu->ops = &hisi_uncore_l3t_ops; + l3t_pmu->dev = &pdev->dev; + l3t_pmu->on_cpu = -1; + + return 0; +} + +static int hisi_l3t_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *l3t_pmu; + char *name; + int ret; + + l3t_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3t_pmu), GFP_KERNEL); + if (!l3t_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, l3t_pmu); + + ret = hisi_l3t_pmu_dev_probe(pdev, l3t_pmu); + if (ret) + return ret; + + if (device_property_read_u32(&pdev->dev, "hisilicon,index-id", &l3t_pmu->index_id)) { + dev_err(&pdev->dev, "Can not read l3t index-id!\n"); + return -EINVAL; + } + + /* + * CCL_ID is used to identify the L3T in the same SCCL which was + * used _UID by mistake. + */ + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3t%u", + l3t_pmu->sccl_id, l3t_pmu->index_id); + l3t_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = l3t_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + /* Pick one core to use for cpumask attributes */ + cpumask_set_cpu(smp_processor_id(), &l3t_pmu->associated_cpus); + + l3t_pmu->on_cpu = cpumask_first(&l3t_pmu->associated_cpus); + if (l3t_pmu->on_cpu >= nr_cpu_ids) + return -EINVAL; + + ret = perf_pmu_register(&l3t_pmu->pmu, name, -1); + + return ret; +} + +static int hisi_l3t_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *l3t_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&l3t_pmu->pmu); + + return 0; +} + +static struct platform_driver hisi_l3t_pmu_driver = { + .driver = { + .name = "hisi_l3t_pmu", + .acpi_match_table = ACPI_PTR(hisi_l3t_pmu_acpi_match), + .of_match_table = l3t_of_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_l3t_pmu_probe, + .remove = hisi_l3t_pmu_remove, +}; + +static int __init hisi_l3t_pmu_module_init(void) +{ + int ret; + + ret = platform_driver_register(&hisi_l3t_pmu_driver); + + return ret; +} +module_init(hisi_l3t_pmu_module_init); + +static void __exit hisi_l3t_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_l3t_pmu_driver); +} +module_exit(hisi_l3t_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC L3T uncore PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Anurup M anurup.m@huawei.com"); +MODULE_AUTHOR("Shaokun Zhang zhangshaokun@hisilicon.com");
From: Chen Jun chenjun102@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I584X2
Reference: https://gitee.com/openeuler/kernel/commit/71a2e2250c79987a2a8804674dd3b41491...
--------------------------------
Add support for HiSilicon SoC LPDDRC PMU
Signed-off-by: Chen Jun chenjun102@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com
Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/perf/hisilicon/Makefile | 3 +- .../perf/hisilicon/hisi_uncore_lpddrc_pmu.c | 408 ++++++++++++++++++ 2 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c
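A hedged usage sketch, mirroring the L3T example above (instance numbers are examples; the PMU name format and event names come from the driver added below):

	# count LPDDRC read/write flux events system-wide for one second
	perf stat -a -e hisi_sccl1_lpddrc0/flux_rd/ -e hisi_sccl1_lpddrc0/flux_wr/ sleep 1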
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 18abcb612216..22e384cdfd53 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ hisi_uncore_pa_pmu.o \ - hisi_uncore_l3t_pmu.o + hisi_uncore_l3t_pmu.o \ + hisi_uncore_lpddrc_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c new file mode 100644 index 000000000000..03a4bb1a9948 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC LPDDRC uncore Hardware event counters support + * + * Copyright (C) 2017 Hisilicon Limited + * Author: Shaokun Zhang zhangshaokun@hisilicon.com + * Anurup M anurup.m@huawei.com + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. + */ +#include <linux/acpi.h> +#include <linux/bug.h> +#include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/list.h> +#include <linux/smp.h> + +#include "hisi_uncore_pmu.h" + +/* LPDDRC register definition in v1 */ +#define LPDDRC_PERF_CTRL 0x4930 +#define LPDDRC_FLUX_WR 0x4948 +#define LPDDRC_FLUX_RD 0x494c +#define LPDDRC_FLUX_WCMD 0x4950 +#define LPDDRC_FLUX_RCMD 0x4954 +#define LPDDRC_PRE_CMD 0x4984 +#define LPDDRC_ACT_CMD 0x4988 +#define LPDDRC_RNK_CHG 0x4990 +#define LPDDRC_RW_CHG 0x4994 +#define LPDDRC_EVENT_CTRL 0x4d60 +#define LPDDRC_INT_MASK 0x6c8 +#define LPDDRC_INT_STATUS 0x6cc +#define LPDDRC_INT_CLEAR 0x6d0 +#define LPDDRC_VERSION 0x710 + +#define LPDDRC_NR_COUNTERS 0x8 +#define LPDDRC_V1_PERF_CTRL_EN 0x1 +#define LPDDRC_V1_NR_EVENTS 0x7 + +/* + * For PMU v1, there are eight-events and every event has been mapped + * to fixed-purpose counters which register offset is not consistent. + * Therefore there is no write event type and we assume that event + * code (0 to 7) is equal to counter index in PMU driver. + */ +#define GET_LPDDRC_EVENTID(hwc) (hwc->config_base & 0x7) + +static const u32 lpddrc_reg_off[] = { + LPDDRC_FLUX_WR, LPDDRC_FLUX_RD, LPDDRC_FLUX_WCMD, LPDDRC_FLUX_RCMD, + LPDDRC_PRE_CMD, LPDDRC_ACT_CMD, LPDDRC_RNK_CHG, LPDDRC_RW_CHG +}; + +/* + * Select the counter register offset using the counter index. + * In PMU v1, there are no programmable counter, the count + * is read form the statistics counter register itself. + */ +static u32 hisi_lpddrc_pmu_v1_get_counter_offset(int cntr_idx) +{ + return lpddrc_reg_off[cntr_idx]; +} + +static u64 hisi_lpddrc_pmu_v1_read_counter(struct hisi_pmu *lpddrc_pmu, + struct hw_perf_event *hwc) +{ + return readl(lpddrc_pmu->base + + hisi_lpddrc_pmu_v1_get_counter_offset(hwc->idx)); +} + +/* + * For LPDDRC PMU, event counter should be reset when start counters, + * reset the prev_count by software, because the counter register was RO. + */ +static void hisi_lpddrc_pmu_v1_write_counter(struct hisi_pmu *lpddrc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + local64_set(&hwc->prev_count, 0); +} + +/* + * For LPDDRC PMU v1, event has been mapped to fixed-purpose counter by hardware, + * so there is no need to write event type, while it is programmable counter in + * PMU v2. 
+ */ +static void hisi_lpddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, + u32 type) +{ +} + +static void hisi_lpddrc_pmu_v1_start_counters(struct hisi_pmu *lpddrc_pmu) +{ + u32 val; + + /* Set perf_enable in LPDDRC_PERF_CTRL to start event counting */ + val = readl(lpddrc_pmu->base + LPDDRC_PERF_CTRL); + val |= LPDDRC_V1_PERF_CTRL_EN; + writel(val, lpddrc_pmu->base + LPDDRC_PERF_CTRL); +} + +static void hisi_lpddrc_pmu_v1_stop_counters(struct hisi_pmu *lpddrc_pmu) +{ + u32 val; + + /* Clear perf_enable in LPDDRC_PERF_CTRL to stop event counting */ + val = readl(lpddrc_pmu->base + LPDDRC_PERF_CTRL); + val &= ~LPDDRC_V1_PERF_CTRL_EN; + writel(val, lpddrc_pmu->base + LPDDRC_PERF_CTRL); +} + +static void hisi_lpddrc_pmu_v1_enable_counter(struct hisi_pmu *lpddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Set counter index(event code) in LPDDRC_EVENT_CTRL register */ + val = readl(lpddrc_pmu->base + LPDDRC_EVENT_CTRL); + val |= (1 << GET_LPDDRC_EVENTID(hwc)); + writel(val, lpddrc_pmu->base + LPDDRC_EVENT_CTRL); +} + +static void hisi_lpddrc_pmu_v1_disable_counter(struct hisi_pmu *lpddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index(event code) in LPDDRC_EVENT_CTRL register */ + val = readl(lpddrc_pmu->base + LPDDRC_EVENT_CTRL); + val &= ~(1 << GET_LPDDRC_EVENTID(hwc)); + writel(val, lpddrc_pmu->base + LPDDRC_EVENT_CTRL); +} + +static int hisi_lpddrc_pmu_v1_get_event_idx(struct perf_event *event) +{ + struct hisi_pmu *lpddrc_pmu = to_hisi_pmu(event->pmu); + unsigned long *used_mask = lpddrc_pmu->pmu_events.used_mask; + struct hw_perf_event *hwc = &event->hw; + /* For LPDDRC PMU, we use event code as counter index */ + int idx = GET_LPDDRC_EVENTID(hwc); + + if (test_bit(idx, used_mask)) + return -EAGAIN; + + set_bit(idx, used_mask); + + return idx; +} + +static void hisi_lpddrc_pmu_v1_enable_counter_int(struct hisi_pmu *lpddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Write 0 to enable interrupt */ + val = readl(lpddrc_pmu->base + LPDDRC_INT_MASK); + val &= ~(1 << hwc->idx); + writel(val, lpddrc_pmu->base + LPDDRC_INT_MASK); +} + +static void hisi_lpddrc_pmu_v1_disable_counter_int(struct hisi_pmu *lpddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Write 1 to mask interrupt */ + val = readl(lpddrc_pmu->base + LPDDRC_INT_MASK); + val |= 1 << hwc->idx; + writel(val, lpddrc_pmu->base + LPDDRC_INT_MASK); +} + +static u32 hisi_lpddrc_pmu_v1_get_int_status(struct hisi_pmu *lpddrc_pmu) +{ + return readl(lpddrc_pmu->base + LPDDRC_INT_STATUS); +} + +static void hisi_lpddrc_pmu_v1_clear_int_status(struct hisi_pmu *lpddrc_pmu, + int idx) +{ + writel(1 << idx, lpddrc_pmu->base + LPDDRC_INT_CLEAR); +} + +static const struct acpi_device_id hisi_lpddrc_pmu_acpi_match[] = { + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_lpddrc_pmu_acpi_match); + +static const struct of_device_id lpddrc_of_match[] = { + { .compatible = "hisilicon,lpddrc-pmu", }, + {}, +}; + +static int hisi_lpddrc_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *lpddrc_pmu) +{ + /* + * Use the SCCL_ID and LPDDRC channel ID to identify the + * LPDDRC PMU, while SCCL_ID is in MPIDR[aff2]. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", + &lpddrc_pmu->index_id)) { + dev_err(&pdev->dev, "Can not read lpddrc channel-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &lpddrc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read lpddrc sccl-id!\n"); + return -EINVAL; + } + /* LPDDRC PMUs only share the same SCCL */ + lpddrc_pmu->ccl_id = -1; + + lpddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(lpddrc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for lpddrc_pmu resource\n"); + return PTR_ERR(lpddrc_pmu->base); + } + + lpddrc_pmu->identifier = readl(lpddrc_pmu->base + LPDDRC_VERSION); + + return 0; +} + +static struct attribute *hisi_lpddrc_pmu_v1_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-4"), + NULL, +}; + +static const struct attribute_group hisi_lpddrc_pmu_v1_format_group = { + .name = "format", + .attrs = hisi_lpddrc_pmu_v1_format_attr, +}; + +static struct attribute *hisi_lpddrc_pmu_v1_events_attr[] = { + HISI_PMU_EVENT_ATTR(flux_wr, 0x00), + HISI_PMU_EVENT_ATTR(flux_rd, 0x01), + HISI_PMU_EVENT_ATTR(flux_wcmd, 0x02), + HISI_PMU_EVENT_ATTR(flux_rcmd, 0x03), + HISI_PMU_EVENT_ATTR(pre_cmd, 0x04), + HISI_PMU_EVENT_ATTR(act_cmd, 0x05), + HISI_PMU_EVENT_ATTR(rnk_chg, 0x06), + HISI_PMU_EVENT_ATTR(rw_chg, 0x07), + NULL, +}; + +static const struct attribute_group hisi_lpddrc_pmu_v1_events_group = { + .name = "events", + .attrs = hisi_lpddrc_pmu_v1_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_lpddrc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_lpddrc_pmu_cpumask_attr_group = { + .attrs = hisi_lpddrc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_lpddrc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_lpddrc_pmu_identifier_attrs[] = { + &hisi_lpddrc_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_lpddrc_pmu_identifier_group = { + .attrs = hisi_lpddrc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_lpddrc_pmu_v1_attr_groups[] = { + &hisi_lpddrc_pmu_v1_format_group, + &hisi_lpddrc_pmu_v1_events_group, + &hisi_lpddrc_pmu_cpumask_attr_group, + &hisi_lpddrc_pmu_identifier_group, + NULL, +}; + +static const struct hisi_uncore_ops hisi_uncore_lpddrc_v1_ops = { + .write_evtype = hisi_lpddrc_pmu_write_evtype, + .get_event_idx = hisi_lpddrc_pmu_v1_get_event_idx, + .start_counters = hisi_lpddrc_pmu_v1_start_counters, + .stop_counters = hisi_lpddrc_pmu_v1_stop_counters, + .enable_counter = hisi_lpddrc_pmu_v1_enable_counter, + .disable_counter = hisi_lpddrc_pmu_v1_disable_counter, + .enable_counter_int = hisi_lpddrc_pmu_v1_enable_counter_int, + .disable_counter_int = hisi_lpddrc_pmu_v1_disable_counter_int, + .write_counter = hisi_lpddrc_pmu_v1_write_counter, + .read_counter = hisi_lpddrc_pmu_v1_read_counter, + .get_int_status = hisi_lpddrc_pmu_v1_get_int_status, + .clear_int_status = hisi_lpddrc_pmu_v1_clear_int_status, +}; + +static int hisi_lpddrc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *lpddrc_pmu) +{ + int ret; + + ret = hisi_lpddrc_pmu_init_data(pdev, lpddrc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(lpddrc_pmu, pdev); + if (ret) + return ret; + + lpddrc_pmu->counter_bits = 32; + lpddrc_pmu->check_event = LPDDRC_V1_NR_EVENTS; + lpddrc_pmu->pmu_events.attr_groups = 
hisi_lpddrc_pmu_v1_attr_groups; + lpddrc_pmu->ops = &hisi_uncore_lpddrc_v1_ops; + + lpddrc_pmu->num_counters = LPDDRC_NR_COUNTERS; + lpddrc_pmu->dev = &pdev->dev; + lpddrc_pmu->on_cpu = -1; + + return 0; +} + +static int hisi_lpddrc_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *lpddrc_pmu; + char *name; + int ret; + + lpddrc_pmu = devm_kzalloc(&pdev->dev, sizeof(*lpddrc_pmu), GFP_KERNEL); + if (!lpddrc_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, lpddrc_pmu); + + ret = hisi_lpddrc_pmu_dev_probe(pdev, lpddrc_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "hisi_sccl%u_lpddrc%u", lpddrc_pmu->sccl_id, + lpddrc_pmu->index_id); + + lpddrc_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = lpddrc_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + /* Pick one core to use for cpumask attributes */ + cpumask_set_cpu(smp_processor_id(), &lpddrc_pmu->associated_cpus); + + lpddrc_pmu->on_cpu = cpumask_first(&lpddrc_pmu->associated_cpus); + if (lpddrc_pmu->on_cpu >= nr_cpu_ids) + return -EINVAL; + + ret = perf_pmu_register(&lpddrc_pmu->pmu, name, -1); + + return ret; +} + +static int hisi_lpddrc_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *lpddrc_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&lpddrc_pmu->pmu); + return 0; +} + +static struct platform_driver hisi_lpddrc_pmu_driver = { + .driver = { + .name = "hisi_lpddrc_pmu", + .acpi_match_table = ACPI_PTR(hisi_lpddrc_pmu_acpi_match), + .of_match_table = lpddrc_of_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_lpddrc_pmu_probe, + .remove = hisi_lpddrc_pmu_remove, +}; + +static int __init hisi_lpddrc_pmu_module_init(void) +{ + int ret; + + ret = platform_driver_register(&hisi_lpddrc_pmu_driver); + + return ret; +} +module_init(hisi_lpddrc_pmu_module_init); + +static void __exit hisi_lpddrc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_lpddrc_pmu_driver); +} +module_exit(hisi_lpddrc_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC LPDDRC uncore PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Shaokun Zhang zhangshaokun@hisilicon.com"); +MODULE_AUTHOR("Anurup M anurup.m@huawei.com");
From: Chen Jun chenjun102@huawei.com
Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/perf/hisilicon/Kconfig | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig index c5d1b7019fff..3c2ce1c5d428 100644 --- a/drivers/perf/hisilicon/Kconfig +++ b/drivers/perf/hisilicon/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config HISI_PMU tristate "HiSilicon SoC PMU drivers" - depends on ARM64 && ACPI + depends on ARM64 help Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home Agent performance monitor and DDR Controller performance monitor. diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 0e61da607d55..1234e3690e91 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -292,9 +292,6 @@ MODULE_DEVICE_TABLE(acpi, hisi_hha_pmu_acpi_match); static int hisi_hha_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *hha_pmu) { - unsigned long long id; - acpi_status status; - /* * Use SCCL_ID and UID to identify the HHA PMU, while * SCCL_ID is in MPIDR[aff2]. @@ -311,6 +308,9 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, */ if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", &hha_pmu->index_id)) { +#ifdef CONFIG_ACPI + unsigned long long id; + acpi_status status; status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -319,6 +319,9 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, }
hha_pmu->index_id = id; +#else + return -EINVAL; +#endif } /* HHA PMUs only share the same SCCL */ hha_pmu->ccl_id = -1;
From: Chen Jun chenjun102@huawei.com
Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/perf/hisilicon/Makefile | 4 +- .../hisilicon/hisi_uncore_ddrc_pmu_mini.c | 404 +++++++++++++ .../perf/hisilicon/hisi_uncore_l3c_pmu_mini.c | 530 ++++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.c | 47 +- drivers/perf/hisilicon/hisi_uncore_pmu.h | 13 +- include/linux/cpuhotplug.h | 2 + 6 files changed, 995 insertions(+), 5 deletions(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu_mini.c create mode 100644 drivers/perf/hisilicon/hisi_uncore_l3c_pmu_mini.c
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 22e384cdfd53..a914e2211dcd 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -3,4 +3,6 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ hisi_uncore_pa_pmu.o \ hisi_uncore_l3t_pmu.o \ - hisi_uncore_lpddrc_pmu.o + hisi_uncore_lpddrc_pmu.o \ + hisi_uncore_ddrc_pmu_mini.o \ + hisi_uncore_l3c_pmu_mini.o diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu_mini.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu_mini.c new file mode 100644 index 000000000000..41f705a22ac8 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu_mini.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC DDRC uncore Hardware event counters support + * + * Copyright (C) 2017 Hisilicon Limited + * Author: Shaokun Zhang zhangshaokun@hisilicon.com + * Anurup M anurup.m@huawei.com + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. + */ +#include <linux/acpi.h> +#include <linux/bug.h> +#include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/list.h> +#include <linux/smp.h> + +#include "hisi_uncore_pmu.h" + +/* DDRC register definition in v1 */ +#define DDRC_PERF_CTRL 0x4010 +#define DDRC_FLUX_WR 0x4380 +#define DDRC_FLUX_RD 0x4384 +#define DDRC_FLUX_WCMD 0x4388 +#define DDRC_FLUX_RCMD 0x438c +#define DDRC_PRE_CMD 0x43c0 +#define DDRC_ACT_CMD 0x43c4 +#define DDRC_RNK_CHG 0x43cc +#define DDRC_RW_CHG 0x43d0 +#define DDRC_EVENT_CTRL 0x46C0 +#define DDRC_INT_MASK 0x46c8 +#define DDRC_INT_STATUS 0x46cc +#define DDRC_INT_CLEAR 0x46d0 + +/* DDRC has 8-counters */ +#define DDRC_NR_COUNTERS 0x8 +#define DDRC_PERF_CTRL_EN 0x2 +#define DDRC_NR_EVENTS 0x7 + +/* + * For PMU v1, there are eight-events and every event has been mapped + * to fixed-purpose counters which register offset is not consistent. + * Therefore there is no write event type and we assume that event + * code (0 to 7) is equal to counter index in PMU driver. + */ +#define GET_DDRC_EVENTID(hwc) (hwc->config_base & 0x7) + +static const u32 ddrc_reg_off[] = { + DDRC_FLUX_WR, DDRC_FLUX_RD, DDRC_FLUX_WCMD, DDRC_FLUX_RCMD, + DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG +}; + +/* + * Select the counter register offset using the counter index. + * In PMU v1, there are no programmable counter, the count + * is read form the statistics counter register itself. 
+ */ +static u32 hisi_ddrc_pmu_mini_get_counter_offset(int cntr_idx) +{ + return ddrc_reg_off[cntr_idx]; +} + +static u64 hisi_ddrc_pmu_mini_read_counter(struct hisi_pmu *ddrc_pmu_mini, + struct hw_perf_event *hwc) +{ + return readl(ddrc_pmu_mini->base + + hisi_ddrc_pmu_mini_get_counter_offset(hwc->idx)); +} + +static void hisi_ddrc_pmu_mini_write_counter(struct hisi_pmu *ddrc_pmu_mini, + struct hw_perf_event *hwc, u64 val) +{ + writel((u32)val, + ddrc_pmu_mini->base + hisi_ddrc_pmu_mini_get_counter_offset(hwc->idx)); +} + +static void hisi_ddrc_pmu_mini_write_evtype(struct hisi_pmu *hha_pmu, int idx, + u32 type) +{ +} + +static void hisi_ddrc_pmu_mini_start_counters(struct hisi_pmu *ddrc_pmu_mini) +{ + u32 val; + + /* Set perf_enable in DDRC_PERF_CTRL to start event counting */ + val = readl(ddrc_pmu_mini->base + DDRC_PERF_CTRL); + val |= DDRC_PERF_CTRL_EN; + writel(val, ddrc_pmu_mini->base + DDRC_PERF_CTRL); +} + +static void hisi_ddrc_pmu_mini_stop_counters(struct hisi_pmu *ddrc_pmu_mini) +{ + u32 val; + + /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */ + val = readl(ddrc_pmu_mini->base + DDRC_PERF_CTRL); + val &= ~DDRC_PERF_CTRL_EN; + writel(val, ddrc_pmu_mini->base + DDRC_PERF_CTRL); +} + +static void hisi_ddrc_pmu_mini_enable_counter(struct hisi_pmu *ddrc_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Set counter index(event code) in DDRC_EVENT_CTRL register */ + val = readl(ddrc_pmu_mini->base + DDRC_EVENT_CTRL); + val |= (1 << GET_DDRC_EVENTID(hwc)); + writel(val, ddrc_pmu_mini->base + DDRC_EVENT_CTRL); +} + +static void hisi_ddrc_pmu_mini_disable_counter(struct hisi_pmu *ddrc_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index(event code) in DDRC_EVENT_CTRL register */ + val = readl(ddrc_pmu_mini->base + DDRC_EVENT_CTRL); + val &= ~(1 << GET_DDRC_EVENTID(hwc)); + writel(val, ddrc_pmu_mini->base + DDRC_EVENT_CTRL); +} + +static int hisi_ddrc_pmu_mini_get_event_idx(struct perf_event *event) +{ + struct hisi_pmu *ddrc_pmu_mini = to_hisi_pmu(event->pmu); + unsigned long *used_mask = ddrc_pmu_mini->pmu_events.used_mask; + struct hw_perf_event *hwc = &event->hw; + /* For DDRC PMU, we use event code as counter index */ + int idx = GET_DDRC_EVENTID(hwc); + + if (test_bit(idx, used_mask)) + return -EAGAIN; + + set_bit(idx, used_mask); + + return idx; +} + +static void hisi_ddrc_pmu_mini_enable_counter_int(struct hisi_pmu *ddrc_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Write 0 to enable interrupt */ + val = readl(ddrc_pmu_mini->base + DDRC_INT_MASK); + val &= ~(1 << hwc->idx); + writel(val, ddrc_pmu_mini->base + DDRC_INT_MASK); +} + +static void hisi_ddrc_pmu_mini_disable_counter_int(struct hisi_pmu *ddrc_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Write 1 to mask interrupt */ + val = readl(ddrc_pmu_mini->base + DDRC_INT_MASK); + val |= 1 << hwc->idx; + writel(val, ddrc_pmu_mini->base + DDRC_INT_MASK); +} + +static u32 hisi_ddrc_pmu_mini_get_int_status(struct hisi_pmu *ddrc_pmu_mini) +{ + return readl(ddrc_pmu_mini->base + DDRC_INT_STATUS); +} + +static void hisi_ddrc_pmu_mini_clear_int_status(struct hisi_pmu *ddrc_pmu_mini, + int idx) +{ + writel(1 << idx, ddrc_pmu_mini->base + DDRC_INT_CLEAR); +} + +static const struct acpi_device_id hisi_ddrc_pmu_mini_acpi_match[] = { + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_mini_acpi_match); + +static const struct of_device_id ddrc_of_match[] = { + { .compatible = "hisilicon,ddrc-pmu-mini", }, + {}, +}; +MODULE_DEVICE_TABLE(of, ddrc_of_match); + +static 
int hisi_ddrc_pmu_mini_init_data(struct platform_device *pdev, + struct hisi_pmu *ddrc_pmu_mini) +{ + /* + * Use the SCCL_ID and DDRC channel ID to identify the + * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. + */ + if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", + &ddrc_pmu_mini->index_id)) { + dev_err(&pdev->dev, "Can not read ddrc channel-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &ddrc_pmu_mini->sccl_id)) { + dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); + return -EINVAL; + } + /* DDRC PMUs only share the same SCCL */ + ddrc_pmu_mini->ccl_id = -1; + + ddrc_pmu_mini->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ddrc_pmu_mini->base)) { + dev_err(&pdev->dev, "ioremap failed for ddrc_pmu_mini resource\n"); + return PTR_ERR(ddrc_pmu_mini->base); + } + + return 0; +} + +static struct attribute *hisi_ddrc_pmu_mini_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-4"), + NULL, +}; + +static const struct attribute_group hisi_ddrc_pmu_mini_format_group = { + .name = "format", + .attrs = hisi_ddrc_pmu_mini_format_attr, +}; + +static struct attribute *hisi_ddrc_pmu_mini_events_attr[] = { + HISI_PMU_EVENT_ATTR(flux_write, 0x00), + HISI_PMU_EVENT_ATTR(flux_read, 0x01), + HISI_PMU_EVENT_ATTR(flux_write_cmd, 0x02), + HISI_PMU_EVENT_ATTR(flux_read_cmd, 0x03), + HISI_PMU_EVENT_ATTR(fluxid_write, 0x04), + HISI_PMU_EVENT_ATTR(fluxid_read, 0x05), + HISI_PMU_EVENT_ATTR(fluxid_write_cmd, 0x06), + HISI_PMU_EVENT_ATTR(fluxid_read_cmd, 0x07), + NULL, +}; + +static const struct attribute_group hisi_ddrc_pmu_mini_events_group = { + .name = "events", + .attrs = hisi_ddrc_pmu_mini_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_ddrc_pmu_mini_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_ddrc_pmu_mini_cpumask_attr_group = { + .attrs = hisi_ddrc_pmu_mini_cpumask_attrs, +}; + +static const struct attribute_group *hisi_ddrc_pmu_mini_attr_groups[] = { + &hisi_ddrc_pmu_mini_format_group, + &hisi_ddrc_pmu_mini_events_group, + &hisi_ddrc_pmu_mini_cpumask_attr_group, + NULL, +}; + +static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { + .write_evtype = hisi_ddrc_pmu_mini_write_evtype, + .get_event_idx = hisi_ddrc_pmu_mini_get_event_idx, + .start_counters = hisi_ddrc_pmu_mini_start_counters, + .stop_counters = hisi_ddrc_pmu_mini_stop_counters, + .enable_counter = hisi_ddrc_pmu_mini_enable_counter, + .disable_counter = hisi_ddrc_pmu_mini_disable_counter, + .enable_counter_int = hisi_ddrc_pmu_mini_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_mini_disable_counter_int, + .write_counter = hisi_ddrc_pmu_mini_write_counter, + .read_counter = hisi_ddrc_pmu_mini_read_counter, + .get_int_status = hisi_ddrc_pmu_mini_get_int_status, + .clear_int_status = hisi_ddrc_pmu_mini_clear_int_status, +}; + +static int hisi_ddrc_pmu_mini_dev_probe(struct platform_device *pdev, + struct hisi_pmu *ddrc_pmu_mini) +{ + int ret; + + ret = hisi_ddrc_pmu_mini_init_data(pdev, ddrc_pmu_mini); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(ddrc_pmu_mini, pdev); + if (ret) + return ret; + + ddrc_pmu_mini->counter_bits = 32; + ddrc_pmu_mini->check_event = DDRC_NR_EVENTS; + ddrc_pmu_mini->pmu_events.attr_groups = hisi_ddrc_pmu_mini_attr_groups; + ddrc_pmu_mini->ops = &hisi_uncore_ddrc_ops; + ddrc_pmu_mini->num_counters = DDRC_NR_COUNTERS; + ddrc_pmu_mini->dev = &pdev->dev; + ddrc_pmu_mini->on_cpu = -1; + + 
return 0; +} + +static int hisi_ddrc_pmu_mini_probe(struct platform_device *pdev) +{ + struct hisi_pmu *ddrc_pmu_mini; + char *name; + int ret; + + ddrc_pmu_mini = devm_kzalloc(&pdev->dev, sizeof(*ddrc_pmu_mini), GFP_KERNEL); + if (!ddrc_pmu_mini) + return -ENOMEM; + + platform_set_drvdata(pdev, ddrc_pmu_mini); + + ret = hisi_ddrc_pmu_mini_dev_probe(pdev, ddrc_pmu_mini); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_MINI_ONLINE, + &ddrc_pmu_mini->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); + return ret; + } + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_ddrc%u_%u", + ddrc_pmu_mini->index_id, ddrc_pmu_mini->sccl_id - 1); + + ddrc_pmu_mini->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = ddrc_pmu_mini->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&ddrc_pmu_mini->pmu, name, -1); + if (ret) { + dev_err(ddrc_pmu_mini->dev, "DDRC PMU register failed!\n"); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_DDRC_MINI_ONLINE, &ddrc_pmu_mini->node); + } + + return ret; +} + +static int hisi_ddrc_pmu_mini_remove(struct platform_device *pdev) +{ + struct hisi_pmu *ddrc_pmu_mini = platform_get_drvdata(pdev); + + perf_pmu_unregister(&ddrc_pmu_mini->pmu); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_MINI_ONLINE, + &ddrc_pmu_mini->node); + return 0; +} + +static struct platform_driver hisi_ddrc_pmu_mini_driver = { + .driver = { + .name = "hisi_ddrc_pmu_mini", + .acpi_match_table = ACPI_PTR(hisi_ddrc_pmu_mini_acpi_match), + .of_match_table = ddrc_of_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_ddrc_pmu_mini_probe, + .remove = hisi_ddrc_pmu_mini_remove, +}; + +static int __init hisi_ddrc_pmu_mini_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_DDRC_MINI_ONLINE, + "AP_PERF_ARM_HISI_DDRC_MINI_ONLINE", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret) { + pr_err("DDRC PMU: setup hotplug, ret = %d\n", ret); + return ret; + } + + ret = platform_driver_register(&hisi_ddrc_pmu_mini_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_MINI_ONLINE); + + return ret; +} +module_init(hisi_ddrc_pmu_mini_module_init); + +static void __exit hisi_ddrc_pmu_mini_module_exit(void) +{ + platform_driver_unregister(&hisi_ddrc_pmu_mini_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_MINI_ONLINE); + +} +module_exit(hisi_ddrc_pmu_mini_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU mini driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Shaokun Zhang zhangshaokun@hisilicon.com"); +MODULE_AUTHOR("Anurup M anurup.m@huawei.com"); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu_mini.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu_mini.c new file mode 100644 index 000000000000..d26ef9e6189c --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu_mini.c @@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC L3C uncore Hardware event counters support + * + * Copyright (C) 2017 Hisilicon Limited + * Author: Anurup M anurup.m@huawei.com + * 
Shaokun Zhang zhangshaokun@hisilicon.com + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. + */ +#include <linux/acpi.h> +#include <linux/bug.h> +#include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/list.h> +#include <linux/smp.h> + +#include "hisi_uncore_pmu.h" + +/* + * ARMv8 HiSilicon L3C event types. + */ +enum armv8_hisi_l3c_event_types { + HISI_HWEVENT_L3C_READ_ALLOCATE = 0x0, + HISI_HWEVENT_L3C_WRITE_ALLOCATE = 0x01, + HISI_HWEVENT_L3C_READ_NOALLOCATE = 0x02, + HISI_HWEVENT_L3C_WRITE_NOALLOCATE = 0x03, + HISI_HWEVENT_L3C_READ_HIT = 0x04, + HISI_HWEVENT_L3C_WRITE_HIT = 0x05, + HISI_HWEVENT_L3C_DSID0 = 0x30, + HISI_HWEVENT_L3C_DSID1 = 0x31, + HISI_HWEVENT_L3C_DSID2 = 0x32, + HISI_HWEVENT_L3C_DSID3 = 0x33, + HISI_HWEVENT_L3C_DSID4 = 0x34, + HISI_HWEVENT_L3C_DSID5 = 0x35, + HISI_HWEVENT_L3C_DSID6 = 0x36, + HISI_HWEVENT_L3C_DSID7 = 0x37, + HISI_HWEVENT_L3C_EVENT_MAX = 0x41, +}; + +/* L3C register definition */ +#define L3C_PERF_CTRL 0x04 +#define L3C_INT_MASK 0x0800 +#define L3C_INT_STATUS 0x0808 +#define L3C_INT_CLEAR 0x080c +#define L3C_EVENT_CTRL 0x1c00 +#define L3C_EVENT_TYPE0 0x140 +#define L3C_CNTR0_LOWER 0x170 + +/* L3C has 8-counters */ +#define L3C_NR_COUNTERS 0x8 + +#define L3C_PERF_CTRL_EN 0x1000000 +#define L3C_EVTYPE_NONE 0xff + +#define L3C_NR_EVENTS 0x59 + +#define L3C_DSID_PART_REG(idx) (0x40 + ((idx) / 4) * 0x4) +#define LLC_AA_CPU_PART(n) (0x00040 + (n) * 0x4) +#define LLC_AA_PART_MODE (0x00090) + +#define L3C_HRTIMER_INTERVAL (10LL * MSEC_PER_SEC) +#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1) + +static u32 hisi_l3c_pmu_mini_get_counter_offset(int cntr_idx) +{ + return (L3C_CNTR0_LOWER + (cntr_idx * 4)); +} + +static u64 hisi_l3c_pmu_mini_read_counter(struct hisi_pmu *l3c_pmu_mini, + struct hw_perf_event *hwc) +{ + return readl(l3c_pmu_mini->base + hisi_l3c_pmu_mini_get_counter_offset(hwc->idx)); +} + +static void hisi_l3c_pmu_mini_write_counter(struct hisi_pmu *l3c_pmu_mini, + struct hw_perf_event *hwc, u64 val) +{ + writel(val, l3c_pmu_mini->base + hisi_l3c_pmu_mini_get_counter_offset(hwc->idx)); +} + +static bool hisi_l3c_pmu_mini_cal_dsid_delta(struct perf_event *event) +{ + int event_id = event->hw.config_base & 0xff; + return event_id >= HISI_HWEVENT_L3C_DSID0 && event_id <= HISI_HWEVENT_L3C_DSID7; +} + +static void hisi_l3c_pmu_mini_write_evtype(struct hisi_pmu *l3c_pmu_mini, int idx, + u32 type) +{ + u32 reg, reg_idx, shift, val; + + /* + * Select the appropriate event select register(L3C_EVENT_TYPE0/1). + * There are 2 event select registers for the 8 hardware counters. + * Event code is 8-bits and for the former 4 hardware counters, + * L3C_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, + * L3C_EVENT_TYPE1 is chosen. 
+ */ + reg = L3C_EVENT_TYPE0 + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + /* Write event code to L3C_EVENT_TYPEx Register */ + val = readl(l3c_pmu_mini->base + reg); + val &= ~(L3C_EVTYPE_NONE << shift); + val |= (type << shift); + writel(val, l3c_pmu_mini->base + reg); + + writel(0x4, l3c_pmu_mini->aa_base + LLC_AA_PART_MODE); + writel(0xf10000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(0)); + writel(0xf30000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(1)); + writel(0xf50000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(2)); + writel(0xf70000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(3)); + writel(0xf90000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(4)); + writel(0xfb0000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(5)); + writel(0xfd0000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(6)); + writel(0xff0000, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(7)); + writel(0xf7f5f3f1, l3c_pmu_mini->base + L3C_DSID_PART_REG(0)); + writel(0xfffdfbf9, l3c_pmu_mini->base + L3C_DSID_PART_REG(1)); +} + +static void hisi_l3c_pmu_mini_clear_evtype(struct hisi_pmu *l3c_pmu_mini, int idx) +{ + u32 reg, reg_idx, shift, val; + + /* + * Clear the event in L3C_EVENT_TYPEx Register + * Each byte in the 32 bit event select register is used to configure + * the event code. Each byte correspond to a counter register to use. + * Use (idx % 4) to select the byte to clear in event select register + * with the vale 0xff. + */ + + reg = L3C_EVENT_TYPE0 + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + /* Write event code to L3C_EVENT_TYPEx Register */ + val = readl(l3c_pmu_mini->base + reg); + val &= ~(L3C_EVTYPE_NONE << shift); + val |= (L3C_EVTYPE_NONE << shift); + writel(val, l3c_pmu_mini->base + reg); + + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_PART_MODE); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(0)); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(1)); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(2)); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(3)); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(4)); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(5)); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(6)); + writel(0x0, l3c_pmu_mini->aa_base + LLC_AA_CPU_PART(7)); + writel(0x0, l3c_pmu_mini->base + L3C_DSID_PART_REG(0)); + writel(0x0, l3c_pmu_mini->base + L3C_DSID_PART_REG(1)); +} + +static void hisi_l3c_pmu_mini_start_counters(struct hisi_pmu *l3c_pmu_mini) +{ + u32 val; + + /* + * Set perf_enable bit in L3C_PERF_CTRL register to start counting + * for all enabled counters. + */ + val = readl(l3c_pmu_mini->base + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, l3c_pmu_mini->base + L3C_PERF_CTRL); +} + +static void hisi_l3c_pmu_mini_stop_counters(struct hisi_pmu *l3c_pmu_mini) +{ + u32 val; + + /* + * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting + * for all enabled counters. 
+ */ + val = readl(l3c_pmu_mini->base + L3C_PERF_CTRL); + val &= ~(L3C_PERF_CTRL_EN); + writel(val, l3c_pmu_mini->base + L3C_PERF_CTRL); +} + +static void hisi_l3c_pmu_mini_enable_counter(struct hisi_pmu *l3c_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index in L3C_EVENT_CTRL register */ + val = readl(l3c_pmu_mini->base + L3C_EVENT_CTRL); + val |= (1 << hwc->idx); + writel(val, l3c_pmu_mini->base + L3C_EVENT_CTRL); +} + +static void hisi_l3c_pmu_mini_disable_counter(struct hisi_pmu *l3c_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index in L3C_EVENT_CTRL register */ + val = readl(l3c_pmu_mini->base + L3C_EVENT_CTRL); + val &= ~(1 << hwc->idx); + writel(val, l3c_pmu_mini->base + L3C_EVENT_CTRL); +} + +static void hisi_l3c_pmu_mini_enable_counter_int(struct hisi_pmu *l3c_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(l3c_pmu_mini->base + L3C_INT_MASK); + /* Write 0 to enable interrupt */ + val &= ~(1 << hwc->idx); + writel(val, l3c_pmu_mini->base + L3C_INT_MASK); +} + +static void hisi_l3c_pmu_mini_disable_counter_int(struct hisi_pmu *l3c_pmu_mini, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(l3c_pmu_mini->base + L3C_INT_MASK); + /* Write 1 to mask interrupt */ + val |= (1 << hwc->idx); + writel(val, l3c_pmu_mini->base + L3C_INT_MASK); +} + +static u32 hisi_l3c_pmu_mini_get_int_status(struct hisi_pmu *l3c_pmu_mini) +{ + return readl(l3c_pmu_mini->base + L3C_INT_STATUS); +} + +static void hisi_l3c_pmu_mini_clear_int_status(struct hisi_pmu *l3c_pmu_mini, int idx) +{ + writel(1 << idx, l3c_pmu_mini->base + L3C_INT_CLEAR); +} + +static const struct acpi_device_id hisi_l3c_pmu_mini_acpi_match[] = { + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_mini_acpi_match); + +static const struct of_device_id l3c_of_match[] = { + { .compatible = "hisilicon,l3c-pmu-mini", }, + {}, +}; +MODULE_DEVICE_TABLE(of, l3c_of_match); + +static int hisi_l3c_pmu_mini_init_data(struct platform_device *pdev, + struct hisi_pmu *l3c_pmu_mini) +{ + /* + * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while + * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &l3c_pmu_mini->sccl_id)) { + dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", + &l3c_pmu_mini->ccl_id)) { + dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); + return -EINVAL; + } + + l3c_pmu_mini->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(l3c_pmu_mini->base)) { + dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n"); + return PTR_ERR(l3c_pmu_mini->base); + } + + return 0; +} + +static struct attribute *hisi_l3c_pmu_mini_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL, +}; + +static const struct attribute_group hisi_l3c_pmu_mini_format_group = { + .name = "format", + .attrs = hisi_l3c_pmu_mini_format_attr, +}; + +static struct attribute *hisi_l3c_pmu_mini_events_attr[] = { + HISI_PMU_EVENT_ATTR(read_allocate, 0x00), + HISI_PMU_EVENT_ATTR(write_allocate, 0x01), + HISI_PMU_EVENT_ATTR(read_noallocate, 0x02), + HISI_PMU_EVENT_ATTR(write_noallocate, 0x03), + HISI_PMU_EVENT_ATTR(read_hit, 0x04), + HISI_PMU_EVENT_ATTR(write_hit, 0x05), + HISI_PMU_EVENT_ATTR(dsid0, 0x30), + HISI_PMU_EVENT_ATTR(dsid1, 0x31), + HISI_PMU_EVENT_ATTR(dsid2, 0x32), + HISI_PMU_EVENT_ATTR(dsid3, 0x33), + HISI_PMU_EVENT_ATTR(dsid4, 0x34), + HISI_PMU_EVENT_ATTR(dsid5, 0x35), + HISI_PMU_EVENT_ATTR(dsid6, 0x36), + HISI_PMU_EVENT_ATTR(dsid7, 0x37), + NULL, +}; + +static const struct attribute_group hisi_l3c_pmu_mini_events_group = { + .name = "events", + .attrs = hisi_l3c_pmu_mini_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_l3c_pmu_mini_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_l3c_pmu_mini_cpumask_attr_group = { + .attrs = hisi_l3c_pmu_mini_cpumask_attrs, +}; + +static const struct attribute_group *hisi_l3c_pmu_mini_attr_groups[] = { + &hisi_l3c_pmu_mini_format_group, + &hisi_l3c_pmu_mini_events_group, + &hisi_l3c_pmu_mini_cpumask_attr_group, + NULL, +}; + +static enum hrtimer_restart hisi_hrtimer_callback_llc(struct hrtimer *hrtimer) +{ + struct hisi_pmu *hisi_pmu = container_of(hrtimer, + struct hisi_pmu, hrtimer); + struct perf_event *event = NULL; + unsigned long flags; + int event_id; + + /* Return if no active events */ + if (!hisi_pmu->num_active) + return HRTIMER_NORESTART; + + local_irq_save(flags); + + /* Update event count for each active event */ + list_for_each_entry(event, &hisi_pmu->active_list, active_entry) { + /* Read hardware counter and update the Perf event counter */ + /* disd counts show the amount of cache, no increase */ + event_id = event->hw.config & 0xff; + if (event_id < HISI_HWEVENT_L3C_DSID0) + hisi_uncore_pmu_event_update(event); + } + + local_irq_restore(flags); + hrtimer_forward_now(hrtimer, ms_to_ktime(hisi_pmu->hrt_duration)); + return HRTIMER_RESTART; +} + +void hisi_hrtimer_init_llc(struct hisi_pmu *hisi_pmu_mini, u64 timer_interval) +{ + /* hr timer clock initalization */ + hrtimer_init(&hisi_pmu_mini->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hisi_pmu_mini->hrtimer.function = &hisi_hrtimer_callback_llc; + hisi_pmu_mini->hrt_duration = timer_interval; +} + +static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { + .write_evtype = hisi_l3c_pmu_mini_write_evtype, + .clear_evtype = hisi_l3c_pmu_mini_clear_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_l3c_pmu_mini_start_counters, + .stop_counters = 
hisi_l3c_pmu_mini_stop_counters, + .enable_counter = hisi_l3c_pmu_mini_enable_counter, + .disable_counter = hisi_l3c_pmu_mini_disable_counter, + .enable_counter_int = hisi_l3c_pmu_mini_enable_counter_int, + .disable_counter_int = hisi_l3c_pmu_mini_disable_counter_int, + .write_counter = hisi_l3c_pmu_mini_write_counter, + .read_counter = hisi_l3c_pmu_mini_read_counter, + .get_int_status = hisi_l3c_pmu_mini_get_int_status, + .clear_int_status = hisi_l3c_pmu_mini_clear_int_status, + .start_hrtimer = hisi_hrtimer_start, + .stop_hrtimer = hisi_hrtimer_stop, + .cal_dsid_delta = hisi_l3c_pmu_mini_cal_dsid_delta, +}; + +/* Initialize hrtimer to poll for avoiding counter overflow */ +static void hisi_l3c_pmu_mini_hrtimer_init(struct hisi_pmu *l3c_pmu_mini) +{ + INIT_LIST_HEAD(&l3c_pmu_mini->active_list); + hisi_hrtimer_init_llc(l3c_pmu_mini, L3C_HRTIMER_INTERVAL); +} + +static int hisi_l3c_pmu_mini_dev_probe(struct platform_device *pdev, + struct hisi_pmu *l3c_pmu_mini) +{ + int ret; + + ret = hisi_l3c_pmu_mini_init_data(pdev, l3c_pmu_mini); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(l3c_pmu_mini, pdev); + if (ret) + return ret; + + l3c_pmu_mini->counter_bits = 32; + l3c_pmu_mini->check_event = L3C_NR_EVENTS; + l3c_pmu_mini->pmu_events.attr_groups = hisi_l3c_pmu_mini_attr_groups; + l3c_pmu_mini->num_counters = L3C_NR_COUNTERS; + l3c_pmu_mini->ops = &hisi_uncore_l3c_ops; + l3c_pmu_mini->dev = &pdev->dev; + l3c_pmu_mini->on_cpu = -1; + + l3c_pmu_mini->num_active = 0; + l3c_pmu_mini->aa_base = ioremap(0x100120000, 0x10000); + /* + * Use poll method to avoid counter overflow as overflow IRQ + * is not supported in v1,v2 hardware. + */ + hisi_l3c_pmu_mini_hrtimer_init(l3c_pmu_mini); + + return 0; +} + +static int hisi_l3c_pmu_mini_probe(struct platform_device *pdev) +{ + struct hisi_pmu *l3c_pmu_mini; + char *name; + int ret; + + l3c_pmu_mini = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu_mini), GFP_KERNEL); + if (!l3c_pmu_mini) + return -ENOMEM; + + platform_set_drvdata(pdev, l3c_pmu_mini); + + ret = hisi_l3c_pmu_mini_dev_probe(pdev, l3c_pmu_mini); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3C_MINI_ONLINE, + &l3c_pmu_mini->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + return ret; + } + + /* + * CCL_ID is used to identify the L3C in the same SCCL which was + * used _UID by mistake. 
+ */ + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_l3c%u_%u", + l3c_pmu_mini->index_id, l3c_pmu_mini->sccl_id); + l3c_pmu_mini->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = l3c_pmu_mini->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&l3c_pmu_mini->pmu, name, -1); + if (ret) { + dev_err(l3c_pmu_mini->dev, "L3C PMU register failed!\n"); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_L3C_MINI_ONLINE, &l3c_pmu_mini->node); + } + + return ret; +} + +static int hisi_l3c_pmu_mini_remove(struct platform_device *pdev) +{ + struct hisi_pmu *l3c_pmu_mini = platform_get_drvdata(pdev); + + perf_pmu_unregister(&l3c_pmu_mini->pmu); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3C_MINI_ONLINE, + &l3c_pmu_mini->node); + return 0; +} + +static struct platform_driver hisi_l3c_pmu_mini_driver = { + .driver = { + .name = "hisi_l3c_pmu_mini", + .acpi_match_table = ACPI_PTR(hisi_l3c_pmu_mini_acpi_match), + .of_match_table = of_match_ptr(l3c_of_match), + .suppress_bind_attrs = true, + }, + .probe = hisi_l3c_pmu_mini_probe, + .remove = hisi_l3c_pmu_mini_remove, +}; + +static int __init hisi_l3c_pmu_mini_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3C_MINI_ONLINE, + "AP_PERF_ARM_HISI_L3C_MINI_ONLINE", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret) { + pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret); + return ret; + } + + ret = platform_driver_register(&hisi_l3c_pmu_mini_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3C_MINI_ONLINE); + + return ret; +} +module_init(hisi_l3c_pmu_mini_module_init); + +static void __exit hisi_l3c_pmu_mini_module_exit(void) +{ + platform_driver_unregister(&hisi_l3c_pmu_mini_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3C_MINI_ONLINE); +} +module_exit(hisi_l3c_pmu_mini_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU mini driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Anurup M anurup.m@huawei.com"); +MODULE_AUTHOR("Shaokun Zhang zhangshaokun@hisilicon.com"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 07f0c7015181..ab718a039b44 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -63,6 +63,18 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, } EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show);
+void hisi_hrtimer_start(struct hisi_pmu *hisi_pmu) +{ + hrtimer_start(&hisi_pmu->hrtimer, + ms_to_ktime(hisi_pmu->hrt_duration), + HRTIMER_MODE_REL_PINNED); +} + +void hisi_hrtimer_stop(struct hisi_pmu *hisi_pmu) +{ + hrtimer_cancel(&hisi_pmu->hrtimer); +} + static bool hisi_validate_event_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; @@ -265,12 +277,16 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event)
if (hisi_pmu->ops->disable_filter) hisi_pmu->ops->disable_filter(event); + + if (hisi_pmu->ops->clear_evtype != NULL) + hisi_pmu->ops->clear_evtype(hisi_pmu, hwc->idx); }
void hisi_uncore_pmu_set_event_period(struct perf_event *event) { struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + u64 val;
/* * The HiSilicon PMU counters support 32 bits or 48 bits, depending on @@ -279,7 +295,10 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) * interrupt before another 2^(counter_bits - 1) events occur and the * counter overtakes its previous value. */ - u64 val = BIT_ULL(hisi_pmu->counter_bits - 1); + if (hisi_pmu->ops->cal_dsid_delta && hisi_pmu->ops->cal_dsid_delta(event)) + val = 0; + else + val = BIT_ULL(hisi_pmu->counter_bits - 1);
local64_set(&hwc->prev_count, val); /* Write start value to the hardware event counter */ @@ -302,8 +321,11 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) /* * compute the delta */ - delta = (new_raw_count - prev_raw_count) & - HISI_MAX_PERIOD(hisi_pmu->counter_bits); + if (hisi_pmu->ops->cal_dsid_delta && hisi_pmu->ops->cal_dsid_delta(event)) + delta = new_raw_count; + else + delta = (new_raw_count - prev_raw_count) & + HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); @@ -326,6 +348,15 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_pmu->ops->write_counter(hisi_pmu, hwc, prev_raw_count); }
+ /* Start hrtimer when the first event is started in this PMU */ + if (hisi_pmu->ops->start_hrtimer != NULL) { + hisi_pmu->num_active++; + list_add_tail(&event->active_entry, &hisi_pmu->active_list); + + if (hisi_pmu->num_active == 1) + hisi_pmu->ops->start_hrtimer(hisi_pmu); + } + hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } @@ -334,11 +365,21 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
hisi_uncore_pmu_disable_event(event); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED;
+ /* Stop hrtimer when the last event is stopped in this PMU */ + if (hisi_pmu->ops->stop_hrtimer != NULL) { + hisi_pmu->num_active--; + list_del(&event->active_entry); + + if (hisi_pmu->num_active == 0) + hisi_pmu->ops->stop_hrtimer(hisi_pmu); + } + if (hwc->state & PERF_HES_UPTODATE) return;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index ea9d89bbc1ea..f9c17ef5b40c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -47,6 +47,7 @@ struct hisi_pmu;
struct hisi_uncore_ops { void (*write_evtype)(struct hisi_pmu *, int, u32); + void (*clear_evtype)(struct hisi_pmu *, int); int (*get_event_idx)(struct perf_event *); u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *); void (*write_counter)(struct hisi_pmu *, struct hw_perf_event *, u64); @@ -60,6 +61,9 @@ struct hisi_uncore_ops { void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx); void (*enable_filter)(struct perf_event *event); void (*disable_filter)(struct perf_event *event); + void (*start_hrtimer)(struct hisi_pmu *); + void (*stop_hrtimer)(struct hisi_pmu *); + bool (*cal_dsid_delta)(struct perf_event *event); };
struct hisi_pmu_hwevents { @@ -79,16 +83,22 @@ struct hisi_pmu { int on_cpu; int irq; struct device *dev; + struct list_head active_list; /* Active events list */ + /* hrtimer to handle the counter overflow */ + struct hrtimer hrtimer; + u64 hrt_duration; /* hrtimer timeout */ struct hlist_node node; int sccl_id; int ccl_id; void __iomem *base; + void __iomem *aa_base; /* the ID of the PMU modules */ u32 index_id; /* For DDRC PMU v2: each DDRC has more than one DMC */ u32 sub_id; int num_counters; int counter_bits; + int num_active; /* check event code range */ int check_event; u32 identifier; @@ -113,7 +123,8 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, struct device_attribute *attr, char *buf); int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node); - +void hisi_hrtimer_start(struct hisi_pmu *hisi_pmu); +void hisi_hrtimer_stop(struct hisi_pmu *hisi_pmu); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, char *page); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b98b9eb7d5f8..6ef2431c4aac 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -174,8 +174,10 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, + CPUHP_AP_PERF_ARM_HISI_DDRC_MINI_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + CPUHP_AP_PERF_ARM_HISI_L3C_MINI_ONLINE, CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE,
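The masked subtraction added to hisi_uncore_pmu_event_update() above is what keeps the event delta correct across a 32-bit counter wrap. A minimal standalone sketch of that arithmetic (the helper name is hypothetical, not part of the patch):

  #include <linux/bits.h>
  #include <linux/types.h>

  /* Wraparound-safe delta for an N-bit up-counter. */
  static u64 counter_delta(u64 prev, u64 curr, int counter_bits)
  {
  	return (curr - prev) & (BIT_ULL(counter_bits) - 1);
  }

  /*
   * Worked example with counter_bits = 32:
   *   prev = 0xFFFFFFF0, curr = 0x00000010 after the counter wraps;
   *   (curr - prev) & 0xFFFFFFFF == 0x20, i.e. 32 events, as expected.
   */

The DSID events take the other branch because, as the hrtimer callback notes, they report an absolute occupancy value rather than an incrementing count, so the raw reading is used directly as the delta.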
From: Baolin Wang baolin.wang@linux.alibaba.com
If we did not reserve extra CMA memory, the log buffer can easily be filled up by CMA failure warnings when devices call dmam_alloc_coherent() to allocate DMA memory. Thus we can use pr_err_ratelimited() instead to reduce the duplicate CMA warnings.
Link: https://lkml.kernel.org/r/ce2251ef49e1727a9a40531d1996660b05462bd2.161527982... Signed-off-by: Baolin Wang baolin.wang@linux.alibaba.com Reviewed-by: David Hildenbrand david@redhat.com Acked-by: Minchan Kim minchan@kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org --- mm/cma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c index 9361ecaf52be..09f3b1e264c0 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -486,8 +486,8 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, }
if (ret && !no_warn) { - pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n", - __func__, count, ret); + pr_err_ratelimited("%s: alloc failed, req-size: %zu pages, ret: %d\n", + __func__, count, ret); cma_debug_show_areas(cma); }
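For background, a minimal sketch of the throttling that pr_err_ratelimited() provides, spelled out with the generic ratelimit helpers; the state and function names below are illustrative and not part of the patch:

  #include <linux/kernel.h>
  #include <linux/ratelimit.h>

  static DEFINE_RATELIMIT_STATE(cma_fail_rs, DEFAULT_RATELIMIT_INTERVAL,
  				DEFAULT_RATELIMIT_BURST);

  static void report_cma_failure(size_t count, int ret)
  {
  	/* Only a bounded burst of messages per interval reaches the log. */
  	if (__ratelimit(&cma_fail_rs))
  		pr_err("cma: alloc failed, req-size: %zu pages, ret: %d\n",
  		       count, ret);
  }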
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: hulk CVE: Reference: Type: feature DTS/AR: NA reason: bugfix for hugetlb remap --- mm/hugetlb.c | 56 ++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2b938cb7347f..de47c996ee67 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6190,39 +6190,43 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag) } EXPORT_SYMBOL_GPL(hugetlb_alloc_hugepage);
+static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr, + unsigned long size) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pte_t *ptep = NULL; + + pgdp = pgd_offset(mm, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_alloc(mm, p4dp, addr); + if (!pudp) + return NULL; + + ptep = (pte_t *)pmd_alloc(mm, pudp, addr); + + return ptep; +} + static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr, - pgprot_t prot, unsigned long pfn, bool special) + pgprot_t prot, unsigned long pfn) { int ret = 0; pte_t *ptep, entry; struct hstate *h; - struct vm_area_struct *vma; - struct address_space *mapping; spinlock_t *ptl;
h = size_to_hstate(PMD_SIZE); if (!h) return -EINVAL;
- if (!IS_ALIGNED(addr, PMD_SIZE)) - return -EINVAL; - - vma = find_vma(mm, addr); - if (!vma || !range_in_vma(vma, addr, addr + PMD_SIZE)) - return -EINVAL; - - mapping = vma->vm_file->f_mapping; - i_mmap_lock_read(mapping); - ptep = huge_pte_alloc(mm, addr, huge_page_size(h)); - if (!ptep) { - ret = -ENXIO; - goto out_unlock; - } + ptep = hugetlb_huge_pte_alloc(mm, addr, huge_page_size(h)); + if (!ptep) + return -ENXIO;
- if (WARN_ON(ptep && !pte_none(*ptep) && !pmd_huge(*(pmd_t *)ptep))) { - ret = -ENXIO; - goto out_unlock; - } + if (WARN_ON(ptep && !pte_none(*ptep) && !pmd_huge(*(pmd_t *)ptep))) + return -ENXIO;
entry = pfn_pte(pfn, prot); entry = huge_pte_mkdirty(entry); @@ -6230,31 +6234,27 @@ static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr, entry = huge_pte_mkwrite(entry); entry = pte_mkyoung(entry); entry = pte_mkhuge(entry); - if (special) - entry = pte_mkspecial(entry); + entry = pte_mkspecial(entry);
ptl = huge_pte_lockptr(h, mm, ptep); spin_lock(ptl); set_huge_pte_at(mm, addr, ptep, entry); spin_unlock(ptl);
-out_unlock: - i_mmap_unlock_read(mapping); - return ret; }
int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr, pgprot_t prot, struct page *hpage) { - return __hugetlb_insert_hugepage(mm, addr, prot, page_to_pfn(hpage), false); + return __hugetlb_insert_hugepage(mm, addr, prot, page_to_pfn(hpage)); } EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte);
int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm, unsigned long addr, pgprot_t prot, unsigned long phy_addr) { - return __hugetlb_insert_hugepage(mm, addr, prot, phy_addr >> PAGE_SHIFT, true); + return __hugetlb_insert_hugepage(mm, addr, prot, phy_addr >> PAGE_SHIFT); } EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte_by_pa);
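For orientation, a hedged sketch of how a caller in this tree might use the exported helper after this change. The wrapper name and protection flags are assumptions; note that with the alignment check moved out of __hugetlb_insert_hugepage(), the caller is now responsible for passing a PMD_SIZE-aligned address:

  #include <linux/hugetlb.h>
  #include <linux/mm.h>

  /* Map one PMD-sized physical range at a PMD-aligned address (sketch only). */
  static int map_one_hugepage(struct mm_struct *mm, unsigned long addr,
  			    unsigned long phys)
  {
  	pgprot_t prot = vm_get_page_prot(VM_READ | VM_WRITE | VM_SHARED);

  	if (!IS_ALIGNED(addr, PMD_SIZE) || !IS_ALIGNED(phys, PMD_SIZE))
  		return -EINVAL;

  	return hugetlb_insert_hugepage_pte_by_pa(mm, addr, prot, phys);
  }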
From: Jian Zhang zhangjian210@huawei.com
[IMPACT]: add oom notifier call for oom panic. Note: update glibc.
Signed-off-by: z00512904 z00512904@huawei.com --- mm/oom_kill.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dd2b4f890403..c1f82c7623cc 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1064,6 +1064,7 @@ static void check_panic_on_oom(struct oom_control *oc) if (is_sysrq_oom(oc)) return; dump_header(oc, NULL); + oom_type_notifier_call(0, oc); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); }
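For reference, the standard OOM notifier registration pattern; this generic sketch uses the upstream register_oom_notifier() API rather than the out-of-tree oom_type_notifier_call() added above, whose callback contract is not visible in this hunk:

  #include <linux/module.h>
  #include <linux/notifier.h>
  #include <linux/oom.h>
  #include <linux/printk.h>

  static int demo_oom_notify(struct notifier_block *nb, unsigned long action,
  			   void *data)
  {
  	pr_info("OOM condition observed\n");
  	return NOTIFY_OK;
  }

  static struct notifier_block demo_oom_nb = {
  	.notifier_call = demo_oom_notify,
  };

  static int __init demo_oom_init(void)
  {
  	return register_oom_notifier(&demo_oom_nb);
  }

  static void __exit demo_oom_exit(void)
  {
  	unregister_oom_notifier(&demo_oom_nb);
  }

  module_init(demo_oom_init);
  module_exit(demo_oom_exit);
  MODULE_LICENSE("GPL");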
From: Zhou Guanghui zhouguanghui1@huawei.com
-----------------------------------------------------------------------
The current SMMU driver supports both wired SPI interrupts and message-based SPIs. However, some HiSilicon chips use message-based SPIs, so a special attribute is added to identify this situation. Add an option "hisilicon,message-based-spi" and the address of GICD_SETSPI to the SMMU dts node, like this:

hisilicon,message-based-spi;
iommu-spi-base = <0x10 0x9000040>; /* address of GICD_SETSPI: 0x1009000040 */
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 50 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 ++ 2 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e463fd31d268..303da4a2bc6e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -30,6 +30,7 @@ #include <linux/pci.h> #include <linux/pci-ats.h> #include <linux/platform_device.h> +#include <linux/irq.h>
#include <linux/amba/bus.h>
@@ -125,6 +126,7 @@ struct arm_smmu_ctx_desc quiet_cd = { 0 }; static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, + { ARM_SMMU_OPT_MESSAGE_BASED_SPI, "hisilicon,message-based-spi"}, { 0, NULL}, };
@@ -406,7 +408,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI * payload, so the write will zero the entire command on that platform. */ - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) { + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !(smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI)) { ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) * q->ent_dwords * 8; } @@ -781,7 +784,8 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, struct arm_smmu_ll_queue *llq) { - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !(smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI)) return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, llq); @@ -4872,6 +4876,37 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu, bool resume } }
+static void arm_smmu_setup_message_based_spi(struct arm_smmu_device *smmu) +{ + struct irq_desc *desc; + u32 event_hwirq, gerror_hwirq, pri_hwirq; + + desc = irq_to_desc(smmu->gerr_irq); + gerror_hwirq = desc->irq_data.hwirq; + writeq_relaxed(smmu->spi_base, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); + writel_relaxed(gerror_hwirq, smmu->base + ARM_SMMU_GERROR_IRQ_CFG1); + writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, + smmu->base + ARM_SMMU_GERROR_IRQ_CFG2); + + desc = irq_to_desc(smmu->evtq.q.irq); + event_hwirq = desc->irq_data.hwirq; + writeq_relaxed(smmu->spi_base, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + writel_relaxed(event_hwirq, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG1); + writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, + smmu->base + ARM_SMMU_EVTQ_IRQ_CFG2); + + if (smmu->features & ARM_SMMU_FEAT_PRI) { + desc = irq_to_desc(smmu->priq.q.irq); + pri_hwirq = desc->irq_data.hwirq; + + writeq_relaxed(smmu->spi_base, + smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); + writel_relaxed(pri_hwirq, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG1); + writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, + smmu->base + ARM_SMMU_PRIQ_IRQ_CFG2); + } +} + static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu, bool resume) { int ret, irq; @@ -4904,6 +4939,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu, bool resume) if (smmu->features & ARM_SMMU_FEAT_PRI) irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
+ if (smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI) + arm_smmu_setup_message_based_spi(smmu); + /* Enable interrupt generation on the SMMU */ ret = arm_smmu_write_reg_sync(smmu, irqen_flags, ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); @@ -5625,6 +5663,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
parse_driver_options(smmu);
+ if (smmu->options & ARM_SMMU_OPT_MESSAGE_BASED_SPI) { + if (of_property_read_u64(dev->of_node, "iommu-spi-base", + &smmu->spi_base)) { + dev_err(dev, "missing irq base address\n"); + ret = -EINVAL; + } + } + if (of_dma_is_coherent(dev->of_node)) smmu->features |= ARM_SMMU_FEAT_COHERENCY;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1dd49bed58df..406e52a4d486 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -722,8 +722,11 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) +#define ARM_SMMU_OPT_MESSAGE_BASED_SPI (1 << 3) u32 options;
+ u64 spi_base; + union { u32 nr_ecmdq; u32 ecmdq_enabled;
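A small aside on the hwirq lookups in arm_smmu_setup_message_based_spi(): dereferencing irq_to_desc()->irq_data.hwirq works, but the same value can be obtained with the irq_data accessors. Shown here only as an illustrative alternative (the helper name is made up):

  #include <linux/irq.h>
  #include <linux/irqdesc.h>

  /* Translate a Linux virq number to its hardware IRQ number. */
  static u32 smmu_virq_to_hwirq(unsigned int virq)
  {
  	struct irq_data *d = irq_get_irq_data(virq);

  	return d ? (u32)irqd_to_hwirq(d) : 0;
  }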
From: Jian Zhang zhangjian210@huawei.com
Signed-off-by: Jian Zhang zhangjian210@huawei.com --- mm/memcontrol.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 93fa3cb7a269..12dd1cfeb2a5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3077,6 +3077,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) void __memcg_kmem_uncharge_page(struct page *page, int order) { struct obj_cgroup *objcg; + struct mem_cgroup *memcg; unsigned int nr_pages = 1 << order;
if (!PageMemcgKmem(page)) @@ -3084,6 +3085,12 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
objcg = __page_objcg(page); obj_cgroup_uncharge_pages(objcg, nr_pages); + + memcg = get_mem_cgroup_from_objcg(objcg); + if (!mem_cgroup_is_root(memcg)) + memcg_oom_recover(memcg); + css_put(&memcg->css); + page->memcg_data = 0; obj_cgroup_put(objcg); }
From: Jian Zhang zhangjian210@huawei.com
ascend inclusion category: bugfix bugzilla: NA CVE: NA --- arch/arm64/kernel/sdei.c | 11 ----------- 1 file changed, 11 deletions(-)
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 2132bd953a87..37546f9c6f74 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -123,17 +123,6 @@ bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
unsigned long sdei_arch_get_entry_point(int conduit) { - /* - * SDEI works between adjacent exception levels. If we booted at EL1 we - * assume a hypervisor is marshalling events. If we booted at EL2 and - * dropped to EL1 because we don't support VHE, then we can't support - * SDEI. - */ - if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { - pr_err("Not supported on this hardware/boot configuration\n"); - return 0; - } - if (IS_ENABLED(CONFIG_VMAP_STACK)) { if (init_sdei_stacks()) return 0;
From: Zhou Guanghui zhouguanghui1@huawei.com
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- arch/arm64/Kconfig | 7 +++++++ mm/hugetlb.c | 2 ++ 2 files changed, 9 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c6aa794901e1..91175d4d2806 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2092,6 +2092,13 @@ config ASCEND_SHARE_POOL This feature allows multiple processes to share virtual memory both in kernel and user level, which is only enabled for ascend platform.
+config ASCEND_CLEAR_HUGEPAGE_DISABLE + bool "Disable clear hugepage" + default n + help + Disable clear hugepage when alloc hugepages to improve the hugepage + application performance. + endif
endmenu diff --git a/mm/hugetlb.c b/mm/hugetlb.c index de47c996ee67..9556bf2d9b45 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4763,7 +4763,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, ret = vmf_error(PTR_ERR(page)); goto out; } +#ifndef CONFIG_ASCEND_CLEAR_HUGEPAGE_DISABLE clear_huge_page(page, address, pages_per_huge_page(h)); +#endif __SetPageUptodate(page); new_page = true;
From: Zhang Zekun zhangzekun11@huawei.com
Fix the l3c pmu mini compile error when it is compiled as a module.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index ab718a039b44..9b13f2e13f4c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -69,11 +69,13 @@ void hisi_hrtimer_start(struct hisi_pmu *hisi_pmu) ms_to_ktime(hisi_pmu->hrt_duration), HRTIMER_MODE_REL_PINNED); } +EXPORT_SYMBOL_GPL(hisi_hrtimer_start);
void hisi_hrtimer_stop(struct hisi_pmu *hisi_pmu) { hrtimer_cancel(&hisi_pmu->hrtimer); } +EXPORT_SYMBOL_GPL(hisi_hrtimer_stop);
static bool hisi_validate_event_group(struct perf_event *event) {