This patch set contains code optimizations and bug fixes for the share pool feature.
v1->v2:
1. Reorganize commit messages.
2. Drop a fix patch and merge it into the original patch.
Chen Jun (5):
  mm/sharepool: Make the definitions of MMAP_SHARE_POOL_{START|16G_START} more readable
  mm/sharepool: Rename sp_mapping.flag to sp_mapping.type
  mm/sharepool: replace spg->{dvpp|normal} with spg->mapping[SP_MAPPING_{DVPP|NORMAL}]
  mm/sharepool: Extract sp_mapping_find
  mm/sharepool: Support alloc ro mapping
Guo Mengqi (14):
  mm/sharepool: remove deprecated interfaces
  mm/sharepool: proc_sp_group_state bugfix
  mm/sharepool: fix dvpp spm redundant print error
  mm/sharepool: fix statistics error
  mm/sharepool: fix static code-check errors
  mm/sharepool: delete redundant codes
  mm/sharepool: fix softlockup in high pressure use case.
  mm/sharepool: fix deadlock in spa_stat_of_mapping_show
  mm/sharepool: fix deadlock in sp_check_mmap_addr
  mm/sharepool: delete unused codes
  mm/sharepool: fix potential AA deadlock
  mm/sharepool: check size=0 in mg_sp_make_share_k2u()
  mm/sharepool: fix hugepage_rsvd count increase error
  mm/sharepool: Fix sharepool hugepage cgroup uncount error.
Wang Wensheng (3):
  mm/sharepool: Delete unused sysctl interface
  mm/sharepool: Fix UAF reported by KASAN
  mm/sharepool: Rebind the numa node when fallback to normal pages
Zhang Zekun (7):
  mm/sharepool: Remove unused sp_dev_va_start and sp_dev_va_size
  mm/sharepool: Remove sp_device_number_detect function
  mm/sharepool: Remove enable_mdc_default_group and change the definition of is_process_in_group()
  mm/sharepool: Remove the comment and fix a bug in mg_sp_group_id_by_pid()
  mm/sharepool: Add a read lock in proc_usage_show()
  mm/sharepool: Fix code-style warnings
  mm/sharepool: Remove the leading double underlines for function name
Zhou Guanghui (6):
  mm/sharepool: delete unnecessary judgment
  mm/sharepool: Avoid UAF on spa
  mm/sharepool: Check the maximum value of spg_id
  mm/sharepool: Avoid UAF on mm
  mm/sharepool: bugfix for 2M U2K
  mm/sharepool: fix the incorrect judgement of the addr range
 fs/hugetlbfs/inode.c       |  19 +-
 include/linux/hugetlb.h    |   6 +-
 include/linux/share_pool.h | 164 ++-----
 kernel/sysctl.c            |  67 ---
 mm/hugetlb.c               |   3 +
 mm/share_pool.c            | 882 +++++++++++++------------------
 6 files changed, 361 insertions(+), 780 deletions(-)
From: Guo Mengqi <guomengqi3@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KC7C
CVE: NA
--------------------------------
Most of the interfaces prefixed with "sp_" are deprecated; remove them and keep only their "mg_sp_"-prefixed equivalents.
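For any remaining out-of-tree caller, the migration is a mechanical rename, with one signature change: mg_sp_unshare() drops the unused pid argument that sp_unshare() took. A minimal sketch of a hypothetical caller (the spg_id value and the SP_HUGEPAGE flag are purely illustrative):

    /* before this patch */
    void *va = sp_alloc(SZ_2M, SP_HUGEPAGE, spg_id);
    ...
    sp_free((unsigned long)va, spg_id);
    sp_unshare(uva, size, pid, spg_id);

    /* after this patch: only the mg_-prefixed exports remain */
    void *va = mg_sp_alloc(SZ_2M, SP_HUGEPAGE, spg_id);
    ...
    mg_sp_free((unsigned long)va, spg_id);
    mg_sp_unshare(uva, size, spg_id);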
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
---
 include/linux/share_pool.h |  90 +---------------------
 mm/share_pool.c            | 148 ++++++------------------------
 2 files changed, 22 insertions(+), 216 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index c2ef26661a4f..e8bc9a368e34 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -246,53 +246,31 @@ static inline void sp_init_mm(struct mm_struct *mm) * Those interfaces are exported for modules */ extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); -extern int sp_group_add_task(int pid, int spg_id); - extern int mg_sp_group_del_task(int pid, int spg_id); -extern int sp_group_del_task(int pid, int spg_id); - extern int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num); -extern int sp_group_id_by_pid(int pid); - -extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)); extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task);
-extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); - -extern int sp_free(unsigned long addr, int id); extern int mg_sp_free(unsigned long addr, int id);
-extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id); extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); - -extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id);
-extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); -extern int mg_sp_unshare(unsigned long va, unsigned long size, int id); - -extern int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data); extern int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data);
-extern void sp_walk_page_free(struct sp_walk_data *sp_walk_data); extern void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data);
extern int sp_register_notifier(struct notifier_block *nb); extern int sp_unregister_notifier(struct notifier_block *nb);
-extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid);
-extern bool is_sharepool_addr(unsigned long addr); extern bool mg_is_sharepool_addr(unsigned long addr);
-extern int sp_id_of_current(void); extern int mg_sp_id_of_current(void);
extern void sp_area_drop(struct vm_area_struct *vma); @@ -350,21 +328,11 @@ static inline int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EPERM; }
-static inline int sp_group_add_task(int pid, int spg_id) -{ - return -EPERM; -} - static inline int mg_sp_group_del_task(int pid, int spg_id) { return -EPERM; }
-static inline int sp_group_del_task(int pid, int spg_id) -{ - return -EPERM; -} - static inline int sp_group_exit(struct mm_struct *mm) { return 0; @@ -379,74 +347,38 @@ static inline int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) return -EPERM; }
-static inline int sp_group_id_by_pid(int pid) -{ - return -EPERM; -} - static inline int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return -EPERM; }
-static inline void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id) -{ - return NULL; -} - static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { return NULL; }
-static inline int sp_free(unsigned long addr, int id) -{ - return -EPERM; -} - static inline int mg_sp_free(unsigned long addr, int id) { return -EPERM; }
-static inline void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) -{ - return NULL; -} - static inline void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { return NULL; }
-static inline void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) -{ - return NULL; -} - static inline void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { return NULL; }
-static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) -{ - return -EPERM; -} - static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) { return -EPERM; }
-static inline int sp_id_of_current(void) -{ - return -EPERM; -} - static inline int mg_sp_id_of_current(void) { return -EPERM; @@ -460,22 +392,12 @@ static inline void sp_area_drop(struct vm_area_struct *vma) { }
-static inline int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data) -{ - return 0; -} - static inline int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { return 0; }
-static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data) -{ -} - static inline void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { } @@ -490,21 +412,11 @@ static inline int sp_unregister_notifier(struct notifier_block *nb) return -EPERM; }
-static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) -{ - return false; -} - static inline bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { return false; }
-static inline bool is_sharepool_addr(unsigned long addr) -{ - return false; -} - static inline bool mg_is_sharepool_addr(unsigned long addr) { return false; diff --git a/mm/share_pool.c b/mm/share_pool.c index ab77d0d7648c..a8fc2fc3a746 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1009,38 +1009,6 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) return spg; }
-/** - * sp_group_id_by_pid() - Get the sp_group ID of a process. - * @pid: pid of target process. - * - * Return: - * 0 the sp_group ID. - * -ENODEV target process doesn't belong to any sp_group. - */ -int sp_group_id_by_pid(int pid) -{ - struct sp_group *spg; - int spg_id = -ENODEV; - - if (!sp_is_enabled()) - return -EOPNOTSUPP; - - check_interrupt_context(); - - spg = __sp_find_spg(pid, SPG_ID_DEFAULT); - if (!spg) - return -ENODEV; - - down_read(&spg->rw_lock); - if (spg_valid(spg)) - spg_id = spg->id; - up_read(&spg->rw_lock); - - sp_group_drop(spg); - return spg_id; -} -EXPORT_SYMBOL_GPL(sp_group_id_by_pid); - /** * mp_sp_group_id_by_pid() - Get the sp_group ID array of a process. * @pid: pid of target process. @@ -1617,12 +1585,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_add_task);
-int sp_group_add_task(int pid, int spg_id) -{ - return mg_sp_group_add_task(pid, PROT_READ | PROT_WRITE, spg_id); -} -EXPORT_SYMBOL_GPL(sp_group_add_task); - /** * mg_sp_group_del_task() - delete a process from a sp group. * @pid: the pid of the task to be deleted @@ -1726,13 +1688,7 @@ int mg_sp_group_del_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(mg_sp_group_del_task);
-int sp_group_del_task(int pid, int spg_id) -{ - return mg_sp_group_del_task(pid, spg_id); -} -EXPORT_SYMBOL_GPL(sp_group_del_task); - -int sp_id_of_current(void) +int mg_sp_id_of_current(void) { int ret, spg_id; struct sp_group_master *master; @@ -1764,12 +1720,6 @@ int sp_id_of_current(void)
return spg_id; } -EXPORT_SYMBOL_GPL(sp_id_of_current); - -int mg_sp_id_of_current(void) -{ - return sp_id_of_current(); -} EXPORT_SYMBOL_GPL(mg_sp_id_of_current);
/* the caller must hold sp_area_lock */ @@ -2298,7 +2248,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) }
/** - * sp_free() - Free the memory allocated by sp_alloc(). + * mg_sp_free() - Free the memory allocated by mg_sp_alloc(). * @addr: the starting VA of the memory. * @id: Address space identifier, which is used to distinguish the addr. * @@ -2307,7 +2257,7 @@ static int sp_free_get_spa(struct sp_free_context *fc) * * -EINVAL - the memory can't be found or was not allocted by share pool. * * -EPERM - the caller has no permision to free the memory. */ -int sp_free(unsigned long addr, int id) +int mg_sp_free(unsigned long addr, int id) { int ret = 0; struct sp_free_context fc = { @@ -2341,12 +2291,6 @@ int sp_free(unsigned long addr, int id) sp_try_to_compact(); return ret; } -EXPORT_SYMBOL_GPL(sp_free); - -int mg_sp_free(unsigned long addr, int id) -{ - return sp_free(addr, id); -} EXPORT_SYMBOL_GPL(mg_sp_free);
/* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */ @@ -2745,7 +2689,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, }
/** - * sp_alloc() - Allocate shared memory for all the processes in a sp_group. + * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group. * @size: the size of memory to allocate. * @sp_flags: how to allocate the memory. * @spg_id: the share group that the memory is allocated to. @@ -2756,7 +2700,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, * * if succeed, return the starting address of the shared memory. * * if fail, return the pointer of -errno. */ -void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { struct sp_area *spa = NULL; int ret = 0; @@ -2790,12 +2734,6 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) else return (void *)(spa->va_start); } -EXPORT_SYMBOL_GPL(sp_alloc); - -void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) -{ - return sp_alloc(size, sp_flags, spg_id); -} EXPORT_SYMBOL_GPL(mg_sp_alloc);
/** @@ -3126,7 +3064,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) }
/** - * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. + * mg_sp_make_share_k2u() - Share kernel memory to current process or an sp_group. * @kva: the VA of shared kernel memory. * @size: the size of shared kernel memory. * @sp_flags: how to allocate the memory. We only support SP_DVPP. @@ -3142,7 +3080,7 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) * * if succeed, return the shared user address to start at. * * if fail, return the pointer of -errno. */ -void *sp_make_share_k2u(unsigned long kva, unsigned long size, +void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { void *uva; @@ -3180,13 +3118,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, out: return sp_k2u_finish(uva, &kc); } -EXPORT_SYMBOL_GPL(sp_make_share_k2u); - -void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) -{ - return sp_make_share_k2u(kva, size, sp_flags, pid, spg_id); -} EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u);
static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -3422,7 +3353,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) }
/** - * sp_make_share_u2k() - Share user memory of a specified process to kernel. + * mg_sp_make_share_u2k() - Share user memory of a specified process to kernel. * @uva: the VA of shared user memory * @size: the size of shared user memory * @pid: the pid of the specified process(Not currently in use) @@ -3431,7 +3362,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) * * if success, return the starting kernel address of the shared memory. * * if failed, return the pointer of -errno. */ -void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { int ret = 0; struct mm_struct *mm = current->mm; @@ -3490,12 +3421,6 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) kvfree(sp_walk_data.pages); return p; } -EXPORT_SYMBOL_GPL(sp_make_share_u2k); - -void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) -{ - return sp_make_share_u2k(uva, size, pid); -} EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k);
/* @@ -3717,7 +3642,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) }
/** - * sp_unshare() - Unshare the kernel or user memory which shared by calling + * mg_sp_unshare() - Unshare the kernel or user memory which shared by calling * sp_make_share_{k2u,u2k}(). * @va: the specified virtual address of memory * @size: the size of unshared memory @@ -3726,7 +3651,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) * * Return: 0 for success, -errno on failure. */ -int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) +int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id) { int ret = 0;
@@ -3752,16 +3677,10 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
return ret; } -EXPORT_SYMBOL_GPL(sp_unshare); - -int mg_sp_unshare(unsigned long va, unsigned long size, int id) -{ - return sp_unshare(va, size, 0, id); -} EXPORT_SYMBOL_GPL(mg_sp_unshare);
/** - * sp_walk_page_range() - Walk page table with caller specific callbacks. + * mg_sp_walk_page_range() - Walk page table with caller specific callbacks. * @uva: the start VA of user memory. * @size: the size of user memory. * @tsk: task struct of the target task. @@ -3772,7 +3691,7 @@ EXPORT_SYMBOL_GPL(mg_sp_unshare); * When return 0, sp_walk_data describing [uva, uva+size) can be used. * When return -errno, information in sp_walk_data is useless. */ -int sp_walk_page_range(unsigned long uva, unsigned long size, +int mg_sp_walk_page_range(unsigned long uva, unsigned long size, struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { struct mm_struct *mm; @@ -3811,20 +3730,13 @@ int sp_walk_page_range(unsigned long uva, unsigned long size,
return ret; } -EXPORT_SYMBOL_GPL(sp_walk_page_range); - -int mg_sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data) -{ - return sp_walk_page_range(uva, size, tsk, sp_walk_data); -} EXPORT_SYMBOL_GPL(mg_sp_walk_page_range);
/** - * sp_walk_page_free() - Free the sp_walk_data structure. + * mg_sp_walk_page_free() - Free the sp_walk_data structure. * @sp_walk_data: a structure of a page pointer array to be freed. */ -void sp_walk_page_free(struct sp_walk_data *sp_walk_data) +void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) { if (!sp_is_enabled()) return; @@ -3836,12 +3748,6 @@ void sp_walk_page_free(struct sp_walk_data *sp_walk_data)
__sp_walk_page_free(sp_walk_data); } -EXPORT_SYMBOL_GPL(sp_walk_page_free); - -void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) -{ - sp_walk_page_free(sp_walk_data); -} EXPORT_SYMBOL_GPL(mg_sp_walk_page_free);
int sp_register_notifier(struct notifier_block *nb) @@ -3857,7 +3763,7 @@ int sp_unregister_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(sp_unregister_notifier);
/** - * sp_config_dvpp_range() - User can config the share pool start address + * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. * @start: the value of share pool start * @size: the value of share pool @@ -3868,7 +3774,7 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); * Return false if parameter invalid or has been set up. * This functuon has no concurrent problem. */ -bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { int ret; bool err = false; @@ -3918,12 +3824,6 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
return err; } -EXPORT_SYMBOL_GPL(sp_config_dvpp_range); - -bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) -{ - return sp_config_dvpp_range(start, size, device_id, pid); -} EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range);
static bool is_sp_normal_addr(unsigned long addr) @@ -3934,22 +3834,16 @@ static bool is_sp_normal_addr(unsigned long addr) }
/** - * is_sharepool_addr() - Check if a user memory address belongs to share pool. + * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. * * Return true if addr belongs to share pool, or false vice versa. */ -bool is_sharepool_addr(unsigned long addr) +bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && (is_sp_normal_addr(addr) || is_device_addr(addr)); } -EXPORT_SYMBOL_GPL(is_sharepool_addr); - -bool mg_is_sharepool_addr(unsigned long addr) -{ - return is_sharepool_addr(addr); -} EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
int sp_node_id(struct vm_area_struct *vma) @@ -4430,7 +4324,7 @@ static void __init proc_sharepool_init(void)
bool sp_check_addr(unsigned long addr) { - if (sp_is_enabled() && is_sharepool_addr(addr) && + if (sp_is_enabled() && mg_is_sharepool_addr(addr) && !check_aoscore_process(current)) { sp_dump_stack(); return true; @@ -4440,7 +4334,7 @@ bool sp_check_addr(unsigned long addr)
bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { - if (sp_is_enabled() && is_sharepool_addr(addr) && + if (sp_is_enabled() && mg_is_sharepool_addr(addr) && !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) { sp_dump_stack(); return true;
From: Guo Mengqi <guomengqi3@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5K3MH
CVE: NA
--------------------------------
After refactoring, cat /proc/pid_xx/sp_group causes a kernel panic: proc_sp_group_state() iterates master->node_list through the wrong list_head member (proc_node instead of group_node), so the sp_group_node pointers it computes are bogus. Fix this by iterating with the correct member.
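The mistake generalizes: list_for_each_entry() recovers the containing structure with container_of() using the member name it is given, so naming a member that links a different list silently walks garbage. A minimal self-contained illustration (simplified types, not the actual share pool structures):

    struct node {
        struct list_head proc_node;   /* links a per-group list */
        struct list_head group_node;  /* links the per-master list */
        int id;
    };
    static LIST_HEAD(master_head);    /* nodes added via group_node */

    struct node *n;
    /* WRONG: entries of master_head point at group_node, but
     * container_of() subtracts the offset of proc_node, so n is
     * shifted and n->id reads unrelated memory -> eventual panic. */
    list_for_each_entry(n, &master_head, proc_node)
        pr_info("%d\n", n->id);

    /* RIGHT: name the member that actually links this list. */
    list_for_each_entry(n, &master_head, group_node)
        pr_info("%d\n", n->id);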
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
---
 mm/share_pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index a8fc2fc3a746..837a60d25002 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -3993,7 +3993,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
 	seq_printf(m, "%-8s %-9s %-9s %-9s %-4s\n",
 		   "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT");

-	list_for_each_entry(spg_node, &master->node_list, proc_node) {
+	list_for_each_entry(spg_node, &master->node_list, group_node) {
 		seq_printf(m, "%-8d %-9ld %-9ld %-9ld ",
 			   spg_node->spg->id,
 			   get_spg_proc_alloc(spg_node),
From: Guo Mengqi <guomengqi3@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KSDH
CVE: NA
--------------------------------
Fix redundant /proc/sharepool/spa_stat prints when there are multiple groups all attached to the same sp_mapping.

Traverse all DVPP mappings rather than all groups, so each mapping is shown exactly once.
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
---
 mm/share_pool.c | 44 ++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 837a60d25002..6996fdc4a99a 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -122,6 +122,11 @@ static LIST_HEAD(master_list); /* mutex to protect insert/delete ops from master_list */ static DEFINE_MUTEX(master_list_lock);
+/* list of all spm-dvpp */
+static LIST_HEAD(spm_dvpp_list);
+/* mutex to protect insert/delete ops from spm_dvpp_list */
+static DEFINE_MUTEX(spm_list_lock);
+
 /* for kthread buff_module_guard_work */
 static struct sp_proc_stat kthread_stat;
@@ -189,6 +194,7 @@ struct sp_mapping {
/* list head for all groups attached to this mapping, dvpp mapping only */ struct list_head group_head; + struct list_head spm_node; };
/* Processes in the same sp_group can share memory. @@ -290,6 +296,22 @@ static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat #define SP_MAPPING_NORMAL 0x2 static struct sp_mapping *sp_mapping_normal;
+static void sp_mapping_add_to_list(struct sp_mapping *spm) +{ + mutex_lock(&spm_list_lock); + if (spm->flag & SP_MAPPING_DVPP) + list_add_tail(&spm->spm_node, &spm_dvpp_list); + mutex_unlock(&spm_list_lock); +} + +static void sp_mapping_remove_from_list(struct sp_mapping *spm) +{ + mutex_lock(&spm_list_lock); + if (spm->flag & SP_MAPPING_DVPP) + list_del(&spm->spm_node); + mutex_unlock(&spm_list_lock); +} + static void sp_mapping_range_init(struct sp_mapping *spm) { int i; @@ -325,12 +347,14 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; INIT_LIST_HEAD(&spm->group_head); + sp_mapping_add_to_list(spm);
return spm; }
static void sp_mapping_destroy(struct sp_mapping *spm) { + sp_mapping_remove_from_list(spm); kfree(spm); }
@@ -4066,22 +4090,14 @@ static void spa_normal_stat_show(struct seq_file *seq) spa_stat_of_mapping_show(seq, sp_mapping_normal); }
-static int idr_spg_dvpp_stat_show_cb(int id, void *p, void *data) -{ - struct sp_group *spg = p; - struct seq_file *seq = data; - - if (!is_local_group(spg->id) || atomic_read(&spg->dvpp->user) == 1) - spa_stat_of_mapping_show(seq, spg->dvpp); - - return 0; -} - static void spa_dvpp_stat_show(struct seq_file *seq) { - down_read(&sp_group_sem); - idr_for_each(&sp_group_idr, idr_spg_dvpp_stat_show_cb, seq); - up_read(&sp_group_sem); + struct sp_mapping *spm; + + mutex_lock(&spm_list_lock); + list_for_each_entry(spm, &spm_dvpp_list, spm_node) + spa_stat_of_mapping_show(seq, spm); + mutex_unlock(&spm_list_lock); }
From: Wang Wensheng <wangwensheng4@huawei.com>

Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5LHGZ
CVE: NA
--------------------------------
Delete the unused sysctl interfaces from the share pool feature.
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 include/linux/share_pool.h |  21 ----
 kernel/sysctl.c            |  67 ------
 mm/share_pool.c            | 191 ++-----------------------------------
 3 files changed, 6 insertions(+), 273 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index e8bc9a368e34..4860e4b00e57 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -48,23 +48,8 @@
#define MAX_DEVID 8 /* the max num of Da-vinci devices */
-extern int sysctl_share_pool_hugepage_enable; - -extern int sysctl_ac_mode; - -extern int sysctl_sp_debug_mode; - extern struct static_key_false share_pool_enabled_key;
-extern int sysctl_share_pool_map_lock_enable; - -extern int sysctl_sp_compact_enable; -extern unsigned long sysctl_sp_compact_interval; -extern unsigned long sysctl_sp_compact_interval_max; -extern int sysctl_sp_perf_alloc; - -extern int sysctl_sp_perf_k2u; - #ifdef __GENKSYMS__ /* we estimate an sp-group ususally contains at most 64 sp-group */ #define SP_SPG_HASH_BITS 6 @@ -307,12 +292,6 @@ static inline bool sp_check_vm_share_pool(unsigned long vm_flags) return false; }
-static inline void sp_dump_stack(void) -{ - if (sysctl_sp_debug_mode) - dump_stack(); -} - static inline bool is_vmalloc_sharepool(unsigned long vm_flags) { if (sp_is_enabled() && (vm_flags & VM_SHAREPOOL)) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2f7b48f92103..0def805a81dc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -71,7 +71,6 @@ #include <linux/coredump.h> #include <linux/latencytop.h> #include <linux/pid.h> -#include <linux/share_pool.h>
#include "../lib/kstrtox.h"
@@ -3198,72 +3197,6 @@ static struct ctl_table vm_table[] = { .extra2 = SYSCTL_ONE, }, #endif -#ifdef CONFIG_ASCEND_SHARE_POOL - { - .procname = "sharepool_debug_mode", - .data = &sysctl_sp_debug_mode, - .maxlen = sizeof(sysctl_sp_debug_mode), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_compact_enable", - .data = &sysctl_sp_compact_enable, - .maxlen = sizeof(sysctl_sp_compact_enable), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_compact_interval", - .data = &sysctl_sp_compact_interval, - .maxlen = sizeof(sysctl_sp_compact_interval), - .mode = 0600, - .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero_ul, - .extra2 = &sysctl_sp_compact_interval_max, - }, - { - /* 0: map_unlock, 1: map_lock */ - .procname = "share_pool_map_lock_enable", - .data = &sysctl_share_pool_map_lock_enable, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "sharepool_perf_k2u", - .data = &sysctl_sp_perf_k2u, - .maxlen = sizeof(sysctl_sp_perf_k2u), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ten_thousand, - }, - { - .procname = "sharepool_perf_alloc", - .data = &sysctl_sp_perf_alloc, - .maxlen = sizeof(sysctl_sp_perf_alloc), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ten_thousand, - }, - { - .procname = "sharepool_ac_mode", - .data = &sysctl_ac_mode, - .maxlen = sizeof(sysctl_ac_mode), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_HUGETLBFS { .procname = "hugepage_mig_noalloc", diff --git a/mm/share_pool.c b/mm/share_pool.c index 6996fdc4a99a..9a5f7203f2ce 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -44,7 +44,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/rmap.h> -#include <linux/compaction.h> #include <linux/preempt.h> #include <linux/swapops.h> #include <linux/mmzone.h> @@ -52,10 +51,6 @@ #include <linux/time64.h> #include <linux/pagewalk.h>
-/* access control mode macros */ -#define AC_NONE 0 -#define AC_SINGLE_OWNER 1 - #define spg_valid(spg) ((spg)->is_alive == true)
/* Use spa va address as mmap offset. This can work because spa_file @@ -82,19 +77,6 @@ static int __read_mostly enable_mdc_default_group; static const int mdc_default_group_id = 1;
-/* share the uva to the whole group */ -static int __read_mostly enable_share_k2u_spg = 1; - -/* access control mode */ -int sysctl_ac_mode = AC_NONE; -/* debug mode */ -int sysctl_sp_debug_mode; - -int sysctl_share_pool_map_lock_enable; - -int sysctl_sp_perf_k2u; -int sysctl_sp_perf_alloc; - static int system_group_count;
static unsigned int sp_device_number; @@ -232,8 +214,6 @@ struct sp_group { struct list_head spa_list; /* group statistics */ struct sp_spg_stat instat; - /* we define the creator process of a sp_group as owner */ - struct task_struct *owner; /* is_alive == false means it's being destroyed */ bool is_alive; atomic_t use_count; @@ -835,7 +815,6 @@ static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, struct sp_group_node *spg_node, enum spa_type type) { if (unlikely(!spg_node)) { - sp_dump_stack(); WARN(1, "null sp group node\n"); return; } @@ -910,8 +889,6 @@ struct sp_k2u_context { int state; int spg_id; bool to_task; - struct timespec64 start; - struct timespec64 end; };
static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, @@ -1156,7 +1133,6 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) spg->flag = flag; spg->is_alive = true; spg->proc_num = 0; - spg->owner = current->group_leader; atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); INIT_LIST_HEAD(&spg->spa_list); @@ -1441,7 +1417,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&sp_group_sem); ret = -EACCES; free_new_spg_id(id_newly_generated, spg_id); - sp_dump_stack(); goto out_put_task; }
@@ -1473,14 +1448,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_mm; }
- /* access control permission check */ - if (sysctl_ac_mode == AC_SINGLE_OWNER) { - if (spg->owner != current->group_leader) { - ret = -EPERM; - goto out_drop_group; - } - } - down_write(&spg->rw_lock); ret = mm_add_group_init(tsk, mm, spg); if (ret) { @@ -2064,63 +2031,6 @@ void sp_area_drop(struct vm_area_struct *vma) spin_unlock(&sp_area_lock); }
-int sysctl_sp_compact_enable; -unsigned long sysctl_sp_compact_interval = 30UL; -unsigned long sysctl_sp_compact_interval_max = 1000UL; -static unsigned long compact_last_jiffies; -static unsigned long compact_daemon_status; -#define COMPACT_START 1 -#define COMPACT_STOP 0 - -static void sp_compact_nodes(struct work_struct *work) -{ - sysctl_compaction_handler(NULL, 1, NULL, NULL, NULL); - - kfree(work); - - compact_last_jiffies = jiffies; - cmpxchg(&compact_daemon_status, COMPACT_START, COMPACT_STOP); -} - -static void sp_add_work_compact(void) -{ - struct work_struct *compact_work; - - if (!sysctl_sp_compact_enable) - return; - - /* experimental compaction time: 4GB->1.7s, 8GB->3.4s */ - if (!time_after(jiffies, - compact_last_jiffies + sysctl_sp_compact_interval * HZ)) - return; - - if (cmpxchg(&compact_daemon_status, COMPACT_STOP, COMPACT_START) == - COMPACT_START) - return; - - compact_work = kzalloc(sizeof(*compact_work), GFP_KERNEL); - if (!compact_work) - return; - - INIT_WORK(compact_work, sp_compact_nodes); - schedule_work(compact_work); -} - -static void sp_try_to_compact(void) -{ - unsigned long totalram; - unsigned long freeram; - - totalram = totalram_pages(); - freeram = global_zone_page_state(NR_FREE_PAGES); - - /* free < total / 3 */ - if ((freeram + (freeram << 1)) > totalram) - return; - - sp_add_work_compact(); -} - /* * The function calls of do_munmap() won't change any non-atomic member * of struct sp_group. Please review the following chain: @@ -2311,8 +2221,6 @@ int mg_sp_free(unsigned long addr, int id)
__sp_area_drop(fc.spa); /* match __find_sp_area in sp_free_get_spa */ out: - sp_dump_stack(); - sp_try_to_compact(); return ret; } EXPORT_SYMBOL_GPL(mg_sp_free); @@ -2330,12 +2238,6 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; struct vm_area_struct *vma;
- /* Mark the mapped region to be locked. After the MAP_LOCKED is enable, - * multiple tasks will preempt resources, causing performance loss. - */ - if (sysctl_share_pool_map_lock_enable) - flags |= MAP_LOCKED; - atomic_inc(&spa->use_count); addr = __do_mmap_mm(mm, file, addr, size, prot, flags, vm_flags, pgoff, populate, NULL); @@ -2350,7 +2252,6 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, *pvma = vma; }
- return addr; }
@@ -2368,39 +2269,10 @@ struct sp_alloc_context { unsigned long populate; int state; bool need_fallocate; - struct timespec64 start; - struct timespec64 end; bool have_mbind; enum spa_type type; };
-static void trace_sp_alloc_begin(struct sp_alloc_context *ac) -{ - if (!sysctl_sp_perf_alloc) - return; - - ktime_get_ts64(&ac->start); -} - -static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) -{ - unsigned long cost; - - if (!sysctl_sp_perf_alloc) - return; - - ktime_get_ts64(&ac->end); - - cost = SEC2US(ac->end.tv_sec - ac->start.tv_sec) + - NS2US(ac->end.tv_nsec - ac->start.tv_nsec); - if (cost >= (unsigned long)sysctl_sp_perf_alloc) { - pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", - current->comm, current->tgid, current->pid, - va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, - is_local_group(ac->spg->id)); - } -} - static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, int spg_id, struct sp_alloc_context *ac) { @@ -2408,8 +2280,6 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
check_interrupt_context();
- trace_sp_alloc_begin(ac); - /* mdc scene hack */ if (enable_mdc_default_group) spg_id = mdc_default_group_id; @@ -2594,11 +2464,8 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, * depends on this feature (and MAP_LOCKED) to work correctly. */ ret = do_mm_populate(mm, sp_addr, ac->populate, 0); - if (spa->is_hugepage) { + if (spa->is_hugepage) memalloc_noreclaim_restore(noreclaim_flag); - if (ret) - sp_add_work_compact(); - }
return ret; } @@ -2702,14 +2569,10 @@ static void sp_alloc_finish(int result, struct sp_area *spa, sp_update_process_stat(current, true, spa);
/* this will free spa if mmap failed */ - if (spa && !IS_ERR(spa)) { + if (spa && !IS_ERR(spa)) __sp_area_drop(spa); - trace_sp_alloc_finish(ac, spa->va_start); - }
sp_group_drop(spg); - sp_dump_stack(); - sp_try_to_compact(); }
/** @@ -2994,33 +2857,6 @@ static bool vmalloc_area_set_flag(unsigned long kva, unsigned long flags) return false; }
-static void trace_sp_k2u_begin(struct sp_k2u_context *kc) -{ - if (!sysctl_sp_perf_k2u) - return; - - ktime_get_ts64(&kc->start); -} - -static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva) -{ - unsigned long cost; - - if (!sysctl_sp_perf_k2u) - return; - - ktime_get_ts64(&kc->end); - - cost = SEC2US(kc->end.tv_sec - kc->start.tv_sec) + - NS2US(kc->end.tv_nsec - kc->start.tv_nsec); - if (cost >= (unsigned long)sysctl_sp_perf_k2u) { - pr_err("Task %s(%d/%d) sp_k2u returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, to_task is %d\n", - current->comm, current->tgid, current->pid, - (unsigned long)uva, cost, byte2kb(kc->size), byte2kb(kc->size_aligned), - kc->sp_flags, kc->to_task); - } -} - static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned long sp_flags, int spg_id, struct sp_k2u_context *kc) { @@ -3028,8 +2864,6 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; unsigned long kva_aligned, size_aligned;
- trace_sp_k2u_begin(kc); - if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; @@ -3082,8 +2916,6 @@ static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) else uva = uva + (kc->kva - kc->kva_aligned);
- trace_sp_k2u_finish(kc, uva); - sp_dump_stack(); return uva; }
@@ -3607,8 +3439,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) WARN(1, "unshare uva invalid spa type"); }
- sp_dump_stack(); - out_clr_flag: if (!vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL)) pr_debug("clear spa->kva %ld is not valid\n", spa->kva); @@ -3893,13 +3723,6 @@ static int __init mdc_default_group(char *s) } __setup("enable_mdc_default_group", mdc_default_group);
-static int __init enable_share_k2u_to_group(char *s) -{ - enable_share_k2u_spg = 1; - return 1; -} -__setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group); - /*** Statistical and maintenance functions ***/
static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, @@ -4341,20 +4164,18 @@ static void __init proc_sharepool_init(void) bool sp_check_addr(unsigned long addr) { if (sp_is_enabled() && mg_is_sharepool_addr(addr) && - !check_aoscore_process(current)) { - sp_dump_stack(); + !check_aoscore_process(current)) return true; - } else + else return false; }
bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { if (sp_is_enabled() && mg_is_sharepool_addr(addr) && - !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) { - sp_dump_stack(); + !check_aoscore_process(current) && !(flags & MAP_SHARE_POOL)) return true; - } else + else return false; }
From: Zhang Zekun <zhangzekun11@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY5K
CVE: NA
-----------------------------------
Remove the unused sp_dev_va_start and sp_dev_va_size arrays and the related code.

Add a DVPP address check, is_sp_dvpp_addr(), to mg_is_sharepool_addr() for the current process.
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 mm/share_pool.c | 62 +++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 28 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 9a5f7203f2ce..466c72c17717 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -80,13 +80,6 @@ static const int mdc_default_group_id = 1; static int system_group_count;
static unsigned int sp_device_number; -static unsigned long sp_dev_va_start[MAX_DEVID]; -static unsigned long sp_dev_va_size[MAX_DEVID]; - -static bool is_sp_dev_addr_enabled(int device_id) -{ - return sp_dev_va_size[device_id]; -}
/* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); @@ -303,14 +296,9 @@ static void sp_mapping_range_init(struct sp_mapping *spm) continue; }
- if (!is_sp_dev_addr_enabled(i)) { - spm->start[i] = MMAP_SHARE_POOL_16G_START + - i * MMAP_SHARE_POOL_16G_SIZE; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; - } else { - spm->start[i] = sp_dev_va_start[i]; - spm->end[i] = spm->start[i] + sp_dev_va_size[i]; - } + spm->start[i] = MMAP_SHARE_POOL_16G_START + + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } }
@@ -1087,18 +1075,6 @@ static bool is_online_node_id(int node_id) return node_id >= 0 && node_id < MAX_NUMNODES && node_online(node_id); }
-static bool is_device_addr(unsigned long addr) -{ - int i; - - for (i = 0; i < sp_device_number; i++) { - if (addr >= sp_dev_va_start[i] && - addr < sp_dev_va_start[i] + sp_dev_va_size[i]) - return true; - } - return false; -} - static struct sp_group *create_spg(int spg_id, unsigned long flag) { int ret; @@ -3687,6 +3663,36 @@ static bool is_sp_normal_addr(unsigned long addr) sp_device_number * MMAP_SHARE_POOL_16G_SIZE; }
+static bool is_sp_dvpp_addr(unsigned long addr) +{ + int i; + struct mm_struct *mm; + struct sp_group_master *master; + struct sp_mapping *spm_dvpp; + + mm = current->mm; + if (!mm) + return false; + + down_read(&sp_group_sem); + master = mm->sp_group_master; + if (!master) { + up_read(&sp_group_sem); + return false; + } + + /* master->local and master->local->dvpp won't be NULL*/ + spm_dvpp = master->local->dvpp; + for (i = 0; i < MAX_DEVID; i++) { + if (addr >= spm_dvpp->start[i] && addr < spm_dvpp->end[i]) { + up_read(&sp_group_sem); + return true; + } + } + up_read(&sp_group_sem); + return false; +} + /** * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. * @addr: the userspace address to be checked. @@ -3696,7 +3702,7 @@ static bool is_sp_normal_addr(unsigned long addr) bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && - (is_sp_normal_addr(addr) || is_device_addr(addr)); + ((is_sp_normal_addr(addr) || is_sp_dvpp_addr(addr))); } EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
From: Zhang Zekun <zhangzekun11@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY4H
CVE: NA
-----------------------------------------
Remove sp_device_number and the sp_device_number_detect() function: there is no real detection to do, so use the macro 'MAX_DEVID' in place of sp_device_number.
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 mm/share_pool.c | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 466c72c17717..51d16555269b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -79,8 +79,6 @@ static const int mdc_default_group_id = 1;
static int system_group_count;
-static unsigned int sp_device_number; - /* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); /* rw semaphore for sp_group_idr and mm->sp_group_master */ @@ -372,7 +370,7 @@ static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) { int i;
- for (i = 0; i < sp_device_number; i++) + for (i = 0; i < MAX_DEVID; i++) if (m1->start[i] != m2->start[i] || m1->end[i] != m2->end[i]) return false;
@@ -3619,7 +3617,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
/* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= sp_device_number || !is_online_node_id(device_id)) + device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id)) return false;
ret = get_task(pid, &tsk); @@ -3660,7 +3658,7 @@ static bool is_sp_normal_addr(unsigned long addr) { return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START + - sp_device_number * MMAP_SHARE_POOL_16G_SIZE; + MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; }
static bool is_sp_dvpp_addr(unsigned long addr) @@ -4423,18 +4421,6 @@ static int __init enable_share_pool(char *s) } __setup("enable_ascend_share_pool", enable_share_pool);
-static void __init sp_device_number_detect(void) -{ - /* NOTE: TO BE COMPLETED */ - sp_device_number = 4; - - if (sp_device_number > MAX_DEVID) { - pr_warn("sp_device_number %d exceed, truncate it to %d\n", - sp_device_number, MAX_DEVID); - sp_device_number = MAX_DEVID; - } -} - static int __init share_pool_init(void) { if (!sp_is_enabled()) @@ -4445,7 +4431,6 @@ static int __init share_pool_init(void) goto fail; atomic_inc(&sp_mapping_normal->user);
- sp_device_number_detect(); proc_sharepool_init();
return 0;
From: Zhang Zekun <zhangzekun11@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY51
CVE: NA
----------------------------------------------
The variable enable_mdc_default_group has been deprecated, so remove it and the corresponding code. The return value of is_process_in_group() (a node pointer used as a predicate) is ambiguous, so change the return type to bool.
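A condensed view of the ambiguity being removed (the calling code below is illustrative, not a verbatim caller; the error value is made up):

    /* Before: reads as a predicate but returns a node pointer, so
     * callers use the same function both as a test and as a lookup. */
    struct sp_group_node *is_process_in_group(struct sp_group *spg,
                                              struct mm_struct *mm);

    /* After: the predicate is unambiguously boolean; callers that
     * really need the node use the explicit lookup helper instead. */
    bool is_process_in_group(struct sp_group *spg, struct mm_struct *mm);

    if (!is_process_in_group(spg, mm))
        return -ESRCH;
    spg_node = find_spg_node_by_spg(mm, spg);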
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 mm/share_pool.c | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 51d16555269b..6269702227f9 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,10 +73,6 @@
#define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
-/* mdc scene hack */ -static int __read_mostly enable_mdc_default_group; -static const int mdc_default_group_id = 1; - static int system_group_count;
/* idr of all sp_groups */ @@ -944,16 +940,16 @@ static int get_task(int pid, struct task_struct **task) * 1. hold spg->rw_lock * 2. ensure no concurrency problem for mm_struct */ -static struct sp_group_node *is_process_in_group(struct sp_group *spg, +static bool is_process_in_group(struct sp_group *spg, struct mm_struct *mm) { struct sp_group_node *spg_node;
list_for_each_entry(spg_node, &spg->procs, proc_node) if (spg_node->master->mm == mm) - return spg_node; + return true;
- return NULL; + return false; }
/* user must call sp_group_drop() after use */ @@ -1339,10 +1335,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) return -EINVAL; }
- /* mdc scene hack */ - if (enable_mdc_default_group) - spg_id = mdc_default_group_id; - if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("add group failed, invalid group id %d\n", spg_id); return -EINVAL; @@ -1614,7 +1606,7 @@ int mg_sp_group_del_task(int pid, int spg_id) goto out_put_task; }
- spg_node = is_process_in_group(spg, mm); + spg_node = find_spg_node_by_spg(mm, spg); if (!spg_node) { up_write(&sp_group_sem); pr_err_ratelimited("process not in group"); @@ -2254,10 +2246,6 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
check_interrupt_context();
- /* mdc scene hack */ - if (enable_mdc_default_group) - spg_id = mdc_default_group_id; - if (current->flags & PF_KTHREAD) { pr_err_ratelimited("allocation failed, task is kthread\n"); return -EINVAL; @@ -3720,13 +3708,6 @@ int sp_node_id(struct vm_area_struct *vma) return node_id; }
-static int __init mdc_default_group(char *s) -{ - enable_mdc_default_group = 1; - return 1; -} -__setup("enable_mdc_default_group", mdc_default_group); - /*** Statistical and maintenance functions ***/
static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon,
From: Zhang Zekun <zhangzekun11@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5LY2R
CVE: NA
-------------------------------------------
Remove the meaningless comment in mg_sp_free() and fix the bug in the mg_sp_group_id_by_pid() parameter-check path.
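The bug is a pointer/value mix-up: num has type int *, so the old test "num <= 0" compared the pointer itself against zero and rejected nothing but NULL. The corrected check validates both the pointer and the count it points to:

    int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num)
    {
        ...
        /* was: if (!spg_ids || num <= 0)  -- a pointer comparison */
        if (!spg_ids || !num || *num <= 0)
            return -EINVAL;
        ...
    }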
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 mm/share_pool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 6269702227f9..a8654ef0e784 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -1016,7 +1016,7 @@ int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num)

 	check_interrupt_context();

-	if (!spg_ids || num <= 0)
+	if (!spg_ids || !num || *num <= 0)
 		return -EINVAL;

 	ret = get_task(pid, &tsk);
@@ -2179,7 +2179,6 @@ int mg_sp_free(unsigned long addr, int id)

 	sp_free_unmap_fallocate(fc.spa);

-	/* current->mm == NULL: allow kthread */
 	if (current->mm == NULL)
 		atomic64_sub(fc.spa->real_size, &kthread_stat.alloc_size);
 	else
From: Guo Mengqi <guomengqi3@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5M3PS
CVE: NA
--------------------------------
- Fix incorrect SP_RES value.
- Fix incorrect SP_RES_T value.
- Fix uninitialized pid field in the pass-through scenario.
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
---
 mm/share_pool.c | 34 ++++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index a8654ef0e784..8cc75d996e0f 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -250,12 +250,14 @@ struct sp_group_node { #endif
/* The caller should hold mmap_sem to protect master (TBD) */ -static void sp_init_group_master_stat(struct mm_struct *mm, struct sp_proc_stat *stat) +static void sp_init_group_master_stat(int tgid, struct mm_struct *mm, + struct sp_proc_stat *stat) { atomic64_set(&stat->alloc_nsize, 0); atomic64_set(&stat->alloc_hsize, 0); atomic64_set(&stat->k2u_size, 0); stat->mm = mm; + stat->tgid = tgid; get_task_comm(stat->comm, current); }
@@ -484,7 +486,7 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct INIT_LIST_HEAD(&master->node_list); master->count = 0; master->mm = mm; - sp_init_group_master_stat(mm, &master->instat); + sp_init_group_master_stat(tsk->tgid, mm, &master->instat); mm->sp_group_master = master;
mutex_lock(&master_list_lock); @@ -1420,7 +1422,6 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) up_write(&spg->rw_lock); goto out_drop_group; } - mm->sp_group_master->instat.tgid = tsk->tgid;
ret = sp_mapping_group_setup(mm, spg); if (ret) { @@ -3729,18 +3730,27 @@ static long get_proc_alloc(struct sp_proc_stat *stat) atomic64_read(&stat->alloc_hsize)); }
-static void get_process_sp_res(struct sp_proc_stat *stat, +static void get_process_sp_res(struct sp_group_master *master, long *sp_res_out, long *sp_res_nsize_out) { - *sp_res_out = byte2kb(atomic64_read(&stat->alloc_nsize) + - atomic64_read(&stat->alloc_hsize)); - *sp_res_nsize_out = byte2kb(atomic64_read(&stat->alloc_nsize)); + struct sp_group *spg; + struct sp_group_node *spg_node; + + *sp_res_out = 0; + *sp_res_nsize_out = 0; + + list_for_each_entry(spg_node, &master->node_list, group_node) { + spg = spg_node->spg; + *sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize)); + *sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_hsize)); + *sp_res_nsize_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize)); + } }
static long get_sp_res_by_spg_proc(struct sp_group_node *spg_node) { - return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + - atomic64_read(&spg_node->instat.alloc_hsize)); + return byte2kb(atomic64_read(&spg_node->spg->instat.alloc_nsize) + + atomic64_read(&spg_node->spg->instat.alloc_hsize)); }
/* @@ -3805,7 +3815,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat; - get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm);
@@ -4057,7 +4067,7 @@ static int proc_usage_by_group(int id, void *p, void *data) tgid = master->instat.tgid;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm);
@@ -4118,7 +4128,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset) list_for_each_entry(master, &master_list, list_node) { proc_stat = &master->instat; get_mm_rss_info(master->mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(&master->instat, &sp_res, &sp_res_nsize); + get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n",
From: Guo Mengqi <guomengqi3@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5MS48
CVE: NA
--------------------------------
Fix two bugs revealed by static checking:

- Release mm->mmap_lock when mm->sp_group_master has not been initialized.
- Do not leave mm on the master list when adding the process to a group fails (see the sketch below).
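The second fix follows the usual unwind rule: a side effect taken on the way in (here, linking master into master_list) must be reverted on the error path before the structure is freed. A sketch simplified from sp_init_group_master_locked(); the setup() call is an illustrative placeholder:

    list_add_tail(&master->list_node, &master_list);
    ret = setup(...);
    if (ret)
        goto free_master;
    return 0;

    free_master:
        /* unlink before kfree(), or master_list keeps a dangling
         * pointer into freed memory */
        list_del(&master->list_node);
        mm->sp_group_master = NULL;
        kfree(master);
        return ret;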
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
---
 mm/share_pool.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 8cc75d996e0f..4fb594491cce 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -500,6 +500,7 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct
 	return 0;

 free_master:
+	list_del(&master->list_node);
 	mm->sp_group_master = NULL;
 	kfree(master);

@@ -3810,8 +3811,10 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,

 	down_read(&mm->mmap_lock);
 	master = mm->sp_group_master;
-	if (!master)
+	if (!master) {
+		up_read(&mm->mmap_lock);
 		return 0;
+	}

 	get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss);
 	proc_stat = &master->instat;
From: Zhang Zekun <zhangzekun11@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5XQS4
CVE: NA
-----------------------------------------------
In get_process_sp_res(), spg_node can be freed by another process while the master's node list is being walked, so the access to spg_node->spg can cause a kernel panic. Add a pair of read locks on sp_group_sem to fix this problem. Fix the same problem in proc_sp_group_state().
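Schematically, the race being closed: group add/delete paths free sp_group_node entries while holding the write side of sp_group_sem, so any walker of master->node_list must hold at least the read side. A simplified sketch of the fixed reader (not the exact function body):

    down_read(&sp_group_sem);   /* excludes a concurrent group delete
                                 * that would free spg_node under us */
    list_for_each_entry(spg_node, &master->node_list, group_node)
        sp_res += byte2kb(atomic64_read(&spg_node->spg->instat.alloc_nsize));
    up_read(&sp_group_sem);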
Fixes: 3d37f8717287 ("[Huawei] mm: sharepool: use built-in-statistics")
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 mm/share_pool.c | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 4fb594491cce..aa0d16a7befe 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -3809,10 +3809,12 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
 	if (!mm)
 		return 0;

+	down_read(&sp_group_sem);
 	down_read(&mm->mmap_lock);
 	master = mm->sp_group_master;
 	if (!master) {
 		up_read(&mm->mmap_lock);
+		up_read(&sp_group_sem);
 		return 0;
 	}

@@ -3847,6 +3849,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
 		seq_putc(m, '\n');
 	}
 	up_read(&mm->mmap_lock);
+	up_read(&sp_group_sem);
 	return 0;
 }

@@ -4127,6 +4130,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset)
 		   "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES",
 		   "Non-SP_Shm", "VIRT");

+	down_read(&sp_group_sem);
 	mutex_lock(&master_list_lock);
 	list_for_each_entry(master, &master_list, list_node) {
 		proc_stat = &master->instat;
@@ -4142,6 +4146,7 @@ static int proc_usage_show(struct seq_file *seq, void *offset)
 			   page2kb(master->mm->total_vm));
 	}
 	mutex_unlock(&master_list_lock);
+	up_read(&sp_group_sem);

 	return 0;
 }
From: Guo Mengqi <guomengqi3@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5O5RQ
CVE: NA
--------------------------------
Notice that in sp_unshare_uva(), comparing current->tgid against spa->applier is sufficient for the authentication check. There is no need to also check current->mm against spa->mm.

Other redundant cases:

- find_spg_node_by_spg() never returns NULL in the current usage context;
- spg_info_show() never encounters a group with id 0.
Therefore, delete these redundant paths.
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
---
 include/linux/share_pool.h |  1 -
 mm/share_pool.c            | 43 ++++----------------------------------
 2 files changed, 4 insertions(+), 40 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 4860e4b00e57..ebf4b10a0965 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -44,7 +44,6 @@ #define SPG_ID_LOCAL_MAX 299999
#define SPG_FLAG_NON_DVPP (1 << 0) -#define SPG_FLAG_MASK (SPG_FLAG_NON_DVPP)
#define MAX_DEVID 8 /* the max num of Da-vinci devices */
diff --git a/mm/share_pool.c b/mm/share_pool.c index aa0d16a7befe..963cac4734ac 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -799,11 +799,6 @@ static void spa_dec_usage(struct sp_area *spa) static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, struct sp_group_node *spg_node, enum spa_type type) { - if (unlikely(!spg_node)) { - WARN(1, "null sp group node\n"); - return; - } - switch (type) { case SPA_TYPE_ALLOC: update_mem_usage_alloc(size, inc, is_hugepage, spg_node); @@ -837,10 +832,7 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, enum spa_type type = spa->type;
spg_node = find_spg_node_by_spg(tsk->mm, spa->spg); - if (!spg_node) - pr_err("share pool: spg node not found!\n"); - else - update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); + update_mem_usage(size, inc, spa->is_hugepage, spg_node, type); }
static inline void check_interrupt_context(void) @@ -1086,11 +1078,6 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag) return ERR_PTR(-ENOSPC); }
- if (flag & ~SPG_FLAG_MASK) { - pr_err_ratelimited("invalid flag:%#lx\n", flag); - return ERR_PTR(-EINVAL); - } - spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) return ERR_PTR(-ENOMEM); @@ -2743,10 +2730,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { spg_node = find_spg_node_by_spg(current->mm, spa->spg); - if (!spg_node) - pr_err("spg_node is null\n"); - else - update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); + update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; }
@@ -3317,12 +3301,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) goto out_drop_area; }
- if (!spa->mm) { - pr_err_ratelimited("unshare uva(to task) failed, none spa owner\n"); - ret = -EINVAL; - goto out_drop_area; - } - /* * current thread may be exiting in a multithread process * @@ -3336,13 +3314,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id) goto out_clr_flag; }
- if (spa->mm != mm) { - pr_err_ratelimited("unshare uva(to task) failed, spa not belong to the task\n"); - ret = -EINVAL; - mmput(mm); - goto out_drop_area; - } - down_write(&mm->mmap_lock); if (unlikely(mm->core_state)) { ret = 0; @@ -3980,10 +3951,7 @@ static int spg_info_show(int id, void *p, void *data) return 0;
if (seq != NULL) { - if (id == 0) - seq_puts(seq, "Non Group "); - else - seq_printf(seq, "Group %6d ", id); + seq_printf(seq, "Group %6d ", id);
down_read(&spg->rw_lock); seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", @@ -3994,10 +3962,7 @@ static int spg_info_show(int id, void *p, void *data) byte2kb(atomic64_read(&spg->instat.alloc_hsize))); up_read(&spg->rw_lock); } else { - if (id == 0) - pr_info("Non Group "); - else - pr_info("Group %6d ", id); + pr_info("Group %6d ", id);
down_read(&spg->rw_lock); pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n",
From: Guo Mengqi guomengqi3@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5ODCT CVE: NA
--------------------------------
When there are a large number of groups in the system, or a large number of processes in each group, "cat /proc/sharepool/proc_stat" hits a soft lockup before all the output is printed, because the callback does too much work in nested loops without rescheduling. Remove one of the loops to reduce the time cost and add a cond_resched() between groups to avoid the lockup; a minimal sketch of the pattern follows the log below.
root@buildroot:~/install# cat /proc/sharepool/proc_stat
[ 1250.647469] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cat:309]
[ 1250.648610] Modules linked in: sharepool_dev(OE)
[ 1250.650795] CPU: 0 PID: 309 Comm: cat Tainted: G OE 5.10.0+ #43
[ 1250.651216] Hardware name: linux,dummy-virt (DT)
[ 1250.651721] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--)
[ 1250.652426] pc : get_process_sp_res+0x40/0x90
[ 1250.652747] lr : proc_usage_by_group+0x158/0x218
...
[ 1250.657903] Call trace:
[ 1250.658376]  get_process_sp_res+0x40/0x90
[ 1250.658602]  proc_usage_by_group+0x158/0x218
[ 1250.658838]  idr_for_each+0x6c/0xf0
[ 1250.659027]  proc_group_usage_show+0x104/0x120
[ 1250.659263]  seq_read_iter+0xe0/0x498
[ 1250.659462]  proc_reg_read_iter+0xa8/0xe0
[ 1250.659660]  generic_file_splice_read+0xf0/0x1b0
[ 1250.659865]  do_splice_to+0x7c/0xd0
[ 1250.660029]  splice_direct_to_actor+0xe0/0x2a8
[ 1250.660353]  do_splice_direct+0xa4/0xf8
[ 1250.660902]  do_sendfile+0x1bc/0x420
[ 1250.661079]  __arm64_sys_sendfile64+0x170/0x178
[ 1250.661298]  el0_svc_common.constprop.0+0x88/0x268
[ 1250.661505]  do_el0_svc+0x34/0xb8
[ 1250.661686]  el0_svc+0x1c/0x28
[ 1250.661836]  el0_sync_handler+0x8c/0xb0
[ 1250.662033]  el0_sync+0x168/0x180
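A minimal sketch of the pattern, assuming the per-group printer is driven by idr_for_each(); print_one_group() is a hypothetical stand-in for the real body:

    /* Hypothetical callback shape; only the cond_resched() placement matters. */
    static int proc_usage_by_group_sketch(int id, void *p, void *data)
    {
            struct seq_file *seq = data;

            print_one_group(seq, p, id);    /* hypothetical helper */
            cond_resched();                 /* yield between groups so a long
                                             * /proc read cannot soft-lock the CPU */
            return 0;
    }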
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 963cac4734ac..4a6933a16551 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4023,7 +4023,6 @@ static int proc_usage_by_group(int id, void *p, void *data) struct sp_group_master *master; int tgid; unsigned long anon, file, shmem, total_rss; - long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
down_read(&spg->rw_lock); list_for_each_entry(spg_node, &spg->procs, proc_node) { @@ -4038,26 +4037,20 @@ static int proc_usage_by_group(int id, void *p, void *data) tgid = master->instat.tgid;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - get_process_sp_res(master, &sp_res, &sp_res_nsize); - get_process_non_sp_res(total_rss, shmem, sp_res_nsize, - &non_sp_res, &non_sp_shm);
seq_printf(seq, "%-8d ", tgid); - if (id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", id); - seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", + seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-9ld %-9ld %-9ld %-8ld %-7ld %-7ld ", get_spg_proc_alloc(spg_node), get_spg_proc_k2u(spg_node), get_sp_res_by_spg_proc(spg_node), - sp_res, non_sp_res, page2kb(mm->total_vm), page2kb(total_rss), - page2kb(shmem), non_sp_shm); + page2kb(shmem)); print_process_prot(seq, spg_node->prot); seq_putc(seq, '\n'); } up_read(&spg->rw_lock); + cond_resched();
return 0; } @@ -4068,9 +4061,9 @@ static int proc_group_usage_show(struct seq_file *seq, void *offset) spa_overview_show(seq);
/* print the file header */ - seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s %-4s\n", - "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", - "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); + seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-8s %-7s %-7s %-4s\n", + "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", + "VIRT", "RES", "Shm", "PROT"); /* print kthread buff_module_guard_work */ seq_printf(seq, "%-8s %-8s %-9lld %-9lld\n", "guard", "-",
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5OE1J CVE: NA
--------------------------------
The mutex protecting spm_dvpp_list forms an ABBA deadlock with spg->rw_lock. Adding a process to a sharepool group while running "cat /proc/sharepool/spa_stat" at the same time reproduces the problem.
Drop the spg->rw_lock acquisition from the spa_stat path to avoid this; an illustrative sketch of the ABBA shape follows the log below.
[ 1101.013480]INFO: task test:3567 blocked for more than 30 seconds.
[ 1101.014378]      Tainted: G OE 5.10.0+ #45
[ 1101.015707]task:test state:D stack: 0 pid: 3567
[ 1101.016464]Call trace:
[ 1101.016736] __switch_to+0xc0/0x128
[ 1101.017082] __schedule+0x3fc/0x898
[ 1101.017626] schedule+0x48/0xd8
[ 1101.017981] schedule_preempt_disabled+0x14/0x20
[ 1101.018519] __mutex_lock.isra.1+0x160/0x638
[ 1101.018899] __mutex_lock_slowpath+0x24/0x30
[ 1101.019291] mutex_lock+0x5c/0x68
[ 1101.019607] sp_mapping_create+0x118/0x1b0
[ 1101.019963] sp_init_group_master_locked.part.9+0x10c/0x288
[ 1101.020356] mg_sp_group_add_task.part.16+0x7dc/0xcd0
[ 1101.020750] mg_sp_group_add_task+0x54/0xd0
[ 1101.021120] dev_ioctl+0x360/0x1e20 [sharepool_dev]
[ 1101.022171] __arm64_sys_ioctl+0xb0/0xe8
[ 1101.022695] el0_svc_common.constprop.0+0x88/0x268
[ 1101.023143] do_el0_svc+0x34/0xb8
[ 1101.023487] el0_svc+0x1c/0x28
[ 1101.023775] el0_sync_handler+0x8c/0xb0
[ 1101.024120] el0_sync+0x168/0x180
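Illustrative only, not the literal sharepool code: the two paths take the same pair of locks in opposite orders, the classic ABBA shape.

    /*
     *   CPU0 (mg_sp_group_add_task)     CPU1 (spa_stat read)
     *   ---------------------------     --------------------
     *   mutex_lock(&spm_list_lock);     down_read(&spg->rw_lock);
     *   down_read(&spg->rw_lock);       mutex_lock(&spm_list_lock);
     *
     * Each side holds the lock the other needs, so neither can proceed.
     * Removing the spg->rw_lock acquisition from the show path deletes
     * one edge of the cycle, so the cycle can no longer form.
     */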
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 4a6933a16551..11e82c2a40e0 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3838,12 +3838,10 @@ static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *sp atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- down_read(&spa->spg->rw_lock); if (spg_valid(spa->spg)) /* k2u to group */ seq_printf(seq, "%-10d ", spa->spg->id); else /* spg is dead */ seq_printf(seq, "%-10s ", "Dead"); - up_read(&spa->spg->rw_lock);
seq_printf(seq, "%2s%-14lx %2s%-14lx %-10ld ", "0x", spa->va_start,
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5OE1J CVE: NA
--------------------------------
Fix a deadlock indicated below:
[ 171.669844] Chain exists of:
[ 171.669844]   &mm->mmap_lock --> sp_group_sem --> &spg->rw_lock
[ 171.669844]
[ 171.671469]  Possible unsafe locking scenario:
[ 171.671469]
[ 171.672121]        CPU0                    CPU1
[ 171.672415]        ----                    ----
[ 171.672706]   lock(&spg->rw_lock);
[ 171.673114]                            lock(sp_group_sem);
[ 171.673706]                            lock(&spg->rw_lock);
[ 171.674208]   lock(&mm->mmap_lock);
[ 171.674863]
[ 171.674863]  *** DEADLOCK ***
Sharepool takes its locks in the order sp_group_sem --> spg->rw_lock --> mm->mmap_lock. However, sp_check_mmap_addr() requested sp_group_sem while mm->mmap_lock was already held, i.e. mm->mmap_lock --> sp_group_sem. This creates the ABBA problem.
This happens in:
[ 171.642687] the existing dependency chain (in reverse order) is:
[ 171.643745]
[ 171.643745] -> #2 (&spg->rw_lock){++++}-{3:3}:
[ 171.644639]   __lock_acquire+0x6f4/0xc40
[ 171.645189]   lock_acquire+0x2f0/0x3c8
[ 171.645631]   down_read+0x64/0x2d8
[ 171.646075]   proc_usage_by_group+0x50/0x258 (spg->rw_lock)
[ 171.646542]   idr_for_each+0x6c/0xf0
[ 171.647011]   proc_group_usage_show+0x140/0x178
[ 171.647629]   seq_read_iter+0xe4/0x498
[ 171.648217]   proc_reg_read_iter+0xa8/0xe0
[ 171.648776]   new_sync_read+0xfc/0x1a0
[ 171.649002]   vfs_read+0x1ac/0x1c8
[ 171.649217]   ksys_read+0x74/0xf8
[ 171.649596]   __arm64_sys_read+0x24/0x30
[ 171.649934]   el0_svc_common.constprop.0+0x8c/0x270
[ 171.650528]   do_el0_svc+0x34/0xb8
[ 171.651069]   el0_svc+0x1c/0x28
[ 171.651278]   el0_sync_handler+0x8c/0xb0
[ 171.651636]   el0_sync+0x168/0x180
[ 171.652118]
[ 171.652118] -> #1 (sp_group_sem){++++}-{3:3}:
[ 171.652692]   __lock_acquire+0x6f4/0xc40
[ 171.653059]   lock_acquire+0x2f0/0x3c8
[ 171.653303]   down_read+0x64/0x2d8
[ 171.653704]   mg_is_sharepool_addr+0x184/0x340 (&sp_group_sem)
[ 171.654085]   sp_check_mmap_addr+0x64/0x108
[ 171.654668]   arch_get_unmapped_area_topdown+0x9c/0x528
[ 171.655370]   thp_get_unmapped_area+0x54/0x68
[ 171.656170]   get_unmapped_area+0x94/0x160
[ 171.656415]   __do_mmap_mm+0xd4/0x540
[ 171.656629]   do_mmap+0x98/0x648
[ 171.656838]   vm_mmap_pgoff+0xc0/0x188
[ 171.657129]   vm_mmap+0x6c/0x98
[ 171.657619]   elf_map+0xe0/0x118
[ 171.657835]   load_elf_binary+0x4ec/0xfd8
[ 171.658103]   bprm_execve.part.9+0x3ec/0x840
[ 171.658448]   bprm_execve+0x7c/0xb0
[ 171.658919]   kernel_execve+0x18c/0x198
[ 171.659500]   run_init_process+0xf0/0x108
[ 171.660073]   try_to_run_init_process+0x20/0x58
[ 171.660558]   kernel_init+0xcc/0x120
[ 171.660862]   ret_from_fork+0x10/0x18
[ 171.661273]
[ 171.661273] -> #0 (&mm->mmap_lock){++++}-{3:3}:
[ 171.661885]   check_prev_add+0xa4/0xbd8
[ 171.662229]   validate_chain+0xf54/0x14b8
[ 171.662705]   __lock_acquire+0x6f4/0xc40
[ 171.663310]   lock_acquire+0x2f0/0x3c8
[ 171.663658]   down_write+0x60/0x208
[ 171.664179]   mg_sp_alloc+0x24c/0x1150 (mm->mmap_lock)
[ 171.665245]   dev_ioctl+0x1128/0x1fb8 [sharepool_dev]
[ 171.665688]   __arm64_sys_ioctl+0xb0/0xe8
[ 171.666250]   el0_svc_common.constprop.0+0x8c/0x270
[ 171.667255]   do_el0_svc+0x34/0xb8
[ 171.667806]   el0_svc+0x1c/0x28
[ 171.668249]   el0_sync_handler+0x8c/0xb0
[ 171.668661]   el0_sync+0x168/0x180
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 11e82c2a40e0..0176076c30b1 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,6 +73,9 @@
#define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
+#define MMAP_SHARE_POOL_DVPP_BASE 0x100000000000ULL +#define MMAP_SHARE_POOL_DVPP_END (MMAP_SHARE_POOL_DVPP_BASE + MMAP_SHARE_POOL_16G_SIZE * 64) + static int system_group_count;
/* idr of all sp_groups */ @@ -500,7 +503,9 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct return 0;
free_master: + mutex_lock(&master_list_lock); list_del(&master->list_node); + mutex_unlock(&master_list_lock); mm->sp_group_master = NULL; kfree(master);
@@ -3550,6 +3555,7 @@ int sp_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(sp_unregister_notifier);
+static bool is_sp_dvpp_addr(unsigned long addr); /** * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. @@ -3577,7 +3583,8 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
/* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || - device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id)) + device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id) + || !is_sp_dvpp_addr(start) || !is_sp_dvpp_addr(start + size)) return false;
ret = get_task(pid, &tsk); @@ -3621,34 +3628,19 @@ static bool is_sp_normal_addr(unsigned long addr) MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; }
+/* + * | 16G host | 16G device | ... | | + * ^ + * | + * MMAP_SHARE_POOL_DVPP_BASE + 16G * 64 + * We only check the device regions. + */ static bool is_sp_dvpp_addr(unsigned long addr) { - int i; - struct mm_struct *mm; - struct sp_group_master *master; - struct sp_mapping *spm_dvpp; - - mm = current->mm; - if (!mm) + if (addr < MMAP_SHARE_POOL_DVPP_BASE || addr >= MMAP_SHARE_POOL_DVPP_END) return false;
- down_read(&sp_group_sem); - master = mm->sp_group_master; - if (!master) { - up_read(&sp_group_sem); - return false; - } - - /* master->local and master->local->dvpp won't be NULL*/ - spm_dvpp = master->local->dvpp; - for (i = 0; i < MAX_DEVID; i++) { - if (addr >= spm_dvpp->start[i] && addr < spm_dvpp->end[i]) { - up_read(&sp_group_sem); - return true; - } - } - up_read(&sp_group_sem); - return false; + return (addr - MMAP_SHARE_POOL_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; }
/**
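For reference, a standalone userspace sketch of the lockless range test added above (constants taken from the patch; the layout alternates 16G host / 16G device slots, so the 16G bit of the offset selects a device slot):

    #include <stdio.h>

    #define BASE   0x100000000000ULL         /* dynamic DVPP base, from the patch */
    #define SZ_16G 0x400000000ULL            /* one 16G slot */
    #define END    (BASE + SZ_16G * 64)      /* 64 slots are checked */

    /* Each 32G pair is |16G host|16G device|; the 16G bit of the
     * offset selects the device half, so a plain AND suffices. */
    static int is_dynamic_dvpp(unsigned long long addr)
    {
            if (addr < BASE || addr >= END)
                    return 0;
            return !!((addr - BASE) & SZ_16G);
    }

    int main(void)
    {
            printf("%d %d\n", is_dynamic_dvpp(BASE),           /* host half: 0 */
                              is_dynamic_dvpp(BASE + SZ_16G)); /* device half: 1 */
            return 0;
    }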
From: Wang Wensheng wangwensheng4@huawei.com

Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PD4P CVE: NA
--------------------------------
[ 2058.802818][ T290] BUG: KASAN: use-after-free in get_process_sp_res+0x70/0x134
[ 2058.810194][ T290] Read of size 8 at addr ffff00088dc6ab28 by task test_debug_loop/290
[ 2058.820520][ T290] CPU: 5 PID: 290 Comm: test_debug_loop Tainted: G W OE 5.10.0+ #2
[ 2058.829377][ T290] Hardware name: EVB(EP) (DT)
[ 2058.833982][ T290] Call trace:
[ 2058.837217][ T290]  dump_backtrace+0x0/0x30c
[ 2058.841660][ T290]  show_stack+0x20/0x30
[ 2058.845758][ T290]  dump_stack+0x120/0x1b0
[ 2058.850028][ T290]  print_address_description.constprop.0+0x2c/0x1fc
[ 2058.856555][ T290]  __kasan_report+0xfc/0x160
[ 2058.861086][ T290]  kasan_report+0x44/0xb0
[ 2058.865356][ T290]  __asan_load8+0x94/0xd0
[ 2058.869623][ T290]  get_process_sp_res+0x70/0x134
[ 2058.874501][ T290]  proc_usage_show+0x1ac/0x304
[ 2058.879208][ T290]  seq_read_iter+0x254/0x750
[ 2058.883728][ T290]  proc_reg_read_iter+0x100/0x140
[ 2058.888689][ T290]  new_sync_read+0x1cc/0x2c0
[ 2058.893215][ T290]  vfs_read+0x1f4/0x250
[ 2058.897304][ T290]  ksys_read+0xcc/0x170
[ 2058.901399][ T290]  __arm64_sys_read+0x4c/0x60
[ 2058.906016][ T290]  el0_svc_common.constprop.0+0xb4/0x2a0
[ 2058.911584][ T290]  do_el0_svc+0x8c/0xb0
[ 2058.915677][ T290]  el0_svc+0x20/0x30
[ 2058.919503][ T290]  el0_sync_handler+0xb0/0xbc
[ 2058.924114][ T290]  el0_sync+0x180/0x1c0
[ 2058.928190][ T290]
[ 2058.930444][ T290] Allocated by task 2176:
[ 2058.934714][ T290]  kasan_save_stack+0x28/0x60
[ 2058.939328][ T290]  __kasan_kmalloc.constprop.0+0xc8/0xf0
[ 2058.944909][ T290]  kasan_kmalloc+0x10/0x20
[ 2058.949268][ T290]  kmem_cache_alloc_trace+0x128/0xabc
[ 2058.954577][ T290]  create_spg_node+0x58/0x214
[ 2058.959188][ T290]  local_group_add_task+0x30/0x14c
[ 2058.964231][ T290]  init_local_group+0xd0/0x1a0
[ 2058.968936][ T290]  sp_init_group_master_locked.part.0+0x19c/0x290
[ 2058.975298][ T290]  mg_sp_group_add_task+0x73c/0xdb0
[ 2058.980456][ T290]  dev_sp_add_group+0x124/0x2dc [sharepool_dev]
[ 2058.986647][ T290]  dev_ioctl+0x21c/0x2ec [sharepool_dev]
[ 2058.992222][ T290]  __arm64_sys_ioctl+0xd8/0x120
[ 2058.997010][ T290]  el0_svc_common.constprop.0+0xb4/0x2a0
[ 2059.002572][ T290]  do_el0_svc+0x8c/0xb0
[ 2059.006662][ T290]  el0_svc+0x20/0x30
[ 2059.010489][ T290]  el0_sync_handler+0xb0/0xbc
[ 2059.015101][ T290]  el0_sync+0x180/0x1c0
[ 2059.019176][ T290]
[ 2059.021427][ T290] Freed by task 4125:
[ 2059.025343][ T290]  kasan_save_stack+0x28/0x60
[ 2059.029949][ T290]  kasan_set_track+0x28/0x40
[ 2059.034476][ T290]  kasan_set_free_info+0x24/0x50
[ 2059.039347][ T290]  __kasan_slab_free+0x104/0x1ac
[ 2059.044227][ T290]  kasan_slab_free+0x14/0x20
[ 2059.048744][ T290]  kfree+0x164/0xb94
[ 2059.052576][ T290]  sp_group_post_exit+0xf0/0x980
[ 2059.057448][ T290]  mmput.part.0+0xb4/0x220
[ 2059.061790][ T290]  mmput+0x2c/0x40
[ 2059.065450][ T290]  exit_mm+0x27c/0x3a0
[ 2059.069450][ T290]  do_exit+0x2a0/0x790
[ 2059.073448][ T290]  do_group_exit+0x64/0x100
[ 2059.077884][ T290]  get_signal+0x1fc/0x9fc
[ 2059.082144][ T290]  do_signal+0x110/0x2cc
[ 2059.086320][ T290]  do_notify_resume+0x158/0x2b0
[ 2059.091108][ T290]  work_pending+0xc/0x6d4
[ 2059.095358][ T290]
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- mm/share_pool.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 0176076c30b1..6942084a56d3 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4345,6 +4345,7 @@ void sp_group_post_exit(struct mm_struct *mm) /* match with refcount inc in sp_group_add_task */ if (atomic_dec_and_test(&spg->use_count)) free_sp_group_locked(spg); + list_del(&spg_node->group_node); kfree(spg_node); } up_write(&sp_group_sem);
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA2 CVE: NA
--------------------------------
When a process is added to a group, mm->mm_users is increased by one; when the process is deleted from the group, mm->mm_users is decreased by one. The count cannot drop to zero here because this function is preceded by a get_task_mm() that holds its own reference, so the WARN-on-zero path is dead code and a plain atomic_dec() suffices.
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 6942084a56d3..5b55168fee08 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1621,11 +1621,7 @@ int mg_sp_group_del_task(int pid, int spg_id) list_del(&spg_node->group_node); mm->sp_group_master->count--; kfree(spg_node); - if (atomic_sub_and_test(1, &mm->mm_users)) { - up_write(&sp_group_sem); - WARN(1, "Invalid user counting\n"); - return -EINVAL; - } + atomic_dec(&mm->mm_users);
up_write(&sp_group_sem);
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA0 CVE: NA
--------------------------------
The spa is still dereferenced by update_mem_usage(), but a concurrent mg_sp_unshare() may already have released it by then. Drop the spa reference only after its last use to avoid the use-after-free; see the ordering sketch below.
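The ordering the fix enforces, sketched with hypothetical helpers (get_spa(), map_to_user(), account_usage()); only the position of __sp_area_drop() matters:

    struct sp_area *spa = get_spa(addr);   /* hypothetical lookup, takes a ref */
    void *uva = map_to_user(spa);          /* dereferences spa */

    if (!IS_ERR(uva))
            account_usage(spa);            /* still dereferences spa */
    __sp_area_drop(spa);                   /* drop only after the last use */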
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 5b55168fee08..7d08239b7401 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2726,7 +2726,6 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un spa->kva = kva; kc.sp_flags = sp_flags; uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc); - __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); else { @@ -2734,6 +2733,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK); spa->mm = current->mm; } + __sp_area_drop(spa);
return uva; } @@ -2785,9 +2785,9 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size,
out: up_read(&spg->rw_lock); - __sp_area_drop(spa); if (!IS_ERR(uva)) sp_update_process_stat(current, true, spa); + __sp_area_drop(spa);
return uva; }
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA4 CVE: NA
--------------------------------
Check the maximum value of spg_id to ensure that it falls within the valid range: SPG_ID_DEFAULT or [SPG_ID_MIN, SPG_ID_AUTO).
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 7d08239b7401..b9e720d23c72 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2245,7 +2245,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, return -EINVAL; }
- if (spg_id != SPG_ID_DEFAULT && spg_id < SPG_ID_MIN) { + if (spg_id != SPG_ID_DEFAULT && (spg_id < SPG_ID_MIN || spg_id >= SPG_ID_AUTO)) { pr_err_ratelimited("allocation failed, invalid group id %d\n", spg_id); return -EINVAL; }
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PIA6 CVE: NA
--------------------------------
Use get_task_mm() so that the mm cannot be released while the information in its mm_struct is being read; see the pairing sketch below.
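A sketch of the reference pairing this relies on; get_task_mm() and mmput() are the standard kernel helpers for pinning an mm_struct:

    struct mm_struct *mm = get_task_mm(task);  /* +1 on mm->mm_users */

    if (!mm)
            return 0;       /* no mm: kernel thread or already exited */
    /* ... mm's fields can be read safely here ... */
    mmput(mm);              /* balances get_task_mm() */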
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index b9e720d23c72..5b4c4ae6e933 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3755,7 +3755,7 @@ static void print_process_prot(struct seq_file *seq, unsigned long prot) int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - struct mm_struct *mm = task->mm; + struct mm_struct *mm; struct sp_group_master *master; struct sp_proc_stat *proc_stat; struct sp_group_node *spg_node; @@ -3765,17 +3765,15 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, if (!sp_is_enabled()) return 0;
+ mm = get_task_mm(task); if (!mm) return 0;
down_read(&sp_group_sem); down_read(&mm->mmap_lock); master = mm->sp_group_master; - if (!master) { - up_read(&mm->mmap_lock); - up_read(&sp_group_sem); - return 0; - } + if (!master) + goto out;
get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); proc_stat = &master->instat; @@ -3807,8 +3805,11 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, print_process_prot(m, spg_node->prot); seq_putc(m, '\n'); } + +out: up_read(&mm->mmap_lock); up_read(&sp_group_sem); + mmput(mm); return 0; }
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q CVE: NA
--------------------------------
"TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE" is puzzling.
Definitions such as MMAP_SHARE_POOL_START = MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE and MMAP_SHARE_POOL_16G_START = MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE make the memory layout unintuitive. Redefine the regions top-down from a fixed pool end so that each region's start and end are explicit; a sketch of the derivation follows.
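A sketch of the top-down derivation, using the sizes from the patch; the TASK_SIZE value is an assumption for illustration only:

    #define TASK_SIZE_SKETCH  0x1000000000000ULL  /* assumed, illustration only */
    #define SP_SKIP           0x80000000000ULL    /* 8T skipped for stack */
    #define SP_DVPP_SIZE      0x80000000000ULL    /* 8T DVPP region */

    #define SP_END            (TASK_SIZE_SKETCH - SP_SKIP)
    #define SP_DVPP_START     (SP_END - SP_DVPP_SIZE)  /* DVPP at the top */
    #define SP_NORMAL_END     (SP_DVPP_START)          /* normal just below */

    /* Each region's end is the previous region's start, so the whole
     * layout can be read off the definitions from top to bottom. */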
Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/share_pool.h | 18 +++++++++++++----- mm/share_pool.c | 33 ++++++++++++++------------------- 2 files changed, 27 insertions(+), 24 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index ebf4b10a0965..b5fa0d4d59e0 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -213,11 +213,19 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ #define MMAP_SHARE_POOL_16G_SIZE 0x400000000UL -#define MMAP_SHARE_POOL_SIZE (MMAP_SHARE_POOL_NORMAL_SIZE + MMAP_SHARE_POOL_DVPP_SIZE) -/* align to 2M hugepage size, and MMAP_SHARE_POOL_TOP_16G_START should be align to 16G */ -#define MMAP_SHARE_POOL_END ((TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE) & ~((1 << 21) - 1)) -#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE) -#define MMAP_SHARE_POOL_16G_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE) +/* skip 8T for stack */ +#define MMAP_SHARE_POOL_SKIP 0x80000000000UL +#define MMAP_SHARE_POOL_END (TASK_SIZE - MMAP_SHARE_POOL_SKIP) +#define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) +/* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ +#define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) +#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_NORMAL_START (MMAP_SHARE_POOL_NORMAL_END - MMAP_SHARE_POOL_NORMAL_SIZE) +#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_NORMAL_START) + +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE 0x100000000000ULL +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_END (MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + \ + MMAP_SHARE_POOL_16G_SIZE * 64)
#ifdef CONFIG_ASCEND_SHARE_POOL
diff --git a/mm/share_pool.c b/mm/share_pool.c index 5b4c4ae6e933..2620fc94a92b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -73,9 +73,6 @@
#define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
-#define MMAP_SHARE_POOL_DVPP_BASE 0x100000000000ULL -#define MMAP_SHARE_POOL_DVPP_END (MMAP_SHARE_POOL_DVPP_BASE + MMAP_SHARE_POOL_16G_SIZE * 64) - static int system_group_count;
/* idr of all sp_groups */ @@ -290,12 +287,12 @@ static void sp_mapping_range_init(struct sp_mapping *spm)
for (i = 0; i < MAX_DEVID; i++) { if (spm->flag & SP_MAPPING_NORMAL) { - spm->start[i] = MMAP_SHARE_POOL_START; - spm->end[i] = MMAP_SHARE_POOL_16G_START; + spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; + spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; continue; }
- spm->start[i] = MMAP_SHARE_POOL_16G_START + + spm->start[i] = MMAP_SHARE_POOL_DVPP_START + i * MMAP_SHARE_POOL_16G_SIZE; spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } @@ -1854,7 +1851,7 @@ static struct sp_area *__find_sp_area_locked(struct sp_group *spg, { struct rb_node *n;
- if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) n = spg->normal->area_root.rb_node; else n = spg->dvpp->area_root.rb_node; @@ -1910,7 +1907,7 @@ static void sp_free_area(struct sp_area *spa)
lockdep_assert_held(&sp_area_lock);
- if (addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_16G_START) + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) spm = spa->spg->normal; else spm = spa->spg->dvpp; @@ -3551,7 +3548,7 @@ int sp_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(sp_unregister_notifier);
-static bool is_sp_dvpp_addr(unsigned long addr); +static bool is_sp_dynamic_dvpp_addr(unsigned long addr); /** * mg_sp_config_dvpp_range() - User can config the share pool start address * of each Da-vinci device. @@ -3580,7 +3577,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) /* NOTE: check the start address */ if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id) - || !is_sp_dvpp_addr(start) || !is_sp_dvpp_addr(start + size)) + || !is_sp_dynamic_dvpp_addr(start) || !is_sp_dynamic_dvpp_addr(start + size)) return false;
ret = get_task(pid, &tsk); @@ -3596,7 +3593,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) goto put_mm;
spm = spg->dvpp; - default_start = MMAP_SHARE_POOL_16G_START + device_id * MMAP_SHARE_POOL_16G_SIZE; + default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start) goto put_spg; @@ -3617,11 +3614,9 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) } EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range);
-static bool is_sp_normal_addr(unsigned long addr) +static bool is_sp_reserve_addr(unsigned long addr) { - return addr >= MMAP_SHARE_POOL_START && - addr < MMAP_SHARE_POOL_16G_START + - MAX_DEVID * MMAP_SHARE_POOL_16G_SIZE; + return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END; }
/* @@ -3631,12 +3626,12 @@ static bool is_sp_normal_addr(unsigned long addr) * MMAP_SHARE_POOL_DVPP_BASE + 16G * 64 * We only check the device regions. */ -static bool is_sp_dvpp_addr(unsigned long addr) +static bool is_sp_dynamic_dvpp_addr(unsigned long addr) { - if (addr < MMAP_SHARE_POOL_DVPP_BASE || addr >= MMAP_SHARE_POOL_DVPP_END) + if (addr < MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE || addr >= MMAP_SHARE_POOL_DYNAMIC_DVPP_END) return false;
- return (addr - MMAP_SHARE_POOL_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; + return (addr - MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; }
/** @@ -3648,7 +3643,7 @@ static bool is_sp_dvpp_addr(unsigned long addr) bool mg_is_sharepool_addr(unsigned long addr) { return sp_is_enabled() && - ((is_sp_normal_addr(addr) || is_sp_dvpp_addr(addr))); + ((is_sp_reserve_addr(addr) || is_sp_dynamic_dvpp_addr(addr))); } EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q CVE: NA
--------------------------------
Now sp_mapping.flag is only used to distinguish sp_mapping types, so 'type' is a more suitable name.
Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/share_pool.c | 53 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 2620fc94a92b..9c254d25e7ef 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -151,7 +151,7 @@ struct spg_proc_stat { * address space management */ struct sp_mapping { - unsigned long flag; + unsigned long type; atomic_t user; unsigned long start[MAX_DEVID]; unsigned long end[MAX_DEVID]; @@ -263,12 +263,23 @@ static void sp_init_group_master_stat(int tgid, struct mm_struct *mm,
#define SP_MAPPING_DVPP 0x1 #define SP_MAPPING_NORMAL 0x2 + +static unsigned long sp_mapping_type(struct sp_mapping *spm) +{ + return spm->type; +} + +static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) +{ + spm->type = type; +} + static struct sp_mapping *sp_mapping_normal;
static void sp_mapping_add_to_list(struct sp_mapping *spm) { mutex_lock(&spm_list_lock); - if (spm->flag & SP_MAPPING_DVPP) + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_add_tail(&spm->spm_node, &spm_dvpp_list); mutex_unlock(&spm_list_lock); } @@ -276,7 +287,7 @@ static void sp_mapping_add_to_list(struct sp_mapping *spm) static void sp_mapping_remove_from_list(struct sp_mapping *spm) { mutex_lock(&spm_list_lock); - if (spm->flag & SP_MAPPING_DVPP) + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_del(&spm->spm_node); mutex_unlock(&spm_list_lock); } @@ -286,19 +297,23 @@ static void sp_mapping_range_init(struct sp_mapping *spm) int i;
for (i = 0; i < MAX_DEVID; i++) { - if (spm->flag & SP_MAPPING_NORMAL) { + switch (sp_mapping_type(spm)) { + case SP_MAPPING_NORMAL: spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; - spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; - continue; + spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; + break; + case SP_MAPPING_DVPP: + spm->start[i] = MMAP_SHARE_POOL_DVPP_START + i * MMAP_SHARE_POOL_16G_SIZE; + spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; + break; + default: + pr_err("Invalid sp_mapping type [%lu]\n", sp_mapping_type(spm)); + break; } - - spm->start[i] = MMAP_SHARE_POOL_DVPP_START + - i * MMAP_SHARE_POOL_16G_SIZE; - spm->end[i] = spm->start[i] + MMAP_SHARE_POOL_16G_SIZE; } }
-static struct sp_mapping *sp_mapping_create(unsigned long flag) +static struct sp_mapping *sp_mapping_create(unsigned long type) { struct sp_mapping *spm;
@@ -306,7 +321,7 @@ static struct sp_mapping *sp_mapping_create(unsigned long flag) if (!spm) return ERR_PTR(-ENOMEM);
- spm->flag = flag; + sp_mapping_set_type(spm, type); sp_mapping_range_init(spm); atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; @@ -325,18 +340,26 @@ static void sp_mapping_destroy(struct sp_mapping *spm) static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { atomic_inc(&spm->user); - if (spm->flag & SP_MAPPING_DVPP) { + + switch (sp_mapping_type(spm)) { + case SP_MAPPING_DVPP: spg->dvpp = spm; list_add_tail(&spg->mnode, &spm->group_head); - } else if (spm->flag & SP_MAPPING_NORMAL) + break; + case SP_MAPPING_NORMAL: spg->normal = spm; + break; + default: + break; + } }
static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { if (!spm) return; - if (spm->flag & SP_MAPPING_DVPP) + + if (sp_mapping_type(spm) == SP_MAPPING_DVPP) list_del(&spg->mnode); if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm);
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q CVE: NA
--------------------------------
spg->dvpp and spg->normal can be combined into one array indexed by the mapping type; see the pattern sketch below.
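The refactor follows a common pattern: replace individually named pointer fields with one array indexed by an enum, so setup and teardown can loop over the types. A condensed sketch (sp_group_sketch is an illustrative stand-in; sp_mapping_detach() tolerates a NULL mapping, as in the patch):

    enum sp_mapping_type {
            SP_MAPPING_DVPP,
            SP_MAPPING_NORMAL,
            SP_MAPPING_END,         /* number of types */
    };

    struct sp_group_sketch {
            struct sp_mapping *mapping[SP_MAPPING_END];
    };

    /* Teardown no longer special-cases each field: */
    static void detach_all(struct sp_group_sketch *spg)
    {
            int type;

            for (type = SP_MAPPING_DVPP; type < SP_MAPPING_END; type++)
                    sp_mapping_detach(spg, spg->mapping[type]); /* NULL-safe */
    }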
Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/share_pool.c | 79 +++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 35 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 9c254d25e7ef..bce2f0fa3b65 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -147,6 +147,13 @@ struct spg_proc_stat { atomic64_t k2u_size; };
+enum sp_mapping_type { + SP_MAPPING_START, + SP_MAPPING_DVPP = SP_MAPPING_START, + SP_MAPPING_NORMAL, + SP_MAPPING_END, +}; + /* * address space management */ @@ -208,8 +215,7 @@ struct sp_group { struct rw_semaphore rw_lock; /* list node for dvpp mapping */ struct list_head mnode; - struct sp_mapping *dvpp; - struct sp_mapping *normal; + struct sp_mapping *mapping[SP_MAPPING_END]; };
/* a per-process(per mm) struct which manages a sp_group_node list */ @@ -261,9 +267,6 @@ static void sp_init_group_master_stat(int tgid, struct mm_struct *mm, get_task_comm(stat->comm, current); }
-#define SP_MAPPING_DVPP 0x1 -#define SP_MAPPING_NORMAL 0x2 - static unsigned long sp_mapping_type(struct sp_mapping *spm) { return spm->type; @@ -339,30 +342,29 @@ static void sp_mapping_destroy(struct sp_mapping *spm)
static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { + unsigned long type = sp_mapping_type(spm); atomic_inc(&spm->user);
- switch (sp_mapping_type(spm)) { - case SP_MAPPING_DVPP: - spg->dvpp = spm; + spg->mapping[type] = spm; + if (type == SP_MAPPING_DVPP) list_add_tail(&spg->mnode, &spm->group_head); - break; - case SP_MAPPING_NORMAL: - spg->normal = spm; - break; - default: - break; - } }
static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) { + unsigned long type; + if (!spm) return;
- if (sp_mapping_type(spm) == SP_MAPPING_DVPP) + type = sp_mapping_type(spm); + + if (type == SP_MAPPING_DVPP) list_del(&spg->mnode); if (atomic_dec_and_test(&spm->user)) sp_mapping_destroy(spm); + + spg->mapping[type] = NULL; }
/* merge old mapping to new, and the old mapping would be destroyed */ @@ -375,7 +377,7 @@ static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old)
list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { list_move_tail(&spg->mnode, &new->group_head); - spg->dvpp = new; + spg->mapping[SP_MAPPING_DVPP] = new; }
atomic_add(atomic_read(&old->user), &new->user); @@ -409,8 +411,10 @@ static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) */ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) { - struct sp_group_master *master = mm->sp_group_master; - struct sp_group *local = master->local; + struct sp_mapping *local_dvpp_mapping, *spg_dvpp_mapping; + + local_dvpp_mapping = mm->sp_group_master->local->mapping[SP_MAPPING_DVPP]; + spg_dvpp_mapping = spg->mapping[SP_MAPPING_DVPP];
if (!list_empty(&spg->procs) && !(spg->flag & SPG_FLAG_NON_DVPP)) { /* @@ -419,14 +423,14 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) * This may change the address range for the task or group implicitly, * give a warn for it. */ - bool is_conflict = !can_mappings_merge(local->dvpp, spg->dvpp); + bool is_conflict = !can_mappings_merge(local_dvpp_mapping, spg_dvpp_mapping);
- if (is_mapping_empty(local->dvpp)) { - sp_mapping_merge(spg->dvpp, local->dvpp); + if (is_mapping_empty(local_dvpp_mapping)) { + sp_mapping_merge(spg_dvpp_mapping, local_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id); - } else if (is_mapping_empty(spg->dvpp)) { - sp_mapping_merge(local->dvpp, spg->dvpp); + } else if (is_mapping_empty(spg_dvpp_mapping)) { + sp_mapping_merge(local_dvpp_mapping, spg_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id); } else { @@ -436,8 +440,8 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) } else { if (!(spg->flag & SPG_FLAG_NON_DVPP)) /* the mapping of local group is always set */ - sp_mapping_attach(spg, local->dvpp); - if (!spg->normal) + sp_mapping_attach(spg, local_dvpp_mapping); + if (!spg->mapping[SP_MAPPING_NORMAL]) sp_mapping_attach(spg, sp_mapping_normal); }
@@ -912,14 +916,19 @@ static void free_new_spg_id(bool new, int spg_id)
static void free_sp_group_locked(struct sp_group *spg) { + int type; + fput(spg->file); fput(spg->file_hugetlb); idr_remove(&sp_group_idr, spg->id); free_sp_group_id((unsigned int)spg->id); - sp_mapping_detach(spg, spg->dvpp); - sp_mapping_detach(spg, spg->normal); + + for (type = SP_MAPPING_START; type < SP_MAPPING_END; type++) + sp_mapping_detach(spg, spg->mapping[type]); + if (!is_local_group(spg->id)) system_group_count--; + kfree(spg); WARN(system_group_count < 0, "unexpected group count\n"); } @@ -1744,9 +1753,9 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, }
if (flags & SP_DVPP) - mapping = spg->dvpp; + mapping = spg->mapping[SP_MAPPING_DVPP]; else - mapping = spg->normal; + mapping = spg->mapping[SP_MAPPING_NORMAL];
if (!mapping) { pr_err_ratelimited("non DVPP spg, id %d\n", spg->id); @@ -1875,9 +1884,9 @@ static struct sp_area *__find_sp_area_locked(struct sp_group *spg, struct rb_node *n;
if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - n = spg->normal->area_root.rb_node; + n = spg->mapping[SP_MAPPING_NORMAL]->area_root.rb_node; else - n = spg->dvpp->area_root.rb_node; + n = spg->mapping[SP_MAPPING_DVPP]->area_root.rb_node;
while (n) { struct sp_area *spa; @@ -1931,9 +1940,9 @@ static void sp_free_area(struct sp_area *spa) lockdep_assert_held(&sp_area_lock);
if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - spm = spa->spg->normal; + spm = spa->spg->mapping[SP_MAPPING_NORMAL]; else - spm = spa->spg->dvpp; + spm = spa->spg->mapping[SP_MAPPING_DVPP];
if (spm->free_area_cache) { struct sp_area *cache; @@ -3615,7 +3624,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) if (IS_ERR(spg)) goto put_mm;
- spm = spg->dvpp; + spm = spg->mapping[SP_MAPPING_DVPP]; default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start)
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q CVE: NA
--------------------------------
Extract the logic that obtains an sp_mapping from an address into a new function, sp_mapping_find().
Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/share_pool.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index bce2f0fa3b65..ad37a7a7d0c5 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -448,6 +448,15 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) return 0; }
+static inline struct sp_mapping *sp_mapping_find(struct sp_group *spg, + unsigned long addr) +{ + if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) + return spg->mapping[SP_MAPPING_NORMAL]; + + return spg->mapping[SP_MAPPING_DVPP]; +} + static struct sp_group *create_spg(int spg_id, unsigned long flag); static void free_new_spg_id(bool new, int spg_id); static void free_sp_group_locked(struct sp_group *spg); @@ -1881,13 +1890,8 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, static struct sp_area *__find_sp_area_locked(struct sp_group *spg, unsigned long addr) { - struct rb_node *n; - - if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - n = spg->mapping[SP_MAPPING_NORMAL]->area_root.rb_node; - else - n = spg->mapping[SP_MAPPING_DVPP]->area_root.rb_node; - + struct sp_mapping *spm = sp_mapping_find(spg, addr); + struct rb_node *n = spm->area_root.rb_node; while (n) { struct sp_area *spa;
@@ -1939,11 +1943,7 @@ static void sp_free_area(struct sp_area *spa)
lockdep_assert_held(&sp_area_lock);
- if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) - spm = spa->spg->mapping[SP_MAPPING_NORMAL]; - else - spm = spa->spg->mapping[SP_MAPPING_DVPP]; - + spm = sp_mapping_find(spa->spg, addr); if (spm->free_area_cache) { struct sp_area *cache;
From: Chen Jun chenjun102@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5I72Q CVE: NA
--------------------------------
1. Split the sharepool normal area (8T) into a sharepool read-only area (64G) and a sharepool normal area (8T - 64G).
2. User programs cannot write to addresses in the sharepool read-only area.
3. Add SP_PROT_FOCUS for sp_alloc.
4. sp_alloc with SP_PROT_RO | SP_PROT_FOCUS returns a virtual address within the sharepool read-only area.
5. Other user programs that are added to the group with write permission still cannot write to addresses in the sharepool read-only area.

A usage sketch follows this list.
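A hedged usage sketch, assuming the exported signature void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); per the validity check added in sp_alloc_area(), SP_PROT_FOCUS is only accepted together with SP_PROT_RO and without SP_DVPP:

    /* Request memory from the 64G read-only window (sketch). */
    void *buf = mg_sp_alloc(size, SP_PROT_RO | SP_PROT_FOCUS, spg_id);

    if (IS_ERR(buf))
            return PTR_ERR(buf);
    /* Processes later added to the group map this range with PROT_WRITE
     * masked off (see the prot handling in mg_sp_group_add_task). */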
Signed-off-by: Chen Jun chenjun102@huawei.com --- include/linux/share_pool.h | 36 +++++++++++++++++++++----------- mm/share_pool.c | 42 +++++++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index b5fa0d4d59e0..1432aaa08087 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -17,6 +17,11 @@ #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) #define SP_PROT_RO (1 << 16) +/* + * SP_PROT_FOCUS should used with SP_PROT_RO, + * to alloc a memory within sharepool ro memory. + */ +#define SP_PROT_FOCUS (1 << 17)
#define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -26,7 +31,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS)
#define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | SP_PROT_RO | \ + SP_SPEC_NODE_ID | SP_PROT_RO | SP_PROT_FOCUS | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT))
@@ -113,19 +118,22 @@ struct sp_mapping { /* Processes in the same sp_group can share memory. * Memory layout for share pool: * - * |-------------------- 8T -------------------|---|------ 8T ------------| - * | Device 0 | Device 1 |...| | - * |----------------------------------------------------------------------| - * |------------- 16G -------------| 16G | | | - * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp normal memory | - * | sp | sp | | | | | - * |----------------------------------------------------------------------| + * |-------------------- 8T -------------------|---|---64G---|----- 8T-64G -----| + * | Device 0 | Device 1 |...| | | + * |-----------------------------------------------|---------|------------------| + * |------------- 16G -------------| 16G | | | | + * | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp ro | sp normal memory | + * | sp | sp | | | | | | + * |----------------------------------------------------------------------------| * * The host SVM feature reserves 8T virtual memory by mmap, and due to the * restriction of DVPP, while SVM and share pool will both allocate memory * for DVPP, the memory have to be in the same 32G range. * - * Share pool reserves 16T memory, with 8T for normal uses and 8T for DVPP. + * Share pool reserves 16T memory, 8T-64G for normal uses, 64G for ro memory + * and 8T for DVPP. + * Within this 64G ro memory, user application will never have write permission + * to this memory address. * Within this 8T DVPP memory, SVM will call sp_config_dvpp_range() to * tell us which 16G memory range is reserved for share pool . * @@ -207,8 +215,10 @@ struct sp_walk_data {
#define MMAP_TOP_4G_SIZE 0x100000000UL
-/* 8T size */ -#define MMAP_SHARE_POOL_NORMAL_SIZE 0x80000000000UL +/* 8T - 64G size */ +#define MMAP_SHARE_POOL_NORMAL_SIZE 0x7F000000000UL +/* 64G */ +#define MMAP_SHARE_POOL_RO_SIZE 0x1000000000UL /* 8T size*/ #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ @@ -219,7 +229,9 @@ struct sp_walk_data { #define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) /* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ #define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) -#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_RO_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_RO_START (MMAP_SHARE_POOL_RO_END - MMAP_SHARE_POOL_RO_SIZE) +#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_RO_START) #define MMAP_SHARE_POOL_NORMAL_START (MMAP_SHARE_POOL_NORMAL_END - MMAP_SHARE_POOL_NORMAL_SIZE) #define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_NORMAL_START)
diff --git a/mm/share_pool.c b/mm/share_pool.c index ad37a7a7d0c5..ffaec8d250bb 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -151,6 +151,7 @@ enum sp_mapping_type { SP_MAPPING_START, SP_MAPPING_DVPP = SP_MAPPING_START, SP_MAPPING_NORMAL, + SP_MAPPING_RO, SP_MAPPING_END, };
@@ -278,6 +279,7 @@ static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) }
static struct sp_mapping *sp_mapping_normal; +static struct sp_mapping *sp_mapping_ro;
static void sp_mapping_add_to_list(struct sp_mapping *spm) { @@ -301,6 +303,10 @@ static void sp_mapping_range_init(struct sp_mapping *spm)
for (i = 0; i < MAX_DEVID; i++) { switch (sp_mapping_type(spm)) { + case SP_MAPPING_RO: + spm->start[i] = MMAP_SHARE_POOL_RO_START; + spm->end[i] = MMAP_SHARE_POOL_RO_END; + break; case SP_MAPPING_NORMAL: spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; @@ -443,6 +449,8 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg) sp_mapping_attach(spg, local_dvpp_mapping); if (!spg->mapping[SP_MAPPING_NORMAL]) sp_mapping_attach(spg, sp_mapping_normal); + if (!spg->mapping[SP_MAPPING_RO]) + sp_mapping_attach(spg, sp_mapping_ro); }
return 0; @@ -454,6 +462,9 @@ static inline struct sp_mapping *sp_mapping_find(struct sp_group *spg, if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END) return spg->mapping[SP_MAPPING_NORMAL];
+ if (addr >= MMAP_SHARE_POOL_RO_START && addr < MMAP_SHARE_POOL_RO_END) + return spg->mapping[SP_MAPPING_RO]; + return spg->mapping[SP_MAPPING_DVPP]; }
@@ -489,6 +500,7 @@ static int init_local_group(struct mm_struct *mm) } sp_mapping_attach(master->local, spm); sp_mapping_attach(master->local, sp_mapping_normal); + sp_mapping_attach(master->local, sp_mapping_ro);
ret = local_group_add_task(mm, spg); if (ret < 0) @@ -1483,6 +1495,10 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) unsigned long populate = 0; struct file *file = spa_file(spa); unsigned long addr; + unsigned long __prot = prot; + + if ((spa->flags & (SP_PROT_RO | SP_PROT_FOCUS)) == (SP_PROT_RO | SP_PROT_FOCUS)) + __prot &= ~PROT_WRITE;
__sp_area_drop_locked(prev); prev = spa; @@ -1495,7 +1511,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) spin_unlock(&sp_area_lock);
if (spa->type == SPA_TYPE_K2SPG && spa->kva) { - addr = sp_remap_kva_to_vma(spa->kva, spa, mm, prot, NULL); + addr = sp_remap_kva_to_vma(spa->kva, spa, mm, __prot, NULL); if (IS_ERR_VALUE(addr)) pr_warn("add group remap k2u failed %ld\n", addr);
@@ -1513,7 +1529,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) break; }
- addr = sp_mmap(mm, file, spa, &populate, prot, NULL); + addr = sp_mmap(mm, file, spa, &populate, __prot, NULL); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_lock); @@ -1761,7 +1777,13 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, return ERR_PTR(-EINVAL); }
- if (flags & SP_DVPP) + if (flags & SP_PROT_FOCUS) { + if ((flags & (SP_DVPP | SP_PROT_RO)) != SP_PROT_RO) { + pr_err("invalid sp_flags [%lx]\n", flags); + return ERR_PTR(-EINVAL); + } + mapping = spg->mapping[SP_MAPPING_RO]; + } else if (flags & SP_DVPP) mapping = spg->mapping[SP_MAPPING_DVPP]; else mapping = spg->mapping[SP_MAPPING_NORMAL]; @@ -3893,6 +3915,11 @@ static void spa_stat_of_mapping_show(struct seq_file *seq, struct sp_mapping *sp spin_unlock(&sp_area_lock); }
+static void spa_ro_stat_show(struct seq_file *seq) +{ + spa_stat_of_mapping_show(seq, sp_mapping_ro); +} + static void spa_normal_stat_show(struct seq_file *seq) { spa_stat_of_mapping_show(seq, sp_mapping_normal); @@ -4023,6 +4050,7 @@ static int spa_stat_show(struct seq_file *seq, void *offset) /* print the file header */ seq_printf(seq, "%-10s %-16s %-16s %-10s %-7s %-5s %-8s %-8s\n", "Group ID", "va_start", "va_end", "Size(KB)", "Type", "Huge", "PID", "Ref"); + spa_ro_stat_show(seq); spa_normal_stat_show(seq); spa_dvpp_stat_show(seq); return 0; @@ -4402,9 +4430,17 @@ static int __init share_pool_init(void) goto fail; atomic_inc(&sp_mapping_normal->user);
+ sp_mapping_ro = sp_mapping_create(SP_MAPPING_RO); + if (IS_ERR(sp_mapping_ro)) + goto free_normal; + atomic_inc(&sp_mapping_ro->user); + proc_sharepool_init();
return 0; + +free_normal: + kfree(sp_mapping_normal); fail: pr_err("Ascend share pool initialization failed\n"); static_branch_disable(&share_pool_enabled_key);
From: Zhou Guanghui zhouguanghui1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5PZDX CVE: NA
--------------------------------
Whether a userspace mapping is huge-mapped can only be determined after walking its page table. Therefore, recalculate uva_aligned after the walk when the mapping turns out to be huge-mapped; a small alignment sketch follows.
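A standalone sketch of why the alignment must be redone, using the usual power-of-two ALIGN_DOWN and an illustrative address:

    #include <stdio.h>

    #define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
    #define PAGE_SIZE 4096UL
    #define PMD_SIZE  (2UL << 20)            /* 2M huge page */

    int main(void)
    {
            unsigned long uva = 0x200123456UL;

            printf("%lx\n", ALIGN_DOWN(uva, PAGE_SIZE)); /* 200123000 */
            printf("%lx\n", ALIGN_DOWN(uva, PMD_SIZE));  /* 200000000 */
            return 0;
    }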
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com --- mm/share_pool.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index ffaec8d250bb..0534987fcadc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3188,6 +3188,9 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, sp_walk_data->pages = NULL; }
+ if (sp_walk_data->is_hugepage) + sp_walk_data->uva_aligned = ALIGN_DOWN(uva, PMD_SIZE); + return ret; }
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5QETC CVE: NA
--------------------------------
sp_make_share_k2u() only supports vmalloc addresses now, so delete the fallback path that handled linear-mapped (virt_to_pfn) addresses.

Also, the master is guaranteed not to be freed until master->node_list is emptied, so the NULL check on spg_node->master is redundant.
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 0534987fcadc..938ee9a90aeb 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2657,12 +2657,11 @@ static int is_vmap_hugepage(unsigned long addr)
static unsigned long __sp_remap_get_pfn(unsigned long kva) { - unsigned long pfn; + unsigned long pfn = -EINVAL;
+ /* sp_make_share_k2u only support vmalloc address */ if (is_vmalloc_addr((void *)kva)) pfn = vmalloc_to_pfn((void *)kva); - else - pfn = virt_to_pfn(kva);
return pfn; } @@ -4073,11 +4072,6 @@ static int proc_usage_by_group(int id, void *p, void *data) list_for_each_entry(spg_node, &spg->procs, proc_node) {
master = spg_node->master; - if (!master) { - pr_info("master is NULL! process %d, group %d\n", - spg_node->instat.tgid, id); - continue; - } mm = master->mm; tgid = master->instat.tgid;
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5R0X9 CVE: NA
--------------------------------
Fix an AA deadlock caused by a nested lock acquisition in mg_sp_group_add_task().
Deadlock path:
mg_sp_group_add_task()
down_write(sp_group_sem)
  find_or_alloc_sp_group()
    !spg_valid()
      sp_group_drop()
        free_sp_group()
          -> down_write(sp_group_sem)   ---> AA deadlock
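The fix follows the usual _locked-variant pattern: a caller that already holds sp_group_sem must use a drop helper that does not re-acquire it. A sketch of the caller side:

    down_write(&sp_group_sem);
    /* ... group found but fails the spg_valid() check ... */
    /* sp_group_drop() would re-take sp_group_sem inside free_sp_group()
     * and self-deadlock; the _locked variant asserts the semaphore is
     * held and frees without re-acquiring it. */
    sp_group_drop_locked(spg);
    up_write(&sp_group_sem);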
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 938ee9a90aeb..f1f29bc07361 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -961,6 +961,14 @@ static void free_sp_group(struct sp_group *spg) up_write(&sp_group_sem); }
+static void sp_group_drop_locked(struct sp_group *spg) +{ + lockdep_assert_held_write(&sp_group_sem); + + if (atomic_dec_and_test(&spg->use_count)) + free_sp_group_locked(spg); +} + static void sp_group_drop(struct sp_group *spg) { if (atomic_dec_and_test(&spg->use_count)) @@ -1199,7 +1207,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag) down_read(&spg->rw_lock); if (!spg_valid(spg)) { up_read(&spg->rw_lock); - sp_group_drop(spg); + sp_group_drop_locked(spg); return ERR_PTR(-ENODEV); } up_read(&spg->rw_lock);
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5QQPG CVE: NA
--------------------------------
Add a check for size == 0 in mg_sp_make_share_k2u() to avoid passing a zero-sized spa to __insert_sp_area().
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- mm/share_pool.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index f1f29bc07361..09e0b247bddb 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2870,6 +2870,11 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; unsigned long kva_aligned, size_aligned;
+ if (!size) { + pr_err_ratelimited("k2u input size is 0.\n"); + return -EINVAL; + } + if (sp_flags & ~SP_FLAG_MASK) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL;
From: Guo Mengqi guomengqi3@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5RO2H CVE: NA
--------------------------------
When nr_hugepages is configured, sharepool allocates huge pages first from the hugetlb pool, and then from the buddy system once the pool is used up. The current page release path treats buddy-system huge pages as hugetlb pages, which causes HugePages_Rsvd to increase improperly.

Add a check to the page release path: if the page is temporary (i.e. it came from the buddy system), do not call hugetlb_unreserve_pages().
Signed-off-by: Guo Mengqi guomengqi3@huawei.com --- fs/hugetlbfs/inode.c | 19 +++++++++++++------ mm/share_pool.c | 3 +-- 2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8a87d1b43387..379aa008514d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -597,11 +597,17 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, */ VM_BUG_ON(HPageRestoreReserve(page)); remove_huge_page(page); - freed++; - if (!truncate_op) { - if (unlikely(hugetlb_unreserve_pages(inode, - index, index + 1, 1))) - hugetlb_fix_reserve_counts(inode); + /* + * if the page is from buddy system, do not add to freed. + * because freed is used for hugetlbfs reservation accounting. + */ + if (!HPageTemporary(page)) { + freed++; + if (!truncate_op) { + if (unlikely(hugetlb_unreserve_pages(inode, + index, index + 1, 1))) + hugetlb_fix_reserve_counts(inode); + } }
unlock_page(page); @@ -1054,7 +1060,8 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping, pgoff_t index = page->index;
remove_huge_page(page); - if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) + if (!HPageTemporary(page) && + unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) hugetlb_fix_reserve_counts(inode);
return 0; diff --git a/mm/share_pool.c b/mm/share_pool.c index 09e0b247bddb..42a63b9beabc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4224,8 +4224,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { - page = alloc_huge_page_nodemask(hstate_file(vma->vm_file), - node_id, NULL, GFP_KERNEL); + page = hugetlb_alloc_hugepage(node_id, HUGETLB_ALLOC_BUDDY); if (!page) page = ERR_PTR(-ENOMEM); }
From: Zhang Zekun zhangzekun11@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5XQS4 CVE: NA
-----------------------------------------
1. Remove the inline clause before sp_mapping_find().
2. Do not declare or define reserved identifiers.
3. Add braces to if, else/else-if statements.
4. Fix the spacing around the pointer asterisk (*).
5. Use parentheses to make the evaluation order explicit in sp_remap_kva_to_vma(), sp_node_id() and init_local_group().
6. Rename __find_sp_area() to get_sp_area() to indicate that the function need not be called with the lock held and that it increases the spa's use_count.
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com --- mm/share_pool.c | 54 +++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 26 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 42a63b9beabc..7ad6efc8f1bc 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -216,7 +216,7 @@ struct sp_group {
 	struct rw_semaphore rw_lock;
 	/* list node for dvpp mapping */
 	struct list_head mnode;
-	struct sp_mapping *mapping[SP_MAPPING_END];
+	struct sp_mapping *mapping[SP_MAPPING_END];
 };

 /* a per-process(per mm) struct which manages a sp_group_node list */
@@ -364,7 +364,6 @@ static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm)
 		return;

 	type = sp_mapping_type(spm);
-
 	if (type == SP_MAPPING_DVPP)
 		list_del(&spg->mnode);
 	if (atomic_dec_and_test(&spm->user))
@@ -456,7 +455,7 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg)
 	return 0;
 }

-static inline struct sp_mapping *sp_mapping_find(struct sp_group *spg,
+static struct sp_mapping *sp_mapping_find(struct sp_group *spg,
 						 unsigned long addr)
 {
 	if (addr >= MMAP_SHARE_POOL_NORMAL_START && addr < MMAP_SHARE_POOL_NORMAL_END)
@@ -504,7 +503,7 @@ static int init_local_group(struct mm_struct *mm)

 	ret = local_group_add_task(mm, spg);
 	if (ret < 0)
-		/* The spm would be released while destroying the spg*/
+		/* The spm would be released while destroying the spg */
 		goto free_spg;

 	return 0;
@@ -1503,10 +1502,10 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 		unsigned long populate = 0;
 		struct file *file = spa_file(spa);
 		unsigned long addr;
-		unsigned long __prot = prot;
+		unsigned long prot_spa = prot;

 		if ((spa->flags & (SP_PROT_RO | SP_PROT_FOCUS)) == (SP_PROT_RO | SP_PROT_FOCUS))
-			__prot &= ~PROT_WRITE;
+			prot_spa &= ~PROT_WRITE;

 		__sp_area_drop_locked(prev);
 		prev = spa;
@@ -1519,7 +1518,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 		spin_unlock(&sp_area_lock);

 		if (spa->type == SPA_TYPE_K2SPG && spa->kva) {
-			addr = sp_remap_kva_to_vma(spa->kva, spa, mm, __prot, NULL);
+			addr = sp_remap_kva_to_vma(spa->kva, spa, mm, prot_spa, NULL);
 			if (IS_ERR_VALUE(addr))
 				pr_warn("add group remap k2u failed %ld\n", addr);
@@ -1537,7 +1536,7 @@ int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
 			break;
 		}

-		addr = sp_mmap(mm, file, spa, &populate, __prot, NULL);
+		addr = sp_mmap(mm, file, spa, &populate, prot_spa, NULL);
 		if (IS_ERR_VALUE(addr)) {
 			sp_munmap_task_areas(mm, spg, &spa->link);
 			up_write(&mm->mmap_lock);
@@ -1705,7 +1704,7 @@ int mg_sp_id_of_current(void)
 	if (!sp_is_enabled())
 		return -EOPNOTSUPP;

-	if (current->flags & PF_KTHREAD || !current->mm)
+	if ((current->flags & PF_KTHREAD) || !current->mm)
 		return -EINVAL;

 	down_read(&sp_group_sem);
@@ -1791,10 +1790,11 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 			return ERR_PTR(-EINVAL);
 		}
 		mapping = spg->mapping[SP_MAPPING_RO];
-	} else if (flags & SP_DVPP)
+	} else if (flags & SP_DVPP) {
 		mapping = spg->mapping[SP_MAPPING_DVPP];
-	else
+	} else {
 		mapping = spg->mapping[SP_MAPPING_NORMAL];
+	}

 	if (!mapping) {
 		pr_err_ratelimited("non DVPP spg, id %d\n", spg->id);
@@ -1938,7 +1938,7 @@ static struct sp_area *__find_sp_area_locked(struct sp_group *spg,
 	return NULL;
 }

-static struct sp_area *__find_sp_area(struct sp_group *spg, unsigned long addr)
+static struct sp_area *get_sp_area(struct sp_group *spg, unsigned long addr)
 {
 	struct sp_area *n;
@@ -2144,7 +2144,7 @@ static int sp_free_get_spa(struct sp_free_context *fc)

 	fc->state = FREE_CONT;

-	spa = __find_sp_area(spg, addr);
+	spa = get_sp_area(spg, addr);
 	sp_group_drop(spg);
 	if (!spa) {
 		pr_debug("sp free invalid input addr %lx\n", addr);
@@ -2233,7 +2233,7 @@ int mg_sp_free(unsigned long addr, int id)
 	else
 		sp_update_process_stat(current, false, fc.spa);

-	__sp_area_drop(fc.spa); /* match __find_sp_area in sp_free_get_spa */
+	__sp_area_drop(fc.spa); /* match get_sp_area in sp_free_get_spa */
 out:
 	return ret;
 }
@@ -2518,8 +2518,9 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
 		else
 			pr_warn_ratelimited("allocation failed due to mm populate failed(potential no enough memory when -12): %d\n", ret);
-	} else
+	} else {
 		ac->need_fallocate = true;
+	}

 	return ret;
 }
@@ -2693,7 +2694,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
 		goto put_mm;
 	}

-	if (kc && kc->sp_flags & SP_PROT_RO)
+	if (kc && (kc->sp_flags & SP_PROT_RO))
 		prot = PROT_READ;

 	ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot, &vma);
@@ -2963,9 +2964,9 @@ void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
 	if (ret)
 		return ERR_PTR(ret);

-	if (kc.to_task)
+	if (kc.to_task) {
 		uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags);
-	else {
+	} else {
 		struct sp_group *spg;

 		spg = __sp_find_spg(current->pid, kc.spg_id);
@@ -2978,8 +2979,9 @@ void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
 			}
 			uva = sp_make_share_kva_to_spg(kc.kva_aligned, kc.size_aligned, kc.sp_flags, spg);
 			sp_group_drop(spg);
-		} else
+		} else {
 			uva = ERR_PTR(-ENODEV);
+		}
 	}

 out:
@@ -3003,8 +3005,9 @@ static int sp_pmd_entry(pmd_t *pmd, unsigned long addr,
 	if (!sp_walk_data->is_page_type_set) {
 		sp_walk_data->is_page_type_set = true;
 		sp_walk_data->is_hugepage = true;
-	} else if (!sp_walk_data->is_hugepage)
+	} else if (!sp_walk_data->is_hugepage) {
 		return -EFAULT;
+	}

 	/* To skip pte level walk */
 	walk->action = ACTION_CONTINUE;
@@ -3330,9 +3333,9 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id)
 	 * at first we guess it's a hugepage addr
 	 * we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u
 	 */
-	spa = __find_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE));
+	spa = get_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE));
 	if (!spa) {
-		spa = __find_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE));
+		spa = get_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE));
 		if (!spa) {
 			ret = -EINVAL;
 			pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva);
@@ -3572,9 +3575,9 @@ int mg_sp_walk_page_range(unsigned long uva, unsigned long size,
 	}

 	down_write(&mm->mmap_lock);
-	if (likely(!mm->core_state))
+	if (likely(!mm->core_state)) {
 		ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
-	else {
+	} else {
 		pr_err("walk page range: encoutered coredump\n");
 		ret = -ESRCH;
 	}
@@ -3724,7 +3727,7 @@ int sp_node_id(struct vm_area_struct *vma)
 	if (!sp_is_enabled())
 		return node_id;

-	if (vma && vma->vm_flags & VM_SHARE_POOL && vma->vm_private_data) {
+	if (vma && (vma->vm_flags & VM_SHARE_POOL) && vma->vm_private_data) {
 		spa = vma->vm_private_data;
 		node_id = spa->node_id;
 	}
@@ -4083,7 +4086,6 @@ static int proc_usage_by_group(int id, void *p, void *data)

 	down_read(&spg->rw_lock);
 	list_for_each_entry(spg_node, &spg->procs, proc_node) {
-		master = spg_node->master;
 		mm = master->mm;
 		tgid = master->instat.tgid;
From: Zhang Zekun <zhangzekun11@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5XQS4
CVE: NA
----------------------------------------------
Remove the leading double underscores from function names: rename __insert_sp_area to insert_sp_area and __find_sp_area_locked to find_sp_area_locked.
Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 mm/share_pool.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 7ad6efc8f1bc..96b2b1d248ff 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -1731,7 +1731,7 @@ int mg_sp_id_of_current(void)
 EXPORT_SYMBOL_GPL(mg_sp_id_of_current);

 /* the caller must hold sp_area_lock */
-static void __insert_sp_area(struct sp_mapping *spm, struct sp_area *spa)
+static void insert_sp_area(struct sp_mapping *spm, struct sp_area *spa)
 {
 	struct rb_node **p = &spm->area_root.rb_node;
 	struct rb_node *parent = NULL;
@@ -1902,7 +1902,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 	spa->device_id = device_id;

 	spa_inc_usage(spa);
-	__insert_sp_area(mapping, spa);
+	insert_sp_area(mapping, spa);
 	mapping->free_area_cache = &spa->rb_node;
 	list_add_tail(&spa->link, &spg->spa_list);
@@ -1917,7 +1917,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 }

 /* the caller should hold sp_area_lock */
-static struct sp_area *__find_sp_area_locked(struct sp_group *spg,
+static struct sp_area *find_sp_area_locked(struct sp_group *spg,
 					     unsigned long addr)
 {
 	struct sp_mapping *spm = sp_mapping_find(spg, addr);
@@ -1943,7 +1943,7 @@ static struct sp_area *get_sp_area(struct sp_group *spg, unsigned long addr)
 	struct sp_area *n;

 	spin_lock(&sp_area_lock);
-	n = __find_sp_area_locked(spg, addr);
+	n = find_sp_area_locked(spg, addr);
 	if (n)
 		atomic_inc(&n->use_count);
 	spin_unlock(&sp_area_lock);
@@ -2038,7 +2038,7 @@ void sp_area_drop(struct vm_area_struct *vma)
 	 * Considering a situation where task A and B are in the same spg.
 	 * A is exiting and calling remove_vma() -> ... -> sp_area_drop().
 	 * Concurrently, B is calling sp_free() to free the same spa.
-	 * __find_sp_area_locked() and __sp_area_drop_locked() should be
+	 * find_sp_area_locked() and __sp_area_drop_locked() should be
 	 * an atomic operation.
 	 */
 	spin_lock(&sp_area_lock);
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5XQS4
CVE: NA
--------------------------------
When we allocate memory with SP_HUGEPAGE, we fall back to normal pages if there are not enough hugepages. The NUMA node specified by the caller gets lost in that fallback, so memory could end up being allocated from a node other than the one requested.
The solution is to rebind the node before retrying.
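A minimal sketch of the retry flow (identifiers as in the hunk below; ac.have_mbind records whether the mbind policy has already been applied):

	ret = sp_alloc_mmap_populate(spa, &ac);
	if (ret && ac.state == ALLOC_RETRY) {
		/*
		 * The normal-page backing file carries its own mempolicy,
		 * so clear have_mbind to force sp_mbind() to run again
		 * on the retry path.
		 */
		ac.have_mbind = false;
		goto try_again;
	}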
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 mm/share_pool.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 96b2b1d248ff..a119d8bf542d 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -2622,8 +2622,15 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
 	}

 	ret = sp_alloc_mmap_populate(spa, &ac);
-	if (ret && ac.state == ALLOC_RETRY)
+	if (ret && ac.state == ALLOC_RETRY) {
+		/*
+		 * The mempolicy for shared memory is located at backend file, which varies
+		 * between normal pages and huge pages. So we should set the mbind policy again
+		 * when we retry using normal pages.
+		 */
+		ac.have_mbind = false;
 		goto try_again;
+	}

 out:
 	sp_alloc_finish(ret, spa, &ac);
From: Guo Mengqi <guomengqi3@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5XQS4
CVE: NA
--------------------------------
If current->flags has PF_MEMALLOC set, memcg will not check the task's allocations against the memory use limit, which can cause the system to run out of memory.

According to https://lkml.indiana.edu/hypermail/linux/kernel/0911.2/00576.html, PF_MEMALLOC shall only be used when the allocation is guaranteed to free more memory as a result.

Do not use PF_MEMALLOC; instead, remove __GFP_RECLAIM from the gfp_mask to ensure no reclaim is performed.
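In gfp terms this is a two-line change (sketch of the mm/hugetlb.c hunk below; __GFP_RECLAIM is the union of __GFP_DIRECT_RECLAIM and __GFP_KSWAPD_RECLAIM, so clearing it disables both direct reclaim and the kswapd wakeup):

	if (flag & HUGETLB_ALLOC_NORECLAIM)
		gfp_mask &= ~__GFP_RECLAIM;	/* fail fast instead of reclaiming */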
Signed-off-by: Guo Mengqi <guomengqi3@huawei.com>
---
 include/linux/hugetlb.h |  6 ++++--
 mm/hugetlb.c            |  3 +++
 mm/share_pool.c         | 27 +++------------------------
 3 files changed, 10 insertions(+), 26 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0dfe08439095..ea2c249281fc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -621,9 +621,11 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 #define HUGETLB_ALLOC_NONE		0x00
 #define HUGETLB_ALLOC_NORMAL		0x01	/* normal hugepage */
 #define HUGETLB_ALLOC_BUDDY		0x02	/* buddy hugepage */
-#define HUGETLB_ALLOC_MASK		(HUGETLB_ALLOC_NONE | \
+#define HUGETLB_ALLOC_NORECLAIM		0x04	/* no reclaim */
+#define HUGETLB_ALLOC_MASK		(HUGETLB_ALLOC_NONE | \
 					 HUGETLB_ALLOC_NORMAL | \
-					 HUGETLB_ALLOC_BUDDY)
+					 HUGETLB_ALLOC_BUDDY | \
+					 HUGETLB_ALLOC_NORECLAIM)

 const struct hstate *hugetlb_get_hstate(void);
 struct page *hugetlb_alloc_hugepage(int nid, int flag);

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c3b73c0b7e7e..3d59f4298cbe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6256,6 +6256,9 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag)
 	if (enable_charge_mighp)
 		gfp_mask |= __GFP_ACCOUNT;

+	if (flag & HUGETLB_ALLOC_NORECLAIM)
+		gfp_mask &= ~__GFP_RECLAIM;
+
 	if (flag & HUGETLB_ALLOC_NORMAL)
 		page = hugetlb_alloc_hugepage_normal(h, gfp_mask, nid);
 	else if (flag & HUGETLB_ALLOC_BUDDY)

diff --git a/mm/share_pool.c b/mm/share_pool.c
index a119d8bf542d..4f76ecf742c4 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -2449,35 +2449,13 @@ static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac)
 static int sp_alloc_populate(struct mm_struct *mm,
 			     struct sp_area *spa, struct sp_alloc_context *ac)
 {
-	int ret = 0;
-	unsigned long sp_addr = spa->va_start;
-	unsigned int noreclaim_flag = 0;
-
-	/*
-	 * The direct reclaim and compact may take a long
-	 * time. As a result, sp mutex will be hold for too
-	 * long time to casue the hung task problem. In this
-	 * case, set the PF_MEMALLOC flag to prevent the
-	 * direct reclaim and compact from being executed.
-	 * Since direct reclaim and compact are not performed
-	 * when the fragmentation is severe or the memory is
-	 * insufficient, 2MB continuous physical pages fail
-	 * to be allocated. This situation is allowed.
-	 */
-	if (spa->is_hugepage)
-		noreclaim_flag = memalloc_noreclaim_save();
-
 	/*
 	 * We are not ignoring errors, so if we fail to allocate
 	 * physical memory we just return failure, so we won't encounter
 	 * page fault later on, and more importantly sp_make_share_u2k()
 	 * depends on this feature (and MAP_LOCKED) to work correctly.
 	 */
-	ret = do_mm_populate(mm, sp_addr, ac->populate, 0);
-	if (spa->is_hugepage)
-		memalloc_noreclaim_restore(noreclaim_flag);
-
-	return ret;
+	return do_mm_populate(mm, spa->va_start, ac->populate, 0);
 }

 static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len,
@@ -4233,7 +4211,8 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
 		page = alloc_huge_page(vma, haddr, 0);
 		if (IS_ERR(page)) {
-			page = hugetlb_alloc_hugepage(node_id, HUGETLB_ALLOC_BUDDY);
+			page = hugetlb_alloc_hugepage(node_id,
+					HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM);
 			if (!page)
 				page = ERR_PTR(-ENOMEM);
 		}
From: Zhou Guanghui <zhouguanghui1@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5XQS4
CVE: NA
--------------------------------
The DVPP address range is the half-open interval [start, start + size), so start + size itself is one past the last valid byte and may legitimately fall outside the range. Check start + size - 1 instead.
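A sketch of the corrected boundary check (condition as in the hunk below):

	/* [start, start + size) is half-open: validate the last byte */
	if (!is_sp_dynamic_dvpp_addr(start) ||
	    !is_sp_dynamic_dvpp_addr(start + size - 1))
		return false;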
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
---
 mm/share_pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 4f76ecf742c4..68e8f5c93a1f 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -3634,7 +3634,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
 	/* NOTE: check the start address */
 	if (pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE ||
 	    device_id < 0 || device_id >= MAX_DEVID || !is_online_node_id(device_id)
-	    || !is_sp_dynamic_dvpp_addr(start) || !is_sp_dynamic_dvpp_addr(start + size))
+	    || !is_sp_dynamic_dvpp_addr(start) || !is_sp_dynamic_dvpp_addr(start + size - 1))
 		return false;
ret = get_task(pid, &tsk);