From: Tang Yizhou <tangyizhou@huawei.com>

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
-------------------------------------------------
We are going to redesign the accounting subsystem of share pool. We need to disambiguate the meaning of sp_stat_idr, as we will introduce a struct representing per-spg statistics.
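For context, the per-process statistics live in a single IDR keyed by tgid and guarded by a rw_semaphore; the rename only makes this pair unambiguously per-process before the per-spg structures arrive. Below is a minimal, simplified sketch of the lookup pattern the patch keeps; the idr/semaphore names follow the patch, while lookup_proc_stat is a hypothetical wrapper added here for illustration only.

#include <linux/idr.h>
#include <linux/rwsem.h>

static DEFINE_IDR(sp_proc_stat_idr);     /* tgid -> struct sp_proc_stat * */
static DECLARE_RWSEM(sp_proc_stat_sem);  /* protects sp_proc_stat_idr */

/* Read-side lookup: take the semaphore shared, find by tgid, release. */
static struct sp_proc_stat *lookup_proc_stat(int tgid)
{
        struct sp_proc_stat *stat;

        down_read(&sp_proc_stat_sem);
        stat = idr_find(&sp_proc_stat_idr, tgid);
        up_read(&sp_proc_stat_sem);

        return stat; /* may be NULL if the process has no statistics yet */
}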
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index cd6e137fe6698..c4b8daa47fcfd 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -95,9 +95,9 @@ static DEFINE_IDA(sp_group_id_ida); /*** Statistical and maintenance tools ***/
/* idr of all sp_proc_stats */ -static DEFINE_IDR(sp_stat_idr); -/* rw semaphore for sp_stat_idr and mm->sp_stat_id */ -static DECLARE_RWSEM(sp_stat_sem); +static DEFINE_IDR(sp_proc_stat_idr); +/* rw semaphore for sp_proc_stat_idr */ +static DECLARE_RWSEM(sp_proc_stat_sem);
/* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat; @@ -107,7 +107,7 @@ static struct sp_proc_stat *sp_get_proc_stat_locked(int tgid) { struct sp_proc_stat *stat;
- stat = idr_find(&sp_stat_idr, tgid); + stat = idr_find(&sp_proc_stat_idr, tgid);
/* maybe NULL or not, we always return it */ return stat; @@ -118,7 +118,7 @@ static struct sp_proc_stat *sp_get_proc_stat_ref_locked(int tgid) { struct sp_proc_stat *stat;
- stat = idr_find(&sp_stat_idr, tgid); + stat = idr_find(&sp_proc_stat_idr, tgid); if (!stat || !atomic_inc_not_zero(&stat->use_count)) stat = NULL;
@@ -137,16 +137,16 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, int id, tgid = tsk->tgid; int ret;
- down_write(&sp_stat_sem); + down_write(&sp_proc_stat_sem); id = mm->sp_group_master->sp_stat_id; if (id) { /* other threads in the same process may have initialized it */ stat = sp_get_proc_stat_locked(tgid); if (stat) { - up_write(&sp_stat_sem); + up_write(&sp_proc_stat_sem); return stat; } else { - up_write(&sp_stat_sem); + up_write(&sp_proc_stat_sem); /* if enter this branch, that's our mistake */ pr_err_ratelimited("share pool: proc stat invalid id %d\n", id); return ERR_PTR(-EBUSY); @@ -155,7 +155,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk,
stat = kzalloc(sizeof(*stat), GFP_KERNEL); if (stat == NULL) { - up_write(&sp_stat_sem); + up_write(&sp_proc_stat_sem); pr_err_ratelimited("share pool: alloc proc stat failed due to lack of memory\n"); return ERR_PTR(-ENOMEM); } @@ -167,16 +167,16 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, stat->mm = mm; get_task_comm(stat->comm, tsk);
- ret = idr_alloc(&sp_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); + ret = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); if (ret < 0) { - up_write(&sp_stat_sem); + up_write(&sp_proc_stat_sem); pr_err_ratelimited("share pool: proc stat idr alloc failed %d\n", ret); kfree(stat); return ERR_PTR(ret); }
mm->sp_group_master->sp_stat_id = ret; - up_write(&sp_stat_sem); + up_write(&sp_proc_stat_sem); return stat; }
@@ -184,9 +184,9 @@ static struct sp_proc_stat *sp_get_proc_stat(int tgid) { struct sp_proc_stat *stat;
- down_read(&sp_stat_sem); + down_read(&sp_proc_stat_sem); stat = sp_get_proc_stat_locked(tgid); - up_read(&sp_stat_sem); + up_read(&sp_proc_stat_sem); return stat; }
@@ -195,9 +195,9 @@ struct sp_proc_stat *sp_get_proc_stat_ref(int tgid) { struct sp_proc_stat *stat;
- down_read(&sp_stat_sem); + down_read(&sp_proc_stat_sem); stat = sp_get_proc_stat_ref_locked(tgid); - up_read(&sp_stat_sem); + up_read(&sp_proc_stat_sem); return stat; }
@@ -2850,10 +2850,10 @@ __setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode);
static void free_sp_proc_stat(struct sp_proc_stat *stat) { - down_write(&sp_stat_sem); + down_write(&sp_proc_stat_sem); stat->mm->sp_group_master->sp_stat_id = 0; - idr_remove(&sp_stat_idr, stat->tgid); - up_write(&sp_stat_sem); + idr_remove(&sp_proc_stat_idr, stat->tgid); + up_write(&sp_proc_stat_sem); kfree(stat); }
@@ -3163,9 +3163,9 @@ static int proc_stat_show(struct seq_file *seq, void *offset) byte2kb(atomic64_read(&kthread_stat.k2u_size)));
/* pay attention to potential ABBA deadlock */ - down_read(&sp_stat_sem); - idr_for_each(&sp_stat_idr, idr_proc_stat_cb, seq); - up_read(&sp_stat_sem); + down_read(&sp_proc_stat_sem); + idr_for_each(&sp_proc_stat_idr, idr_proc_stat_cb, seq); + up_read(&sp_proc_stat_sem); return 0; }
From: Tang Yizhou <tangyizhou@huawei.com>

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
-------------------------------------------------
When a process that is not in any sp group calls k2u_task, it hits a null pointer dereference in sp_init_proc_stat:
mm->sp_group_master is NULL, so accessing mm->sp_group_master->sp_stat_id is illegal.
To fix this, we initialize sp_group_master when k2u_task is called.
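In other words, sp_init_proc_stat used to dereference mm->sp_group_master unconditionally, which is only valid after the process has joined a group. The fix factors the setup out into an initialize-on-first-use helper; below is a condensed sketch of the idea, assuming kzalloc-style zeroing of the new master. ensure_group_master is a hypothetical name; the real helper, sp_init_group_master_locked, appears in the diff below and also reports whether the master already existed.

#include <linux/err.h>
#include <linux/list.h>
#include <linux/mm_types.h>
#include <linux/slab.h>

/* Called with sp_group_sem held for writing. */
static struct sp_group_master *ensure_group_master(struct mm_struct *mm)
{
        struct sp_group_master *master = mm->sp_group_master;

        if (master)
                return master;          /* set up earlier by a group add or k2u */

        master = kzalloc(sizeof(*master), GFP_KERNEL); /* zeroes count, sp_stat_id */
        if (!master)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&master->node_list);
        master->mm = mm;
        mm->sp_group_master = master;   /* ->sp_stat_id is now safe to access */

        return master;
}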
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 mm/share_pool.c | 80 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 27 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index c4b8daa47fcfd..3980e5632cdcf 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -102,6 +102,33 @@ static DECLARE_RWSEM(sp_proc_stat_sem); /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat;
+/* The caller must hold sp_group_sem */ +static struct sp_group_master *sp_init_group_master_locked( + struct mm_struct *mm, bool *exist) +{ + struct sp_group_master *master = mm->sp_group_master; + + if (master) { + *exist = true; + return master; + } + + master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); + if (master == NULL) { + pr_err_ratelimited("share pool: no memory for spg master\n"); + return ERR_PTR(-ENOMEM); + } + + INIT_LIST_HEAD(&master->node_list); + master->count = 0; + master->sp_stat_id = 0; + master->mm = mm; + mm->sp_group_master = master; + + *exist = false; + return master; +} + /* The caller must hold sp_stat_sem */ static struct sp_proc_stat *sp_get_proc_stat_locked(int tgid) { @@ -127,18 +154,25 @@ static struct sp_proc_stat *sp_get_proc_stat_ref_locked(int tgid) }
/* - * The caller must ensure no concurrency problem - * for task_struct and mm_struct. + * The caller must + * 1. ensure no concurrency problem for task_struct and mm_struct. + * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) */ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, struct mm_struct *mm) { + struct sp_group_master *master; + bool exist; struct sp_proc_stat *stat; int id, tgid = tsk->tgid; int ret;
+ master = sp_init_group_master_locked(mm, &exist); + if (IS_ERR(master)) + return (struct sp_proc_stat *)master; + down_write(&sp_proc_stat_sem); - id = mm->sp_group_master->sp_stat_id; + id = master->sp_stat_id; if (id) { /* other threads in the same process may have initialized it */ stat = sp_get_proc_stat_locked(tgid); @@ -175,7 +209,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, return ERR_PTR(ret); }
- mm->sp_group_master->sp_stat_id = ret; + master->sp_stat_id = ret; up_write(&sp_proc_stat_sem); return stat; } @@ -631,6 +665,7 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; + bool exist = false; struct sp_group_node *spg_node;
if (share_pool_group_mode == SINGLE_GROUP_MODE && master && @@ -639,32 +674,23 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) return -EEXIST; }
- if (!master) { - master = kzalloc(sizeof(struct sp_group_master), GFP_KERNEL); - if (master == NULL) { - pr_err_ratelimited("share pool: no memory for spg master\n"); - return -ENOMEM; - } - } else { - list_for_each_entry(spg_node, &master->node_list, group_node) { - if (spg_node->spg == spg) { - pr_err("share pool: task is already in target group\n"); - return -EEXIST; - } + master = sp_init_group_master_locked(mm, &exist); + if (IS_ERR(master)) + return PTR_ERR(master); + + if (!exist) + return 0; + + list_for_each_entry(spg_node, &master->node_list, group_node) { + if (spg_node->spg == spg) { + pr_err("share pool: task is already in target group\n"); + return -EEXIST; } }
- if (!mm->sp_group_master) { - INIT_LIST_HEAD(&master->node_list); - master->count = 0; - master->mm = mm; - master->sp_stat_id = 0; - mm->sp_group_master = master; - } else { - if (master->count + 1 == MAX_GROUP_FOR_TASK) { - pr_err("share pool: task reaches max group num\n"); - return -ENOSPC; - } + if (master->count + 1 == MAX_GROUP_FOR_TASK) { + pr_err("share pool: task reaches max group num\n"); + return -ENOSPC; }
return 0;
From: Tang Yizhou <tangyizhou@huawei.com>

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
-------------------------------------------------
In multi-group mode, we introduce struct spg_proc_stat, which represents the statistics of a process within one sp_group. struct sp_proc_stat is then the accumulation of all spg_proc_stat structs of a process.
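Concretely, each process keeps a small hashtable of spg_proc_stat entries keyed by spg_id, and every accounting update is applied twice: to the per-group entry and to the process-wide sp_proc_stat accumulator. A simplified sketch of that double accounting follows; the struct layout mirrors the patch, while charge_alloc is a hypothetical name standing in for update_spg_proc_stat_alloc. The counters are atomics so updates need no lock; only hashtable insertion and removal are serialized by the per-process mutex.

#include <linux/atomic.h>
#include <linux/types.h>

/* One entry per (process, sp_group) pair, hashed by spg_id in sp_proc_stat. */
struct spg_proc_stat {
        int tgid;
        int spg_id;                     /* 0 for non-group data such as k2u_task */
        struct hlist_node pnode;        /* node in sp_proc_stat->hash */
        struct sp_proc_stat *proc_stat; /* process-wide accumulator */
        atomic64_t alloc_size;
        atomic64_t k2u_size;
};

/* Charge or uncharge an sp_alloc on both the per-group and per-process counters. */
static void charge_alloc(struct spg_proc_stat *stat, unsigned long size, bool inc)
{
        struct sp_proc_stat *proc_stat = stat->proc_stat;

        if (inc) {
                atomic64_add(size, &stat->alloc_size);
                atomic64_add(size, &proc_stat->alloc_size);
        } else {
                atomic64_sub(size, &stat->alloc_size);
                atomic64_sub(size, &proc_stat->alloc_size);
        }
}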
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/share_pool.h |   6 +
 mm/share_pool.c            | 365 +++++++++++++++++++++++++++----------
 2 files changed, 276 insertions(+), 95 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 8ab4cfb2b2509..d4c0ec944adc1 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -6,6 +6,7 @@ #include <linux/notifier.h> #include <linux/vmalloc.h> #include <linux/printk.h> +#include <linux/hashtable.h>
#define SP_HUGEPAGE (1 << 0) #define SP_HUGEPAGE_ONLY (1 << 1) @@ -146,11 +147,16 @@ struct sp_walk_data { pmd_t *pmd; };
+/* we estimate a process ususally belongs to at most 16 sp-group */ +#define SP_PROC_HASH_BITS 4 + /* per process memory usage statistics indexed by tgid */ struct sp_proc_stat { atomic_t use_count; int tgid; struct mm_struct *mm; + struct mutex lock; /* protect hashtable */ + DECLARE_HASHTABLE(hash, SP_PROC_HASH_BITS); char comm[TASK_COMM_LEN]; /* * alloc amount minus free amount, may be negative when freed by diff --git a/mm/share_pool.c b/mm/share_pool.c index 3980e5632cdcf..4063dd0861cce 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -63,6 +63,8 @@ #define MAX_GROUP_FOR_TASK 3000 #define MAX_PROC_PER_GROUP 1024
+#define GROUP_NONE 0 + #define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
/* mdc scene hack */ @@ -153,88 +155,214 @@ static struct sp_proc_stat *sp_get_proc_stat_ref_locked(int tgid) return stat; }
-/* - * The caller must - * 1. ensure no concurrency problem for task_struct and mm_struct. - * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) - */ -static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk, - struct mm_struct *mm) +static struct sp_proc_stat *sp_get_proc_stat(int tgid) { - struct sp_group_master *master; - bool exist; struct sp_proc_stat *stat; - int id, tgid = tsk->tgid; - int ret;
- master = sp_init_group_master_locked(mm, &exist); - if (IS_ERR(master)) - return (struct sp_proc_stat *)master; + down_read(&sp_proc_stat_sem); + stat = sp_get_proc_stat_locked(tgid); + up_read(&sp_proc_stat_sem); + return stat; +} + +/* user must call sp_proc_stat_drop() after use */ +struct sp_proc_stat *sp_get_proc_stat_ref(int tgid) +{ + struct sp_proc_stat *stat; + + down_read(&sp_proc_stat_sem); + stat = sp_get_proc_stat_ref_locked(tgid); + up_read(&sp_proc_stat_sem); + return stat; +} + +static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm, + struct task_struct *tsk) +{ + struct sp_proc_stat *stat; + + stat = kmalloc(sizeof(*stat), GFP_KERNEL); + if (stat == NULL) { + pr_err_ratelimited("share pool: alloc proc stat failed, lack of memory\n"); + return ERR_PTR(-ENOMEM); + } + + atomic_set(&stat->use_count, 1); + atomic64_set(&stat->alloc_size, 0); + atomic64_set(&stat->k2u_size, 0); + stat->tgid = tsk->tgid; + stat->mm = mm; + mutex_init(&stat->lock); + hash_init(stat->hash); + get_task_comm(stat->comm, tsk); + + return stat; +} + +static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, + struct mm_struct *mm, struct task_struct *tsk) +{ + struct sp_proc_stat *stat; + int id, alloc_id, tgid = tsk->tgid;
down_write(&sp_proc_stat_sem); id = master->sp_stat_id; if (id) { - /* other threads in the same process may have initialized it */ + /* may have been initialized */ stat = sp_get_proc_stat_locked(tgid); + up_write(&sp_proc_stat_sem); if (stat) { - up_write(&sp_proc_stat_sem); return stat; } else { up_write(&sp_proc_stat_sem); /* if enter this branch, that's our mistake */ - pr_err_ratelimited("share pool: proc stat invalid id %d\n", id); + WARN(1, "share pool: proc stat invalid id %d\n", id); return ERR_PTR(-EBUSY); } }
- stat = kzalloc(sizeof(*stat), GFP_KERNEL); - if (stat == NULL) { + stat = create_proc_stat(mm, tsk); + if (IS_ERR(stat)) { up_write(&sp_proc_stat_sem); - pr_err_ratelimited("share pool: alloc proc stat failed due to lack of memory\n"); - return ERR_PTR(-ENOMEM); + return stat; }
- atomic_set(&stat->use_count, 1); - atomic64_set(&stat->alloc_size, 0); - atomic64_set(&stat->k2u_size, 0); - stat->tgid = tgid; - stat->mm = mm; - get_task_comm(stat->comm, tsk); - - ret = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); - if (ret < 0) { + alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); + if (alloc_id < 0) { up_write(&sp_proc_stat_sem); - pr_err_ratelimited("share pool: proc stat idr alloc failed %d\n", ret); + pr_err_ratelimited("share pool: proc stat idr alloc failed %d\n", alloc_id); kfree(stat); - return ERR_PTR(ret); + return ERR_PTR(alloc_id); }
- master->sp_stat_id = ret; + master->sp_stat_id = alloc_id; up_write(&sp_proc_stat_sem); + return stat; }
-static struct sp_proc_stat *sp_get_proc_stat(int tgid) +/* per process/sp-group memory usage statistics */ +struct spg_proc_stat { + int tgid; + int spg_id; /* 0 for non-group data, such as k2u_task */ + struct hlist_node pnode; /* hlist node in sp_proc_stat->hash */ + struct sp_proc_stat *proc_stat; + /* + * alloc amount minus free amount, may be negative when freed by + * another task in the same sp group. + */ + atomic64_t alloc_size; + atomic64_t k2u_size; +}; + +static void update_spg_proc_stat_alloc(unsigned long size, bool inc, + struct spg_proc_stat *stat) { - struct sp_proc_stat *stat; + struct sp_proc_stat *proc_stat = stat->proc_stat; + + if (inc) { + atomic64_add(size, &stat->alloc_size); + atomic64_add(size, &proc_stat->alloc_size); + } else { + atomic64_sub(size, &stat->alloc_size); + atomic64_sub(size, &proc_stat->alloc_size); + } +} + +static void update_spg_proc_stat_k2u(unsigned long size, bool inc, + struct spg_proc_stat *stat) +{ + struct sp_proc_stat *proc_stat = stat->proc_stat; + + if (inc) { + atomic64_add(size, &stat->k2u_size); + atomic64_add(size, &proc_stat->k2u_size); + + } else { + atomic64_sub(size, &stat->k2u_size); + atomic64_sub(size, &proc_stat->k2u_size); + } +} + +static struct spg_proc_stat *find_spg_proc_stat( + struct sp_proc_stat *proc_stat, int tgid, int spg_id) +{ + struct spg_proc_stat *stat = NULL; + + mutex_lock(&proc_stat->lock); + hash_for_each_possible(proc_stat->hash, stat, pnode, spg_id) { + if (stat->spg_id == spg_id) + break; + } + mutex_unlock(&proc_stat->lock);
- down_read(&sp_proc_stat_sem); - stat = sp_get_proc_stat_locked(tgid); - up_read(&sp_proc_stat_sem); return stat; }
-/* user must call sp_proc_stat_drop() after use */ -struct sp_proc_stat *sp_get_proc_stat_ref(int tgid) +static struct spg_proc_stat *create_spg_proc_stat( + struct sp_proc_stat *proc_stat, int tgid, int spg_id) { - struct sp_proc_stat *stat; + struct spg_proc_stat *stat;
- down_read(&sp_proc_stat_sem); - stat = sp_get_proc_stat_ref_locked(tgid); - up_read(&sp_proc_stat_sem); + stat = kmalloc(sizeof(struct spg_proc_stat), GFP_KERNEL); + if (stat == NULL) { + pr_err_ratelimited("share pool: no memory for spg proc stat\n"); + return ERR_PTR(-ENOMEM); + } + + stat->tgid = tgid; + stat->spg_id = spg_id; + stat->proc_stat = proc_stat; + atomic64_set(&stat->alloc_size, 0); + atomic64_set(&stat->k2u_size, 0); + + return stat; +} + +static struct spg_proc_stat *sp_init_spg_proc_stat( + struct sp_proc_stat *proc_stat, int tgid, int spg_id) +{ + struct spg_proc_stat *stat; + + stat = find_spg_proc_stat(proc_stat, tgid, spg_id); + if (stat) + return stat; + + stat = create_spg_proc_stat(proc_stat, tgid, spg_id); + if (IS_ERR(stat)) + return stat; + + mutex_lock(&proc_stat->lock); + hash_add(proc_stat->hash, &stat->pnode, stat->spg_id); + mutex_unlock(&proc_stat->lock); return stat; }
+/* + * The caller must + * 1. ensure no concurrency problem for task_struct and mm_struct. + * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) + */ +static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk, + struct mm_struct *mm, int spg_id) +{ + struct sp_group_master *master; + bool exist; + struct sp_proc_stat *proc_stat; + struct spg_proc_stat *spg_proc_stat; + + master = sp_init_group_master_locked(mm, &exist); + if (IS_ERR(master)) + return (struct spg_proc_stat *)master; + + proc_stat = sp_init_proc_stat(master, mm, tsk); + if (IS_ERR(proc_stat)) + return (struct spg_proc_stat *)proc_stat; + + spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg_id); + return spg_proc_stat; +} + /* statistics of all sp area, protected by sp_area_lock */ struct sp_spa_stat { unsigned int total_num; @@ -373,6 +501,44 @@ static int spa_dec_usage(enum spa_type type, unsigned long size, bool is_dvpp) return 0; }
+static void update_spg_proc_stat(unsigned long size, bool inc, + struct spg_proc_stat *stat, enum spa_type type) +{ + if (unlikely(!stat)) { + sp_dump_stack(); + WARN(1, "share pool: null process stat\n"); + return; + } + + switch (type) { + case SPA_TYPE_ALLOC: + update_spg_proc_stat_alloc(size, inc, stat); + break; + case SPA_TYPE_K2TASK: + case SPA_TYPE_K2SPG: + update_spg_proc_stat_k2u(size, inc, stat); + break; + default: + WARN(1, "share pool: invalid stat type\n"); + } +} + +static void sp_update_process_stat(struct task_struct *tsk, bool inc, + int spg_id, struct sp_area *spa) +{ + struct spg_proc_stat *stat; + unsigned long size = spa->real_size; + enum spa_type type = spa->type; + + down_write(&sp_group_sem); + stat = sp_init_process_stat(tsk, tsk->mm, spg_id); + up_write(&sp_group_sem); + if (unlikely(IS_ERR(stat))) + return; + + update_spg_proc_stat(size, inc, stat, type); +} + static inline void check_interrupt_context(void) { if (unlikely(in_interrupt())) @@ -751,7 +917,7 @@ int sp_group_add_task(int pid, int spg_id) int ret = 0; bool id_newly_generated = false; struct sp_area *spa, *prev = NULL; - struct sp_proc_stat *stat; + struct spg_proc_stat *stat;
check_interrupt_context();
@@ -872,10 +1038,10 @@ int sp_group_add_task(int pid, int spg_id) }
/* per process statistics initialization */ - stat = sp_init_proc_stat(tsk, mm); + stat = sp_init_process_stat(tsk, mm, spg_id); if (IS_ERR(stat)) { ret = PTR_ERR(stat); - pr_err_ratelimited("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat)); + pr_err_ratelimited("share pool: init process stat failed, ret %lx\n", PTR_ERR(stat)); goto out_drop_group; }
@@ -954,9 +1120,7 @@ int sp_group_add_task(int pid, int spg_id) spin_unlock(&sp_area_lock); up_write(&spg->rw_lock);
- if (unlikely(ret)) - sp_proc_stat_drop(stat); - + /* no need to free spg_proc_stat, will be freed when process exits */ out_drop_group: if (unlikely(ret)) { if (mm->sp_group_master->count == 0) { @@ -1406,7 +1570,6 @@ static void __sp_free(struct sp_group *spg, unsigned long addr, int sp_free(unsigned long addr) { struct sp_area *spa; - struct sp_proc_stat *stat; int mode; loff_t offset; int ret = 0; @@ -1474,15 +1637,10 @@ int sp_free(unsigned long addr) up_read(&spa->spg->rw_lock);
/* pointer stat may be invalid because of kthread buff_module_guard_work */ - if (current->mm == NULL) { + if (current->mm == NULL) atomic64_sub(spa->real_size, &kthread_stat.alloc_size); - } else { - stat = sp_get_proc_stat(current->mm->sp_group_master->sp_stat_id); - if (stat) - atomic64_sub(spa->real_size, &stat->alloc_size); - else - WARN(1, "share pool: %s: null process stat\n", __func__); - } + else + sp_update_process_stat(current, false, spa->spg->id, spa);
drop_spa: __sp_area_drop(spa); @@ -1540,7 +1698,6 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { struct sp_group *spg, *spg_tmp; struct sp_area *spa = NULL; - struct sp_proc_stat *stat; unsigned long sp_addr; unsigned long mmap_addr; void *p; /* return value */ @@ -1732,13 +1889,8 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) out: up_read(&spg->rw_lock);
- if (!IS_ERR(p)) { - stat = sp_get_proc_stat(current->mm->sp_group_master->sp_stat_id); - if (stat) - atomic64_add(size_aligned, &stat->alloc_size); - else - WARN(1, "share pool: %s: null process stat\n", __func__); - } + if (!IS_ERR(p)) + sp_update_process_stat(current, true, spg->id, spa);
/* this will free spa if mmap failed */ if (spa && !IS_ERR(spa)) @@ -1983,7 +2135,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; struct task_struct *tsk; struct mm_struct *mm; - struct sp_proc_stat *stat; int ret = 0, is_hugepage;
check_interrupt_context(); @@ -2025,25 +2176,27 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, goto out_put_task; }
- /* - * Process statistics initialization. if the target process has been - * added to a sp group, then stat will be returned immediately. - */ - stat = sp_init_proc_stat(tsk, mm); - if (IS_ERR(stat)) { - uva = stat; - pr_err_ratelimited("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat)); - goto out_put_mm; - } - spg = __sp_find_spg(pid, SPG_ID_DEFAULT); if (spg == NULL) { /* k2u to task */ + struct spg_proc_stat *stat; + if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { pr_err_ratelimited("share pool: k2task invalid spg id %d\n", spg_id); uva = ERR_PTR(-EINVAL); goto out_put_mm; } + + down_write(&sp_group_sem); + stat = sp_init_process_stat(tsk, mm, GROUP_NONE); + up_write(&sp_group_sem); + if (IS_ERR(stat)) { + uva = stat; + pr_err_ratelimited("share pool: k2u(task) init process stat failed, ret %lx\n", + PTR_ERR(stat)); + goto out_put_mm; + } + spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK, tsk->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("share pool: k2u(task) failed due to alloc spa failure " @@ -2058,7 +2211,12 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, }
uva = sp_make_share_kva_to_task(kva_aligned, spa, mm); - goto accounting; + + if (!IS_ERR(uva)) + update_spg_proc_stat(size_aligned, true, stat, + SPA_TYPE_K2TASK); + + goto finish; }
down_read(&spg->rw_lock); @@ -2094,7 +2252,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, uva = sp_make_share_kva_to_spg(kva_aligned, spa, spg); else uva = sp_make_share_kva_to_task(kva_aligned, spa, mm); - } else { /* group is dead, return -ENODEV */ pr_err_ratelimited("share pool: failed to make k2u, sp group is dead\n"); @@ -2102,10 +2259,12 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } up_read(&spg->rw_lock);
-accounting: + if (!IS_ERR(uva)) + sp_update_process_stat(tsk, true, spg_id, spa); + +finish: if (!IS_ERR(uva)) { uva = uva + (kva - kva_aligned); - atomic64_add(size_aligned, &stat->k2u_size); } else { /* associate vma and spa */ if (!vmalloc_area_clr_flag(spa, kva_aligned, VM_SHAREPOOL)) @@ -2445,7 +2604,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp unsigned long uva_aligned; unsigned long size_aligned; unsigned int page_size; - struct sp_proc_stat *stat; struct sp_group_node *spg_node;
/* @@ -2533,6 +2691,12 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp pr_err("share pool: failed to unmap VA %pK when munmap in unshare uva\n", (void *)uva_aligned); } + + if (unlikely(!current->mm)) + WARN(1, "share pool: unshare uva(to task) unexpected active kthread"); + else + sp_update_process_stat(current, false, GROUP_NONE, spa); + } else if (spa->type == SPA_TYPE_K2SPG) { if (spg_id < 0) { pr_err_ratelimited("share pool: unshare uva(to group) failed, invalid spg id %d\n", spg_id); @@ -2574,21 +2738,17 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp down_read(&spa->spg->rw_lock); __sp_free(spa->spg, uva_aligned, size_aligned, NULL); up_read(&spa->spg->rw_lock); - } - - sp_dump_stack();
- /* pointer stat may be invalid because of kthread buff_module_guard_work */ - if (current->mm == NULL) { - atomic64_sub(spa->real_size, &kthread_stat.k2u_size); - } else { - stat = sp_get_proc_stat(current->mm->sp_group_master->sp_stat_id); - if (stat) - atomic64_sub(spa->real_size, &stat->k2u_size); + if (current->mm == NULL) + atomic64_sub(spa->real_size, &kthread_stat.k2u_size); else - WARN(1, "share pool: %s: null process stat\n", __func__); + sp_update_process_stat(current, false, spa->spg->id, spa); + } else { + WARN(1, "share pool: unshare uva invalid spa type"); }
+ sp_dump_stack(); + out_clr_flag: /* deassociate vma and spa */ if (!vmalloc_area_clr_flag(spa, spa->kva, VM_SHAREPOOL)) @@ -2874,8 +3034,23 @@ __setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode);
/*** Statistical and maintenance functions ***/
+static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat) +{ + int i; + struct spg_proc_stat *stat; + struct hlist_node *tmp; + + /* traverse proc_stat->hash locklessly as process is exiting */ + hash_for_each_safe(proc_stat->hash, i, tmp, stat, pnode) { + hash_del(&stat->pnode); + kfree(stat); + } +} + static void free_sp_proc_stat(struct sp_proc_stat *stat) { + free_process_spg_proc_stat(stat); + down_write(&sp_proc_stat_sem); stat->mm->sp_group_master->sp_stat_id = 0; idr_remove(&sp_proc_stat_idr, stat->tgid);
From: Tang Yizhou <tangyizhou@huawei.com>

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
-------------------------------------------------
In multi-group mode, we introduce struct sp_spg_stat, which represents the statistics of an sp_group. It is the accumulation of all spg_proc_stat structs in an sp_group.
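With this patch, each spg_proc_stat is additionally linked into its group's sp_spg_stat hashtable, keyed by tgid, so the per-group view can be walked process by process. Below is a simplified sketch of such a walk, assuming the entries have already been linked with hash_add() as the patch does; show_group_procs is a hypothetical helper, while the in-tree reporting goes through idr_proc_stat_cb.

#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>

/* Walk every process's per-group entry inside one sp_group. */
static void show_group_procs(struct sp_spg_stat *spg_stat, struct seq_file *seq)
{
        struct spg_proc_stat *stat;
        int bkt;

        mutex_lock(&spg_stat->lock);
        hash_for_each(spg_stat->hash, bkt, stat, gnode) {
                seq_printf(seq, "tgid %d: alloc %lld KB, k2u %lld KB\n",
                           stat->tgid,
                           atomic64_read(&stat->alloc_size) >> 10,
                           atomic64_read(&stat->k2u_size) >> 10);
        }
        mutex_unlock(&spg_stat->lock);
}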
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/share_pool.h |  33 ++-
 mm/share_pool.c            | 566 ++++++++++++++++++++++---------------
 2 files changed, 363 insertions(+), 236 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index d4c0ec944adc1..0b9b9fb62f052 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -53,6 +53,27 @@ extern unsigned long sysctl_sp_compact_interval_max; extern bool vmap_allow_huge; #endif
+/* we estimate an sp-group ususally contains at most 64 sp-group */ +#define SP_SPG_HASH_BITS 6 + +struct sp_spg_stat { + int spg_id; + /* number of sp_area */ + atomic_t spa_num; + /* total size of all sp_area from sp_alloc and k2u */ + atomic64_t size; + /* total size of all sp_area from sp_alloc 0-order page */ + atomic64_t alloc_nsize; + /* total size of all sp_area from sp_alloc hugepage */ + atomic64_t alloc_hsize; + /* total size of all sp_area from ap_alloc */ + atomic64_t alloc_size; + /* total size of all sp_area from sp_k2u */ + atomic64_t k2u_size; + struct mutex lock; /* protect hashtable */ + DECLARE_HASHTABLE(hash, SP_SPG_HASH_BITS); +}; + /* Processes in the same sp_group can share memory. * Memory layout for share pool: * @@ -87,16 +108,8 @@ struct sp_group { struct list_head procs; /* list head of sp_area. it is protected by spin_lock sp_area_lock */ struct list_head spa_list; - /* number of sp_area */ - atomic_t spa_num; - /* total size of all sp_area from sp_alloc and k2u(spg) */ - atomic64_t size; - /* total size of all sp_area from sp_alloc normal page */ - atomic64_t alloc_nsize; - /* total size of all sp_area from sp_alloc hugepage */ - atomic64_t alloc_hsize; - /* total size of all sp_area from ap_alloc */ - atomic64_t alloc_size; + /* group statistics */ + struct sp_spg_stat *stat; /* we define the creator process of a sp_group as owner */ struct task_struct *owner; /* is_alive == false means it's being destroyed */ diff --git a/mm/share_pool.c b/mm/share_pool.c index 4063dd0861cce..fde77960bb88d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -101,6 +101,11 @@ static DEFINE_IDR(sp_proc_stat_idr); /* rw semaphore for sp_proc_stat_idr */ static DECLARE_RWSEM(sp_proc_stat_sem);
+/* idr of all sp_spg_stats */ +static DEFINE_IDR(sp_spg_stat_idr); +/* rw semaphore for sp_spg_stat_idr */ +static DECLARE_RWSEM(sp_spg_stat_sem); + /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat;
@@ -241,12 +246,50 @@ static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, return stat; }
+static void update_spg_stat_alloc(unsigned long size, bool inc, + bool huge, struct sp_spg_stat *stat) +{ + if (inc) { + atomic_inc(&stat->spa_num); + atomic64_add(size, &stat->size); + atomic64_add(size, &stat->alloc_size); + if (huge) + atomic64_add(size, &stat->alloc_hsize); + else + atomic64_add(size, &stat->alloc_nsize); + } else { + atomic_dec(&stat->spa_num); + atomic64_sub(size, &stat->size); + atomic64_sub(size, &stat->alloc_size); + if (huge) + atomic64_sub(size, &stat->alloc_hsize); + else + atomic64_sub(size, &stat->alloc_nsize); + } +} + +static void update_spg_stat_k2u(unsigned long size, bool inc, + struct sp_spg_stat *stat) +{ + if (inc) { + atomic_inc(&stat->spa_num); + atomic64_add(size, &stat->size); + atomic64_add(size, &stat->k2u_size); + } else { + atomic_dec(&stat->spa_num); + atomic64_sub(size, &stat->size); + atomic64_sub(size, &stat->k2u_size); + } +} + /* per process/sp-group memory usage statistics */ struct spg_proc_stat { int tgid; int spg_id; /* 0 for non-group data, such as k2u_task */ struct hlist_node pnode; /* hlist node in sp_proc_stat->hash */ + struct hlist_node gnode; /* hlist node in sp_spg_stat->hash */ struct sp_proc_stat *proc_stat; + struct sp_spg_stat *spg_stat; /* * alloc amount minus free amount, may be negative when freed by * another task in the same sp group. @@ -277,7 +320,6 @@ static void update_spg_proc_stat_k2u(unsigned long size, bool inc, if (inc) { atomic64_add(size, &stat->k2u_size); atomic64_add(size, &proc_stat->k2u_size); - } else { atomic64_sub(size, &stat->k2u_size); atomic64_sub(size, &proc_stat->k2u_size); @@ -299,8 +341,7 @@ static struct spg_proc_stat *find_spg_proc_stat( return stat; }
-static struct spg_proc_stat *create_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, int spg_id) +static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id) { struct spg_proc_stat *stat;
@@ -312,7 +353,6 @@ static struct spg_proc_stat *create_spg_proc_stat(
stat->tgid = tgid; stat->spg_id = spg_id; - stat->proc_stat = proc_stat; atomic64_set(&stat->alloc_size, 0); atomic64_set(&stat->k2u_size, 0);
@@ -320,21 +360,30 @@ static struct spg_proc_stat *create_spg_proc_stat( }
static struct spg_proc_stat *sp_init_spg_proc_stat( - struct sp_proc_stat *proc_stat, int tgid, int spg_id) + struct sp_proc_stat *proc_stat, int tgid, struct sp_group *spg) { struct spg_proc_stat *stat; + int spg_id = spg->id; /* visit spg id locklessly */ + struct sp_spg_stat *spg_stat = spg->stat;
stat = find_spg_proc_stat(proc_stat, tgid, spg_id); if (stat) return stat;
- stat = create_spg_proc_stat(proc_stat, tgid, spg_id); + stat = create_spg_proc_stat(tgid, spg_id); if (IS_ERR(stat)) return stat;
+ stat->proc_stat = proc_stat; + stat->spg_stat = spg_stat; + mutex_lock(&proc_stat->lock); hash_add(proc_stat->hash, &stat->pnode, stat->spg_id); mutex_unlock(&proc_stat->lock); + + mutex_lock(&spg_stat->lock); + hash_add(spg_stat->hash, &stat->gnode, stat->tgid); + mutex_unlock(&spg_stat->lock); return stat; }
@@ -344,7 +393,7 @@ static struct spg_proc_stat *sp_init_spg_proc_stat( * 2. hold sp_group_sem for sp_group_master (pay attention to ABBA deadlock) */ static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk, - struct mm_struct *mm, int spg_id) + struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master; bool exist; @@ -359,10 +408,72 @@ static struct spg_proc_stat *sp_init_process_stat(struct task_struct *tsk, if (IS_ERR(proc_stat)) return (struct spg_proc_stat *)proc_stat;
- spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg_id); + spg_proc_stat = sp_init_spg_proc_stat(proc_stat, tsk->tgid, spg); return spg_proc_stat; }
+static struct sp_spg_stat *create_spg_stat(int spg_id) +{ + struct sp_spg_stat *stat; + + stat = kmalloc(sizeof(*stat), GFP_KERNEL); + if (stat == NULL) { + pr_err_ratelimited("share pool: alloc spg stat failed, lack of memory\n"); + return ERR_PTR(-ENOMEM); + } + + stat->spg_id = spg_id; + atomic_set(&stat->spa_num, 0); + atomic64_set(&stat->size, 0); + atomic64_set(&stat->alloc_nsize, 0); + atomic64_set(&stat->alloc_hsize, 0); + atomic64_set(&stat->alloc_size, 0); + mutex_init(&stat->lock); + hash_init(stat->hash); + + return stat; +} + +static int sp_init_spg_stat(struct sp_group *spg) +{ + struct sp_spg_stat *stat; + int ret, spg_id = spg->id; + + stat = create_spg_stat(spg_id); + if (IS_ERR(stat)) + return PTR_ERR(stat); + + down_write(&sp_spg_stat_sem); + ret = idr_alloc(&sp_spg_stat_idr, stat, spg_id, spg_id + 1, + GFP_KERNEL); + up_write(&sp_spg_stat_sem); + if (ret < 0) { + pr_err_ratelimited("share pool: create group %d idr alloc failed, ret %d\n", + spg_id, ret); + kfree(stat); + } + + spg->stat = stat; + return ret; +} + +static void free_spg_stat(int spg_id) +{ + struct sp_spg_stat *stat; + + down_write(&sp_spg_stat_sem); + stat = idr_remove(&sp_spg_stat_idr, spg_id); + up_write(&sp_spg_stat_sem); + WARN_ON(!stat); + kfree(stat); +} + +/* + * Group '0' for k2u_task and pass through. No process will be actually + * added to. + */ +static struct sp_group *spg_none; + /* statistics of all sp area, protected by sp_area_lock */ struct sp_spa_stat { unsigned int total_num; @@ -436,24 +547,31 @@ static struct file *spa_file(struct sp_area *spa) }
/* the caller should hold sp_area_lock */ -static int spa_inc_usage(enum spa_type type, unsigned long size, bool is_dvpp) +static void spa_inc_usage(struct sp_area *spa) { + enum spa_type type = spa->type; + unsigned long size = spa->real_size; + bool is_dvpp = spa->flags & SP_DVPP; + bool is_huge = spa->is_hugepage; + switch (type) { case SPA_TYPE_ALLOC: spa_stat.alloc_num += 1; spa_stat.alloc_size += size; + update_spg_stat_alloc(size, true, is_huge, spa->spg->stat); break; case SPA_TYPE_K2TASK: spa_stat.k2u_task_num += 1; spa_stat.k2u_task_size += size; + update_spg_stat_k2u(size, true, spg_none->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num += 1; spa_stat.k2u_spg_size += size; + update_spg_stat_k2u(size, true, spa->spg->stat); break; default: - /* usually impossible, perhaps a developer's mistake */ - return -EINVAL; + WARN(1, "invalid spa type"); }
if (is_dvpp) { @@ -467,28 +585,39 @@ static int spa_inc_usage(enum spa_type type, unsigned long size, bool is_dvpp) */ spa_stat.total_num += 1; spa_stat.total_size += size; - return 0; + + if (spa->spg != spg_none) { + atomic_inc(&sp_overall_stat.spa_total_num); + atomic64_add(size, &sp_overall_stat.spa_total_size); + } }
/* the caller should hold sp_area_lock */ -static int spa_dec_usage(enum spa_type type, unsigned long size, bool is_dvpp) +static void spa_dec_usage(struct sp_area *spa) { + enum spa_type type = spa->type; + unsigned long size = spa->real_size; + bool is_dvpp = spa->flags & SP_DVPP; + bool is_huge = spa->is_hugepage; + switch (type) { case SPA_TYPE_ALLOC: spa_stat.alloc_num -= 1; spa_stat.alloc_size -= size; + update_spg_stat_alloc(size, false, is_huge, spa->spg->stat); break; case SPA_TYPE_K2TASK: spa_stat.k2u_task_num -= 1; spa_stat.k2u_task_size -= size; + update_spg_stat_k2u(size, false, spg_none->stat); break; case SPA_TYPE_K2SPG: spa_stat.k2u_spg_num -= 1; spa_stat.k2u_spg_size -= size; + update_spg_stat_k2u(size, false, spa->spg->stat); break; default: - /* usually impossible, perhaps a developer's mistake */ - return -EINVAL; + WARN(1, "invalid spa type"); }
if (is_dvpp) { @@ -498,7 +627,11 @@ static int spa_dec_usage(enum spa_type type, unsigned long size, bool is_dvpp)
spa_stat.total_num -= 1; spa_stat.total_size -= size; - return 0; + + if (spa->spg != spg_none) { + atomic_dec(&sp_overall_stat.spa_total_num); + atomic64_sub(spa->real_size, &sp_overall_stat.spa_total_size); + } }
static void update_spg_proc_stat(unsigned long size, bool inc, @@ -524,14 +657,14 @@ static void update_spg_proc_stat(unsigned long size, bool inc, }
static void sp_update_process_stat(struct task_struct *tsk, bool inc, - int spg_id, struct sp_area *spa) + struct sp_area *spa) { struct spg_proc_stat *stat; unsigned long size = spa->real_size; enum spa_type type = spa->type;
down_write(&sp_group_sem); - stat = sp_init_process_stat(tsk, tsk->mm, spg_id); + stat = sp_init_process_stat(tsk, tsk->mm, spa->spg); up_write(&sp_group_sem); if (unlikely(IS_ERR(stat))) return; @@ -578,6 +711,7 @@ static void free_sp_group(struct sp_group *spg) { fput(spg->file); fput(spg->file_hugetlb); + free_spg_stat(spg->id); down_write(&sp_group_sem); idr_remove(&sp_group_idr, spg->id); up_write(&sp_group_sem); @@ -699,79 +833,95 @@ static loff_t addr_to_offset(unsigned long addr, struct sp_area *spa) if (sp_area_customized == false) return (loff_t)(addr - MMAP_SHARE_POOL_START);
- if (spa && spa->spg) + if (spa && spa->spg != spg_none) return (loff_t)(addr - spa->spg->dvpp_va_start);
pr_err("share pool: the addr is not belong to share pool range\n"); return addr; }
-/* the caller must hold sp_group_sem */ -static struct sp_group *find_or_alloc_sp_group(int spg_id) +static struct sp_group *create_spg(int spg_id) { - struct sp_group *spg; int ret; + struct sp_group *spg; char name[20]; + struct user_struct *user = NULL; + int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
- spg = __sp_find_spg_locked(current->pid, spg_id); - - if (!spg) { - struct user_struct *user = NULL; - int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT; + if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) { + pr_err_ratelimited("share pool: reach system max group num\n"); + return ERR_PTR(-ENOSPC); + }
- if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) { - pr_err_ratelimited("share pool: reach system max group num\n"); - return ERR_PTR(-ENOSPC); - } + spg = kzalloc(sizeof(*spg), GFP_KERNEL); + if (spg == NULL) { + pr_err_ratelimited("share pool: alloc spg failed due to lack of memory\n"); + return ERR_PTR(-ENOMEM); + } + ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id + 1, GFP_KERNEL); + if (ret < 0) { + pr_err_ratelimited("share pool: create group %d idr alloc failed, ret %d\n", + spg_id, ret); + goto out_kfree; + } + + spg->id = spg_id; + spg->is_alive = true; + spg->proc_num = 0; + spg->hugepage_failures = 0; + spg->dvpp_multi_spaces = false; + spg->owner = current->group_leader; + atomic_set(&spg->use_count, 1); + INIT_LIST_HEAD(&spg->procs); + INIT_LIST_HEAD(&spg->spa_list); + init_rwsem(&spg->rw_lock); + + sprintf(name, "sp_group_%d", spg_id); + spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE, + VM_NORESERVE); + if (IS_ERR(spg->file)) { + pr_err("share pool: file setup for small page failed %ld\n", + PTR_ERR(spg->file)); + ret = PTR_ERR(spg->file); + goto out_idr; + } + + spg->file_hugetlb = hugetlb_file_setup(name, MAX_LFS_FILESIZE, + VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, hsize_log); + if (IS_ERR(spg->file_hugetlb)) { + pr_err("share pool: file setup for hugepage failed %ld\n", + PTR_ERR(spg->file_hugetlb)); + ret = PTR_ERR(spg->file_hugetlb); + goto out_fput; + } + + ret = sp_init_spg_stat(spg); + if (ret < 0) + goto out_fput_all; + + system_group_count++; + return spg;
- spg = kzalloc(sizeof(*spg), GFP_KERNEL); - if (spg == NULL) { - pr_err_ratelimited("share pool: alloc spg failed due to lack of memory\n"); - return ERR_PTR(-ENOMEM); - } - ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id + 1, - GFP_KERNEL); - if (ret < 0) { - pr_err_ratelimited("share pool: create group idr alloc failed\n"); - goto out_kfree; - } +out_fput_all: + fput(spg->file_hugetlb); +out_fput: + fput(spg->file); +out_idr: + idr_remove(&sp_group_idr, spg_id); +out_kfree: + kfree(spg); + return ERR_PTR(ret); +}
- spg->id = spg_id; - spg->proc_num = 0; - atomic_set(&spg->spa_num, 0); - atomic64_set(&spg->size, 0); - atomic64_set(&spg->alloc_nsize, 0); - atomic64_set(&spg->alloc_hsize, 0); - atomic64_set(&spg->alloc_size, 0); - spg->is_alive = true; - spg->hugepage_failures = 0; - spg->dvpp_multi_spaces = false; - spg->owner = current->group_leader; - atomic_set(&spg->use_count, 1); - INIT_LIST_HEAD(&spg->procs); - INIT_LIST_HEAD(&spg->spa_list); - - init_rwsem(&spg->rw_lock); - - sprintf(name, "sp_group_%d", spg_id); - spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE, - VM_NORESERVE); - if (IS_ERR(spg->file)) { - pr_err("share pool: file setup for small page failed %ld\n", PTR_ERR(spg->file)); - ret = PTR_ERR(spg->file); - goto out_idr; - } +/* the caller must hold sp_group_sem */ +static struct sp_group *find_or_alloc_sp_group(int spg_id) +{ + struct sp_group *spg;
- spg->file_hugetlb = hugetlb_file_setup(name, MAX_LFS_FILESIZE, - VM_NORESERVE, &user, - HUGETLB_ANONHUGE_INODE, hsize_log); - if (IS_ERR(spg->file_hugetlb)) { - pr_err("share pool: file setup for hugepage failed %ld\n", PTR_ERR(spg->file_hugetlb)); - ret = PTR_ERR(spg->file_hugetlb); - goto out_fput; - } + spg = __sp_find_spg_locked(current->pid, spg_id);
- system_group_count++; + if (!spg) { + spg = create_spg(spg_id); } else { down_read(&spg->rw_lock); if (!spg_valid(spg)) { @@ -784,14 +934,6 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id) }
return spg; - -out_fput: - fput(spg->file); -out_idr: - idr_remove(&sp_group_idr, spg_id); -out_kfree: - kfree(spg); - return ERR_PTR(ret); }
static void __sp_area_drop_locked(struct sp_area *spa); @@ -1038,7 +1180,7 @@ int sp_group_add_task(int pid, int spg_id) }
/* per process statistics initialization */ - stat = sp_init_process_stat(tsk, mm, spg_id); + stat = sp_init_process_stat(tsk, mm, spg); if (IS_ERR(stat)) { ret = PTR_ERR(stat); pr_err_ratelimited("share pool: init process stat failed, ret %lx\n", PTR_ERR(stat)); @@ -1307,27 +1449,12 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, spa->applier = applier; spa->node_id = node_id;
- if (spa_inc_usage(type, size, (flags & SP_DVPP))) { - err = ERR_PTR(-EINVAL); - goto error; - } - + spa_inc_usage(spa); __insert_sp_area(spa); free_sp_area_cache = &spa->rb_node; - if (spa->spg) { - atomic_inc(&spg->spa_num); - atomic64_add(size, &spg->size); - if (type == SPA_TYPE_ALLOC) { - if (spa->is_hugepage) - atomic64_add(size, &spg->alloc_hsize); - else - atomic64_add(size, &spg->alloc_nsize); - atomic64_add(size, &spg->alloc_size); - } - atomic_inc(&sp_overall_stat.spa_total_num); - atomic64_add(size, &sp_overall_stat.spa_total_size); + if (spa->spg != spg_none) list_add_tail(&spa->link, &spg->spa_list); - } + spin_unlock(&sp_area_lock);
return spa; @@ -1398,21 +1525,10 @@ static void sp_free_area(struct sp_area *spa) } }
- spa_dec_usage(spa->type, spa->real_size, (spa->flags & SP_DVPP)); /* won't fail */ - if (spa->spg) { - atomic_dec(&spa->spg->spa_num); - atomic64_sub(spa->real_size, &spa->spg->size); - if (spa->type == SPA_TYPE_ALLOC) { - if (spa->is_hugepage) - atomic64_sub(spa->real_size, &spa->spg->alloc_hsize); - else - atomic64_sub(spa->real_size, &spa->spg->alloc_nsize); - atomic64_sub(spa->real_size, &spa->spg->alloc_size); - } - atomic_dec(&sp_overall_stat.spa_total_num); - atomic64_sub(spa->real_size, &sp_overall_stat.spa_total_size); + spa_dec_usage(spa); + if (spa->spg != spg_none) list_del(&spa->link); - } + rb_erase(&spa->rb_node, &sp_area_root); RB_CLEAR_NODE(&spa->rb_node); kfree(spa); @@ -1640,7 +1756,7 @@ int sp_free(unsigned long addr) if (current->mm == NULL) atomic64_sub(spa->real_size, &kthread_stat.alloc_size); else - sp_update_process_stat(current, false, spa->spg->id, spa); + sp_update_process_stat(current, false, spa);
drop_spa: __sp_area_drop(spa); @@ -1890,7 +2006,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) up_read(&spg->rw_lock);
if (!IS_ERR(p)) - sp_update_process_stat(current, true, spg->id, spa); + sp_update_process_stat(current, true, spa);
/* this will free spa if mmap failed */ if (spa && !IS_ERR(spa)) @@ -1952,27 +2068,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, struct vm_area_struct *vma; unsigned long ret_addr; unsigned long populate = 0; - struct file *file = NULL; int ret = 0; - struct user_struct *user = NULL; - int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT; unsigned long addr, buf, offset;
- if (spa->spg != NULL) { - /* k2u to group */ - file = spa_file(spa); - } else { - /* k2u to task */ - if (spa->is_hugepage) { - file = hugetlb_file_setup(HUGETLB_ANON_FILE, spa_size(spa), VM_NORESERVE, - &user, HUGETLB_ANONHUGE_INODE, hsize_log); - if (IS_ERR(file)) { - pr_err("share pool: file setup for k2u hugepage failed %ld\n", PTR_ERR(file)); - return PTR_ERR(file); - } - } - } - down_write(&mm->mmap_sem); if (unlikely(mm->core_state)) { pr_err("share pool: k2u mmap: encountered coredump, abort\n"); @@ -1980,7 +2078,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto put_mm; }
- ret_addr = sp_mmap(mm, file, spa, &populate); + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate); if (IS_ERR_VALUE(ret_addr)) { pr_debug("share pool: k2u mmap failed %lx\n", ret_addr); goto put_mm; @@ -2023,9 +2121,6 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, put_mm: up_write(&mm->mmap_sem);
- if (!spa->spg && file) - fput(file); - return ret_addr; }
@@ -2188,7 +2283,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, }
down_write(&sp_group_sem); - stat = sp_init_process_stat(tsk, mm, GROUP_NONE); + stat = sp_init_process_stat(tsk, mm, spg_none); up_write(&sp_group_sem); if (IS_ERR(stat)) { uva = stat; @@ -2197,7 +2292,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, goto out_put_mm; }
- spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK, tsk->tgid); + spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, tsk->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("share pool: k2u(task) failed due to alloc spa failure " "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); @@ -2213,8 +2308,8 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, uva = sp_make_share_kva_to_task(kva_aligned, spa, mm);
if (!IS_ERR(uva)) - update_spg_proc_stat(size_aligned, true, stat, - SPA_TYPE_K2TASK); + update_spg_proc_stat(size_aligned, true, + stat, SPA_TYPE_K2TASK);
goto finish; } @@ -2232,7 +2327,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, if (enable_share_k2u_spg) spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG, tsk->tgid); else - spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK, tsk->tgid); + spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, tsk->tgid);
if (IS_ERR(spa)) { up_read(&spg->rw_lock); @@ -2248,7 +2343,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, goto out_drop_spa; }
- if (spa->spg) + if (spa->spg != spg_none) uva = sp_make_share_kva_to_spg(kva_aligned, spa, spg); else uva = sp_make_share_kva_to_task(kva_aligned, spa, mm); @@ -2260,7 +2355,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, up_read(&spg->rw_lock);
if (!IS_ERR(uva)) - sp_update_process_stat(tsk, true, spg_id, spa); + sp_update_process_stat(tsk, true, spa);
finish: if (!IS_ERR(uva)) { @@ -2695,7 +2790,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp if (unlikely(!current->mm)) WARN(1, "share pool: unshare uva(to task) unexpected active kthread"); else - sp_update_process_stat(current, false, GROUP_NONE, spa); + sp_update_process_stat(current, false, spa);
} else if (spa->type == SPA_TYPE_K2SPG) { if (spg_id < 0) { @@ -2742,7 +2837,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp if (current->mm == NULL) atomic64_sub(spa->real_size, &kthread_stat.k2u_size); else - sp_update_process_stat(current, false, spa->spg->id, spa); + sp_update_process_stat(current, false, spa); } else { WARN(1, "share pool: unshare uva invalid spa type"); } @@ -2981,10 +3076,10 @@ bool is_sharepool_addr(unsigned long addr) return is_sp_normal_addr(addr);
spa = __find_sp_area(addr); - if (spa && spa->spg) - ret = (addr >= spa->spg->dvpp_va_start && - addr < spa->spg->dvpp_va_start + spa->spg->dvpp_size) || - is_sp_normal_addr(addr); + if (spa && spa->spg != spg_none) + ret = is_sp_normal_addr(addr) || + (addr >= spa->spg->dvpp_va_start && + addr < spa->spg->dvpp_va_start + spa->spg->dvpp_size);
__sp_area_drop(spa); return ret; @@ -3039,9 +3134,15 @@ static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat) int i; struct spg_proc_stat *stat; struct hlist_node *tmp; + struct sp_spg_stat *spg_stat;
/* traverse proc_stat->hash locklessly as process is exiting */ hash_for_each_safe(proc_stat->hash, i, tmp, stat, pnode) { + spg_stat = stat->spg_stat; + mutex_lock(&spg_stat->lock); + hash_del(&stat->gnode); + mutex_unlock(&spg_stat->lock); + hash_del(&stat->pnode); kfree(stat); } @@ -3122,7 +3223,7 @@ static void rb_spa_stat_show(struct seq_file *seq) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- if (!spa->spg) /* k2u to task */ + if (spa->spg == spg_none) /* k2u to task */ seq_printf(seq, "%-10s ", "None"); else { down_read(&spa->spg->rw_lock); @@ -3217,23 +3318,35 @@ void spa_overview_show(struct seq_file *seq) /* the caller must hold sp_group_sem */ static int idr_spg_stat_cb(int id, void *p, void *data) { - struct sp_group *spg = p; + struct sp_spg_stat *s = p; struct seq_file *seq = data;
if (seq != NULL) { - seq_printf(seq, "Group %6d size: %ld KB, spa num: %d, total alloc: %ld KB, " + if (id == 0) + seq_puts(seq, "Non Group "); + else + seq_printf(seq, "Group %6d ", id); + + seq_printf(seq, "size: %ld KB, spa num: %d, total alloc: %ld KB, " "normal alloc: %ld KB, huge alloc: %ld KB\n", - id, byte2kb(atomic64_read(&spg->size)), atomic_read(&spg->spa_num), - byte2kb(atomic64_read(&spg->alloc_size)), - byte2kb(atomic64_read(&spg->alloc_nsize)), - byte2kb(atomic64_read(&spg->alloc_hsize))); + byte2kb(atomic64_read(&s->size)), + atomic_read(&s->spa_num), + byte2kb(atomic64_read(&s->alloc_size)), + byte2kb(atomic64_read(&s->alloc_nsize)), + byte2kb(atomic64_read(&s->alloc_hsize))); } else { - pr_info("Group %6d size: %ld KB, spa num: %d, total alloc: %ld KB, " + if (id == 0) + pr_info("Non Group "); + else + pr_info("Group %6d ", id); + + pr_info("size: %ld KB, spa num: %d, total alloc: %ld KB, " "normal alloc: %ld KB, huge alloc: %ld KB\n", - id, byte2kb(atomic64_read(&spg->size)), atomic_read(&spg->spa_num), - byte2kb(atomic64_read(&spg->alloc_size)), - byte2kb(atomic64_read(&spg->alloc_nsize)), - byte2kb(atomic64_read(&spg->alloc_hsize))); + byte2kb(atomic64_read(&s->size)), + atomic_read(&s->spa_num), + byte2kb(atomic64_read(&s->alloc_size)), + byte2kb(atomic64_read(&s->alloc_nsize)), + byte2kb(atomic64_read(&s->alloc_hsize))); }
return 0; @@ -3255,7 +3368,7 @@ void spg_overview_show(struct seq_file *seq) }
down_read(&sp_group_sem); - idr_for_each(&sp_group_idr, idr_spg_stat_cb, seq); + idr_for_each(&sp_spg_stat_idr, idr_spg_stat_cb, seq); up_read(&sp_group_sem);
if (seq != NULL) @@ -3277,11 +3390,13 @@ static int spa_stat_show(struct seq_file *seq, void *offset)
static int idr_proc_stat_cb(int id, void *p, void *data) { - int spg_id; - struct sp_group *spg; - struct sp_proc_stat *stat = p; + struct sp_spg_stat *spg_stat = p; struct seq_file *seq = data; - struct mm_struct *mm = stat->mm; + int i, tgid; + struct sp_proc_stat *proc_stat; + struct spg_proc_stat *spg_proc_stat; + + struct mm_struct *mm; unsigned long anon, file, shmem, total_rss; /* * non_sp_res: resident memory size excluding share pool memory @@ -3292,60 +3407,41 @@ static int idr_proc_stat_cb(int id, void *p, void *data) */ long sp_alloc_nsize, non_sp_res, sp_res, non_sp_shm;
- anon = get_mm_counter(mm, MM_ANONPAGES); - file = get_mm_counter(mm, MM_FILEPAGES); - shmem = get_mm_counter(mm, MM_SHMEMPAGES); - total_rss = anon + file + shmem; + mutex_lock(&spg_stat->lock); + hash_for_each(spg_stat->hash, i, spg_proc_stat, gnode) { + proc_stat = spg_proc_stat->proc_stat; + tgid = proc_stat->tgid; + mm = proc_stat->mm;
- /* - * a task without adding to an sp group should be handled correctly. - */ - spg = __sp_find_spg(id, SPG_ID_DEFAULT); - if (!spg) - goto non_spg; + anon = get_mm_counter(mm, MM_ANONPAGES); + file = get_mm_counter(mm, MM_FILEPAGES); + shmem = get_mm_counter(mm, MM_SHMEMPAGES); + total_rss = anon + file + shmem;
- down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - spg_id = 0; - sp_alloc_nsize = 0; - sp_res = 0; - } else { - spg_id = spg->id; - sp_alloc_nsize = byte2kb(atomic64_read(&spg->alloc_nsize)); - sp_res = byte2kb(atomic64_read(&spg->alloc_size)); - } - up_read(&spg->rw_lock); - sp_group_drop(spg); - - /* - * Statistics of RSS has a maximum 64 pages deviation (256KB). - * Please check_sync_rss_stat(). - */ - non_sp_res = page2kb(total_rss) - sp_alloc_nsize; - non_sp_res = non_sp_res < 0 ? 0 : non_sp_res; - non_sp_shm = page2kb(shmem) - sp_alloc_nsize; - non_sp_shm = non_sp_shm < 0 ? 0 : non_sp_shm; - - seq_printf(seq, "%-8d ", id); - if (spg_id == 0) - seq_printf(seq, "%-8c ", '-'); - else - seq_printf(seq, "%-8d ", spg_id); - seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-8ld %-7ld %-7ld %-10ld\n", - byte2kb(atomic64_read(&stat->alloc_size)), - byte2kb(atomic64_read(&stat->k2u_size)), - sp_res, non_sp_res, - page2kb(mm->total_vm), page2kb(total_rss), - page2kb(shmem), non_sp_shm); - return 0; - -non_spg: - seq_printf(seq, "%-8d %-8c %-9d %-9ld %-9d %-10ld %-8ld %-7ld %-7ld %-10ld\n", - id, '-', 0, - byte2kb(atomic64_read(&stat->k2u_size)), - 0, page2kb(total_rss), - page2kb(mm->total_vm), page2kb(total_rss), - page2kb(shmem), page2kb(shmem)); + /* + * Statistics of RSS has a maximum 64 pages deviation (256KB). + * Please check_sync_rss_stat(). + */ + sp_alloc_nsize = byte2kb(atomic64_read(&spg_stat->alloc_nsize)); + sp_res = byte2kb(atomic64_read(&spg_stat->alloc_size)); + non_sp_res = page2kb(total_rss) - sp_alloc_nsize; + non_sp_res = non_sp_res < 0 ? 0 : non_sp_res; + non_sp_shm = page2kb(shmem) - sp_alloc_nsize; + non_sp_shm = non_sp_shm < 0 ? 0 : non_sp_shm; + + seq_printf(seq, "%-8d ", tgid); + if (id == 0) + seq_printf(seq, "%-8c ", '-'); + else + seq_printf(seq, "%-8d ", id); + seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-8ld %-7ld %-7ld %-10ld\n", + byte2kb(atomic64_read(&spg_proc_stat->alloc_size)), + byte2kb(atomic64_read(&spg_proc_stat->k2u_size)), + sp_res, non_sp_res, + page2kb(mm->total_vm), page2kb(total_rss), + page2kb(shmem), non_sp_shm); + } + mutex_unlock(&spg_stat->lock); return 0; }
@@ -3364,9 +3460,9 @@ static int proc_stat_show(struct seq_file *seq, void *offset) byte2kb(atomic64_read(&kthread_stat.k2u_size)));
/* pay attention to potential ABBA deadlock */ - down_read(&sp_proc_stat_sem); - idr_for_each(&sp_proc_stat_idr, idr_proc_stat_cb, seq); - up_read(&sp_proc_stat_sem); + down_read(&sp_spg_stat_sem); + idr_for_each(&sp_spg_stat_idr, idr_proc_stat_cb, seq); + up_read(&sp_spg_stat_sem); return 0; }
@@ -3553,6 +3649,7 @@ int sp_group_exit(struct mm_struct *mm)
list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { spg = spg_node->spg; + down_write(&spg->rw_lock); /* a dead group should NOT be reactive again */ if (spg_valid(spg) && list_is_singular(&spg->procs)) @@ -3754,3 +3851,20 @@ static int __init enable_share_pool(char *s) return 1; } __setup("enable_ascend_share_pool", enable_share_pool); + +static int __init share_pool_init(void) +{ + /* lockless, as init kthread has no sp operation else */ + spg_none = create_spg(GROUP_NONE); + /* without free spg_none, not a serious problem */ + if (IS_ERR(spg_none) || !spg_none) + goto fail; + + return 0; +fail: + pr_err("Ascend share pool initialization failed\n"); + enable_ascend_share_pool = 0; + vmap_allow_huge = false; + return 1; +} +late_initcall(share_pool_init);
From: Peng Wu wupeng58@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
The prot parameter is added to the sp_group_add_task() function. It indicates the PROT_READ/PROT_WRITE permission the task has for this spg.
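For illustration only (not part of this patch; spg_id 100 is a made-up, pre-existing group), an in-kernel caller now states the access level explicitly, and anything other than PROT_READ or PROT_READ | PROT_WRITE is rejected with -EINVAL:

    int ret, spg_id = 100;

    /* read-only member: mappings of group memory are created read-only */
    ret = sp_group_add_task(current->tgid, PROT_READ, spg_id);

    /* or a read-write member, which is what every caller got before this patch */
    ret = sp_group_add_task(current->tgid, PROT_READ | PROT_WRITE, spg_id);

    /* PROT_WRITE alone, PROT_EXEC, etc. now fail with -EINVAL */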
Signed-off-by: Peng Wu wupeng58@huawei.com Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 3 ++- mm/share_pool.c | 53 ++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 20 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 0b9b9fb62f052..5912d874b05fb 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -149,6 +149,7 @@ struct sp_group_node { struct list_head group_node; struct sp_group_master *master; struct sp_group *spg; + unsigned long prot; };
struct sp_walk_data { @@ -202,7 +203,7 @@ static inline void sp_init_mm(struct mm_struct *mm) mm->sp_group_master = NULL; }
-extern int sp_group_add_task(int pid, int spg_id); +extern int sp_group_add_task(int pid, unsigned long prot, int spg_id); extern int sp_group_exit(struct mm_struct *mm); extern void sp_group_post_exit(struct mm_struct *mm); extern int sp_group_id_by_pid(int pid); diff --git a/mm/share_pool.c b/mm/share_pool.c index fde77960bb88d..6971076a4c57c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -687,10 +687,11 @@ static inline bool check_aoscore_process(struct task_struct *tsk) }
static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, - struct sp_area *spa, unsigned long *populate); + struct sp_area *spa, unsigned long *populate, + unsigned long prot); static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size); static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, - struct mm_struct *mm); + struct mm_struct *mm, unsigned long prot);
static void free_sp_group_id(int spg_id) { @@ -1004,7 +1005,7 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) return 0; }
-static int mm_add_group_finish(struct mm_struct *mm, struct sp_group *spg) +static int mm_add_group_finish(struct mm_struct *mm, struct sp_group *spg, unsigned long prot) { struct sp_group_master *master; struct sp_group_node *spg_node; @@ -1020,6 +1021,7 @@ static int mm_add_group_finish(struct mm_struct *mm, struct sp_group *spg) INIT_LIST_HEAD(&spg_node->proc_node); spg_node->spg = spg; spg_node->master = master; + spg_node->prot = prot;
down_write(&spg->rw_lock); if (spg->proc_num + 1 == MAX_PROC_PER_GROUP) { @@ -1041,9 +1043,11 @@ static int mm_add_group_finish(struct mm_struct *mm, struct sp_group *spg) /** * sp_group_add_task() - Add a process to an share group (sp_group). * @pid: the pid of the task to be added. + * @prot: the prot of task for this spg. * @spg_id: the ID of the sp_group. * - * A thread group can't be added to more than one sp_group. + * A process can't be added to more than one sp_group in single group mode + * and can in multiple group mode. * * Return: A postive group number for success, -errno on failure. * @@ -1051,7 +1055,7 @@ static int mm_add_group_finish(struct mm_struct *mm, struct sp_group *spg) * The automatically allocated ID is between [SPG_ID_AUTO_MIN, SPG_ID_AUTO_MAX]. * When negative, the return value is -errno. */ -int sp_group_add_task(int pid, int spg_id) +int sp_group_add_task(int pid, unsigned long prot, int spg_id) { struct task_struct *tsk; struct mm_struct *mm; @@ -1063,6 +1067,13 @@ int sp_group_add_task(int pid, int spg_id)
check_interrupt_context();
+ /* only allow READ, READ | WRITE */ + if (!((prot == PROT_READ) + || (prot == (PROT_READ | PROT_WRITE)))) { + pr_err_ratelimited("share pool: prot is invalid 0x%lx\n", prot); + return -EINVAL; + } + /* mdc scene hack */ if (enable_mdc_default_group) spg_id = mdc_default_group_id; @@ -1210,7 +1221,7 @@ int sp_group_add_task(int pid, int spg_id) spin_unlock(&sp_area_lock);
if (spa->type == SPA_TYPE_K2SPG && spa->kva) { - addr = sp_remap_kva_to_vma(spa->kva, spa, mm); + addr = sp_remap_kva_to_vma(spa->kva, spa, mm, prot); if (IS_ERR_VALUE(addr)) pr_warn("share pool: task add group remap k2u failed, ret %ld\n", addr);
@@ -1228,7 +1239,7 @@ int sp_group_add_task(int pid, int spg_id) break; }
- addr = sp_mmap(mm, file, spa, &populate); + addr = sp_mmap(mm, file, spa, &populate, prot); if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_sem); @@ -1272,7 +1283,7 @@ int sp_group_add_task(int pid, int spg_id) up_write(&sp_group_sem); sp_group_drop(spg); } else { - mm_add_group_finish(mm, spg); + mm_add_group_finish(mm, spg, prot); up_write(&sp_group_sem); } out_put_mm: @@ -1769,11 +1780,11 @@ EXPORT_SYMBOL_GPL(sp_free);
/* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_sem). */ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, - struct sp_area *spa, unsigned long *populate) + struct sp_area *spa, unsigned long *populate, + unsigned long prot) { unsigned long addr = spa->va_start; unsigned long size = spa_size(spa); - unsigned long prot = PROT_READ | PROT_WRITE; unsigned long flags = MAP_FIXED | MAP_SHARED | MAP_POPULATE | MAP_SHARE_POOL; unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY; @@ -1852,10 +1863,11 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); if (!spg) { /* DVPP pass through scene: first call sp_alloc() */ /* mdc scene hack */ + unsigned long prot = PROT_READ | PROT_WRITE; if (enable_mdc_default_group) - ret = sp_group_add_task(current->tgid, spg_id); + ret = sp_group_add_task(current->tgid, prot, spg_id); else - ret = sp_group_add_task(current->tgid, + ret = sp_group_add_task(current->tgid, prot, SPG_ID_DVPP_PASS_THROUGH); /* * The multi-thread contention may cause repeated joins to the group. @@ -1917,7 +1929,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) continue; }
- mmap_addr = sp_mmap(mm, file, spa, &populate); + mmap_addr = sp_mmap(mm, file, spa, &populate, spg_node->prot); if (IS_ERR_VALUE(mmap_addr)) { up_write(&mm->mmap_sem); p = (void *)mmap_addr; @@ -1940,7 +1952,8 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) goto out; } /* clean PTE_RDONLY flags or trigger SMMU event */ - vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); + if (spg_node->prot & PROT_WRITE) + vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); up_write(&mm->mmap_sem);
/* @@ -2063,7 +2076,7 @@ static unsigned long __sp_remap_get_pfn(unsigned long kva)
/* when called by k2u to group, always make sure rw_lock of spg is down */ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, - struct mm_struct *mm) + struct mm_struct *mm, unsigned long prot) { struct vm_area_struct *vma; unsigned long ret_addr; @@ -2078,7 +2091,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto put_mm; }
- ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate); + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(ret_addr)) { pr_debug("share pool: k2u mmap failed %lx\n", ret_addr); goto put_mm; @@ -2087,7 +2100,8 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
vma = find_vma(mm, ret_addr); BUG_ON(vma == NULL); - vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); + if (prot & PROT_WRITE) + vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
if (is_vm_hugetlb_page(vma)) { ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); @@ -2138,8 +2152,9 @@ static void *sp_make_share_kva_to_task(unsigned long kva, struct sp_area *spa, struct mm_struct *mm) { unsigned long ret_addr; + unsigned long prot = PROT_READ | PROT_WRITE;
- ret_addr = sp_remap_kva_to_vma(kva, spa, mm); + ret_addr = sp_remap_kva_to_vma(kva, spa, mm, prot); if (IS_ERR_VALUE(ret_addr)) { pr_err("share pool: remap k2u to task failed, ret %ld\n", ret_addr); return ERR_PTR(ret_addr); @@ -2160,7 +2175,7 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, struct sp_area *spa,
list_for_each_entry(spg_node, &spg->procs, proc_node) { mm = spg_node->master->mm; - ret_addr = sp_remap_kva_to_vma(kva, spa, mm); + ret_addr = sp_remap_kva_to_vma(kva, spa, mm, spg_node->prot); if (IS_ERR_VALUE(ret_addr)) { pr_err("share pool: remap k2u to spg failed, ret %ld \n", ret_addr); __sp_free(spg, spa->va_start, spa_size(spa), mm);
From: guomengqi guomengqi3@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Modify hugetlb_insert_hugepage_pte_by_pa() to ensure that k2u hugepages can be mapped as READONLY.
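The gate is the PTE_RDONLY bit of the caller-supplied pgprot (arm64). A minimal sketch of the resulting rule, not taken from the patch:

    /*
     * After this patch a k2u huge PTE is made writable only when the
     * protection does not carry PTE_RDONLY, e.g. PAGE_SHARED; passing
     * PAGE_READONLY keeps the mapping read-only.
     */
    static bool k2u_huge_pte_writable(pgprot_t prot)
    {
            return !(pgprot_val(prot) & PTE_RDONLY);
    }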
Signed-off-by: guomengqi guomengqi3@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/hugetlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 89404e28b2514..5190154de3b09 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5374,7 +5374,8 @@ int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
entry = pfn_pte(phy_addr >> PAGE_SHIFT, prot); entry = huge_pte_mkdirty(entry); - entry = huge_pte_mkwrite(entry); + if (!(pgprot_val(prot) & PTE_RDONLY)) + entry = huge_pte_mkwrite(entry); entry = pte_mkyoung(entry); entry = pte_mkhuge(entry); entry = pte_mkspecial(entry);
From: Zhou Guanghui zhouguanghui1@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------------
Remove the unnecessary parameters (pid, spg_id) of sp_unshare().
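The visible change for callers, sketched with placeholder values for uva and size:

    /* before this patch */
    ret = sp_unshare(uva, size, current->tgid, SPG_ID_DEFAULT);

    /* after this patch: the sp_area looked up from uva already records the
     * owning task or group, so the extra arguments carry no information */
    ret = sp_unshare(uva, size);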
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 2 +- mm/share_pool.c | 19 +++---------------- 2 files changed, 4 insertions(+), 17 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 5912d874b05fb..2b052efa69072 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -216,7 +216,7 @@ extern int sp_free(unsigned long addr); extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); -extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); +extern int sp_unshare(unsigned long va, unsigned long size);
extern void sp_area_drop(struct vm_area_struct *vma);
diff --git a/mm/share_pool.c b/mm/share_pool.c index 6971076a4c57c..8b9a4acea9b76 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2705,7 +2705,7 @@ EXPORT_SYMBOL_GPL(sp_make_share_u2k); * * This also means we must trust DVPP channel destroy and guard worker code. */ -static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int spg_id) +static int sp_unshare_uva(unsigned long uva, unsigned long size) { int ret = 0; bool found = false; @@ -2752,11 +2752,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp }
if (spa->type == SPA_TYPE_K2TASK) { - if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { - pr_err_ratelimited("share pool: unshare uva(to task) failed, invalid spg id %d\n", spg_id); - ret = -EINVAL; - goto out_drop_area; - }
if (!spa->mm) { pr_err_ratelimited("share pool: unshare uva(to task) failed, none spa owner\n"); @@ -2808,12 +2803,6 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp sp_update_process_stat(current, false, spa);
} else if (spa->type == SPA_TYPE_K2SPG) { - if (spg_id < 0) { - pr_err_ratelimited("share pool: unshare uva(to group) failed, invalid spg id %d\n", spg_id); - ret = -EINVAL; - goto out_drop_area; - } - down_read(&spa->spg->rw_lock); /* always allow kthread and dvpp channel destroy procedure */ if (current->mm) { @@ -2921,14 +2910,12 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) * sp_make_share_{k2u,u2k}(). * @va: the specified virtual address of memory * @size: the size of unshared memory - * @pid: the pid of the specified process if the VA is user address - * @spg_id: the ID of the specified sp_group if the VA is user address * * Use spg_id of current thread if spg_id == SPG_ID_DEFAULT. * * Return: 0 for success, -errno on failure. */ -int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) +int sp_unshare(unsigned long va, unsigned long size) { int ret = 0;
@@ -2936,7 +2923,7 @@ int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
if (va < TASK_SIZE) { /* user address */ - ret = sp_unshare_uva(va, size, pid, spg_id); + ret = sp_unshare_uva(va, size); } else if (va >= VA_START) { /* kernel address */ ret = sp_unshare_kva(va, size);
From: Zhou Guanghui zhouguanghui1@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 101 ++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 68 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 8b9a4acea9b76..3107528ee5bed 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2220,15 +2220,17 @@ static bool vmalloc_area_clr_flag(struct sp_area *spa, unsigned long kva, unsign }
/** - * sp_make_share_k2u() - Share kernel memory to a specified process or sp_group. + * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. * @kva: the VA of shared kernel memory. * @size: the size of shared kernel memory. * @sp_flags: how to allocate the memory. We only support SP_DVPP. - * @pid: the pid of the specified process + * @pid: the pid of the specified process (Not currently in use). * @spg_id: the share group that the memory is shared to. * - * Use spg_id of current thread if spg_id == SPG_ID_DEFAULT. - * Share kernel memory to a specified task if spg_id == SPG_ID_NONE. + * Return: the shared target user address to start at + * + * Share kernel memory to current task if spg_id == SPG_ID_NONE + * or SPG_ID_DEFAULT in multi-group mode. * * Return: * * if succeed, return the shared user address to start at. @@ -2243,9 +2245,8 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long kva_aligned; unsigned long size_aligned; unsigned int page_size = PAGE_SIZE; - struct task_struct *tsk; - struct mm_struct *mm; - int ret = 0, is_hugepage; + struct mm_struct *mm = current->mm; + int is_hugepage;
check_interrupt_context();
@@ -2254,6 +2255,11 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, return ERR_PTR(-EINVAL); }
+ if (mm == NULL) { + pr_err_ratelimited("k2u: kthread is not allowed\n"); + return ERR_PTR(-EPERM); + } + is_hugepage = is_vmap_hugepage(kva); if (is_hugepage > 0) { sp_flags |= SP_HUGEPAGE; @@ -2269,50 +2275,30 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, kva_aligned = ALIGN_DOWN(kva, page_size); size_aligned = ALIGN(kva + size, page_size) - kva_aligned;
- rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (!tsk || (tsk->flags & PF_EXITING)) - ret = -ESRCH; - else - get_task_struct(tsk); - - rcu_read_unlock(); - if (ret) - return ERR_PTR(ret); - - mm = get_task_mm(tsk); - if (mm == NULL) { - uva = ERR_PTR(-ESRCH); - goto out_put_task; - } - - spg = __sp_find_spg(pid, SPG_ID_DEFAULT); + spg = get_first_group(mm); if (spg == NULL) { /* k2u to task */ struct spg_proc_stat *stat;
if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { pr_err_ratelimited("share pool: k2task invalid spg id %d\n", spg_id); - uva = ERR_PTR(-EINVAL); - goto out_put_mm; + return ERR_PTR(-EINVAL); }
down_write(&sp_group_sem); - stat = sp_init_process_stat(tsk, mm, spg_none); + stat = sp_init_process_stat(current, mm, spg_none); up_write(&sp_group_sem); if (IS_ERR(stat)) { - uva = stat; pr_err_ratelimited("share pool: k2u(task) init process stat failed, ret %lx\n", PTR_ERR(stat)); - goto out_put_mm; + return stat; }
- spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, tsk->tgid); + spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("share pool: k2u(task) failed due to alloc spa failure " "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); - uva = spa; - goto out_put_mm; + return spa; }
if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) { @@ -2340,9 +2326,9 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, }
if (enable_share_k2u_spg) - spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG, tsk->tgid); + spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG, current->tgid); else - spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, tsk->tgid); + spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid);
if (IS_ERR(spa)) { up_read(&spg->rw_lock); @@ -2370,7 +2356,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, up_read(&spg->rw_lock);
if (!IS_ERR(uva)) - sp_update_process_stat(tsk, true, spa); + sp_update_process_stat(current, true, spa);
finish: if (!IS_ERR(uva)) { @@ -2387,10 +2373,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, out_drop_spg: if (spg) sp_group_drop(spg); -out_put_mm: - mmput(mm); -out_put_task: - put_task_struct(tsk);
sp_dump_stack(); return uva; @@ -2605,7 +2587,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) * sp_make_share_u2k() - Share user memory of a specified process to kernel. * @uva: the VA of shared user memory * @size: the size of shared user memory - * @pid: the pid of the specified process + * @pid: the pid of the specified process(Not currently in use) * * Return: * * if success, return the starting kernel address of the shared memory. @@ -2614,8 +2596,7 @@ static void __sp_walk_page_free(struct sp_walk_data *data) void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) { int ret = 0; - struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; void *p = ERR_PTR(-ESRCH); struct sp_walk_data sp_walk_data = { .page_count = 0, @@ -2624,34 +2605,23 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid)
check_interrupt_context();
- rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (!tsk || (tsk->flags & PF_EXITING)) - ret = -ESRCH; - else - get_task_struct(tsk); - rcu_read_unlock(); - if (ret) - goto out; + if (mm == NULL) { + pr_err("u2k: kthread is not allowed\n"); + return ERR_PTR(-EPERM); + }
- mm = get_task_mm(tsk); - if (mm == NULL) - goto out_put_task; down_write(&mm->mmap_sem); if (unlikely(mm->core_state)) { up_write(&mm->mmap_sem); pr_err("share pool: u2k: encountered coredump, abort\n"); - mmput(mm); - goto out_put_task; + return p; }
ret = __sp_walk_page_range(uva, size, mm, &sp_walk_data); if (ret) { pr_err_ratelimited("share pool: walk page range failed, ret %d\n", ret); up_write(&mm->mmap_sem); - mmput(mm); - p = ERR_PTR(ret); - goto out_put_task; + return ERR_PTR(ret); }
if (sp_walk_data.is_hugepage) @@ -2661,17 +2631,15 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) p = vmap(sp_walk_data.pages, sp_walk_data.page_count, VM_MAP, PAGE_KERNEL); up_write(&mm->mmap_sem); - mmput(mm);
if (!p) { pr_err("share pool: vmap(huge) in u2k failed\n"); __sp_walk_page_free(&sp_walk_data); - p = ERR_PTR(-ENOMEM); - goto out_put_task; - } else { - p = p + (uva - sp_walk_data.uva_aligned); + return ERR_PTR(-ENOMEM); }
+ p = p + (uva - sp_walk_data.uva_aligned); + /* * kva p may be used later in k2u. Since p comes from uva originally, * it's reasonable to add flag VM_USERMAP so that p can be remapped @@ -2681,9 +2649,6 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) area->flags |= VM_USERMAP;
kvfree(sp_walk_data.pages); -out_put_task: - put_task_struct(tsk); -out: return p; } EXPORT_SYMBOL_GPL(sp_make_share_u2k);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
After removing the 'pid' parameter of sp_unshare_uva(), we need a new access control check for unshare uva (to task).
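Restated as a sketch (the real check is added to sp_unshare_uva() below): only the process that created a K2TASK mapping may unshare it, and kthreads are rejected outright.

    if (spa->type == SPA_TYPE_K2TASK &&
        (!current->mm || spa->applier != current->tgid))
            return -EPERM;  /* kthread or a foreign process */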
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 3107528ee5bed..45c3d510708c1 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2717,6 +2717,11 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) }
if (spa->type == SPA_TYPE_K2TASK) { + if (!current->mm || spa->applier != current->tgid) { + pr_err_ratelimited("share pool: unshare uva(to task) no permission\n"); + ret = -EPERM; + goto out_drop_area; + }
if (!spa->mm) { pr_err_ratelimited("share pool: unshare uva(to task) failed, none spa owner\n"); @@ -2761,11 +2766,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) pr_err("share pool: failed to unmap VA %pK when munmap in unshare uva\n", (void *)uva_aligned); } - - if (unlikely(!current->mm)) - WARN(1, "share pool: unshare uva(to task) unexpected active kthread"); - else - sp_update_process_stat(current, false, spa); + sp_update_process_stat(current, false, spa);
} else if (spa->type == SPA_TYPE_K2SPG) { down_read(&spa->spg->rw_lock); @@ -2783,7 +2784,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) up_read(&spa->spg->rw_lock); pr_err_ratelimited("share pool: unshare uva(to group) failed, " "caller process doesn't belong to target group\n"); - ret = -EINVAL; + ret = -EPERM; goto out_drop_area; } up_read(&spa->spg->rw_lock);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Use pr_fmt to have the module prefix "share pool: " prepended to every log message, and drop the hand-written prefix from the individual call sites.
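For reference, the pr_* helpers in <linux/printk.h> wrap their format string in pr_fmt(), so defining it before the includes prefixes every message in the file. A minimal sketch:

    #define pr_fmt(fmt) "share pool: " fmt

    #include <linux/printk.h>

    /* expands to printk(KERN_ERR "share pool: " "no memory for spg\n") */
    pr_err("no memory for spg\n");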
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 214 ++++++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 108 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 45c3d510708c1..63c3d6d27ebf8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -16,6 +16,8 @@ * published by the Free Software Foundation. */
+#define pr_fmt(fmt) "share pool: " fmt + #include <linux/share_pool.h> #include <linux/sched.h> #include <linux/sched/task.h> @@ -122,7 +124,7 @@ static struct sp_group_master *sp_init_group_master_locked(
master = kmalloc(sizeof(struct sp_group_master), GFP_KERNEL); if (master == NULL) { - pr_err_ratelimited("share pool: no memory for spg master\n"); + pr_err_ratelimited("no memory for spg master\n"); return ERR_PTR(-ENOMEM); }
@@ -188,7 +190,7 @@ static struct sp_proc_stat *create_proc_stat(struct mm_struct *mm,
stat = kmalloc(sizeof(*stat), GFP_KERNEL); if (stat == NULL) { - pr_err_ratelimited("share pool: alloc proc stat failed, lack of memory\n"); + pr_err_ratelimited("no memory for proc stat\n"); return ERR_PTR(-ENOMEM); }
@@ -221,7 +223,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, } else { up_write(&sp_proc_stat_sem); /* if enter this branch, that's our mistake */ - WARN(1, "share pool: proc stat invalid id %d\n", id); + WARN(1, "proc stat invalid id %d\n", id); return ERR_PTR(-EBUSY); } } @@ -235,7 +237,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, alloc_id = idr_alloc(&sp_proc_stat_idr, stat, tgid, tgid + 1, GFP_KERNEL); if (alloc_id < 0) { up_write(&sp_proc_stat_sem); - pr_err_ratelimited("share pool: proc stat idr alloc failed %d\n", alloc_id); + pr_err_ratelimited("proc stat idr alloc failed %d\n", alloc_id); kfree(stat); return ERR_PTR(alloc_id); } @@ -347,7 +349,7 @@ static struct spg_proc_stat *create_spg_proc_stat(int tgid, int spg_id)
stat = kmalloc(sizeof(struct spg_proc_stat), GFP_KERNEL); if (stat == NULL) { - pr_err_ratelimited("share pool: no memory for spg proc stat\n"); + pr_err_ratelimited("no memory for spg proc stat\n"); return ERR_PTR(-ENOMEM); }
@@ -418,7 +420,7 @@ static struct sp_spg_stat *create_spg_stat(int spg_id)
stat = kmalloc(sizeof(*stat), GFP_KERNEL); if (stat == NULL) { - pr_err_ratelimited("share pool: alloc spg stat failed, lack of memory\n"); + pr_err_ratelimited("no memory for spg stat\n"); return ERR_PTR(-ENOMEM); }
@@ -448,7 +450,7 @@ static int sp_init_spg_stat(struct sp_group *spg) GFP_KERNEL); up_write(&sp_spg_stat_sem); if (ret < 0) { - pr_err_ratelimited("share pool: create group %d idr alloc failed, ret %d\n", + pr_err_ratelimited("group %d idr alloc failed, ret %d\n", spg_id, ret); kfree(stat); } @@ -639,7 +641,7 @@ static void update_spg_proc_stat(unsigned long size, bool inc, { if (unlikely(!stat)) { sp_dump_stack(); - WARN(1, "share pool: null process stat\n"); + WARN(1, "null process stat\n"); return; }
@@ -652,7 +654,7 @@ static void update_spg_proc_stat(unsigned long size, bool inc, update_spg_proc_stat_k2u(size, inc, stat); break; default: - WARN(1, "share pool: invalid stat type\n"); + WARN(1, "invalid stat type\n"); } }
@@ -675,7 +677,7 @@ static void sp_update_process_stat(struct task_struct *tsk, bool inc, static inline void check_interrupt_context(void) { if (unlikely(in_interrupt())) - panic("share_pool: can't be used in interrupt context\n"); + panic("function can't be used in interrupt context\n"); }
static inline bool check_aoscore_process(struct task_struct *tsk) @@ -837,7 +839,7 @@ static loff_t addr_to_offset(unsigned long addr, struct sp_area *spa) if (spa && spa->spg != spg_none) return (loff_t)(addr - spa->spg->dvpp_va_start);
- pr_err("share pool: the addr is not belong to share pool range\n"); + pr_err("addr doesn't belong to share pool range\n"); return addr; }
@@ -850,18 +852,18 @@ static struct sp_group *create_spg(int spg_id) int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM)) { - pr_err_ratelimited("share pool: reach system max group num\n"); + pr_err_ratelimited("reach system max group num\n"); return ERR_PTR(-ENOSPC); }
spg = kzalloc(sizeof(*spg), GFP_KERNEL); if (spg == NULL) { - pr_err_ratelimited("share pool: alloc spg failed due to lack of memory\n"); + pr_err_ratelimited("no memory for spg\n"); return ERR_PTR(-ENOMEM); } ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id + 1, GFP_KERNEL); if (ret < 0) { - pr_err_ratelimited("share pool: create group %d idr alloc failed, ret %d\n", + pr_err_ratelimited("group %d idr alloc failed %d\n", spg_id, ret); goto out_kfree; } @@ -881,8 +883,7 @@ static struct sp_group *create_spg(int spg_id) spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE, VM_NORESERVE); if (IS_ERR(spg->file)) { - pr_err("share pool: file setup for small page failed %ld\n", - PTR_ERR(spg->file)); + pr_err("spg file setup failed %ld\n", PTR_ERR(spg->file)); ret = PTR_ERR(spg->file); goto out_idr; } @@ -890,7 +891,7 @@ static struct sp_group *create_spg(int spg_id) spg->file_hugetlb = hugetlb_file_setup(name, MAX_LFS_FILESIZE, VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, hsize_log); if (IS_ERR(spg->file_hugetlb)) { - pr_err("share pool: file setup for hugepage failed %ld\n", + pr_err("spg file_hugetlb setup failed %ld\n", PTR_ERR(spg->file_hugetlb)); ret = PTR_ERR(spg->file_hugetlb); goto out_fput; @@ -960,7 +961,7 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str err = do_munmap(mm, spa->va_start, spa_size(spa), NULL); if (err) { /* we are not supposed to fail */ - pr_err("share pool: failed to unmap VA %pK when munmap task areas\n", + pr_err("failed to unmap VA %pK when munmap task areas\n", (void *)spa->va_start); }
@@ -979,7 +980,7 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg)
if (share_pool_group_mode == SINGLE_GROUP_MODE && master && master->count == 1) { - pr_err("share pool: at most one sp group for a task is allowed in single mode\n"); + pr_err_ratelimited("at most one sp group for a task is allowed in single mode\n"); return -EEXIST; }
@@ -992,13 +993,13 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg)
list_for_each_entry(spg_node, &master->node_list, group_node) { if (spg_node->spg == spg) { - pr_err("share pool: task is already in target group\n"); + pr_err_ratelimited("task already in target group, id = %d\n", spg->id); return -EEXIST; } }
if (master->count + 1 == MAX_GROUP_FOR_TASK) { - pr_err("share pool: task reaches max group num\n"); + pr_err("task reaches max group num\n"); return -ENOSPC; }
@@ -1012,7 +1013,7 @@ static int mm_add_group_finish(struct mm_struct *mm, struct sp_group *spg, unsig
spg_node = kzalloc(sizeof(struct sp_group_node), GFP_KERNEL); if (spg_node == NULL) { - pr_err_ratelimited("share pool: no memory for spg node\n"); + pr_err_ratelimited("no memory for spg node\n"); return -ENOMEM; }
@@ -1070,7 +1071,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) /* only allow READ, READ | WRITE */ if (!((prot == PROT_READ) || (prot == (PROT_READ | PROT_WRITE)))) { - pr_err_ratelimited("share pool: prot is invalid 0x%lx\n", prot); + pr_err_ratelimited("prot is invalid 0x%lx\n", prot); return -EINVAL; }
@@ -1080,7 +1081,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id)
if ((spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) && spg_id != SPG_ID_DVPP_PASS_THROUGH) { - pr_err_ratelimited("share pool: task add group failed, invalid group id %d\n", spg_id); + pr_err_ratelimited("add group failed, invalid group id %d\n", spg_id); return -EINVAL; }
@@ -1088,14 +1089,14 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) spg = __sp_find_spg(pid, spg_id);
if (!spg) { - pr_err_ratelimited("share pool: spg %d hasn't been created\n", spg_id); + pr_err_ratelimited("spg %d hasn't been created\n", spg_id); return -EINVAL; }
down_read(&spg->rw_lock); if (!spg_valid(spg)) { up_read(&spg->rw_lock); - pr_err_ratelimited("share pool: task add group failed, group id %d is dead\n", spg_id); + pr_err_ratelimited("add group failed, group id %d is dead\n", spg_id); sp_group_drop(spg); return -EINVAL; } @@ -1108,7 +1109,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_AUTO_MIN, SPG_ID_AUTO_MAX, GFP_ATOMIC); if (spg_id < 0) { - pr_err_ratelimited("share pool: task add group failed, auto generate group id failed\n"); + pr_err_ratelimited("add group failed, auto generate group id failed\n"); return spg_id; } id_newly_generated = true; @@ -1119,7 +1120,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) SPG_ID_DVPP_PASS_THROUGH_MIN, SPG_ID_DVPP_PASS_THROUGH_MAX, GFP_ATOMIC); if (spg_id < 0) { - pr_err_ratelimited("share pool: task add group failed, DVPP auto generate group id failed\n"); + pr_err_ratelimited("add group failed, DVPP auto generate group id failed\n"); return spg_id; } id_newly_generated = true; @@ -1194,7 +1195,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) stat = sp_init_process_stat(tsk, mm, spg); if (IS_ERR(stat)) { ret = PTR_ERR(stat); - pr_err_ratelimited("share pool: init process stat failed, ret %lx\n", PTR_ERR(stat)); + pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); goto out_drop_group; }
@@ -1223,7 +1224,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) if (spa->type == SPA_TYPE_K2SPG && spa->kva) { addr = sp_remap_kva_to_vma(spa->kva, spa, mm, prot); if (IS_ERR_VALUE(addr)) - pr_warn("share pool: task add group remap k2u failed, ret %ld\n", addr); + pr_warn("add group remap k2u failed %ld\n", addr);
spin_lock(&sp_area_lock); continue; @@ -1234,7 +1235,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_sem); ret = -EBUSY; - pr_err("share pool: task add group: encountered coredump, abort\n"); + pr_err("add group: encountered coredump, abort\n"); spin_lock(&sp_area_lock); break; } @@ -1244,7 +1245,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) sp_munmap_task_areas(mm, spg, &spa->link); up_write(&mm->mmap_sem); ret = addr; - pr_err("share pool: task add group sp mmap failed, ret %d\n", ret); + pr_err("add group: sp mmap failed %d\n", ret); spin_lock(&sp_area_lock); break; } @@ -1254,9 +1255,9 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) ret = do_mm_populate(mm, spa->va_start, populate, 0); if (ret) { if (unlikely(fatal_signal_pending(current))) - pr_warn_ratelimited("share pool: task add group failed, current thread is killed\n"); + pr_warn_ratelimited("add group failed, current thread is killed\n"); else - pr_warn_ratelimited("share pool: task add group failed, mm populate failed " + pr_warn_ratelimited("add group failed, mm populate failed " "(potential no enough memory when -12): %d, spa type is %d\n", ret, spa->type); down_write(&mm->mmap_sem); @@ -1354,7 +1355,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, vend = vstart + MMAP_SHARE_POOL_16G_SIZE; } else { if (!spg) { - pr_err_ratelimited("share pool: don't allow k2u(task) in host svm multiprocess scene\n"); + pr_err_ratelimited("don't allow k2u(task) in host svm multiprocess scene\n"); return ERR_PTR(-EINVAL); } vstart = spg->dvpp_va_start; @@ -1364,7 +1365,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
spa = __kmalloc_node(sizeof(struct sp_area), GFP_KERNEL, node_id); if (unlikely(!spa)) { - pr_err_ratelimited("share pool: alloc spa failed due to lack of memory\n"); + pr_err_ratelimited("no memory for spa\n"); return ERR_PTR(-ENOMEM); }
@@ -1658,15 +1659,14 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr, down_write(&mm->mmap_sem); if (unlikely(mm->core_state)) { up_write(&mm->mmap_sem); - pr_info("share pool: munmap: encoutered coredump\n"); + pr_info("munmap: encoutered coredump\n"); return; }
err = do_munmap(mm, addr, size, NULL); - if (err) { - /* we are not supposed to fail */ - pr_err("share pool: failed to unmap VA %pK when sp munmap\n", (void *)addr); - } + /* we are not supposed to fail */ + if (err) + pr_err("failed to unmap VA %pK when sp munmap\n", (void *)addr);
up_write(&mm->mmap_sem); } @@ -1738,13 +1738,13 @@ int sp_free(unsigned long addr)
} else { /* spa == NULL */ ret = -EINVAL; - pr_debug("share pool: sp free invalid input addr %lx\n", (unsigned long)addr); + pr_debug("sp free invalid input addr %lx\n", (unsigned long)addr); goto out; }
if (spa->type != SPA_TYPE_ALLOC) { ret = -EINVAL; - pr_debug("share pool: sp free failed, addr %lx is not from sp alloc\n", (unsigned long)addr); + pr_debug("sp free failed, addr %lx is not from sp alloc\n", (unsigned long)addr); goto drop_spa; }
@@ -1759,7 +1759,7 @@ int sp_free(unsigned long addr) offset = addr_to_offset(addr, spa); ret = vfs_fallocate(spa_file(spa), mode, offset, spa_size(spa)); if (ret) - pr_err("share pool: sp free fallocate failed: %d\n", ret); + pr_err("fallocate in sp free failed: %d\n", ret);
up_read(&spa->spg->rw_lock);
@@ -1801,7 +1801,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, populate, NULL); if (IS_ERR_VALUE(addr)) { atomic_dec(&spa->use_count); - pr_err("share pool: do_mmap fails %ld\n", addr); + pr_err("do_mmap fails %ld\n", addr); } else { BUG_ON(addr != spa->va_start); } @@ -1843,17 +1843,17 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) spg_id = mdc_default_group_id;
if (unlikely(!size)) { - pr_err_ratelimited("share pool: allocation failed, invalid size %lu\n", size); + pr_err_ratelimited("allocation failed, invalid size %lu\n", size); return ERR_PTR(-EINVAL); }
if (spg_id != SPG_ID_DEFAULT && spg_id < SPG_ID_MIN) { - pr_err_ratelimited("share pool: allocation failed, invalid group id %d\n", spg_id); + pr_err_ratelimited("allocation failed, invalid group id %d\n", spg_id); return ERR_PTR(-EINVAL); }
if (sp_flags & (~SP_FLAG_MASK)) { - pr_err_ratelimited("share pool: allocation failed, invalid flag %lx\n", sp_flags); + pr_err_ratelimited("allocation failed, invalid flag %lx\n", sp_flags); return ERR_PTR(-EINVAL); }
@@ -1895,7 +1895,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) if (!spg_valid(spg)) { up_read(&spg->rw_lock); sp_group_drop(spg); - pr_err_ratelimited("share pool: sp alloc failed, spg is dead\n"); + pr_err_ratelimited("sp alloc failed, spg is dead\n"); return ERR_PTR(-ENODEV); }
@@ -1909,7 +1909,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) try_again: spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_ALLOC, current->tgid); if (IS_ERR(spa)) { - pr_err_ratelimited("share pool: allocation failed due to alloc spa failure " + pr_err_ratelimited("alloc spa failed in allocation" "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); p = spa; goto out; @@ -1925,7 +1925,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) down_write(&mm->mmap_sem); if (unlikely(mm->core_state)) { up_write(&mm->mmap_sem); - pr_info("share pool: allocation encountered coredump\n"); + pr_info("allocation encountered coredump\n"); continue; }
@@ -1934,7 +1934,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) up_write(&mm->mmap_sem); p = (void *)mmap_addr; __sp_free(spg, sp_addr, size_aligned, mm); - pr_err("share pool: allocation sp mmap failed, ret %ld\n", mmap_addr); + pr_err("sp mmap in allocation failed %ld\n", mmap_addr); goto out; }
@@ -1947,7 +1947,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) vma = find_vma(mm, sp_addr); if (unlikely(!vma)) { up_write(&mm->mmap_sem); - pr_debug("share pool: allocation failed due to find %lx vma failure\n", (unsigned long)sp_addr); + pr_debug("allocation failed, can't find %lx vma\n", (unsigned long)sp_addr); p = ERR_PTR(-EINVAL); goto out; } @@ -1986,9 +1986,9 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) __sp_free(spg, sp_addr, size_aligned, (list_next_entry(spg_node, proc_node))->master->mm); if (unlikely(fatal_signal_pending(current))) - pr_warn_ratelimited("share pool: allocation failed, current thread is killed\n"); + pr_warn_ratelimited("allocation failed, current thread is killed\n"); else - pr_warn_ratelimited("share pool: allocation failed due to mm populate failed" + pr_warn_ratelimited("allocation failed due to mm populate failed" "(potential no enough memory when -12): %d\n", ret); p = ERR_PTR(ret);
@@ -1997,7 +1997,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
ret = vfs_fallocate(spa_file(spa), mode, offset, spa_size(spa)); if (ret) - pr_err("share pool: sp alloc normal page fallocate failed %d\n", ret); + pr_err("fallocate in allocation failed %d\n", ret);
if (file == spg->file_hugetlb) { spg->hugepage_failures++; @@ -2047,13 +2047,13 @@ static int is_vmap_hugepage(unsigned long addr) struct vm_struct *area;
if (unlikely(!addr)) { - pr_err_ratelimited("share pool: null pointer when judge vmap addr\n"); + pr_err_ratelimited("null vmap addr pointer\n"); return -EINVAL; }
area = find_vm_area((void *)addr); if (unlikely(!area)) { - pr_debug("share pool: failed to find vm area(%lx)\n", addr); + pr_debug("can't find vm area(%lx)\n", addr); return -EINVAL; }
@@ -2086,14 +2086,14 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
down_write(&mm->mmap_sem); if (unlikely(mm->core_state)) { - pr_err("share pool: k2u mmap: encountered coredump, abort\n"); + pr_err("k2u mmap: encountered coredump, abort\n"); ret_addr = -EBUSY; goto put_mm; }
ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(ret_addr)) { - pr_debug("share pool: k2u mmap failed %lx\n", ret_addr); + pr_debug("k2u mmap failed %lx\n", ret_addr); goto put_mm; } BUG_ON(ret_addr != spa->va_start); @@ -2107,7 +2107,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); if (ret) { do_munmap(mm, ret_addr, spa_size(spa), NULL); - pr_debug("share pool: remap vmalloc hugepage failed, " + pr_debug("remap vmalloc hugepage failed, " "ret %d, kva is %lx\n", ret, (unsigned long)kva); ret_addr = ret; goto put_mm; @@ -2122,7 +2122,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, __pgprot(vma->vm_page_prot.pgprot)); if (ret) { do_munmap(mm, ret_addr, spa_size(spa), NULL); - pr_err("share pool: remap_pfn_range failed, ret %d\n", ret); + pr_err("remap_pfn_range failed %d\n", ret); ret_addr = ret; goto put_mm; } @@ -2156,7 +2156,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, struct sp_area *spa,
ret_addr = sp_remap_kva_to_vma(kva, spa, mm, prot); if (IS_ERR_VALUE(ret_addr)) { - pr_err("share pool: remap k2u to task failed, ret %ld\n", ret_addr); + pr_err("remap k2u to task failed %ld\n", ret_addr); return ERR_PTR(ret_addr); }
@@ -2177,7 +2177,7 @@ static void *sp_make_share_kva_to_spg(unsigned long kva, struct sp_area *spa, mm = spg_node->master->mm; ret_addr = sp_remap_kva_to_vma(kva, spa, mm, spg_node->prot); if (IS_ERR_VALUE(ret_addr)) { - pr_err("share pool: remap k2u to spg failed, ret %ld \n", ret_addr); + pr_err("remap k2u to spg failed %ld\n", ret_addr); __sp_free(spg, spa->va_start, spa_size(spa), mm); p = ERR_PTR(ret_addr); goto out; @@ -2251,7 +2251,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, check_interrupt_context();
if (sp_flags & ~SP_DVPP) { - pr_err_ratelimited("share pool: k2u sp_flags %lx error\n", sp_flags); + pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return ERR_PTR(-EINVAL); }
@@ -2267,7 +2267,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } else if (is_hugepage == 0) { /* do nothing */ } else { - pr_err_ratelimited("share pool: k2u kva not vmalloc address\n"); + pr_err_ratelimited("k2u kva is not vmalloc address\n"); return ERR_PTR(is_hugepage); }
@@ -2281,7 +2281,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, struct spg_proc_stat *stat;
if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { - pr_err_ratelimited("share pool: k2task invalid spg id %d\n", spg_id); + pr_err_ratelimited("k2u_task invalid spg id %d\n", spg_id); return ERR_PTR(-EINVAL); }
@@ -2289,20 +2289,20 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, stat = sp_init_process_stat(current, mm, spg_none); up_write(&sp_group_sem); if (IS_ERR(stat)) { - pr_err_ratelimited("share pool: k2u(task) init process stat failed, ret %lx\n", + pr_err_ratelimited("k2u_task init process stat failed %lx\n", PTR_ERR(stat)); return stat; }
spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); if (IS_ERR(spa)) { - pr_err_ratelimited("share pool: k2u(task) failed due to alloc spa failure " + pr_err_ratelimited("alloc spa failed in k2u_task " "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); return spa; }
if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) { - pr_debug("share pool: %s: the kva %lx is not valid\n", __func__, (unsigned long)kva_aligned); + pr_debug("k2u_task kva %lx is not valid\n", (unsigned long)kva_aligned); goto out_drop_spa; }
@@ -2320,7 +2320,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, /* k2u to group */ if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) { up_read(&spg->rw_lock); - pr_err_ratelimited("share pool: k2spg invalid spg id %d\n", spg_id); + pr_err_ratelimited("k2u_spg invalid spg id %d\n", spg_id); uva = ERR_PTR(-EINVAL); goto out_drop_spg; } @@ -2332,7 +2332,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
if (IS_ERR(spa)) { up_read(&spg->rw_lock); - pr_err_ratelimited("share pool: k2u(spg) failed due to alloc spa failure " + pr_err_ratelimited("alloc spa failed in k2u_spg " "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); uva = spa; goto out_drop_spg; @@ -2340,7 +2340,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) { up_read(&spg->rw_lock); - pr_debug("share pool: %s: the kva %lx is not valid\n", __func__, (unsigned long)kva_aligned); + pr_debug("k2u_spg kva %lx is not valid\n", (unsigned long)kva_aligned); goto out_drop_spa; }
@@ -2349,8 +2349,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, else uva = sp_make_share_kva_to_task(kva_aligned, spa, mm); } else { - /* group is dead, return -ENODEV */ - pr_err_ratelimited("share pool: failed to make k2u, sp group is dead\n"); + pr_err_ratelimited("k2u failed, sp group is dead\n"); uva = ERR_PTR(-ENODEV); } up_read(&spg->rw_lock); @@ -2364,8 +2363,8 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } else { /* associate vma and spa */ if (!vmalloc_area_clr_flag(spa, kva_aligned, VM_SHAREPOOL)) - pr_debug("share pool: %s: the kva %lx is not valid\n", - __func__, (unsigned long)kva_aligned); + pr_debug("k2u_spg clear kva %lx is not valid\n", + (unsigned long)kva_aligned); }
out_drop_spa: @@ -2420,7 +2419,7 @@ static int sp_pte_entry(pte_t *pte, unsigned long addr,
no_page: pte_unmap_unlock(pte, ptl); - pr_debug("share pool: the page of addr %lx unexpectedly not in RAM\n", + pr_debug("the page of addr %lx unexpectedly not in RAM\n", (unsigned long)addr); return -EFAULT; } @@ -2441,7 +2440,7 @@ static int sp_test_walk(unsigned long addr, unsigned long next, static int sp_pte_hole(unsigned long start, unsigned long end, struct mm_walk *walk) { - pr_debug("share pool: hole [%lx, %lx) appeared unexpectedly\n", (unsigned long)start, (unsigned long)end); + pr_debug("hole [%lx, %lx) appeared unexpectedly\n", (unsigned long)start, (unsigned long)end); return -EFAULT; }
@@ -2454,7 +2453,7 @@ static int sp_hugetlb_entry(pte_t *ptep, unsigned long hmask, struct sp_walk_data *sp_walk_data;
if (unlikely(!pte_present(pte))) { - pr_debug("share pool: the page of addr %lx unexpectedly not in RAM\n", (unsigned long)addr); + pr_debug("the page of addr %lx unexpectedly not in RAM\n", (unsigned long)addr); return -EFAULT; }
@@ -2516,7 +2515,7 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, */ vma = find_vma(mm, uva); if (!vma) { - pr_debug("share pool: u2k input uva %lx is invalid\n", (unsigned long)uva); + pr_debug("u2k input uva %lx is invalid\n", (unsigned long)uva); return -EINVAL; } if ((is_vm_hugetlb_page(vma)) || is_vm_huge_special(vma)) @@ -2544,14 +2543,14 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size, size_aligned = ALIGN(uva + size, page_size) - uva_aligned;
if (uva_aligned + size_aligned < uva_aligned) { - pr_err_ratelimited("share pool: overflow happened in walk page range\n"); + pr_err_ratelimited("overflow happened in walk page range\n"); return -EINVAL; }
page_nr = size_aligned / page_size; pages = kvmalloc(page_nr * sizeof(struct page *), GFP_KERNEL); if (!pages) { - pr_err_ratelimited("share pool: alloc page array failed in walk page range\n"); + pr_err_ratelimited("alloc page array failed in walk page range\n"); return -ENOMEM; } sp_walk_data->pages = pages; @@ -2613,13 +2612,13 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) down_write(&mm->mmap_sem); if (unlikely(mm->core_state)) { up_write(&mm->mmap_sem); - pr_err("share pool: u2k: encountered coredump, abort\n"); + pr_err("u2k: encountered coredump, abort\n"); return p; }
ret = __sp_walk_page_range(uva, size, mm, &sp_walk_data); if (ret) { - pr_err_ratelimited("share pool: walk page range failed, ret %d\n", ret); + pr_err_ratelimited("walk page range failed %d\n", ret); up_write(&mm->mmap_sem); return ERR_PTR(ret); } @@ -2633,7 +2632,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) up_write(&mm->mmap_sem);
if (!p) { - pr_err("share pool: vmap(huge) in u2k failed\n"); + pr_err("vmap(huge) in u2k failed\n"); __sp_walk_page_free(&sp_walk_data); return ERR_PTR(-ENOMEM); } @@ -2690,13 +2689,13 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) spa = __find_sp_area(ALIGN_DOWN(uva, PAGE_SIZE)); if (!spa) { ret = -EINVAL; - pr_debug("share pool: invalid input uva %lx in unshare uva\n", (unsigned long)uva); + pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva); goto out; } }
if (spa->type != SPA_TYPE_K2TASK && spa->type != SPA_TYPE_K2SPG) { - pr_err_ratelimited("share pool: this spa should not be unshare here\n"); + pr_err_ratelimited("unshare wrong type spa\n"); ret = -EINVAL; goto out_drop_area; } @@ -2712,19 +2711,19 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size)
if (size_aligned < ALIGN(size, page_size)) { ret = -EINVAL; - pr_err_ratelimited("share pool: unshare uva failed due to invalid parameter size %lu\n", size); + pr_err_ratelimited("unshare uva failed, invalid parameter size %lu\n", size); goto out_drop_area; }
if (spa->type == SPA_TYPE_K2TASK) { if (!current->mm || spa->applier != current->tgid) { - pr_err_ratelimited("share pool: unshare uva(to task) no permission\n"); + pr_err_ratelimited("unshare uva(to task) no permission\n"); ret = -EPERM; goto out_drop_area; }
if (!spa->mm) { - pr_err_ratelimited("share pool: unshare uva(to task) failed, none spa owner\n"); + pr_err_ratelimited("unshare uva(to task) failed, none spa owner\n"); ret = -EINVAL; goto out_drop_area; } @@ -2738,13 +2737,13 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) */ mm = get_task_mm(current->group_leader); if (!mm) { - pr_info_ratelimited("share pool: no need to unshare uva(to task), " + pr_info_ratelimited("no need to unshare uva(to task), " "target process mm is exiting\n"); goto out_clr_flag; }
if (spa->mm != mm) { - pr_err_ratelimited("share pool: unshare uva(to task) failed, spa not belong to the task\n"); + pr_err_ratelimited("unshare uva(to task) failed, spa not belong to the task\n"); ret = -EINVAL; mmput(mm); goto out_drop_area; @@ -2761,11 +2760,10 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) ret = do_munmap(mm, uva_aligned, size_aligned, NULL); up_write(&mm->mmap_sem); mmput(mm); - if (ret) { - /* we are not supposed to fail */ - pr_err("share pool: failed to unmap VA %pK when munmap in unshare uva\n", + /* we are not supposed to fail */ + if (ret) + pr_err("failed to unmap VA %pK when munmap in unshare uva\n", (void *)uva_aligned); - } sp_update_process_stat(current, false, spa);
} else if (spa->type == SPA_TYPE_K2SPG) { @@ -2782,7 +2780,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size)
if (!found) { up_read(&spa->spg->rw_lock); - pr_err_ratelimited("share pool: unshare uva(to group) failed, " + pr_err_ratelimited("unshare uva(to group) failed, " "caller process doesn't belong to target group\n"); ret = -EPERM; goto out_drop_area; @@ -2809,7 +2807,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) else sp_update_process_stat(current, false, spa); } else { - WARN(1, "share pool: unshare uva invalid spa type"); + WARN(1, "unshare uva invalid spa type"); }
sp_dump_stack(); @@ -2817,7 +2815,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) out_clr_flag: /* deassociate vma and spa */ if (!vmalloc_area_clr_flag(spa, spa->kva, VM_SHAREPOOL)) - pr_warn("share pool: %s: the spa->kva %ld is not valid\n", __func__, spa->kva); + pr_debug("clear spa->kva %ld is not valid\n", spa->kva);
out_drop_area: __sp_area_drop(spa); @@ -2845,12 +2843,12 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) step = PAGE_SIZE; is_hugepage = false; } else { - pr_err_ratelimited("share pool: check vmap hugepage failed, ret %d\n", ret); + pr_err_ratelimited("check vmap hugepage failed %d\n", ret); return -EINVAL; }
if (kva_aligned + size_aligned < kva_aligned) { - pr_err_ratelimited("share pool: overflow happened in unshare kva\n"); + pr_err_ratelimited("overflow happened in unshare kva\n"); return -EINVAL; }
@@ -2862,7 +2860,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) if (page) put_page(page); else - pr_warn("share pool: vmalloc %pK to page/hugepage failed\n", + WARN(1, "vmalloc %pK to page/hugepage failed\n", (void *)addr); }
@@ -2895,7 +2893,7 @@ int sp_unshare(unsigned long va, unsigned long size) ret = sp_unshare_kva(va, size); } else { /* regard user and kernel address ranges as bad address */ - pr_debug("share pool: unshare addr %lx is not a user or kernel addr\n", (unsigned long)va); + pr_debug("unshare addr %lx is not a user or kernel addr\n", (unsigned long)va); ret = -EFAULT; }
@@ -2924,7 +2922,7 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, check_interrupt_context();
if (unlikely(!sp_walk_data)) { - pr_err_ratelimited("share pool: null pointer when walk page range\n"); + pr_err_ratelimited("null pointer when walk page range\n"); return -EINVAL; } if (!tsk || (tsk->flags & PF_EXITING)) @@ -2942,7 +2940,7 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, if (likely(!mm->core_state)) ret = __sp_walk_page_range(uva, size, mm, sp_walk_data); else { - pr_err("share pool: walk page range: encoutered coredump\n"); + pr_err("walk page range: encoutered coredump\n"); ret = -ESRCH; } up_write(&mm->mmap_sem); @@ -3659,7 +3657,7 @@ void sp_group_post_exit(struct mm_struct *mm) alloc_size = atomic64_read(&stat->alloc_size); k2u_size = atomic64_read(&stat->k2u_size); } else - WARN(1, "share pool: can't find sp proc stat\n"); + WARN(1, "can't find sp proc stat\n");
/* * There are two basic scenarios when a process in the share pool is @@ -3678,7 +3676,7 @@ void sp_group_post_exit(struct mm_struct *mm) */ if (master) { if (alloc_size != 0 || k2u_size != 0) - pr_info("share pool: process %s(%d) exits. " + pr_info("process %s(%d) exits. " "It applied %ld aligned KB, k2u shared %ld aligned KB\n", stat->comm, master->sp_stat_id, byte2kb(alloc_size), byte2kb(k2u_size));
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Refactor sp_alloc() to improve its readability.
1. Introduce struct sp_alloc_context to save the allocation parameters.
2. Extract sp_alloc_prepare() to check the input parameters of sp_alloc() and to initialize the sp_alloc_context instance.
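The resulting entry of sp_alloc(), sketched with the error paths trimmed (the ALLOC_* states are presumably used to drive the hugepage fallback retry):

    struct sp_alloc_context ac;
    int ret;

    ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac);  /* validate and fill ac */
    if (ret)
            return ERR_PTR(ret);

    /* the allocation proper now reads ac.spg, ac.file, ac.size_aligned, ac.sp_flags */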
Suggested-by: Zhou Guanghui zhouguanghui1@huawei.com Suggested-by: Wang Wensheng wangwensheng4@huawei.com Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 119 +++++++++++++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 41 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 63c3d6d27ebf8..210d25a97deac 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1809,32 +1809,25 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, return addr; }
-/** - * sp_alloc() - Allocate shared memory for all the processes in a sp_group. - * @size: the size of memory to allocate. - * @sp_flags: how to allocate the memory. - * @spg_id: the share group that the memory is allocated to. - * - * Use spg_id of current thread if spg_id == SPG_ID_DEFAULT. - * - * Return: - * * if succeed, return the starting kernel address of the shared memory. - * * if fail, return the pointer of -errno. - */ -void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) -{ - struct sp_group *spg, *spg_tmp; - struct sp_area *spa = NULL; - unsigned long sp_addr; - unsigned long mmap_addr; - void *p; /* return value */ - struct mm_struct *mm; +#define ALLOC_NORMAL 1 +#define ALLOC_RETRY 2 +#define ALLOC_NOMEM 3 + +struct sp_alloc_context { + struct sp_group *spg; struct file *file; + unsigned long size; unsigned long size_aligned; - int ret = 0; - unsigned long mode, offset; - unsigned int noreclaim_flag; - struct sp_group_node *spg_node; + unsigned long sp_flags; + unsigned long populate; + int state; +}; + +static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, + int spg_id, struct sp_alloc_context *ac) +{ + int ret; + struct sp_group *spg, *spg_tmp;
check_interrupt_context();
@@ -1842,19 +1835,19 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) if (enable_mdc_default_group) spg_id = mdc_default_group_id;
- if (unlikely(!size)) { + if (unlikely(!size || (size >> PAGE_SHIFT) > totalram_pages)) { pr_err_ratelimited("allocation failed, invalid size %lu\n", size); - return ERR_PTR(-EINVAL); + return -EINVAL; }
if (spg_id != SPG_ID_DEFAULT && spg_id < SPG_ID_MIN) { pr_err_ratelimited("allocation failed, invalid group id %d\n", spg_id); - return ERR_PTR(-EINVAL); + return -EINVAL; }
if (sp_flags & (~SP_FLAG_MASK)) { pr_err_ratelimited("allocation failed, invalid flag %lx\n", sp_flags); - return ERR_PTR(-EINVAL); + return -EINVAL; }
if (sp_flags & SP_HUGEPAGE_ONLY) @@ -1874,8 +1867,8 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) * The judgment is added to prevent exit in this case. */ if (ret < 0 && (ret != -EEXIST)) { - pr_err_ratelimited("share pool: allocation failed, add group error %d in DVPP pass through\n", ret); - return ERR_PTR(ret); + pr_err_ratelimited("allocation failed, add group error %d in DVPP pass through\n", ret); + return ret; } spg = get_first_group(current->mm); } else { /* other scenes */ @@ -1885,33 +1878,75 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) sp_group_drop(spg); if (spg_tmp) sp_group_drop(spg_tmp); - return ERR_PTR(-ENODEV); + return -ENODEV; } sp_group_drop(spg_tmp); } }
+ /* up_read will be at the end of sp_alloc */ down_read(&spg->rw_lock); if (!spg_valid(spg)) { up_read(&spg->rw_lock); sp_group_drop(spg); pr_err_ratelimited("sp alloc failed, spg is dead\n"); - return ERR_PTR(-ENODEV); + return -ENODEV; }
if (sp_flags & SP_HUGEPAGE) { - file = spg->file_hugetlb; - size_aligned = ALIGN(size, PMD_SIZE); + ac->file = spg->file_hugetlb; + ac->size_aligned = ALIGN(size, PMD_SIZE); } else { - file = spg->file; - size_aligned = ALIGN(size, PAGE_SIZE); + ac->file = spg->file; + ac->size_aligned = ALIGN(size, PAGE_SIZE); } + + ac->spg = spg; + ac->size = size; + ac->sp_flags = sp_flags; + ac->state = ALLOC_NORMAL; + return 0; +} + +/** + * sp_alloc() - Allocate shared memory for all the processes in a sp_group. + * @size: the size of memory to allocate. + * @sp_flags: how to allocate the memory. + * @spg_id: the share group that the memory is allocated to. + * + * Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode. + * + * Return: + * * if succeed, return the starting kernel address of the shared memory. + * * if fail, return the pointer of -errno. + */ +void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +{ + struct sp_group *spg; + struct sp_area *spa = NULL; + unsigned long sp_addr; + unsigned long mmap_addr; + void *p; /* return value */ + struct mm_struct *mm; + struct file *file; + unsigned long size_aligned; + int ret = 0; + unsigned long mode, offset; + unsigned int noreclaim_flag; + struct sp_group_node *spg_node; + struct sp_alloc_context ac; + + ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); + if (ret) + return ERR_PTR(ret); + try_again: - spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_ALLOC, current->tgid); + spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg, + SPA_TYPE_ALLOC, current->tgid); if (IS_ERR(spa)) { pr_err_ratelimited("alloc spa failed in allocation" "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); - p = spa; + ret = PTR_ERR(spa); goto out; } sp_addr = spa->va_start; @@ -1990,7 +2025,6 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) else pr_warn_ratelimited("allocation failed due to mm populate failed" "(potential no enough memory when -12): %d\n", ret); - p = ERR_PTR(ret);
mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; offset = addr_to_offset(sp_addr, spa); @@ -2018,7 +2052,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) out: up_read(&spg->rw_lock);
- if (!IS_ERR(p)) + if (!ret) sp_update_process_stat(current, true, spa);
/* this will free spa if mmap failed */ @@ -2029,7 +2063,10 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
sp_dump_stack(); sp_try_to_compact(); - return p; + if (ret) + return ERR_PTR(ret); + else + return (void *)(spa->va_start); } EXPORT_SYMBOL_GPL(sp_alloc);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Refactor sp_alloc to improve its readability. Extract sp_fallocate.
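The extracted helper only wraps vfs_fallocate() with FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE. For readers less familiar with that flag combination, the userspace sketch below (a hypothetical demo, not part of the patch) performs the same operation on a tmpfs-backed file: the backing pages of the punched range are released while the file size stays unchanged.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        size_t len = 4096 * 4;
        int fd = memfd_create("punch-demo", 0);   /* tmpfs-backed, like spg->file */
        char *p;

        if (fd < 0 || ftruncate(fd, len) < 0)
                return 1;

        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return 1;
        memset(p, 0x5a, len);                     /* allocate the backing pages */

        /* release the first two pages but keep the file size */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 4096 * 2))
                perror("fallocate");

        munmap(p, len);
        close(fd);
        return 0;
}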
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 51 ++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 210d25a97deac..ce09b2b3a0bc1 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -831,16 +831,23 @@ int sp_group_id_by_pid(int pid) } EXPORT_SYMBOL_GPL(sp_group_id_by_pid);
-static loff_t addr_to_offset(unsigned long addr, struct sp_area *spa) +static loff_t addr_offset(struct sp_area *spa) { + unsigned long addr; + + if (unlikely(!spa)) { + WARN(1, "invalid spa when calculate addr offset\n"); + return 0; + } + addr = spa->va_start; + if (sp_area_customized == false) return (loff_t)(addr - MMAP_SHARE_POOL_START);
- if (spa && spa->spg != spg_none) + if (spa->spg != spg_none) return (loff_t)(addr - spa->spg->dvpp_va_start); - - pr_err("addr doesn't belong to share pool range\n"); - return addr; + else + return (loff_t)(addr - MMAP_SHARE_POOL_START); }
static struct sp_group *create_spg(int spg_id) @@ -1685,6 +1692,18 @@ static void __sp_free(struct sp_group *spg, unsigned long addr, } }
+/* Free the memory of the backing shmem or hugetlbfs */ +static void sp_fallocate(struct sp_area *spa) +{ + int ret; + unsigned long mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; + unsigned long offset = addr_offset(spa); + + ret = vfs_fallocate(spa_file(spa), mode, offset, spa_size(spa)); + if (ret) + WARN(1, "sp fallocate failed %d\n", ret); +} + /** * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. @@ -1697,8 +1716,6 @@ static void __sp_free(struct sp_group *spg, unsigned long addr, int sp_free(unsigned long addr) { struct sp_area *spa; - int mode; - loff_t offset; int ret = 0;
check_interrupt_context(); @@ -1753,14 +1770,7 @@ int sp_free(unsigned long addr) down_read(&spa->spg->rw_lock);
__sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); - - /* Free the memory of the backing shmem or hugetlbfs */ - mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; - offset = addr_to_offset(addr, spa); - ret = vfs_fallocate(spa_file(spa), mode, offset, spa_size(spa)); - if (ret) - pr_err("fallocate in sp free failed: %d\n", ret); - + sp_fallocate(spa); up_read(&spa->spg->rw_lock);
/* pointer stat may be invalid because of kthread buff_module_guard_work */ @@ -1788,7 +1798,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, unsigned long flags = MAP_FIXED | MAP_SHARED | MAP_POPULATE | MAP_SHARE_POOL; unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY; - unsigned long pgoff = addr_to_offset(addr, spa) >> PAGE_SHIFT; + unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT;
/* Mark the mapped region to be locked. After the MAP_LOCKED is enable, * multiple tasks will preempt resources, causing performance loss. @@ -1931,7 +1941,6 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) struct file *file; unsigned long size_aligned; int ret = 0; - unsigned long mode, offset; unsigned int noreclaim_flag; struct sp_group_node *spg_node; struct sp_alloc_context ac; @@ -2026,13 +2035,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) pr_warn_ratelimited("allocation failed due to mm populate failed" "(potential no enough memory when -12): %d\n", ret);
- mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; - offset = addr_to_offset(sp_addr, spa); - - ret = vfs_fallocate(spa_file(spa), mode, offset, spa_size(spa)); - if (ret) - pr_err("fallocate in allocation failed %d\n", ret); - + sp_fallocate(spa); if (file == spg->file_hugetlb) { spg->hugepage_failures++;
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Refactor sp_alloc to improve its readability.
Extract sp_alloc_mmap_populate, which consists of sp_alloc_mmap and sp_alloc_populate.
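As a rough userspace analogue of the two stages that are now separated (a hypothetical demo, not kernel code): first establish the shared mapping, then fault every page in as an explicit second step; the kernel side performs that second step with do_mm_populate().

#define _GNU_SOURCE
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        size_t len = 4096 * 8;
        int fd = memfd_create("map-populate-demo", 0);
        char *p;
        size_t off;

        if (fd < 0 || ftruncate(fd, len) < 0)
                return 1;

        /* stage 1, the sp_alloc_mmap() part: create the shared mapping */
        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return 1;

        /* stage 2, the sp_alloc_populate() part: fault the pages in right now
         * rather than taking page faults later */
        for (off = 0; off < len; off += 4096)
                p[off] = 0;

        munmap(p, len);
        close(fd);
        return 0;
}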
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 4 +- mm/share_pool.c | 277 +++++++++++++++++++++++-------------- 2 files changed, 177 insertions(+), 104 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 2b052efa69072..e37b39009d83f 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -58,6 +58,8 @@ extern bool vmap_allow_huge;
struct sp_spg_stat { int spg_id; + /* record the number of hugepage allocation failures */ + atomic_t hugepage_failures; /* number of sp_area */ atomic_t spa_num; /* total size of all sp_area from sp_alloc and k2u */ @@ -98,8 +100,6 @@ struct sp_spg_stat { */ struct sp_group { int id; - /* record the number of hugepage allocation failures */ - int hugepage_failures; struct file *file; struct file *file_hugetlb; /* number of process in this group */ diff --git a/mm/share_pool.c b/mm/share_pool.c index ce09b2b3a0bc1..ccbfa0e30c516 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -425,6 +425,7 @@ static struct sp_spg_stat *create_spg_stat(int spg_id) }
stat->spg_id = spg_id; + atomic_set(&stat->hugepage_failures, 0); atomic_set(&stat->spa_num, 0); atomic64_set(&stat->size, 0); atomic64_set(&stat->alloc_nsize, 0); @@ -878,7 +879,6 @@ static struct sp_group *create_spg(int spg_id) spg->id = spg_id; spg->is_alive = true; spg->proc_num = 0; - spg->hugepage_failures = 0; spg->dvpp_multi_spaces = false; spg->owner = current->group_leader; atomic_set(&spg->use_count, 1); @@ -1831,6 +1831,7 @@ struct sp_alloc_context { unsigned long sp_flags; unsigned long populate; int state; + bool need_fallocate; };
static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, @@ -1915,9 +1916,178 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, ac->size = size; ac->sp_flags = sp_flags; ac->state = ALLOC_NORMAL; + ac->need_fallocate = false; return 0; }
+static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, + struct sp_group_node *spg_node) +{ + __sp_free(spa->spg, spa->va_start, spa->real_size, mm); +} + +static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, + struct sp_group_node *spg_node, struct sp_alloc_context *ac) +{ + int ret = 0; + unsigned long mmap_addr; + unsigned long prot; + unsigned long sp_addr = spa->va_start; + unsigned long populate = 0; + struct vm_area_struct *vma; + + down_write(&mm->mmap_sem); + if (unlikely(mm->core_state)) { + up_write(&mm->mmap_sem); + sp_alloc_unmap(mm, spa, spg_node); + ac->state = ALLOC_NOMEM; + pr_info("allocation encountered coredump\n"); + return -EFAULT; + } + + prot = spg_node->prot; + + /* when success, mmap_addr == spa->va_start */ + mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); + if (IS_ERR_VALUE(mmap_addr)) { + up_write(&mm->mmap_sem); + sp_alloc_unmap(mm, spa, spg_node); + pr_err("sp mmap in allocation failed %ld\n", mmap_addr); + return PTR_ERR((void *)mmap_addr); + } + + if (unlikely(populate == 0)) { + up_write(&mm->mmap_sem); + pr_err("allocation sp mmap populate failed\n"); + ret = -EFAULT; + goto unmap; + } + ac->populate = populate; + + vma = find_vma(mm, sp_addr); + if (unlikely(!vma)) { + up_write(&mm->mmap_sem); + WARN(1, "allocation failed, can't find %lx vma\n", sp_addr); + ret = -EINVAL; + goto unmap; + } + /* clean PTE_RDONLY flags or trigger SMMU event */ + if (prot & PROT_WRITE) + vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); + up_write(&mm->mmap_sem); + + return ret; + +unmap: + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + return ret; +} + +static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac) +{ + struct sp_spg_stat *stat = ac->spg->stat; + + if (ac->file == ac->spg->file) { + ac->state = ALLOC_NOMEM; + return; + } + + atomic_inc(&stat->hugepage_failures); + if (!(ac->sp_flags & SP_HUGEPAGE_ONLY)) { + ac->file = ac->spg->file; + ac->size_aligned = ALIGN(ac->size, PAGE_SIZE); + ac->sp_flags &= ~SP_HUGEPAGE; + ac->state = ALLOC_RETRY; + __sp_area_drop(spa); + return; + } + ac->state = ALLOC_NOMEM; +} + +static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, + struct sp_group_node *spg_node, struct sp_alloc_context *ac) +{ + int ret = 0; + unsigned long sp_addr = spa->va_start; + unsigned int noreclaim_flag = 0; + + /* + * The direct reclaim and compact may take a long + * time. As a result, sp mutex will be hold for too + * long time to casue the hung task problem. In this + * case, set the PF_MEMALLOC flag to prevent the + * direct reclaim and compact from being executed. + * Since direct reclaim and compact are not performed + * when the fragmentation is severe or the memory is + * insufficient, 2MB continuous physical pages fail + * to be allocated. This situation is allowed. + */ + if (spa->is_hugepage) + noreclaim_flag = memalloc_noreclaim_save(); + + /* + * We are not ignoring errors, so if we fail to allocate + * physical memory we just return failure, so we won't encounter + * page fault later on, and more importantly sp_make_share_u2k() + * depends on this feature (and MAP_LOCKED) to work correctly. 
+ */ + ret = do_mm_populate(mm, sp_addr, ac->populate, 0); + if (spa->is_hugepage) { + memalloc_noreclaim_restore(noreclaim_flag); + if (ret) + sp_add_work_compact(); + } + if (ret) { + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + if (unlikely(fatal_signal_pending(current))) + pr_warn_ratelimited("allocation failed, current thread is killed\n"); + else + pr_warn_ratelimited("allocation failed due to mm populate failed" + "(potential no enough memory when -12): %d\n", ret); + sp_fallocate(spa); /* need this, otherwise memleak */ + sp_alloc_fallback(spa, ac); + } else { + ac->need_fallocate = true; + } + return ret; +} + +static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, + struct sp_group_node *spg_node, struct sp_alloc_context *ac) +{ + int ret; + + ret = sp_alloc_mmap(mm, spa, spg_node, ac); + if (ret < 0) { + if (ac->need_fallocate) { + /* e.g. second sp_mmap fail */ + sp_fallocate(spa); + ac->need_fallocate = false; + } + return ret; + } + + ret = sp_alloc_populate(mm, spa, spg_node, ac); + return ret; +} + +static int sp_alloc_mmap_populate(struct sp_area *spa, + struct sp_alloc_context *ac) +{ + int ret; + struct mm_struct *mm; + struct sp_group_node *spg_node; + + /* create mapping for each process in the group */ + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + mm = spg_node->master->mm; + ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); + if (ret) + return ret; + } + return ret; +} + /** * sp_alloc() - Allocate shared memory for all the processes in a sp_group. * @size: the size of memory to allocate. @@ -1934,15 +2104,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { struct sp_group *spg; struct sp_area *spa = NULL; - unsigned long sp_addr; - unsigned long mmap_addr; - void *p; /* return value */ - struct mm_struct *mm; - struct file *file; - unsigned long size_aligned; int ret = 0; - unsigned int noreclaim_flag; - struct sp_group_node *spg_node; struct sp_alloc_context ac;
ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); @@ -1958,99 +2120,10 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) ret = PTR_ERR(spa); goto out; } - sp_addr = spa->va_start; - - /* create mapping for each process in the group */ - list_for_each_entry(spg_node, &spg->procs, proc_node) { - unsigned long populate = 0; - struct vm_area_struct *vma; - mm = spg_node->master->mm;
- down_write(&mm->mmap_sem); - if (unlikely(mm->core_state)) { - up_write(&mm->mmap_sem); - pr_info("allocation encountered coredump\n"); - continue; - } - - mmap_addr = sp_mmap(mm, file, spa, &populate, spg_node->prot); - if (IS_ERR_VALUE(mmap_addr)) { - up_write(&mm->mmap_sem); - p = (void *)mmap_addr; - __sp_free(spg, sp_addr, size_aligned, mm); - pr_err("sp mmap in allocation failed %ld\n", mmap_addr); - goto out; - } - - p = (void *)mmap_addr; /* success */ - if (populate == 0) { - up_write(&mm->mmap_sem); - continue; - } - - vma = find_vma(mm, sp_addr); - if (unlikely(!vma)) { - up_write(&mm->mmap_sem); - pr_debug("allocation failed, can't find %lx vma\n", (unsigned long)sp_addr); - p = ERR_PTR(-EINVAL); - goto out; - } - /* clean PTE_RDONLY flags or trigger SMMU event */ - if (spg_node->prot & PROT_WRITE) - vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); - up_write(&mm->mmap_sem); - - /* - * The direct reclaim and compact may take a long - * time. As a result, sp mutex will be hold for too - * long time to casue the hung task problem. In this - * case, set the PF_MEMALLOC flag to prevent the - * direct reclaim and compact from being executed. - * Since direct reclaim and compact are not performed - * when the fragmentation is severe or the memory is - * insufficient, 2MB continuous physical pages fail - * to be allocated. This situation is allowed. - */ - if (spa->is_hugepage) - noreclaim_flag = memalloc_noreclaim_save(); - - /* - * We are not ignoring errors, so if we fail to allocate - * physical memory we just return failure, so we won't encounter - * page fault later on, and more importantly sp_make_share_u2k() - * depends on this feature (and MAP_LOCKED) to work correctly. - */ - ret = do_mm_populate(mm, sp_addr, populate, 0); - if (spa->is_hugepage) { - memalloc_noreclaim_restore(noreclaim_flag); - if (ret) - sp_add_work_compact(); - } - if (ret) { - __sp_free(spg, sp_addr, size_aligned, - (list_next_entry(spg_node, proc_node))->master->mm); - if (unlikely(fatal_signal_pending(current))) - pr_warn_ratelimited("allocation failed, current thread is killed\n"); - else - pr_warn_ratelimited("allocation failed due to mm populate failed" - "(potential no enough memory when -12): %d\n", ret); - - sp_fallocate(spa); - if (file == spg->file_hugetlb) { - spg->hugepage_failures++; - - /* fallback to small pages */ - if (!(sp_flags & SP_HUGEPAGE_ONLY)) { - file = spg->file; - size_aligned = ALIGN(size, PAGE_SIZE); - sp_flags &= ~SP_HUGEPAGE; - __sp_area_drop(spa); - goto try_again; - } - } - break; - } - } + ret = sp_alloc_mmap_populate(spa, &ac); + if (ret && ac.state == ALLOC_RETRY) + goto try_again;
out: up_read(&spg->rw_lock); @@ -3186,7 +3259,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, down_read(&spg->rw_lock); if (spg_valid(spg)) { spg_id = spg->id; - hugepage_failures = spg->hugepage_failures; + hugepage_failures = atomic_read(&spg->stat->hugepage_failures); up_read(&spg->rw_lock);
/* eliminate potential ABBA deadlock */
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Refactor sp_alloc to improve its readability. Extract sp_alloc_finish.
Suggested-by: Wang Wensheng wangwensheng4@huawei.com Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index ccbfa0e30c516..23115aa524537 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2088,6 +2088,25 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, return ret; }
+/* spa maybe an error pointer, so introduce param spg */ +static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_group *spg) +{ + /* match sp_alloc_check_prepare */ + up_read(&spg->rw_lock); + + if (!result) + sp_update_process_stat(current, true, spa); + + /* this will free spa if mmap failed */ + if (spa && !IS_ERR(spa)) + __sp_area_drop(spa); + + sp_group_drop(spg); + + sp_dump_stack(); + sp_try_to_compact(); +} + /** * sp_alloc() - Allocate shared memory for all the processes in a sp_group. * @size: the size of memory to allocate. @@ -2102,7 +2121,6 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, */ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) { - struct sp_group *spg; struct sp_area *spa = NULL; int ret = 0; struct sp_alloc_context ac; @@ -2126,19 +2144,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) goto try_again;
out: - up_read(&spg->rw_lock); - - if (!ret) - sp_update_process_stat(current, true, spa); - - /* this will free spa if mmap failed */ - if (spa && !IS_ERR(spa)) - __sp_area_drop(spa); - - sp_group_drop(spg); - - sp_dump_stack(); - sp_try_to_compact(); + sp_alloc_finish(ret, spa, ac.spg); if (ret) return ERR_PTR(ret); else
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Refactor sp_free to improve its readability. Extract sp_free_get_spa to get sp_area and check its validity.
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 131 ++++++++++++++++++++++++++++++------------------ 1 file changed, 82 insertions(+), 49 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 23115aa524537..d04a4f926e3a9 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1704,6 +1704,79 @@ static void sp_fallocate(struct sp_area *spa) WARN(1, "sp fallocate failed %d\n", ret); }
+#define FREE_CONT 1 +#define FREE_END 2 + +struct sp_free_context { + unsigned long addr; + struct sp_area *spa; + int state; +}; + +/* when success, __sp_area_drop(spa) should be used */ +static int sp_free_get_spa(struct sp_free_context *fc) +{ + int ret = 0; + unsigned long addr = fc->addr; + struct sp_area *spa; + struct sp_group_node *spg_node; + bool found = false; + + fc->state = FREE_CONT; + + spa = __find_sp_area(addr); + if (!spa) { + pr_debug("sp free invalid input addr %lx\n", addr); + return -EINVAL; + } + + if (spa->type != SPA_TYPE_ALLOC) { + ret = -EINVAL; + pr_debug("sp free failed, %lx is not sp alloc addr\n", addr); + goto drop_spa; + } + fc->spa = spa; + + /* + * Access control: an sp addr can only be freed by + * 1. another task in the same spg + * 2. a kthread + */ + if (!current->mm) + goto check_spa; + + down_read(&spa->spg->rw_lock); + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + if (spg_node->master->mm == current->mm) { + found = true; + break; + } + } + up_read(&spa->spg->rw_lock); + if (!found) { + ret = -EPERM; + goto drop_spa; + } + +check_spa: + down_write(&spa->spg->rw_lock); + if (!spg_valid(spa->spg)) { + /* we must return success(0) in this situation */ + fc->state = FREE_END; + up_write(&spa->spg->rw_lock); + goto drop_spa; + } + /* the life cycle of spa has a direct relation with sp group */ + spa->is_dead = true; + up_write(&spa->spg->rw_lock); + + return 0; + +drop_spa: + __sp_area_drop(spa); + return ret; +} + /** * sp_free() - Free the memory allocated by sp_alloc(). * @addr: the starting VA of the memory. @@ -1715,72 +1788,32 @@ static void sp_fallocate(struct sp_area *spa) */ int sp_free(unsigned long addr) { - struct sp_area *spa; int ret = 0; + struct sp_area *spa; + struct sp_free_context fc = { + .addr = addr, + };
check_interrupt_context();
- /* - * Access control: a share pool addr can only be freed by another task - * in the same spg or a kthread (such as buff_module_guard_work) - */ - spa = __find_sp_area(addr); - if (spa) { - if (current->mm != NULL) { - struct sp_group_node *spg_node; - bool found = false; - - down_read(&spa->spg->rw_lock); - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - if (spg_node->master->mm == current->mm) { - found = true; - break; - } - } - up_read(&spa->spg->rw_lock); - if (!found) { - ret = -EPERM; - goto drop_spa; - } - } - - down_write(&spa->spg->rw_lock); - if (!spg_valid(spa->spg)) { - up_write(&spa->spg->rw_lock); - goto drop_spa; - } - /* the life cycle of spa has a direct relation with sp group */ - spa->is_dead = true; - up_write(&spa->spg->rw_lock); - - } else { /* spa == NULL */ - ret = -EINVAL; - pr_debug("sp free invalid input addr %lx\n", (unsigned long)addr); + ret = sp_free_get_spa(&fc); + if (ret || fc.state == FREE_END) goto out; - } - - if (spa->type != SPA_TYPE_ALLOC) { - ret = -EINVAL; - pr_debug("sp free failed, addr %lx is not from sp alloc\n", (unsigned long)addr); - goto drop_spa; - }
- sp_dump_stack(); + spa = fc.spa;
down_read(&spa->spg->rw_lock); - __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); sp_fallocate(spa); up_read(&spa->spg->rw_lock);
- /* pointer stat may be invalid because of kthread buff_module_guard_work */ + /* current->mm == NULL: allow kthread */ if (current->mm == NULL) atomic64_sub(spa->real_size, &kthread_stat.alloc_size); else sp_update_process_stat(current, false, spa);
-drop_spa: - __sp_area_drop(spa); + __sp_area_drop(spa); /* match __find_sp_area in sp_free_get_spa */ out: sp_dump_stack(); sp_try_to_compact();
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
The sp_alloc pass-through requirement was not part of the original design, and its first implementation confused many people.
When a process that is not in any sp group calls sp_alloc, it enters the pass-through procedure. In the first implementation, such a process is added to a special sp group whose id lies in [SPG_ID_DVPP_PASS_THROUGH_MIN, SPG_ID_DVPP_PASS_THROUGH_MAX]. The main benefit is that most of the sp_alloc procedure can be reused directly.
However, many code reviewers found this special sp group confusing. The call to sp_group_add_task in sp_alloc is not only abrupt but also buggy. Moreover, a process that has entered the sp_alloc pass-through procedure can never be added to any other sp group later, because it is already in the special group. This is a scalability limitation.
Ideally, the sp_alloc pass-through procedure doesn't need any sp group at all. That's why we redesign it around spg_none, which manages the accounting statistics of all sp_areas generated by sp_alloc pass-through and k2u_task but contains no process.
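A minimal caller-side sketch of the resulting behaviour (hypothetical module code, assuming only the prototypes exported in include/linux/share_pool.h): a task that never joined an sp_group can still allocate with SPG_ID_DEFAULT, the area is accounted to spg_none, and only the applier process may free it.

#include <linux/err.h>
#include <linux/share_pool.h>
#include <linux/sizes.h>

static int pass_through_demo(void)
{
        void *p;

        /* no sp_group_add_task() has ever been called for this task */
        p = sp_alloc(SZ_2M, 0, SPG_ID_DEFAULT);
        if (IS_ERR(p))
                return PTR_ERR(p);

        /* the mapping exists only in the calling process */

        return sp_free((unsigned long)p);
}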
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 193 ++++++++++++++++++++++++++---------------------- 1 file changed, 103 insertions(+), 90 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index d04a4f926e3a9..09b8578183ad5 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1704,6 +1704,19 @@ static void sp_fallocate(struct sp_area *spa) WARN(1, "sp fallocate failed %d\n", ret); }
+static void sp_free_unmap_fallocate(struct sp_area *spa) +{ + if (spa->spg != spg_none) { + down_read(&spa->spg->rw_lock); + __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); + sp_fallocate(spa); + up_read(&spa->spg->rw_lock); + } else { + sp_munmap(current->mm, spa->va_start, spa_size(spa)); + sp_fallocate(spa); + } +} + #define FREE_CONT 1 #define FREE_END 2
@@ -1719,8 +1732,6 @@ static int sp_free_get_spa(struct sp_free_context *fc) int ret = 0; unsigned long addr = fc->addr; struct sp_area *spa; - struct sp_group_node *spg_node; - bool found = false;
fc->state = FREE_CONT;
@@ -1737,39 +1748,51 @@ static int sp_free_get_spa(struct sp_free_context *fc) } fc->spa = spa;
- /* - * Access control: an sp addr can only be freed by - * 1. another task in the same spg - * 2. a kthread - */ - if (!current->mm) - goto check_spa; + if (spa->spg != spg_none) { + struct sp_group_node *spg_node; + bool found = false;
- down_read(&spa->spg->rw_lock); - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - if (spg_node->master->mm == current->mm) { - found = true; - break; + /* + * Access control: an sp addr can only be freed by + * 1. another task in the same spg + * 2. a kthread + * + * a passthrough addr can only be freed by the applier process + */ + if (!current->mm) + goto check_spa; + + down_read(&spa->spg->rw_lock); + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + if (spg_node->master->mm == current->mm) { + found = true; + break; + } + } + up_read(&spa->spg->rw_lock); + if (!found) { + ret = -EPERM; + goto drop_spa; } - } - up_read(&spa->spg->rw_lock); - if (!found) { - ret = -EPERM; - goto drop_spa; - }
check_spa: - down_write(&spa->spg->rw_lock); - if (!spg_valid(spa->spg)) { - /* we must return success(0) in this situation */ - fc->state = FREE_END; + down_write(&spa->spg->rw_lock); + if (!spg_valid(spa->spg)) { + fc->state = FREE_END; + up_write(&spa->spg->rw_lock); + goto drop_spa; + /* we must return success(0) in this situation */ + } + /* the life cycle of spa has a direct relation with sp group */ + spa->is_dead = true; up_write(&spa->spg->rw_lock); - goto drop_spa; - } - /* the life cycle of spa has a direct relation with sp group */ - spa->is_dead = true; - up_write(&spa->spg->rw_lock);
+ } else { + if (current->tgid != spa->applier) { + ret = -EPERM; + goto drop_spa; + } + } return 0;
drop_spa: @@ -1789,7 +1812,6 @@ static int sp_free_get_spa(struct sp_free_context *fc) int sp_free(unsigned long addr) { int ret = 0; - struct sp_area *spa; struct sp_free_context fc = { .addr = addr, }; @@ -1800,20 +1822,15 @@ int sp_free(unsigned long addr) if (ret || fc.state == FREE_END) goto out;
- spa = fc.spa; - - down_read(&spa->spg->rw_lock); - __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL); - sp_fallocate(spa); - up_read(&spa->spg->rw_lock); + sp_free_unmap_fallocate(fc.spa);
/* current->mm == NULL: allow kthread */ if (current->mm == NULL) - atomic64_sub(spa->real_size, &kthread_stat.alloc_size); + atomic64_sub(fc.spa->real_size, &kthread_stat.alloc_size); else - sp_update_process_stat(current, false, spa); + sp_update_process_stat(current, false, fc.spa);
- __sp_area_drop(spa); /* match __find_sp_area in sp_free_get_spa */ + __sp_area_drop(fc.spa); /* match __find_sp_area in sp_free_get_spa */ out: sp_dump_stack(); sp_try_to_compact(); @@ -1870,8 +1887,7 @@ struct sp_alloc_context { static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, int spg_id, struct sp_alloc_context *ac) { - int ret; - struct sp_group *spg, *spg_tmp; + struct sp_group *spg;
check_interrupt_context();
@@ -1897,44 +1913,23 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (sp_flags & SP_HUGEPAGE_ONLY) sp_flags |= SP_HUGEPAGE;
- spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); - if (!spg) { /* DVPP pass through scene: first call sp_alloc() */ - /* mdc scene hack */ - unsigned long prot = PROT_READ | PROT_WRITE; - if (enable_mdc_default_group) - ret = sp_group_add_task(current->tgid, prot, spg_id); - else - ret = sp_group_add_task(current->tgid, prot, - SPG_ID_DVPP_PASS_THROUGH); - /* - * The multi-thread contention may cause repeated joins to the group. - * The judgment is added to prevent exit in this case. - */ - if (ret < 0 && (ret != -EEXIST)) { - pr_err_ratelimited("allocation failed, add group error %d in DVPP pass through\n", ret); - return ret; - } - spg = get_first_group(current->mm); - } else { /* other scenes */ - if (spg_id != SPG_ID_DEFAULT) { - spg_tmp = __sp_find_spg(current->pid, spg_id); - if (spg != spg_tmp) { - sp_group_drop(spg); - if (spg_tmp) - sp_group_drop(spg_tmp); - return -ENODEV; - } - sp_group_drop(spg_tmp); + if (spg_id != SPG_ID_DEFAULT) { + spg = __sp_find_spg(current->pid, spg_id); + if (!spg) { + pr_err_ratelimited("allocation failed, task not in group\n"); + return -ENODEV; } - }
- /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("sp alloc failed, spg is dead\n"); - return -ENODEV; + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, spg is dead\n"); + return -ENODEV; + } + } else { /* alocation pass through scene */ + spg = spg_none; }
if (sp_flags & SP_HUGEPAGE) { @@ -1956,7 +1951,8 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node) { - __sp_free(spa->spg, spa->va_start, spa->real_size, mm); + if (spa->spg != spg_none) + __sp_free(spa->spg, spa->va_start, spa->real_size, mm); }
static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, @@ -1964,7 +1960,8 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, { int ret = 0; unsigned long mmap_addr; - unsigned long prot; + /* pass through default permission */ + unsigned long prot = PROT_READ | PROT_WRITE; unsigned long sp_addr = spa->va_start; unsigned long populate = 0; struct vm_area_struct *vma; @@ -1978,7 +1975,8 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return -EFAULT; }
- prot = spg_node->prot; + if (spg_node) + prot = spg_node->prot;
/* when success, mmap_addr == spa->va_start */ mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); @@ -2012,7 +2010,10 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, return ret;
unmap: - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + if (spa->spg != spg_none) + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + else + sp_munmap(mm, spa->va_start, spa->real_size); return ret; }
@@ -2071,7 +2072,11 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, sp_add_work_compact(); } if (ret) { - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + if (spa->spg != spg_none) + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + else + sp_munmap(mm, spa->va_start, spa->real_size); + if (unlikely(fatal_signal_pending(current))) pr_warn_ratelimited("allocation failed, current thread is killed\n"); else @@ -2111,12 +2116,16 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct mm_struct *mm; struct sp_group_node *spg_node;
- /* create mapping for each process in the group */ - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - mm = spg_node->master->mm; - ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); - if (ret) - return ret; + if (spa->spg == spg_none) { + ret = __sp_alloc_mmap_populate(current->mm, spa, NULL, ac); + } else { + /* create mapping for each process in the group */ + list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { + mm = spg_node->master->mm; + ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); + if (ret) + return ret; + } } return ret; } @@ -2124,8 +2133,11 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, /* spa maybe an error pointer, so introduce param spg */ static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_group *spg) { + bool is_pass_through = spg == spg_none ? true : false; + /* match sp_alloc_check_prepare */ - up_read(&spg->rw_lock); + if (!is_pass_through) + up_read(&spg->rw_lock);
if (!result) sp_update_process_stat(current, true, spa); @@ -2134,7 +2146,8 @@ static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_group *sp if (spa && !IS_ERR(spa)) __sp_area_drop(spa);
- sp_group_drop(spg); + if (!is_pass_through) + sp_group_drop(spg);
sp_dump_stack(); sp_try_to_compact();
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
SPG_ID_DVPP_PASS_THROUGH_MIN, SPG_ID_DVPP_PASS_THROUGH_MAX and SPG_ID_DVPP_PASS_THROUGH are now useless.
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 5 +---- mm/share_pool.c | 18 ++---------------- 2 files changed, 3 insertions(+), 20 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index e37b39009d83f..40ccab0e77fad 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -16,16 +16,13 @@ #define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ (_AC(DEVICE_ID_MASK, UL) << DEVICE_ID_SHIFT))
-#define SPG_ID_NONE -1 /* not associated with sp_group, only for specified thread */ +#define SPG_ID_NONE (-1) /* not associated with sp_group, only for specified thread */ #define SPG_ID_DEFAULT 0 /* use the spg id of current thread */ #define SPG_ID_MIN 1 /* valid id should be >= 1 */ #define SPG_ID_MAX 99999 #define SPG_ID_AUTO_MIN 100000 #define SPG_ID_AUTO_MAX 199999 #define SPG_ID_AUTO 200000 /* generate group id automatically */ -#define SPG_ID_DVPP_PASS_THROUGH_MIN 800000 -#define SPG_ID_DVPP_PASS_THROUGH_MAX 899999 -#define SPG_ID_DVPP_PASS_THROUGH 900000
#define MAX_DEVID 2 /* the max num of Da-vinci devices */
diff --git a/mm/share_pool.c b/mm/share_pool.c index 09b8578183ad5..4dde5f8582cb8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -699,9 +699,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, static void free_sp_group_id(int spg_id) { /* ida operation is protected by an internal spin_lock */ - if ((spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) || - (spg_id >= SPG_ID_DVPP_PASS_THROUGH_MIN && - spg_id <= SPG_ID_DVPP_PASS_THROUGH_MAX)) + if (spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) ida_free(&sp_group_id_ida, spg_id); }
@@ -1086,8 +1084,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) if (enable_mdc_default_group) spg_id = mdc_default_group_id;
- if ((spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) - && spg_id != SPG_ID_DVPP_PASS_THROUGH) { + if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { pr_err_ratelimited("add group failed, invalid group id %d\n", spg_id); return -EINVAL; } @@ -1122,17 +1119,6 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) id_newly_generated = true; }
- if (spg_id == SPG_ID_DVPP_PASS_THROUGH) { - spg_id = ida_alloc_range(&sp_group_id_ida, - SPG_ID_DVPP_PASS_THROUGH_MIN, - SPG_ID_DVPP_PASS_THROUGH_MAX, GFP_ATOMIC); - if (spg_id < 0) { - pr_err_ratelimited("add group failed, DVPP auto generate group id failed\n"); - return spg_id; - } - id_newly_generated = true; - } - down_write(&sp_group_sem);
rcu_read_lock();
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
The procedure of finding a task_struct and increasing its refcount is duplicated. Extract function get_task to eliminate the redundant code.
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 4dde5f8582cb8..1cc2d3eba264e 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -729,6 +729,24 @@ static void sp_group_drop(struct sp_group *spg) free_sp_group(spg); }
+/* use with put_task_struct(task) */ +static int get_task(int pid, struct task_struct **task) +{ + struct task_struct *tsk; + + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (!tsk || (tsk->flags & PF_EXITING)) { + rcu_read_unlock(); + return -ESRCH; + } + get_task_struct(tsk); + rcu_read_unlock(); + + *task = tsk; + return 0; +} + static struct sp_group *get_first_group(struct mm_struct *mm) { struct sp_group *spg = NULL; @@ -757,13 +775,7 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) int ret = 0;
if (spg_id == SPG_ID_DEFAULT) { - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (!tsk || (tsk->flags & PF_EXITING)) - ret = -ESRCH; - else - get_task_struct(tsk); - rcu_read_unlock(); + ret = get_task(pid, &tsk); if (ret) return NULL;
@@ -1121,15 +1133,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id)
down_write(&sp_group_sem);
- rcu_read_lock(); - - tsk = find_task_by_vpid(pid); - if (!tsk || (tsk->flags & PF_EXITING)) - ret = -ESRCH; - else - get_task_struct(tsk); - - rcu_read_unlock(); + ret = get_task(pid, &tsk); if (ret) { up_write(&sp_group_sem); free_new_spg_id(id_newly_generated, spg_id);
From: Wang Wensheng wangwensheng4@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------
Since a process can be added to multiple groups, sp_group_id_by_pid should return an array of spg_ids that covers all of them.
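A hypothetical in-kernel caller of the new interface (assuming only the prototype introduced here) passes the array capacity in *num and reads the actual group count back from it; -E2BIG signals that the buffer was too small.

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/share_pool.h>

static void dump_group_ids(int pid)
{
        int ids[16];
        int num = ARRAY_SIZE(ids);
        int i, ret;

        ret = sp_group_id_by_pid(pid, ids, &num);
        if (ret) {
                pr_info("pid %d: no sp_group info, err %d\n", pid, ret);
                return;
        }

        for (i = 0; i < num; i++)
                pr_info("pid %d is in sp_group %d\n", pid, ids[i]);
}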
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 2 +- mm/share_pool.c | 56 +++++++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 40ccab0e77fad..403221c9ac931 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -203,7 +203,7 @@ static inline void sp_init_mm(struct mm_struct *mm) extern int sp_group_add_task(int pid, unsigned long prot, int spg_id); extern int sp_group_exit(struct mm_struct *mm); extern void sp_group_post_exit(struct mm_struct *mm); -extern int sp_group_id_by_pid(int pid); +extern int sp_group_id_by_pid(int pid, int *spg_ids, int *num); extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)); extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); diff --git a/mm/share_pool.c b/mm/share_pool.c index 1cc2d3eba264e..70ff1cb2393c6 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -814,31 +814,61 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) }
/** - * sp_group_id_by_pid() - Get the sp_group ID of a process. + * sp_group_id_by_pid() - Get the sp_group ID array of a process. * @pid: pid of target process. + * @spg_ids point to an array to save the group ids the process belongs to + * @num input the spg_ids array size; output the spg number of the process * * Return: * >0 - the sp_group ID. * -ENODEV - target process doesn't belong to any sp_group. + * -EINVAL - spg_ids or num is NULL. + * -E2BIG - the num of groups process belongs to is larger than *num */ -int sp_group_id_by_pid(int pid) +int sp_group_id_by_pid(int pid, int *spg_ids, int *num) { - struct sp_group *spg; - int spg_id = -ENODEV; + int ret = 0; + struct sp_group_node *node; + struct sp_group_master *master = NULL; + struct task_struct *tsk;
check_interrupt_context();
- spg = __sp_find_spg(pid, SPG_ID_DEFAULT); - if (!spg) - return -ENODEV; + if (!spg_ids || num <= 0) + return -EINVAL;
- down_read(&spg->rw_lock); - if (spg_valid(spg)) - spg_id = spg->id; - up_read(&spg->rw_lock); + ret = get_task(pid, &tsk); + if (ret) + return ret;
- sp_group_drop(spg); - return spg_id; + down_read(&sp_group_sem); + task_lock(tsk); + if (tsk->mm) + master = tsk->mm->sp_group_master; + task_unlock(tsk); + + if (!master) { + ret = -ENODEV; + goto out_up_read; + } + + if (!master->count) { + ret = -ENODEV; + goto out_up_read; + } + if ((unsigned int)*num < master->count) { + ret = -E2BIG; + goto out_up_read; + } + *num = master->count; + + list_for_each_entry(node, &master->node_list, group_node) + *(spg_ids++) = node->spg->id; + +out_up_read: + up_read(&sp_group_sem); + put_task_struct(tsk); + return ret; } EXPORT_SYMBOL_GPL(sp_group_id_by_pid);
From: Wang Wensheng wangwensheng4@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------
Since a single process could be added to multiple groups, we must take the spg_id into account when we do k2spg.
Change: the caller must provide a valid spg_id for k2spg. SPG_ID_DEFAULT and SPG_ID_NONE are used to indicate that the input kva should be shared with the current process only.
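Under the new contract the same entry point covers both cases. A hypothetical caller (assuming the existing sp_make_share_k2u() prototype) could look like:

#include <linux/sched.h>
#include <linux/share_pool.h>

/* share a kernel buffer with the calling task only */
static void *k2u_to_self(unsigned long kva, unsigned long size)
{
        return sp_make_share_k2u(kva, size, 0, current->tgid, SPG_ID_NONE);
}

/* share the same buffer with every process in a group the caller belongs to */
static void *k2u_to_group(unsigned long kva, unsigned long size, int spg_id)
{
        return sp_make_share_k2u(kva, size, 0, current->tgid, spg_id);
}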
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 250 +++++++++++++++++++++++------------------------- 1 file changed, 121 insertions(+), 129 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 70ff1cb2393c6..ed553f64beac9 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2324,77 +2324,116 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, }
/** - * sp_make_share_kva_to_task() - Share kernel memory to a specified task. + * sp_make_share_kva_to_task() - Share kernel memory to current task. * @kva: the VA of shared kernel memory - * @spa: the sp area associated with the shared user address - * @mm: mm_struct of target task + * @size: the size of area to share, should be aligned properly + * @sp_flags: the flags for the opreation * * Return: * * if succeed, return the shared user address to start at. * * if fail, return the pointer of -errno. */ -static void *sp_make_share_kva_to_task(unsigned long kva, struct sp_area *spa, - struct mm_struct *mm) +static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags) { - unsigned long ret_addr; + void *uva; + struct sp_area *spa; + struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE;
- ret_addr = sp_remap_kva_to_vma(kva, spa, mm, prot); - if (IS_ERR_VALUE(ret_addr)) { - pr_err("remap k2u to task failed %ld\n", ret_addr); - return ERR_PTR(ret_addr); + down_write(&sp_group_sem); + stat = sp_init_process_stat(current, current->mm, spg_none); + up_write(&sp_group_sem); + if (IS_ERR(stat)) { + pr_err_ratelimited("k2u_task init process stat failed %lx\n", + PTR_ERR(stat)); + return stat; }
- spa->mm = mm; - return (void *)ret_addr; + spa = sp_alloc_area(size, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); + if (IS_ERR(spa)) { + pr_err_ratelimited("alloc spa failed in k2u_task " + "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); + return spa; + } + + spa->kva = kva; + + uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot); + __sp_area_drop(spa); + if (IS_ERR(uva)) + pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); + else { + update_spg_proc_stat(size, true, stat, SPA_TYPE_K2TASK); + spa->mm = current->mm; + } + + return uva; }
-static void *sp_make_share_kva_to_spg(unsigned long kva, struct sp_area *spa, - struct sp_group *spg) +/** + * Share kernel memory to a spg, the current process must be in that group + * @kva: the VA of shared kernel memory + * @size: the size of area to share, should be aligned properly + * @sp_flags: the flags for the opreation + * @spg: the sp group to be shared with + * + * Return: the shared user address to start at + */ +static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size, + unsigned long sp_flags, struct sp_group *spg) { + struct sp_area *spa; struct mm_struct *mm; - unsigned long ret_addr = -ENODEV; - unsigned long uva = -ENODEV; - void *p = ERR_PTR(-ENODEV); struct sp_group_node *spg_node; + void *uva = ERR_PTR(-ENODEV); + + down_read(&spg->rw_lock); + spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2SPG, current->tgid); + if (IS_ERR(spa)) { + up_read(&spg->rw_lock); + pr_err_ratelimited("alloc spa failed in k2u_spg " + "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); + return spa; + } + + spa->kva = kva;
list_for_each_entry(spg_node, &spg->procs, proc_node) { mm = spg_node->master->mm; - ret_addr = sp_remap_kva_to_vma(kva, spa, mm, spg_node->prot); - if (IS_ERR_VALUE(ret_addr)) { - pr_err("remap k2u to spg failed %ld\n", ret_addr); + uva = (void *)sp_remap_kva_to_vma(kva, spa, mm, spg_node->prot); + if (IS_ERR(uva)) { + pr_err("remap k2u to spg failed %ld\n", PTR_ERR(uva)); __sp_free(spg, spa->va_start, spa_size(spa), mm); - p = ERR_PTR(ret_addr); goto out; } - - uva = ret_addr; } - p = (void *)uva; + out: - return p; + up_read(&spg->rw_lock); + __sp_area_drop(spa); + if (!IS_ERR(uva)) + sp_update_process_stat(current, true, spa); + + return uva; }
-static bool vmalloc_area_set_flag(struct sp_area *spa, unsigned long kva, unsigned long flags) +static bool vmalloc_area_set_flag(unsigned long kva, unsigned long flags) { struct vm_struct *area;
area = find_vm_area((void *)kva); if (area) { area->flags |= flags; - spa->kva = kva; return true; }
return false; }
-static bool vmalloc_area_clr_flag(struct sp_area *spa, unsigned long kva, unsigned long flags) +static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) { struct vm_struct *area;
- spa->kva = 0; - area = find_vm_area((void *)kva); if (area) { area->flags &= ~flags; @@ -2404,6 +2443,35 @@ static bool vmalloc_area_clr_flag(struct sp_area *spa, unsigned long kva, unsign return false; }
+/* + * return + * 1 k2task + * 0 k2group + * <0 error code + */ +static int is_k2task(int spg_id) +{ + if (share_pool_group_mode == SINGLE_GROUP_MODE) { + struct sp_group *spg = get_first_group(current->mm); + + if (!spg) { + if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) + return -EINVAL; + else + return 1; + } else { + int ret = 0; + + if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) + ret = -EINVAL; + sp_group_drop(spg); + + return ret; + } + } else + return (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) ? 1 : 0; +} + /** * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. * @kva: the VA of shared kernel memory. @@ -2425,13 +2493,10 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { void *uva; - struct sp_group *spg; - struct sp_area *spa; unsigned long kva_aligned; unsigned long size_aligned; unsigned int page_size = PAGE_SIZE; - struct mm_struct *mm = current->mm; - int is_hugepage; + int is_hugepage, to_task;
check_interrupt_context();
@@ -2440,7 +2505,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, return ERR_PTR(-EINVAL); }
- if (mm == NULL) { + if (!current->mm) { pr_err_ratelimited("k2u: kthread is not allowed\n"); return ERR_PTR(-EPERM); } @@ -2460,103 +2525,30 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, kva_aligned = ALIGN_DOWN(kva, page_size); size_aligned = ALIGN(kva + size, page_size) - kva_aligned;
- spg = get_first_group(mm); - if (spg == NULL) { - /* k2u to task */ - struct spg_proc_stat *stat; - - if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { - pr_err_ratelimited("k2u_task invalid spg id %d\n", spg_id); - return ERR_PTR(-EINVAL); - } - - down_write(&sp_group_sem); - stat = sp_init_process_stat(current, mm, spg_none); - up_write(&sp_group_sem); - if (IS_ERR(stat)) { - pr_err_ratelimited("k2u_task init process stat failed %lx\n", - PTR_ERR(stat)); - return stat; - } - - spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); - if (IS_ERR(spa)) { - pr_err_ratelimited("alloc spa failed in k2u_task " - "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); - return spa; - } - - if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) { - pr_debug("k2u_task kva %lx is not valid\n", (unsigned long)kva_aligned); - goto out_drop_spa; - } - - uva = sp_make_share_kva_to_task(kva_aligned, spa, mm); - - if (!IS_ERR(uva)) - update_spg_proc_stat(size_aligned, true, - stat, SPA_TYPE_K2TASK); - - goto finish; + if (!vmalloc_area_set_flag(kva_aligned, VM_SHAREPOOL)) { + pr_debug("k2u_task kva %lx is not valid\n", kva_aligned); + return ERR_PTR(-EINVAL); }
- down_read(&spg->rw_lock); - if (spg_valid(spg)) { - /* k2u to group */ - if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) { - up_read(&spg->rw_lock); - pr_err_ratelimited("k2u_spg invalid spg id %d\n", spg_id); - uva = ERR_PTR(-EINVAL); - goto out_drop_spg; - } + to_task = is_k2task(spg_id); + if (to_task == 1) + uva = sp_make_share_kva_to_task(kva_aligned, size_aligned, sp_flags); + else if (to_task == 0) { + struct sp_group *spg;
- if (enable_share_k2u_spg) - spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG, current->tgid); - else - spa = sp_alloc_area(size_aligned, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid); - - if (IS_ERR(spa)) { - up_read(&spg->rw_lock); - pr_err_ratelimited("alloc spa failed in k2u_spg " - "(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa)); - uva = spa; - goto out_drop_spg; - } - - if (!vmalloc_area_set_flag(spa, kva_aligned, VM_SHAREPOOL)) { - up_read(&spg->rw_lock); - pr_debug("k2u_spg kva %lx is not valid\n", (unsigned long)kva_aligned); - goto out_drop_spa; - } - - if (spa->spg != spg_none) - uva = sp_make_share_kva_to_spg(kva_aligned, spa, spg); - else - uva = sp_make_share_kva_to_task(kva_aligned, spa, mm); - } else { - pr_err_ratelimited("k2u failed, sp group is dead\n"); - uva = ERR_PTR(-ENODEV); - } - up_read(&spg->rw_lock); - - if (!IS_ERR(uva)) - sp_update_process_stat(current, true, spa); + spg = __sp_find_spg(current->pid, spg_id); + if (spg) { + uva = sp_make_share_kva_to_spg(kva_aligned, size_aligned, sp_flags, spg); + sp_group_drop(spg); + } else + uva = ERR_PTR(-ENODEV); + } else + uva = ERR_PTR(to_task);
-finish: - if (!IS_ERR(uva)) { + if (IS_ERR(uva)) + vmalloc_area_clr_flag(kva_aligned, VM_SHAREPOOL); + else uva = uva + (kva - kva_aligned); - } else { - /* associate vma and spa */ - if (!vmalloc_area_clr_flag(spa, kva_aligned, VM_SHAREPOOL)) - pr_debug("k2u_spg clear kva %lx is not valid\n", - (unsigned long)kva_aligned); - } - -out_drop_spa: - __sp_area_drop(spa); -out_drop_spg: - if (spg) - sp_group_drop(spg);
sp_dump_stack(); return uva; @@ -2998,9 +2990,9 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) sp_dump_stack();
out_clr_flag: - /* deassociate vma and spa */ - if (!vmalloc_area_clr_flag(spa, spa->kva, VM_SHAREPOOL)) + if (!vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL)) pr_debug("clear spa->kva %ld is not valid\n", spa->kva); + spa->kva = 0;
out_drop_area: __sp_area_drop(spa);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Make va_start and va_size global rather than belonging to any particular group, so that the configuration applies to all sp_groups.
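A hypothetical caller sketch (the sp_config_dvpp_range() prototype itself is unchanged): the range is configured once per device id and, after this patch, applies to SP_DVPP allocations in every group rather than to a single one.

#include <linux/sched.h>
#include <linux/share_pool.h>
#include <linux/types.h>

static bool reserve_dvpp_range(unsigned long start, unsigned long size, int device_id)
{
        /*
         * returns false if a parameter is invalid or a range has already
         * been configured; the pid argument is illustrative here
         */
        return sp_config_dvpp_range(start, size, device_id, current->tgid);
}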
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 6 +-- mm/share_pool.c | 99 +++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 55 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 403221c9ac931..c3cf15f5da051 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -93,7 +93,7 @@ struct sp_spg_stat { * tell us which 16G memory range is reserved for share pool . * * In some scenarios where there is no host SVM feature, share pool uses - * the default memory setting for DVPP. + * the default 8G memory setting for DVPP. */ struct sp_group { int id; @@ -111,10 +111,6 @@ struct sp_group { struct task_struct *owner; /* is_alive == false means it's being destroyed */ bool is_alive; - /* dvpp_multi_spaces == true means multiple dvpp 16G spaces are set */ - bool dvpp_multi_spaces; - unsigned long dvpp_va_start; - unsigned long dvpp_size; atomic_t use_count; /* protect the group internal elements, except spa_list */ struct rw_semaphore rw_lock; diff --git a/mm/share_pool.c b/mm/share_pool.c index ed553f64beac9..0d4850c540b67 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -47,6 +47,7 @@ #include <linux/compaction.h> #include <linux/preempt.h> #include <linux/swapops.h> +#include <linux/mmzone.h>
/* access control mode macros */ #define AC_NONE 0 @@ -87,6 +88,10 @@ static int share_pool_group_mode = SINGLE_GROUP_MODE;
static int system_group_count;
+static bool enable_sp_dev_addr; +static unsigned long sp_dev_va_start[MAX_DEVID]; +static unsigned long sp_dev_va_size[MAX_DEVID]; + /* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); /* rw semaphore for sp_group_idr and mm->sp_group_master */ @@ -534,7 +539,6 @@ struct sp_area { }; static DEFINE_SPINLOCK(sp_area_lock); static struct rb_root sp_area_root = RB_ROOT; -static bool sp_area_customized;
static unsigned long spa_size(struct sp_area *spa) { @@ -872,6 +876,29 @@ int sp_group_id_by_pid(int pid, int *spg_ids, int *num) } EXPORT_SYMBOL_GPL(sp_group_id_by_pid);
+static bool is_online_node_id(int node_id) +{ + pg_data_t *pgdat; + + for_each_online_pgdat(pgdat) { + if (node_id == pgdat->node_id) + return true; + } + return false; +} + +static bool is_device_addr(unsigned long addr) +{ + int i; + + for (i = 0; i < MAX_DEVID; i++) { + if (addr >= sp_dev_va_start[i] && + addr < sp_dev_va_start[i] + sp_dev_va_size[i]) + return true; + } + return false; +} + static loff_t addr_offset(struct sp_area *spa) { unsigned long addr; @@ -882,13 +909,10 @@ static loff_t addr_offset(struct sp_area *spa) } addr = spa->va_start;
- if (sp_area_customized == false) + if (!enable_sp_dev_addr || !is_device_addr(addr)) return (loff_t)(addr - MMAP_SHARE_POOL_START);
- if (spa->spg != spg_none) - return (loff_t)(addr - spa->spg->dvpp_va_start); - else - return (loff_t)(addr - MMAP_SHARE_POOL_START); + return (loff_t)(addr - sp_dev_va_start[spa->node_id]); }
static struct sp_group *create_spg(int spg_id) @@ -919,7 +943,6 @@ static struct sp_group *create_spg(int spg_id) spg->id = spg_id; spg->is_alive = true; spg->proc_num = 0; - spg->dvpp_multi_spaces = false; spg->owner = current->group_leader; atomic_set(&spg->use_count, 1); INIT_LIST_HEAD(&spg->procs); @@ -1375,18 +1398,20 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, unsigned long size_align = PMD_ALIGN(size); /* va aligned to 2M */ int node_id = (flags >> DEVICE_ID_SHIFT) & DEVICE_ID_MASK;
+ if (!is_online_node_id(node_id) || + node_id < 0 || node_id >= MAX_DEVID) { + pr_err_ratelimited("invalid numa node id %d\n", node_id); + return ERR_PTR(-EINVAL); + } + if ((flags & SP_DVPP)) { - if (sp_area_customized == false) { + if (!enable_sp_dev_addr) { vstart = MMAP_SHARE_POOL_16G_START + node_id * MMAP_SHARE_POOL_16G_SIZE; vend = vstart + MMAP_SHARE_POOL_16G_SIZE; } else { - if (!spg) { - pr_err_ratelimited("don't allow k2u(task) in host svm multiprocess scene\n"); - return ERR_PTR(-EINVAL); - } - vstart = spg->dvpp_va_start; - vend = spg->dvpp_va_start + spg->dvpp_size; + vstart = sp_dev_va_start[node_id]; + vend = vstart + sp_dev_va_size[node_id]; } }
@@ -3164,35 +3189,19 @@ EXPORT_SYMBOL_GPL(sp_unregister_notifier); * @device_id: the num of Da-vinci device * @pid: the pid of device process * - * Return true for success, false if parameter invalid of has been set up. + * Return true for success. + * Return false if parameter invalid or has been set up. + * This function has no concurrency problem. */ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) { - struct sp_group *spg; - - check_interrupt_context(); - - if (device_id < 0 || device_id >= MAX_DEVID || pid < 0 || size <= 0 || - size > MMAP_SHARE_POOL_16G_SIZE) + if (!is_online_node_id(device_id) || device_id < 0 || device_id >= MAX_DEVID || + pid < 0 || size <= 0 || size > MMAP_SHARE_POOL_16G_SIZE || enable_sp_dev_addr) return false;
- spg = __sp_find_spg(pid, SPG_ID_DEFAULT); - if (!spg) - return false; - - down_write(&spg->rw_lock); - if (!spg_valid(spg) || spg->dvpp_multi_spaces == true) { - up_write(&spg->rw_lock); - return false; - } - spg->dvpp_va_start = start; - spg->dvpp_size = size; - spg->dvpp_multi_spaces = true; - up_write(&spg->rw_lock); - - sp_area_customized = true; - - sp_group_drop(spg); + sp_dev_va_start[device_id] = start; + sp_dev_va_size[device_id] = size; + enable_sp_dev_addr = true; return true; } EXPORT_SYMBOL_GPL(sp_config_dvpp_range); @@ -3212,20 +3221,10 @@ static bool is_sp_normal_addr(unsigned long addr) */ bool is_sharepool_addr(unsigned long addr) { - struct sp_area *spa; - bool ret = false; - - if (sp_area_customized == false) + if (!enable_sp_dev_addr) return is_sp_normal_addr(addr);
- spa = __find_sp_area(addr); - if (spa && spa->spg != spg_none) - ret = is_sp_normal_addr(addr) || - (addr >= spa->spg->dvpp_va_start && - addr < spa->spg->dvpp_va_start + spa->spg->dvpp_size); - - __sp_area_drop(spa); - return ret; + return is_sp_normal_addr(addr) || is_device_addr(addr); } EXPORT_SYMBOL_GPL(is_sharepool_addr);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
A process may belong to multiple sp_groups, so we need to check whether a process is already a member of a given group. Extract is_process_in_group() to do this.
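As a caller-side sketch inside mm/share_pool.c (simplified from the sp_free path in this patch; the wrapper name is illustrative), the membership test must run with spg->rw_lock held:

static bool current_mm_in_group(struct sp_group *spg)
{
        bool in_group;

        down_read(&spg->rw_lock);
        in_group = is_process_in_group(spg, current->mm) != NULL;
        up_read(&spg->rw_lock);

        return in_group;
}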
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 88 ++++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 41 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 0d4850c540b67..c969a3faa9e19 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -771,6 +771,23 @@ static struct sp_group *get_first_group(struct mm_struct *mm) return spg; }
+/* + * the caller must: + * 1. hold spg->rw_lock + * 2. ensure no concurrency problem for mm_struct + */ +static struct sp_group_node *is_process_in_group(struct sp_group *spg, + struct mm_struct *mm) +{ + struct sp_group_node *spg_node; + + list_for_each_entry(spg_node, &spg->procs, proc_node) + if (spg_node->master->mm == mm) + return spg_node; + + return NULL; +} + /* user must call sp_group_drop() after use */ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) { @@ -778,11 +795,11 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) struct task_struct *tsk = NULL; int ret = 0;
- if (spg_id == SPG_ID_DEFAULT) { - ret = get_task(pid, &tsk); - if (ret) - return NULL; + ret = get_task(pid, &tsk); + if (ret) + return NULL;
+ if (spg_id == SPG_ID_DEFAULT) { /* * Once we encounter a concurrency problem here. * To fix it, we believe get_task_mm() and mmput() is too @@ -793,18 +810,20 @@ static struct sp_group *__sp_find_spg_locked(int pid, int spg_id) spg = NULL; else spg = get_first_group(tsk->mm); - task_unlock(tsk); - - put_task_struct(tsk); } else { spg = idr_find(&sp_group_idr, spg_id); /* don't revive a dead group */ if (!spg || !atomic_inc_not_zero(&spg->use_count)) - spg = NULL; + goto fail; }
+ put_task_struct(tsk); return spg; + +fail: + put_task_struct(tsk); + return NULL; }
static struct sp_group *__sp_find_spg(int pid, int spg_id) @@ -1046,7 +1065,6 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; bool exist = false; - struct sp_group_node *spg_node;
if (share_pool_group_mode == SINGLE_GROUP_MODE && master && master->count == 1) { @@ -1061,11 +1079,9 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) if (!exist) return 0;
- list_for_each_entry(spg_node, &master->node_list, group_node) { - if (spg_node->spg == spg) { - pr_err_ratelimited("task already in target group, id = %d\n", spg->id); - return -EEXIST; - } + if (is_process_in_group(spg, mm)) { + pr_err_ratelimited("task already in target group, id=%d\n", spg->id); + return -EEXIST; }
if (master->count + 1 == MAX_GROUP_FOR_TASK) { @@ -1794,9 +1810,6 @@ static int sp_free_get_spa(struct sp_free_context *fc) fc->spa = spa;
if (spa->spg != spg_none) { - struct sp_group_node *spg_node; - bool found = false; - /* * Access control: an sp addr can only be freed by * 1. another task in the same spg @@ -1808,17 +1821,12 @@ static int sp_free_get_spa(struct sp_free_context *fc) goto check_spa;
down_read(&spa->spg->rw_lock); - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - if (spg_node->master->mm == current->mm) { - found = true; - break; - } - } - up_read(&spa->spg->rw_lock); - if (!found) { + if (!is_process_in_group(spa->spg, current->mm)) { + up_read(&spa->spg->rw_lock); ret = -EPERM; goto drop_spa; } + up_read(&spa->spg->rw_lock);
check_spa: down_write(&spa->spg->rw_lock); @@ -1961,7 +1969,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (spg_id != SPG_ID_DEFAULT) { spg = __sp_find_spg(current->pid, spg_id); if (!spg) { - pr_err_ratelimited("allocation failed, task not in group\n"); + pr_err_ratelimited("allocation failed, can't find group\n"); return -ENODEV; }
@@ -1973,6 +1981,13 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, pr_err_ratelimited("allocation failed, spg is dead\n"); return -ENODEV; } + + if (!is_process_in_group(spg, current->mm)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, task not in group\n"); + return -ENODEV; + } } else { /* alocation pass through scene */ spg = spg_none; } @@ -2874,13 +2889,11 @@ EXPORT_SYMBOL_GPL(sp_make_share_u2k); static int sp_unshare_uva(unsigned long uva, unsigned long size) { int ret = 0; - bool found = false; struct mm_struct *mm; struct sp_area *spa; unsigned long uva_aligned; unsigned long size_aligned; unsigned int page_size; - struct sp_group_node *spg_node;
/* * at first we guess it's a hugepage addr @@ -2972,21 +2985,14 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) down_read(&spa->spg->rw_lock); /* always allow kthread and dvpp channel destroy procedure */ if (current->mm) { - list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { - if (spg_node->master->mm == current->mm) { - found = true; - break; - } + if (!is_process_in_group(spa->spg, current->mm)) { + up_read(&spa->spg->rw_lock); + pr_err_ratelimited("unshare uva(to group) failed, " + "caller process doesn't belong to target group\n"); + ret = -EPERM; + goto out_drop_area; } } - - if (!found) { - up_read(&spa->spg->rw_lock); - pr_err_ratelimited("unshare uva(to group) failed, " - "caller process doesn't belong to target group\n"); - ret = -EPERM; - goto out_drop_area; - } up_read(&spa->spg->rw_lock);
down_write(&spa->spg->rw_lock);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
If adding a task to a group fails, we should release the memory of the newly allocated spg_node. sp_group_master and sp_proc_stat don't need to be freed here, as they are released when the process exits.
In addition, sp_proc_stat may not have been initialized when group adding fails, so check for this in sp_group_post_exit().
Also adjust the position of the access control permission check.
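A simplified sketch of the new pairing inside mm/share_pool.c (the function name is illustrative; it assumes the caller already holds sp_group_sem, as sp_group_add_task() does, and it omits the statistics setup and spa mapping performed between these steps):

static int example_add_node(struct mm_struct *mm, struct sp_group *spg,
                            unsigned long prot)
{
        struct sp_group_node *node;
        int ret;

        node = create_spg_node(mm, prot, spg);  /* undone by free_spg_node() */
        if (IS_ERR(node))
                return PTR_ERR(node);

        down_write(&spg->rw_lock);
        ret = insert_spg_node(spg, node);       /* undone by delete_spg_node() */
        up_write(&spg->rw_lock);

        if (ret)
                free_spg_node(mm, spg, node);

        return ret;
}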
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 108 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 36 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index c969a3faa9e19..e6f013e3b84ff 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1061,6 +1061,7 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct sp_group *spg, str spin_unlock(&sp_area_lock); }
+/* the caller must hold sp_group_sem */ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) { struct sp_group_master *master = mm->sp_group_master; @@ -1092,39 +1093,61 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg) return 0; }
-static int mm_add_group_finish(struct mm_struct *mm, struct sp_group *spg, unsigned long prot) +/* the caller must hold sp_group_sem */ +static struct sp_group_node *create_spg_node(struct mm_struct *mm, + unsigned long prot, struct sp_group *spg) { - struct sp_group_master *master; + struct sp_group_master *master = mm->sp_group_master; struct sp_group_node *spg_node;
spg_node = kzalloc(sizeof(struct sp_group_node), GFP_KERNEL); if (spg_node == NULL) { pr_err_ratelimited("no memory for spg node\n"); - return -ENOMEM; + return ERR_PTR(-ENOMEM); }
- master = mm->sp_group_master; INIT_LIST_HEAD(&spg_node->group_node); INIT_LIST_HEAD(&spg_node->proc_node); spg_node->spg = spg; spg_node->master = master; spg_node->prot = prot;
- down_write(&spg->rw_lock); + list_add_tail(&spg_node->group_node, &master->node_list); + master->count++; + + return spg_node; +} + +/* the caller must down_write(&spg->rw_lock) */ +static int insert_spg_node(struct sp_group *spg, struct sp_group_node *node) +{ if (spg->proc_num + 1 == MAX_PROC_PER_GROUP) { - up_write(&spg->rw_lock); pr_err_ratelimited("add group: group reaches max process num\n"); - kfree(spg_node); return -ENOSPC; } + spg->proc_num++; - list_add_tail(&spg_node->proc_node, &spg->procs); - up_write(&spg->rw_lock); + list_add_tail(&node->proc_node, &spg->procs); + return 0; +}
- list_add_tail(&spg_node->group_node, &master->node_list); - master->count++; +/* the caller must down_write(&spg->rw_lock) */ +static void delete_spg_node(struct sp_group *spg, struct sp_group_node *node) +{ + list_del(&node->proc_node); + spg->proc_num--; +}
- return 0; +/* the caller must hold sp_group_sem */ +static void free_spg_node(struct mm_struct *mm, struct sp_group *spg, + struct sp_group_node *spg_node) +{ + struct sp_group_master *master = mm->sp_group_master; + + list_del(&spg_node->group_node); + master->count--; + + kfree(spg_node); }
/** @@ -1147,6 +1170,7 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) struct task_struct *tsk; struct mm_struct *mm; struct sp_group *spg; + struct sp_group_node *node = NULL; int ret = 0; bool id_newly_generated = false; struct sp_area *spa, *prev = NULL; @@ -1245,10 +1269,6 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) goto out_put_mm; }
- ret = mm_add_group_init(mm, spg); - if (ret) - goto out_drop_group; - /* access control permission check */ if (sysctl_ac_mode == AC_SINGLE_OWNER) { if (spg->owner != current->group_leader) { @@ -1257,15 +1277,31 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) } }
+ ret = mm_add_group_init(mm, spg); + if (ret) + goto out_drop_group; + + node = create_spg_node(mm, prot, spg); + if (unlikely(IS_ERR(node))) { + ret = PTR_ERR(node); + goto out_drop_spg_node; + } + /* per process statistics initialization */ stat = sp_init_process_stat(tsk, mm, spg); if (IS_ERR(stat)) { ret = PTR_ERR(stat); pr_err_ratelimited("init process stat failed %lx\n", PTR_ERR(stat)); - goto out_drop_group; + goto out_drop_spg_node; }
down_write(&spg->rw_lock); + ret = insert_spg_node(spg, node); + if (unlikely(ret)) { + up_write(&spg->rw_lock); + goto out_drop_spg_node; + } + /* * create mappings of existing shared memory segments into this * new process' page table. @@ -1338,21 +1374,25 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) } __sp_area_drop_locked(prev); spin_unlock(&sp_area_lock); + + if (unlikely(ret)) + delete_spg_node(spg, node); up_write(&spg->rw_lock);
- /* no need to free spg_proc_stat, will be freed when process exits */ +out_drop_spg_node: + if (unlikely(ret)) + free_spg_node(mm, spg, node); + /* + * to simplify design, we don't release the resource of + * group_master and proc_stat, they will be freed when + * process is exiting. + */ out_drop_group: if (unlikely(ret)) { - if (mm->sp_group_master->count == 0) { - kfree(mm->sp_group_master); - mm->sp_group_master = NULL; - } up_write(&sp_group_sem); sp_group_drop(spg); - } else { - mm_add_group_finish(mm, spg, prot); + } else up_write(&sp_group_sem); - } out_put_mm: /* No need to put the mm if the sp group adds this mm successfully */ if (unlikely(ret)) @@ -3834,13 +3874,6 @@ void sp_group_post_exit(struct mm_struct *mm) if (!enable_ascend_share_pool || !master) return;
- stat = sp_get_proc_stat(master->sp_stat_id); - if (stat) { - alloc_size = atomic64_read(&stat->alloc_size); - k2u_size = atomic64_read(&stat->k2u_size); - } else - WARN(1, "can't find sp proc stat\n"); - /* * There are two basic scenarios when a process in the share pool is * exiting but its share pool memory usage is not 0. @@ -3856,18 +3889,21 @@ void sp_group_post_exit(struct mm_struct *mm) * A process not in an sp group doesn't need to print because there * wont't be any memory which is not freed. */ - if (master) { + stat = sp_get_proc_stat(master->sp_stat_id); + if (stat) { + alloc_size = atomic64_read(&stat->alloc_size); + k2u_size = atomic64_read(&stat->k2u_size); + if (alloc_size != 0 || k2u_size != 0) pr_info("process %s(%d) exits. " "It applied %ld aligned KB, k2u shared %ld aligned KB\n", stat->comm, master->sp_stat_id, byte2kb(alloc_size), byte2kb(k2u_size));
+ /* match with sp_init_proc_stat, we expect stat is released after this call */ + sp_proc_stat_drop(stat); }
- /* match with sp_init_proc_stat, we expect stat is released after this call */ - sp_proc_stat_drop(stat); - /* lockless traverse */ list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { spg = spg_node->spg;
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
To simplify the design, put a pointer to sp_proc_stat in sp_group_master so we no longer need to look it up with idr_find().
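A minimal sketch of the simplified lookup inside mm/share_pool.c (mirroring sp_get_proc_stat() in this patch; the wrapper name is illustrative):

static struct sp_proc_stat *proc_stat_of(struct mm_struct *mm)
{
        struct sp_proc_stat *stat;

        if (!mm->sp_group_master)
                return NULL;

        down_read(&sp_proc_stat_sem);
        stat = mm->sp_group_master->stat;
        up_read(&sp_proc_stat_sem);

        return stat;    /* may be NULL */
}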
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 44 ++++++++++++------------ mm/oom_kill.c | 2 +- mm/share_pool.c | 70 ++++++++++++++------------------------ 3 files changed, 48 insertions(+), 68 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index c3cf15f5da051..fc9f411f7a33c 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -73,6 +73,25 @@ struct sp_spg_stat { DECLARE_HASHTABLE(hash, SP_SPG_HASH_BITS); };
+/* we estimate a process ususally belongs to at most 16 sp-group */ +#define SP_PROC_HASH_BITS 4 + +/* per process memory usage statistics indexed by tgid */ +struct sp_proc_stat { + atomic_t use_count; + int tgid; + struct mm_struct *mm; + struct mutex lock; /* protect hashtable */ + DECLARE_HASHTABLE(hash, SP_PROC_HASH_BITS); + char comm[TASK_COMM_LEN]; + /* + * alloc amount minus free amount, may be negative when freed by + * another task in the same sp group. + */ + atomic64_t alloc_size; + atomic64_t k2u_size; +}; + /* Processes in the same sp_group can share memory. * Memory layout for share pool: * @@ -123,10 +142,10 @@ struct sp_group_master { * a.k.a the number of sp_node in node_list */ unsigned int count; - int sp_stat_id; /* list head of sp_node */ struct list_head node_list; struct mm_struct *mm; + struct sp_proc_stat *stat; };
/* @@ -154,25 +173,6 @@ struct sp_walk_data { pmd_t *pmd; };
-/* we estimate a process ususally belongs to at most 16 sp-group */ -#define SP_PROC_HASH_BITS 4 - -/* per process memory usage statistics indexed by tgid */ -struct sp_proc_stat { - atomic_t use_count; - int tgid; - struct mm_struct *mm; - struct mutex lock; /* protect hashtable */ - DECLARE_HASHTABLE(hash, SP_PROC_HASH_BITS); - char comm[TASK_COMM_LEN]; - /* - * alloc amount minus free amount, may be negative when freed by - * another task in the same sp group. - */ - atomic64_t alloc_size; - atomic64_t k2u_size; -}; - #define MAP_SHARE_POOL 0x100000
#define MMAP_TOP_4G_SIZE 0x100000000UL @@ -221,7 +221,7 @@ extern int sp_register_notifier(struct notifier_block *nb); extern int sp_unregister_notifier(struct notifier_block *nb); extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); extern bool is_sharepool_addr(unsigned long addr); -extern struct sp_proc_stat *sp_get_proc_stat_ref(int tgid); +extern struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm); extern void sp_proc_stat_drop(struct sp_proc_stat *stat); extern void spa_overview_show(struct seq_file *seq); extern void spg_overview_show(struct seq_file *seq); @@ -426,7 +426,7 @@ static inline bool is_sharepool_addr(unsigned long addr) return false; }
-static inline struct sp_proc_stat *sp_get_proc_stat_ref(int tgid) +static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) { return NULL; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 19b0b266437c4..2799a47105014 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -481,7 +481,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) }
if (ascend_sp_oom_show()) { - stat = sp_get_proc_stat_ref(task->tgid); + stat = sp_get_proc_stat_ref(task->mm);
pr_cont("[%7d] %5d %5d %8lu %8lu ", task->pid, from_kuid(&init_user_ns, task_uid(task)), diff --git a/mm/share_pool.c b/mm/share_pool.c index e6f013e3b84ff..b7af4752879f0 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -135,7 +135,7 @@ static struct sp_group_master *sp_init_group_master_locked(
INIT_LIST_HEAD(&master->node_list); master->count = 0; - master->sp_stat_id = 0; + master->stat = NULL; master->mm = mm; mm->sp_group_master = master;
@@ -143,48 +143,37 @@ static struct sp_group_master *sp_init_group_master_locked( return master; }
-/* The caller must hold sp_stat_sem */ -static struct sp_proc_stat *sp_get_proc_stat_locked(int tgid) +static struct sp_proc_stat *sp_get_proc_stat(struct mm_struct *mm) { struct sp_proc_stat *stat;
- stat = idr_find(&sp_proc_stat_idr, tgid); - - /* maybe NULL or not, we always return it */ - return stat; -} - -/* The caller must hold sp_stat_sem */ -static struct sp_proc_stat *sp_get_proc_stat_ref_locked(int tgid) -{ - struct sp_proc_stat *stat; + if (!mm->sp_group_master) + return NULL;
- stat = idr_find(&sp_proc_stat_idr, tgid); - if (!stat || !atomic_inc_not_zero(&stat->use_count)) - stat = NULL; + down_read(&sp_proc_stat_sem); + stat = mm->sp_group_master->stat; + up_read(&sp_proc_stat_sem);
/* maybe NULL or not, we always return it */ return stat; }
-static struct sp_proc_stat *sp_get_proc_stat(int tgid) +/* user must call sp_proc_stat_drop() after use */ +struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) { struct sp_proc_stat *stat;
+ if (!mm->sp_group_master) + return NULL; + down_read(&sp_proc_stat_sem); - stat = sp_get_proc_stat_locked(tgid); + stat = mm->sp_group_master->stat; up_read(&sp_proc_stat_sem); - return stat; -}
-/* user must call sp_proc_stat_drop() after use */ -struct sp_proc_stat *sp_get_proc_stat_ref(int tgid) -{ - struct sp_proc_stat *stat; + if (!stat || !atomic_inc_not_zero(&stat->use_count)) + stat = NULL;
- down_read(&sp_proc_stat_sem); - stat = sp_get_proc_stat_ref_locked(tgid); - up_read(&sp_proc_stat_sem); + /* maybe NULL or not, we always return it */ return stat; }
@@ -215,22 +204,13 @@ static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, struct mm_struct *mm, struct task_struct *tsk) { struct sp_proc_stat *stat; - int id, alloc_id, tgid = tsk->tgid; + int alloc_id, tgid = tsk->tgid;
down_write(&sp_proc_stat_sem); - id = master->sp_stat_id; - if (id) { - /* may have been initialized */ - stat = sp_get_proc_stat_locked(tgid); + stat = master->stat; + if (stat) { up_write(&sp_proc_stat_sem); - if (stat) { - return stat; - } else { - up_write(&sp_proc_stat_sem); - /* if enter this branch, that's our mistake */ - WARN(1, "proc stat invalid id %d\n", id); - return ERR_PTR(-EBUSY); - } + return stat; }
stat = create_proc_stat(mm, tsk); @@ -247,7 +227,7 @@ static struct sp_proc_stat *sp_init_proc_stat(struct sp_group_master *master, return ERR_PTR(alloc_id); }
- master->sp_stat_id = alloc_id; + master->stat = stat; up_write(&sp_proc_stat_sem);
return stat; @@ -3341,7 +3321,7 @@ static void free_sp_proc_stat(struct sp_proc_stat *stat) free_process_spg_proc_stat(stat);
down_write(&sp_proc_stat_sem); - stat->mm->sp_group_master->sp_stat_id = 0; + stat->mm->sp_group_master->stat = NULL; idr_remove(&sp_proc_stat_idr, stat->tgid); up_write(&sp_proc_stat_sem); kfree(stat); @@ -3372,7 +3352,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, up_read(&spg->rw_lock);
/* eliminate potential ABBA deadlock */ - stat = sp_get_proc_stat_ref(task->mm->sp_group_master->sp_stat_id); + stat = sp_get_proc_stat_ref(task->mm); if (unlikely(!stat)) { sp_group_drop(spg); return 0; @@ -3889,7 +3869,7 @@ void sp_group_post_exit(struct mm_struct *mm) * A process not in an sp group doesn't need to print because there * wont't be any memory which is not freed. */ - stat = sp_get_proc_stat(master->sp_stat_id); + stat = sp_get_proc_stat(mm); if (stat) { alloc_size = atomic64_read(&stat->alloc_size); k2u_size = atomic64_read(&stat->k2u_size); @@ -3897,7 +3877,7 @@ void sp_group_post_exit(struct mm_struct *mm) if (alloc_size != 0 || k2u_size != 0) pr_info("process %s(%d) exits. " "It applied %ld aligned KB, k2u shared %ld aligned KB\n", - stat->comm, master->sp_stat_id, + stat->comm, stat->tgid, byte2kb(alloc_size), byte2kb(k2u_size));
/* match with sp_init_proc_stat, we expect stat is released after this call */
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Let /proc/$pid/sp_group show multi-group info.
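For reference, the file now prints two sections whose headers come from the seq_printf() calls below (data lines omitted here):

  Share Pool Aggregate Data of This Process
  PID COMM SP_ALLOC SP_K2U SP_RES Non-SP_RES Non-SP_Shm VIRT

  Process in Each SP Group
  Group_ID SP_ALLOC SP_K2U SP_RES

Non-SP_RES and Non-SP_Shm are computed by get_process_non_sp_res() as page2kb(total_rss) - sp_res_nsize and page2kb(shmem) - sp_res_nsize respectively, both clamped at zero.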
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 179 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 128 insertions(+), 51 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index b7af4752879f0..b2d890936388b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3334,45 +3334,132 @@ void sp_proc_stat_drop(struct sp_proc_stat *stat) free_sp_proc_stat(stat); }
-int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, - struct pid *pid, struct task_struct *task) +static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, + unsigned long *file, unsigned long *shmem, unsigned long *total_rss) { - struct sp_group *spg = NULL; - struct sp_proc_stat *stat; - int spg_id, hugepage_failures; + *anon = get_mm_counter(mm, MM_ANONPAGES); + *file = get_mm_counter(mm, MM_FILEPAGES); + *shmem = get_mm_counter(mm, MM_SHMEMPAGES); + *total_rss = *anon + *file + *shmem; +}
- spg = __sp_find_spg(task->pid, SPG_ID_DEFAULT); - if (!spg) - return 0; +static long get_proc_alloc(struct sp_proc_stat *stat) +{ + return byte2kb(atomic64_read(&stat->alloc_size)); +}
- down_read(&spg->rw_lock); - if (spg_valid(spg)) { - spg_id = spg->id; - hugepage_failures = atomic_read(&spg->stat->hugepage_failures); - up_read(&spg->rw_lock); +static long get_proc_k2u(struct sp_proc_stat *stat) +{ + return byte2kb(atomic64_read(&stat->k2u_size)); +}
- /* eliminate potential ABBA deadlock */ - stat = sp_get_proc_stat_ref(task->mm); - if (unlikely(!stat)) { - sp_group_drop(spg); - return 0; - } +static long get_spg_alloc(struct sp_spg_stat *stat) +{ + return byte2kb(atomic64_read(&stat->alloc_size)); +}
- /* print the file header */ - seq_printf(m, "%-8s %-9s %-13s\n", - "Group_ID", "SP_ALLOC", "HugePage Fail"); - seq_printf(m, "%-8d %-9ld %-13d\n", - spg_id, - byte2kb(atomic64_read(&stat->alloc_size)), - hugepage_failures); +static long get_spg_alloc_nsize(struct sp_spg_stat *stat) +{ + return byte2kb(atomic64_read(&stat->alloc_nsize)); +}
- sp_proc_stat_drop(stat); - sp_group_drop(spg); +static long get_spg_proc_alloc(struct spg_proc_stat *stat) +{ + return byte2kb(atomic64_read(&stat->alloc_size)); +} + +static long get_spg_proc_k2u(struct spg_proc_stat *stat) +{ + return byte2kb(atomic64_read(&stat->k2u_size)); +} + +static void get_process_sp_res(struct sp_proc_stat *stat, + long *sp_res_out, long *sp_res_nsize_out) +{ + int i; + struct spg_proc_stat *spg_proc_stat; + struct sp_spg_stat *spg_stat; + long sp_res = 0, sp_res_nsize = 0; + + mutex_lock(&stat->lock); + hash_for_each(stat->hash, i, spg_proc_stat, pnode) { + spg_stat = spg_proc_stat->spg_stat; + sp_res += get_spg_alloc(spg_stat); + sp_res_nsize += get_spg_alloc_nsize(spg_stat); + } + mutex_unlock(&stat->lock); + + *sp_res_out = sp_res; + *sp_res_nsize_out = sp_res_nsize; +} + +/* + * Statistics of RSS has a maximum 64 pages deviation (256KB). + * Please check_sync_rss_stat(). + */ +static void get_process_non_sp_res(unsigned long total_rss, unsigned long shmem, + long sp_res_nsize, long *non_sp_res_out, long *non_sp_shm_out) +{ + long non_sp_res, non_sp_shm; + + non_sp_res = page2kb(total_rss) - sp_res_nsize; + non_sp_res = non_sp_res < 0 ? 0 : non_sp_res; + non_sp_shm = page2kb(shmem) - sp_res_nsize; + non_sp_shm = non_sp_shm < 0 ? 0 : non_sp_shm; + + *non_sp_res_out = non_sp_res; + *non_sp_shm_out = non_sp_shm; +} + +static long get_sp_res_by_spg_proc(struct spg_proc_stat *stat) +{ + return byte2kb(atomic64_read(&stat->spg_stat->alloc_size)); +} + +int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct mm_struct *mm = task->mm; + struct sp_group_master *master = mm->sp_group_master; + struct sp_proc_stat *proc_stat; + struct spg_proc_stat *spg_proc_stat; + int i; + unsigned long anon, file, shmem, total_rss; + long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; + + if (!master) return 0; + + get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); + proc_stat = master->stat; + get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_non_sp_res(total_rss, shmem, sp_res_nsize, + &non_sp_res, &non_sp_shm); + + seq_puts(m, "Share Pool Aggregate Data of This Process\n\n"); + seq_printf(m, "%-8s %-16s %-9s %-9s %-9s %-10s %-10s %-8s\n", + "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", + "Non-SP_Shm", "VIRT"); + seq_printf(m, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", + proc_stat->tgid, proc_stat->comm, + get_proc_alloc(proc_stat), + get_proc_k2u(proc_stat), + sp_res, non_sp_res, non_sp_shm, + page2kb(mm->total_vm)); + + seq_puts(m, "\n\nProcess in Each SP Group\n\n"); + seq_printf(m, "%-8s %-9s %-9s %-9s\n", + "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES"); + mutex_lock(&proc_stat->lock); + hash_for_each(proc_stat->hash, i, spg_proc_stat, pnode) { + seq_printf(m, "%-8d %-9ld %-9ld %-9ld\n", + spg_proc_stat->spg_id, + get_spg_proc_alloc(spg_proc_stat), + get_spg_proc_k2u(spg_proc_stat), + get_sp_res_by_spg_proc(spg_proc_stat)); } - up_read(&spg->rw_lock); + mutex_unlock(&proc_stat->lock);
- sp_group_drop(spg); return 0; }
@@ -3573,7 +3660,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data) * non_sp_shm: resident shared memory size size excluding share pool * memory */ - long sp_alloc_nsize, non_sp_res, sp_res, non_sp_shm; + long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
mutex_lock(&spg_stat->lock); hash_for_each(spg_stat->hash, i, spg_proc_stat, gnode) { @@ -3581,30 +3668,20 @@ static int idr_proc_stat_cb(int id, void *p, void *data) tgid = proc_stat->tgid; mm = proc_stat->mm;
- anon = get_mm_counter(mm, MM_ANONPAGES); - file = get_mm_counter(mm, MM_FILEPAGES); - shmem = get_mm_counter(mm, MM_SHMEMPAGES); - total_rss = anon + file + shmem; - - /* - * Statistics of RSS has a maximum 64 pages deviation (256KB). - * Please check_sync_rss_stat(). - */ - sp_alloc_nsize = byte2kb(atomic64_read(&spg_stat->alloc_nsize)); - sp_res = byte2kb(atomic64_read(&spg_stat->alloc_size)); - non_sp_res = page2kb(total_rss) - sp_alloc_nsize; - non_sp_res = non_sp_res < 0 ? 0 : non_sp_res; - non_sp_shm = page2kb(shmem) - sp_alloc_nsize; - non_sp_shm = non_sp_shm < 0 ? 0 : non_sp_shm; + get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); + get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_non_sp_res(total_rss, shmem, sp_res_nsize, + &non_sp_res, &non_sp_shm);
seq_printf(seq, "%-8d ", tgid); if (id == 0) seq_printf(seq, "%-8c ", '-'); else seq_printf(seq, "%-8d ", id); - seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-8ld %-7ld %-7ld %-10ld\n", - byte2kb(atomic64_read(&spg_proc_stat->alloc_size)), - byte2kb(atomic64_read(&spg_proc_stat->k2u_size)), + seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld\n", + get_spg_proc_alloc(spg_proc_stat), + get_spg_proc_k2u(spg_proc_stat), + get_sp_res_by_spg_proc(spg_proc_stat), sp_res, non_sp_res, page2kb(mm->total_vm), page2kb(total_rss), page2kb(shmem), non_sp_shm); @@ -3618,8 +3695,8 @@ static int proc_stat_show(struct seq_file *seq, void *offset) spg_overview_show(seq); spa_overview_show(seq); /* print the file header */ - seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-8s %-7s %-7s %-10s\n", - "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", + seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s\n", + "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm"); /* print kthread buff_module_guard_work */ seq_printf(seq, "%-8s %-8s %-9ld %-9ld\n",
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Add /proc/sharepool/proc_overview to show per-process overview info.
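For reference, the new file prints one line per entry in sp_proc_stat_idr under the header below (all sizes in KB, taken from the seq_printf() calls in proc_overview_show()):

  PID COMM SP_ALLOC SP_K2U SP_RES Non-SP_RES Non-SP_Shm VIRT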
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index b2d890936388b..7716bb12de167 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3711,6 +3711,40 @@ static int proc_stat_show(struct seq_file *seq, void *offset) return 0; }
+static int idr_proc_overview_cb(int id, void *p, void *data) +{ + struct sp_proc_stat *proc_stat = p; + struct seq_file *seq = data; + struct mm_struct *mm = proc_stat->mm; + unsigned long anon, file, shmem, total_rss; + long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; + + get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); + get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); + get_process_non_sp_res(total_rss, shmem, sp_res_nsize, + &non_sp_res, &non_sp_shm); + + seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", + id, proc_stat->comm, + get_proc_alloc(proc_stat), + get_proc_k2u(proc_stat), + sp_res, non_sp_res, non_sp_shm, + page2kb(mm->total_vm)); + return 0; +} + +static int proc_overview_show(struct seq_file *seq, void *offset) +{ + seq_printf(seq, "%-8s %-16s %-9s %-9s %-9s %-10s %-10s %-8s\n", + "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", + "Non-SP_Shm", "VIRT"); + + down_read(&sp_proc_stat_sem); + idr_for_each(&sp_proc_stat_idr, idr_proc_overview_cb, seq); + up_read(&sp_proc_stat_sem); + return 0; +} + /* * Called by proc_root_init() to initialize the /proc/sharepool subtree */ @@ -3721,6 +3755,7 @@ void __init proc_sharepool_init(void)
proc_create_single_data("sharepool/proc_stat", 0400, NULL, proc_stat_show, NULL); proc_create_single_data("sharepool/spa_stat", 0400, NULL, spa_stat_show, NULL); + proc_create_single_data("sharepool/proc_overview", 0400, NULL, proc_overview_show, NULL); }
/*** End of tatistical and maintenance functions ***/
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
The process prot will be shown in the "PROT" column of /proc/sharepool/proc_stat and /proc/$pid/sp_group.
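In that column, print_process_prot() emits "R" for PROT_READ, "RW" for PROT_READ | PROT_WRITE, and "-" otherwise (e.g. for spg_none).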
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 54 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 8 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 7716bb12de167..9aa65349168cd 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3416,6 +3416,31 @@ static long get_sp_res_by_spg_proc(struct spg_proc_stat *stat) return byte2kb(atomic64_read(&stat->spg_stat->alloc_size)); }
+static unsigned long get_process_prot_locked(int spg_id, struct mm_struct *mm) +{ + unsigned long prot = 0; + struct sp_group_node *spg_node; + struct sp_group_master *master = mm->sp_group_master; + + list_for_each_entry(spg_node, &master->node_list, group_node) { + if (spg_node->spg->id == spg_id) { + prot = spg_node->prot; + break; + } + } + return prot; +} + +static void print_process_prot(struct seq_file *seq, unsigned long prot) +{ + if (prot == PROT_READ) + seq_puts(seq, "R"); + else if (prot == (PROT_READ | PROT_WRITE)) + seq_puts(seq, "RW"); + else /* e.g. spg_none */ + seq_puts(seq, "-"); +} + int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3424,7 +3449,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct sp_proc_stat *proc_stat; struct spg_proc_stat *spg_proc_stat; int i; - unsigned long anon, file, shmem, total_rss; + unsigned long anon, file, shmem, total_rss, prot; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
if (!master) @@ -3448,17 +3473,24 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, page2kb(mm->total_vm));
seq_puts(m, "\n\nProcess in Each SP Group\n\n"); - seq_printf(m, "%-8s %-9s %-9s %-9s\n", - "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES"); + seq_printf(m, "%-8s %-9s %-9s %-9s %-4s\n", + "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "PROT"); + + /* to prevent ABBA deadlock, first hold sp_group_sem */ + down_read(&sp_group_sem); mutex_lock(&proc_stat->lock); hash_for_each(proc_stat->hash, i, spg_proc_stat, pnode) { - seq_printf(m, "%-8d %-9ld %-9ld %-9ld\n", + prot = get_process_prot_locked(spg_proc_stat->spg_id, mm); + seq_printf(m, "%-8d %-9ld %-9ld %-9ld ", spg_proc_stat->spg_id, get_spg_proc_alloc(spg_proc_stat), get_spg_proc_k2u(spg_proc_stat), get_sp_res_by_spg_proc(spg_proc_stat)); + print_process_prot(m, prot); + seq_putc(m, '\n'); } mutex_unlock(&proc_stat->lock); + up_read(&sp_group_sem);
return 0; } @@ -3652,7 +3684,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data) struct spg_proc_stat *spg_proc_stat;
struct mm_struct *mm; - unsigned long anon, file, shmem, total_rss; + unsigned long anon, file, shmem, total_rss, prot; /* * non_sp_res: resident memory size excluding share pool memory * sp_res: resident memory size of share pool, including normal @@ -3662,6 +3694,8 @@ static int idr_proc_stat_cb(int id, void *p, void *data) */ long sp_res, sp_res_nsize, non_sp_res, non_sp_shm;
+ /* to prevent ABBA deadlock, first hold sp_group_sem */ + down_read(&sp_group_sem); mutex_lock(&spg_stat->lock); hash_for_each(spg_stat->hash, i, spg_proc_stat, gnode) { proc_stat = spg_proc_stat->proc_stat; @@ -3672,21 +3706,25 @@ static int idr_proc_stat_cb(int id, void *p, void *data) get_process_sp_res(proc_stat, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); + prot = get_process_prot_locked(id, mm);
seq_printf(seq, "%-8d ", tgid); if (id == 0) seq_printf(seq, "%-8c ", '-'); else seq_printf(seq, "%-8d ", id); - seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld\n", + seq_printf(seq, "%-9ld %-9ld %-9ld %-10ld %-10ld %-8ld %-7ld %-7ld %-10ld ", get_spg_proc_alloc(spg_proc_stat), get_spg_proc_k2u(spg_proc_stat), get_sp_res_by_spg_proc(spg_proc_stat), sp_res, non_sp_res, page2kb(mm->total_vm), page2kb(total_rss), page2kb(shmem), non_sp_shm); + print_process_prot(seq, prot); + seq_putc(seq, '\n'); } mutex_unlock(&spg_stat->lock); + up_read(&sp_group_sem); return 0; }
@@ -3695,9 +3733,9 @@ static int proc_stat_show(struct seq_file *seq, void *offset) spg_overview_show(seq); spa_overview_show(seq); /* print the file header */ - seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s\n", + seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-10s %-10s %-8s %-7s %-7s %-10s %-4s\n", "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "SP_RES_T", - "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm"); + "Non-SP_RES", "VIRT", "RES", "Shm", "Non-SP_Shm", "PROT"); /* print kthread buff_module_guard_work */ seq_printf(seq, "%-8s %-8s %-9ld %-9ld\n", "guard", "-",
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
/proc/sys/vm/sharepool_perf_alloc allows us to track the time consumed by sp_alloc().
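The sysctl value is a threshold in microseconds, accepted in the range 0-10000; 0 (the default) disables the tracking. Any sp_alloc() call whose elapsed time reaches the threshold is reported via pr_err() together with its size, flags and whether it was a pass-through allocation.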
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 1 + kernel/sysctl.c | 9 +++++++ mm/share_pool.c | 48 +++++++++++++++++++++++++++++++++++--- 3 files changed, 55 insertions(+), 3 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index fc9f411f7a33c..e118ac1cf15ca 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -45,6 +45,7 @@ extern int sysctl_share_pool_map_lock_enable; extern int sysctl_sp_compact_enable; extern unsigned long sysctl_sp_compact_interval; extern unsigned long sysctl_sp_compact_interval_max; +extern int sysctl_sp_perf_alloc;
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC extern bool vmap_allow_huge; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c5d4395efd430..165d5ccd384f7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1807,6 +1807,15 @@ static struct ctl_table vm_table[] = { .extra1 = &zero_ul, .extra2 = &sysctl_sp_compact_interval_max, }, + { + .procname = "sharepool_perf_alloc", + .data = &sysctl_sp_perf_alloc, + .maxlen = sizeof(sysctl_sp_perf_alloc), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &ten_thousand, + }, #endif { } }; diff --git a/mm/share_pool.c b/mm/share_pool.c index 9aa65349168cd..716b3ea7c960d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -48,6 +48,8 @@ #include <linux/preempt.h> #include <linux/swapops.h> #include <linux/mmzone.h> +#include <linux/timekeeping.h> +#include <linux/time64.h>
/* access control mode macros */ #define AC_NONE 0 @@ -68,6 +70,9 @@
#define GROUP_NONE 0
+#define SEC2US(sec) ((sec) * 1000000) +#define NS2US(ns) ((ns) / 1000) + #define PF_DOMAIN_CORE 0x10000000 /* AOS CORE processes in sched.h */
/* mdc scene hack */ @@ -84,6 +89,8 @@ int sysctl_sp_debug_mode;
int sysctl_share_pool_map_lock_enable;
+int sysctl_sp_perf_alloc; + static int share_pool_group_mode = SINGLE_GROUP_MODE;
static int system_group_count; @@ -1955,8 +1962,38 @@ struct sp_alloc_context { unsigned long populate; int state; bool need_fallocate; + struct timespec64 start; + struct timespec64 end; };
+static void trace_sp_alloc_begin(struct sp_alloc_context *ac) +{ + if (!sysctl_sp_perf_alloc) + return; + + ktime_get_ts64(&ac->start); +} + +static void trace_sp_alloc_finish(struct sp_alloc_context *ac, unsigned long va) +{ + unsigned long cost; + bool is_pass_through = ac->spg == spg_none ? true : false; + + if (!sysctl_sp_perf_alloc) + return; + + ktime_get_ts64(&ac->end); + + cost = SEC2US(ac->end.tv_sec - ac->start.tv_sec) + + NS2US(ac->end.tv_nsec - ac->start.tv_nsec); + if (cost >= (unsigned long)sysctl_sp_perf_alloc) { + pr_err("Task %s(%d/%d) sp_alloc returns 0x%lx consumes %luus, size is %luKB, " + "size_aligned is %luKB, sp_flags is %lx, pass through is %d\n", + current->comm, current->tgid, current->pid, + va, cost, byte2kb(ac->size), byte2kb(ac->size_aligned), ac->sp_flags, is_pass_through); + } +} + static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, int spg_id, struct sp_alloc_context *ac) { @@ -1964,6 +2001,8 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
check_interrupt_context();
+ trace_sp_alloc_begin(ac); + /* mdc scene hack */ if (enable_mdc_default_group) spg_id = mdc_default_group_id; @@ -2210,9 +2249,11 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, return ret; }
-/* spa maybe an error pointer, so introduce param spg */ -static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_group *spg) +/* spa maybe an error pointer, so introduce variable spg */ +static void sp_alloc_finish(int result, struct sp_area *spa, + struct sp_alloc_context *ac) { + struct sp_group *spg = ac->spg; bool is_pass_through = spg == spg_none ? true : false;
/* match sp_alloc_check_prepare */ @@ -2229,6 +2270,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa, struct sp_group *sp if (!is_pass_through) sp_group_drop(spg);
+ trace_sp_alloc_finish(ac, spa->va_start); sp_dump_stack(); sp_try_to_compact(); } @@ -2270,7 +2312,7 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) goto try_again;
out: - sp_alloc_finish(ret, spa, ac.spg); + sp_alloc_finish(ret, spa, &ac); if (ret) return ERR_PTR(ret); else
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Refactor sp_k2u to improve its readability.
1. Introduce struct sp_k2u_context to save the mapping parameters.
2. Extract sp_k2u_prepare to check the input parameters of sp_k2u and initialize the sp_k2u_context instance.
3. Extract sp_k2u_finish.
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 110 +++++++++++++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 39 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 716b3ea7c960d..824163e764e63 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2574,42 +2574,30 @@ static int is_k2task(int spg_id) return (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) ? 1 : 0; }
-/** - * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. - * @kva: the VA of shared kernel memory. - * @size: the size of shared kernel memory. - * @sp_flags: how to allocate the memory. We only support SP_DVPP. - * @pid: the pid of the specified process (Not currently in use). - * @spg_id: the share group that the memory is shared to. - * - * Return: the shared target user address to start at - * - * Share kernel memory to current task if spg_id == SPG_ID_NONE - * or SPG_ID_DEFAULT in multi-group mode. - * - * Return: - * * if succeed, return the shared user address to start at. - * * if fail, return the pointer of -errno. - */ -void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) -{ - void *uva; +struct sp_k2u_context { + unsigned long kva; unsigned long kva_aligned; + unsigned long size; unsigned long size_aligned; - unsigned int page_size = PAGE_SIZE; - int is_hugepage, to_task; + unsigned long sp_flags; + int spg_id; +};
- check_interrupt_context(); +static int sp_k2u_prepare(unsigned long kva, unsigned long size, + unsigned long sp_flags, int spg_id, struct sp_k2u_context *kc) +{ + int is_hugepage; + unsigned int page_size = PAGE_SIZE; + unsigned long kva_aligned, size_aligned;
if (sp_flags & ~SP_DVPP) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); - return ERR_PTR(-EINVAL); + return -EINVAL; }
if (!current->mm) { pr_err_ratelimited("k2u: kthread is not allowed\n"); - return ERR_PTR(-EPERM); + return -EPERM; }
is_hugepage = is_vmap_hugepage(kva); @@ -2620,7 +2608,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, /* do nothing */ } else { pr_err_ratelimited("k2u kva is not vmalloc address\n"); - return ERR_PTR(is_hugepage); + return is_hugepage; }
/* aligned down kva is convenient for caller to start with any valid kva */ @@ -2629,31 +2617,75 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
if (!vmalloc_area_set_flag(kva_aligned, VM_SHAREPOOL)) { pr_debug("k2u_task kva %lx is not valid\n", kva_aligned); - return ERR_PTR(-EINVAL); + return -EINVAL; }
- to_task = is_k2task(spg_id); + kc->kva = kva; + kc->kva_aligned = kva_aligned; + kc->size = size; + kc->size_aligned = size_aligned; + kc->sp_flags = sp_flags; + kc->spg_id = spg_id; + return 0; +} + +static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) +{ + if (IS_ERR(uva)) + vmalloc_area_clr_flag(kc->kva_aligned, VM_SHAREPOOL); + else + uva = uva + (kc->kva - kc->kva_aligned); + + sp_dump_stack(); + return uva; +} + +/** + * sp_make_share_k2u() - Share kernel memory to current process or an sp_group. + * @kva: the VA of shared kernel memory. + * @size: the size of shared kernel memory. + * @sp_flags: how to allocate the memory. We only support SP_DVPP. + * @pid: the pid of the specified process (Not currently in use). + * @spg_id: the share group that the memory is shared to. + * + * Return: the shared target user address to start at + * + * Share kernel memory to current task if spg_id == SPG_ID_NONE + * or SPG_ID_DEFAULT in multi-group mode. + * + * Return: + * * if succeed, return the shared user address to start at. + * * if fail, return the pointer of -errno. + */ +void *sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id) +{ + void *uva; + int ret, to_task; + struct sp_k2u_context kc; + + check_interrupt_context(); + + ret = sp_k2u_prepare(kva, size, sp_flags, spg_id, &kc); + if (ret) + return ERR_PTR(ret); + + to_task = is_k2task(kc.spg_id); if (to_task == 1) - uva = sp_make_share_kva_to_task(kva_aligned, size_aligned, sp_flags); + uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags); else if (to_task == 0) { struct sp_group *spg;
- spg = __sp_find_spg(current->pid, spg_id); + spg = __sp_find_spg(current->pid, kc.spg_id); if (spg) { - uva = sp_make_share_kva_to_spg(kva_aligned, size_aligned, sp_flags, spg); + uva = sp_make_share_kva_to_spg(kc.kva_aligned, kc.size_aligned, kc.sp_flags, spg); sp_group_drop(spg); } else uva = ERR_PTR(-ENODEV); } else uva = ERR_PTR(to_task);
- if (IS_ERR(uva)) - vmalloc_area_clr_flag(kva_aligned, VM_SHAREPOOL); - else - uva = uva + (kva - kva_aligned); - - sp_dump_stack(); - return uva; + return sp_k2u_finish(uva, &kc); } EXPORT_SYMBOL_GPL(sp_make_share_k2u);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
/proc/sys/vm/sharepool_perf_k2u allows us to track the time consumed by sp_k2u().
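As with sharepool_perf_alloc, the value is a microsecond threshold in the range 0-10000; 0 disables the tracking, and any sp_k2u() call that takes at least that long is reported via pr_err().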
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 2 ++ kernel/sysctl.c | 9 +++++++++ mm/share_pool.c | 37 +++++++++++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index e118ac1cf15ca..2c71d7ded2cac 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -47,6 +47,8 @@ extern unsigned long sysctl_sp_compact_interval; extern unsigned long sysctl_sp_compact_interval_max; extern int sysctl_sp_perf_alloc;
+extern int sysctl_sp_perf_k2u; + #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC extern bool vmap_allow_huge; #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 165d5ccd384f7..3d8c79986575a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1816,6 +1816,15 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &ten_thousand, }, + { + .procname = "sharepool_perf_k2u", + .data = &sysctl_sp_perf_k2u, + .maxlen = sizeof(sysctl_sp_perf_k2u), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &ten_thousand, + }, #endif { } }; diff --git a/mm/share_pool.c b/mm/share_pool.c index 824163e764e63..f3168eed3efc6 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -91,6 +91,8 @@ int sysctl_share_pool_map_lock_enable;
int sysctl_sp_perf_alloc;
+int sysctl_sp_perf_k2u; + static int share_pool_group_mode = SINGLE_GROUP_MODE;
static int system_group_count; @@ -2581,8 +2583,36 @@ struct sp_k2u_context { unsigned long size_aligned; unsigned long sp_flags; int spg_id; + struct timespec64 start; + struct timespec64 end; };
+static void trace_sp_k2u_begin(struct sp_k2u_context *kc) +{ + if (!sysctl_sp_perf_k2u) + return; + + ktime_get_ts64(&kc->start); +} + +static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva, int to_task) +{ + unsigned long cost; + + if (!sysctl_sp_perf_k2u) + return; + + ktime_get_ts64(&kc->end); + + cost = SEC2US(kc->end.tv_sec - kc->start.tv_sec) + + NS2US(kc->end.tv_nsec - kc->start.tv_nsec); + if (cost >= (unsigned long)sysctl_sp_perf_k2u) { + pr_err("Task %s(%d/%d) sp_k2u returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, " + "sp_flags is %lx, to_task is %d\n", + current->comm, current->tgid, current->pid, + (unsigned long)uva, cost, byte2kb(kc->size), byte2kb(kc->size_aligned), kc->sp_flags, to_task); + } +} static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned long sp_flags, int spg_id, struct sp_k2u_context *kc) { @@ -2590,6 +2620,8 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, unsigned int page_size = PAGE_SIZE; unsigned long kva_aligned, size_aligned;
+ trace_sp_k2u_begin(kc); + if (sp_flags & ~SP_DVPP) { pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags); return -EINVAL; @@ -2629,13 +2661,14 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, return 0; }
-static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) +static void *sp_k2u_finish(void *uva, int to_task, struct sp_k2u_context *kc) { if (IS_ERR(uva)) vmalloc_area_clr_flag(kc->kva_aligned, VM_SHAREPOOL); else uva = uva + (kc->kva - kc->kva_aligned);
+ trace_sp_k2u_finish(kc, uva, to_task); sp_dump_stack(); return uva; } @@ -2685,7 +2718,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } else uva = ERR_PTR(to_task);
- return sp_k2u_finish(uva, &kc); + return sp_k2u_finish(uva, to_task, &kc); } EXPORT_SYMBOL_GPL(sp_make_share_k2u);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Add a new field, to_task, to struct sp_k2u_context so that is_k2task (now sp_check_k2task) no longer needs magic-number return values.
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 84 ++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 40 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index f3168eed3efc6..aa56d962f6603 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2547,35 +2547,6 @@ static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) return false; }
-/* - * return - * 1 k2task - * 0 k2group - * <0 error code - */ -static int is_k2task(int spg_id) -{ - if (share_pool_group_mode == SINGLE_GROUP_MODE) { - struct sp_group *spg = get_first_group(current->mm); - - if (!spg) { - if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) - return -EINVAL; - else - return 1; - } else { - int ret = 0; - - if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) - ret = -EINVAL; - sp_group_drop(spg); - - return ret; - } - } else - return (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) ? 1 : 0; -} - struct sp_k2u_context { unsigned long kva; unsigned long kva_aligned; @@ -2583,6 +2554,7 @@ struct sp_k2u_context { unsigned long size_aligned; unsigned long sp_flags; int spg_id; + bool to_task; struct timespec64 start; struct timespec64 end; }; @@ -2595,7 +2567,7 @@ static void trace_sp_k2u_begin(struct sp_k2u_context *kc) ktime_get_ts64(&kc->start); }
-static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva, int to_task) +static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva) { unsigned long cost;
@@ -2610,7 +2582,8 @@ static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva, int to_tas pr_err("Task %s(%d/%d) sp_k2u returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, " "sp_flags is %lx, to_task is %d\n", current->comm, current->tgid, current->pid, - (unsigned long)uva, cost, byte2kb(kc->size), byte2kb(kc->size_aligned), kc->sp_flags, to_task); + (unsigned long)uva, cost, byte2kb(kc->size), byte2kb(kc->size_aligned), + kc->sp_flags, kc->to_task); } } static int sp_k2u_prepare(unsigned long kva, unsigned long size, @@ -2658,17 +2631,43 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size, kc->size_aligned = size_aligned; kc->sp_flags = sp_flags; kc->spg_id = spg_id; + kc->to_task = false; return 0; }
-static void *sp_k2u_finish(void *uva, int to_task, struct sp_k2u_context *kc) +static int sp_check_k2task(struct sp_k2u_context *kc) +{ + int ret = 0; + int spg_id = kc->spg_id; + + if (share_pool_group_mode == SINGLE_GROUP_MODE) { + struct sp_group *spg = get_first_group(current->mm); + + if (!spg) { + if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) + ret = -EINVAL; + else + kc->to_task = true; + } else { + if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) + ret = -EINVAL; + sp_group_drop(spg); + } + } else { + if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE) + kc->to_task = true; + } + return ret; +} + +static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc) { if (IS_ERR(uva)) vmalloc_area_clr_flag(kc->kva_aligned, VM_SHAREPOOL); else uva = uva + (kc->kva - kc->kva_aligned);
- trace_sp_k2u_finish(kc, uva, to_task); + trace_sp_k2u_finish(kc, uva); sp_dump_stack(); return uva; } @@ -2694,7 +2693,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id) { void *uva; - int ret, to_task; + int ret; struct sp_k2u_context kc;
check_interrupt_context(); @@ -2703,10 +2702,15 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, if (ret) return ERR_PTR(ret);
- to_task = is_k2task(kc.spg_id); - if (to_task == 1) + ret = sp_check_k2task(&kc); + if (ret) { + uva = ERR_PTR(ret); + goto out; + } + + if (kc.to_task) uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags); - else if (to_task == 0) { + else { struct sp_group *spg;
spg = __sp_find_spg(current->pid, kc.spg_id); @@ -2715,10 +2719,10 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, sp_group_drop(spg); } else uva = ERR_PTR(-ENODEV); - } else - uva = ERR_PTR(to_task); + }
- return sp_k2u_finish(uva, to_task, &kc); +out: + return sp_k2u_finish(uva, &kc); } EXPORT_SYMBOL_GPL(sp_make_share_k2u);
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Add prefix 'mg' for multi-group interfaces.
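A minimal, hypothetical caller sketch for the two group-id lookups below ('pid' and the array size of 16 are assumptions made for illustration):

	int ids[16];
	int num = ARRAY_SIZE(ids);
	int i, id, ret;

	/* legacy single-group interface: one id, or -ENODEV if not in a group */
	id = sp_group_id_by_pid(pid);

	/* multi-group interface: fills the array and updates num */
	ret = mg_sp_group_id_by_pid(pid, ids, &num);
	if (ret < 0)
		return ret;
	for (i = 0; i < num; i++)
		pr_info("pid %d is in sp group %d\n", pid, ids[i]);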
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 96 +++++++++++++++++-- mm/share_pool.c | 187 +++++++++++++++++++++++++++++++------ 2 files changed, 249 insertions(+), 34 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 2c71d7ded2cac..8440afb393e4e 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -199,31 +199,55 @@ static inline void sp_init_mm(struct mm_struct *mm) mm->sp_group_master = NULL; }
-extern int sp_group_add_task(int pid, unsigned long prot, int spg_id); +extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); +extern int sp_group_add_task(int pid, int spg_id); + extern int sp_group_exit(struct mm_struct *mm); extern void sp_group_post_exit(struct mm_struct *mm); -extern int sp_group_id_by_pid(int pid, int *spg_ids, int *num); + +extern int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num); +extern int sp_group_id_by_pid(int pid); + extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)); extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task);
-extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id); +extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); +extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); + extern int sp_free(unsigned long addr); +extern int mg_sp_free(unsigned long addr); + extern void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long sp_flags, int pid, int spg_id); +extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id); + extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); -extern int sp_unshare(unsigned long va, unsigned long size); +extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid); + +extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id); +extern int mg_sp_unshare(unsigned long va, unsigned long size);
extern void sp_area_drop(struct vm_area_struct *vma);
extern int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data); + struct task_struct *tsk, struct sp_walk_data *sp_walk_data); +extern int mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data); + extern void sp_walk_page_free(struct sp_walk_data *sp_walk_data); +extern void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data);
extern int sp_register_notifier(struct notifier_block *nb); extern int sp_unregister_notifier(struct notifier_block *nb); + extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); +extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid); + extern bool is_sharepool_addr(unsigned long addr); +extern bool mg_is_sharepool_addr(unsigned long addr); + extern struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm); extern void sp_proc_stat_drop(struct sp_proc_stat *stat); extern void spa_overview_show(struct seq_file *seq); @@ -341,6 +365,11 @@ extern bool sp_check_mmap_addr(unsigned long addr, unsigned long flags);
#else
+static inline int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) +{ + return -EPERM; +} + static inline int sp_group_add_task(int pid, int spg_id) { return -EPERM; @@ -355,6 +384,11 @@ static inline void sp_group_post_exit(struct mm_struct *mm) { }
+static inline int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) +{ + return -EPERM; +} + static inline int sp_group_id_by_pid(int pid) { return -EPERM; @@ -371,13 +405,29 @@ static inline void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_ return NULL; }
+static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +{ + return NULL; +} + static inline int sp_free(unsigned long addr) { return -EPERM; }
+static inline int mg_sp_free(unsigned long addr) +{ + return -EPERM; +} + static inline void *sp_make_share_k2u(unsigned long kva, unsigned long size, - unsigned long sp_flags, int pid, int spg_id) + unsigned long sp_flags, int pid, int spg_id) +{ + return NULL; +} + +static inline void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id) { return NULL; } @@ -386,11 +436,23 @@ static inline void *sp_make_share_u2k(unsigned long uva, unsigned long size, int { return NULL; } + +static inline void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +{ + return NULL; +} + static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { return -EPERM; }
+static inline int mg_sp_unshare(unsigned long va, unsigned long size) +{ + return -EPERM; +} + + static inline void sp_init_mm(struct mm_struct *mm) { } @@ -400,7 +462,13 @@ static inline void sp_area_drop(struct vm_area_struct *vma) }
static inline int sp_walk_page_range(unsigned long uva, unsigned long size, - struct task_struct *tsk, struct sp_walk_data *sp_walk_data) + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return 0; +} + +static inline int mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) { return 0; } @@ -409,6 +477,10 @@ static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data) { }
+static inline void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ +} + static inline int sp_register_notifier(struct notifier_block *nb) { return -EPERM; @@ -424,11 +496,21 @@ static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id return false; }
+static inline bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +{ + return false; +} + static inline bool is_sharepool_addr(unsigned long addr) { return false; }
+static inline bool mg_is_sharepool_addr(unsigned long addr) +{ + return false; +} + static inline struct sp_proc_stat *sp_get_proc_stat_ref(struct mm_struct *mm) { return NULL; diff --git a/mm/share_pool.c b/mm/share_pool.c index aa56d962f6603..3fc5c14ace44b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -826,10 +826,39 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) }
/** - * sp_group_id_by_pid() - Get the sp_group ID array of a process. + * sp_group_id_by_pid() - Get the sp_group ID of a process. * @pid: pid of target process. - * @spg_ids point to an array to save the group ids the process belongs to - * @num input the spg_ids array size; output the spg number of the process + * + * Return: + * >0 - the sp_group ID. + * -ENODEV - target process doesn't belong to any sp_group. + */ +int sp_group_id_by_pid(int pid) +{ + struct sp_group *spg; + int spg_id = -ENODEV; + + check_interrupt_context(); + + spg = __sp_find_spg(pid, SPG_ID_DEFAULT); + if (!spg) + return -ENODEV; + + down_read(&spg->rw_lock); + if (spg_valid(spg)) + spg_id = spg->id; + up_read(&spg->rw_lock); + + sp_group_drop(spg); + return spg_id; +} +EXPORT_SYMBOL_GPL(sp_group_id_by_pid); + +/** + * mp_sp_group_id_by_pid() - Get the sp_group ID array of a process. + * @pid: pid of target process. + * @spg_ids: point to an array to save the group ids the process belongs to + * @num: input the spg_ids array size; output the spg number of the process * * Return: * >0 - the sp_group ID. @@ -837,7 +866,7 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) * -EINVAL - spg_ids or num is NULL. * -E2BIG - the num of groups process belongs to is larger than *num */ -int sp_group_id_by_pid(int pid, int *spg_ids, int *num) +int mg_sp_group_id_by_pid(int pid, int *spg_ids, int *num) { int ret = 0; struct sp_group_node *node; @@ -882,7 +911,7 @@ int sp_group_id_by_pid(int pid, int *spg_ids, int *num) put_task_struct(tsk); return ret; } -EXPORT_SYMBOL_GPL(sp_group_id_by_pid); +EXPORT_SYMBOL_GPL(mg_sp_group_id_by_pid);
static bool is_online_node_id(int node_id) { @@ -1154,7 +1183,7 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg, * The automatically allocated ID is between [SPG_ID_AUTO_MIN, SPG_ID_AUTO_MAX]. * When negative, the return value is -errno. */ -int sp_group_add_task(int pid, unsigned long prot, int spg_id) +int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id) { struct task_struct *tsk; struct mm_struct *mm; @@ -1391,6 +1420,12 @@ int sp_group_add_task(int pid, unsigned long prot, int spg_id) out: return ret == 0 ? spg_id : ret; } +EXPORT_SYMBOL_GPL(mg_sp_group_add_task); + +int sp_group_add_task(int pid, int spg_id) +{ + return mg_sp_group_add_task(pid, PROT_READ | PROT_WRITE, spg_id); +} EXPORT_SYMBOL_GPL(sp_group_add_task);
/* the caller must hold sp_area_lock */ @@ -1920,6 +1955,12 @@ int sp_free(unsigned long addr) } EXPORT_SYMBOL_GPL(sp_free);
+int mg_sp_free(unsigned long addr) +{ + return sp_free(addr); +} +EXPORT_SYMBOL_GPL(mg_sp_free); + /* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_sem). */ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, @@ -2027,30 +2068,72 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, if (sp_flags & SP_HUGEPAGE_ONLY) sp_flags |= SP_HUGEPAGE;
- if (spg_id != SPG_ID_DEFAULT) { - spg = __sp_find_spg(current->pid, spg_id); - if (!spg) { - pr_err_ratelimited("allocation failed, can't find group\n"); - return -ENODEV; - } + if (share_pool_group_mode == SINGLE_GROUP_MODE) { + spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); + if (spg) { + if (spg_id != SPG_ID_DEFAULT && spg->id != spg_id) { + sp_group_drop(spg); + return -ENODEV; + }
- /* up_read will be at the end of sp_alloc */ - down_read(&spg->rw_lock); - if (!spg_valid(spg)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, spg is dead\n"); - return -ENODEV; + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, spg is dead\n"); + return -ENODEV; + } + } else { /* alocation pass through scene */ + if (enable_mdc_default_group) { + int ret = 0; + + ret = sp_group_add_task(current->tgid, spg_id); + if (ret < 0) { + pr_err_ratelimited("add group failed in pass through\n"); + return ret; + } + + spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT); + + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("pass through allocation failed, spg is dead\n"); + return -ENODEV; + } + } else { + spg = spg_none; + } } + } else { + if (spg_id != SPG_ID_DEFAULT) { + spg = __sp_find_spg(current->pid, spg_id); + if (!spg) { + pr_err_ratelimited("allocation failed, can't find group\n"); + return -ENODEV; + }
- if (!is_process_in_group(spg, current->mm)) { - up_read(&spg->rw_lock); - sp_group_drop(spg); - pr_err_ratelimited("allocation failed, task not in group\n"); - return -ENODEV; + /* up_read will be at the end of sp_alloc */ + down_read(&spg->rw_lock); + if (!spg_valid(spg)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, spg is dead\n"); + return -ENODEV; + } + + if (!is_process_in_group(spg, current->mm)) { + up_read(&spg->rw_lock); + sp_group_drop(spg); + pr_err_ratelimited("allocation failed, task not in group\n"); + return -ENODEV; + } + } else { /* alocation pass through scene */ + spg = spg_none; } - } else { /* alocation pass through scene */ - spg = spg_none; }
if (sp_flags & SP_HUGEPAGE) { @@ -2322,6 +2405,12 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) } EXPORT_SYMBOL_GPL(sp_alloc);
+void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +{ + return sp_alloc(size, sp_flags, spg_id); +} +EXPORT_SYMBOL_GPL(mg_sp_alloc); + /** * is_vmap_hugepage() - Check if a kernel address belongs to vmalloc family. * @addr: the kernel space address to be checked. @@ -2726,6 +2815,13 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } EXPORT_SYMBOL_GPL(sp_make_share_k2u);
+void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int pid, int spg_id) +{ + return sp_make_share_k2u(kva, size, sp_flags, pid, spg_id); +} +EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); + static int sp_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { @@ -3000,6 +3096,12 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) } EXPORT_SYMBOL_GPL(sp_make_share_u2k);
+void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int pid) +{ + return sp_make_share_u2k(uva, size, pid); +} +EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); + /* * Input parameters uva, pid and spg_id are now useless. spg_id will be useful * when supporting a process in multiple sp groups. @@ -3218,7 +3320,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) * * Return: 0 for success, -errno on failure. */ -int sp_unshare(unsigned long va, unsigned long size) +int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id) { int ret = 0;
@@ -3240,6 +3342,12 @@ int sp_unshare(unsigned long va, unsigned long size) } EXPORT_SYMBOL_GPL(sp_unshare);
+int mg_sp_unshare(unsigned long va, unsigned long size) +{ + return sp_unshare(va, size, 0, 0); +} +EXPORT_SYMBOL_GPL(mg_sp_unshare); + /** * sp_walk_page_range() - Walk page table with caller specific callbacks. * @uva: the start VA of user memory. @@ -3291,6 +3399,13 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, } EXPORT_SYMBOL_GPL(sp_walk_page_range);
+int mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return sp_walk_page_range(uva, size, tsk, sp_walk_data); +} +EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); + /** * sp_walk_page_free() - Free the sp_walk_data structure. * @sp_walk_data: a structure of a page pointer array to be freed. @@ -3306,6 +3421,12 @@ void sp_walk_page_free(struct sp_walk_data *sp_walk_data) } EXPORT_SYMBOL_GPL(sp_walk_page_free);
+void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ + return sp_walk_page_free(sp_walk_data); +} +EXPORT_SYMBOL_GPL(mg_sp_walk_page_free); + int sp_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&sp_notifier_chain, nb); @@ -3343,6 +3464,12 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) } EXPORT_SYMBOL_GPL(sp_config_dvpp_range);
+bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) +{ + return sp_config_dvpp_range(start, size, device_id, pid); +} +EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range); + static bool is_sp_normal_addr(unsigned long addr) { return addr >= MMAP_SHARE_POOL_START && @@ -3365,6 +3492,12 @@ bool is_sharepool_addr(unsigned long addr) } EXPORT_SYMBOL_GPL(is_sharepool_addr);
+bool mg_is_sharepool_addr(unsigned long addr) +{ + return is_sharepool_addr(addr); +} +EXPORT_SYMBOL_GPL(mg_is_sharepool_addr); + int sp_node_id(struct vm_area_struct *vma) { struct sp_area *spa;
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Consider a process that gets a uva from sp_alloc() and then lets multiple processes call sp_free() to release it. This is obviously a double-free problem and must not be allowed.
The same reasoning applies to sp_unshare_uva().
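The guard pattern, condensed from the two hunks below (the spa has already been looked up and referenced; 'drop_spa' stands for the existing cleanup label):

	down_write(&spa->spg->rw_lock);
	if (unlikely(spa->is_dead)) {		/* already freed or unshared once */
		up_write(&spa->spg->rw_lock);
		pr_err_ratelimited("unexpected double sp free\n");
		ret = -EINVAL;
		goto drop_spa;
	}
	spa->is_dead = true;			/* mark the first, legal release */
	up_write(&spa->spg->rw_lock);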
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 3fc5c14ace44b..19580b85d77fb 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1901,6 +1901,13 @@ static int sp_free_get_spa(struct sp_free_context *fc) /* we must return success(0) in this situation */ } /* the life cycle of spa has a direct relation with sp group */ + if (unlikely(spa->is_dead)) { + up_write(&spa->spg->rw_lock); + pr_err_ratelimited("unexpected double sp free\n"); + dump_stack(); + ret = -EINVAL; + goto drop_spa; + } spa->is_dead = true; up_write(&spa->spg->rw_lock);
@@ -3236,6 +3243,13 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) goto out_clr_flag; } /* the life cycle of spa has a direct relation with sp group */ + if (unlikely(spa->is_dead)) { + up_write(&spa->spg->rw_lock); + pr_err_ratelimited("unexpected double sp unshare\n"); + dump_stack(); + ret = -EINVAL; + goto out_drop_area; + } spa->is_dead = true; up_write(&spa->spg->rw_lock);
@@ -3264,6 +3278,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size) return ret; }
+/* No possible concurrent protection, take care when use */ static int sp_unshare_kva(unsigned long kva, unsigned long size) { unsigned long addr, kva_aligned;
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
When all the processes in an sp group are exiting, the sp areas are freed as well. We need to clear the VM_SHAREPOOL flag for the k2u sp areas, otherwise __vunmap() will fail later.
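Condensed from the sp_free_area() hunk below: a k2u spa remembers the kva it was created from, and its VM_SHAREPOOL flag is cleared here so that the later vfree()/__vunmap() of that vmalloc area is not refused:

	if (spa->kva) {
		if (!vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL))
			pr_debug("clear spa->kva %ld is not valid\n", spa->kva);
	}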
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 19580b85d77fb..64d9d9198830d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1641,6 +1641,19 @@ static struct sp_area *__find_sp_area(unsigned long addr) return n; }
+static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) +{ + struct vm_struct *area; + + area = find_vm_area((void *)kva); + if (area) { + area->flags &= ~flags; + return true; + } + + return false; +} + /* * Free the VA region starting from addr to the share pool */ @@ -1669,6 +1682,11 @@ static void sp_free_area(struct sp_area *spa) } }
+ if (spa->kva) { + if (!vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL)) + pr_debug("clear spa->kva %ld is not valid\n", spa->kva); + } + spa_dec_usage(spa); if (spa->spg != spg_none) list_del(&spa->link); @@ -2630,19 +2648,6 @@ static bool vmalloc_area_set_flag(unsigned long kva, unsigned long flags) return false; }
-static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags) -{ - struct vm_struct *area; - - area = find_vm_area((void *)kva); - if (area) { - area->flags &= ~flags; - return true; - } - - return false; -} - struct sp_k2u_context { unsigned long kva; unsigned long kva_aligned;
From: Tang Yizhou tangyizhou@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Use sp_check_caller_permission() to check whether the caller of sp_free() and of k2u-to-spg sharing is a member of the sp group.
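The visible effect on callers, sketched under the assumption that the calling task's mm is not on the target group's member list:

	ret = sp_free(addr);	/* now fails with -EPERM instead of freeing
				 * another group's memory */

	uva = sp_make_share_k2u(kva, size, 0, pid, spg_id);
	/* now returns ERR_PTR(-EPERM) instead of mapping into that group */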
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 64d9d9198830d..5768a77ab7eb5 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1860,6 +1860,18 @@ static void sp_free_unmap_fallocate(struct sp_area *spa) } }
+static int sp_check_caller_permission(struct sp_group *spg, struct mm_struct *mm) +{ + int ret = 0; + + down_read(&spg->rw_lock); + if (!is_process_in_group(spg, mm)) + ret = -EPERM; + up_read(&spg->rw_lock); + return ret; +} + + #define FREE_CONT 1 #define FREE_END 2
@@ -1902,13 +1914,9 @@ static int sp_free_get_spa(struct sp_free_context *fc) if (!current->mm) goto check_spa;
- down_read(&spa->spg->rw_lock); - if (!is_process_in_group(spa->spg, current->mm)) { - up_read(&spa->spg->rw_lock); - ret = -EPERM; + ret = sp_check_caller_permission(spa->spg, current->mm); + if (ret < 0) goto drop_spa; - } - up_read(&spa->spg->rw_lock);
check_spa: down_write(&spa->spg->rw_lock); @@ -2816,6 +2824,12 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
spg = __sp_find_spg(current->pid, kc.spg_id); if (spg) { + ret = sp_check_caller_permission(spg, current->mm); + if (ret < 0) { + sp_group_drop(spg); + uva = ERR_PTR(ret); + goto out; + } uva = sp_make_share_kva_to_spg(kc.kva_aligned, kc.size_aligned, kc.sp_flags, spg); sp_group_drop(spg); } else
From: guomengqi guomengqi3@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Add the mg_sp_group_del_task()/sp_group_del_task() interface to support removing a process from a share pool group.
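A minimal, hypothetical usage sketch (assuming the existing SPG_ID_AUTO request value for an auto-allocated group id; every sp area of the group must be freed before deletion can succeed):

	int spg_id, ret;

	spg_id = sp_group_add_task(pid, SPG_ID_AUTO);	/* join or create a group */
	if (spg_id < 0)
		return spg_id;

	/* ... allocate, share and free share pool memory ... */

	ret = mg_sp_group_del_task(pid, spg_id);	/* -EINVAL while the group's
							 * spa list is not empty */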
Signed-off-by: guomengqi guomengqi3@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 13 ++++ mm/share_pool.c | 128 +++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 8440afb393e4e..e421587ff9773 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -202,6 +202,9 @@ static inline void sp_init_mm(struct mm_struct *mm) extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id); extern int sp_group_add_task(int pid, int spg_id);
+extern int mg_sp_group_del_task(int pid, int spg_id); +extern int sp_group_del_task(int pid, int spg_id); + extern int sp_group_exit(struct mm_struct *mm); extern void sp_group_post_exit(struct mm_struct *mm);
@@ -375,6 +378,16 @@ static inline int sp_group_add_task(int pid, int spg_id) return -EPERM; }
+static inline int mg_sp_group_del_task(int pid, int spg_id) +{ + return -EPERM; +} + +static inline int sp_group_del_task(int pid, int spg_id) +{ + return -EPERM; +} + static inline int sp_group_exit(struct mm_struct *mm) { return 0; diff --git a/mm/share_pool.c b/mm/share_pool.c index 5768a77ab7eb5..001c37d76a4b2 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1428,6 +1428,134 @@ int sp_group_add_task(int pid, int spg_id) } EXPORT_SYMBOL_GPL(sp_group_add_task);
+static void free_spg_proc_stat(struct mm_struct *mm, int spg_id) +{ + int i; + struct sp_proc_stat *proc_stat = sp_get_proc_stat(mm); + struct spg_proc_stat *stat; + struct sp_spg_stat *spg_stat; + struct hlist_node *tmp; + + hash_for_each_safe(proc_stat->hash, i, tmp, stat, pnode) { + if (stat->spg_id == spg_id) { + spg_stat = stat->spg_stat; + mutex_lock(&spg_stat->lock); + hash_del(&stat->gnode); + mutex_unlock(&spg_stat->lock); + hash_del(&stat->pnode); + kfree(stat); + break; + } + } +} + +/** + * mg_sp_group_del_task() - delete a process from a sp group. + * @pid: the pid of the task to be deleted + * @spg_id: sharepool group id + * + * the group's spa list must be empty, or deletion will fail. + * + * Return: + * * if success, return 0. + * * -EINVAL, spg_id invalid or spa_lsit not emtpy or spg dead + * * -ESRCH, the task group of pid is not in group / process dead + */ +int mg_sp_group_del_task(int pid, int spg_id) +{ + int ret = 0; + struct sp_group *spg; + struct sp_group_node *spg_node; + struct task_struct *tsk = NULL; + struct mm_struct *mm = NULL; + bool is_alive = true; + + if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) { + pr_err_ratelimited("del from group failed, invalid group id %d\n", spg_id); + return -EINVAL; + } + + spg = __sp_find_spg(pid, spg_id); + if (!spg) { + pr_err_ratelimited("spg not found or get task failed."); + return -EINVAL; + } + down_write(&sp_group_sem); + + if (!spg_valid(spg)) { + up_write(&sp_group_sem); + pr_err_ratelimited("spg dead."); + ret = -EINVAL; + goto out; + } + + if (!list_empty(&spg->spa_list)) { + up_write(&sp_group_sem); + pr_err_ratelimited("spa is not empty"); + ret = -EINVAL; + goto out; + } + + ret = get_task(pid, &tsk); + if (ret) { + up_write(&sp_group_sem); + pr_err_ratelimited("task is not found"); + goto out; + } + mm = get_task_mm(tsk->group_leader); + if (!mm) { + up_write(&sp_group_sem); + pr_err_ratelimited("mm is not found"); + ret = -ESRCH; + goto out_put_task; + } + + spg_node = is_process_in_group(spg, mm); + if (!spg_node) { + up_write(&sp_group_sem); + pr_err_ratelimited("process not in group"); + ret = -ESRCH; + goto out_put_mm; + } + + down_write(&spg->rw_lock); + if (list_is_singular(&spg->procs)) + is_alive = spg->is_alive = false; + spg->proc_num--; + list_del(&spg_node->proc_node); + sp_group_drop(spg); + up_write(&spg->rw_lock); + if (!is_alive) + blocking_notifier_call_chain(&sp_notifier_chain, 0, spg); + + list_del(&spg_node->group_node); + mm->sp_group_master->count--; + kfree(spg_node); + if (atomic_sub_and_test(1, &mm->mm_users)) { + up_write(&sp_group_sem); + WARN(1, "Invalid user counting\n"); + return -EINVAL; + } + + free_spg_proc_stat(mm, spg_id); + up_write(&sp_group_sem); + +out_put_mm: + mmput(mm); +out_put_task: + put_task_struct(tsk); +out: + sp_group_drop(spg); /* if spg dead, freed here */ + return ret; +} +EXPORT_SYMBOL_GPL(mg_sp_group_del_task); + +int sp_group_del_task(int pid, int spg_id) +{ + return mg_sp_group_del_task(pid, spg_id); +} +EXPORT_SYMBOL_GPL(sp_group_del_task); + /* the caller must hold sp_area_lock */ static void __insert_sp_area(struct sp_area *spa) {
From: Fang Lijun fanglijun3@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
Signed-off-by: Fang Lijun fanglijun3@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/mm.h | 6 ++++++ include/linux/share_pool.h | 6 ------ include/linux/vmalloc.h | 4 ++++ mm/vmalloc.c | 19 +++++++++++++++++++ 4 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ee7214f17bee..f801fa5e60289 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -156,6 +156,12 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
+/* to align the pointer to the (next) PMD hugepage boundary */ +#define PMD_ALIGN(addr) ALIGN(addr, PMD_SIZE) + +/* test whether an address (unsigned long or pointer) is aligned to PMD_SIZE */ +#define PMD_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PMD_SIZE) + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index e421587ff9773..6bd03c3504c42 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -26,12 +26,6 @@
#define MAX_DEVID 2 /* the max num of Da-vinci devices */
-/* to align the pointer to the (next) PMD boundary */ -#define PMD_ALIGN(addr) ALIGN(addr, PMD_SIZE) - -/* test whether an address (unsigned long or pointer) is aligned to PMD_SIZE */ -#define PMD_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PMD_SIZE) - extern int sysctl_share_pool_hugepage_enable;
extern int sysctl_ac_mode; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 298eff5579b21..7322909aed157 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -151,12 +151,16 @@ static inline size_t get_vm_area_size(const struct vm_struct *area) extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller); +extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); +extern int map_vm_area(struct vm_struct *area, pgprot_t prot, + struct page **pages);
#ifdef CONFIG_MMU int vmap_range(unsigned long addr, unsigned long end, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9bd49a700707e..36f0e6d94a957 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2206,6 +2206,17 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) flush_tlb_kernel_range(addr, end); }
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) +{ + unsigned long addr = (unsigned long)area->addr; + int err; + + err = map_kernel_range(addr, get_vm_area_size(area), prot, pages); + + return err > 0 ? 0 : err; +} +EXPORT_SYMBOL_GPL(map_vm_area); + static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { @@ -2264,6 +2275,14 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return area; }
+struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end) +{ + return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, + GFP_KERNEL, __builtin_return_address(0)); +} +EXPORT_SYMBOL_GPL(__get_vm_area); + struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller)
From: Fang Lijun fanglijun3@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-------------------------------------------------
__vmalloc_node() is exported because mbuff passes __GFP_ACCOUNT in gfp_mask to limit the memory usage of vmalloc() with the memory cgroup.
Add a new parameter vm_flags to __vmalloc_node() because VM_USERMAP and VM_HUGE_PAGES are needed by vmalloc_hugepage_user().
With HAVE_ARCH_HUGE_VMALLOC selected, vmalloc_hugepage_user() can allocate hugepage memory, and vmalloc() will also use hugepages when possible. Reference: https://lwn.net/Articles/839107/
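A hypothetical mbuff-style caller of the two exported helpers (the size, flags and zeroing are illustrative, not taken from mbuff itself):

	/* user-mappable vmalloc buffer charged to the memory cgroup */
	void *buf = __vmalloc_node(size, 1,
				   GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
				   PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE,
				   __builtin_return_address(0));

	/* hugepage-backed, user-mappable buffer; size is PMD-aligned internally */
	void *hbuf = vmalloc_hugepage_user(size);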
Signed-off-by: Tang Yizhou tangyizhou@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/Kconfig | 4 -- arch/arm64/Kconfig | 1 - include/linux/share_pool.h | 25 ---------- include/linux/vmalloc.h | 7 ++- mm/share_pool.c | 93 -------------------------------------- mm/vmalloc.c | 74 +++++++++++++++++++++++------- 6 files changed, 64 insertions(+), 140 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig index e877b083238de..00f55932ba781 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -562,10 +562,6 @@ config HAVE_ARCH_HUGE_VMAP config HAVE_ARCH_HUGE_VMALLOC depends on HAVE_ARCH_HUGE_VMAP bool - help - Archs that select this would be capable of PMD-sized vmaps (i.e., - arch_vmap_pmd_supported() returns true), and they must make no - assumptions that vmalloc memory is mapped with PAGE_SIZE ptes.
config HAVE_ARCH_SOFT_DIRTY bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f2106f2cb6edf..9d49b9524e1d4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1467,7 +1467,6 @@ config ASCEND_SHARE_POOL default n select ARCH_USES_HIGH_VMA_FLAGS select MM_OWNER - depends on HAVE_ARCH_HUGE_VMALLOC help This feature allows multiple processes to share virtual memory both in kernel and user level, which is only enabled for ascend platform. diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 6bd03c3504c42..442ee1ed5fa65 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -326,11 +326,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, struct address_space *mapping, pgoff_t idx, unsigned long address, pte_t *ptep, unsigned int flags);
-extern void *vmalloc_hugepage(unsigned long size); -extern void *vmalloc_hugepage_user(unsigned long size); -extern void *vzalloc_user_account(unsigned long size, int node); -extern void *vzalloc_hugepage_user_account(unsigned long size, int node); - void sp_exit_mm(struct mm_struct *mm);
static inline bool is_vmalloc_huge(unsigned long vm_flags) @@ -593,26 +588,6 @@ static inline bool ascend_sp_oom_show(void) return false; }
-static inline void *vmalloc_hugepage(unsigned long size) -{ - return NULL; -} - -static inline void *vmalloc_hugepage_user(unsigned long size) -{ - return NULL; -} - -static inline void *vzalloc_user_account(unsigned long size, int node) -{ - return NULL; -} - -static inline void *vzalloc_hugepage_user_account(unsigned long size, int node) -{ - return NULL; -} - static inline bool is_vmalloc_huge(unsigned long vm_flags) { return NULL; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 7322909aed157..f7cc858732e29 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -94,11 +94,16 @@ extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); +extern void *__vmalloc_node(unsigned long size, unsigned long align, + gfp_t gfp_mask, pgprot_t prot, + unsigned long vm_flags, + int node, const void *caller); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); - +extern void *vmalloc_hugepage(unsigned long size); +extern void *vmalloc_hugepage_user(unsigned long size); #ifndef CONFIG_MMU extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, diff --git a/mm/share_pool.c b/mm/share_pool.c index 001c37d76a4b2..8b18648414344 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4467,104 +4467,11 @@ struct page *sp_alloc_pages(struct vm_struct *area, gfp_t mask, return alloc_pages_node(node, mask, page_order); }
-/** - * vmalloc_hugepage - allocate virtually contiguous hugetlb memory - * @size: allocation size - * - * Allocate enough huge pages to cover @size and map them into - * contiguous kernel virtual space. - * - * The allocation size is aligned to PMD_SIZE automatically - */ -void *vmalloc_hugepage(unsigned long size) -{ - /* PMD hugepage aligned */ - size = PMD_ALIGN(size); - - return __vmalloc_node_range(size, PMD_SIZE, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL, - VM_HUGE_PAGES, NUMA_NO_NODE, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(vmalloc_hugepage); - -/** - * vmalloc_hugepage_user - allocate virtually contiguous hugetlb memory - * for userspace - * @size: allocation size - * - * Allocate enough huge pages to cover @size and map them into - * contiguous kernel virtual space. The resulting memory area - * is zeroed so it can be mapped to userspace without leaking data. - * - * The allocation size is aligned to PMD_SIZE automatically - */ -void *vmalloc_hugepage_user(unsigned long size) -{ - /* PMD hugepage aligned */ - size = PMD_ALIGN(size); - - return __vmalloc_node_range(size, PMD_SIZE, VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, - VM_HUGE_PAGES | VM_USERMAP, NUMA_NO_NODE, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(vmalloc_hugepage_user); - -/** - * vzalloc_user_account - allocate zeroed virtually contiguous memory - * for userspace - * @size: allocation size - * @node: NUMA node id - * - * The resulting memory area is zeroed so it can be mapped to userspace - * without leaking data. - * - * Compare to vmalloc_user(), this is a customized function because - * __GFP_ACCOUNT is used to limit memory usage. - */ -void *vzalloc_user_account(unsigned long size, int node) -{ - return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, PAGE_KERNEL, - VM_USERMAP, node, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(vzalloc_user_account); - -/** - * vzalloc_hugepage_user_account - allocate virtually contiguous hugetlb memory - * for userspace - * @size: allocation size - * @node: NUMA node id - * - * Allocate enough huge pages to cover @size and map them into - * contiguous kernel virtual space. The resulting memory area - * is zeroed so it can be mapped to userspace without leaking data. - * - * The allocation size is aligned to PMD_SIZE automatically - * - * Compare to vmalloc_hugepage_user(), this is a customized function because - * __GFP_ACCOUNT is used to limit memory usage. - */ -void *vzalloc_hugepage_user_account(unsigned long size, int node) -{ - /* PMD hugepage aligned */ - size = PMD_ALIGN(size); - - return __vmalloc_node_range(size, PMD_SIZE, VMALLOC_START, VMALLOC_END, - GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, PAGE_KERNEL, - VM_HUGE_PAGES | VM_USERMAP, node, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(vzalloc_hugepage_user_account); - int enable_ascend_share_pool;
static int __init enable_share_pool(char *s) { enable_ascend_share_pool = 1; - vmap_allow_huge = true;
pr_info("Ascend enable share pool features\n");
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 36f0e6d94a957..c3df82a8ee422 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -43,7 +43,7 @@ #include "internal.h"
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC -bool __ro_after_init vmap_allow_huge; +bool __ro_after_init vmap_allow_huge = true;
static int __init set_nohugevmalloc(char *str) { @@ -2574,9 +2574,6 @@ void *vmap_hugepage(struct page **pages, unsigned int count, } EXPORT_SYMBOL(vmap_hugepage);
-static void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, pgprot_t prot, - int node, const void *caller); static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, unsigned int page_shift, int node) { @@ -2599,7 +2596,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, - PAGE_KERNEL, node, area->caller); + PAGE_KERNEL, 0, node, area->caller); } else { pages = kmalloc_node(array_size, nested_gfp, node); } @@ -2732,6 +2729,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, * @size: allocation size * @align: desired alignment * @gfp_mask: flags for the page level allocator + * @vm_flags: flags for vm_struct * @prot: protection mask for the allocated pages * @node: node to use for allocation or NUMA_NO_NODE * @caller: caller's return address @@ -2747,17 +2745,18 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, * with mm people. * */ -static void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, pgprot_t prot, +void *__vmalloc_node(unsigned long size, unsigned long align, + gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, - gfp_mask, prot, 0, node, caller); + gfp_mask, prot, vm_flags, node, caller); } +EXPORT_SYMBOL(__vmalloc_node);
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { - return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE, + return __vmalloc_node(size, 1, gfp_mask, prot, 0, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(__vmalloc); @@ -2765,7 +2764,7 @@ EXPORT_SYMBOL(__vmalloc); static inline void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags) { - return __vmalloc_node(size, 1, flags, PAGE_KERNEL, + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, 0, node, __builtin_return_address(0)); }
@@ -2773,7 +2772,7 @@ static inline void *__vmalloc_node_flags(unsigned long size, void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, void *caller) { - return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller); + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, 0, node, caller); }
/** @@ -2823,7 +2822,7 @@ void *vmalloc_user(unsigned long size)
ret = __vmalloc_node(size, SHMLBA, GFP_KERNEL | __GFP_ZERO, - PAGE_KERNEL, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); @@ -2846,7 +2845,7 @@ EXPORT_SYMBOL(vmalloc_user); */ void *vmalloc_node(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, 0, node, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_node); @@ -2884,7 +2883,7 @@ EXPORT_SYMBOL(vzalloc_node);
void *vmalloc_exec(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); }
@@ -2909,7 +2908,7 @@ void *vmalloc_exec(unsigned long size) */ void *vmalloc_32(unsigned long size) { - return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, + return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32); @@ -2927,7 +2926,7 @@ void *vmalloc_32_user(unsigned long size) void *ret;
ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, - NUMA_NO_NODE, __builtin_return_address(0)); + 0, NUMA_NO_NODE, __builtin_return_address(0)); if (ret) { area = find_vm_area(ret); area->flags |= VM_USERMAP; @@ -2936,6 +2935,49 @@ void *vmalloc_32_user(unsigned long size) } EXPORT_SYMBOL(vmalloc_32_user);
+#ifdef CONFIG_ASCEND_SHARE_POOL +/** + * vmalloc_hugepage - allocate virtually contiguous hugetlb memory + * @size: allocation size + * + * Allocate enough huge pages to cover @size and map them into + * contiguous kernel virtual space. + * + * The allocation size is aligned to PMD_SIZE automatically + */ +void *vmalloc_hugepage(unsigned long size) +{ + /* PMD hugepage aligned */ + size = PMD_ALIGN(size); + + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, + VM_HUGE_PAGES, NUMA_NO_NODE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(vmalloc_hugepage); + +/** + * vmalloc_hugepage_user - allocate virtually contiguous hugetlb memory + * for userspace + * @size: allocation size + * + * Allocate enough huge pages to cover @size and map them into + * contiguous kernel virtual space. The resulting memory area + * is zeroed so it can be mapped to userspace without leaking data. + * + * The allocation size is aligned to PMD_SIZE automatically + */ +void *vmalloc_hugepage_user(unsigned long size) +{ + /* PMD hugepage aligned */ + size = PMD_ALIGN(size); + + return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, + VM_HUGE_PAGES | VM_USERMAP, NUMA_NO_NODE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(vmalloc_hugepage_user); +#endif
/* * small helper routine , copy contents to buf from addr.
From: Fang Lijun fanglijun3@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
Signed-off-by: Fang Lijun fanglijun3@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/share_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c index 8b18648414344..c6dcae92f67b5 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -80,7 +80,7 @@ static int __read_mostly enable_mdc_default_group; static const int mdc_default_group_id = 1;
/* share the uva to the whole group */ -static int __read_mostly enable_share_k2u_spg; +static int __read_mostly enable_share_k2u_spg = 1;
/* access control mode */ int sysctl_ac_mode = AC_NONE;
From: Zhou Guanghui zhouguanghui1@huawei.com
ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
-----------------------------------------------------------
Solve the problem of letting the 4G DVPP address range coexist with the share pool.
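The intended interaction, condensed from the arch_get_unmapped_area() hunks below: a MAP_DVPP request is handled by the DVPP range logic and is no longer clamped below MMAP_SHARE_POOL_START, while all other requests still avoid the share pool range:

	if (enable_mmap_dvpp)
		dvpp_mmap_get_area(&info, flags);	/* MAP_DVPP mappings */

	sp_area_work_around(&info, flags);		/* no-op for MAP_DVPP; otherwise
							 * caps high_limit at
							 * MMAP_SHARE_POOL_START */

	addr = vm_unmapped_area(&info);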
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/share_pool.h | 11 ++++++++--- mm/mmap.c | 6 +++--- 2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 442ee1ed5fa65..cd4c305449dd9 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -281,9 +281,13 @@ static inline bool sp_check_vm_huge_page(unsigned long flags) return false; }
-static inline void sp_area_work_around(struct vm_unmapped_area_info *info) +static inline void sp_area_work_around(struct vm_unmapped_area_info *info, + unsigned long flags) { - if (enable_ascend_share_pool) + /* the MAP_DVPP couldn't work with MAP_SHARE_POOL. In addition, the + * address ranges corresponding to the two flags must not overlap. + */ + if (enable_ascend_share_pool && !(flags & MAP_DVPP)) info->high_limit = min(info->high_limit, MMAP_SHARE_POOL_START); }
@@ -554,7 +558,8 @@ static inline bool sp_check_vm_huge_page(unsigned long flags) return false; }
-static inline void sp_area_work_around(struct vm_unmapped_area_info *info) +static inline void sp_area_work_around(struct vm_unmapped_area_info *info, + unsigned long flags) { }
diff --git a/mm/mmap.c b/mm/mmap.c index 4c8092b2f26fd..e529384b8ac62 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2380,7 +2380,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (enable_mmap_dvpp) dvpp_mmap_get_area(&info, flags);
- sp_area_work_around(&info); + sp_area_work_around(&info, flags);
return vm_unmapped_area(&info); } @@ -2435,7 +2435,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (enable_mmap_dvpp) dvpp_mmap_get_area(&info, flags);
- sp_area_work_around(&info); + sp_area_work_around(&info, flags);
addr = vm_unmapped_area(&info);
@@ -2454,7 +2454,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (enable_mmap_dvpp) dvpp_mmap_get_area(&info, flags);
- sp_area_work_around(&info); + sp_area_work_around(&info, flags);
addr = vm_unmapped_area(&info); }
From: Zhou Guanghui zhouguanghui1@huawei.com
ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
--------------------------------------------------
The user needs the process pid, that is, the task's tgid.
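For reference: in a multi-threaded process each thread has its own current->pid (the thread id), while current->tgid is shared by the whole thread group and is what userspace sees as the process PID, hence:

	slot->pid = current->tgid;	/* record the process id, not the id of
					 * whichever thread made the call */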
Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/char/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/char/svm.c b/drivers/char/svm.c index 9c2965f6d4636..7289750dedd25 100644 --- a/drivers/char/svm.c +++ b/drivers/char/svm.c @@ -348,7 +348,7 @@ static void svm_set_slot_valid(unsigned long index, unsigned long phys)
*((unsigned long *)slot->data) = phys; slot->image_word = SVM_IMAGE_WORD_VALID; - slot->pid = current->pid; + slot->pid = current->tgid; slot->data_type = SVM_VA2PA_TYPE_DMA; __bitmap_set(va2pa_trunk.bitmap, index, 1); va2pa_trunk.slot_used++;