From: Ding Tianhong <dingtianhong@huawei.com>
ascend inclusion
category: feature
bugzilla: NA
CVE: NA
-------------------------------------------------
The share pool is widely used by several accelerators, and it is difficult to debug user problems, so add a debug mode to help analyse them. This mode is enabled by the sysctl_sp_debug_mode flag.
Some functions have been refactored to protect the critical sections correctly and to output messages more clearly.
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Signed-off-by: Wu Peng <wupeng58@huawei.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/share_pool.h |  15 +-
 kernel/sysctl.c            |   9 +
 mm/share_pool.c            | 537 +++++++++++++++++++++----------------
 3 files changed, 326 insertions(+), 235 deletions(-)
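For testing, the new mode can be flipped from userspace. A minimal sketch, assuming the entry is exposed as /proc/sys/vm/sharepool_debug_mode (it is added to vm_table below; this snippet is an illustration only, not part of the patch):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path: vm_table entries appear under /proc/sys/vm/. */
	int fd = open("/proc/sys/vm/sharepool_debug_mode", O_WRONLY);

	if (fd < 0) {
		perror("open sharepool_debug_mode");
		return 1;
	}
	if (write(fd, "1", 1) != 1)	/* write "0" to disable again */
		perror("write");
	close(fd);
	return 0;
}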
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 09afbae33d41..2557ef138122 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -5,6 +5,7 @@ #include <linux/mm_types.h> #include <linux/notifier.h> #include <linux/vmalloc.h> +#include <linux/printk.h>
#define SP_HUGEPAGE (1 << 0) #define SP_HUGEPAGE_ONLY (1 << 1) @@ -35,6 +36,8 @@ extern int sysctl_share_pool_hugepage_enable;
extern int sysctl_ac_mode;
+extern int sysctl_sp_debug_mode; + extern int enable_ascend_share_pool;
/* Processes in the same sp_group can share memory. @@ -70,7 +73,7 @@ struct sp_group { /* number of sp_area */ atomic_t spa_num; /* total size of all sp_area from sp_alloc and k2u(spg) */ - atomic_t size; + atomic64_t size; /* record the number of hugepage allocation failures */ int hugepage_failures; /* is_alive == false means it's being destroyed */ @@ -211,6 +214,12 @@ static inline bool sp_mmap_check(unsigned long flags) return false; }
+static inline void sp_dump_stack(void) +{ + if (sysctl_sp_debug_mode) + dump_stack(); +} + #else
static inline int sp_group_add_task(int pid, int spg_id) @@ -349,6 +358,10 @@ static inline bool sp_mmap_check(unsigned long flags) { return false; } + +static inline void sp_dump_stack(void) +{ +} #endif
#endif /* LINUX_SHARE_POOL_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 61e62f1ccee4..26c215fb37dc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1737,6 +1737,15 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "sharepool_debug_mode", + .data = &sysctl_sp_debug_mode, + .maxlen = sizeof(sysctl_sp_debug_mode), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #endif { } }; diff --git a/mm/share_pool.c b/mm/share_pool.c index fcbc831f7f8c..24c5dd680451 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -57,6 +57,8 @@ static const int mdc_default_group_id = 1;
/* access control mode */ int sysctl_ac_mode = AC_NONE; +/* debug mode */ +int sysctl_sp_debug_mode;
/* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); @@ -85,9 +87,11 @@ struct sp_proc_stat { /* for kthread buff_module_guard_work */ static struct sp_proc_stat kthread_stat = {0};
-/* The caller must hold sp_mutex. */ -static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk) -{ +/* + * The caller must hold sp_mutex and ensure no concurrency problem + * for task_struct and mm_struct. + */ +static struct sp_proc_stat *sp_init_proc_stat(struct task_struct *tsk) { struct sp_proc_stat *stat; int id = tsk->mm->sp_stat_id; int tgid = tsk->tgid; @@ -138,7 +142,7 @@ static struct sp_spa_stat spa_stat = {0}; /* statistics of all sp group born from sp_alloc and k2u(spg) */ struct sp_spg_stat { atomic_t spa_total_num; - atomic_t spa_total_size; + atomic64_t spa_total_size; };
static struct sp_spg_stat spg_stat = {0}; @@ -166,10 +170,11 @@ struct sp_area { struct list_head link; /* link to the spg->head */ struct sp_group *spg; enum spa_type type; /* where spa born from */ + struct mm_struct *mm; /* owner of k2u(task) */ }; static DEFINE_SPINLOCK(sp_area_lock); static struct rb_root sp_area_root = RB_ROOT; -bool host_svm_sp_enable = false; +static bool host_svm_sp_enable = false;
int sysctl_share_pool_hugepage_enable = 1;
@@ -241,7 +246,7 @@ static int spa_dec_usage(enum spa_type type, unsigned long size) return 0; }
-static void *sp_mmap(struct mm_struct *mm, struct file *file, +static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate);
static void free_sp_group(struct sp_group *spg) @@ -274,7 +279,18 @@ static struct sp_group *__sp_find_spg(int pid, int spg_id) if (ret) return NULL;
- spg = tsk->mm->sp_group; + /* + * Once we encounter a concurrency problem here. + * To fix it, we believe get_task_mm() and mmput() is too + * heavy because we just get the pointer of sp_group. + */ + task_lock(tsk); + if (tsk->mm == NULL) + spg = NULL; + else + spg = tsk->mm->sp_group; + task_unlock(tsk); + put_task_struct(tsk); } else { spg = idr_find(&sp_group_idr, spg_id); @@ -318,7 +334,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id) } spg->id = spg_id; atomic_set(&spg->spa_num, 0); - atomic_set(&spg->size, 0); + atomic64_set(&spg->size, 0); spg->is_alive = true; spg->hugepage_failures = 0; spg->dvpp_multi_spaces = false; @@ -377,9 +393,6 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct list_head *stop) struct sp_area *spa, *prev = NULL; int err;
- if (!mmget_not_zero(mm)) - return; - down_write(&mm->mmap_sem); spin_lock(&sp_area_lock);
list_for_each_entry(spa, &mm->sp_group->spa_list, link) { @@ -406,8 +419,17 @@ static void sp_munmap_task_areas(struct mm_struct *mm, struct list_head *stop) __sp_area_drop_locked(prev);
spin_unlock(&sp_area_lock); - up_write(&mm->mmap_sem); - mmput(mm); +} + +/* The caller must hold sp_mutex. */ +static void __sp_group_drop_locked(struct sp_group *spg) +{ + bool is_alive = spg->is_alive; + + if (atomic_dec_and_test(&spg->use_count)) { + BUG_ON(is_alive); + free_sp_group(spg); + } }
/** @@ -446,8 +468,9 @@ int sp_group_add_task(int pid, int spg_id) spg = idr_find(&sp_group_idr, spg_id); if (!spg_valid(spg)) { mutex_unlock(&sp_mutex); - pr_err("share pool: task add group failed because group id %d hasn't been create or dead\n", - spg_id); + if (printk_ratelimit()) + pr_err("share pool: task add group failed because group id %d " + "hasn't been create or dead\n", spg_id); return -EINVAL; } mutex_unlock(&sp_mutex); @@ -457,7 +480,9 @@ int sp_group_add_task(int pid, int spg_id) spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_AUTO_MIN, SPG_ID_AUTO_MAX, GFP_ATOMIC); if (spg_id < 0) { - pr_err("share pool: task add group failed when automatically generate group id failed\n"); + if (printk_ratelimit()) + pr_err("share pool: task add group failed when automatically " + "generate group id failed\n"); return spg_id; } } @@ -467,8 +492,9 @@ int sp_group_add_task(int pid, int spg_id) SPG_ID_DVPP_PASS_THROUGH_MIN, SPG_ID_DVPP_PASS_THROUGH_MAX, GFP_ATOMIC); if (spg_id < 0) { - pr_err("share pool: task add group failed when automatically generate group id failed" - "in DVPP pass through\n"); + if (printk_ratelimit()) + pr_err("share pool: task add group failed when automatically " + "generate group id failed in DVPP pass through\n"); return spg_id; } } @@ -494,25 +520,31 @@ int sp_group_add_task(int pid, int spg_id) ret = PTR_ERR(spg); goto out_put_task; } + atomic_inc(&spg->use_count); + /* access control permission check */ if (sysctl_ac_mode == AC_SINGLE_OWNER) { if (spg->owner != current->group_leader) { ret = -EPERM; - goto out_put_task; + goto out_drop_group; } }
+ mm = get_task_mm(tsk); + if (!mm) { + ret = -ESRCH; + goto out_drop_group; + } + /* per process statistics initialization */ stat = sp_init_proc_stat(tsk); if (IS_ERR(stat)) { ret = PTR_ERR(stat); pr_err("share pool: init proc stat failed, ret %lx\n", PTR_ERR(stat)); - goto out_put_task; + goto out_put_mm; }
- mm = tsk->mm; mm->sp_group = spg; - atomic_inc(&spg->use_count); list_add_tail(&tsk->mm->sp_node, &spg->procs); /* * create mappings of existing shared memory segments into this @@ -523,7 +555,7 @@ int sp_group_add_task(int pid, int spg_id) list_for_each_entry(spa, &spg->spa_list, link) { unsigned long populate = 0; struct file *file = spa_file(spa); - void *p; + unsigned long addr;
if (prev) __sp_area_drop_locked(prev); @@ -532,28 +564,24 @@ int sp_group_add_task(int pid, int spg_id) atomic_inc(&spa->use_count); spin_unlock(&sp_area_lock);
- p = sp_mmap(mm, file, spa, &populate); - if (IS_ERR(p) && (PTR_ERR(p) != -ESPGMMEXIT)) { + down_write(&mm->mmap_sem); + addr = sp_mmap(mm, file, spa, &populate); + if (IS_ERR_VALUE(addr)) { sp_munmap_task_areas(mm, &spa->link); - ret = PTR_ERR(p); + up_write(&mm->mmap_sem); + ret = addr; pr_err("share pool: task add group sp mmap failed, ret %d\n", ret); spin_lock(&sp_area_lock); break; } - - if (PTR_ERR(p) == -ESPGMMEXIT) { - pr_err("share pool: task add group sp mmap failed, ret -ESPGMEXIT\n"); - spin_lock(&sp_area_lock); - ret = -ESPGMMEXIT; - break; - } + up_write(&mm->mmap_sem);
if (populate) { ret = do_mm_populate(mm, spa->va_start, populate, 0); if (ret) { if (printk_ratelimit()) - pr_err("share pool: task add group failed when mm populate failed: %d\n", - ret); + pr_warn("share pool: task add group failed when mm populate " + "failed (potential no enough memory): %d\n", ret); sp_munmap_task_areas(mm, spa->link.next); } } @@ -567,8 +595,16 @@ int sp_group_add_task(int pid, int spg_id) if (unlikely(ret)) { idr_remove(&sp_stat_idr, mm->sp_stat_id); kfree(stat); + mm->sp_stat_id = 0; + list_del(&mm->sp_node); + mm->sp_group = NULL; }
+out_put_mm: + mmput(mm); +out_drop_group: + if (unlikely(ret)) + __sp_group_drop_locked(spg); out_put_task: put_task_struct(tsk); out_unlock: @@ -609,9 +645,6 @@ void sp_group_exit(struct mm_struct *mm) bool is_alive = true; bool unlock;
- if (!enable_ascend_share_pool) - return; - /* * Nothing to do if this thread group doesn't belong to any sp_group. * No need to protect this check with lock because we can add a task @@ -638,18 +671,13 @@ void sp_group_exit(struct mm_struct *mm)
void sp_group_post_exit(struct mm_struct *mm) { - bool is_alive; struct sp_proc_stat *stat; bool unlock;
- if (!enable_ascend_share_pool) - return; - if (!mm->sp_group) return;
spg_exit_lock(&unlock); - is_alive = mm->sp_group->is_alive;
/* pointer stat must be valid, we don't need to check sanity */ stat = idr_find(&sp_stat_idr, mm->sp_stat_id); @@ -673,10 +701,7 @@ void sp_group_post_exit(struct mm_struct *mm)
idr_remove(&sp_stat_idr, mm->sp_stat_id);
- if (atomic_dec_and_test(&mm->sp_group->use_count)) { - BUG_ON(is_alive); - free_sp_group(mm->sp_group); - } + __sp_group_drop_locked(mm->sp_group); spg_exit_unlock(unlock);
kfree(stat); @@ -716,7 +741,7 @@ static void __insert_sp_area(struct sp_area *spa) static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, struct sp_group *spg, enum spa_type type) { - struct sp_area *spa; + struct sp_area *spa, *err; struct rb_node *n; unsigned long vstart = MMAP_SHARE_POOL_START; unsigned long vend = MMAP_SHARE_POOL_16G_START; @@ -728,6 +753,11 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, vstart = MMAP_SHARE_POOL_16G_START; vend = MMAP_SHARE_POOL_16G_START + MMAP_SHARE_POOL_16G_SIZE; } else { + if (!spg) { + if (printk_ratelimit()) + pr_err("share pool: don't allow k2u(task) in host svm multiprocess scene\n"); + return ERR_PTR(-EINVAL); + } vstart = spg->dvpp_va_start; vend = spg->dvpp_va_start + spg->dvpp_size; } @@ -735,14 +765,11 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
addr = vstart;
- if (!sysctl_share_pool_hugepage_enable) - flags &= ~(SP_HUGEPAGE_ONLY | SP_HUGEPAGE); - spa = kmalloc(sizeof(struct sp_area), GFP_KERNEL); if (unlikely(!spa)) { if (printk_ratelimit()) pr_err("share pool: alloc spa failed due to lack of memory\n"); - return NULL; + return ERR_PTR(-ENOMEM); }
spin_lock(&sp_area_lock); @@ -788,6 +815,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, } found: if (addr + size_align > vend) { + err = ERR_PTR(-EOVERFLOW); goto error; }
@@ -799,15 +827,17 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, atomic_set(&spa->use_count, 1); spa->type = type;
- if (spa_inc_usage(type, size)) + if (spa_inc_usage(type, size)) { + err = ERR_PTR(-EINVAL); goto error; + }
__insert_sp_area(spa); if (spa->spg) { atomic_inc(&spg->spa_num); - atomic_add(size, &spg->size); + atomic64_add(size, &spg->size); atomic_inc(&spg_stat.spa_total_num); - atomic_add(size, &spg_stat.spa_total_size); + atomic64_add(size, &spg_stat.spa_total_size); list_add_tail(&spa->link, &spg->spa_list); } spin_unlock(&sp_area_lock); @@ -817,7 +847,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, error: spin_unlock(&sp_area_lock); kfree(spa); - return NULL; + return err; }
/* the caller should hold sp_area_lock */ @@ -862,9 +892,9 @@ static void sp_free_area(struct sp_area *spa) spa_dec_usage(spa->type, spa->real_size); /* won't fail */ if (spa->spg) { atomic_dec(&spa->spg->spa_num); - atomic_sub(spa->real_size, &spa->spg->size); + atomic64_sub(spa->real_size, &spa->spg->size); atomic_dec(&spg_stat.spa_total_num); - atomic_sub(spa->real_size, &spg_stat.spa_total_size); + atomic64_sub(spa->real_size, &spg_stat.spa_total_size); list_del(&spa->link); } rb_erase(&spa->rb_node, &sp_area_root); @@ -898,7 +928,7 @@ void sp_area_drop(struct vm_area_struct *vma) { struct sp_area *spa;
- if (!sp_check_vm_share_pool(vma->vm_flags)) + if (!(vma->vm_flags & VM_SHARE_POOL)) return;
/* @@ -979,13 +1009,25 @@ int sp_free(unsigned long addr) } else { /* spa == NULL */ ret = -EINVAL; if (printk_ratelimit()) - pr_err("share pool: sp_free invalid input addr %pK\n", (void *)addr); + pr_err("share pool: sp free invalid input addr %pK\n", (void *)addr); goto out; }
+ if (spa->type != SPA_TYPE_ALLOC) { + if (printk_ratelimit()) + pr_err("share pool: sp free failed, addr %pK is not from sp_alloc\n", + (void *)addr); + } + if (!spg_valid(spa->spg)) goto drop_spa;
+ pr_notice("share pool: [sp free] caller %s(%d/%d); " + "group id %d addr 0x%pK, size %ld\n", + current->comm, current->tgid, current->pid, spa->spg->id, + (void *)spa->va_start, spa->real_size); + sp_dump_stack(); + __sp_free(spa->spg, spa->va_start, spa_size(spa), NULL);
/* Free the memory of the backing shmem or hugetlbfs */ @@ -993,7 +1035,7 @@ int sp_free(unsigned long addr) offset = addr - MMAP_SHARE_POOL_START; ret = vfs_fallocate(spa_file(spa), mode, offset, spa_size(spa)); if (ret) - pr_err("share pool: fallocate failed: %d\n", ret); + pr_err("share pool: sp free fallocate failed: %d\n", ret);
/* pointer stat may be invalid because of kthread buff_module_guard_work */ if (current->mm == NULL) { @@ -1016,7 +1058,7 @@ int sp_free(unsigned long addr) EXPORT_SYMBOL_GPL(sp_free);
/* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_sem). */ -static unsigned long __sp_mmap(struct mm_struct *mm, struct file *file, +static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate) { unsigned long addr = spa->va_start; @@ -1033,30 +1075,13 @@ static unsigned long __sp_mmap(struct mm_struct *mm, struct file *file, if (IS_ERR_VALUE(addr)) { atomic_dec(&spa->use_count); pr_err("share pool: do_mmap fails %ld\n", addr); + } else { + BUG_ON(addr != spa->va_start); }
return addr; }
-static void *sp_mmap(struct mm_struct *mm, struct file *file, - struct sp_area *spa, unsigned long *populate) -{ - unsigned long addr; - - if (!mmget_not_zero(mm)) - return ERR_PTR(-ESPGMMEXIT); - down_write(&mm->mmap_sem); - addr = __sp_mmap(mm, file, spa, populate); - up_write(&mm->mmap_sem); - mmput(mm); - - if (IS_ERR_VALUE(addr)) - return ERR_PTR(addr); - - BUG_ON(addr != spa->va_start); - return (void *)addr; -} - /** * Allocate shared memory for all the processes in the same sp_group * size - the size of memory to allocate @@ -1071,12 +1096,14 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) struct sp_area *spa = NULL; struct sp_proc_stat *stat; unsigned long sp_addr; - void *p_mmap, *p = ERR_PTR(-ENODEV); + unsigned long mmap_addr; + void *p = ERR_PTR(-ENODEV); struct mm_struct *mm; struct file *file; unsigned long size_aligned; int ret = 0; struct mm_struct *tmp; + unsigned long mode, offset;
/* mdc scene hack */ if (enable_mdc_default_group) @@ -1133,9 +1160,6 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) goto out; }
- if (!sysctl_share_pool_hugepage_enable) - sp_flags &= ~(SP_HUGEPAGE_ONLY | SP_HUGEPAGE); - if (sp_flags & SP_HUGEPAGE) { file = spg->file_hugetlb; size_aligned = ALIGN(size, PMD_SIZE); @@ -1145,10 +1169,12 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) } try_again: spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_ALLOC); - if (!spa) { + if (IS_ERR(spa)) { if (printk_ratelimit()) - pr_err("share pool: allocation failed due to alloc spa failure\n"); - p = ERR_PTR(-ENOMEM); + pr_err("share pool: allocation failed due to alloc spa failure " + "(potential no enough virtual memory when -75): %ld\n", + PTR_ERR(spa)); + p = spa; goto out; } sp_addr = spa->va_start; @@ -1158,33 +1184,34 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) unsigned long populate = 0; struct vm_area_struct *vma;
- p_mmap = sp_mmap(mm, file, spa, &populate); - if (IS_ERR(p_mmap) && (PTR_ERR(p_mmap) != -ESPGMMEXIT)) { - p = p_mmap; + if (!mmget_not_zero(mm)) + continue; + + down_write(&mm->mmap_sem); + mmap_addr = sp_mmap(mm, file, spa, &populate); + if (IS_ERR_VALUE(mmap_addr)) { + up_write(&mm->mmap_sem); + p = (void *)mmap_addr; __sp_free(spg, sp_addr, size_aligned, mm); - pr_err("share pool: allocation sp mmap failed, ret %ld\n", PTR_ERR(p_mmap)); - break; + mmput(mm); + pr_err("share pool: allocation sp mmap failed, ret %ld\n", mmap_addr); + goto out; }
- if (PTR_ERR(p_mmap) == -ESPGMMEXIT) { - pr_info("share pool: allocation sp mmap failed, ret -ESPGMMEXIT\n"); + p =(void *)mmap_addr; /* success */ + if (populate == 0) { + up_write(&mm->mmap_sem); + mmput(mm); continue; }
- p = p_mmap; /* success */ - if (populate == 0) - continue; - - if (!mmget_not_zero(mm)) - continue; - down_write(&mm->mmap_sem); vma = find_vma(mm, sp_addr); if (unlikely(!vma)) { + up_write(&mm->mmap_sem); + mmput(mm); pr_err("share pool: allocation failed due to find %pK vma failure\n", (void *)sp_addr); p = ERR_PTR(-EINVAL); - up_write(&mm->mmap_sem); - mmput(mm); goto out; } /* clean PTE_RDONLY flags or trigger SMMU event */ @@ -1216,9 +1243,17 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) }
if (printk_ratelimit()) - pr_err("share pool: allocation failed due to mm populate failed: %d\n", - ret); + pr_warn("share pool: allocation failed due to mm populate failed" + "(potential no enough memory when -12): %d\n", ret); p = ERR_PTR(ret); + __sp_area_drop(spa); + + mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; + offset = sp_addr - MMAP_SHARE_POOL_START; + ret = vfs_fallocate(spa_file(spa), mode, offset, spa_size(spa)); + if (ret) + pr_err("share pool: fallocate failed %d\n", ret); + mmput(mm); break; } @@ -1235,24 +1270,20 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) mutex_unlock(&sp_mutex);
/* this will free spa if mmap failed */ - if (spa) + if (spa && !IS_ERR(spa)) __sp_area_drop(spa);
+ if (!IS_ERR(p)) { + pr_notice("share pool: [sp alloc] caller %s(%d/%d); group id %d; " + "return addr 0x%pK, size %ld\n", + current->comm, current->tgid, current->pid, spa->spg->id, + (void *)spa->va_start, spa->real_size); + sp_dump_stack(); + } return p; } EXPORT_SYMBOL_GPL(sp_alloc);
-static unsigned long __sp_remap_get_pfn(unsigned long kva) -{ - unsigned long pfn; - if (is_vmalloc_addr((void *)kva)) - pfn = vmalloc_to_pfn((void *)kva); - else - pfn = virt_to_pfn(kva); - - return pfn; -} - /* * return value: >0 means this is a hugepage addr * =0 means a normal addr. <0 means an errno. @@ -1286,7 +1317,6 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, struct vm_area_struct *vma; unsigned long ret_addr; unsigned long populate = 0; - unsigned long addr, buf, offset; struct file *file = NULL; int ret = 0; struct user_struct *user = NULL; @@ -1307,7 +1337,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, } down_write(&mm->mmap_sem);
- ret_addr = __sp_mmap(mm, file, spa, &populate); + ret_addr = sp_mmap(mm, file, spa, &populate); if (IS_ERR_VALUE(ret_addr)) { pr_err("share pool: k2u mmap failed %lx\n", ret_addr); goto out; @@ -1326,20 +1356,12 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto out; } } else { - buf = ret_addr; - addr = kva; - offset = 0; - do { - ret = remap_pfn_range(vma, buf, __sp_remap_get_pfn(addr), PAGE_SIZE, - __pgprot(vma->vm_page_prot.pgprot)); - if (ret) { - ret_addr = ret; - goto out; - } - offset += PAGE_SIZE; - buf += PAGE_SIZE; - addr += PAGE_SIZE; - } while (offset < spa_size(spa)); + ret = remap_vmalloc_range(vma, (void *)kva, 0); + if (ret) { + pr_err("share pool: remap vmalloc failed, ret %d\n", ret); + ret_addr = ret; + goto out; + } }
out: @@ -1380,6 +1402,13 @@ static void *sp_make_share_kva_to_task(unsigned long kva, struct sp_area *spa, }
p = (void *)ret_addr; + + task_lock(tsk); + if (tsk->mm == NULL) + p = ERR_PTR(-ESRCH); + else + spa->mm = tsk->mm; + task_unlock(tsk); out: put_task_struct(tsk); return p; @@ -1438,6 +1467,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, unsigned long kva_aligned; unsigned long size_aligned; unsigned int page_size = PAGE_SIZE; + enum spa_type type; int ret;
if (sp_flags & ~SP_DVPP) { @@ -1453,6 +1483,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, } else if (ret == 0) { /* do nothing */ } else { + pr_err("it is not vmalloc address\n"); return ERR_PTR(ret); } /* aligned down kva is convenient for caller to start with any valid kva */ @@ -1460,24 +1491,42 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, size_aligned = ALIGN(kva + size, page_size) - kva_aligned;
mutex_lock(&sp_mutex); - spg = __sp_find_spg(pid, spg_id); + spg = __sp_find_spg(pid, SPG_ID_DEFAULT); if (spg == NULL) { - spa = sp_alloc_area(size_aligned, sp_flags, NULL, SPA_TYPE_K2TASK); - if (!spa) { + type = SPA_TYPE_K2TASK; + if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { mutex_unlock(&sp_mutex); if (printk_ratelimit()) - pr_err("share pool: k2u failed due to alloc spa failure\n"); - return ERR_PTR(-ENOMEM); + pr_err("share pool: k2task invalid spg id %d\n", spg_id); + return ERR_PTR(-EINVAL); + } + spa = sp_alloc_area(size_aligned, sp_flags, NULL, type); + if (IS_ERR(spa)) { + mutex_unlock(&sp_mutex); + if (printk_ratelimit()) + pr_err("share pool: k2u(task) failed due to alloc spa failure " + "(potential no enough virtual memory when -75): %ld\n", + PTR_ERR(spa)); + return spa; } uva = sp_make_share_kva_to_task(kva_aligned, spa, pid); mutex_unlock(&sp_mutex); } else if (spg_valid(spg)) { - spa = sp_alloc_area(size_aligned, sp_flags, spg, SPA_TYPE_K2SPG); - if (!spa) { + type = SPA_TYPE_K2SPG; + if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id) { mutex_unlock(&sp_mutex); if (printk_ratelimit()) - pr_err("share pool: k2u failed due to alloc spa failure\n"); - return ERR_PTR(-ENOMEM); + pr_err("share pool: k2spg invalid spg id %d\n", spg_id); + return ERR_PTR(-EINVAL); + } + spa = sp_alloc_area(size_aligned, sp_flags, spg, type); + if (IS_ERR(spa)) { + mutex_unlock(&sp_mutex); + if (printk_ratelimit()) + pr_err("share pool: k2u(spg) failed due to alloc spa failure " + "(potential no enough virtual memory when -75): %ld\n", + PTR_ERR(spa)); + return spa; }
uva = sp_make_share_kva_to_spg(kva_aligned, spa, spg); @@ -1492,6 +1541,17 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size, uva = uva + (kva - kva_aligned);
__sp_area_drop(spa); + + if (!IS_ERR(uva)) { + if (spg_valid(spa->spg)) + spg_id = spa->spg->id; + pr_notice("share pool: [sp k2u type %d] caller %s(%d/%d); group id %d; " + "return addr 0x%pK size %ld\n", + type, current->comm, current->tgid, current->pid, spg_id, + (void *)spa->va_start, spa->real_size); + sp_dump_stack(); + } + return uva; } EXPORT_SYMBOL_GPL(sp_make_share_k2u); @@ -1531,7 +1591,8 @@ static int sp_pte_hole(unsigned long start, unsigned long end, struct mm_walk *walk) { if (printk_ratelimit()) - pr_err("share pool: hole [%pK, %pK) appeared unexpectedly\n", (void *)start, (void *)end); + pr_err("share pool: hole [%pK, %pK) appeared unexpectedly\n", + (void *)start, (void *)end); return -EFAULT; }
@@ -1545,7 +1606,8 @@ static int sp_hugetlb_entry(pte_t *ptep, unsigned long hmask,
if (unlikely(!pte_present(pte))) { if (printk_ratelimit()) - pr_err("share pool: the page of addr %pK unexpectedly not in RAM\n", (void *)addr); + pr_err("share pool: the page of addr %pK unexpectedly " + "not in RAM\n", (void *)addr); return -EFAULT; }
@@ -1758,6 +1820,11 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp } }
+ if (spa->type != SPA_TYPE_K2TASK && spa->type != SPA_TYPE_K2SPG) { + pr_err("share pool: this spa should not be unshare here\n"); + ret = -EINVAL; + goto out_drop_area; + } /* * 1. overflow actually won't happen due to an spa must be valid. * 2. we must unshare [spa->va_start, spa->va_start + spa->real_size) completely @@ -1771,32 +1838,57 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp if (size_aligned < ALIGN(size, page_size)) { ret = -EINVAL; if (printk_ratelimit()) - pr_err("share pool: unshare uva failed due to invalid parameter size %lu\n", size); + pr_err("share pool: unshare uva failed due to invalid parameter size %lu\n", + size); goto out_drop_area; }
- if (spg_id == SPG_ID_NONE) { - if (spa->spg) { - ret = -EINVAL; + if (spa->type == SPA_TYPE_K2TASK) { + if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT) { if (printk_ratelimit()) - pr_err("share pool: unshare uva failed, SPG_ID_NONE is invalid\n"); + pr_err("share pool: unshare uva(to task) failed, " + "invalid spg id %d\n", spg_id); + ret = -EINVAL; goto out_drop_area; }
rcu_read_lock(); tsk = find_task_by_vpid(pid); - if (!tsk || (tsk->flags & PF_EXITING)) - ret = -ESRCH; - else - get_task_struct(tsk); - + if (!tsk || !tsk->mm || (tsk->flags & PF_EXITING)) { + if (printk_ratelimit()) + pr_info("share pool: no need to unshare uva(to task), " + "target process not found or do_exit\n"); + ret = -EINVAL; + rcu_read_unlock(); + sp_dump_stack(); + goto out_drop_area; + } + get_task_struct(tsk); rcu_read_unlock(); - if (ret) + + if (!spa->mm || + (current->mm && (current->mm != tsk->mm || tsk->mm != spa->mm))) { + if (printk_ratelimit()) + pr_err("share pool: unshare uva(to task) failed, " + "wrong pid or invalid spa\n"); + ret = -EINVAL; goto out_drop_area; + } + + if (spa->mm != tsk->mm) { + if (printk_ratelimit()) + pr_err("share pool: unshare uva(to task) failed, " + "spa not belong to the task\n"); + ret = -EINVAL; + goto out_drop_area; + }
if (!mmget_not_zero(tsk->mm)) { put_task_struct(tsk); - pr_info("share pool: no need to unshare uva, target process is exiting\n"); + if (printk_ratelimit()) + pr_info("share pool: no need to unshare uva(to task), " + "target process mm is not existing\n"); + sp_dump_stack(); goto out_drop_area; } down_write(&tsk->mm->mmap_sem); @@ -1809,32 +1901,51 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp (void *)uva_aligned); } put_task_struct(tsk); - } else { - /* - * k2u to task, then unshare_uva(..., spg_id) is invalid due to potential - * spa memory leak. - */ - if (!spa->spg) { + } else if (spa->type == SPA_TYPE_K2SPG) { + if (!spa->spg || spg_id == SPG_ID_NONE) { + if (printk_ratelimit()) + pr_err("share pool: unshare uva(to group) failed, " + "invalid spg id %d\n", spg_id); ret = -EINVAL; + goto out_drop_area; + } + + spg = __sp_find_spg(pid, SPG_ID_DEFAULT); + if (!spg_valid(spg)) { if (printk_ratelimit()) - pr_err("share pool: unshare uva failed, sp group id %d is invalid\n", spg_id); + pr_err("share pool: unshare uva(to group) invalid pid, " + "process not in sp group or group is dead\n"); + ret = -EINVAL; goto out_drop_area; }
- spg = __sp_find_spg(pid, spg_id); - if (spg_valid(spg)) { - __sp_free(spg, uva_aligned, size_aligned, NULL); - } else { - if (!spg) { - if (printk_ratelimit()) - pr_err("share pool: unshare uva failed, doesn't belong to group %d\n", - spg_id); - ret = -EINVAL; - goto out_drop_area; - } else { - pr_info("share pool: no need to unshare uva, target process is exiting\n"); - } + if (spa->spg != spg) { + if (printk_ratelimit()) + pr_err("share pool: unshare uva(to group) failed, " + "spa not belong to the group\n"); + ret = -EINVAL; + goto out_drop_area; } + + if (current->mm && current->mm->sp_group != spg) { + if (printk_ratelimit()) + pr_err("share pool: unshare uva(to group) failed, " + "caller process doesn't belong to target group\n"); + ret = -EINVAL; + goto out_drop_area; + } + + __sp_free(spg, uva_aligned, size_aligned, NULL); + } + + if (!ret) { + if (spg_valid(spa->spg)) + spg_id = spa->spg->id; + pr_notice("share pool: [sp unshare uva type %d] caller %s(%d/%d); " + "group id %d addr 0x%pK size %ld\n", + spa->type, current->comm, current->tgid, current->pid, + spg_id, (void *)spa->va_start, spa->real_size); + sp_dump_stack(); }
out_drop_area: @@ -1864,7 +1975,8 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) step = PAGE_SIZE; is_hugepage = false; } else { - pr_err("share pool: check vmap hugepage failed, ret %d\n", ret); + if (printk_ratelimit()) + pr_err("share pool: check vmap hugepage failed, ret %d\n", ret); return -EINVAL; }
@@ -1882,7 +1994,8 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size) if (page) put_page(page); else - pr_err("share pool: vmalloc to hugepage failed\n"); + pr_err("share pool: vmalloc %pK to page/hugepage failed\n", + (void *)addr); }
vunmap((void *)kva_aligned); @@ -1944,7 +2057,7 @@ int sp_walk_page_range(unsigned long uva, unsigned long size, get_task_struct(tsk); if (!mmget_not_zero(tsk->mm)) { put_task_struct(tsk); - return -EINVAL; + return -ESRCH; } down_write(&tsk->mm->mmap_sem); ret = __sp_walk_page_range(uva, size, tsk, sp_walk_data); @@ -1973,46 +2086,6 @@ void sp_walk_page_free(struct sp_walk_data *sp_walk_data) } EXPORT_SYMBOL_GPL(sp_walk_page_free);
-/** - * Walk the mm_struct of processes in the specified sp_group - * and call CALLBACK once for each mm_struct. - * @spg_id: the ID of the specified sp_group - * @data: the param for callback function - * @func: caller specific callback function - * - * Return -errno if fail. - */ -int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *)) -{ - struct sp_group *spg; - int ret = -ESRCH; - - if (!func) { - if (printk_ratelimit()) - pr_err("share pool: null func pointer\n"); - return -EINVAL; - } - - mutex_lock(&sp_mutex); - spg = idr_find(&sp_group_idr, spg_id); - if (spg_valid(spg)) { - struct mm_struct *mm; - struct mm_struct *tmp; - list_for_each_entry_safe(mm, tmp, &spg->procs, sp_node) { - if (func) { - ret = func(mm, data); - if (ret) - goto out_unlock; - } - } - } -out_unlock: - mutex_unlock(&sp_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(sp_group_walk); - int sp_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&sp_notifier_chain, nb); @@ -2039,7 +2112,7 @@ bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid) struct sp_group *spg;
if (device_id < 0 || device_id >= MAX_DEVID || pid < 0 || size <= 0 || - size > MMAP_SHARE_POOL_16G_SIZE) + size> MMAP_SHARE_POOL_16G_SIZE) return false;
mutex_lock(&sp_mutex); @@ -2061,11 +2134,9 @@ EXPORT_SYMBOL_GPL(sp_config_dvpp_range); /* Check whether the address belongs to the share pool. */ bool is_sharepool_addr(unsigned long addr) { - if (host_svm_sp_enable == false) - return (addr >= MMAP_SHARE_POOL_START) && - addr < (MMAP_SHARE_POOL_16G_START + MMAP_SHARE_POOL_16G_SIZE); - - return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END; + if (host_svm_sp_enable == false) + return addr >= MMAP_SHARE_POOL_START && addr < (MMAP_SHARE_POOL_16G_START + MMAP_SHARE_POOL_16G_SIZE); + return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END; } EXPORT_SYMBOL_GPL(is_sharepool_addr);
@@ -2109,7 +2180,7 @@ static int idr_proc_stat_cb(int id, void *p, void *data)
mutex_lock(&sp_mutex); spg = __sp_find_spg(id, SPG_ID_DEFAULT); - if (spg) { + if (spg_valid(spg)) { seq_printf(seq, "%-12d %-10d %-18ld\n", id, spg->id, byte2kb(stat->amount)); } @@ -2130,8 +2201,7 @@ static int proc_stat_show(struct seq_file *seq, void *offset) return 0; }
-static void rb_spa_stat_show(struct seq_file *seq) -{ +static void rb_spa_stat_show(struct seq_file *seq) { struct rb_node *node; struct sp_area *spa;
@@ -2215,8 +2285,8 @@ static int idr_spg_stat_cb(int id, void *p, void *data) struct sp_group *spg = p; struct seq_file *seq = data;
- seq_printf(seq, "Group %-10d size: %13d KB, spa num: %d.\n", - id, byte2kb(atomic_read(&spg->size)), + seq_printf(seq, "Group %-10d size: %13ld KB, spa num: %d.\n", + id, byte2kb(atomic64_read(&spg->size)), atomic_read(&spg->spa_num));
return 0; @@ -2227,8 +2297,8 @@ static void spg_overview_show(struct seq_file *seq) mutex_lock(&sp_mutex); idr_for_each(&sp_group_idr, idr_spg_stat_cb, seq); mutex_unlock(&sp_mutex); - seq_printf(seq, "Share pool total size: %13d KB, spa total num: %d.\n\n", - byte2kb(atomic_read(&spg_stat.spa_total_size)), + seq_printf(seq, "Share pool total size: %13ld KB, spa total num: %d.\n\n", + byte2kb(atomic64_read(&spg_stat.spa_total_size)), atomic_read(&spg_stat.spa_total_num)); }
@@ -2255,7 +2325,6 @@ void __init proc_sharepool_init(void) proc_create_single_data("sharepool/spa_stat", 0, NULL, spa_stat_show, NULL); }
- struct page *sp_alloc_pages(struct vm_struct *area, gfp_t mask, unsigned int page_order, int node) {
From: Ding Tianhong <dingtianhong@huawei.com>
ascend inclusion
category: feature
bugzilla: NA
CVE: NA
-------------------------------------------------
The svm driver needs to share some memory with other processes in the same group, so use the share pool functions to support this.
The svm driver also exports a new feature that provides the va2pa (virtual-to-physical address) translation for special use.
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Signed-off-by: Wu Peng <wupeng58@huawei.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/char/svm.c | 547 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 452 insertions(+), 95 deletions(-)
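For context on the reworked va2pa path: SVM_IOCTL_GET_PHYS now takes a user pointer to an unsigned long holding a virtual address and, on success, rewrites it with the byte offset of a slot inside the reserved "va2pa trunk"; the physical address and status words stay in that slot until the TS marks it done. A minimal caller sketch, assuming a /dev/svm-style device node (the node name is not taken from this patch) and the raw command value used by svm_ioctl():

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define SVM_IOCTL_GET_PHYS	0xfff9	/* raw value, matching the switch in svm_ioctl() */

int main(void)
{
	int buf = 0;					/* any mapped address of this process */
	unsigned long val = (unsigned long)&buf;	/* in: VA, out: slot offset */
	int fd = open("/dev/svm0", O_RDWR);		/* device node name is an assumption */

	if (fd < 0)
		return 1;
	if (ioctl(fd, SVM_IOCTL_GET_PHYS, &val) == 0)
		printf("va2pa slot offset: 0x%lx\n", val);
	close(fd);
	return 0;
}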
diff --git a/drivers/char/svm.c b/drivers/char/svm.c index 87cf6c14dbc1..d36246910925 100644 --- a/drivers/char/svm.c +++ b/drivers/char/svm.c @@ -32,6 +32,7 @@ #include <linux/sched/mm.h> #include <linux/msi.h> #include <linux/acpi.h> +#include <linux/share_pool.h>
#define SVM_DEVICE_NAME "svm" #define ASID_SHIFT 48 @@ -39,11 +40,11 @@ #define SVM_IOCTL_PROCESS_BIND 0xffff #define SVM_IOCTL_GET_PHYS 0xfff9 #define SVM_IOCTL_SET_RC 0xfffc -#define SVM_IOCTL_GET_L2PTE_BASE 0xfffb #define SVM_IOCTL_LOAD_FLAG 0xfffa #define SVM_IOCTL_PIN_MEMORY 0xfff7 #define SVM_IOCTL_UNPIN_MEMORY 0xfff5 #define SVM_IOCTL_GETHUGEINFO 0xfff6 +#define SVM_IOCTL_GET_PHYMEMINFO 0xfff8 #define SVM_IOCTL_REMAP_PROC 0xfff4
#define SVM_REMAP_MEM_LEN_MAX (16 * 1024 * 1024) @@ -51,6 +52,9 @@ #define SVM_IOCTL_RELEASE_PHYS32 0xfff3 #define MMAP_PHY32_MAX (16 * 1024 * 1024)
+#define SVM_IOCTL_SP_ALLOC 0xfff2 +#define SVM_IOCTL_SP_FREE 0xfff1 +#define SPG_DEFAULT_ID 0 #define CORE_SID 0 static int probe_index; static LIST_HEAD(child_list); @@ -124,6 +128,24 @@ struct meminfo { unsigned long hugetlbtotal; };
+struct phymeminfo { + unsigned long normal_total; + unsigned long normal_free; + unsigned long huge_total; + unsigned long huge_free; +}; + +struct phymeminfo_ioctl { + struct phymeminfo *info; + unsigned long nodemask; +}; + +struct spalloc { + unsigned long addr; + unsigned long size; + unsigned long flag; +}; + static struct bus_type svm_bus_type = { .name = "svm_bus", }; @@ -137,14 +159,14 @@ static char *svm_cmd_to_string(unsigned int cmd) return "get phys"; case SVM_IOCTL_SET_RC: return "set rc"; - case SVM_IOCTL_GET_L2PTE_BASE: - return "get l2pte base"; case SVM_IOCTL_PIN_MEMORY: return "pin memory"; case SVM_IOCTL_UNPIN_MEMORY: return "unpin memory"; case SVM_IOCTL_GETHUGEINFO: return "get hugeinfo"; + case SVM_IOCTL_GET_PHYMEMINFO: + return "get physical memory info"; case SVM_IOCTL_REMAP_PROC: return "remap proc"; case SVM_IOCTL_LOAD_FLAG: @@ -160,6 +182,223 @@ static char *svm_cmd_to_string(unsigned int cmd)
extern void sysrq_sched_debug_tidy(void);
+/* + * image word of slot + * SVM_IMAGE_WORD_INIT: initial value, indicating that the slot is not used. + * SVM_IMAGE_WORD_VALID: valid data is filled in the slot + * SVM_IMAGE_WORD_DONE: the DMA operation is complete when the TS uses this address, + so, this slot can be freed. + */ +#define SVM_IMAGE_WORD_INIT 0x0 +#define SVM_IMAGE_WORD_VALID 0xaa55aa55 +#define SVM_IMAGE_WORD_DONE 0x55ff55ff + +/* + * The length of this structure must be 64 bytes, which is the agreement with the TS. + * And the data type and sequence cannot be changed, because the TS core reads data + * based on the data type and sequence. + * image_word: slot status. For details, see SVM_IMAGE_WORD_xxx + * pid: pid of process which ioctl svm device to get physical addr, it is used for + verification by TS. + * data_type: used to determine the data type by TS. Currently, data type must be + SVM_VA2PA_TYPE_DMA. + * char data[48]: for the data type SVM_VA2PA_TYPE_DMA, the DMA address is stored. + */ +struct svm_va2pa_slot { + int image_word; + int resv; + int pid; + int data_type; + char data[48]; +}; + +struct svm_va2pa_trunk { + struct svm_va2pa_slot *slots; + int slot_total; + int slot_used; + unsigned long *bitmap; + struct mutex mutex; +}; + +struct svm_va2pa_trunk va2pa_trunk; + +#define SVM_VA2PA_TRUNK_SIZE_MAX 0x3200000 +#define SVM_VA2PA_MEMORY_ALIGN 64 +#define SVM_VA2PA_SLOT_SIZE sizeof(struct svm_va2pa_slot) +#define SVM_VA2PA_TYPE_DMA 0x1 +#define SVM_MEM_REG "va2pa trunk" +#define SVM_VA2PA_CLEAN_BATCH_NUM 0x80 + +struct device_node *svm_find_mem_reg_node(struct device *dev, const char *compat) +{ + int index = 0; + struct device_node *tmp = NULL; + struct device_node *np = dev->of_node; + + for (; ; index++) { + tmp = of_parse_phandle(np, "memory-region", index); + if (!tmp) + break; + + if (of_device_is_compatible(tmp, compat)) + return tmp; + + of_node_put(tmp); + } + + return NULL; +} + +static int svm_parse_trunk_memory(struct device *dev, phys_addr_t *base, unsigned long *size) +{ + int err; + struct resource r; + struct device_node *trunk = NULL; + + trunk = svm_find_mem_reg_node(dev, SVM_MEM_REG); + if (!trunk) { + dev_err(dev, "Didn't find reserved memory\n"); + return -EINVAL; + } + + err = of_address_to_resource(trunk, 0, &r); + of_node_put(trunk); + if (err) { + dev_err(dev, "Couldn't address to resource for reserved memory\n"); + return -ENOMEM; + } + + *base = r.start; + *size = resource_size(&r); + + return 0; +} + +static int svm_setup_trunk(struct device *dev, phys_addr_t base, unsigned long size) +{ + int slot_total; + unsigned long *bitmap = NULL; + struct svm_va2pa_slot *slot = NULL; + + if (!IS_ALIGNED(base, SVM_VA2PA_MEMORY_ALIGN)) { + dev_err(dev, "Didn't aligned to %u\n", SVM_VA2PA_MEMORY_ALIGN); + return -EINVAL; + } + + if ((size == 0) || (size > SVM_VA2PA_TRUNK_SIZE_MAX)) { + dev_err(dev, "Size of reserved memory is not right\n"); + return -EINVAL; + } + + slot_total = size / SVM_VA2PA_SLOT_SIZE; + if (slot_total < BITS_PER_LONG) + return -EINVAL; + + bitmap = kvcalloc(slot_total / BITS_PER_LONG, sizeof(unsigned long), GFP_KERNEL); + if (!bitmap) { + dev_err(dev, "alloc memory failed\n"); + return -ENOMEM; + } + + slot = ioremap(base, size); + if (!slot) { + kvfree(bitmap); + dev_err(dev, "Ioremap trunk failed\n"); + return -ENXIO; + } + + va2pa_trunk.slots = slot; + va2pa_trunk.slot_used = 0; + va2pa_trunk.slot_total = slot_total; + va2pa_trunk.bitmap = bitmap; + mutex_init(&va2pa_trunk.mutex); + + return 0; +} + +static void svm_remove_trunk(struct device *dev) +{ + 
iounmap(va2pa_trunk.slots); + kvfree(va2pa_trunk.bitmap); + + va2pa_trunk.slots = NULL; + va2pa_trunk.bitmap = NULL; +} + +static void svm_set_slot_valid(unsigned long index, unsigned long phys) +{ + struct svm_va2pa_slot *slot = &va2pa_trunk.slots[index]; + + *((unsigned long *)slot->data) = phys; + slot->image_word = SVM_IMAGE_WORD_VALID; + slot->pid = current->pid; + slot->data_type = SVM_VA2PA_TYPE_DMA; + __bitmap_set(va2pa_trunk.bitmap, index, 1); + va2pa_trunk.slot_used++; +} + +static void svm_set_slot_init(unsigned long index) +{ + struct svm_va2pa_slot *slot = &va2pa_trunk.slots[index]; + + slot->image_word = SVM_IMAGE_WORD_INIT; + __bitmap_clear(va2pa_trunk.bitmap, index, 1); + va2pa_trunk.slot_used--; +} + +static void svm_clean_done_slots(void) +{ + int used = va2pa_trunk.slot_used; + int count = 0; + long temp = -1; + phys_addr_t addr; + unsigned long *bitmap = va2pa_trunk.bitmap; + + for (; count < used && count < SVM_VA2PA_CLEAN_BATCH_NUM;) { + temp = find_next_bit(bitmap, va2pa_trunk.slot_total, temp + 1); + if (temp == va2pa_trunk.slot_total) + break; + + count++; + if (va2pa_trunk.slots[temp].image_word != SVM_IMAGE_WORD_DONE) + continue; + + addr = *((phys_addr_t *)(va2pa_trunk.slots[temp].data)); + put_page(pfn_to_page(PHYS_PFN(addr))); + svm_set_slot_init(temp); + } +} + +static int svm_find_slot_init(unsigned long *index) +{ + int temp; + unsigned long *bitmap = va2pa_trunk.bitmap; + + temp = find_first_zero_bit(bitmap, va2pa_trunk.slot_total); + if (temp == va2pa_trunk.slot_total) + return -ENOSPC; + + *index = temp; + return 0; +} + +static int svm_va2pa_trunk_init(struct device *dev) +{ + int err; + phys_addr_t base; + unsigned long size; + + err = svm_parse_trunk_memory(dev, &base, &size); + if (err) + return err; + + err = svm_setup_trunk(dev, base, size); + if (err) + return err; + + return 0; +} + void sysrq_sched_debug_show_export(void) { #ifdef CONFIG_SCHED_DEBUG @@ -1083,56 +1322,91 @@ static pte_t *svm_get_pte(struct vm_area_struct *vma, return pte; }
+/* Must be called with mmap_sem held */ static pte_t *svm_walk_pt(unsigned long addr, unsigned long *page_size, unsigned long *offset) { pgd_t *pgd = NULL; pud_t *pud = NULL; - pte_t *pte = NULL; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL;
- down_read(&mm->mmap_sem); vma = find_vma(mm, addr); if (!vma) - goto err; + return NULL;
pgd = pgd_offset(mm, addr); if (pgd_none_or_clear_bad(pgd)) - goto err; + return NULL;
pud = pud_offset(pgd, addr); if (pud_none_or_clear_bad(pud)) - goto err; - - pte = svm_get_pte(vma, pud, addr, page_size, offset); + return NULL;
-err: - up_read(&mm->mmap_sem); - return pte; + return svm_get_pte(vma, pud, addr, page_size, offset); }
static int svm_get_phys(unsigned long __user *arg) { - pte_t *pte = NULL; + int err; + pte_t *ptep = NULL; + pte_t pte; + unsigned long index = 0; + struct page *page; unsigned long addr, phys, offset; + struct mm_struct *mm = current->mm;
if (!acpi_disabled) return -EPERM;
- if (arg == NULL) - return -EINVAL; - if (get_user(addr, arg)) return -EFAULT;
- pte = svm_walk_pt(addr, NULL, &offset); - if (pte && pte_present(*pte)) { - phys = PFN_PHYS(pte_pfn(*pte)) + offset; - return put_user(phys, arg); + down_read(&mm->mmap_sem); + ptep = svm_walk_pt(addr, NULL, &offset); + if (!ptep) { + up_read(&mm->mmap_sem); + return -EINVAL; }
- return -EINVAL; + pte = READ_ONCE(*ptep); + if (!pte_present(pte) || !(pfn_present(pte_pfn(pte)))) { + up_read(&mm->mmap_sem); + return -EINVAL; + } + + page = pte_page(pte); + get_page(page); + + phys = PFN_PHYS(pte_pfn(pte)) + offset; + up_read(&mm->mmap_sem); + + mutex_lock(&va2pa_trunk.mutex); + svm_clean_done_slots(); + if (va2pa_trunk.slot_used == va2pa_trunk.slot_total) { + err = -ENOSPC; + goto err_mutex_unlock; + } + + err = svm_find_slot_init(&index); + if (err) + goto err_mutex_unlock; + + svm_set_slot_valid(index, phys); + + err = put_user(index * SVM_VA2PA_SLOT_SIZE, (unsigned long __user *)arg); + if (err) + goto err_slot_init; + + mutex_unlock(&va2pa_trunk.mutex); + return 0; + +err_slot_init: + svm_set_slot_init(index); +err_mutex_unlock: + mutex_unlock(&va2pa_trunk.mutex); + put_page(page); + return err; }
int svm_get_pasid(pid_t vpid, int dev_id __maybe_unused) @@ -1188,6 +1462,7 @@ static int svm_set_rc(unsigned long __user *arg) unsigned long addr, size, rc; unsigned long end, page_size, offset; pte_t *pte = NULL; + struct mm_struct *mm = current->mm;
if (acpi_disabled) return -EPERM; @@ -1208,24 +1483,25 @@ static int svm_set_rc(unsigned long __user *arg) if (addr >= end) return -EINVAL;
+ down_read(&mm->mmap_sem); while (addr < end) { pte = svm_walk_pt(addr, &page_size, &offset); - if (!pte) + if (!pte) { + up_read(&mm->mmap_sem); return -ESRCH; + } pte->pte |= (rc & (u64)0x0f) << 59; addr += page_size - offset; } + up_read(&mm->mmap_sem);
return 0; }
-static int svm_get_l2pte_base(struct svm_device *sdev, - unsigned long __user *arg) +static long svm_get_hugeinfo(unsigned long __user *arg) { - int i = 0, err = -EINVAL; - unsigned long *base = NULL; - unsigned long vaddr, size; - struct mm_struct *mm = current->mm; + struct hstate *h = &default_hstate; + struct meminfo info;
if (!acpi_disabled) return -EPERM; @@ -1233,87 +1509,81 @@ static int svm_get_l2pte_base(struct svm_device *sdev, if (arg == NULL) return -EINVAL;
- if (get_user(vaddr, arg)) - return -EFAULT; + if (!hugepages_supported()) + return -ENOTSUPP;
- if (!IS_ALIGNED(vaddr, sdev->l2size)) - return -EINVAL; + info.hugetlbfree = h->free_huge_pages; + info.hugetlbtotal = h->nr_huge_pages;
- if (get_user(size, arg + 1)) + if (copy_to_user((void __user *)arg, &info, sizeof(info))) return -EFAULT;
- if (size != sdev->l2size || size != sdev->l2size) - return -EINVAL; + pr_info("svm get hugetlb info: order(%u), max_huge_pages(%lu)," + "nr_huge_pages(%lu), free_huge_pages(%lu), resv_huge_pages(%lu)", + h->order, + h->max_huge_pages, + h->nr_huge_pages, + h->free_huge_pages, + h->resv_huge_pages);
- size = ALIGN(size, PMD_SIZE) / PMD_SIZE; - base = kmalloc_array(size, sizeof(*base), GFP_KERNEL); - if (base == NULL) - return -ENOMEM; + return 0; +}
- while (size) { - pgd_t *pgd = NULL; - pud_t *pud = NULL; - pmd_t *pmd = NULL; +static void svm_get_node_memory_info_inc(unsigned long nid, struct phymeminfo *info) +{ + struct sysinfo i; + struct hstate *h = &default_hstate; + unsigned long huge_free = 0; + unsigned long huge_total = 0;
- pgd = pgd_offset(mm, vaddr); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - goto err_out; + if (hugepages_supported()) { + huge_free = h->free_huge_pages_node[nid] * (PAGE_SIZE << huge_page_order(h)); + huge_total = h->nr_huge_pages_node[nid] * (PAGE_SIZE << huge_page_order(h)); + }
- pud = pud_offset(pgd, vaddr); - if (pud_none(*pud) || pud_bad(*pud)) - goto err_out; +#ifdef CONFIG_NUMA + si_meminfo_node(&i, nid); +#else + si_meminfo(&i); +#endif + info->normal_free += i.freeram * PAGE_SIZE; + info->normal_total += i.totalram * PAGE_SIZE - huge_total; + info->huge_total += huge_total; + info->huge_free += huge_free; +}
- pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - goto err_out; +static void __svm_get_memory_info(unsigned long nodemask, struct phymeminfo *info) +{ + memset(info, 0x0, sizeof(struct phymeminfo));
- /* - * For small page base address, it should use pte_pfn - * instead of pmd_pfn. - */ - base[i] = PFN_PHYS(pte_pfn(*((pte_t *)pmd))); - vaddr += PMD_SIZE; - size--; - i++; - } + nodemask = nodemask & ((1UL << MAX_NUMNODES) - 1);
- /* lint !e647 */ - err = copy_to_user((void __user *)arg, base, i * sizeof(*base)); - if (err) - err = -EFAULT; -err_out: - kfree(base); - return err; + while (nodemask) { + unsigned long nid = find_first_bit(&nodemask, BITS_PER_LONG); + if (node_isset(nid, node_online_map)) { + (void)svm_get_node_memory_info_inc(nid, info); + } + + nodemask &= ~(1UL << nid); + } }
-static long svm_get_hugeinfo(unsigned long __user *arg) +static long svm_get_phy_memory_info(unsigned long __user *arg) { - struct hstate *h = &default_hstate; - struct meminfo info; - - if (!acpi_disabled) - return -EPERM; + struct phymeminfo info; + struct phymeminfo_ioctl para;
if (arg == NULL) return -EINVAL;
- if (!hugepages_supported()) - return -ENOTSUPP; + if (copy_from_user(¶, (void __user *)arg, sizeof(para))) + return -EFAULT;
- info.hugetlbfree = h->free_huge_pages; - info.hugetlbtotal = h->nr_huge_pages; + __svm_get_memory_info(para.nodemask, &info);
- if (copy_to_user((void __user *)arg, &info, sizeof(info))) + if (copy_to_user((void __user *)para.info, &info, sizeof(info))) return -EFAULT;
- pr_info("svm get hugetlb info: order(%u), max_huge_pages(%lu)," - "nr_huge_pages(%lu), free_huge_pages(%lu), resv_huge_pages(%lu)", - h->order, - h->max_huge_pages, - h->nr_huge_pages, - h->free_huge_pages, - h->resv_huge_pages); - return 0; }
@@ -1601,13 +1871,15 @@ static int svm_release_phys32(unsigned long __user *arg) if (get_user(addr, arg)) return -EFAULT;
+ down_read(&mm->mmap_sem); pte = svm_walk_pt(addr, NULL, &offset); - if (pte && pte_present(*pte)) + if (pte && pte_present(*pte)) { phys = PFN_PHYS(pte_pfn(*pte)) + offset; - else + } else { + up_read(&mm->mmap_sem); return -EINVAL; + }
- down_read(&mm->mmap_sem); vma = find_vma(mm, addr); if (!vma) { up_read(&mm->mmap_sem); @@ -1624,6 +1896,77 @@ static int svm_release_phys32(unsigned long __user *arg) return 0; }
+static unsigned long svm_sp_alloc_mem(unsigned long __user *arg) +{ + struct spalloc spallocinfo; + void *addr; + int ret; + + if (arg == NULL) { + pr_err("arg is invalid value.\n"); + return EFAULT; + } + + ret = copy_from_user(&spallocinfo, (void __user *)arg, sizeof(spallocinfo)); + if (ret) { + pr_err("failed to copy args from user space.\n"); + return EFAULT; + } + + addr = sp_alloc(spallocinfo.size, spallocinfo.flag, SPG_DEFAULT_ID); + if (IS_ERR_VALUE(addr)) { + pr_err("svm: sp alloc failed with %ld\n", PTR_ERR(addr)); + return EFAULT; + } + + pr_notice("svm: [sp alloc] caller %s(%d/%d); return addr 0x%pK, size %lu\n", + current->comm, current->tgid, current->pid, addr, spallocinfo.size); + sp_dump_stack(); + + spallocinfo.addr = (uintptr_t)addr; + if (copy_to_user((void __user *)arg, &spallocinfo, sizeof(struct spalloc))) { + sp_free(spallocinfo.addr); + return EFAULT; + } + + return 0; +} + +static int svm_sp_free_mem(unsigned long __user *arg) +{ + int ret; + struct spalloc spallocinfo; + + if (arg == NULL) { + pr_err("arg ivalue.\n"); + return -EFAULT; + } + + ret = copy_from_user(&spallocinfo, (void __user *)arg, sizeof(spallocinfo)); + if (ret) { + pr_err("failed to copy args from user space.\n"); + return -EFAULT; + } + + ret = is_sharepool_addr(spallocinfo.addr); + if (ret == FALSE){ + pr_err("svm: sp free failed because the addr is not from sp.\n"); + return -EINVAL; + } + + ret = sp_free(spallocinfo.addr); + if (ret != 0) { + pr_err("svm: sp free failed with %d.\n", ret); + return -EFAULT; + } + + pr_notice("svm: [sp free] caller %s(%d/%d); addr 0x%pK\n", + current->comm, current->tgid, current->pid, (void *)spallocinfo.addr); + sp_dump_stack(); + + return 0; +} + /*svm ioctl will include some case for HI1980 and HI1910*/ static long svm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1675,9 +2018,6 @@ static long svm_ioctl(struct file *file, unsigned int cmd, case SVM_IOCTL_SET_RC: err = svm_set_rc((unsigned long __user *)arg); break; - case SVM_IOCTL_GET_L2PTE_BASE: - err = svm_get_l2pte_base(sdev, (unsigned long __user *)arg); - break; case SVM_IOCTL_PIN_MEMORY: err = svm_pin_memory((unsigned long __user *)arg); break; @@ -1687,6 +2027,9 @@ static long svm_ioctl(struct file *file, unsigned int cmd, case SVM_IOCTL_GETHUGEINFO: err = svm_get_hugeinfo((unsigned long __user *)arg); break; + case SVM_IOCTL_GET_PHYMEMINFO: + err = svm_get_phy_memory_info((unsigned long __user *)arg); + break; case SVM_IOCTL_REMAP_PROC: err = svm_remap_proc((unsigned long __user *)arg); break; @@ -1696,6 +2039,12 @@ static long svm_ioctl(struct file *file, unsigned int cmd, case SVM_IOCTL_RELEASE_PHYS32: err = svm_release_phys32((unsigned long __user *)arg); break; + case SVM_IOCTL_SP_ALLOC: + err = svm_sp_alloc_mem((unsigned long __user *)arg); + break; + case SVM_IOCTL_SP_FREE: + err = svm_sp_free_mem((unsigned long __user *)arg); + break; default: err = -EINVAL; } @@ -1806,10 +2155,15 @@ static int svm_device_probe(struct platform_device *pdev) if (err) dev_warn(dev, "Cannot get l2buff\n");
+ if (svm_va2pa_trunk_init(dev)) { + dev_err(dev, "failed to init va2pa trunk\n"); + goto err_unregister_misc; + } + err = svm_dt_init_core(sdev, np); if (err) { dev_err(dev, "failed to init dt cores\n"); - goto err_unregister_misc; + goto err_remove_trunk; }
probe_index++; @@ -1819,6 +2173,9 @@ static int svm_device_probe(struct platform_device *pdev)
return err;
+err_remove_trunk: + svm_remove_trunk(dev); + err_unregister_misc: misc_deregister(&sdev->miscdev);
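Finally, a minimal userspace sketch of the two new share pool ioctls. The struct layout and raw command values come from the hunks above; the device node name is an assumption and error handling is trimmed, so treat it as an illustration rather than a reference client:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define SVM_IOCTL_SP_ALLOC	0xfff2
#define SVM_IOCTL_SP_FREE	0xfff1

struct spalloc {
	unsigned long addr;	/* filled in by the driver on SP_ALLOC */
	unsigned long size;
	unsigned long flag;	/* e.g. SP_HUGEPAGE (1 << 0) */
};

int main(void)
{
	struct spalloc sp = { .size = 2UL << 20, .flag = 0 };
	int fd = open("/dev/svm0", O_RDWR);	/* device node name is an assumption */

	if (fd < 0)
		return 1;
	if (ioctl(fd, SVM_IOCTL_SP_ALLOC, &sp) == 0) {
		printf("share pool buffer at 0x%lx\n", sp.addr);
		ioctl(fd, SVM_IOCTL_SP_FREE, &sp);	/* driver frees sp.addr */
	}
	close(fd);
	return 0;
}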