Offering: HULK
hulk inclusion
category: feature
bugzilla: NA

----------------------------------------

mm/share_pool: support registering remote ranges

Add mg_sp_register_remote_range() and mg_sp_register_remote_range_multi()
so that a physical range, or an array of PMD_SIZE physical chunks, can be
registered for a share pool group id.

Registered ranges are kept in a per-id sp_remote_group. If the share pool
group already exists, the range is mapped write-combined into every process
of the group; otherwise an sp_area is created for each registered range
when the group is created later.

Signed-off-by: Yin Tirui <yintirui@huawei.com>
---
 include/linux/share_pool.h |  18 ++
 mm/share_pool.c            | 500 ++++++++++++++++++++++++++++++++++---
 2 files changed, 492 insertions(+), 26 deletions(-)

diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index ea581516838c..b230f108e89d 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -160,6 +160,12 @@ extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, in
 
 extern bool mg_is_sharepool_addr(unsigned long addr);
 
+extern int mg_sp_register_remote_range(int spg_id, unsigned long va,
+	unsigned long pa, unsigned long size);
+
+extern int mg_sp_register_remote_range_multi(int spg_id, unsigned long va,
+	unsigned long *pa_array, unsigned long len, unsigned long page_size);
+
 extern int mg_sp_id_of_current(void);
 
 extern void __sp_mm_clean(struct mm_struct *mm);
@@ -283,6 +289,18 @@ static inline bool mg_is_sharepool_addr(unsigned long addr)
 	return false;
 }
 
+static inline int mg_sp_register_remote_range(int spg_id, unsigned long va,
+	unsigned long pa, unsigned long size)
+{
+	return -EPERM;
+}
+
+static inline int mg_sp_register_remote_range_multi(int spg_id, unsigned long va,
+	unsigned long *pa_array, unsigned long len, unsigned long page_size)
+{
+	return -EPERM;
+}
+
 static inline bool sp_is_enabled(void)
 {
 	return false;
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 8965625cfe63..bffd8f5731b8 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -74,7 +74,12 @@ static int system_group_count;
 
 /* idr of all sp_groups */
 static DEFINE_IDR(sp_group_idr);
-/* rw semaphore for sp_group_idr and mm->sp_group_master */
+/* idr of all sp_remote_groups */
+static DEFINE_IDR(sp_remote_group_idr);
+/*
+ * rw semaphore for sp_group_idr and sp_remote_group_idr
+ * and mm->sp_group_master
+ */
 static DECLARE_RWSEM(sp_global_sem);
 
 /*** Statistical and maintenance tools ***/
@@ -204,6 +209,30 @@ struct sp_group_master {
 	char comm[TASK_COMM_LEN];
 };
 
+struct sp_remote_group {
+	int id;
+	atomic_t use_count;
+	atomic_t spra_num;
+	struct list_head spra_list;
+	struct rw_semaphore rw_lock;
+};
+
+struct sp_remote_area {
+	unsigned long va_start;
+	unsigned long va_end;
+	unsigned long real_size;
+	unsigned long page_size;
+	struct list_head list;
+	struct sp_remote_group *sprg;
+	struct sp_area *spa;
+	bool multi;
+	atomic_t use_count;
+	union {
+		unsigned long pa;
+		unsigned long *pa_array;
+	};
+};
+
 /*
  * each instance represents an sp group the process belongs to
  * sp_group_master : sp_group_node = 1 : N
@@ -649,6 +678,7 @@ static struct sp_overall_stat sp_overall_stat;
 
 enum spa_type {
 	SPA_TYPE_ALLOC = 1,
+	SPA_TYPE_REMOTE,
 	SPA_TYPE_K2TASK,
 	SPA_TYPE_K2SPG,
 };
@@ -689,7 +719,10 @@ struct sp_area {
 	struct rb_node rb_node;		/* address sorted rbtree */
 	struct rb_node spg_link;	/* link to the spg->rb_root */
 	struct sp_group *spg;
-	struct sp_mapping *spm;		/* where spa born from */
+	union {
+		struct sp_mapping *spm;		/* where spa born from */
+		struct sp_remote_area *spra;	/* for remote range */
+	};
 	enum spa_type type;
 	union {
 		unsigned long kva;		/* shared kva */
@@ -753,6 +786,8 @@ static void spa_inc_usage(struct sp_area *spa)
 		atomic64_add(size, &spa_stat.k2u_spg_size);
 		meminfo_inc_k2u(size, &spa->spg->meminfo);
 		break;
+	case SPA_TYPE_REMOTE:
+		break;
 	default:
 		WARN(1, "invalid spa type");
 	}
@@ -792,6 +827,8 @@ static void spa_dec_usage(struct sp_area *spa)
 		atomic64_sub(size, &spa_stat.k2u_spg_size);
 		meminfo_dec_k2u(size, &spa->spg->meminfo);
 		break;
+	case SPA_TYPE_REMOTE:
+		break;
 	default:
 		WARN(1, "invalid spa type");
 	}
@@ -818,6 +855,8 @@ static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage,
 	case SPA_TYPE_K2SPG:
 		update_mem_usage_k2u(size, inc, spg_node);
 		break;
+	case SPA_TYPE_REMOTE:
+		break;
 	default:
 		WARN(1, "invalid stat type\n");
 	}
@@ -1056,6 +1095,12 @@ static void sp_group_init(struct sp_group *spg, int spg_id)
 	meminfo_init(&spg->meminfo);
 }
 
+static struct sp_remote_group *sp_remote_group_get_from_idr_locked(int spg_id);
+static void sp_group_insert_area(struct sp_group *spg, struct sp_area *spa);
+static void sp_area_put_locked(struct sp_area *spa);
+static struct sp_area *sp_area_alloc_by_spra(struct sp_group *spg,
+	struct sp_remote_area *spra);
+
 /*
  * sp_group_create - create a new sp_group
  * @spg_id: specify the id for the new sp_group
@@ -1077,6 +1122,9 @@ static struct sp_group *sp_group_create(int spg_id)
 	struct sp_group *spg;
 	char name[DNAME_INLINE_LEN];
 	int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
+	struct sp_remote_group *sprg;
+	struct sp_remote_area *spra;
+	struct sp_area *spa;
 
 	if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM &&
 		     spg_id != SPG_ID_LOCAL)) {
@@ -1131,8 +1179,37 @@ static struct sp_group *sp_group_create(int spg_id)
 	if (!is_local_group(spg_id))
 		system_group_count++;
 
+	sprg = sp_remote_group_get_from_idr_locked(spg_id);
+	if (sprg) {
+		down_write(&sprg->rw_lock);
+		list_for_each_entry(spra, &sprg->spra_list, list) {
+			spa = sp_area_alloc_by_spra(spg, spra);
+			if (IS_ERR(spa)) {
+				up_write(&sprg->rw_lock);
+				goto out_spa_put;
+			}
+			spra->spa = spa;
+			sp_group_insert_area(spg, spa);
+		}
+		up_write(&sprg->rw_lock);
+	}
+
 	return spg;
 
+out_spa_put:
+	if (sprg) {
+		down_write(&sprg->rw_lock);
+		list_for_each_entry(spra, &sprg->spra_list, list) {
+			spa = spra->spa;
+			if (spa)
+				sp_area_put_locked(spa);
+		}
+		up_write(&sprg->rw_lock);
+	}
+	/* undo the system_group_count++ done above before bailing out */
+	if (!is_local_group(spg_id))
+		system_group_count--;
+
 out_fput:
 	fput(spg->file);
 out_idr_remove:
@@ -1305,7 +1382,6 @@ static int mm_add_group_init(pid_t tgid, struct mm_struct **pmm)
 	return ret;
 }
 
-static void sp_area_put_locked(struct sp_area *spa);
 static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size);
 /**
  * mg_sp_group_add_task() - Add a process to an share group (sp_group).
@@ -1821,33 +1897,43 @@ static struct sp_area *sp_area_get(struct sp_group *spg,
  */
 static void sp_area_free(struct sp_area *spa)
 {
-	struct sp_mapping *spm = spa->spm;
-
-	spin_lock(&spm->sp_mapping_lock);
-	if (spm->free_area_cache) {
-		struct sp_area *cache;
+	struct sp_remote_area *spra;
+	struct sp_mapping *spm;
 
-		cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node);
-		if (spa->va_start <= cache->va_start) {
-			spm->free_area_cache = rb_prev(&spa->rb_node);
-			/*
-			 * the new cache node may be changed to another region,
-			 * i.e. from DVPP region to normal region
-			 */
-			if (spm->free_area_cache) {
-				cache = rb_entry(spm->free_area_cache,
-						 struct sp_area, rb_node);
-				spm->cached_vstart = cache->region_vstart;
+	if (spa->type == SPA_TYPE_REMOTE) {
+		spra = spa->spra;
+		if (likely(spra))
+			spra->spa = NULL;
+		else
+			pr_err_ratelimited("%s, spra is NULL\n", __func__); // WARN ?
+	} else {
+		spm = spa->spm;
+		spin_lock(&spm->sp_mapping_lock);
+		if (spm->free_area_cache) {
+			struct sp_area *cache;
+
+			cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node);
+			if (spa->va_start <= cache->va_start) {
+				spm->free_area_cache = rb_prev(&spa->rb_node);
+				/*
+				 * the new cache node may be changed to another region,
+				 * i.e. from DVPP region to normal region
+				 */
+				if (spm->free_area_cache) {
+					cache = rb_entry(spm->free_area_cache,
+							 struct sp_area, rb_node);
+					spm->cached_vstart = cache->region_vstart;
+				}
+				/*
+				 * We don't try to update cached_hole_size,
+				 * but it won't go very wrong.
+				 */
 			}
-			/*
-			 * We don't try to update cached_hole_size,
-			 * but it won't go very wrong.
-			 */
 		}
-	}
 
-	rb_erase(&spa->rb_node, &spm->area_root);
-	spin_unlock(&spm->sp_mapping_lock);
+		rb_erase(&spa->rb_node, &spm->area_root);
+		spin_unlock(&spm->sp_mapping_lock);
+	}
 	RB_CLEAR_NODE(&spa->rb_node);
 	sp_area_free_pages(spa);
 	kfree(spa);
@@ -1973,6 +2059,17 @@ static struct sp_group *sp_group_get_from_idr(int spg_id)
 	return spg;
 }
 
+static struct sp_group *sp_group_get_from_idr_locked(int spg_id)
+{
+	struct sp_group *spg;
+
+	spg = idr_find(&sp_group_idr, spg_id);
+	if (!spg || !atomic_inc_not_zero(&spg->use_count))
+		spg = NULL;
+
+	return spg;
+}
+
 static int sp_free_inner(unsigned long addr, int spg_id, bool is_sp_free)
 {
 	int ret = 0;
@@ -2075,6 +2172,40 @@ static int sp_vma_populate_pages(struct vm_area_struct *vma, struct sp_area *spa
 	return 0;
 }
 
+/*
+ * Map the physical range(s) recorded in @spra into @vma, starting at @uaddr.
+ * A multi area maps one PMD_SIZE chunk per pa_array entry; otherwise the
+ * contiguous range starting at spra->pa is mapped in one call.
+ * Returns 0 on success or the error from remap_pfn_range_try_pmd().
+ */
+static int sp_vma_populate_remote_pages(struct vm_area_struct *vma, struct sp_remote_area *spra,
+	unsigned long uaddr, unsigned long size)
+{
+	/* Currently, PMD_SIZE is the only page size for remote page mapping */
+	unsigned long page_size = PMD_SIZE;
+	unsigned long pfn;
+	int i = 0;
+	int ret;
+
+	if (spra->multi) {
+		do {
+			pfn = PHYS_PFN(spra->pa_array[i]);
+			ret = remap_pfn_range_try_pmd(vma, uaddr, pfn, page_size, vma->vm_page_prot);
+			if (ret)
+				return ret;
+
+			uaddr += page_size;
+			size -= page_size;
+			i++;
+		} while (size > 0);
+	} else {
+		pfn = PHYS_PFN(spra->pa);
+		ret = remap_pfn_range_try_pmd(vma, uaddr, pfn, size, vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
 static inline void sp_update_prot(struct vm_area_struct *vma, unsigned long prot)
 {
 	if (prot & PROT_WRITE)
@@ -2325,6 +2456,8 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	int ret = 0;
 	unsigned long mmap_addr;
 	unsigned long populate = 0;
+	struct vm_area_struct *vma;
+	struct sp_remote_area *spra;
 
 	mmap_write_lock(mm);
 	if (unlikely(!mmget_not_zero(mm))) {
@@ -2368,6 +2501,24 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 		}
 		mmap_write_unlock(mm);
 		break;
+	case SPA_TYPE_REMOTE:
+		spra = spa->spra;
+		if (!spra) {
+			mmap_write_unlock(mm);
+			ret = -EFAULT;
+			pr_err_ratelimited("remote map failed, spra is NULL.\n");
+			break;
+		}
+		vma = find_vma(mm, mmap_addr);
+		sp_update_prot(vma, prot);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		ret = sp_vma_populate_remote_pages(vma, spra, mmap_addr, spa_size(spa));
+		if (ret) {
+			do_munmap(mm, mmap_addr, spa_size(spa), NULL);
+			pr_err_ratelimited("remote map failed, ret=%d\n", ret);
+		}
+		mmap_write_unlock(mm);
+		break;
 	default:
 		break;
 	}
@@ -3337,6 +3488,303 @@ bool mg_is_sharepool_addr(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(mg_is_sharepool_addr);
 
+/*
+ * Link @spra into @sprg. The first area registered in a group takes an
+ * extra reference on the group.
+ */
+static void sp_remote_group_insert_area(struct sp_remote_group *sprg,
+	struct sp_remote_area *spra)
+{
+	list_add_tail(&spra->list, &sprg->spra_list);
+	atomic_inc(&sprg->spra_num);
+	if (atomic_read(&sprg->spra_num) == 1)
+		atomic_inc(&sprg->use_count);
+}
+
+static struct sp_remote_group *sp_remote_group_create(int spg_id)
+{
+	struct sp_remote_group *sprg;
+	int ret;
+
+	sprg = kzalloc(sizeof(*sprg), GFP_KERNEL);
+	if (!sprg)
+		return ERR_PTR(-ENOMEM);
+	sprg->id = spg_id;
+	atomic_set(&sprg->use_count, 1);
+	atomic_set(&sprg->spra_num, 0);
+	INIT_LIST_HEAD(&sprg->spra_list);
+	init_rwsem(&sprg->rw_lock);
+
+	ret = idr_alloc(&sp_remote_group_idr, sprg, spg_id, spg_id + 1, GFP_KERNEL);
+	if (ret < 0) {
+		pr_err("remote group %d idr alloc failed %d\n", spg_id, ret);
+		kfree(sprg);
+		return ERR_PTR(ret);
+	}
+
+	return sprg;
+}
+
+/* the caller must hold sp_global_sem */
+static struct sp_remote_group *sp_remote_group_get_or_alloc(int spg_id)
+{
+	struct sp_remote_group *sprg;
+
+	sprg = idr_find(&sp_remote_group_idr, spg_id);
+	if (!sprg || !atomic_inc_not_zero(&sprg->use_count))
+		sprg = sp_remote_group_create(spg_id);
+
+	return sprg;
+}
+
+static struct sp_remote_group *sp_remote_group_get_from_idr_locked(int spg_id)
+{
+	struct sp_remote_group *sprg;
+
+	sprg = idr_find(&sp_remote_group_idr, spg_id);
+	if (!sprg || !atomic_inc_not_zero(&sprg->use_count))
+		sprg = NULL;
+
+	return sprg;
+}
+
+/* the caller must hold spg->rw_lock */
+static struct sp_area *sp_area_alloc_by_spra(struct sp_group *spg,
+	struct sp_remote_area *spra)
+{
+	struct sp_area *spa;
+
+	spa = kzalloc(sizeof(struct sp_area), GFP_KERNEL);
+	if (unlikely(!spa))
+		return ERR_PTR(-ENOMEM);
+
+	spa->va_start = spra->va_start;
+	spa->va_end = spra->va_end;
+	spa->real_size = spra->real_size;
+	spa->is_hugepage = true;
+	spa->type = SPA_TYPE_REMOTE;
+	spa->spg = spg;
+	spa->spra = spra;
+	atomic_set(&spa->use_count, 1);
+	return spa;
+}
+
+static struct sp_remote_area *sp_remote_area_alloc(struct sp_remote_group *sprg,
+	unsigned long va, unsigned long *pa_array, unsigned long pa,
+	unsigned long len, unsigned long page_size, bool multi)
+{
+	struct sp_remote_area *spra;
+	unsigned long nr;
+
+	// TODO: check spra overlap
+	spra = kzalloc(sizeof(struct sp_remote_area), GFP_KERNEL);
+	if (!spra)
+		return ERR_PTR(-ENOMEM);
+
+	spra->sprg = sprg;
+	spra->va_start = va;
+	spra->va_end = va + len;
+	spra->real_size = len;
+	spra->page_size = page_size;
+	INIT_LIST_HEAD(&spra->list);
+	atomic_set(&spra->use_count, 1);
+	if (multi) {
+		spra->multi = true;
+		nr = len / page_size;
+		spra->pa_array = kvmalloc_array(nr, sizeof(unsigned long), GFP_KERNEL);
+		if (unlikely(!spra->pa_array)) {
+			kfree(spra);
+			return ERR_PTR(-ENOMEM);
+		}
+		memcpy(spra->pa_array, pa_array, nr * sizeof(unsigned long));
+	} else {
+		spra->multi = false;
+		spra->pa = pa;
+	}
+
+	return spra;
+}
+
+/*
+ * Create the sp_area backing @spra in @spg and map it into every process of
+ * the group. On a mapping failure other than SP_SKIP_ERR the area is unmapped
+ * and released again.
+ */
+static int sp_remote_mmap_populate(struct sp_group *spg,
+	struct sp_remote_group *sprg, struct sp_remote_area *spra)
+{
+	struct sp_group_node *spg_node;
+	struct mm_struct *mm;
+	struct sp_area *spa;
+	int mmap_ret = 0;
+	int ret = 0;
+
+	spa = sp_area_alloc_by_spra(spg, spra);
+	if (IS_ERR(spa))
+		return PTR_ERR(spa);
+
+	spra->spa = spa;
+	sp_group_insert_area(spg, spa);
+	/* create mapping for each process in the group */
+	list_for_each_entry(spg_node, &spg->proc_head, proc_node) {
+		mm = spg_node->master->mm;
+		kthread_use_mm(mm);
+		mmap_ret = sp_map_spa_to_mm(mm, spa, spg_node->prot, NULL,
+			"sp_remote_alloc");
+		kthread_unuse_mm(mm);
+		if (mmap_ret) {
+			if (mmap_ret != SP_SKIP_ERR)
+				goto unmap;
+
+			continue;
+		}
+		ret = mmap_ret;
+	}
+
+	return ret;
+
+unmap:
+	__sp_free(spa, mm);
+	sp_area_put_locked(spa);
+
+	return mmap_ret;
+}
+
+static void free_sp_remote_group_locked(struct sp_remote_group *sprg)
+{
+	idr_remove(&sp_remote_group_idr, sprg->id);
+	kfree(sprg);
+}
+
+static void sp_remote_group_put_locked(struct sp_remote_group *sprg)
+{
+	lockdep_assert_held_write(&sp_global_sem);
+
+	if (atomic_dec_and_test(&sprg->use_count))
+		free_sp_remote_group_locked(sprg);
+}
+
+/*
+ * Common implementation of the register entry points: record the range in
+ * the remote group for @spg_id and, if the share pool group already exists,
+ * map it into that group's processes.
+ */
+static int __register_remote(int spg_id, unsigned long va,
+	unsigned long *pa_array, unsigned long pa, unsigned long len,
+	unsigned long page_size, bool multi)
+{
+	struct sp_remote_group *sprg;
+	struct sp_remote_area *spra;
+	struct sp_group *spg = NULL;
+	unsigned long i;
+	int ret = 0;
+
+	if (!sp_is_enabled())
+		return -EOPNOTSUPP;
+
+	if (unlikely(page_size != PMD_SIZE)) {
+		pr_err_ratelimited("register remote failed, invalid page_size 0x%lx\n", page_size);
+		return -EINVAL;
+	}
+
+	if (!len || !IS_ALIGNED(va, page_size) || !IS_ALIGNED(len, page_size)
+		|| va + len < va)
+		return -EINVAL;
+
+	if (multi) {
+		/* pa_array is copied and walked later, so reject NULL up front */
+		if (!pa_array)
+			return -EINVAL;
+		for (i = 0; i < len / page_size; i++) {
+			if (!IS_ALIGNED(pa_array[i], page_size))
+				return -EINVAL;
+		}
+	} else if (!IS_ALIGNED(pa, page_size)) {
+		return -EINVAL;
+	}
+
+	down_write(&sp_global_sem);
+	sprg = sp_remote_group_get_or_alloc(spg_id);
+	if (IS_ERR(sprg)) {
+		pr_err_ratelimited("register remote failed, get sprg failed, ret=%ld\n",
+			PTR_ERR(sprg));
+		ret = PTR_ERR(sprg);
+		goto out_unlock;
+	}
+
+	spra = sp_remote_area_alloc(sprg, va, pa_array, pa, len, page_size, multi);
+	if (IS_ERR(spra)) {
+		pr_err_ratelimited("register remote failed, alloc spra failed, ret=%ld\n",
+			PTR_ERR(spra));
+		ret = PTR_ERR(spra);
+		goto drop_sprg;
+	}
+	down_write(&sprg->rw_lock);
+	sp_remote_group_insert_area(sprg, spra);
+	up_write(&sprg->rw_lock);
+
+	spg = sp_group_get_from_idr_locked(spg_id);
+	if (spg) {
+		down_write(&spg->rw_lock);
+		ret = sp_remote_mmap_populate(spg, sprg, spra);
+		up_write(&spg->rw_lock);
+		if (ret) {
+			pr_err_ratelimited("register remote failed, remote map failed, ret=%d\n",
+				ret);
+			goto drop_sprg;
+		}
+	}
+
+	up_write(&sp_global_sem);
+	/* drop the reference taken by sp_group_get_from_idr_locked() */
+	if (spg)
+		sp_group_put(spg);
+	return 0;
+
+drop_sprg:
+	sp_remote_group_put_locked(sprg);
+out_unlock:
+	up_write(&sp_global_sem);
+	if (spg)
+		sp_group_put(spg);
+	return ret;
+}
+
+/**
+ * mg_sp_register_remote_range() - Register a contiguous remote range.
+ * @spg_id: id of the share pool group the range belongs to.
+ * @va: user virtual address of the range, PMD_SIZE aligned.
+ * @pa: physical address of the range, PMD_SIZE aligned.
+ * @len: length of the range in bytes, a non-zero multiple of PMD_SIZE.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+int mg_sp_register_remote_range(int spg_id, unsigned long va,
+	unsigned long pa, unsigned long len)
+{
+	return __register_remote(spg_id, va, NULL, pa, len, PMD_SIZE, false);
+}
+EXPORT_SYMBOL_GPL(mg_sp_register_remote_range);
+
+/**
+ * mg_sp_register_remote_range_multi() - Register a remote range backed by
+ * several physical chunks.
+ * @spg_id: id of the share pool group the range belongs to.
+ * @va: user virtual address of the range, @page_size aligned.
+ * @pa_array: physical base address of each chunk, len / page_size entries,
+ *            each @page_size aligned.
+ * @len: length of the range in bytes, a non-zero multiple of @page_size.
+ * @page_size: size of each chunk; only PMD_SIZE is accepted.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+int mg_sp_register_remote_range_multi(int spg_id, unsigned long va,
+	unsigned long *pa_array, unsigned long len, unsigned long page_size)
+{
+	return __register_remote(spg_id, va, pa_array, 0, len, page_size, true);
+}
+EXPORT_SYMBOL_GPL(mg_sp_register_remote_range_multi);
+
 /*** Statistical and maintenance functions ***/
 
 static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon,
-- 
2.43.0