[PATCH OLK-6.6 0/11] Share Pool Remote Support
Offering: HULK hulk inclusion category: feature bugzilla: NA -------------------------------- Introduce sp_register_hugepage_allocator() to let drivers replace the default sharepool hugepage allocation functions. Signed-off-by: Yin Tirui <yintirui@huawei.com> --- include/linux/share_pool.h | 9 +++++++ mm/share_pool.c | 51 +++++++++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index a3caedb99c5c..147e99620b9d 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -88,6 +88,8 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DYNAMIC_DVPP_END (MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + \ MMAP_SHARE_POOL_16G_SIZE * 64) +typedef unsigned long (*sp_alloc_hugepage_fn)(unsigned long size, int nid, nodemask_t *nodemask); + #ifdef CONFIG_SHARE_POOL /* @@ -161,6 +163,8 @@ static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) return sp_is_enabled() && mg_is_sharepool_addr(addr) && !(flags & MAP_SHARE_POOL); } +int sp_register_hugepage_allocator(sp_alloc_hugepage_fn alloc_func); + #else /* CONFIG_SHARE_POOL */ static inline int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id) @@ -260,6 +264,11 @@ static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { return false; } + +static inline int sp_register_hugepage_allocator(sp_alloc_hugepage_fn alloc_func) +{ + return -EPERM; +} #endif /* !CONFIG_SHARE_POOL */ #endif /* LINUX_SHARE_POOL_H */ diff --git a/mm/share_pool.c b/mm/share_pool.c index d380fa74bb95..cec673f18160 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -88,6 +88,8 @@ static DEFINE_MUTEX(master_list_lock); static LIST_HEAD(spm_dvpp_list); /* mutex to protect insert/delete ops from master_list */ static DEFINE_MUTEX(spm_list_lock); +/* mutex to protect hugepage allocator registration */ +static DEFINE_MUTEX(sp_hugepage_reg_mutex); #define SEQ_printf(m, x...) 
\ do { \ @@ -1513,6 +1515,36 @@ static bool sp_group_delete_area(struct sp_group *spg, struct sp_area *spa) return atomic_dec_and_test(&spa->spg->spa_num); } +unsigned long sp_alloc_hugepage(unsigned long size, int nid, nodemask_t *nodemask) +{ + struct folio *folio = alloc_hugetlb_folio_nodemask_size(size, nid, nodemask); + + if (unlikely(!folio)) + return -ENOMEM; + + return folio_pfn(folio); +} + +static sp_alloc_hugepage_fn sp_alloc_hugepage_func = sp_alloc_hugepage; +static bool allow_hugepage_allocator_register = true; + +int sp_register_hugepage_allocator(sp_alloc_hugepage_fn alloc_func) +{ + if (unlikely(!alloc_func)) + return -EINVAL; + + mutex_lock(&sp_hugepage_reg_mutex); + if (!allow_hugepage_allocator_register) { + mutex_unlock(&sp_hugepage_reg_mutex); + return -EBUSY; + } + sp_alloc_hugepage_func = alloc_func; + allow_hugepage_allocator_register = false; + mutex_unlock(&sp_hugepage_reg_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(sp_register_hugepage_allocator); + static bool spa_remap_huge __ro_after_init = true; static bool spa_remap_normal __ro_after_init; @@ -1544,13 +1576,16 @@ static bool sp_area_use_remap(struct sp_area *spa) static struct page *sp_area_alloc_hugepages(int nid, nodemask_t *nodemask) { struct page *page; + unsigned long pfn; - page = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, nid, nodemask); - if (!page) + pfn = sp_alloc_hugepage_func(PMD_SIZE, nid, nodemask); + if (IS_ERR_VALUE(pfn)) { page = (struct page *)alloc_temporary_hugetlb_folio_nodemask(nid, nodemask, GFP_HIGHUSER_MOVABLE | __GFP_ACCOUNT | __GFP_COMP); + return page; + } - return page; + return pfn_to_page(pfn); } static bool sp_area_prepare_pages(struct sp_area *spa, int nid, nodemask_t *nodemask) @@ -2447,6 +2482,16 @@ static void *__mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, struct sp_alloc_context ac; struct sp_group_node *spg_node; + /** + * To avoid atomic risks, once share pool alloc memory, + * registering hugepage allocation 
hook is no longer supported. + */ + if (unlikely(allow_hugepage_allocator_register)) { + mutex_lock(&sp_hugepage_reg_mutex); + allow_hugepage_allocator_register = false; + mutex_unlock(&sp_hugepage_reg_mutex); + } + ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); if (ret) return ERR_PTR(ret); -- 2.43.0
From: Lin Ruifeng <linruifeng4@huawei.com> Offering: HULK hulk inclusion category: feature bugzilla: N/A -------------------------------- Excessive printing may cause the following soft lockup, so use pr_warn_ratelimited and pr_err_ratelimited to limit the print rate. watchdog: BUG: soft lockup - CPU#55 stuck for 23s! [test_u2k_add_an:50936] rcu: 55-....: (1447 ticks this GP) idle=cb2/1/0x4000000000000002 softirq=94729/94729 fqs=2509 Modules linked in: rcu: (detected by 12, t=5555 jiffies, g=123873, q=11191) sharepool_dev(O) Sending NMI from CPU 12 to CPUs 55: NMI backtrace for cpu 55 CPU: 55 PID: 50936 Comm: test_u2k_add_an Tainted: G W O 5.10.0+ #9 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: console_unlock+0x2d0/0x65c vprintk_emit+0x208/0x2fc vprintk_default+0x40/0x50 vprintk_func+0xf4/0x1a0 printk+0x64/0x8c sp_munmap+0xec/0x110 sp_free_inner+0x1b0/0x320 mg_sp_free+0x38/0x60 dev_ioctl+0x540/0x1b14 [sharepool_dev] __arm64_sys_ioctl+0xb0/0xfc el0_svc_common.constprop.0+0x88/0x250 do_el0_svc+0x2c/0x90 el0_svc+0x20/0x30 el0_sync_handler+0xb0/0xb4 el0_sync+0x180/0x1c0 Sample time: 1644475978700 ns(HZ: 250) Sample stat: curr: user: 69196000000, nice: 0, sys: 109948000000, idle: 1438412458000, iowait: 446000, irq: 0, softirq: 5740000000, st: 0 deta: user: 69196000000, nice: 0, sys: 109948000000, idle: 1438412458000, iowait: 446000, irq: 0, softirq: 5740000000, st: 0 Sample softirq: TIMER: 806 TASKLET: 7 SCHED: 9345 RCU: 94729 Kernel panic - not syncing: softlockup: hung tasks Signed-off-by: Lin Ruifeng <linruifeng4@huawei.com> Signed-off-by: Yin Tirui <yintirui@huawei.com> --- mm/share_pool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index cec673f18160..bef8d470246d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1212,7 +1212,7 @@ static int sp_group_link_task(struct mm_struct *mm, struct sp_group *spg, } if (is_process_in_group(spg, mm)) { - pr_err("task already in target 
group(%d)\n", spg->id); + pr_warn_ratelimited("task already in target group(%d)\n", spg->id); return -EEXIST; } @@ -1924,7 +1924,7 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr, mmap_write_lock(mm); if (unlikely(!mmget_not_zero(mm))) { mmap_write_unlock(mm); - pr_warn("munmap: target mm is exiting\n"); + pr_warn_ratelimited("munmap: target mm is exiting\n"); return; } @@ -2340,7 +2340,7 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, mmap_write_lock(mm); if (unlikely(!mmget_not_zero(mm))) { mmap_write_unlock(mm); - pr_warn("sp_map: target mm is exiting\n"); + pr_warn_ratelimited("sp_map: target mm is exiting\n"); return SP_SKIP_ERR; } -- 2.43.0
From: Yuan Can <yuancan@huawei.com> Offering: HULK hulk inclusion category: bugfix bugzilla: NA -------------------------------- Sharepool memory does not support VMA splitting, so make __split_vma() return -EINVAL for share pool VMAs. Fixes: fb31808921b8 ("mm/sharepool: Add base framework for share_pool") Signed-off-by: Yuan Can <yuancan@huawei.com> Signed-off-by: Yin Tirui <yintirui@huawei.com> --- mm/mmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index c76abe5f6ba3..e9175b67cc94 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2551,6 +2551,9 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, WARN_ON(vma->vm_start >= addr); WARN_ON(vma->vm_end <= addr); + if (sp_check_vm_share_pool(vma->vm_flags)) + return -EINVAL; + if (vma->vm_ops && vma->vm_ops->may_split) { err = vma->vm_ops->may_split(vma, addr); if (err) -- 2.43.0
Offering: HULK hulk inclusion category: feature bugzilla: NA ---------------------------------------- Introduce sp_update_prot(). Signed-off-by: Yin Tirui <yintirui@huawei.com> --- mm/share_pool.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index bef8d470246d..793111b3325c 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2086,6 +2086,16 @@ static int sp_vma_populate_pages(struct vm_area_struct *vma, struct sp_area *spa return 0; } +static inline void sp_update_prot(struct vm_area_struct *vma, unsigned long prot) +{ + if (prot & PROT_WRITE) + /* clean PTE_RDONLY flags or trigger SMMU event */ + vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | + PTE_DIRTY); + else + vm_flags_clear(vma, VM_MAYWRITE); +} + /* wrapper of __do_mmap() and the caller must hold mmap_write_lock(mm). */ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, @@ -2115,12 +2125,7 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, vma = find_vma(mm, addr); vma->spa = spa; - if (prot & PROT_WRITE) - /* clean PTE_RDONLY flags or trigger SMMU event */ - vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | - PTE_DIRTY); - else - vm_flags_clear(vma, VM_MAYWRITE); + sp_update_prot(vma, prot); if (sp_area_use_remap(spa)) { ret = sp_vma_populate_pages(vma, spa, addr, size); @@ -2297,12 +2302,7 @@ static int sp_nc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned long p vma = find_vma(mm, addr); vma->spa = spa; - if (prot & PROT_WRITE) - vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & - vma->vm_page_prot.pgprot) | PTE_DIRTY); - else - vm_flags_clear(vma, VM_MAYWRITE); - + sp_update_prot(vma, prot); vma->vm_page_prot = sp_pgprot_writethrough(vma->vm_page_prot); if (sp_area_use_remap(spa)) { ret = sp_vma_populate_pages(vma, spa, addr, size); -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> Offering: HULK hulk inclusion category: feature bugzilla: NA ---------------------------------------- The address range of sharepool could affect TSAN in mdc scene, so we cannot change it. Introduce a new config to isolate the features that modify the address range of sharepool. Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- include/linux/share_pool.h | 47 ++++++++++++++++++++++++++++++++++++-- mm/Kconfig | 9 ++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 147e99620b9d..ea581516838c 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -72,12 +72,23 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ #define MMAP_SHARE_POOL_16G_SIZE 0x400000000UL + +#ifdef CONFIG_SHARE_POOL_ADDRESS_SPACE_MDC_SCENE + +/* skip 8T for stack */ +#define MMAP_SHARE_POOL_SKIP 0x80000000000UL + +#else + /* skip 16T for stack */ #define MMAP_SHARE_POOL_SKIP 0x100000000000UL + +#endif + #define MMAP_SHARE_POOL_END (TASK_SIZE - MMAP_SHARE_POOL_SKIP) -#define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) +#define MMAP_SHARE_POOL_DVPP_END (MMAP_SHARE_POOL_END) /* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ -#define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) +#define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POOL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) #define MMAP_SHARE_POOL_RO_END (MMAP_SHARE_POOL_DVPP_START) #define MMAP_SHARE_POOL_RO_START (MMAP_SHARE_POOL_RO_END - MMAP_SHARE_POOL_RO_SIZE) #define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_RO_START) @@ -88,6 +99,38 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DYNAMIC_DVPP_END (MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + \ MMAP_SHARE_POOL_16G_SIZE * 64) +/* + * Address space for mdc scene + * |-------------------------------| <------- 0x1000000000000 + * | Stack | 8T + * 
|-------------------------------| <------- 0xf80000000000 + * | DVPP normal area | 8T + * |-------------------------------| <------- 0xf00000000000 + * | force readonly area | 64G + * |-------------------------------| <------- + * | Normal sharepool area | 8T - 64G + * |-------------------------------| <------- 0xe80000000000 + * | mmap area | + * |-------------------------------| + * + * Address space for dc scene + * |-------------------------------| <------- 0x1000000000000 + * | Stack | 8T + * |-------------------------------| <------- 0xf80000000000 + * | non-cache area | 8T + * |-------------------------------| <------- 0xf00000000000 + * | DVPP normal area | 8T + * |-------------------------------| <------- 0xe80000000000 + * | force readonly area | 64G + * |-------------------------------| <------- + * | Normal sharepool area | 8T - 64G + * |-------------------------------| <------- 0xe00000000000 + * | mmap area | + * |-------------------------------| + * | dynamic dvpp area | + * |-------------------------------| + */ + typedef unsigned long (*sp_alloc_hugepage_fn)(unsigned long size, int nid, nodemask_t *nodemask); #ifdef CONFIG_SHARE_POOL diff --git a/mm/Kconfig b/mm/Kconfig index 342710408a2b..2d4a83f4b5ad 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1446,6 +1446,15 @@ config GUP_LONGTERM_ALLOW_MOVEABLE_MEMORY memory will not be offlined in the ascend scenario, we allow to alloc moveable mmeory when FOLL_LONGTERM is set. +config SHARE_POOL_ADDRESS_SPACE_MDC_SCENE + bool "Apply the address space of sharepool to MDC scene" + default n + depends on SHARE_POOL + help + Because the address space for sharepool could affect TSAN in mdc scene, + we cannot change it. Use this config to isolate some features that + change the sharepool address range and are not used in mdc scene. + config ASCEND_OOM bool "Enable support for disable oom killer" depends on ASCEND_FEATURES -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> Offering: HULK hulk inclusion category: feature bugzilla: NA ---------------------------------------- Only sp_mapping of type SP_MAPPING_DVPP needs some extra operations because this type of mapping is specific to a group. All the other mappings are global; they are created when the system starts and are never destroyed. The general mapping operations do nothing meaningful for the normal global mappings, so don't operate on normal mappings and rename those functions with the prefix dvpp_mapping_ to simplify those operations. No logic change. Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 128 ++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 74 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 793111b3325c..1e417a117838 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -121,7 +121,6 @@ enum sp_mapping_type { */ struct sp_mapping { unsigned long type; - atomic_t user; unsigned long start[MAX_DEVID]; unsigned long end[MAX_DEVID]; struct rb_root area_root; @@ -129,11 +128,13 @@ struct sp_mapping { struct rb_node *free_area_cache; unsigned long cached_hole_size; unsigned long cached_vstart; + spinlock_t sp_mapping_lock; - /* list head for all groups attached to this mapping, dvpp mapping only */ - struct list_head group_head; + /* the following three element used for non-global dvpp mapping only */ + atomic_t user; + /* list head for all groups attached to this mapping */ + struct list_head dvpp_group_head; struct list_head spm_node; - spinlock_t sp_mapping_lock; }; /* Processes in the same sp_group can share memory. 
@@ -175,8 +176,8 @@ struct sp_group { /* protect the group internal elements */ struct rw_semaphore rw_lock; /* list node for dvpp mapping */ - struct list_head mnode; - struct sp_mapping *mapping[SP_MAPPING_END]; + struct list_head dvpp_mnode; + struct sp_mapping *dvpp_mapping; }; /* a per-process(per mm) struct which manages a sp_group_node list */ @@ -303,22 +304,6 @@ static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) static struct sp_mapping *sp_mapping_normal; static struct sp_mapping *sp_mapping_ro; -static void sp_mapping_add_to_list(struct sp_mapping *spm) -{ - mutex_lock(&spm_list_lock); - if (sp_mapping_type(spm) == SP_MAPPING_DVPP) - list_add_tail(&spm->spm_node, &spm_dvpp_list); - mutex_unlock(&spm_list_lock); -} - -static void sp_mapping_remove_from_list(struct sp_mapping *spm) -{ - mutex_lock(&spm_list_lock); - if (sp_mapping_type(spm) == SP_MAPPING_DVPP) - list_del(&spm->spm_node); - mutex_unlock(&spm_list_lock); -} - static void sp_mapping_range_init(struct sp_mapping *spm) { int i; @@ -354,62 +339,57 @@ static struct sp_mapping *sp_mapping_create(unsigned long type) sp_mapping_set_type(spm, type); sp_mapping_range_init(spm); - atomic_set(&spm->user, 0); spm->area_root = RB_ROOT; - INIT_LIST_HEAD(&spm->group_head); spin_lock_init(&spm->sp_mapping_lock); - sp_mapping_add_to_list(spm); return spm; } -static void sp_mapping_destroy(struct sp_mapping *spm) +static void dvpp_mapping_destroy(struct sp_mapping *spm) { - sp_mapping_remove_from_list(spm); + mutex_lock(&spm_list_lock); + list_del(&spm->spm_node); + mutex_unlock(&spm_list_lock); + kfree(spm); } -static void sp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) +static void dvpp_mapping_attach(struct sp_group *spg, struct sp_mapping *spm) { - unsigned long type = sp_mapping_type(spm); - atomic_inc(&spm->user); - spg->mapping[type] = spm; - if (type == SP_MAPPING_DVPP) - list_add_tail(&spg->mnode, &spm->group_head); + spg->dvpp_mapping = spm; + 
list_add_tail(&spg->dvpp_mnode, &spm->dvpp_group_head); } -static void sp_mapping_detach(struct sp_group *spg, struct sp_mapping *spm) +static void dvpp_mapping_detach(struct sp_group *spg) { - unsigned long type; + struct sp_mapping *spm = spg->dvpp_mapping; if (!spm) return; - type = sp_mapping_type(spm); - if (type == SP_MAPPING_DVPP) - list_del(&spg->mnode); + list_del(&spg->dvpp_mnode); if (atomic_dec_and_test(&spm->user)) - sp_mapping_destroy(spm); + dvpp_mapping_destroy(spm); - spg->mapping[type] = NULL; + spg->dvpp_mapping = NULL; } /* merge old mapping to new, and the old mapping would be destroyed */ -static void sp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old) +static void dvpp_mapping_merge(struct sp_mapping *new, struct sp_mapping *old) { struct sp_group *spg, *tmp; if (new == old) return; - list_for_each_entry_safe(spg, tmp, &old->group_head, mnode) { - list_move_tail(&spg->mnode, &new->group_head); - spg->mapping[SP_MAPPING_DVPP] = new; + list_for_each_entry_safe(spg, tmp, &old->dvpp_group_head, dvpp_mnode) { + list_move_tail(&spg->dvpp_mnode, &new->dvpp_group_head); + spg->dvpp_mapping = new; } atomic_add(atomic_read(&old->user), &new->user); - sp_mapping_destroy(old); + dvpp_mapping_destroy(old); } static bool is_mapping_empty(struct sp_mapping *spm) @@ -437,12 +417,12 @@ static bool can_mappings_merge(struct sp_mapping *m1, struct sp_mapping *m2) * the caller must hold sp_global_sem * NOTE: undo the mergeing when the later process failed. 
*/ -static int sp_group_setup_mapping_normal(struct mm_struct *mm, struct sp_group *spg) +static int sp_group_setup_dvpp_mapping_normal(struct mm_struct *mm, struct sp_group *spg) { struct sp_mapping *local_dvpp_mapping, *spg_dvpp_mapping; - local_dvpp_mapping = mm->sp_group_master->local->mapping[SP_MAPPING_DVPP]; - spg_dvpp_mapping = spg->mapping[SP_MAPPING_DVPP]; + local_dvpp_mapping = mm->sp_group_master->local->dvpp_mapping; + spg_dvpp_mapping = spg->dvpp_mapping; if (!list_empty(&spg->proc_head)) { /* @@ -454,12 +434,12 @@ static int sp_group_setup_mapping_normal(struct mm_struct *mm, struct sp_group * bool is_conflict = !can_mappings_merge(local_dvpp_mapping, spg_dvpp_mapping); if (is_mapping_empty(local_dvpp_mapping)) { - sp_mapping_merge(spg_dvpp_mapping, local_dvpp_mapping); + dvpp_mapping_merge(spg_dvpp_mapping, local_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id); } else if (is_mapping_empty(spg_dvpp_mapping)) { - sp_mapping_merge(local_dvpp_mapping, spg_dvpp_mapping); + dvpp_mapping_merge(local_dvpp_mapping, spg_dvpp_mapping); if (is_conflict) pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id); @@ -469,17 +449,23 @@ static int sp_group_setup_mapping_normal(struct mm_struct *mm, struct sp_group * } } else { /* the mapping of local group is always set */ - sp_mapping_attach(spg, local_dvpp_mapping); - if (!spg->mapping[SP_MAPPING_NORMAL]) - sp_mapping_attach(spg, sp_mapping_normal); - if (!spg->mapping[SP_MAPPING_RO]) - sp_mapping_attach(spg, sp_mapping_ro); + dvpp_mapping_attach(spg, local_dvpp_mapping); } return 0; } -static int sp_group_setup_mapping_local(struct mm_struct *mm, struct sp_group *local) +static void dvpp_mapping_init(struct sp_mapping *spm) +{ + atomic_set(&spm->user, 0); + INIT_LIST_HEAD(&spm->dvpp_group_head); + + mutex_lock(&spm_list_lock); + list_add_tail(&spm->spm_node, &spm_dvpp_list); + mutex_unlock(&spm_list_lock); +} + +static int 
sp_group_setup_dvpp_mapping_local(struct mm_struct *mm, struct sp_group *local) { struct sp_mapping *spm; @@ -487,9 +473,8 @@ static int sp_group_setup_mapping_local(struct mm_struct *mm, struct sp_group *l if (!spm) return -ENOMEM; - sp_mapping_attach(local, spm); - sp_mapping_attach(local, sp_mapping_normal); - sp_mapping_attach(local, sp_mapping_ro); + dvpp_mapping_init(spm); + dvpp_mapping_attach(local, spm); return 0; } @@ -499,12 +484,12 @@ static inline bool is_local_group(int spg_id) return spg_id >= SPG_ID_LOCAL_MIN && spg_id <= SPG_ID_LOCAL_MAX; } -static int sp_group_setup_mapping(struct mm_struct *mm, struct sp_group *spg) +static int sp_group_setup_dvpp_mapping(struct mm_struct *mm, struct sp_group *spg) { if (is_local_group(spg->id)) - return sp_group_setup_mapping_local(mm, spg); + return sp_group_setup_dvpp_mapping_local(mm, spg); else - return sp_group_setup_mapping_normal(mm, spg); + return sp_group_setup_dvpp_mapping_normal(mm, spg); } static struct sp_group *sp_group_create(int spg_id); @@ -865,14 +850,11 @@ struct sp_k2u_context { static void free_sp_group_locked(struct sp_group *spg) { - int type; - fput(spg->file); fput(spg->file_hugetlb); idr_remove(&sp_group_idr, spg->id); - for (type = SP_MAPPING_START; type < SP_MAPPING_END; type++) - sp_mapping_detach(spg, spg->mapping[type]); + dvpp_mapping_detach(spg); if (!is_local_group(spg->id)) system_group_count--; @@ -1065,7 +1047,7 @@ static void sp_group_init(struct sp_group *spg, int spg_id) atomic_set(&spg->use_count, 1); atomic_set(&spg->spa_num, 0); INIT_LIST_HEAD(&spg->proc_head); - INIT_LIST_HEAD(&spg->mnode); + INIT_LIST_HEAD(&spg->dvpp_mnode); init_rwsem(&spg->rw_lock); meminfo_init(&spg->meminfo); } @@ -1225,7 +1207,7 @@ static int sp_group_link_task(struct mm_struct *mm, struct sp_group *spg, if (!node) return -ENOMEM; - ret = sp_group_setup_mapping(mm, spg); + ret = sp_group_setup_dvpp_mapping(mm, spg); if (ret) goto out_kfree; @@ -1670,11 +1652,11 @@ static struct sp_area 
*sp_area_alloc(unsigned long size, unsigned long flags, pr_err("invalid sp_flags [%lx]\n", flags); return ERR_PTR(-EINVAL); } - mapping = spg->mapping[SP_MAPPING_RO]; + mapping = sp_mapping_ro; } else if (flags & SP_DVPP) { - mapping = spg->mapping[SP_MAPPING_DVPP]; + mapping = spg->dvpp_mapping; } else { - mapping = spg->mapping[SP_MAPPING_NORMAL]; + mapping = sp_mapping_normal; } if (!mapping) { @@ -3276,7 +3258,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int tgid) if (IS_ERR(spg)) goto put_mm; - spm = spg->mapping[SP_MAPPING_DVPP]; + spm = spg->dvpp_mapping; default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start) @@ -3713,12 +3695,10 @@ static int __init share_pool_init(void) sp_mapping_normal = sp_mapping_create(SP_MAPPING_NORMAL); if (!sp_mapping_normal) goto fail; - atomic_inc(&sp_mapping_normal->user); sp_mapping_ro = sp_mapping_create(SP_MAPPING_RO); if (!sp_mapping_ro) goto free_normal; - atomic_inc(&sp_mapping_ro->user); proc_sharepool_init(); -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> Offering: HULK hulk inclusion category: feature bugzilla: NA ---------------------------------------- If dvpp_mapping_global is true, we use a global sp_mapping for all dvpp mappings. This feature is disabled by default, is enabled with the "dvpp_mapping_global" kernel boot parameter, and is used in the mdc scene. Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 1e417a117838..8965625cfe63 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -91,6 +91,9 @@ static DEFINE_MUTEX(spm_list_lock); /* mutex to protect hugepage allocator registration */ static DEFINE_MUTEX(sp_hugepage_reg_mutex); +/* Dvpp mapping global */ +static int __read_mostly dvpp_mapping_global; + #define SEQ_printf(m, x...) \ do { \ if (m) \ @@ -303,6 +306,7 @@ static void sp_mapping_set_type(struct sp_mapping *spm, unsigned long type) static struct sp_mapping *sp_mapping_normal; static struct sp_mapping *sp_mapping_ro; +static struct sp_mapping *sp_mapping_dvpp; static void sp_mapping_range_init(struct sp_mapping *spm) { @@ -1654,7 +1658,10 @@ static struct sp_area *sp_area_alloc(unsigned long size, unsigned long flags, } mapping = sp_mapping_ro; } else if (flags & SP_DVPP) { - mapping = spg->dvpp_mapping; + if (dvpp_mapping_global) + mapping = sp_mapping_dvpp; + else + mapping = spg->dvpp_mapping; } else { mapping = sp_mapping_normal; } @@ -3258,7 +3265,10 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int tgid) if (IS_ERR(spg)) goto put_mm; - spm = spg->dvpp_mapping; + if (dvpp_mapping_global) + spm = sp_mapping_dvpp; + else + spm = spg->dvpp_mapping; default_start = MMAP_SHARE_POOL_DVPP_START + device_id * MMAP_SHARE_POOL_16G_SIZE; /* The dvpp range of each group can be configured only once */ if (spm->start[device_id] != default_start) @@ -3435,10 +3445,14 @@ static void spa_dvpp_stat_show(struct seq_file *seq) { 
struct sp_mapping *spm; - mutex_lock(&spm_list_lock); - list_for_each_entry(spm, &spm_dvpp_list, spm_node) - spa_stat_of_mapping_show(seq, spm); - mutex_unlock(&spm_list_lock); + if (dvpp_mapping_global) { + spa_stat_of_mapping_show(seq, sp_mapping_dvpp); + } else { + mutex_lock(&spm_list_lock); + list_for_each_entry(spm, &spm_dvpp_list, spm_node) + spa_stat_of_mapping_show(seq, spm); + mutex_unlock(&spm_list_lock); + } } @@ -3678,6 +3692,13 @@ void __sp_mm_clean(struct mm_struct *mm) DEFINE_STATIC_KEY_FALSE(share_pool_enabled_key); +static int __init enable_dvpp_mapping_global(char *s) +{ + dvpp_mapping_global = 1; + return 1; +} +__setup("dvpp_mapping_global", enable_dvpp_mapping_global); + static int __init enable_share_pool(char *s) { static_branch_enable(&share_pool_enabled_key); @@ -3700,10 +3721,20 @@ static int __init share_pool_init(void) if (!sp_mapping_ro) goto free_normal; + if (dvpp_mapping_global) { + sp_mapping_dvpp = sp_mapping_create(SP_MAPPING_DVPP); + if (!sp_mapping_dvpp) { + sp_mapping_dvpp = NULL; + goto free_ro; + } + } + proc_sharepool_init(); return 0; +free_ro: + kfree(sp_mapping_ro); free_normal: kfree(sp_mapping_normal); fail: -- 2.43.0
Offering: HULK hulk inclusion category: feature bugzilla: NA ---------------------------------------- support register remote range Signed-off-by: Yin Tirui <yintirui@huawei.com> --- include/linux/share_pool.h | 18 ++ mm/share_pool.c | 443 ++++++++++++++++++++++++++++++++++--- 2 files changed, 435 insertions(+), 26 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index ea581516838c..b230f108e89d 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -160,6 +160,12 @@ extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, in extern bool mg_is_sharepool_addr(unsigned long addr); +extern int mg_sp_register_remote_range(int spg_id, unsigned long va, + unsigned long pa, unsigned long size); + +extern int mg_sp_register_remote_range_multi(int spg_id, unsigned long va, + unsigned long *pa_array, unsigned long len, unsigned long page_size); + extern int mg_sp_id_of_current(void); extern void __sp_mm_clean(struct mm_struct *mm); @@ -283,6 +289,18 @@ static inline bool mg_is_sharepool_addr(unsigned long addr) return false; } +static inline int mg_sp_register_remote_range(int spg_id, unsigned long va, + unsigned long pa, unsigned long size) +{ + return -EPERM; +} + +static inline int mg_sp_register_remote_range_multi(int spg_id, unsigned long va, + unsigned long *pa_array, unsigned long len, unsigned long page_size) +{ + return -EPERM; +} + static inline bool sp_is_enabled(void) { return false; diff --git a/mm/share_pool.c b/mm/share_pool.c index 8965625cfe63..bffd8f5731b8 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -74,7 +74,12 @@ static int system_group_count; /* idr of all sp_groups */ static DEFINE_IDR(sp_group_idr); -/* rw semaphore for sp_group_idr and mm->sp_group_master */ +/* idr of all sp_global_groups */ +static DEFINE_IDR(sp_remote_group_idr); +/* + * rw semaphore for sp_group_idr and sp_remote_group_idr + * and mm->sp_group_master + */ static DECLARE_RWSEM(sp_global_sem); /*** 
Statistical and maintenance tools ***/ @@ -204,6 +209,30 @@ struct sp_group_master { char comm[TASK_COMM_LEN]; }; +struct sp_remote_group { + int id; + atomic_t use_count; + atomic_t spra_num; + struct list_head spra_list; + struct rw_semaphore rw_lock; +}; + +struct sp_remote_area { + unsigned long va_start; + unsigned long va_end; + unsigned long real_size; + unsigned long page_size; + struct list_head list; + struct sp_remote_group *sprg; + struct sp_area *spa; + bool multi; + atomic_t use_count; + union { + unsigned long pa; + unsigned long *pa_array; + }; +}; + /* * each instance represents an sp group the process belongs to * sp_group_master : sp_group_node = 1 : N @@ -649,6 +678,7 @@ static struct sp_overall_stat sp_overall_stat; enum spa_type { SPA_TYPE_ALLOC = 1, + SPA_TYPE_REMOTE, SPA_TYPE_K2TASK, SPA_TYPE_K2SPG, }; @@ -689,7 +719,10 @@ struct sp_area { struct rb_node rb_node; /* address sorted rbtree */ struct rb_node spg_link; /* link to the spg->rb_root */ struct sp_group *spg; - struct sp_mapping *spm; /* where spa born from */ + union { + struct sp_mapping *spm; /* where spa born from */ + struct sp_remote_area *spra; /* for remote range */ + }; enum spa_type type; union { unsigned long kva; /* shared kva */ @@ -753,6 +786,8 @@ static void spa_inc_usage(struct sp_area *spa) atomic64_add(size, &spa_stat.k2u_spg_size); meminfo_inc_k2u(size, &spa->spg->meminfo); break; + case SPA_TYPE_REMOTE: + break; default: WARN(1, "invalid spa type"); } @@ -792,6 +827,8 @@ static void spa_dec_usage(struct sp_area *spa) atomic64_sub(size, &spa_stat.k2u_spg_size); meminfo_dec_k2u(size, &spa->spg->meminfo); break; + case SPA_TYPE_REMOTE: + break; default: WARN(1, "invalid spa type"); } @@ -818,6 +855,8 @@ static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage, case SPA_TYPE_K2SPG: update_mem_usage_k2u(size, inc, spg_node); break; + case SPA_TYPE_REMOTE: + break; default: WARN(1, "invalid stat type\n"); } @@ -1056,6 +1095,12 @@ static void 
sp_group_init(struct sp_group *spg, int spg_id) meminfo_init(&spg->meminfo); } +static struct sp_remote_group *sp_remote_group_get_from_idr_locked(int spg_id); +static void sp_group_insert_area(struct sp_group *spg, struct sp_area *spa); +static void sp_area_put_locked(struct sp_area *spa); +static struct sp_area *sp_area_alloc_by_spra(struct sp_group *spg, + struct sp_remote_area *spra); + /* * sp_group_create - create a new sp_group * @spg_id: specify the id for the new sp_group @@ -1077,6 +1122,9 @@ static struct sp_group *sp_group_create(int spg_id) struct sp_group *spg; char name[DNAME_INLINE_LEN]; int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT; + struct sp_remote_group *sprg; + struct sp_remote_area *spra; + struct sp_area *spa; if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM && spg_id != SPG_ID_LOCAL)) { @@ -1131,8 +1179,35 @@ static struct sp_group *sp_group_create(int spg_id) if (!is_local_group(spg_id)) system_group_count++; + sprg = sp_remote_group_get_from_idr_locked(spg_id); + if (sprg) { + down_write(&sprg->rw_lock); + list_for_each_entry(spra, &sprg->spra_list, list) { + spa = sp_area_alloc_by_spra(spg, spra); + if (IS_ERR(spa)) { + up_write(&sprg->rw_lock); + goto out_spa_put; + } + spra->spa = spa; + sp_group_insert_area(spg, spa); + } + up_write(&sprg->rw_lock); + + } + return spg; +out_spa_put: + if (sprg) { + down_write(&sprg->rw_lock); + list_for_each_entry(spra, &sprg->spra_list, list) { + spa = spra->spa; + if (spa) + sp_area_put_locked(spa); + } + up_write(&sprg->rw_lock); + } + out_fput: fput(spg->file); out_idr_remove: @@ -1305,7 +1380,6 @@ static int mm_add_group_init(pid_t tgid, struct mm_struct **pmm) return ret; } -static void sp_area_put_locked(struct sp_area *spa); static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size); /** * mg_sp_group_add_task() - Add a process to an share group (sp_group). 
@@ -1821,33 +1895,43 @@ static struct sp_area *sp_area_get(struct sp_group *spg, */ static void sp_area_free(struct sp_area *spa) { - struct sp_mapping *spm = spa->spm; - - spin_lock(&spm->sp_mapping_lock); - if (spm->free_area_cache) { - struct sp_area *cache; + struct sp_remote_area *spra; + struct sp_mapping *spm; - cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); - if (spa->va_start <= cache->va_start) { - spm->free_area_cache = rb_prev(&spa->rb_node); - /* - * the new cache node may be changed to another region, - * i.e. from DVPP region to normal region - */ - if (spm->free_area_cache) { - cache = rb_entry(spm->free_area_cache, - struct sp_area, rb_node); - spm->cached_vstart = cache->region_vstart; + if (spa->type == SPA_TYPE_REMOTE) { + spra = spa->spra; + if (likely(spra)) + spra->spa = NULL; + else + pr_err_ratelimited("%s, spra is NULL\n", __func__); // WARN ? + } else { + spm = spa->spm; + spin_lock(&spm->sp_mapping_lock); + if (spm->free_area_cache) { + struct sp_area *cache; + + cache = rb_entry(spm->free_area_cache, struct sp_area, rb_node); + if (spa->va_start <= cache->va_start) { + spm->free_area_cache = rb_prev(&spa->rb_node); + /* + * the new cache node may be changed to another region, + * i.e. from DVPP region to normal region + */ + if (spm->free_area_cache) { + cache = rb_entry(spm->free_area_cache, + struct sp_area, rb_node); + spm->cached_vstart = cache->region_vstart; + } + /* + * We don't try to update cached_hole_size, + * but it won't go very wrong. + */ } - /* - * We don't try to update cached_hole_size, - * but it won't go very wrong. 
- */ } - } - rb_erase(&spa->rb_node, &spm->area_root); - spin_unlock(&spm->sp_mapping_lock); + rb_erase(&spa->rb_node, &spm->area_root); + spin_unlock(&spm->sp_mapping_lock); + } RB_CLEAR_NODE(&spa->rb_node); sp_area_free_pages(spa); kfree(spa); @@ -1973,6 +2057,17 @@ static struct sp_group *sp_group_get_from_idr(int spg_id) return spg; } +static struct sp_group *sp_group_get_from_idr_locked(int spg_id) +{ + struct sp_group *spg; + + spg = idr_find(&sp_group_idr, spg_id); + if (!spg || !atomic_inc_not_zero(&spg->use_count)) + spg = NULL; + + return spg; +} + static int sp_free_inner(unsigned long addr, int spg_id, bool is_sp_free) { int ret = 0; @@ -2075,6 +2170,36 @@ static int sp_vma_populate_pages(struct vm_area_struct *vma, struct sp_area *spa return 0; } +static int sp_vma_populate_remote_pages(struct vm_area_struct *vma, struct sp_remote_area *spra, + unsigned long uaddr, unsigned long size) +{ + /* Currently, PMD_SIZE is the only page size for remote page mapping */ + unsigned long page_size = PMD_SIZE; + unsigned long pfn; + int i = 0; + int ret; + + if (spra->multi) { + + do { + pfn = PHYS_PFN(spra->pa_array[i]); + ret = remap_pfn_range_try_pmd(vma, uaddr, pfn, page_size, vma->vm_page_prot); + if (ret) + return ret; + + uaddr += page_size; + size -= page_size; + i++; + } while (size > 0); + + } else { + pfn = PHYS_PFN(spra->pa); + ret = remap_pfn_range_try_pmd(vma, uaddr, pfn, size, vma->vm_page_prot); + } + + return 0; +} + static inline void sp_update_prot(struct vm_area_struct *vma, unsigned long prot) { if (prot & PROT_WRITE) @@ -2325,6 +2450,8 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, int ret = 0; unsigned long mmap_addr; unsigned long populate = 0; + struct vm_area_struct *vma; + struct sp_remote_area *spra; mmap_write_lock(mm); if (unlikely(!mmget_not_zero(mm))) { @@ -2368,6 +2495,24 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, } mmap_write_unlock(mm); break; + case SPA_TYPE_REMOTE: + spra 
= spa->spra; + if (!spra) { + mmap_write_unlock(mm); + ret = -EFAULT; + pr_err_ratelimited("remote map failed, spra is NULL.\n"); + break; + } + vma = find_vma(mm, mmap_addr); + sp_update_prot(vma, prot); + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + ret = sp_vma_populate_remote_pages(vma, spra, mmap_addr, spa_size(spa)); + if (ret) { + do_munmap(mm, mmap_addr, spa_size(spa), NULL); + pr_err_ratelimited("remote map failed, ret=%d\n", ret); + } + mmap_write_unlock(mm); + break; default: break; } @@ -3337,6 +3482,252 @@ bool mg_is_sharepool_addr(unsigned long addr) } EXPORT_SYMBOL_GPL(mg_is_sharepool_addr); +static void sp_remote_group_insert_area(struct sp_remote_group *sprg, + struct sp_remote_area *spra) +{ + list_add_tail(&spra->list, &sprg->spra_list); + atomic_inc(&sprg->spra_num); + if (atomic_read(&sprg->spra_num) == 1) + atomic_inc(&sprg->use_count); +} + +static struct sp_remote_group *sp_remote_group_create(int spg_id) +{ + struct sp_remote_group *sprg; + int ret; + + sprg = kzalloc(sizeof(*sprg), GFP_KERNEL); + if (!sprg) + return ERR_PTR(-ENOMEM); + sprg->id = spg_id; + atomic_set(&sprg->use_count, 1); + atomic_set(&sprg->spra_num, 0); + INIT_LIST_HEAD(&sprg->spra_list); + init_rwsem(&sprg->rw_lock); + + ret = idr_alloc(&sp_remote_group_idr, sprg, spg_id, spg_id + 1, GFP_KERNEL); + if (ret < 0) { + pr_err("remote group %d idr alloc failed %d\n", spg_id, ret); + kfree(sprg); + return ERR_PTR(ret); + } + + return sprg; +} + +/* the caller must hold sp_global_sem */ +static struct sp_remote_group *sp_remote_group_get_or_alloc(int spg_id) +{ + struct sp_remote_group *sprg; + + sprg = idr_find(&sp_remote_group_idr, spg_id); + if (!sprg || !atomic_inc_not_zero(&sprg->use_count)) + sprg = sp_remote_group_create(spg_id); + + return sprg; +} + +static struct sp_remote_group *sp_remote_group_get_from_idr_locked(int spg_id) +{ + struct sp_remote_group *sprg; + + sprg = idr_find(&sp_remote_group_idr, spg_id); + if (!sprg || 
!atomic_inc_not_zero(&sprg->use_count)) + sprg = NULL; + + return sprg; +} + +/* the caller must hold spg->rw_lock */ +static struct sp_area *sp_area_alloc_by_spra(struct sp_group *spg, + struct sp_remote_area *spra) +{ + struct sp_area *spa; + + spa = kzalloc(sizeof(struct sp_area), GFP_KERNEL); + if (unlikely(!spa)) + return ERR_PTR(-ENOMEM); + + spa->va_start = spra->va_start; + spa->va_end = spra->va_end; + spa->real_size = spra->real_size; + spa->is_hugepage = true; + spa->type = SPA_TYPE_REMOTE; + spa->spg = spg; + spa->spra = spra; + atomic_set(&spa->use_count, 1); + return spa; +} + +static struct sp_remote_area *sp_remote_area_alloc(struct sp_remote_group *sprg, + unsigned long va, unsigned long *pa_array, unsigned long pa, + unsigned long len, unsigned long page_size, bool multi) +{ + struct sp_remote_area *spra; + unsigned long nr; + + // TODO: check spra overlap + spra = kzalloc(sizeof(struct sp_remote_area), GFP_KERNEL); + if (!spra) + return ERR_PTR(-ENOMEM); + + spra->sprg = sprg; + spra->va_start = va; + spra->va_end = va + len; + spra->real_size = len; + spra->page_size = page_size; + INIT_LIST_HEAD(&spra->list); + atomic_set(&spra->use_count, 1); + if (multi) { + spra->multi = true; + nr = len / page_size; + spra->pa_array = kvmalloc_array(nr, sizeof(unsigned long), GFP_KERNEL); + if (unlikely(!spra->pa_array)) { + kfree(spra); + return ERR_PTR(-ENOMEM); + } + memcpy(spra->pa_array, pa_array, nr * sizeof(unsigned long)); + } else { + spra->multi = false; + spra->pa = pa; + } + + return spra; +} + +static int sp_remote_mmap_populate(struct sp_group *spg, + struct sp_remote_group *sprg, struct sp_remote_area *spra) +{ + struct sp_group_node *spg_node; + struct mm_struct *mm; + struct sp_area *spa; + int mmap_ret = 0; + int ret = 0; + + spa = sp_area_alloc_by_spra(spg, spra); + if (IS_ERR(spa)) + return PTR_ERR(spa); + + spra->spa = spa; + sp_group_insert_area(spg, spa); + /* create mapping for each process in the group */ + 
list_for_each_entry(spg_node, &spg->proc_head, proc_node) { + mm = spg_node->master->mm; + kthread_use_mm(mm); + mmap_ret = sp_map_spa_to_mm(mm, spa, spg_node->prot, NULL, + "sp_remote_alloc"); + kthread_unuse_mm(mm); + if (mmap_ret) { + if (mmap_ret != SP_SKIP_ERR) + goto unmap; + + continue; + } + ret = mmap_ret; + } + + return ret; + +unmap: + __sp_free(spa, mm); + sp_area_put_locked(spa); + + return mmap_ret; +} + +static void free_sp_remote_group_locked(struct sp_remote_group *sprg) +{ + idr_remove(&sp_remote_group_idr, sprg->id); + kfree(sprg); +} + +static void sp_remote_group_put_locked(struct sp_remote_group *sprg) +{ + lockdep_assert_held_write(&sp_global_sem); + + if (atomic_dec_and_test(&sprg->use_count)) + free_sp_remote_group_locked(sprg); +} + +static int __register_remote(int spg_id, unsigned long va, + unsigned long *pa_array, unsigned long pa, unsigned long len, + unsigned long page_size, bool multi) +{ + struct sp_remote_group *sprg; + struct sp_remote_area *spra; + struct sp_group *spg; + int ret = 0; + + if (!sp_is_enabled()) + return -EOPNOTSUPP; + + if (unlikely(page_size != PMD_SIZE)) { + pr_err_ratelimited("register remote failed, invalid page_size 0x%lx\n", page_size); + return -EINVAL; + } + + if (!len || !IS_ALIGNED(va, page_size) || !IS_ALIGNED(pa, page_size) + || !IS_ALIGNED(len, page_size)) + return -EINVAL; + + down_write(&sp_global_sem); + sprg = sp_remote_group_get_or_alloc(spg_id); + if (IS_ERR(sprg)) { + pr_err_ratelimited("register remote failed, get sprg failed, ret=%ld\n", + PTR_ERR(sprg)); + ret = PTR_ERR(sprg); + goto out_unlock; + } + + spra = sp_remote_area_alloc(sprg, va, pa_array, pa, len, page_size, multi); + if (IS_ERR(spra)) { + pr_err_ratelimited("register remote failed, alloc spra failed, ret=%ld\n", + PTR_ERR(spra)); + ret = PTR_ERR(spra); + goto drop_sprg; + } + down_write(&sprg->rw_lock); + sp_remote_group_insert_area(sprg, spra); + up_write(&sprg->rw_lock); + + spg = sp_group_get_from_idr_locked(spg_id); + if 
(spg) { + down_write(&spg->rw_lock); + ret = sp_remote_mmap_populate(spg, sprg, spra); + up_write(&spg->rw_lock); + if (ret) { + pr_err_ratelimited("register remote failed, remote map failed, ret=%d\n", + ret); + goto drop_spg; + } + } + + up_write(&sp_global_sem); + return 0; + +drop_spg: + sp_group_put(spg); +drop_sprg: + sp_remote_group_put_locked(sprg); +out_unlock: + up_write(&sp_global_sem); + return ret; +} + +int mg_sp_register_remote_range(int spg_id, unsigned long va, + unsigned long pa, unsigned long len) +{ + return __register_remote(spg_id, va, NULL, pa, len, PMD_SIZE, false); +} +EXPORT_SYMBOL_GPL(mg_sp_register_remote_range); + +int mg_sp_register_remote_range_multi(int spg_id, unsigned long va, + unsigned long *pa_array, unsigned long len, unsigned long page_size) +{ + return __register_remote(spg_id, va, pa_array, 0, len, page_size, true); +} +EXPORT_SYMBOL_GPL(mg_sp_register_remote_range_multi); + /*** Statistical and maintenance functions ***/ static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, -- 2.43.0
Offering: HULK hulk inclusion category: feature bugzilla: NA ---------------------------------------- support unregister remote range Signed-off-by: Yin Tirui <yintirui@huawei.com> --- include/linux/share_pool.h | 7 +++ mm/share_pool.c | 91 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index b230f108e89d..4102d0dcf25b 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -166,6 +166,8 @@ extern int mg_sp_register_remote_range(int spg_id, unsigned long va, extern int mg_sp_register_remote_range_multi(int spg_id, unsigned long va, unsigned long *pa_array, unsigned long len, unsigned long page_size); +extern int mg_sp_unregister_remote_range(int spg_id, unsigned long addr); + extern int mg_sp_id_of_current(void); extern void __sp_mm_clean(struct mm_struct *mm); @@ -301,6 +303,11 @@ static inline int mg_sp_register_remote_range_multi(int spg_id, unsigned long va return -EPERM; } +static inline int mg_sp_unregister_remote_range(int spg_id, unsigned long addr) +{ + return -EPERM; +} + static inline bool sp_is_enabled(void) { return false; diff --git a/mm/share_pool.c b/mm/share_pool.c index bffd8f5731b8..0eb309ad38e4 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1575,6 +1575,13 @@ static bool sp_group_delete_area(struct sp_group *spg, struct sp_area *spa) return atomic_dec_and_test(&spa->spg->spa_num); } +static bool sp_remote_group_delete_area(struct sp_remote_group *sprg, + struct sp_remote_area *spra) +{ + list_del(&spra->list); + return atomic_dec_and_test(&sprg->spra_num); +} + unsigned long sp_alloc_hugepage(unsigned long size, int nid, nodemask_t *nodemask) { struct folio *folio = alloc_hugetlb_folio_nodemask_size(size, nid, nodemask); @@ -1937,6 +1944,17 @@ static void sp_area_free(struct sp_area *spa) kfree(spa); } +static void sp_remote_area_free(struct sp_remote_area *spra) +{ + struct sp_area *spa = spra->spa; + + if (spa) + spa->spra 
= NULL; + if (spra->multi) + kvfree(spra->pa_array); + kfree(spra); +} + static void sp_area_put_locked(struct sp_area *spa) { if (atomic_dec_and_test(&spa->use_count)) { @@ -1947,6 +1965,16 @@ static void sp_area_put_locked(struct sp_area *spa) } } +static void sp_remote_area_put_locked(struct sp_remote_area *spra) +{ + if (atomic_dec_and_test(&spra->use_count)) { + if (sp_remote_group_delete_area(spra->sprg, spra)) + /* the caller must hold a refcount for spra->sprg under sprg->rw_lock */ + atomic_dec(&spra->sprg->use_count); + sp_remote_area_free(spra); + } +} + static void sp_area_drop_func(struct work_struct *work) { bool spa_zero; @@ -3661,6 +3689,11 @@ static int __register_remote(int spg_id, unsigned long va, if (!sp_is_enabled()) return -EOPNOTSUPP; + if (!(current->flags & PF_KTHREAD)) { + pr_err_ratelimited("register remote failed, task is non-kthread\n"); + return -EOPNOTSUPP; + } + if (unlikely(page_size != PMD_SIZE)) { pr_err_ratelimited("register remote failed, invalid page_size 0x%lx\n", page_size); return -EINVAL; @@ -3728,6 +3761,64 @@ int mg_sp_register_remote_range_multi(int spg_id, unsigned long va, } EXPORT_SYMBOL_GPL(mg_sp_register_remote_range_multi); +int mg_sp_unregister_remote_range(int spg_id, unsigned long addr) +{ + int ret = 0; + struct sp_area *spa; + struct sp_group *spg; + struct sp_remote_group *sprg; + struct sp_remote_area *spra, *tmp; + + if (!sp_is_enabled()) + return -EOPNOTSUPP; + + if (!(current->flags & PF_KTHREAD)) { + pr_err_ratelimited("unregister remote failed, task is non-kthread\n"); + return -EOPNOTSUPP; + } + + down_write(&sp_global_sem); + sprg = sp_remote_group_get_from_idr_locked(spg_id); + if (unlikely(!sprg)) { + pr_err_ratelimited("unregister remote failed, invalid spg id %d\n", spg_id); + ret = -EINVAL; + goto out_unlock; + } + + down_write(&sprg->rw_lock); + list_for_each_entry_safe(spra, tmp, &sprg->spra_list, list) { + if (spra->va_start == addr) + goto drop_spa; + } + pr_err_ratelimited("unregister 
remote failed, invalid addr 0x%lx for spg id %d\n", + addr, spg_id); + ret = -EINVAL; + goto drop_sprg; + +drop_spa: + spa = spra->spa; + if (spa) { + spg = spa->spg; + if (!atomic_inc_not_zero(&spg->use_count)) + goto drop_spra; + down_write(&spg->rw_lock); + __sp_free(spa, NULL); + sp_area_put_locked(spa); + up_write(&spg->rw_lock); + sp_group_put_locked(spg); + } +drop_spra: + sp_remote_area_put_locked(spra); +drop_sprg: + sp_remote_group_put_locked(sprg); + up_write(&sprg->rw_lock); +out_unlock: + up_write(&sp_global_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(mg_sp_unregister_remote_range); + /*** Statistical and maintenance functions ***/ static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon, -- 2.43.0
Offering: HULK hulk inclusion category: feature bugzilla: NA ---------------------------------------- Introduce cmdline `sp_normal_range` to configure sharepool normal range. Example: sp_normal_range=1T@0xea0000000000 Signed-off-by: Yin Tirui <yintirui@huawei.com> --- mm/share_pool.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 0eb309ad38e4..90d3fa079c3a 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -337,6 +337,9 @@ static struct sp_mapping *sp_mapping_normal; static struct sp_mapping *sp_mapping_ro; static struct sp_mapping *sp_mapping_dvpp; +static unsigned long __ro_after_init sp_normal_size; +static unsigned long __ro_after_init sp_normal_start; + static void sp_mapping_range_init(struct sp_mapping *spm) { int i; @@ -348,8 +351,13 @@ static void sp_mapping_range_init(struct sp_mapping *spm) spm->end[i] = MMAP_SHARE_POOL_RO_END; break; case SP_MAPPING_NORMAL: - spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; - spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; + if (sp_normal_size) { + spm->start[i] = sp_normal_start; + spm->end[i] = sp_normal_start + sp_normal_size; + } else { + spm->start[i] = MMAP_SHARE_POOL_NORMAL_START; + spm->end[i] = MMAP_SHARE_POOL_NORMAL_END; + } break; case SP_MAPPING_DVPP: spm->start[i] = MMAP_SHARE_POOL_DVPP_START + i * MMAP_SHARE_POOL_16G_SIZE; @@ -4190,6 +4198,51 @@ static int __init enable_share_pool(char *s) } __setup("enable_ascend_share_pool", enable_share_pool); +static inline bool sp_normal_range_valid(unsigned long addr, unsigned long size) +{ + return addr >= MMAP_SHARE_POOL_NORMAL_START && + addr + size <= MMAP_SHARE_POOL_NORMAL_END; +} + +static int __init sp_parse_normal_range(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (unlikely(!p)) + return -EINVAL; + + oldp = p; + mem_size = memparse(p, &p); + if (unlikely(p == oldp)) + return -EINVAL; + + if (unlikely(!mem_size)) + return -EINVAL; + + if 
(unlikely(*p != '@')) { + pr_err("unrecognized sp_normal_range option format.\n"); + return -EINVAL; + } + + start_at = memparse(p + 1, &p); + if (unlikely(*p != '\0')) { + pr_err("unrecognized sp_normal_range end format.\n"); + return -EINVAL; + } + + if (unlikely(!sp_normal_range_valid(start_at, mem_size))) { + pr_err("invalid sp normal range, addr: 0x%llx, size: 0x%llx\n", start_at, mem_size); + return -EINVAL; + } + + sp_normal_size = mem_size; + sp_normal_start = start_at; + + return 1; +} +__setup("sp_normal_range=", sp_parse_normal_range); + static int __init share_pool_init(void) { if (!sp_is_enabled()) -- 2.43.0
From: Fanhua Li <lifanhua5@huawei.com> Signed-off-by: Fanhua Li <lifanhua5@huawei.com> --- mm/share_pool.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 5f0e444869..9de856a451 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2209,8 +2209,7 @@ static int sp_vma_populate_pages(struct vm_area_struct *vma, struct sp_area *spa static int sp_vma_populate_remote_pages(struct vm_area_struct *vma, struct sp_remote_area *spra, unsigned long uaddr, unsigned long size) { - /* Currently, PMD_SIZE is the only page size for remote page mapping */ - unsigned long page_size = PMD_SIZE; + unsigned long page_size = spra->page_size; unsigned long pfn; int i = 0; int ret; @@ -3587,7 +3586,7 @@ static struct sp_area *sp_area_alloc_by_spra(struct sp_group *spg, spa->va_start = spra->va_start; spa->va_end = spra->va_end; spa->real_size = spra->real_size; - spa->is_hugepage = true; + spa->is_hugepage = (spra->page_size != PAGE_SIZE); spa->type = SPA_TYPE_REMOTE; spa->spg = spg; spa->spra = spra; @@ -3702,7 +3701,7 @@ static int __register_remote(int spg_id, unsigned long va, return -EOPNOTSUPP; } - if (unlikely(page_size != PMD_SIZE)) { + if (unlikely(page_size != PMD_SIZE && page_size != PAGE_SIZE)) { pr_err_ratelimited("register remote failed, invalid page_size 0x%lx\n", page_size); return -EINVAL; } -- 2.43.0
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,转换为PR失败! 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/HUQ... 失败原因:补丁集缺失封面信息 建议解决方法:请提供补丁集并重新发送您的补丁集到邮件列表 Feedback: The patch(es) you sent to kernel@openeuler.org could not be converted to a PR. Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/HUQ... Failure reason: the patch set is missing its cover letter. Suggested solution: please add a cover letter to the patch set and resend the whole series to the mailing list.
participants (2)
-
patchwork bot -
Yin Tirui