From: Chen Jun <chenjun102@huawei.com>
hulk inclusion
category: feature
bugzilla: N/A
--------------------------------
Support allocating memory from a set of nodes.

mg_sp_alloc() only allows allocating memory from a single node. If that
node does not have enough memory, the caller has to retry with the next
node, which incurs a lot of overhead.

To improve performance, add a new interface, mg_sp_alloc_nodemask(), to
allocate memory from a set of nodes in one call.
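For illustration only (not part of this patch), a kernel-side caller
could use the new interface roughly as in the sketch below. The helper
name, group id and node numbers are made up; SZ_2M comes from
<linux/sizes.h> and SP_HUGEPAGE from <linux/share_pool.h>.

  #include <linux/sizes.h>
  #include <linux/nodemask.h>
  #include <linux/err.h>
  #include <linux/share_pool.h>

  /* Hypothetical helper: allocate 2MB of share-pool memory from nodes
   * 0 and 1 in a single call, instead of calling mg_sp_alloc() and
   * retrying node by node on failure.
   */
  static void *example_alloc_from_nodes(int spg_id)
  {
  	nodemask_t nodes;
  	void *addr;

  	nodes_clear(nodes);
  	node_set(0, nodes);
  	node_set(1, nodes);

  	addr = mg_sp_alloc_nodemask(SZ_2M, SP_HUGEPAGE, spg_id, nodes);
  	if (IS_ERR(addr))
  		return NULL;

  	/* use the memory; later free it with mg_sp_free((unsigned long)addr, spg_id) */
  	return addr;
  }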
Signed-off-by: Chen Jun <chenjun102@huawei.com>
---
 include/linux/hugetlb.h              | 15 +++++
 include/linux/share_pool.h           | 10 ++--
 include/linux/share_pool_interface.h | 19 +++++++
 mm/hugetlb.c                         | 30 ++++++++--
 mm/mempolicy.c                       |  4 +-
 mm/share_pool.c                      | 84 +++++++++++++++++-----------
 6 files changed, 118 insertions(+), 44 deletions(-)
 create mode 100644 include/linux/share_pool_interface.h
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1c780d188e8c..366777ab3ba1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -629,6 +629,9 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 
 const struct hstate *hugetlb_get_hstate(void);
 struct page *hugetlb_alloc_hugepage(int nid, int flag);
+struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
+					unsigned long address, int flag);
+
 int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr,
 				pgprot_t prot, struct page *hpage);
 int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
@@ -645,6 +648,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag)
 	return NULL;
 }
+static inline struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
+					unsigned long address, int flag)
+{
+	return NULL;
+}
+
 static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm,
 			unsigned long addr, pgprot_t prot, struct page *hpage)
 {
@@ -1091,6 +1100,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag)
 	return NULL;
 }
+static inline struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
+					unsigned long address, int flag)
+{
+	return NULL;
+}
+
 static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm,
 			unsigned long addr, pgprot_t prot, struct page *hpage)
 {
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 8190c8d82439..04feea9b924d 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -12,6 +12,8 @@
 #include <linux/jump_label.h>
 #include <linux/kabi.h>
+#include <linux/share_pool_interface.h>
+
 #define SP_HUGEPAGE		(1 << 0)
 #define SP_HUGEPAGE_ONLY	(1 << 1)
 #define SP_DVPP			(1 << 2)
@@ -256,6 +258,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
 			       struct pid *pid, struct task_struct *task);
 extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id);
+extern void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
+				  nodemask_t nodemask);
 extern int mg_sp_free(unsigned long addr, int id);
 extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
@@ -286,7 +290,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
 			     unsigned long address, pte_t *ptep, unsigned int flags);
 extern bool sp_check_addr(unsigned long addr);
 extern bool sp_check_mmap_addr(unsigned long addr, unsigned long flags);
-extern int sp_node_id(struct vm_area_struct *vma);
 static inline bool sp_is_enabled(void)
 {
@@ -452,11 +455,6 @@ static inline bool is_vmalloc_sharepool(unsigned long vm_flags)
 	return NULL;
 }
-static inline int sp_node_id(struct vm_area_struct *vma)
-{
-	return numa_node_id();
-}
-
 static inline bool sp_check_addr(unsigned long addr)
 {
 	return false;
diff --git a/include/linux/share_pool_interface.h b/include/linux/share_pool_interface.h
new file mode 100644
index 000000000000..8cd82859902f
--- /dev/null
+++ b/include/linux/share_pool_interface.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_SHARE_POOL_INTERFACE_H
+#define LINUX_SHARE_POOL_INTERFACE_H
+
+#include <linux/mman.h>
+#include <linux/mm_types.h>
+#include <linux/numa.h>
+#include <linux/kabi.h>
+
+#ifdef CONFIG_ASCEND_SHARE_POOL
+extern int sp_node_id(struct vm_area_struct *vma);
+#else
+static inline int sp_node_id(struct vm_area_struct *vma)
+{
+	return numa_node_id();
+}
+#endif /* !CONFIG_ASCEND_SHARE_POOL */
+
+#endif /* LINUX_SHARE_POOL_INTERFACE_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a55197135afa..a1cf6a1e9cec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6312,7 +6312,7 @@ static struct page *hugetlb_alloc_hugepage_normal(struct hstate *h,
 /*
  * Allocate hugepage without reserve
  */
-struct page *hugetlb_alloc_hugepage(int nid, int flag)
+struct page *hugetlb_alloc_hugepage_nodemask(int nid, int flag, nodemask_t *nodemask)
 {
 	struct hstate *h = &default_hstate;
 	gfp_t gfp_mask = htlb_alloc_mask(h);
@@ -6327,7 +6327,6 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag)
 	if (flag & ~HUGETLB_ALLOC_MASK)
 		return NULL;
 
-	gfp_mask |= __GFP_THISNODE;
 	if (enable_charge_mighp)
 		gfp_mask |= __GFP_ACCOUNT;
 
@@ -6337,12 +6336,22 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag)
 	if (flag & HUGETLB_ALLOC_NORMAL)
 		page = hugetlb_alloc_hugepage_normal(h, gfp_mask, nid);
 	else if (flag & HUGETLB_ALLOC_BUDDY)
-		page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+		page = alloc_migrate_huge_page(h, gfp_mask, nid, nodemask);
 	else
-		page = alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);
+		page = alloc_huge_page_nodemask(h, nid, nodemask, gfp_mask);
 
 	return page;
 }
+
+struct page *hugetlb_alloc_hugepage(int nid, int flag)
+{
+	nodemask_t nodemask;
+
+	nodes_clear(nodemask);
+	node_set(nid, nodemask);
+
+	return hugetlb_alloc_hugepage_nodemask(nid, flag, &nodemask);
+}
 EXPORT_SYMBOL_GPL(hugetlb_alloc_hugepage);
 
 static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
@@ -6364,6 +6373,19 @@ static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
 	return ptep;
 }
+struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma, unsigned long address, int flag)
+{
+	int nid;
+	struct hstate *h = hstate_vma(vma);
+	struct mempolicy *mpol;
+	nodemask_t *nodemask;
+	gfp_t gfp_mask;
+
+	gfp_mask = htlb_alloc_mask(h);
+	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
+	return hugetlb_alloc_hugepage_nodemask(nid, flag, nodemask);
+}
+
 static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr,
 				     pgprot_t prot, unsigned long pfn)
 {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b58ec3f98896..c43df3206ab6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -103,6 +103,8 @@
 #include <linux/printk.h>
 #include <linux/swapops.h>
+#include <linux/share_pool_interface.h>
+
 #include <asm/tlbflush.h>
 #include <linux/uaccess.h>
@@ -2198,7 +2200,7 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
 		nid = interleave_nid(*mpol, vma, addr,
 					huge_page_shift(hstate_vma(vma)));
 	} else {
-		nid = policy_node(gfp_flags, *mpol, numa_node_id());
+		nid = policy_node(gfp_flags, *mpol, sp_node_id(vma));
 		if ((*mpol)->mode == MPOL_BIND || mode == MPOL_PREFERRED_MANY)
 			*nodemask = &(*mpol)->v.nodes;
 	}
diff --git a/mm/share_pool.c b/mm/share_pool.c
index ce4837da8a9a..8b46af5fae47 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -700,7 +700,7 @@ struct sp_area {
 	struct mm_struct *mm;		/* owner of k2u(task) */
 	unsigned long kva;		/* shared kva */
 	pid_t applier;			/* the original applier process */
-	int node_id;			/* memory node */
+	int preferred_node_id;		/* memory node */
 	int device_id;
 };
 static DEFINE_SPINLOCK(sp_area_lock);
@@ -1892,7 +1892,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
 	spa->mm = NULL;
 	spa->kva = 0;		/* NULL pointer */
 	spa->applier = applier;
-	spa->node_id = node_id;
+	spa->preferred_node_id = node_id;
 	spa->device_id = device_id;
 
 	spa_inc_usage(spa);
@@ -2191,7 +2191,9 @@ static int sp_free_get_spa(struct sp_free_context *fc)
 }
 /**
- * mg_sp_free() - Free the memory allocated by mg_sp_alloc().
+ * mg_sp_free() - Free the memory allocated by mg_sp_alloc() or
+ *	mg_sp_alloc_nodemask().
+ *
  * @addr: the starting VA of the memory.
  * @id: Address space identifier, which is used to distinguish the addr.
  *
@@ -2448,18 +2450,15 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa,
 }
 
 static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len,
-		unsigned long node)
+		nodemask_t *nodemask)
 {
-	nodemask_t nmask;
-
-	nodes_clear(nmask);
-	node_set(node, nmask);
 	return __do_mbind(start, len, MPOL_BIND, MPOL_F_STATIC_NODES,
-			&nmask, MPOL_MF_STRICT, mm);
+			nodemask, MPOL_MF_STRICT, mm);
 }
 
 static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
-		struct sp_group_node *spg_node, struct sp_alloc_context *ac)
+		struct sp_group_node *spg_node, struct sp_alloc_context *ac,
+		nodemask_t *nodemask)
 {
 	int ret;
@@ -2468,10 +2467,10 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
 		return ret;
 
 	if (!ac->have_mbind) {
-		ret = sp_mbind(mm, spa->va_start, spa->real_size, spa->node_id);
+		ret = sp_mbind(mm, spa->va_start, spa->real_size, nodemask);
 		if (ret < 0) {
-			pr_err("cannot bind the memory range to specified node:%d, err:%d\n",
-			       spa->node_id, ret);
+			pr_err("cannot bind the memory range to node[%*pbl], err:%d\n",
+			       nodemask_pr_args(nodemask), ret);
 			return ret;
 		}
 		ac->have_mbind = true;
@@ -2490,17 +2489,25 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
 }
 
 static int sp_alloc_mmap_populate(struct sp_area *spa,
-				  struct sp_alloc_context *ac)
+				  struct sp_alloc_context *ac,
+				  nodemask_t *nodemask)
 {
 	int ret = -EINVAL;
 	int mmap_ret = 0;
 	struct mm_struct *mm, *end_mm = NULL;
 	struct sp_group_node *spg_node;
+	nodemask_t __nodemask;
+
+	if (!nodemask) { /* mg_sp_alloc */
+		nodes_clear(__nodemask);
+		node_set(spa->preferred_node_id, __nodemask);
+	} else /* mg_sp_alloc_nodemask */
+		__nodemask = *nodemask;
 
 	/* create mapping for each process in the group */
 	list_for_each_entry(spg_node, &spa->spg->procs, proc_node) {
 		mm = spg_node->master->mm;
-		mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac);
+		mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac, &__nodemask);
 		if (mmap_ret) {
 			/*
@@ -2563,19 +2570,8 @@ static void sp_alloc_finish(int result, struct sp_area *spa,
 	sp_group_put(spg);
 }
 
-/**
- * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group.
- * @size: the size of memory to allocate.
- * @sp_flags: how to allocate the memory.
- * @spg_id: the share group that the memory is allocated to.
- *
- * Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
- *
- * Return:
- * * if succeed, return the starting address of the shared memory.
- * * if fail, return the pointer of -errno.
- */
-void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
+void *__mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
+			     nodemask_t *nodemask)
 {
 	struct sp_area *spa = NULL;
 	int ret = 0;
@@ -2598,7 +2594,7 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
 		goto out;
 	}
 
-	ret = sp_alloc_mmap_populate(spa, &ac);
+	ret = sp_alloc_mmap_populate(spa, &ac, nodemask);
 	if (ret && ac.state == ALLOC_RETRY) {
 		/*
 		 * The mempolicy for shared memory is located at backend file, which varies
@@ -2616,6 +2612,30 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
 	else
 		return (void *)(spa->va_start);
 }
+
+void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
+			   nodemask_t nodemask)
+{
+	return __mg_sp_alloc_nodemask(size, sp_flags, spg_id, &nodemask);
+}
+EXPORT_SYMBOL_GPL(mg_sp_alloc_nodemask);
+
+/**
+ * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group.
+ * @size: the size of memory to allocate.
+ * @sp_flags: how to allocate the memory.
+ * @spg_id: the share group that the memory is allocated to.
+ *
+ * Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
+ *
+ * Return:
+ * * if succeed, return the starting address of the shared memory.
+ * * if fail, return the pointer of -errno.
+ */
+void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
+{
+	return __mg_sp_alloc_nodemask(size, sp_flags, spg_id, NULL);
+}
 EXPORT_SYMBOL_GPL(mg_sp_alloc);
 
 /**
@@ -3599,7 +3619,7 @@ int sp_node_id(struct vm_area_struct *vma)
 	if (vma && (vma->vm_flags & VM_SHARE_POOL) && vma->vm_private_data) {
 		spa = vma->vm_private_data;
-		node_id = spa->node_id;
+		node_id = spa->preferred_node_id;
 	}
 
 	return node_id;
@@ -4028,7 +4048,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
 	unsigned long haddr = address & huge_page_mask(h);
 	bool new_page = false;
 	int err;
-	int node_id;
 	struct sp_area *spa;
 	bool charge_hpage;
@@ -4037,7 +4056,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
 		pr_err("share pool: vma is invalid, not from sp mmap\n");
 		return ret;
 	}
-	node_id = spa->node_id;
 
 retry:
 	page = find_lock_page(mapping, idx);
@@ -4049,7 +4067,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
 		charge_hpage = false;
 		page = alloc_huge_page(vma, haddr, 0);
 		if (IS_ERR(page)) {
-			page = hugetlb_alloc_hugepage(node_id,
+			page = hugetlb_alloc_hugepage_vma(vma, haddr,
 					HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM);
 			if (!page)
 				page = ERR_PTR(-ENOMEM);