From: Fang Lijun fanglijun3@huawei.com
ascend inclusion category: Bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4JMLR CVE: NA
--------------
The system can't use the memory of cdm nodes, but it can mmap huge pages from all nodes, so a Bus error occurs when mmap succeeds but there are not enough huge pages.
When the cdmmask is set, users pass the numa node id through the mmap flags to map huge pages from that specific numa node; if there are not enough huge pages on this node, -ENOMEM is returned.
Dvpp uses the MAP_CHECKNODE flag to enable the hugetlb node check. A global numanode variable would make mmap non-reentrant, so the node id is taken directly from flags BITS[26:31]. v2: fix a compile error on platforms such as mips
Signed-off-by: Fang Lijun fanglijun3@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/alpha/include/uapi/asm/mman.h | 1 + arch/mips/include/uapi/asm/mman.h | 1 + arch/parisc/include/uapi/asm/mman.h | 1 + arch/powerpc/include/uapi/asm/mman.h | 1 + arch/sparc/include/uapi/asm/mman.h | 1 + arch/xtensa/include/uapi/asm/mman.h | 1 + fs/hugetlbfs/inode.c | 45 ++++++++++++++++++++++++++++ include/linux/hugetlb.h | 1 + include/linux/mm.h | 11 +++++++ include/linux/mman.h | 15 ++++++++++ include/uapi/asm-generic/mman.h | 1 + mm/hugetlb.c | 3 ++ mm/mmap.c | 22 ++++++++++++-- 13 files changed, 101 insertions(+), 3 deletions(-)
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 87abc7b03360..eeb0b9cc0bee 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -32,6 +32,7 @@ #define MAP_HUGETLB 0x100000 /* create a huge page mapping */ #define MAP_FIXED_NOREPLACE 0x200000/* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ +#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
#define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_SYNC 2 /* synchronous memory sync */ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 61cd225fcaa4..00437067f14d 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -50,6 +50,7 @@ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ +#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
/* * Flags for msync diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 851678907640..0bdf4ae5b69f 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -27,6 +27,7 @@ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_UNINITIALIZED 0 /* uninitialized anonymous mmap */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ +#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
#define MS_SYNC 1 /* synchronous memory sync */ #define MS_ASYNC 2 /* sync memory asynchronously */ diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index f0eb04780148..908fa2ad02cc 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -26,6 +26,7 @@ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ +#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
/* Override any generic PKEY permission defines */ #define PKEY_DISABLE_EXECUTE 0x4 diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h index 8caf19c604d0..06578c16a683 100644 --- a/arch/sparc/include/uapi/asm/mman.h +++ b/arch/sparc/include/uapi/asm/mman.h @@ -22,5 +22,6 @@ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ +#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
#endif /* _UAPI__SPARC_MMAN_H__ */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index a52ac8462b7d..717561c7e85a 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -57,6 +57,7 @@ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ +#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */ #define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be * uninitialized */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 96c5f4c5ee6e..2e2e4983f1ba 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -118,6 +118,45 @@ static void huge_pagevec_release(struct pagevec *pvec) pagevec_reinit(pvec); }
+/* + * Check current numa node has enough free huge pages to mmap hugetlb. + * resv_huge_pages_node: mmap hugepages but haven't used in current + * numa node. + */ +static int hugetlb_checknode(struct vm_area_struct *vma, long nr) +{ + int nid; + int ret = 0; + struct hstate *h = &default_hstate; + + spin_lock(&hugetlb_lock); + + nid = vma->vm_flags >> CHECKNODE_BITS; + + if (nid >= MAX_NUMNODES) { + ret = -EINVAL; + goto err; + } + + if (h->free_huge_pages_node[nid] < nr) { + ret = -ENOMEM; + goto err; + } else { + if (h->resv_huge_pages_node[nid] + nr > + h->free_huge_pages_node[nid]) { + ret = -ENOMEM; + goto err; + } else { + h->resv_huge_pages_node[nid] += nr; + ret = 0; + } + } + +err: + spin_unlock(&hugetlb_lock); + return ret; +} + /* * Mask used when checking the page offset value passed in via system * calls. This value will be converted to a loff_t which is signed. @@ -175,6 +214,12 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) inode_lock(inode); file_accessed(file);
+ if (is_set_cdmmask() && (vma->vm_flags & VM_CHECKNODE)) { + ret = hugetlb_checknode(vma, len >> huge_page_shift(h)); + if (ret < 0) + goto out; + } + ret = -ENOMEM; if (hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index bf734fb00a1d..fd9635a6a92f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -581,6 +581,7 @@ struct hstate { unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; + unsigned int resv_huge_pages_node[MAX_NUMNODES]; #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP unsigned int nr_free_vmemmap_pages; #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index ae9b6688677f..100c113e62a7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,6 +97,15 @@ extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif
+#ifdef CONFIG_COHERENT_DEVICE +static inline bool is_set_cdmmask(void) +{ + return !nodes_empty(cdmmask); +} +#else +#define is_set_cdmmask() (0) +#endif + #include <asm/page.h> #include <asm/processor.h>
@@ -304,6 +313,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_CDM 0x100000000 /* Contains coherent device memory */ #endif
+#define VM_CHECKNODE 0x200000000 + #ifdef CONFIG_USERSWAP /* bit[32:36] is the protection key of intel, so use a large value for VM_USWAP */ #define VM_USWAP 0x2000000000000000 diff --git a/include/linux/mman.h b/include/linux/mman.h index 629cefc4ecba..7908bf3e5696 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -8,6 +8,21 @@ #include <linux/atomic.h> #include <uapi/linux/mman.h>
+#ifdef CONFIG_COHERENT_DEVICE +#define CHECKNODE_BITS 48 +#define CHECKNODE_MASK (~((_AC(1, UL) << CHECKNODE_BITS) - 1)) +static inline void set_vm_checknode(vm_flags_t *vm_flags, unsigned long flags) +{ + if (is_set_cdmmask()) + *vm_flags |= VM_CHECKNODE | ((((flags >> MAP_HUGE_SHIFT) & + MAP_HUGE_MASK) << CHECKNODE_BITS) & CHECKNODE_MASK); +} +#else +#define CHECKNODE_BITS (0) +static inline void set_vm_checknode(vm_flags_t *vm_flags, unsigned long flags) +{} +#endif + /* * Arrange for legacy / undefined architecture specific flags to be * ignored by mmap handling code. diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 344bb9b090a7..d7f0f48117b0 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -5,6 +5,7 @@ #include <asm-generic/mman-common.h>
#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_CHECKNODE 0x0400 /* hugetlb numa node check */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ #define MAP_LOCKED 0x2000 /* pages are locked */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6ae2d2e90681..d0672e482879 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -30,6 +30,7 @@ #include <linux/numa.h> #include <linux/llist.h> #include <linux/cma.h> +#include <linux/mman.h>
#include <asm/page.h> #include <asm/pgalloc.h> @@ -1164,6 +1165,8 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { SetHPageRestoreReserve(page); h->resv_huge_pages--; + if (is_set_cdmmask() && (vma->vm_flags & VM_CHECKNODE)) + h->resv_huge_pages_node[vma->vm_flags >> CHECKNODE_BITS]--; }
mpol_cond_put(mpol); diff --git a/mm/mmap.c b/mm/mmap.c index f705137fd248..a208057be6f1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1581,6 +1581,12 @@ __do_mmap(struct file *file, unsigned long addr, unsigned long len, vm_flags |= VM_NORESERVE; }
+ /* set numa node id into vm_flags, + * hugetlbfs file mmap will use it to check node + */ + if (flags & MAP_CHECKNODE) + set_vm_checknode(&vm_flags, flags); + addr = mmap_region(file, addr, len, vm_flags, pgoff, uf); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || @@ -1825,12 +1831,23 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, } else if (flags & MAP_HUGETLB) { struct user_struct *user = NULL; struct hstate *hs; + int page_size_log;
- hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); + /* + * If config cdm node, flags bits [26:31] used for + * mmap hugetlb check node + */ + if (is_set_cdmmask()) + page_size_log = 0; + else + page_size_log = (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK; + + hs = hstate_sizelog(page_size_log); if (!hs) return -EINVAL;
len = ALIGN(len, huge_page_size(hs)); + /* * VM_NORESERVE is used because the reservations will be * taken when vm_ops->mmap() is called @@ -1839,8 +1856,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, */ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, - &user, HUGETLB_ANONHUGE_INODE, - (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); + &user, HUGETLB_ANONHUGE_INODE, page_size_log); if (IS_ERR(file)) return PTR_ERR(file); }