ascend inclusion category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LNGH
---------------------------------------------
The share pool feature is a big feature; it is mainly used to share user virtual memory between different processes in the same group. It can be used with the following steps: 1. Process A creates a new group which is owned by process A. 2. Process A adds process B to the group. 3. Process A adds process C to the same group. 4. Process B allocates a new memory VA and writes something to it. 5. The VA is sent to process C by IPC, and process C receives it. 6. Process C accesses the VA and gets the data directly. 7. Process A can add more processes to the group to share the memory. 8. Free the memory by using the free function or by exiting the group.
The new feature is enabled by both CONFIG_SHARE_POOL and the enable_ascend_share_pool bootarg; it does not affect anything if disabled.
Signed-off-by: Wang Wensheng wangwensheng4@huawei.com --- include/linux/mm.h | 6 + include/linux/mm_types.h | 6 + include/linux/share_pool.h | 220 ++++++++++++++++++++++++++ mm/Kconfig | 11 ++ mm/Makefile | 1 + mm/share_pool.c | 315 +++++++++++++++++++++++++++++++++++++ 6 files changed, 559 insertions(+) create mode 100644 include/linux/share_pool.h create mode 100644 mm/share_pool.c
diff --git a/include/linux/mm.h b/include/linux/mm.h index 4fe823ead243..e2ba77243461 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -382,6 +382,12 @@ extern unsigned int kobjsize(const void *objp); # define VM_MTE_ALLOWED VM_NONE #endif
+#if defined(CONFIG_SHARE_POOL) +# define VM_SHARE_POOL VM_HIGH_ARCH_4 +#else +# define VM_SHARE_POOL VM_NONE +#endif + #ifndef VM_GROWSUP # define VM_GROWSUP VM_NONE #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 582aa5e44a5a..0bc3c7c191a5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -674,6 +674,9 @@ struct vm_area_struct { struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +#ifdef CONFIG_SHARE_POOL + struct sp_area *spa; +#endif } __randomize_layout;
#ifdef CONFIG_SCHED_MM_CID @@ -931,6 +934,9 @@ struct mm_struct { #endif } lru_gen; #endif /* CONFIG_LRU_GEN */ +#ifdef CONFIG_SHARE_POOL + struct sp_group_master *sp_group_master; +#endif } __randomize_layout;
/* diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h new file mode 100644 index 000000000000..1333b9994242 --- /dev/null +++ b/include/linux/share_pool.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_SHARE_POOL_H +#define LINUX_SHARE_POOL_H + +#include <linux/mman.h> +#include <linux/mm_types.h> +#include <linux/notifier.h> +#include <linux/vmalloc.h> +#include <linux/printk.h> +#include <linux/hashtable.h> +#include <linux/numa.h> +#include <linux/jump_label.h> + +#define SP_HUGEPAGE (1 << 0) +#define SP_HUGEPAGE_ONLY (1 << 1) +#define SP_DVPP (1 << 2) +#define SP_SPEC_NODE_ID (1 << 3) +#define SP_PROT_RO (1 << 16) +/* + * SP_PROT_FOCUS should be used with SP_PROT_RO, + * to allocate memory within sharepool ro memory. + */ +#define SP_PROT_FOCUS (1 << 17) + +#define DEVICE_ID_BITS 4UL +#define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) +#define DEVICE_ID_SHIFT 32UL +#define NODE_ID_BITS NODES_SHIFT +#define NODE_ID_MASK ((1UL << NODE_ID_BITS) - 1UL) +#define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS) + +#define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ + SP_SPEC_NODE_ID | SP_PROT_RO | SP_PROT_FOCUS | \ + (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ + (NODE_ID_MASK << NODE_ID_SHIFT)) + +#define sp_flags_device_id(flags) (((flags) >> DEVICE_ID_SHIFT) & DEVICE_ID_MASK) +#define sp_flags_node_id(flags) (((flags) >> NODE_ID_SHIFT) & NODE_ID_MASK) + +#define SPG_ID_NONE (-1) /* not associated with sp_group, only for specified thread */ +#define SPG_ID_DEFAULT 0 /* use the spg id of current thread */ +#define SPG_ID_MIN 1 /* valid id should be >= 1 */ +#define SPG_ID_MAX 99999 +#define SPG_ID_AUTO_MIN 100000 +#define SPG_ID_AUTO_MAX 199999 +#define SPG_ID_AUTO 200000 /* generate group id automatically */ +#define SPG_ID_LOCAL_MIN 200001 +#define SPG_ID_LOCAL_MAX 299999 +#define SPG_ID_LOCAL 300000 /* generate group id in local range */ + +#define MAX_DEVID 8 /* the max num of Da-vinci devices */ + +extern 
struct static_key_false share_pool_enabled_key; + +struct sp_walk_data { + struct page **pages; + unsigned int page_count; + unsigned long uva_aligned; + unsigned long page_size; + bool is_hugepage; + bool is_page_type_set; + pmd_t *pmd; +}; + +#define MAP_SHARE_POOL 0x200000 + +#define MMAP_TOP_4G_SIZE 0x100000000UL + +/* 8T - 64G size */ +#define MMAP_SHARE_POOL_NORMAL_SIZE 0x7F000000000UL +/* 64G */ +#define MMAP_SHARE_POOL_RO_SIZE 0x1000000000UL +/* 8T size */ +#define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL +/* 16G size */ +#define MMAP_SHARE_POOL_16G_SIZE 0x400000000UL +/* skip 8T for stack */ +#define MMAP_SHARE_POOL_SKIP 0x80000000000UL +#define MMAP_SHARE_POOL_END (TASK_SIZE - MMAP_SHARE_POOL_SKIP) +#define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) +/* MMAP_SHARE_POOL_DVPP_START should be aligned to 16G */ +#define MMAP_SHARE_POOL_DVPP_START (MMAP_SHARE_POLL_DVPP_END - MMAP_SHARE_POOL_DVPP_SIZE) +#define MMAP_SHARE_POOL_RO_END (MMAP_SHARE_POOL_DVPP_START) +#define MMAP_SHARE_POOL_RO_START (MMAP_SHARE_POOL_RO_END - MMAP_SHARE_POOL_RO_SIZE) +#define MMAP_SHARE_POOL_NORMAL_END (MMAP_SHARE_POOL_RO_START) +#define MMAP_SHARE_POOL_NORMAL_START (MMAP_SHARE_POOL_NORMAL_END - MMAP_SHARE_POOL_NORMAL_SIZE) +#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_NORMAL_START) + +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE 0x100000000000ULL +#define MMAP_SHARE_POOL_DYNAMIC_DVPP_END (MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + \ + MMAP_SHARE_POOL_16G_SIZE * 64) + +#ifdef CONFIG_SHARE_POOL + +/* + * These interfaces are exported for modules + */ +extern int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id); +extern int mg_sp_group_id_by_pid(int tgid, int *spg_ids, int *num); + +extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); +extern void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id, + nodemask_t nodemask); +extern int mg_sp_free(unsigned long addr, int id); + +extern void *mg_sp_make_share_k2u(unsigned long 
kva, unsigned long size, + unsigned long sp_flags, int tgid, int spg_id); +extern void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid); +extern int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id); + +extern int mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data); + +extern void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data); + +extern bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int tgid); + +extern bool mg_is_sharepool_addr(unsigned long addr); + +extern int mg_sp_id_of_current(void); + +static inline bool sp_is_enabled(void) +{ + return static_branch_likely(&share_pool_enabled_key); +} + +static inline void sp_area_work_around(struct vm_unmapped_area_info *info) +{ + if (sp_is_enabled()) + info->high_limit = min(info->high_limit, MMAP_SHARE_POOL_START); +} + +static inline bool sp_check_vm_share_pool(unsigned long vm_flags) +{ + if (sp_is_enabled() && (vm_flags & VM_SHARE_POOL)) + return true; + + return false; +} + +#else /* CONFIG_SHARE_POOL */ + +static inline int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id) +{ + return -EPERM; +} + +static inline int mg_sp_group_id_by_pid(int tgid, int *spg_ids, int *num) +{ + return -EPERM; +} + +static inline void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +{ + return NULL; +} + +static inline int mg_sp_free(unsigned long addr, int id) +{ + return -EPERM; +} + +static inline void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int tgid, int spg_id) +{ + return NULL; +} + +static inline void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid) +{ + return NULL; +} + +static inline int mg_sp_unshare(unsigned long va, unsigned long size, int id) +{ + return -EPERM; +} + +static inline int mg_sp_id_of_current(void) +{ + return -EPERM; +} + +static inline int 
mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return 0; +} + +static inline void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ +} + +static inline bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int tgid) +{ + return false; +} + +static inline bool mg_is_sharepool_addr(unsigned long addr) +{ + return false; +} + +static inline bool sp_is_enabled(void) +{ + return false; +} + +static inline void sp_area_work_around(struct vm_unmapped_area_info *info) +{ +} + +static inline bool sp_check_vm_share_pool(unsigned long vm_flags) +{ + return false; +} + +#endif /* !CONFIG_SHARE_POOL */ + +#endif /* LINUX_SHARE_POOL_H */ diff --git a/mm/Kconfig b/mm/Kconfig index 64a8aea7f67a..0f68e5bbeb89 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1274,6 +1274,7 @@ menuconfig ASCEND_FEATURES depends on ARM64 select HUGETLB_INSERT_PAGE select EXTEND_HUGEPAGE_MAPPING + select SHARE_POOL help The Ascend chip use the Hisilicon DaVinci architecture, and mainly focus on AI and machine leanring area, contains many external features. @@ -1292,6 +1293,16 @@ config EXTEND_HUGEPAGE_MAPPING This allow the user to do huge vmalloc and remap those hugepage range into userspace.
+config SHARE_POOL + bool + depends on EXTEND_HUGEPAGE_MAPPING + select ARCH_USES_HIGH_VMA_FLAGS + help + This feature allows multiple processes to share virtual memory both + in kernel and user level, which is only enabled for ascend platform. + To enable this feature, enable_ascend_share_pool bootarg is needed. + + source "mm/damon/Kconfig"
endmenu diff --git a/mm/Makefile b/mm/Makefile index ec65984e2ade..c51aca1d9ec7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -138,3 +138,4 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o +obj-$(CONFIG_SHARE_POOL) += share_pool.o diff --git a/mm/share_pool.c b/mm/share_pool.c new file mode 100644 index 000000000000..a9e30b06486e --- /dev/null +++ b/mm/share_pool.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Huawei Ascend Share Pool Memory + * + * Copyright (C) 2020 Huawei Limited + * Author: Tang Yizhou tangyizhou@huawei.com + * Zefan Li lizefan@huawei.com + * Wu Peng wupeng58@huawei.com + * Ding Tianhong dingtgianhong@huawei.com + * Zhou Guanghui zhouguanghui1@huawei.com + * Li Ming limingming.li@huawei.com + * + * This code is based on the hisilicon ascend platform. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#define pr_fmt(fmt) "share pool: " fmt + +#include <linux/share_pool.h> +#include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/sched/mm.h> +#include <linux/mm_types.h> +#include <linux/idr.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/rbtree.h> +#include <linux/shmem_fs.h> +#include <linux/file.h> +#include <linux/printk.h> +#include <linux/hugetlb.h> +#include <linux/vmalloc.h> +#include <linux/pid.h> +#include <linux/pid_namespace.h> +#include <linux/atomic.h> +#include <linux/lockdep.h> +#include <linux/kernel.h> +#include <linux/falloc.h> +#include <linux/types.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/rmap.h> +#include <linux/preempt.h> +#include <linux/swapops.h> +#include <linux/mmzone.h> +#include <linux/timekeeping.h> +#include <linux/time64.h> +#include <linux/pagewalk.h> +#include <linux/workqueue.h> + +/** + * mg_sp_group_id_by_pid() - Get the sp_group ID array of a process. + * @tgid: tgid of target process. + * @spg_ids: points to an array to save the group ids the process belongs to + * @num: input the spg_ids array size; output the spg number of the process + * + * Return: + * >0 - the sp_group ID. + * -ENODEV - target process doesn't belong to any sp_group. + * -EINVAL - spg_ids or num is NULL. + * -E2BIG - the num of groups process belongs to is larger than *num + */ +int mg_sp_group_id_by_pid(int tgid, int *spg_ids, int *num) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(mg_sp_group_id_by_pid); + +/** + * mg_sp_group_add_task() - Add a process to a share group (sp_group). + * @tgid: the tgid of the task to be added. + * @prot: the prot of task for this spg. + * @spg_id: the ID of the sp_group. + * + * Return: A positive group number for success, -errno on failure. 
+ * + * Valid @spg_id: + * [SPG_ID_MIN, SPG_ID_MAX]: + * the task would be added to the group with @spg_id, if the + * group doesn't exist, just create it. + * [SPG_ID_AUTO_MIN, SPG_ID_AUTO_MAX]: + * the task would be added to the group with @spg_id, if it + * doesn't exist, return failure. + * SPG_ID_AUTO: + * the task would be added into a new group with a new id in range + * [SPG_ID_AUTO_MIN, SPG_ID_AUTO_MAX]. + * + * This function can be divided into four parts: + * 1. Check and initialize the task specified by @tgid properly. + * 2. Create or get the spg specified by @spg_id. + * 3. Check the spg and task together and link the task into the spg if + * everything looks good. + * 4. Map the existing sp_area from the spg into the new task. + */ +int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(mg_sp_group_add_task); + +int mg_sp_id_of_current(void) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(mg_sp_id_of_current); + +/** + * mg_sp_free() - Free the memory allocated by mg_sp_alloc() or + * mg_sp_alloc_nodemask(). + * + * @addr: the starting VA of the memory. + * @id: Address space identifier, which is used to distinguish the addr. + * + * Return: + * * 0 - success. + * * -EINVAL - the memory can't be found or was not allocated by share pool. + * * -EPERM - the caller has no permission to free the memory. + */ +int mg_sp_free(unsigned long addr, int id) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(mg_sp_free); + +static void __init proc_sharepool_init(void) +{ + if (!proc_mkdir("sharepool", NULL)) + return; +} + +void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id, + nodemask_t nodemask) +{ + return ERR_PTR(-EOPNOTSUPP); +} +EXPORT_SYMBOL_GPL(mg_sp_alloc_nodemask); + +/** + * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group. + * @size: the size of memory to allocate. + * @sp_flags: how to allocate the memory. 
+ * @spg_id: the share group that the memory is allocated to. + * + * Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode. + * + * Return: + * * if succeed, return the starting address of the shared memory. + * * if fail, return the pointer of -errno. + */ +void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) +{ + return ERR_PTR(-EOPNOTSUPP); +} +EXPORT_SYMBOL_GPL(mg_sp_alloc); + +/** + * mg_sp_make_share_k2u() - Share kernel memory to current process or an sp_group. + * @kva: the VA of shared kernel memory. + * @size: the size of shared kernel memory. + * @sp_flags: how to allocate the memory. We only support SP_DVPP. + * @tgid: the tgid of the specified process (Not currently in use). + * @spg_id: the share group that the memory is shared to. + * + * Return: the shared target user address to start at + * + * Share kernel memory to current task if spg_id == SPG_ID_NONE + * or SPG_ID_DEFAULT in multi-group mode. + * + * Return: + * * if succeed, return the shared user address to start at. + * * if fail, return the pointer of -errno. + */ +void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, + unsigned long sp_flags, int tgid, int spg_id) +{ + return ERR_PTR(-EOPNOTSUPP); +} +EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u); + +/** + * mg_sp_make_share_u2k() - Share user memory of a specified process to kernel. + * @uva: the VA of shared user memory + * @size: the size of shared user memory + * @tgid: the tgid of the specified process(Not currently in use) + * + * Return: + * * if success, return the starting kernel address of the shared memory. + * * if failed, return the pointer of -errno. + */ +void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid) +{ + return ERR_PTR(-EOPNOTSUPP); +} +EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k); + +/** + * mg_sp_unshare() - Unshare the kernel or user memory which was shared by calling + * mg_sp_make_share_{k2u,u2k}(). 
+ * @va: the specified virtual address of memory + * @size: the size of unshared memory + * + * Use spg_id of current thread if spg_id == SPG_ID_DEFAULT. + * + * Return: 0 for success, -errno on failure. + */ +int mg_sp_unshare(unsigned long va, unsigned long size, int spg_id) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(mg_sp_unshare); + +/** + * mg_sp_walk_page_range() - Walk page table with caller specific callbacks. + * @uva: the start VA of user memory. + * @size: the size of user memory. + * @tsk: task struct of the target task. + * @sp_walk_data: a structure of a page pointer array. + * + * Return: 0 for success, -errno on failure. + * + * When return 0, sp_walk_data describing [uva, uva+size) can be used. + * When return -errno, information in sp_walk_data is useless. + */ +int mg_sp_walk_page_range(unsigned long uva, unsigned long size, + struct task_struct *tsk, struct sp_walk_data *sp_walk_data) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(mg_sp_walk_page_range); + +/** + * mg_sp_walk_page_free() - Free the sp_walk_data structure. + * @sp_walk_data: a structure of a page pointer array to be freed. + */ +void mg_sp_walk_page_free(struct sp_walk_data *sp_walk_data) +{ +} +EXPORT_SYMBOL_GPL(mg_sp_walk_page_free); + +/** + * mg_sp_config_dvpp_range() - User can config the share pool start address + * of each Da-vinci device. + * @start: the value of share pool start + * @size: the value of share pool + * @device_id: the num of Da-vinci device + * @tgid: the tgid of device process + * + * Return true for success. + * Return false if parameter invalid or has been set up. + * This function has no concurrency problem. + */ +bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int tgid) +{ + return false; +} +EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range); + +static bool is_sp_reserve_addr(unsigned long addr) +{ + return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END; +} + +/* + * | 16G host | 16G device | ... 
| | + * ^ + * | + * MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + 16G * 64 + * We only check the device regions. + */ +static bool is_sp_dynamic_dvpp_addr(unsigned long addr) +{ + if (addr < MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE || addr >= MMAP_SHARE_POOL_DYNAMIC_DVPP_END) + return false; + + return (addr - MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE) & MMAP_SHARE_POOL_16G_SIZE; +} + +/** + * mg_is_sharepool_addr() - Check if a user memory address belongs to share pool. + * @addr: the userspace address to be checked. + * + * Return true if addr belongs to share pool, or false otherwise. + */ +bool mg_is_sharepool_addr(unsigned long addr) +{ + return sp_is_enabled() && + ((is_sp_reserve_addr(addr) || is_sp_dynamic_dvpp_addr(addr))); +} +EXPORT_SYMBOL_GPL(mg_is_sharepool_addr); + +DEFINE_STATIC_KEY_FALSE(share_pool_enabled_key); + +static int __init enable_share_pool(char *s) +{ + static_branch_enable(&share_pool_enabled_key); + pr_info("Ascend enable share pool features via bootargs\n"); + + return 1; +} +__setup("enable_ascend_share_pool", enable_share_pool); + +static int __init share_pool_init(void) +{ + if (!sp_is_enabled()) + return 0; + + proc_sharepool_init(); + + return 0; +} +late_initcall(share_pool_init);