[PATCH openEuler-5.10 65/87] share_pool: Add base framework for share_pool

30 Dec 2021

From: Wang Wensheng <wangwensheng4@huawei.com>

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4NDAW
CVE: NA

-------------------

Share pool is a large feature whose main purpose is to share user
virtual memory between different processes in the same group.
It can be used with the following steps:
1. Process A creates a new group, which is owned by process A.
2. Process A adds process B to the group.
3. Process A adds process C to the same group.
4. Process B allocates new memory at address VA and writes data to it.
5. The VA is sent to process C by IPC, so process C receives it.
6. Process C accesses the VA and reads the data directly.
7. Process A can add more processes to the group to share the
memory.
8. Free the memory by using the free function or by exiting the group.

The new feature is enabled only when both CONFIG_ASCEND_SHARE_POOL and
the enable_ascend_share_pool bootarg are set; it does not affect
anything when disabled.

Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 arch/arm64/Kconfig         |   9 ++
 include/linux/mm_types.h   |   4 +
 include/linux/share_pool.h | 226 +++++++++++++++++++++++++++++++++++++
 kernel/fork.c              |   4 +
 mm/Makefile                |   1 +
 mm/share_pool.c            |  45 ++++++++
 6 files changed, 289 insertions(+)
 create mode 100644 include/linux/share_pool.h
 create mode 100644 mm/share_pool.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 85610dd3ac0c..405e5ce460ce 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2034,6 +2034,15 @@ config ASCEND_CHARGE_MIGRATE_HUGEPAGES
 	  This option enable the feature to charge migrate hugepages to memory
 	  cgroup.
 
+config ASCEND_SHARE_POOL
+	bool "Enable support for the Share Pool Memory"
+	default n
+	depends on HAVE_ARCH_HUGE_VMALLOC
+	select ARCH_USES_HIGH_VMA_FLAGS
+	help
+	  This feature allows multiple processes to share virtual memory at
+	  both kernel and user level. It is only enabled for the Ascend platform.
+
 endif
 
 endmenu
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2729eb58aca4..30b36a3adb87 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -585,6 +585,10 @@ struct mm_struct {
 	struct kvm *kvm;
 #endif
 
+#if IS_ENABLED(CONFIG_ASCEND_SHARE_POOL)
+	struct sp_group_master *sp_group_master;
+#endif
+
 	/*
 	 * The mm_cpumask needs to be at the end of mm_struct, because it
 	 * is dynamically sized based on nr_cpu_ids.
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
new file mode 100644
index 000000000000..84d9c539f12b
--- /dev/null
+++ b/include/linux/share_pool.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_SHARE_POOL_H
+#define LINUX_SHARE_POOL_H
+
+#include <linux/mman.h>
+#include <linux/mm_types.h>
+#include <linux/notifier.h>
+#include <linux/vmalloc.h>
+#include <linux/printk.h>
+#include <linux/hashtable.h>
+#include <linux/numa.h>
+#include <linux/jump_label.h>
+
+#define SP_HUGEPAGE		(1 << 0)	/* bits 0-3: single-bit options of the sp flags */
+#define SP_HUGEPAGE_ONLY	(1 << 1)
+#define SP_DVPP			(1 << 2)
+#define SP_SPEC_NODE_ID		(1 << 3)
+
+#define DEVICE_ID_BITS		4UL	/* width of the device-id field in the sp flags */
+#define DEVICE_ID_MASK		((1UL << DEVICE_ID_BITS) - 1UL)
+#define DEVICE_ID_SHIFT		32UL	/* device id occupies flag bits [32, 36) */
+#define NODE_ID_BITS		NODES_SHIFT
+#define NODE_ID_MASK		((1UL << NODE_ID_BITS) - 1UL)
+#define NODE_ID_SHIFT		(DEVICE_ID_SHIFT + DEVICE_ID_BITS)	/* node id starts right above device id */
+
+#define SP_FLAG_MASK		(SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \
+				 SP_SPEC_NODE_ID | \
+				(DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \
+				(NODE_ID_MASK << NODE_ID_SHIFT))
+
+#define sp_flags_device_id(flags) (((flags) >> DEVICE_ID_SHIFT) & DEVICE_ID_MASK)
+#define sp_flags_node_id(flags) (((flags) >> NODE_ID_SHIFT) & NODE_ID_MASK)
+
+#define SPG_ID_NONE	(-1)	/* not associated with sp_group, only for specified thread */
+#define SPG_ID_DEFAULT	0	/* use the spg id of current thread */
+#define SPG_ID_MIN	1	/* valid id should be >= 1 */
+#define SPG_ID_MAX	99999
+#define SPG_ID_AUTO_MIN 100000	/* presumably the first automatically generated id - TODO confirm */
+#define SPG_ID_AUTO_MAX 199999	/* presumably the last automatically generated id - TODO confirm */
+#define SPG_ID_AUTO     200000  /* generate group id automatically */
+
+#define MAX_DEVID 8	/* the max num of Da-vinci devices; NOTE(review): the id field is 4 bits wide */
+
+extern int sysctl_share_pool_hugepage_enable;
+
+extern int sysctl_ac_mode;
+
+extern int sysctl_sp_debug_mode;
+
+extern struct static_key_false share_pool_enabled_key;	/* defined in mm/share_pool.c */
+
+extern int sysctl_share_pool_map_lock_enable;
+
+extern int sysctl_sp_compact_enable;
+extern unsigned long sysctl_sp_compact_interval;
+extern unsigned long sysctl_sp_compact_interval_max;
+extern int sysctl_sp_perf_alloc;
+
+extern int sysctl_sp_perf_k2u;
+
+/* we estimate an sp-group usually holds at most 64 entries (presumably processes - confirm) */
+#define SP_SPG_HASH_BITS 6
+
+struct sp_spg_stat {
+	int spg_id;
+	/* record the number of hugepage allocation failures */
+	atomic_t hugepage_failures;
+	/* number of sp_area */
+	atomic_t	 spa_num;
+	/* total size of all sp_area from sp_alloc and k2u */
+	atomic64_t	 size;
+	/* total size of all sp_area from sp_alloc 0-order page */
+	atomic64_t	 alloc_nsize;
+	/* total size of all sp_area from sp_alloc hugepage */
+	atomic64_t	 alloc_hsize;
+	/* total size of all sp_area from sp_alloc */
+	atomic64_t	 alloc_size;
+	/* total size of all sp_area from sp_k2u */
+	atomic64_t	 k2u_size;
+	struct mutex	 lock;  /* protect hashtable */
+	DECLARE_HASHTABLE(hash, SP_SPG_HASH_BITS);
+};
+
+/* we estimate a process usually belongs to at most 16 sp-groups */
+#define SP_PROC_HASH_BITS 4
+
+/* per process memory usage statistics indexed by tgid */
+struct sp_proc_stat {
+	atomic_t use_count;
+	int tgid;
+	struct mm_struct *mm;
+	struct mutex lock;  /* protect hashtable */
+	DECLARE_HASHTABLE(hash, SP_PROC_HASH_BITS);
+	char comm[TASK_COMM_LEN];
+	/*
+	 * alloc amount minus free amount, may be negative when freed by
+	 * another task in the same sp group.
+	 */
+	atomic64_t alloc_size;
+	atomic64_t k2u_size;
+};
+
+/* Processes in the same sp_group can share memory.
+ * Memory layout for share pool:
+ *
+ * |-------------------- 8T -------------------|---|------ 8T ------------|
+ * |		Device 0	   |  Device 1 |...|                      |
+ * |----------------------------------------------------------------------|
+ * |------------- 16G -------------|    16G    |   |                      |
+ * | DVPP GROUP0   | DVPP GROUP1   | ... | ... |...|  sp normal memory    |
+ * |     sp        |    sp         |     |     |   |                      |
+ * |----------------------------------------------------------------------|
+ *
+ * The host SVM feature reserves 8T virtual memory by mmap, and due to the
+ * restriction of DVPP, while SVM and share pool will both allocate memory
+ * for DVPP, the memory has to be in the same 32G range.
+ *
+ * Share pool reserves 16T memory, with 8T for normal uses and 8T for DVPP.
+ * Within this 8T DVPP memory, SVM will call sp_config_dvpp_range() to
+ * tell us which 16G memory range is reserved for share pool.
+ *
+ * In some scenarios where there is no host SVM feature, share pool uses
+ * the default 8G memory setting for DVPP.
+ */
+struct sp_group {
+	int		 id;
+	struct file	 *file;
+	struct file	 *file_hugetlb;
+	/* number of processes in this group */
+	int		 proc_num;
+	/* list head of processes (sp_group_node, each represents a process) */
+	struct list_head procs;
+	/* list head of sp_area. It is protected by spin_lock sp_area_lock */
+	struct list_head spa_list;
+	/* group statistics */
+	struct sp_spg_stat *stat;
+	/* we define the creator process of a sp_group as owner */
+	struct task_struct *owner;
+	/* is_alive == false means it's being destroyed */
+	bool		 is_alive;
+	atomic_t	 use_count;
+	/* protect the group internal elements, except spa_list */
+	struct rw_semaphore	rw_lock;
+};
+
+/* a per-process(per mm) struct which manages a sp_group_node list */
+struct sp_group_master {
+	/*
+	 * number of sp groups the process belongs to,
+	 * a.k.a. the number of sp_group_node in node_list
+	 */
+	unsigned int count;
+	/* list head of sp_group_node (linked through sp_group_node->group_node) */
+	struct list_head node_list;
+	struct mm_struct *mm;
+	struct sp_proc_stat *stat;
+};
+
+/*
+ * each instance represents an sp group the process belongs to
+ * sp_group_master    : sp_group_node   = 1 : N
+ * sp_group_node->spg : sp_group        = 1 : 1
+ * sp_group_node      : sp_group->procs = N : 1
+ */
+struct sp_group_node {
+	/* list node in sp_group->procs */
+	struct list_head proc_node;
+	/* list node in sp_group_master->node_list */
+	struct list_head group_node;
+	struct sp_group_master *master;
+	struct sp_group *spg;
+	unsigned long prot;
+};
+
+struct sp_walk_data {	/* NOTE(review): no user of this struct is visible in this patch */
+	struct page **pages;
+	unsigned int page_count;	/* presumably the number of entries in pages - TODO confirm */
+	unsigned long uva_aligned;
+	unsigned long page_size;
+	bool is_hugepage;
+	pmd_t *pmd;
+};
+
+#define MAP_SHARE_POOL			0x200000
+
+#define MMAP_TOP_4G_SIZE		0x100000000UL	/* 4G size */
+
+/* 8T size */
+#define MMAP_SHARE_POOL_NORMAL_SIZE	0x80000000000UL
+/* 8T size */
+#define MMAP_SHARE_POOL_DVPP_SIZE	0x80000000000UL
+/* 16G size */
+#define MMAP_SHARE_POOL_16G_SIZE	0x400000000UL
+#define MMAP_SHARE_POOL_SIZE		(MMAP_SHARE_POOL_NORMAL_SIZE + MMAP_SHARE_POOL_DVPP_SIZE)
+/* align down to the 2M hugepage size; MMAP_SHARE_POOL_16G_START should be aligned to 16G */
+#define MMAP_SHARE_POOL_END		((TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE) & ~((1 << 21) - 1))
+#define MMAP_SHARE_POOL_START		(MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE)
+#define MMAP_SHARE_POOL_16G_START	(MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE)
+
+#ifdef CONFIG_ASCEND_SHARE_POOL
+
+static inline void sp_init_mm(struct mm_struct *mm)
+{
+	mm->sp_group_master = NULL;	/* a freshly initialised mm has no sp_group_master yet */
+}
+
+extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id);
+extern int sp_group_add_task(int pid, int spg_id);
+
+static inline bool sp_is_enabled(void)
+{
+	return static_branch_likely(&share_pool_enabled_key);	/* key is off by default */
+}
+
+#else /* CONFIG_ASCEND_SHARE_POOL */
+
+static inline void sp_init_mm(struct mm_struct *mm) { }	/* no sp_group_master field to reset */
+
+static inline bool sp_is_enabled(void)
+{
+	return false;	/* feature compiled out */
+}
+
+#endif /* !CONFIG_ASCEND_SHARE_POOL */
+
+#endif /* LINUX_SHARE_POOL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index d730b57c9c22..454b42af1de8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
 #include <linux/kasan.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/share_pool.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -1055,6 +1056,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 		goto fail_nocontext;
 
 	mm->user_ns = get_user_ns(user_ns);
+
+	sp_init_mm(mm);
+
 	return mm;
 
 fail_nocontext:
diff --git a/mm/Makefile b/mm/Makefile
index c14522bd17ed..ec3d0ab14a6a 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -126,3 +126,4 @@ obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
 obj-$(CONFIG_PIN_MEMORY) += pin_mem.o
 obj-$(CONFIG_SHRINK_PAGECACHE) += page_cache_limit.o
+obj-$(CONFIG_ASCEND_SHARE_POOL) += share_pool.o
diff --git a/mm/share_pool.c b/mm/share_pool.c
new file mode 100644
index 000000000000..32d473bada3a
--- /dev/null
+++ b/mm/share_pool.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Huawei Ascend Share Pool Memory
+ *
+ * Copyright (C) 2020 Huawei Limited
+ * Author: Tang Yizhou <tangyizhou@huawei.com>
+ *         Zefan Li <lizefan@huawei.com>
+ *         Wu Peng <wupeng58@huawei.com>
+ *         Ding Tianhong <dingtgianhong@huawei.com>
+ *         Zhou Guanghui <zhouguanghui1@huawei.com>
+ *         Li Ming <limingming.li@huawei.com>
+ *
+ * This code is based on the hisilicon ascend platform.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "share pool: " fmt
+
+#include <linux/share_pool.h>
+
+int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
+{
+	return 0;	/* stub: arguments are ignored and 0 is always returned */
+}
+EXPORT_SYMBOL_GPL(mg_sp_group_add_task);
+
+int sp_group_add_task(int pid, int spg_id)
+{
+	return 0;	/* stub: arguments are ignored and 0 is always returned */
+}
+EXPORT_SYMBOL_GPL(sp_group_add_task);
+
+DEFINE_STATIC_KEY_FALSE(share_pool_enabled_key);	/* stays off unless enabled below */
+
+static int __init enable_share_pool(char *s)	/* s is not used */
+{
+	static_branch_enable(&share_pool_enabled_key);
+	pr_info("Ascend enable share pool features via bootargs\n");
+
+	return 1;	/* presumably "handled" per the __setup() convention - confirm */
+}
+__setup("enable_ascend_share_pool", enable_share_pool);
-- 
2.20.1