From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
-------------------------------------------------
When OOM happens, it's better to print the share pool info of each process so we can check whether it has allocated too much share pool memory.
We also print share pool system-level statistics.
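For example, with the share pool enabled the per-task OOM dump header printed by dump_tasks() becomes roughly (sp_alloc and sp_k2u in KB):

  [ pid ] uid tgid total_vm rss sp_alloc sp_k2u pgtables_bytes swapents oom_score_adj name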
Suggested-by: Cui Bixuan <cuibixuan@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/share_pool.h |  41 ++++++++++++++
 mm/oom_kill.c              |  42 +++++++++++---
 mm/share_pool.c            | 109 +++++++++++++++++++++++++------------
 3 files changed, 149 insertions(+), 43 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index f2d17cb85fa52..fb7237351a995 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -105,6 +105,18 @@ struct sp_walk_data {
         bool is_hugepage;
 };
 
+/* per process memory usage statistics indexed by tgid */
+struct sp_proc_stat {
+        struct mm_struct *mm;
+        char comm[TASK_COMM_LEN];
+        /*
+         * alloc amount minus free amount, may be negative when freed by
+         * another task in the same sp group.
+         */
+        long alloc_size;
+        long k2u_size;
+};
+
 #ifdef CONFIG_ASCEND_SHARE_POOL
 
 #define MAP_SHARE_POOL 0x100000
@@ -155,6 +167,9 @@ extern int sp_register_notifier(struct notifier_block *nb);
 extern int sp_unregister_notifier(struct notifier_block *nb);
 extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid);
 extern bool is_sharepool_addr(unsigned long addr);
+extern struct sp_proc_stat *sp_get_proc_stat(int tgid);
+extern void spa_overview_show(struct seq_file *seq);
+extern void spg_overview_show(struct seq_file *seq);
 extern void proc_sharepool_init(void);
 
 static inline struct task_struct *sp_get_task(struct mm_struct *mm)
@@ -230,6 +245,11 @@ static inline void sp_dump_stack(void)
                 dump_stack();
 }
 
+static inline bool ascend_sp_oom_show(void)
+{
+        return enable_ascend_share_pool ? true : false;
+}
+
 vm_fault_t sharepool_no_page(struct mm_struct *mm,
                              struct vm_area_struct *vma,
                              struct address_space *mapping, pgoff_t idx,
@@ -310,6 +330,7 @@ static inline int sp_walk_page_range(unsigned long uva, unsigned long size,
 static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data)
 {
 }
+
 static inline int sp_register_notifier(struct notifier_block *nb)
 {
         return -EPERM;
 }
@@ -319,6 +340,7 @@ static inline int sp_unregister_notifier(struct notifier_block *nb)
 {
         return -EPERM;
 }
+
 static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
 {
         return false;
@@ -329,6 +351,19 @@ static inline bool is_sharepool_addr(unsigned long addr)
         return false;
 }
 
+static inline struct sp_proc_stat *sp_get_proc_stat(int tgid)
+{
+        return NULL;
+}
+
+static inline void spa_overview_show(struct seq_file *seq)
+{
+}
+
+static inline void spg_overview_show(struct seq_file *seq)
+{
+}
+
 static inline void proc_sharepool_init(void)
 {
 }
@@ -337,6 +372,7 @@ static inline struct task_struct *sp_get_task(struct mm_struct *mm)
 {
         return current;
 }
+
 static inline bool sp_check_hugepage(struct page *p)
 {
         return false;
@@ -385,6 +421,11 @@ static inline void sp_dump_stack(void)
 {
 }
 
+static inline bool ascend_sp_oom_show(void)
+{
+        return false;
+}
+
 static inline void *vmalloc_hugepage(unsigned long size)
 {
         return NULL;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d422223d2d6bf..0c77331492384 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -41,6 +41,7 @@
 #include <linux/kthread.h>
 #include <linux/init.h>
 #include <linux/mmu_notifier.h>
+#include <linux/share_pool.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -454,9 +455,16 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
         struct task_struct *p;
         struct task_struct *task;
+        struct sp_proc_stat *stat;
+
+        if (ascend_sp_oom_show()) {
+                pr_info("Tasks state (memory values in pages, share pool memory values in KB):\n");
+                pr_info("[ pid ] uid tgid total_vm rss sp_alloc sp_k2u pgtables_bytes swapents oom_score_adj name\n");
+        } else {
+                pr_info("Tasks state (memory values in pages):\n");
+                pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
+        }
 
-        pr_info("Tasks state (memory values in pages):\n");
-        pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
         rcu_read_lock();
         for_each_process(p) {
                 if (oom_unkillable_task(p, memcg, nodemask))
@@ -472,12 +480,28 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
                         continue;
                 }
 
-                pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
-                        task->pid, from_kuid(&init_user_ns, task_uid(task)),
-                        task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-                        mm_pgtables_bytes(task->mm),
-                        get_mm_counter(task->mm, MM_SWAPENTS),
-                        task->signal->oom_score_adj, task->comm);
+                if (ascend_sp_oom_show()) {
+                        stat = sp_get_proc_stat(task->tgid);
+
+                        pr_cont("[%7d] %5d %5d %8lu %8lu ",
+                                task->pid, from_kuid(&init_user_ns, task_uid(task)),
+                                task->tgid, task->mm->total_vm, get_mm_rss(task->mm));
+                        if (!stat)
+                                pr_cont("%-9c %-9c ", '-', '-');
+                        else
+                                pr_cont("%-9ld %-9ld ", (stat->alloc_size) >> 10, (stat->k2u_size) >> 10); /* byte to KB */
+                        pr_cont("%8ld %8lu %5hd %s\n",
+                                mm_pgtables_bytes(task->mm),
+                                get_mm_counter(task->mm, MM_SWAPENTS),
+                                task->signal->oom_score_adj, task->comm);
+                } else {
+                        pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
+                                task->pid, from_kuid(&init_user_ns, task_uid(task)),
+                                task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
+                                mm_pgtables_bytes(task->mm),
+                                get_mm_counter(task->mm, MM_SWAPENTS),
+                                task->signal->oom_score_adj, task->comm);
+                }
                 task_unlock(task);
         }
         rcu_read_unlock();
@@ -1141,6 +1165,8 @@ int hisi_oom_notifier_call(unsigned long val, void *v)
                 pr_err("OOM_NOTIFIER: oom type %lu\n", val);
                 dump_stack();
                 show_mem(SHOW_MEM_FILTER_NODES, NULL);
+                spg_overview_show(NULL);
+                spa_overview_show(NULL);
                 dump_tasks(NULL, 0);
                 last_jiffies = jiffies;
         }
diff --git a/mm/share_pool.c b/mm/share_pool.c
index d9f70526bae17..94e2be2ce96bc 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -83,18 +83,6 @@ static DEFINE_IDA(sp_group_id_ida);
 /* idr of all sp_proc_stats */
 static DEFINE_IDR(sp_stat_idr);
 
-/* per process memory usage statistics indexed by tgid */
-struct sp_proc_stat {
-        struct mm_struct *mm;
-        char comm[TASK_COMM_LEN];
-        /*
-         * alloc amount minus free amount, may be negative when freed by
-         * another task in the same sp group.
-         */
-        long alloc_size;
-        long k2u_size;
-};
-
 /* for kthread buff_module_guard_work */
 static struct sp_proc_stat kthread_stat = {0};
 
@@ -2475,6 +2463,18 @@ __setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group);
 
 /*** Statistical and maintenance functions ***/
 
+struct sp_proc_stat *sp_get_proc_stat(int tgid)
+{
+        struct sp_proc_stat *stat;
+
+        mutex_lock(&sp_mutex);
+        stat = idr_find(&sp_stat_idr, tgid);
+        mutex_unlock(&sp_mutex);
+
+        /* maybe NULL or not, we always return it */
+        return stat;
+}
+
 int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
                         struct pid *pid, struct task_struct *task)
 {
@@ -2484,12 +2484,12 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
         mutex_lock(&sp_mutex);
         spg = __sp_find_spg(task->pid, SPG_ID_DEFAULT);
         if (spg_valid(spg)) {
-                /* print the file header */
                 stat = idr_find(&sp_stat_idr, task->mm->sp_stat_id);
                 if (!stat) {
                         mutex_unlock(&sp_mutex);
                         return 0;
                 }
+                /* print the file header */
                 seq_printf(m, "%-8s %-9s %-13s\n",
                            "Group_ID", "SP_ALLOC", "HugePage Fail");
                 seq_printf(m, "%-8d %-9ld %-13d\n",
@@ -2553,12 +2553,15 @@ static void rb_spa_stat_show(struct seq_file *seq)
         spin_unlock(&sp_area_lock);
 }
 
-static void spa_overview_show(struct seq_file *seq)
+void spa_overview_show(struct seq_file *seq)
 {
         unsigned int total_num, alloc_num, k2u_task_num, k2u_spg_num;
         unsigned long total_size, alloc_size, k2u_task_size, k2u_spg_size;
         unsigned long dvpp_size, dvpp_va_size;
 
+        if (!enable_ascend_share_pool)
+                return;
+
         spin_lock(&sp_area_lock);
         total_num = spa_stat.total_num;
         alloc_num = spa_stat.alloc_num;
@@ -2572,16 +2575,29 @@ static void spa_overview_show(struct seq_file *seq)
         dvpp_va_size = spa_stat.dvpp_va_size;
         spin_unlock(&sp_area_lock);
 
-        seq_printf(seq, "Spa total num %u.\n", total_num);
-        seq_printf(seq, "Spa alloc num %u, k2u(task) num %u, k2u(spg) num %u.\n",
-                   alloc_num, k2u_task_num, k2u_spg_num);
-        seq_printf(seq, "Spa total size: %13lu KB\n", byte2kb(total_size));
-        seq_printf(seq, "Spa alloc size: %13lu KB\n", byte2kb(alloc_size));
-        seq_printf(seq, "Spa k2u(task) size: %13lu KB\n", byte2kb(k2u_task_size));
-        seq_printf(seq, "Spa k2u(spg) size: %13lu KB\n", byte2kb(k2u_spg_size));
-        seq_printf(seq, "Spa dvpp size: %13lu KB\n", byte2kb(dvpp_size));
-        seq_printf(seq, "Spa dvpp va size: %13lu MB\n", byte2mb(dvpp_va_size));
-        seq_printf(seq, "\n");
+        if (seq != NULL) {
+                seq_printf(seq, "Spa total num %u.\n", total_num);
+                seq_printf(seq, "Spa alloc num %u, k2u(task) num %u, k2u(spg) num %u.\n",
+                           alloc_num, k2u_task_num, k2u_spg_num);
+                seq_printf(seq, "Spa total size: %13lu KB\n", byte2kb(total_size));
+                seq_printf(seq, "Spa alloc size: %13lu KB\n", byte2kb(alloc_size));
+                seq_printf(seq, "Spa k2u(task) size: %13lu KB\n", byte2kb(k2u_task_size));
+                seq_printf(seq, "Spa k2u(spg) size: %13lu KB\n", byte2kb(k2u_spg_size));
+                seq_printf(seq, "Spa dvpp size: %13lu KB\n", byte2kb(dvpp_size));
+                seq_printf(seq, "Spa dvpp va size: %13lu MB\n", byte2mb(dvpp_va_size));
+                seq_puts(seq, "\n");
+        } else {
+                pr_info("Spa total num %u.\n", total_num);
+                pr_info("Spa alloc num %u, k2u(task) num %u, k2u(spg) num %u.\n",
+                        alloc_num, k2u_task_num, k2u_spg_num);
+                pr_info("Spa total size: %13lu KB\n", byte2kb(total_size));
+                pr_info("Spa alloc size: %13lu KB\n", byte2kb(alloc_size));
+                pr_info("Spa k2u(task) size: %13lu KB\n", byte2kb(k2u_task_size));
+                pr_info("Spa k2u(spg) size: %13lu KB\n", byte2kb(k2u_spg_size));
+                pr_info("Spa dvpp size: %13lu KB\n", byte2kb(dvpp_size));
+                pr_info("Spa dvpp va size: %13lu MB\n", byte2mb(dvpp_va_size));
+                pr_info("\n");
+        }
 }
 
 /* the caller must hold sp_mutex */
@@ -2590,25 +2606,48 @@ static int idr_spg_stat_cb(int id, void *p, void *data)
         struct sp_group *spg = p;
         struct seq_file *seq = data;
 
-        seq_printf(seq, "Group %6d size: %ld KB, spa num: %d, total alloc: %ld KB, "
-                   "normal alloc: %ld KB, huge alloc: %ld KB\n",
-                   id, byte2kb(atomic64_read(&spg->size)), atomic_read(&spg->spa_num),
-                   byte2kb(atomic64_read(&spg->alloc_size)),
-                   byte2kb(atomic64_read(&spg->alloc_nsize)),
-                   byte2kb(atomic64_read(&spg->alloc_hsize)));
+        if (seq != NULL) {
+                seq_printf(seq, "Group %6d size: %ld KB, spa num: %d, total alloc: %ld KB, "
+                           "normal alloc: %ld KB, huge alloc: %ld KB\n",
+                           id, byte2kb(atomic64_read(&spg->size)), atomic_read(&spg->spa_num),
+                           byte2kb(atomic64_read(&spg->alloc_size)),
+                           byte2kb(atomic64_read(&spg->alloc_nsize)),
+                           byte2kb(atomic64_read(&spg->alloc_hsize)));
+        } else {
+                pr_info("Group %6d size: %ld KB, spa num: %d, total alloc: %ld KB, "
+                        "normal alloc: %ld KB, huge alloc: %ld KB\n",
+                        id, byte2kb(atomic64_read(&spg->size)), atomic_read(&spg->spa_num),
+                        byte2kb(atomic64_read(&spg->alloc_size)),
+                        byte2kb(atomic64_read(&spg->alloc_nsize)),
+                        byte2kb(atomic64_read(&spg->alloc_hsize)));
+        }
 
         return 0;
 }
 
-static void spg_overview_show(struct seq_file *seq)
+void spg_overview_show(struct seq_file *seq)
 {
-        seq_printf(seq, "Share pool total size: %ld KB, spa total num: %d.\n",
-                   byte2kb(atomic64_read(&spg_stat.spa_total_size)),
-                   atomic_read(&spg_stat.spa_total_num));
+        if (!enable_ascend_share_pool)
+                return;
+
+        if (seq != NULL) {
+                seq_printf(seq, "Share pool total size: %ld KB, spa total num: %d.\n",
+                           byte2kb(atomic64_read(&spg_stat.spa_total_size)),
+                           atomic_read(&spg_stat.spa_total_num));
+        } else {
+                pr_info("Share pool total size: %ld KB, spa total num: %d.\n",
+                        byte2kb(atomic64_read(&spg_stat.spa_total_size)),
+                        atomic_read(&spg_stat.spa_total_num));
+        }
+
         mutex_lock(&sp_mutex);
         idr_for_each(&sp_group_idr, idr_spg_stat_cb, seq);
         mutex_unlock(&sp_mutex);
-        seq_printf(seq, "\n");
+
+        if (seq != NULL)
+                seq_puts(seq, "\n");
+        else
+                pr_info("\n");
 }
 
 static int spa_stat_show(struct seq_file *seq, void *offset)