
From: Takao Indoh <indou.takao@fujitsu.com>

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4BLL0
CVE: NA

---------------------------

mm_cpumask was deleted by commit 38d96287504a ("arm64: mm: kill mm_cpumask
usage") because it was unused at the time. It is now needed to find the
appropriate CPUs for a TLB flush, so this patch reverts that commit.

Signed-off-by: QI Fuli <qi.fuli@fujitsu.com>
Signed-off-by: Takao Indoh <indou.takao@fujitsu.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Yu Liao <liaoyu15@huawei.com>
---
 arch/arm64/kernel/smp.c | 6 ++++++
 arch/arm64/mm/context.c | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 13dc8228700f..6f7b760f65cd 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -411,6 +411,7 @@ asmlinkage notrace void secondary_start_kernel(void)
 	 */
 	mmgrab(mm);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
@@ -525,6 +526,11 @@ int __cpu_disable(void)
 	 */
 	irq_migrate_all_off_this_cpu();
 
+	/*
+	 * Remove this CPU from the vm mask set of all processes.
+	 */
+	clear_tasks_mm_cpumask(cpu);
+
 	return 0;
 }
 
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 45c92d0f71d3..279d092555b9 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -224,6 +224,7 @@ set_asid:
 	__set_bit(asid, asid_map);
 	cur_idx = asid;
+	cpumask_clear(mm_cpumask(mm));
 	return idx2asid(asid) | generation;
 }
 
@@ -280,6 +281,7 @@ switch_mm_fastpath:
 	arm64_apply_bp_hardening();
 #else
 	arm64_apply_bp_hardening();
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 #endif
 
 	/*
--
CodeHub

From f21647c3989435fcf04949b0c9db5a93f2a80716 Mon Sep 17 00:00:00 2001
From: Takao Indoh <indou.takao@fujitsu.com>
Date: Mon, 20 Dec 2021 16:00:56 +0800
Subject: [PATCH 02/10] arm64: tlb: Add boot parameter to disable TLB flush
 within the same inner shareable domain

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4BLL0
CVE: NA

---------------------------

This patch adds a new boot parameter, 'disable_tlbflush_is', which disables
the TLB flush within the same inner shareable domain for performance tuning.

Without this parameter, flush_tlb_mm() invalidates TLB entries with
__tlbi(aside1is, asid). With this instruction, every CPU in the same inner
shareable domain checks whether it holds TLB entries for this ASID, which
causes performance noise, especially in large-scale HPC environments with
more than a thousand nodes on a low-latency interconnect.

When the new parameter is specified, TLB entries are invalidated with
__tlbi(aside1, asid) only on the CPUs recorded in mm_cpumask(mm). The TLB
flush is therefore limited to a minimal set of CPUs and the performance
problem does not occur.

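To make the cost argument above concrete, here is a minimal user-space model of the two flush strategies. It is only an illustration, not kernel code; the domain size and the number of CPUs the process has run on are made-up numbers.

/*
 * Minimal user-space model of the trade-off described above: a broadcast
 * TLBI ASIDE1IS disturbs every CPU in the inner shareable domain, while the
 * IPI-based scheme only disturbs the CPUs recorded in mm_cpumask(mm).
 * The CPU counts are assumptions chosen for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 256	/* assumed size of the inner shareable domain */

int main(void)
{
	bool mm_cpumask[NR_CPUS] = { false };
	int cpus_in_mask = 0;
	int cpu;

	/* Assume the process has only ever run on 4 of the 256 CPUs. */
	for (cpu = 0; cpu < 4; cpu++)
		mm_cpumask[cpu] = true;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (mm_cpumask[cpu])
			cpus_in_mask++;

	/* Default path: TLBI ASIDE1IS is broadcast to the whole domain. */
	printf("broadcast flush disturbs %d CPUs\n", NR_CPUS);

	/* disable_tlbflush_is path: IPI + local TLBI ASIDE1 per masked CPU. */
	printf("mm_cpumask-targeted flush disturbs %d CPUs\n", cpus_in_mask);

	return 0;
}

The rest of the series wires exactly this decision into flush_tlb_mm(), flush_tlb_page_nosync() and __flush_tlb_range().
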
Signed-off-by: QI Fuli <qi.fuli@fujitsu.com> Signed-off-by: Takao Indoh <indou.takao@fujitsu.com> Signed-off-by: Cheng Jian <cj.chengjian@huawei.com> Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Signed-off-by: Yu Liao <liaoyu15@huawei.com> --- .../admin-guide/kernel-parameters.txt | 4 + arch/arm64/include/asm/mmu_context.h | 7 +- arch/arm64/include/asm/tlbflush.h | 117 +------- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/tlbflush.c | 269 ++++++++++++++++++ 5 files changed, 292 insertions(+), 107 deletions(-) create mode 100644 arch/arm64/kernel/tlbflush.c diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f216c001fe90..69c7777bd9c9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -941,6 +941,10 @@ disable= [IPV6] See Documentation/networking/ipv6.rst. + disable_tlbflush_is + [ARM64] Disable using TLB instruction to flush + all PE within the same inner shareable domain. + hardened_usercopy= [KNL] Under CONFIG_HARDENED_USERCOPY, whether hardening is enabled for this boot. Hardened diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index ac4ab31267db..1ff4641b4339 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -235,9 +235,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + unsigned int cpu = smp_processor_id(); + isovm_update_tcr_ttbr(prev, next, tsk); - if (prev != next) + if (prev != next) { __switch_mm(next); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + local_flush_tlb_mm(prev); + } /* * Update the saved TTBR0_EL1 of the scheduled-in task as the previous diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a89bb836ae05..70b43306493c 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -302,6 +302,13 @@ __isovm_flush_tlb_range(unsigned long asid, unsigned long start, bool last_lvl, int tlb_lvl, int scale, int num) {} #endif /* CONFIG_RTOS_ISOLATION_VM */ +void flush_tlb_mm(struct mm_struct *mm); +void flush_tlb_page_nosync(struct vm_area_struct *vma, + unsigned long uaddr); +void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long stride, bool last_level, + int tlb_level); + static inline void local_flush_tlb_all(void) { dsb(nshst); @@ -318,28 +325,14 @@ static inline void flush_tlb_all(void) isb(); } -static inline void flush_tlb_mm(struct mm_struct *mm) +static inline void local_flush_tlb_mm(struct mm_struct *mm) { unsigned long asid; - - dsb(ishst); + dsb(nshst); asid = __TLBI_VADDR(0, ASID(mm)); - __tlbi(aside1is, asid); - __tlbi_user(aside1is, asid); - isovm_flush_tlb_mm(ASID(mm)); - dsb(ish); -} - -static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, - unsigned long uaddr) -{ - unsigned long addr; - - dsb(ishst); - addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); - __tlbi(vale1is, addr); - __tlbi_user(vale1is, addr); - isovm_flush_tlb_page_nosync(ASID(vma->vm_mm), uaddr); + __tlbi(aside1, asid); + __tlbi_user(aside1, asid); + dsb(nsh); } static inline void flush_tlb_page(struct vm_area_struct *vma, @@ -355,92 +348,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, */ #define MAX_TLBI_OPS PTRS_PER_PTE -static inline void __flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, 
unsigned long end, - unsigned long stride, bool last_level, - int tlb_level) -{ - int num = 0; - int scale = 0; - unsigned long asid, addr, pages; - - start = round_down(start, stride); - end = round_up(end, stride); - pages = (end - start) >> PAGE_SHIFT; - - /* - * When not uses TLB range ops, we can handle up to - * (MAX_TLBI_OPS - 1) pages; - * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. - */ - if ((!system_supports_tlb_range() && - (end - start) >= (MAX_TLBI_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { - flush_tlb_mm(vma->vm_mm); - return; - } - - dsb(ishst); - asid = ASID(vma->vm_mm); - - /* - * When the CPU does not support TLB range operations, flush the TLB - * entries one by one at the granularity of 'stride'. If the the TLB - * range ops are supported, then: - * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; - * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. - */ - while (pages > 0) { - if (!system_supports_tlb_range() || - pages % 2 == 1) { - addr = __TLBI_VADDR(start, asid); - if (last_level) { - __tlbi_level(vale1is, addr, tlb_level); - __tlbi_user_level(vale1is, addr, tlb_level); - } else { - __tlbi_level(vae1is, addr, tlb_level); - __tlbi_user_level(vae1is, addr, tlb_level); - } - __isovm_flush_tlb_stride(asid, start, last_level, - tlb_level); - start += stride; - pages -= stride >> PAGE_SHIFT; - continue; - } - - num = __TLBI_RANGE_NUM(pages, scale); - if (num >= 0) { - addr = __TLBI_VADDR_RANGE(start, asid, scale, - num, tlb_level); - if (last_level) { - __tlbi(rvale1is, addr); - __tlbi_user(rvale1is, addr); - } else { - __tlbi(rvae1is, addr); - __tlbi_user(rvae1is, addr); - } - __isovm_flush_tlb_range(asid, start, last_level, - tlb_level, scale, num); - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; - pages -= __TLBI_RANGE_PAGES(num, scale); - } - scale++; - } - dsb(ish); -} - static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d5aa9c62c003..4e400d8ab0af 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -28,7 +28,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o ipi_nmi.o + syscall.o proton-pack.o ipi_nmi.o tlbflush.o targets += efi-entry.o diff --git a/arch/arm64/kernel/tlbflush.c b/arch/arm64/kernel/tlbflush.c new file mode 100644 index 000000000000..82ed8b21ba75 --- /dev/null +++ b/arch/arm64/kernel/tlbflush.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 FUJITSU LIMITED + +#include <linux/smp.h> +#include <asm/tlbflush.h> + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; + unsigned long ta_stride; + bool ta_last_level; + int ta_tlb_level; +}; + +int disable_tlbflush_is; + +static int __init disable_tlbflush_is_setup(char *str) +{ + 
disable_tlbflush_is = 1; + + return 0; +} +__setup("disable_tlbflush_is", disable_tlbflush_is_setup); + +static inline void __flush_tlb_mm(struct mm_struct *mm) +{ + unsigned long asid; + + dsb(ishst); + asid = __TLBI_VADDR(0, ASID(mm)); + __tlbi(aside1is, asid); + __tlbi_user(aside1is, asid); + dsb(ish); +} + +static inline void ipi_flush_tlb_mm(void *arg) +{ + struct mm_struct *mm = arg; + + local_flush_tlb_mm(mm); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + if (disable_tlbflush_is) + on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, + (void *)mm, true); + else + __flush_tlb_mm(mm); +} + +static inline void __flush_tlb_page_nosync(unsigned long addr) +{ + dsb(ishst); + __tlbi(vale1is, addr); + __tlbi_user(vale1is, addr); +} + +static inline void __local_flush_tlb_page_nosync(unsigned long addr) +{ + dsb(nshst); + __tlbi(vale1, addr); + __tlbi_user(vale1, addr); + dsb(nsh); +} + +static inline void ipi_flush_tlb_page_nosync(void *arg) +{ + unsigned long addr = *(unsigned long *)arg; + + __local_flush_tlb_page_nosync(addr); +} + +void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr) +{ + unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); + + if (disable_tlbflush_is) + on_each_cpu_mask(mm_cpumask(vma->vm_mm), + ipi_flush_tlb_page_nosync, &addr, true); + else + __flush_tlb_page_nosync(addr); +} + + +static inline void ___flush_tlb_range(struct vm_area_struct *vma, + long start, unsigned long end, + unsigned long stride, bool last_level, + int tlb_level) +{ + int num = 0; + int scale = 0; + unsigned long asid, addr, pages; + + pages = (end - start) >> PAGE_SHIFT; + + dsb(ishst); + asid = ASID(vma->vm_mm); + + /* + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the the TLB + * range ops are supported, then: + * + * 1. If 'pages' is odd, flush the first page through non-range + * operations; + * + * 2. For remaining pages: the minimum range granularity is decided + * by 'scale', so multiple range TLBI operations may be required. + * Start from scale = 0, flush the corresponding number of pages + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it + * until no pages left. + * + * Note that certain ranges can be represented by either num = 31 and + * scale or num = 0 and scale + 1. The loop below favours the latter + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. 
+ */ + while (pages > 0) { + if (!system_supports_tlb_range() || + pages % 2 == 1) { + addr = __TLBI_VADDR(start, asid); + if (last_level) { + __tlbi_level(vale1is, addr, tlb_level); + __tlbi_user_level(vale1is, addr, tlb_level); + } else { + __tlbi_level(vae1is, addr, tlb_level); + __tlbi_user_level(vae1is, addr, tlb_level); + } + start += stride; + pages -= stride >> PAGE_SHIFT; + continue; + } + + num = __TLBI_RANGE_NUM(pages, scale); + if (num >= 0) { + addr = __TLBI_VADDR_RANGE(start, asid, scale, + num, tlb_level); + if (last_level) { + __tlbi(rvale1is, addr); + __tlbi_user(rvale1is, addr); + } else { + __tlbi(rvae1is, addr); + __tlbi_user(rvae1is, addr); + } + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; + pages -= __TLBI_RANGE_PAGES(num, scale); + } + scale++; + } + dsb(ish); +} + +static inline void __local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long stride, bool last_level, + int tlb_level) +{ + int num = 0; + int scale = 0; + unsigned long asid, addr, pages; + + pages = (end - start) >> PAGE_SHIFT; + + dsb(nshst); + asid = ASID(vma->vm_mm); + + /* + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the the TLB + * range ops are supported, then: + * + * 1. If 'pages' is odd, flush the first page through non-range + * operations; + * + * 2. For remaining pages: the minimum range granularity is decided + * by 'scale', so multiple range TLBI operations may be required. + * Start from scale = 0, flush the corresponding number of pages + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it + * until no pages left. + * + * Note that certain ranges can be represented by either num = 31 and + * scale or num = 0 and scale + 1. The loop below favours the latter + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + */ + while (pages > 0) { + if (!system_supports_tlb_range() || + pages % 2 == 1) { + addr = __TLBI_VADDR(start, asid); + if (last_level) { + __tlbi_level(vale1, addr, tlb_level); + __tlbi_user_level(vale1, addr, tlb_level); + } else { + __tlbi_level(vae1, addr, tlb_level); + __tlbi_user_level(vae1, addr, tlb_level); + } + start += stride; + pages -= stride >> PAGE_SHIFT; + continue; + } + + num = __TLBI_RANGE_NUM(pages, scale); + if (num >= 0) { + addr = __TLBI_VADDR_RANGE(start, asid, scale, + num, tlb_level); + if (last_level) { + __tlbi(rvale1, addr); + __tlbi_user(rvale1, addr); + } else { + __tlbi(rvae1, addr); + __tlbi_user(rvae1, addr); + } + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; + pages -= __TLBI_RANGE_PAGES(num, scale); + } + scale++; + } + dsb(nsh); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + __local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end, ta->ta_stride, + ta->ta_last_level, ta->ta_tlb_level); + +} + +void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long stride, bool last_level, + int tlb_level) +{ + unsigned long pages; + + start = round_down(start, stride); + end = round_up(end, stride); + pages = (end - start) >> PAGE_SHIFT; + + /* + * When not uses TLB range ops, we can handle up to + * (MAX_TLBI_OPS - 1) pages; + * When uses TLB range ops, we can handle up to + * (MAX_TLBI_RANGE_PAGES - 1) pages. 
+ */ + if ((!system_supports_tlb_range() && + (end - start) >= (MAX_TLBI_OPS * stride)) || + pages >= MAX_TLBI_RANGE_PAGES) { + flush_tlb_mm(vma->vm_mm); + return; + } + + if (disable_tlbflush_is) { + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end, + .ta_stride = stride, + .ta_last_level = last_level, + .ta_tlb_level = tlb_level, + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, + &ta, true); + } else + ___flush_tlb_range(vma, start, end, stride, last_level, tlb_level); +} + -- CodeHub From 4c393bf82f57255cfc5d76bfa9824f5216c7d020 Mon Sep 17 00:00:00 2001 From: Yu Liao <liaoyu15@huawei.com> Date: Tue, 7 May 2024 10:28:42 +0800 Subject: [PATCH 03/10] Revert "arm64/mm: save memory access in check_and_switch_context() fast switch path" This reverts commit c4885bbb3afee80f41d39a33e49881a18e500f47. --- arch/arm64/include/asm/mmu_context.h | 6 ++++-- arch/arm64/mm/context.c | 10 ++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 1ff4641b4339..564700e617b4 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -174,7 +174,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) * take CPU migration into account. */ #define destroy_context(mm) do { } while(0) -void check_and_switch_context(struct mm_struct *mm); +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) @@ -219,6 +219,8 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void __switch_mm(struct mm_struct *next) { + unsigned int cpu = smp_processor_id(); + /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. @@ -228,7 +230,7 @@ static inline void __switch_mm(struct mm_struct *next) return; } - check_and_switch_context(next); + check_and_switch_context(next, cpu); } static inline void diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 279d092555b9..a7c186347deb 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -228,10 +228,9 @@ set_asid: return idx2asid(asid) | generation; } -void check_and_switch_context(struct mm_struct *mm) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) { unsigned long flags; - unsigned int cpu; u64 asid, old_active_asid; if (system_supports_cnp()) @@ -253,9 +252,9 @@ void check_and_switch_context(struct mm_struct *mm) * relaxed xchg in flush_context will treat us as reserved * because atomic RmWs are totally ordered for a given location. 
*/ - old_active_asid = atomic64_read(this_cpu_ptr(&active_asids)); + old_active_asid = atomic64_read(&per_cpu(active_asids, cpu)); if (old_active_asid && asid_gen_match(asid) && - atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids), + atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu), old_active_asid, asid)) goto switch_mm_fastpath; @@ -267,11 +266,10 @@ void check_and_switch_context(struct mm_struct *mm) atomic64_set(&mm->context.id, asid); } - cpu = smp_processor_id(); if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) local_flush_tlb_all(); - atomic64_set(this_cpu_ptr(&active_asids), asid); + atomic64_set(&per_cpu(active_asids, cpu), asid); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: -- CodeHub From 297a6ee0e58aea4cc1fa8f4e883b35f9bd5bc62d Mon Sep 17 00:00:00 2001 From: Zhang Jianhua <chris.zjh@huawei.com> Date: Fri, 6 Sep 2024 10:14:06 +0800 Subject: [PATCH 04/10] add testcase of test_smp_call hulk inclusion category: feature bugzilla: NA DTS: NA CVE: NA -------- Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> --- drivers/Makefile | 1 + drivers/xint/Makefile | 2 ++ drivers/xint/test_smp_call.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 drivers/xint/Makefile create mode 100644 drivers/xint/test_smp_call.c diff --git a/drivers/Makefile b/drivers/Makefile index 50205b73f3f9..ce6feccd117f 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -200,3 +200,4 @@ obj-$(CONFIG_RTOS) += hal/ obj-$(CONFIG_KEXEC_KERNEL_HOTUPGRADE) += vpmem/ obj-$(CONFIG_ARCH_BSP) += vendor/ +obj-y += xint/ diff --git a/drivers/xint/Makefile b/drivers/xint/Makefile new file mode 100644 index 000000000000..3d90c9723d4d --- /dev/null +++ b/drivers/xint/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-m += test_smp_call.o diff --git a/drivers/xint/test_smp_call.c b/drivers/xint/test_smp_call.c new file mode 100644 index 000000000000..4272001d9aca --- /dev/null +++ b/drivers/xint/test_smp_call.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2020. All rights reserved. 
+ * Author: Huawei OS Kernel Lab + * Create: Wed Mar 06 14:36:38 2024 + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <asm/smp.h> + +static int tmp = 1; + +static void ipi_func(void *arg) +{ + pr_info("CPU %d receive ipi, arg = %d\n", smp_processor_id(), *(int *)arg); +} + +//向所有cpu都发送smp_call_function_tlbi +static int test_init(void) +{ +// smp_call_function_tlbi(cpu_online_mask, ipi_func, &tmp); + on_each_cpu(ipi_func, &tmp, true); + return 0; +} + +static void test_exit(void) +{ + return; +} + +module_init(test_init); +module_exit(test_exit); + +MODULE_DESCRIPTION("test smp_call_function_tlbi"); +MODULE_LICENSE("GPL"); -- CodeHub From fc17bfc0ba827d8f172aa891c2aa1e7142f11010 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua <chris.zjh@huawei.com> Date: Sat, 7 Sep 2024 16:59:46 +0800 Subject: [PATCH 05/10] add on_each_cpu_mask_tlbi() hulk inclusion category: feature bugzilla: NA DTS: NA CVE: NA -------- Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> --- arch/arm64/include/asm/smp.h | 2 ++ arch/arm64/kernel/smp.c | 11 +++++++++++ include/linux/smp.h | 12 ++++++++++++ kernel/smp.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 8c5d2d650b8a..ed16b7dd054f 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -90,6 +90,8 @@ extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +extern void arch_send_call_function_ipi_mask_tlbi(const struct cpumask *mask); + #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); #else diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6f7b760f65cd..ffbf4d0eea56 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -88,6 +88,7 @@ static int cpus_stuck_in_kernel; enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, + IPI_TLBI, IPI_CPU_STOP, IPI_CPU_CRASH_STOP, IPI_TIMER, @@ -1041,6 +1042,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { #define S(x,s) [x] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), + S(IPI_TLBI, "Function call interrupts for tlbi"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), @@ -1085,6 +1087,11 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } +void arch_send_call_function_ipi_mask_tlbi(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_TLBI); +} + #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL void arch_send_wakeup_ipi_mask(const struct cpumask *mask) { @@ -1188,6 +1195,10 @@ static void do_handle_IPI(int ipinr) generic_smp_call_function_interrupt(); break; + case IPI_TLBI: + ipi_tlbi_func(ipi_tlbi_info); + break; + case IPI_CPU_STOP: local_cpu_stop(); break; diff --git a/include/linux/smp.h b/include/linux/smp.h index 812c26f61300..67b4738e6d4b 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -17,6 +17,9 @@ typedef void (*smp_call_func_t)(void *info); typedef bool (*smp_cond_func_t)(int cpu, void *info); +extern void *ipi_tlbi_info; +extern smp_call_func_t ipi_tlbi_func; + /* * structure shares (partial) layout with struct irq_work */ @@ -53,6 +56,9 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void 
*info, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask); +void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func, + void *info, bool wait, const struct cpumask *mask); + int smp_call_function_single_async(int cpu, call_single_data_t *csd); /* @@ -85,6 +91,12 @@ static inline void on_each_cpu_mask(const struct cpumask *mask, on_each_cpu_cond_mask(NULL, func, info, wait, mask); } +static inline void on_each_cpu_mask_tlbi(const struct cpumask *mask, + smp_call_func_t func, void *info, bool wait) +{ + on_each_cpu_cond_mask_tlbi(NULL, func, info, wait, mask); +} + /* * Call a function on each processor for which the supplied function * cond_func returns a positive value. This may include the local diff --git a/kernel/smp.c b/kernel/smp.c index 27ded167f36d..cedb47df4579 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -32,6 +32,9 @@ #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK) +void *ipi_tlbi_info; +smp_call_func_t ipi_tlbi_func; + struct call_function_data { call_single_data_t __percpu *csd; cpumask_var_t cpumask; @@ -763,6 +766,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask, } } +static void smp_call_function_many_cond_tlbi(const struct cpumask *mask, + smp_call_func_t func, void *info, + unsigned int scf_flags, + smp_cond_func_t cond_func) +{ + ipi_tlbi_func = func; + ipi_tlbi_info = info; + + arch_send_call_function_ipi_mask_tlbi(mask); +} + /** * smp_call_function_many(): Run a function on a set of CPUs. * @mask: The set of cpus to run on (only runs on online subset). @@ -929,6 +943,20 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, } EXPORT_SYMBOL(on_each_cpu_cond_mask); +void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func, + void *info, bool wait, const struct cpumask *mask) +{ + unsigned int scf_flags = SCF_RUN_LOCAL; + + if (wait) + scf_flags |= SCF_WAIT; + + preempt_disable(); + smp_call_function_many_cond_tlbi(mask, func, info, scf_flags, cond_func); + preempt_enable(); +} +EXPORT_SYMBOL(on_each_cpu_cond_mask_tlbi); + static void do_nothing(void *unused) { } -- CodeHub From 6f0f2786d337432f5c9d771813c318ee637d05d8 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua <chris.zjh@huawei.com> Date: Sat, 7 Sep 2024 17:50:00 +0800 Subject: [PATCH 06/10] implement real flush tlbi func hulk inclusion category: feature bugzilla: NA DTS: NA CVE: NA -------- Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> --- arch/arm64/kernel/smp.c | 3 +- drivers/xint/test_smp_call.c | 4 +- include/linux/smp.h | 4 ++ kernel/smp.c | 106 ++++++++++++++++++++++++++++++++--- 4 files changed, 107 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ffbf4d0eea56..d2549052f22c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1196,7 +1196,8 @@ static void do_handle_IPI(int ipinr) break; case IPI_TLBI: - ipi_tlbi_func(ipi_tlbi_info); + generic_smp_call_function_interrupt_tlbi(); + //ipi_tlbi_func(ipi_tlbi_info); break; case IPI_CPU_STOP: diff --git a/drivers/xint/test_smp_call.c b/drivers/xint/test_smp_call.c index 4272001d9aca..a0c681d5c5ba 100644 --- a/drivers/xint/test_smp_call.c +++ b/drivers/xint/test_smp_call.c @@ -19,8 +19,8 @@ static void ipi_func(void *arg) //向所有cpu都发送smp_call_function_tlbi static int test_init(void) { -// smp_call_function_tlbi(cpu_online_mask, ipi_func, &tmp); - on_each_cpu(ipi_func, &tmp, true); + 
on_each_cpu_mask_tlbi(cpu_online_mask, ipi_func, &tmp, true); + return 0; } diff --git a/include/linux/smp.h b/include/linux/smp.h index 67b4738e6d4b..2e402d91f40b 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -169,6 +169,10 @@ void generic_smp_call_function_single_interrupt(void); #define generic_smp_call_function_interrupt \ generic_smp_call_function_single_interrupt +void generic_smp_call_function_single_interrupt_tlbi(void); +#define generic_smp_call_function_interrupt_tlbi \ + generic_smp_call_function_single_interrupt_tlbi + /* * Mark the boot cpu "online" so that it can call console drivers in * printk() and can access its per-cpu storage. diff --git a/kernel/smp.c b/kernel/smp.c index cedb47df4579..b05edcb7a488 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -44,6 +44,7 @@ struct call_function_data { static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data); static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue_tlbi); static void flush_smp_call_function_queue(bool warn_cpu_offline); @@ -107,8 +108,10 @@ void __init call_function_init(void) { int i; - for_each_possible_cpu(i) + for_each_possible_cpu(i) { init_llist_head(&per_cpu(call_single_queue, i)); + init_llist_head(&per_cpu(call_single_queue_tlbi, i)); + } smpcfd_prepare_cpu(smp_processor_id()); } @@ -366,6 +369,50 @@ void generic_smp_call_function_single_interrupt(void) flush_smp_call_function_queue(true); } +static void flush_smp_call_function_queue_tlbi(bool warn_cpu_offline) +{ + call_single_data_t *csd, *csd_next; + struct llist_node *entry, *prev; + struct llist_head *head; + + lockdep_assert_irqs_disabled(); + + head = this_cpu_ptr(&call_single_queue_tlbi); + entry = llist_del_all(head); + entry = llist_reverse_order(entry); + + //只服务于tlbi,全都是CSD_TYPE_SYNC类型 + /* + * First; run all SYNC callbacks, people are waiting for us. + */ + prev = NULL; + llist_for_each_entry_safe(csd, csd_next, entry, node.llist) { + /* Do we wait until *after* callback? */ + if (CSD_TYPE(csd) == CSD_TYPE_SYNC) { + smp_call_func_t func = csd->func; + void *info = csd->info; + + if (prev) { + prev->next = &csd_next->node.llist; + } else { + entry = &csd_next->node.llist; + } + + csd_lock_record(csd); + func(info); + csd_unlock(csd); + csd_lock_record(NULL); + } else { + prev = &csd->node.llist; + } + } +} + +void generic_smp_call_function_single_interrupt_tlbi(void) +{ + flush_smp_call_function_queue_tlbi(true); +} + /** * flush_smp_call_function_queue - Flush pending smp-call-function callbacks * @@ -771,10 +818,60 @@ static void smp_call_function_many_cond_tlbi(const struct cpumask *mask, unsigned int scf_flags, smp_cond_func_t cond_func) { +#if 0 ipi_tlbi_func = func; ipi_tlbi_info = info; arch_send_call_function_ipi_mask_tlbi(mask); +#endif + + int cpu, this_cpu = smp_processor_id(); + struct call_function_data *cfd; + bool run_remote = false; + bool run_local = false; + + cfd = this_cpu_ptr(&cfd_data); + cpumask_and(cfd->cpumask, mask, cpu_online_mask); + + /* Check if we need local execution. */ + if (cpumask_test_cpu(this_cpu, cfd->cpumask)) + run_local = true; + + /* Check if we need remote execution, i.e., any CPU excluding this one. 
*/ + __cpumask_clear_cpu(this_cpu, cfd->cpumask); + if (!cpumask_empty(mask)) + run_remote = true; + + if (run_remote) { + for_each_cpu(cpu, cfd->cpumask) { + call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); + + csd_lock(csd); + csd->node.u_flags |= CSD_TYPE_SYNC; + csd->func = func; + csd->info = info; + llist_add(&csd->node.llist, &per_cpu(call_single_queue_tlbi, cpu)); + } + + arch_send_call_function_ipi_mask_tlbi(cfd->cpumask); + } + + if (run_local) { + unsigned long flags; + + local_irq_save(flags); + func(info); + local_irq_restore(flags); + } + + if (run_remote) { + for_each_cpu(cpu, cfd->cpumask) { + call_single_data_t *csd; + + csd = per_cpu_ptr(cfd->csd, cpu); + csd_lock_wait(csd); + } + } } /** @@ -946,13 +1043,8 @@ EXPORT_SYMBOL(on_each_cpu_cond_mask); void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask) { - unsigned int scf_flags = SCF_RUN_LOCAL; - - if (wait) - scf_flags |= SCF_WAIT; - preempt_disable(); - smp_call_function_many_cond_tlbi(mask, func, info, scf_flags, cond_func); + smp_call_function_many_cond_tlbi(mask, func, info, 0, cond_func); preempt_enable(); } EXPORT_SYMBOL(on_each_cpu_cond_mask_tlbi); -- CodeHub From 1ca7aa7c99f02f752bfefb08adbd1e4c11a70368 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua <chris.zjh@huawei.com> Date: Sun, 8 Sep 2024 11:11:35 +0800 Subject: [PATCH 07/10] [Huawei] arm64: xint: support sgi hulk inclusion category: feature bugzilla: NA DTS: NA CVE: NA -------- Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> --- drivers/irqchip/irq-gic-v3.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 4f7966fde66e..1737d02137fa 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -825,6 +825,10 @@ static void update_xcache(struct irq_desc *desc, unsigned int hwirq, bool valid) static void xint_insert_desc(unsigned int hwirq, struct irq_desc *desc) { switch (__get_intid_range(hwirq)) { + case SGI_RANGE: + xint_desc_array[hwirq] = desc; + return; + case SPI_RANGE: xint_desc_array[hwirq] = desc; return; @@ -841,6 +845,9 @@ static void xint_insert_desc(unsigned int hwirq, struct irq_desc *desc) struct irq_desc *xint_to_desc(unsigned int hwirq) { switch (__get_intid_range(hwirq)) { + case SGI_RANGE: + return xint_desc_array[hwirq]; + case SPI_RANGE: return xint_desc_array[hwirq]; @@ -855,6 +862,10 @@ struct irq_desc *xint_to_desc(unsigned int hwirq) static void xint_delete_desc(unsigned int hwirq) { switch (__get_intid_range(hwirq)) { + case SGI_RANGE: + xint_desc_array[hwirq] = NULL; + return; + case SPI_RANGE: xint_desc_array[hwirq] = NULL; return; -- CodeHub From caba06c89b5ca237abe48132ec68a550fa61c620 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua <chris.zjh@huawei.com> Date: Sun, 8 Sep 2024 16:21:49 +0800 Subject: [PATCH 08/10] replace on_each_cpu_mask with on_each_cpu_mask_tlbi for tlbi hulk inclusion category: feature bugzilla: NA DTS: NA CVE: NA -------- Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> --- arch/arm64/kernel/tlbflush.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/tlbflush.c b/arch/arm64/kernel/tlbflush.c index 82ed8b21ba75..305d9a119f85 100644 --- a/arch/arm64/kernel/tlbflush.c +++ b/arch/arm64/kernel/tlbflush.c @@ -44,7 +44,9 @@ static inline void ipi_flush_tlb_mm(void *arg) void flush_tlb_mm(struct mm_struct *mm) { if (disable_tlbflush_is) - on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, 
+// on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, +// (void *)mm, true); + on_each_cpu_mask_tlbi(mm_cpumask(mm), ipi_flush_tlb_mm, (void *)mm, true); else __flush_tlb_mm(mm); @@ -77,7 +79,9 @@ void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr) unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); if (disable_tlbflush_is) - on_each_cpu_mask(mm_cpumask(vma->vm_mm), +// on_each_cpu_mask(mm_cpumask(vma->vm_mm), +// ipi_flush_tlb_page_nosync, &addr, true); + on_each_cpu_mask_tlbi(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page_nosync, &addr, true); else __flush_tlb_page_nosync(addr); @@ -261,7 +265,9 @@ void __flush_tlb_range(struct vm_area_struct *vma, .ta_tlb_level = tlb_level, }; - on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, +// on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, +// &ta, true); + on_each_cpu_mask_tlbi(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, true); } else ___flush_tlb_range(vma, start, end, stride, last_level, tlb_level); -- CodeHub From 2f0f9917658e8b7f23aae59fc2ab03dde8eced0d Mon Sep 17 00:00:00 2001 From: Zhang Jianhua <chris.zjh@huawei.com> Date: Wed, 11 Sep 2024 10:46:43 +0800 Subject: [PATCH 09/10] add virq for /proc/interrupts hulk inclusion category: feature bugzilla: NA DTS: NA CVE: NA -------- Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d2549052f22c..1a6b04f901df 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1067,7 +1067,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) prec >= 4 ? " " : ""); for_each_online_cpu(cpu) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); - seq_printf(p, " %s\n", ipi_types[i]); + seq_printf(p, " %s, virq: %d\n", ipi_types[i], irq); } #ifdef CONFIG_RTOS_HAL_SHARE_IPI arch_show_shared_ipi(p, prec); -- CodeHub From 64e360b222e8243e156f93429862d863201e42c5 Mon Sep 17 00:00:00 2001 From: Zhang Jianhua <chris.zjh@huawei.com> Date: Wed, 11 Sep 2024 11:14:10 +0800 Subject: [PATCH 10/10] simplify tlbi flow hulk inclusion category: feature bugzilla: NA DTS: NA CVE: NA -------- Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> --- arch/arm64/kernel/smp.c | 4 +++- drivers/irqchip/irq-gic-v3.c | 22 +++++++++++++++++++ kernel/smp.c | 42 +++++++++++++++--------------------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 1a6b04f901df..940436102078 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1087,9 +1087,11 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } +extern void gic_ipi_send_mask_tlbi(int hwirq, const struct cpumask *mask); void arch_send_call_function_ipi_mask_tlbi(const struct cpumask *mask) { - smp_cross_call(mask, IPI_TLBI); +// smp_cross_call(mask, IPI_TLBI); + gic_ipi_send_mask_tlbi(IPI_TLBI, mask); } #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 1737d02137fa..8bb4ac6cff0b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1793,6 +1793,28 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) gic_write_sgi1r(val); } +void gic_ipi_send_mask_tlbi(int hwirq, const struct cpumask *mask) +{ + int cpu; + + /* + * Ensure that stores to Normal memory are visible to the + * other CPUs before issuing the IPI. 
+ */ + wmb(); + + for_each_cpu(cpu, mask) { + u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); + u16 tlist; + + tlist = gic_compute_target_list(&cpu, mask, cluster_id); + gic_send_sgi(cluster_id, tlist, hwirq); + } + + /* Force the above writes to ICC_SGI1R_EL1 to be executed */ + isb(); +} + static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) { int cpu; diff --git a/kernel/smp.c b/kernel/smp.c index b05edcb7a488..55b8fe89457f 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -372,10 +372,11 @@ void generic_smp_call_function_single_interrupt(void) static void flush_smp_call_function_queue_tlbi(bool warn_cpu_offline) { call_single_data_t *csd, *csd_next; - struct llist_node *entry, *prev; + //struct llist_node *entry, *prev; + struct llist_node *entry; struct llist_head *head; - lockdep_assert_irqs_disabled(); +// lockdep_assert_irqs_disabled(); head = this_cpu_ptr(&call_single_queue_tlbi); entry = llist_del_all(head); @@ -385,26 +386,26 @@ static void flush_smp_call_function_queue_tlbi(bool warn_cpu_offline) /* * First; run all SYNC callbacks, people are waiting for us. */ - prev = NULL; +// prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, node.llist) { /* Do we wait until *after* callback? */ - if (CSD_TYPE(csd) == CSD_TYPE_SYNC) { +// if (CSD_TYPE(csd) == CSD_TYPE_SYNC) { smp_call_func_t func = csd->func; void *info = csd->info; - if (prev) { - prev->next = &csd_next->node.llist; - } else { +// if (prev) { +// prev->next = &csd_next->node.llist; +// } else { entry = &csd_next->node.llist; - } +// } - csd_lock_record(csd); +// csd_lock_record(csd); func(info); csd_unlock(csd); - csd_lock_record(NULL); - } else { - prev = &csd->node.llist; - } +// csd_lock_record(NULL); +// } else { +// prev = &csd->node.llist; +// } } } @@ -814,17 +815,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask, } static void smp_call_function_many_cond_tlbi(const struct cpumask *mask, - smp_call_func_t func, void *info, - unsigned int scf_flags, - smp_cond_func_t cond_func) + smp_call_func_t func, void *info) { -#if 0 - ipi_tlbi_func = func; - ipi_tlbi_info = info; - - arch_send_call_function_ipi_mask_tlbi(mask); -#endif - int cpu, this_cpu = smp_processor_id(); struct call_function_data *cfd; bool run_remote = false; @@ -847,7 +839,7 @@ static void smp_call_function_many_cond_tlbi(const struct cpumask *mask, call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); csd_lock(csd); - csd->node.u_flags |= CSD_TYPE_SYNC; + //csd->node.u_flags |= CSD_TYPE_SYNC; csd->func = func; csd->info = info; llist_add(&csd->node.llist, &per_cpu(call_single_queue_tlbi, cpu)); @@ -1044,7 +1036,7 @@ void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask) { preempt_disable(); - smp_call_function_many_cond_tlbi(mask, func, info, 0, cond_func); + smp_call_function_many_cond_tlbi(mask, func, info); preempt_enable(); } EXPORT_SYMBOL(on_each_cpu_cond_mask_tlbi); -- CodeHub
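Taken together, patches 05-10 replace the generic cross-call path with a dedicated per-CPU queue (call_single_queue_tlbi) plus a raw SGI: the sender enqueues one locked request per target CPU, triggers the IPI, and spins until each handler has run the callback and unlocked its request. The sketch below is a single-threaded user-space model of that sequence under simplifying assumptions: the handler is invoked by a direct call instead of a real interrupt, and the names are stand-ins rather than kernel APIs.

/*
 * Single-threaded user-space model (a sketch, not the kernel implementation)
 * of the simplified flow: the sender queues one request per target CPU,
 * "sends an IPI", and each CPU's handler drains its queue, runs the callback
 * and releases the request so the sender can stop waiting.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

struct tlbi_request {
	void (*func)(void *info);
	void *info;
	bool locked;			/* stands in for csd_lock()/csd_unlock() */
	struct tlbi_request *next;
};

static struct tlbi_request *queue[NR_CPUS];	/* per-CPU call_single_queue_tlbi */
static struct tlbi_request reqs[NR_CPUS];	/* per-CPU request storage (cfd->csd) */

static void ipi_handler(int cpu)		/* models the IPI_TLBI handler */
{
	struct tlbi_request *req = queue[cpu];

	queue[cpu] = NULL;
	for (; req; req = req->next) {
		req->func(req->info);
		req->locked = false;		/* "csd_unlock": sender may proceed */
	}
}

static void flush_asid(void *info)
{
	printf("CPU flushes TLB entries for ASID %d\n", *(int *)info);
}

int main(void)
{
	int asid = 42;
	int cpu;

	/* Sender side: enqueue a locked request on every target CPU ... */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		reqs[cpu] = (struct tlbi_request){ flush_asid, &asid, true, queue[cpu] };
		queue[cpu] = &reqs[cpu];
	}

	/* ... "send the IPI" (here just a direct call per CPU) ... */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		ipi_handler(cpu);

	/* ... and wait until every request has been unlocked. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		while (reqs[cpu].locked)
			;			/* would spin in csd_lock_wait() */

	printf("all %d CPUs done\n", NR_CPUS);
	return 0;
}

Because this queue only ever carries TLB-invalidation work, the handler can skip the CSD-type bookkeeping that the generic flush_smp_call_function_queue() needs, which is what the final patch strips out.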