mm_cpumask was removed by commit 38d96287504a ("arm64: mm: kill
mm_cpumask usage") because it was unused at the time. It is now needed
to find the appropriate CPUs for TLB flush, so this patch reverts that commit.
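The restored mask is consumed by the IPI-based flush paths added later in this
series; a minimal sketch of the intended use (illustrative only:
flush_tlb_mm_targeted() is a hypothetical name, and ipi_flush_tlb_mm() is
introduced in the next patch):

	/* Run the ASID invalidation only on CPUs that have run this mm. */
	static void flush_tlb_mm_targeted(struct mm_struct *mm)
	{
		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, (void *)mm, true);
	}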
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 13dc8228700f..6f7b760f65cd 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -411,6 +411,7 @@ asmlinkage notrace void secondary_start_kernel(void)
*/
mmgrab(mm);
current->active_mm = mm;
+ cpumask_set_cpu(cpu, mm_cpumask(mm));
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
@@ -525,6 +526,11 @@ int __cpu_disable(void)
*/
irq_migrate_all_off_this_cpu();
+ /*
+ * Remove this CPU from the vm mask set of all processes.
+ */
+ clear_tasks_mm_cpumask(cpu);
+
return 0;
}
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 45c92d0f71d3..279d092555b9 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -224,6 +224,7 @@ set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
@@ -280,6 +281,7 @@ switch_mm_fastpath:
arm64_apply_bp_hardening();
#else
arm64_apply_bp_hardening();
+ cpumask_set_cpu(cpu, mm_cpumask(mm));
#endif
--
From f21647c3989435fcf04949b0c9db5a93f2a80716 Mon Sep 17 00:00:00 2001
From: Takao Indoh <indou.takao@fujitsu.com>
Date: Mon, 20 Dec 2021 16:00:56 +0800
Subject: [PATCH 02/10] arm64: tlb: Add boot parameter to disable TLB flush
within the same inner shareable domain
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4BLL0
CVE: NA
---------------------------
This patch adds a new boot parameter, 'disable_tlbflush_is', to disable
TLB flush within the same inner shareable domain for performance tuning.
Without this parameter, flush_tlb_mm() invalidates TLB entries with
__tlbi(aside1is, asid). That instruction makes every CPU within the same
inner shareable domain check whether it holds TLB entries for the ASID,
which causes performance noise, especially in large-scale HPC
environments with more than a thousand nodes on a low-latency
interconnect.
When the new parameter is specified, TLB entries are instead invalidated
with __tlbi(aside1, asid), and only on the CPUs in mm_cpumask(mm). The
flush therefore runs on the minimal set of CPUs and the performance
problem does not occur.
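In outline, the parameter selects between the two strategies below; this sketch
mirrors the flush_tlb_mm() added to arch/arm64/kernel/tlbflush.c by this patch
and is shown here only for illustration:

	void flush_tlb_mm(struct mm_struct *mm)
	{
		if (disable_tlbflush_is)
			/* IPI path: each CPU in mm_cpumask(mm) performs a local,
			 * non-broadcast __tlbi(aside1, asid). */
			on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
					 (void *)mm, true);
		else
			/* Default path: a single broadcast __tlbi(aside1is, asid)
			 * is observed by every CPU in the inner shareable domain. */
			__flush_tlb_mm(mm);
	}

To enable the IPI-based behaviour, append 'disable_tlbflush_is' to the kernel
command line.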
Signed-off-by: QI Fuli <qi.fuli@fujitsu.com>
Signed-off-by: Takao Indoh <indou.takao@fujitsu.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Yu Liao <liaoyu15@huawei.com>
---
.../admin-guide/kernel-parameters.txt | 4 +
arch/arm64/include/asm/mmu_context.h | 7 +-
arch/arm64/include/asm/tlbflush.h | 117 +-------
arch/arm64/kernel/Makefile | 2 +-
arch/arm64/kernel/tlbflush.c | 269 ++++++++++++++++++
5 files changed, 292 insertions(+), 107 deletions(-)
create mode 100644 arch/arm64/kernel/tlbflush.c
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f216c001fe90..69c7777bd9c9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -941,6 +941,10 @@
disable= [IPV6]
See Documentation/networking/ipv6.rst.
+ disable_tlbflush_is
+ [ARM64] Disable use of the broadcast TLB instructions that
+ flush all PEs within the same inner shareable domain.
+
hardened_usercopy=
[KNL] Under CONFIG_HARDENED_USERCOPY, whether
hardening is enabled for this boot. Hardened
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index ac4ab31267db..1ff4641b4339 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -235,9 +235,14 @@ static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
+ unsigned int cpu = smp_processor_id();
+
isovm_update_tcr_ttbr(prev, next, tsk);
- if (prev != next)
+ if (prev != next) {
__switch_mm(next);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ local_flush_tlb_mm(prev);
+ }
/*
* Update the saved TTBR0_EL1 of the scheduled-in task as the previous
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index a89bb836ae05..70b43306493c 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -302,6 +302,13 @@ __isovm_flush_tlb_range(unsigned long asid, unsigned long start, bool last_lvl,
int tlb_lvl, int scale, int num) {}
#endif /* CONFIG_RTOS_ISOLATION_VM */
+void flush_tlb_mm(struct mm_struct *mm);
+void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr);
+void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long stride, bool last_level,
+ int tlb_level);
+
static inline void local_flush_tlb_all(void)
{
dsb(nshst);
@@ -318,28 +325,14 @@ static inline void flush_tlb_all(void)
isb();
}
-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
unsigned long asid;
-
- dsb(ishst);
+ dsb(nshst);
asid = __TLBI_VADDR(0, ASID(mm));
- __tlbi(aside1is, asid);
- __tlbi_user(aside1is, asid);
- isovm_flush_tlb_mm(ASID(mm));
- dsb(ish);
-}
-
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
- unsigned long uaddr)
-{
- unsigned long addr;
-
- dsb(ishst);
- addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
- isovm_flush_tlb_page_nosync(ASID(vma->vm_mm), uaddr);
+ __tlbi(aside1, asid);
+ __tlbi_user(aside1, asid);
+ dsb(nsh);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -355,92 +348,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
*/
#define MAX_TLBI_OPS PTRS_PER_PTE
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- unsigned long stride, bool last_level,
- int tlb_level)
-{
- int num = 0;
- int scale = 0;
- unsigned long asid, addr, pages;
-
- start = round_down(start, stride);
- end = round_up(end, stride);
- pages = (end - start) >> PAGE_SHIFT;
-
- /*
- * When not uses TLB range ops, we can handle up to
- * (MAX_TLBI_OPS - 1) pages;
- * When uses TLB range ops, we can handle up to
- * (MAX_TLBI_RANGE_PAGES - 1) pages.
- */
- if ((!system_supports_tlb_range() &&
- (end - start) >= (MAX_TLBI_OPS * stride)) ||
- pages >= MAX_TLBI_RANGE_PAGES) {
- flush_tlb_mm(vma->vm_mm);
- return;
- }
-
- dsb(ishst);
- asid = ASID(vma->vm_mm);
-
- /*
- * When the CPU does not support TLB range operations, flush the TLB
- * entries one by one at the granularity of 'stride'. If the the TLB
- * range ops are supported, then:
- *
- * 1. If 'pages' is odd, flush the first page through non-range
- * operations;
- *
- * 2. For remaining pages: the minimum range granularity is decided
- * by 'scale', so multiple range TLBI operations may be required.
- * Start from scale = 0, flush the corresponding number of pages
- * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- * until no pages left.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
- */
- while (pages > 0) {
- if (!system_supports_tlb_range() ||
- pages % 2 == 1) {
- addr = __TLBI_VADDR(start, asid);
- if (last_level) {
- __tlbi_level(vale1is, addr, tlb_level);
- __tlbi_user_level(vale1is, addr, tlb_level);
- } else {
- __tlbi_level(vae1is, addr, tlb_level);
- __tlbi_user_level(vae1is, addr, tlb_level);
- }
- __isovm_flush_tlb_stride(asid, start, last_level,
- tlb_level);
- start += stride;
- pages -= stride >> PAGE_SHIFT;
- continue;
- }
-
- num = __TLBI_RANGE_NUM(pages, scale);
- if (num >= 0) {
- addr = __TLBI_VADDR_RANGE(start, asid, scale,
- num, tlb_level);
- if (last_level) {
- __tlbi(rvale1is, addr);
- __tlbi_user(rvale1is, addr);
- } else {
- __tlbi(rvae1is, addr);
- __tlbi_user(rvae1is, addr);
- }
- __isovm_flush_tlb_range(asid, start, last_level,
- tlb_level, scale, num);
- start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
- pages -= __TLBI_RANGE_PAGES(num, scale);
- }
- scale++;
- }
- dsb(ish);
-}
-
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d5aa9c62c003..4e400d8ab0af 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,7 +28,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o ipi_nmi.o
+ syscall.o proton-pack.o ipi_nmi.o tlbflush.o
targets += efi-entry.o
diff --git a/arch/arm64/kernel/tlbflush.c b/arch/arm64/kernel/tlbflush.c
new file mode 100644
index 000000000000..82ed8b21ba75
--- /dev/null
+++ b/arch/arm64/kernel/tlbflush.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 FUJITSU LIMITED
+
+#include <linux/smp.h>
+#include <asm/tlbflush.h>
+
+struct tlb_args {
+ struct vm_area_struct *ta_vma;
+ unsigned long ta_start;
+ unsigned long ta_end;
+ unsigned long ta_stride;
+ bool ta_last_level;
+ int ta_tlb_level;
+};
+
+int disable_tlbflush_is;
+
+static int __init disable_tlbflush_is_setup(char *str)
+{
+ disable_tlbflush_is = 1;
+
+ return 0;
+}
+__setup("disable_tlbflush_is", disable_tlbflush_is_setup);
+
+static inline void __flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned long asid;
+
+ dsb(ishst);
+ asid = __TLBI_VADDR(0, ASID(mm));
+ __tlbi(aside1is, asid);
+ __tlbi_user(aside1is, asid);
+ dsb(ish);
+}
+
+static inline void ipi_flush_tlb_mm(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ local_flush_tlb_mm(mm);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (disable_tlbflush_is)
+ on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
+ (void *)mm, true);
+ else
+ __flush_tlb_mm(mm);
+}
+
+static inline void __flush_tlb_page_nosync(unsigned long addr)
+{
+ dsb(ishst);
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
+}
+
+static inline void __local_flush_tlb_page_nosync(unsigned long addr)
+{
+ dsb(nshst);
+ __tlbi(vale1, addr);
+ __tlbi_user(vale1, addr);
+ dsb(nsh);
+}
+
+static inline void ipi_flush_tlb_page_nosync(void *arg)
+{
+ unsigned long addr = *(unsigned long *)arg;
+
+ __local_flush_tlb_page_nosync(addr);
+}
+
+void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr)
+{
+ unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+
+ if (disable_tlbflush_is)
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm),
+ ipi_flush_tlb_page_nosync, &addr, true);
+ else
+ __flush_tlb_page_nosync(addr);
+}
+
+
+static inline void ___flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+{
+ int num = 0;
+ int scale = 0;
+ unsigned long asid, addr, pages;
+
+ pages = (end - start) >> PAGE_SHIFT;
+
+ dsb(ishst);
+ asid = ASID(vma->vm_mm);
+
+ /*
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ * operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ * by 'scale', so multiple range TLBI operations may be required.
+ * Start from scale = 0, flush the corresponding number of pages
+ * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ * until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+ while (pages > 0) {
+ if (!system_supports_tlb_range() ||
+ pages % 2 == 1) {
+ addr = __TLBI_VADDR(start, asid);
+ if (last_level) {
+ __tlbi_level(vale1is, addr, tlb_level);
+ __tlbi_user_level(vale1is, addr, tlb_level);
+ } else {
+ __tlbi_level(vae1is, addr, tlb_level);
+ __tlbi_user_level(vae1is, addr, tlb_level);
+ }
+ start += stride;
+ pages -= stride >> PAGE_SHIFT;
+ continue;
+ }
+
+ num = __TLBI_RANGE_NUM(pages, scale);
+ if (num >= 0) {
+ addr = __TLBI_VADDR_RANGE(start, asid, scale,
+ num, tlb_level);
+ if (last_level) {
+ __tlbi(rvale1is, addr);
+ __tlbi_user(rvale1is, addr);
+ } else {
+ __tlbi(rvae1is, addr);
+ __tlbi_user(rvae1is, addr);
+ }
+ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
+ pages -= __TLBI_RANGE_PAGES(num, scale);
+ }
+ scale++;
+ }
+ dsb(ish);
+}
+
+static inline void __local_flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+{
+ int num = 0;
+ int scale = 0;
+ unsigned long asid, addr, pages;
+
+ pages = (end - start) >> PAGE_SHIFT;
+
+ dsb(nshst);
+ asid = ASID(vma->vm_mm);
+
+ /*
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ * operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ * by 'scale', so multiple range TLBI operations may be required.
+ * Start from scale = 0, flush the corresponding number of pages
+ * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ * until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+ while (pages > 0) {
+ if (!system_supports_tlb_range() ||
+ pages % 2 == 1) {
+ addr = __TLBI_VADDR(start, asid);
+ if (last_level) {
+ __tlbi_level(vale1, addr, tlb_level);
+ __tlbi_user_level(vale1, addr, tlb_level);
+ } else {
+ __tlbi_level(vae1, addr, tlb_level);
+ __tlbi_user_level(vae1, addr, tlb_level);
+ }
+ start += stride;
+ pages -= stride >> PAGE_SHIFT;
+ continue;
+ }
+
+ num = __TLBI_RANGE_NUM(pages, scale);
+ if (num >= 0) {
+ addr = __TLBI_VADDR_RANGE(start, asid, scale,
+ num, tlb_level);
+ if (last_level) {
+ __tlbi(rvale1, addr);
+ __tlbi_user(rvale1, addr);
+ } else {
+ __tlbi(rvae1, addr);
+ __tlbi_user(rvae1, addr);
+ }
+ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
+ pages -= __TLBI_RANGE_PAGES(num, scale);
+ }
+ scale++;
+ }
+ dsb(nsh);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+ struct tlb_args *ta = (struct tlb_args *)arg;
+
+ __local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end, ta->ta_stride,
+ ta->ta_last_level, ta->ta_tlb_level);
+
+}
+
+void __flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+{
+ unsigned long pages;
+
+ start = round_down(start, stride);
+ end = round_up(end, stride);
+ pages = (end - start) >> PAGE_SHIFT;
+
+ /*
+ * When TLB range ops are not used, we can handle up to
+ * (MAX_TLBI_OPS - 1) pages;
+ * when TLB range ops are used, we can handle up to
+ * (MAX_TLBI_RANGE_PAGES - 1) pages.
+ */
+ if ((!system_supports_tlb_range() &&
+ (end - start) >= (MAX_TLBI_OPS * stride)) ||
+ pages >= MAX_TLBI_RANGE_PAGES) {
+ flush_tlb_mm(vma->vm_mm);
+ return;
+ }
+
+ if (disable_tlbflush_is) {
+ struct tlb_args ta = {
+ .ta_vma = vma,
+ .ta_start = start,
+ .ta_end = end,
+ .ta_stride = stride,
+ .ta_last_level = last_level,
+ .ta_tlb_level = tlb_level,
+ };
+
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+ &ta, true);
+ } else
+ ___flush_tlb_range(vma, start, end, stride, last_level, tlb_level);
+}
+
--
From 4c393bf82f57255cfc5d76bfa9824f5216c7d020 Mon Sep 17 00:00:00 2001
From: Yu Liao <liaoyu15@huawei.com>
Date: Tue, 7 May 2024 10:28:42 +0800
Subject: [PATCH 03/10] Revert "arm64/mm: save memory access in
check_and_switch_context() fast switch path"
This reverts commit c4885bbb3afee80f41d39a33e49881a18e500f47.
---
arch/arm64/include/asm/mmu_context.h | 6 ++++--
arch/arm64/mm/context.c | 10 ++++------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 1ff4641b4339..564700e617b4 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -174,7 +174,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
* take CPU migration into account.
*/
#define destroy_context(mm) do { } while(0)
-void check_and_switch_context(struct mm_struct *mm);
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
static inline int
init_new_context(struct task_struct *tsk, struct mm_struct *mm)
@@ -219,6 +219,8 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void __switch_mm(struct mm_struct *next)
{
+ unsigned int cpu = smp_processor_id();
+
/*
* init_mm.pgd does not contain any user mappings and it is always
* active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -228,7 +230,7 @@ static inline void __switch_mm(struct mm_struct *next)
return;
}
- check_and_switch_context(next);
+ check_and_switch_context(next, cpu);
}
static inline void
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 279d092555b9..a7c186347deb 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -228,10 +228,9 @@ set_asid:
return idx2asid(asid) | generation;
}
-void check_and_switch_context(struct mm_struct *mm)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
unsigned long flags;
- unsigned int cpu;
u64 asid, old_active_asid;
if (system_supports_cnp())
@@ -253,9 +252,9 @@ void check_and_switch_context(struct mm_struct *mm)
* relaxed xchg in flush_context will treat us as reserved
* because atomic RmWs are totally ordered for a given location.
*/
- old_active_asid = atomic64_read(this_cpu_ptr(&active_asids));
+ old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
if (old_active_asid && asid_gen_match(asid) &&
- atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids),
+ atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
old_active_asid, asid))
goto switch_mm_fastpath;
@@ -267,11 +266,10 @@ void check_and_switch_context(struct mm_struct *mm)
atomic64_set(&mm->context.id, asid);
}
- cpu = smp_processor_id();
if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
local_flush_tlb_all();
- atomic64_set(this_cpu_ptr(&active_asids), asid);
+ atomic64_set(&per_cpu(active_asids, cpu), asid);
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
--
From 297a6ee0e58aea4cc1fa8f4e883b35f9bd5bc62d Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Fri, 6 Sep 2024 10:14:06 +0800
Subject: [PATCH 04/10] add testcase of test_smp_call
hulk inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
---
drivers/Makefile | 1 +
drivers/xint/Makefile | 2 ++
drivers/xint/test_smp_call.c | 36 ++++++++++++++++++++++++++++++++++++
3 files changed, 39 insertions(+)
create mode 100644 drivers/xint/Makefile
create mode 100644 drivers/xint/test_smp_call.c
diff --git a/drivers/Makefile b/drivers/Makefile
index 50205b73f3f9..ce6feccd117f 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -200,3 +200,4 @@ obj-$(CONFIG_RTOS) += hal/
obj-$(CONFIG_KEXEC_KERNEL_HOTUPGRADE) += vpmem/
obj-$(CONFIG_ARCH_BSP) += vendor/
+obj-y += xint/
diff --git a/drivers/xint/Makefile b/drivers/xint/Makefile
new file mode 100644
index 000000000000..3d90c9723d4d
--- /dev/null
+++ b/drivers/xint/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-m += test_smp_call.o
diff --git a/drivers/xint/test_smp_call.c b/drivers/xint/test_smp_call.c
new file mode 100644
index 000000000000..4272001d9aca
--- /dev/null
+++ b/drivers/xint/test_smp_call.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2020. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Wed Mar 06 14:36:38 2024
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <asm/smp.h>
+
+static int tmp = 1;
+
+static void ipi_func(void *arg)
+{
+ pr_info("CPU %d receive ipi, arg = %d\n", smp_processor_id(), *(int *)arg);
+}
+
+// Send smp_call_function_tlbi to all CPUs
+static int test_init(void)
+{
+// smp_call_function_tlbi(cpu_online_mask, ipi_func, &tmp);
+ on_each_cpu(ipi_func, &tmp, true);
+ return 0;
+}
+
+static void test_exit(void)
+{
+ return;
+}
+
+module_init(test_init);
+module_exit(test_exit);
+
+MODULE_DESCRIPTION("test smp_call_function_tlbi");
+MODULE_LICENSE("GPL");
--
From fc17bfc0ba827d8f172aa891c2aa1e7142f11010 Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Sat, 7 Sep 2024 16:59:46 +0800
Subject: [PATCH 05/10] add on_each_cpu_mask_tlbi()
hulk inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
---
arch/arm64/include/asm/smp.h | 2 ++
arch/arm64/kernel/smp.c | 11 +++++++++++
include/linux/smp.h | 12 ++++++++++++
kernel/smp.c | 28 ++++++++++++++++++++++++++++
4 files changed, 53 insertions(+)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 8c5d2d650b8a..ed16b7dd054f 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -90,6 +90,8 @@ extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+extern void arch_send_call_function_ipi_mask_tlbi(const struct cpumask *mask);
+
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
#else
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6f7b760f65cd..ffbf4d0eea56 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -88,6 +88,7 @@ static int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
+ IPI_TLBI,
IPI_CPU_STOP,
IPI_CPU_CRASH_STOP,
IPI_TIMER,
@@ -1041,6 +1042,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
#define S(x,s) [x] = s
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
S(IPI_CALL_FUNC, "Function call interrupts"),
+ S(IPI_TLBI, "Function call interrupts for tlbi"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
@@ -1085,6 +1087,11 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
+void arch_send_call_function_ipi_mask_tlbi(const struct cpumask *mask)
+{
+ smp_cross_call(mask, IPI_TLBI);
+}
+
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
@@ -1188,6 +1195,10 @@ static void do_handle_IPI(int ipinr)
generic_smp_call_function_interrupt();
break;
+ case IPI_TLBI:
+ ipi_tlbi_func(ipi_tlbi_info);
+ break;
+
case IPI_CPU_STOP:
local_cpu_stop();
break;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 812c26f61300..67b4738e6d4b 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -17,6 +17,9 @@
typedef void (*smp_call_func_t)(void *info);
typedef bool (*smp_cond_func_t)(int cpu, void *info);
+extern void *ipi_tlbi_info;
+extern smp_call_func_t ipi_tlbi_func;
+
/*
* structure shares (partial) layout with struct irq_work
*/
@@ -53,6 +56,9 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask);
+void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func,
+ void *info, bool wait, const struct cpumask *mask);
+
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
/*
@@ -85,6 +91,12 @@ static inline void on_each_cpu_mask(const struct cpumask *mask,
on_each_cpu_cond_mask(NULL, func, info, wait, mask);
}
+static inline void on_each_cpu_mask_tlbi(const struct cpumask *mask,
+ smp_call_func_t func, void *info, bool wait)
+{
+ on_each_cpu_cond_mask_tlbi(NULL, func, info, wait, mask);
+}
+
/*
* Call a function on each processor for which the supplied function
* cond_func returns a positive value. This may include the local
diff --git a/kernel/smp.c b/kernel/smp.c
index 27ded167f36d..cedb47df4579 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -32,6 +32,9 @@
#define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
+void *ipi_tlbi_info;
+smp_call_func_t ipi_tlbi_func;
+
struct call_function_data {
call_single_data_t __percpu *csd;
cpumask_var_t cpumask;
@@ -763,6 +766,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
}
}
+static void smp_call_function_many_cond_tlbi(const struct cpumask *mask,
+ smp_call_func_t func, void *info,
+ unsigned int scf_flags,
+ smp_cond_func_t cond_func)
+{
+ ipi_tlbi_func = func;
+ ipi_tlbi_info = info;
+
+ arch_send_call_function_ipi_mask_tlbi(mask);
+}
+
/**
* smp_call_function_many(): Run a function on a set of CPUs.
* @mask: The set of cpus to run on (only runs on online subset).
@@ -929,6 +943,20 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
}
EXPORT_SYMBOL(on_each_cpu_cond_mask);
+void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func,
+ void *info, bool wait, const struct cpumask *mask)
+{
+ unsigned int scf_flags = SCF_RUN_LOCAL;
+
+ if (wait)
+ scf_flags |= SCF_WAIT;
+
+ preempt_disable();
+ smp_call_function_many_cond_tlbi(mask, func, info, scf_flags, cond_func);
+ preempt_enable();
+}
+EXPORT_SYMBOL(on_each_cpu_cond_mask_tlbi);
+
static void do_nothing(void *unused)
{
}
--
From 6f0f2786d337432f5c9d771813c318ee637d05d8 Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Sat, 7 Sep 2024 17:50:00 +0800
Subject: [PATCH 06/10] implement real flush tlbi func
hulk inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
---
arch/arm64/kernel/smp.c | 3 +-
drivers/xint/test_smp_call.c | 4 +-
include/linux/smp.h | 4 ++
kernel/smp.c | 106 ++++++++++++++++++++++++++++++++---
4 files changed, 107 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index ffbf4d0eea56..d2549052f22c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1196,7 +1196,8 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_TLBI:
- ipi_tlbi_func(ipi_tlbi_info);
+ generic_smp_call_function_interrupt_tlbi();
+ //ipi_tlbi_func(ipi_tlbi_info);
break;
case IPI_CPU_STOP:
diff --git a/drivers/xint/test_smp_call.c b/drivers/xint/test_smp_call.c
index 4272001d9aca..a0c681d5c5ba 100644
--- a/drivers/xint/test_smp_call.c
+++ b/drivers/xint/test_smp_call.c
@@ -19,8 +19,8 @@ static void ipi_func(void *arg)
// Send smp_call_function_tlbi to all CPUs
static int test_init(void)
{
-// smp_call_function_tlbi(cpu_online_mask, ipi_func, &tmp);
- on_each_cpu(ipi_func, &tmp, true);
+ on_each_cpu_mask_tlbi(cpu_online_mask, ipi_func, &tmp, true);
+
return 0;
}
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 67b4738e6d4b..2e402d91f40b 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -169,6 +169,10 @@ void generic_smp_call_function_single_interrupt(void);
#define generic_smp_call_function_interrupt \
generic_smp_call_function_single_interrupt
+void generic_smp_call_function_single_interrupt_tlbi(void);
+#define generic_smp_call_function_interrupt_tlbi \
+ generic_smp_call_function_single_interrupt_tlbi
+
/*
* Mark the boot cpu "online" so that it can call console drivers in
* printk() and can access its per-cpu storage.
diff --git a/kernel/smp.c b/kernel/smp.c
index cedb47df4579..b05edcb7a488 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -44,6 +44,7 @@ struct call_function_data {
static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue_tlbi);
static void flush_smp_call_function_queue(bool warn_cpu_offline);
@@ -107,8 +108,10 @@ void __init call_function_init(void)
{
int i;
- for_each_possible_cpu(i)
+ for_each_possible_cpu(i) {
init_llist_head(&per_cpu(call_single_queue, i));
+ init_llist_head(&per_cpu(call_single_queue_tlbi, i));
+ }
smpcfd_prepare_cpu(smp_processor_id());
}
@@ -366,6 +369,50 @@ void generic_smp_call_function_single_interrupt(void)
flush_smp_call_function_queue(true);
}
+static void flush_smp_call_function_queue_tlbi(bool warn_cpu_offline)
+{
+ call_single_data_t *csd, *csd_next;
+ struct llist_node *entry, *prev;
+ struct llist_head *head;
+
+ lockdep_assert_irqs_disabled();
+
+ head = this_cpu_ptr(&call_single_queue_tlbi);
+ entry = llist_del_all(head);
+ entry = llist_reverse_order(entry);
+
+ // Only serves TLBI; all entries are of the CSD_TYPE_SYNC type
+ /*
+ * First; run all SYNC callbacks, people are waiting for us.
+ */
+ prev = NULL;
+ llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
+ /* Do we wait until *after* callback? */
+ if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+ smp_call_func_t func = csd->func;
+ void *info = csd->info;
+
+ if (prev) {
+ prev->next = &csd_next->node.llist;
+ } else {
+ entry = &csd_next->node.llist;
+ }
+
+ csd_lock_record(csd);
+ func(info);
+ csd_unlock(csd);
+ csd_lock_record(NULL);
+ } else {
+ prev = &csd->node.llist;
+ }
+ }
+}
+
+void generic_smp_call_function_single_interrupt_tlbi(void)
+{
+ flush_smp_call_function_queue_tlbi(true);
+}
+
/**
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
*
@@ -771,10 +818,60 @@ static void smp_call_function_many_cond_tlbi(const struct cpumask *mask,
unsigned int scf_flags,
smp_cond_func_t cond_func)
{
+#if 0
ipi_tlbi_func = func;
ipi_tlbi_info = info;
arch_send_call_function_ipi_mask_tlbi(mask);
+#endif
+
+ int cpu, this_cpu = smp_processor_id();
+ struct call_function_data *cfd;
+ bool run_remote = false;
+ bool run_local = false;
+
+ cfd = this_cpu_ptr(&cfd_data);
+ cpumask_and(cfd->cpumask, mask, cpu_online_mask);
+
+ /* Check if we need local execution. */
+ if (cpumask_test_cpu(this_cpu, cfd->cpumask))
+ run_local = true;
+
+ /* Check if we need remote execution, i.e., any CPU excluding this one. */
+ __cpumask_clear_cpu(this_cpu, cfd->cpumask);
+ if (!cpumask_empty(cfd->cpumask))
+ run_remote = true;
+
+ if (run_remote) {
+ for_each_cpu(cpu, cfd->cpumask) {
+ call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
+
+ csd_lock(csd);
+ csd->node.u_flags |= CSD_TYPE_SYNC;
+ csd->func = func;
+ csd->info = info;
+ llist_add(&csd->node.llist, &per_cpu(call_single_queue_tlbi, cpu));
+ }
+
+ arch_send_call_function_ipi_mask_tlbi(cfd->cpumask);
+ }
+
+ if (run_local) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ func(info);
+ local_irq_restore(flags);
+ }
+
+ if (run_remote) {
+ for_each_cpu(cpu, cfd->cpumask) {
+ call_single_data_t *csd;
+
+ csd = per_cpu_ptr(cfd->csd, cpu);
+ csd_lock_wait(csd);
+ }
+ }
}
/**
@@ -946,13 +1043,8 @@ EXPORT_SYMBOL(on_each_cpu_cond_mask);
void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask)
{
- unsigned int scf_flags = SCF_RUN_LOCAL;
-
- if (wait)
- scf_flags |= SCF_WAIT;
-
preempt_disable();
- smp_call_function_many_cond_tlbi(mask, func, info, scf_flags, cond_func);
+ smp_call_function_many_cond_tlbi(mask, func, info, 0, cond_func);
preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond_mask_tlbi);
--
From 1ca7aa7c99f02f752bfefb08adbd1e4c11a70368 Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Sun, 8 Sep 2024 11:11:35 +0800
Subject: [PATCH 07/10] [Huawei] arm64: xint: support sgi
hulk inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
---
drivers/irqchip/irq-gic-v3.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 4f7966fde66e..1737d02137fa 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -825,6 +825,10 @@ static void update_xcache(struct irq_desc *desc, unsigned int hwirq, bool valid)
static void xint_insert_desc(unsigned int hwirq, struct irq_desc *desc)
{
switch (__get_intid_range(hwirq)) {
+ case SGI_RANGE:
+ xint_desc_array[hwirq] = desc;
+ return;
+
case SPI_RANGE:
xint_desc_array[hwirq] = desc;
return;
@@ -841,6 +845,9 @@ static void xint_insert_desc(unsigned int hwirq, struct irq_desc *desc)
struct irq_desc *xint_to_desc(unsigned int hwirq)
{
switch (__get_intid_range(hwirq)) {
+ case SGI_RANGE:
+ return xint_desc_array[hwirq];
+
case SPI_RANGE:
return xint_desc_array[hwirq];
@@ -855,6 +862,10 @@ struct irq_desc *xint_to_desc(unsigned int hwirq)
static void xint_delete_desc(unsigned int hwirq)
{
switch (__get_intid_range(hwirq)) {
+ case SGI_RANGE:
+ xint_desc_array[hwirq] = NULL;
+ return;
+
case SPI_RANGE:
xint_desc_array[hwirq] = NULL;
return;
--
From caba06c89b5ca237abe48132ec68a550fa61c620 Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Sun, 8 Sep 2024 16:21:49 +0800
Subject: [PATCH 08/10] replace on_each_cpu_mask with on_each_cpu_mask_tlbi for
tlbi
hulk inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
---
arch/arm64/kernel/tlbflush.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/tlbflush.c b/arch/arm64/kernel/tlbflush.c
index 82ed8b21ba75..305d9a119f85 100644
--- a/arch/arm64/kernel/tlbflush.c
+++ b/arch/arm64/kernel/tlbflush.c
@@ -44,7 +44,9 @@ static inline void ipi_flush_tlb_mm(void *arg)
void flush_tlb_mm(struct mm_struct *mm)
{
if (disable_tlbflush_is)
- on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
+// on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
+// (void *)mm, true);
+ on_each_cpu_mask_tlbi(mm_cpumask(mm), ipi_flush_tlb_mm,
(void *)mm, true);
else
__flush_tlb_mm(mm);
@@ -77,7 +79,9 @@ void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr)
unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
if (disable_tlbflush_is)
- on_each_cpu_mask(mm_cpumask(vma->vm_mm),
+// on_each_cpu_mask(mm_cpumask(vma->vm_mm),
+// ipi_flush_tlb_page_nosync, &addr, true);
+ on_each_cpu_mask_tlbi(mm_cpumask(vma->vm_mm),
ipi_flush_tlb_page_nosync, &addr, true);
else
__flush_tlb_page_nosync(addr);
@@ -261,7 +265,9 @@ void __flush_tlb_range(struct vm_area_struct *vma,
.ta_tlb_level = tlb_level,
};
- on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+// on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+// &ta, true);
+ on_each_cpu_mask_tlbi(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
&ta, true);
} else
___flush_tlb_range(vma, start, end, stride, last_level, tlb_level);
--
From 2f0f9917658e8b7f23aae59fc2ab03dde8eced0d Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Wed, 11 Sep 2024 10:46:43 +0800
Subject: [PATCH 09/10] add virq for /proc/interrupts
hulk inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
---
arch/arm64/kernel/smp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d2549052f22c..1a6b04f901df 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1067,7 +1067,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
- seq_printf(p, " %s\n", ipi_types[i]);
+ seq_printf(p, " %s, virq: %d\n", ipi_types[i], irq);
}
#ifdef CONFIG_RTOS_HAL_SHARE_IPI
arch_show_shared_ipi(p, prec);
--
From 64e360b222e8243e156f93429862d863201e42c5 Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Wed, 11 Sep 2024 11:14:10 +0800
Subject: [PATCH 10/10] simplify tlbi flow
hulk inclusion
category: feature
bugzilla: NA
DTS: NA
CVE: NA
--------
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
---
arch/arm64/kernel/smp.c | 4 +++-
drivers/irqchip/irq-gic-v3.c | 22 +++++++++++++++++++
kernel/smp.c | 42 +++++++++++++++---------------------
3 files changed, 42 insertions(+), 26 deletions(-)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 1a6b04f901df..940436102078 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1087,9 +1087,11 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
+extern void gic_ipi_send_mask_tlbi(int hwirq, const struct cpumask *mask);
void arch_send_call_function_ipi_mask_tlbi(const struct cpumask *mask)
{
- smp_cross_call(mask, IPI_TLBI);
+// smp_cross_call(mask, IPI_TLBI);
+ gic_ipi_send_mask_tlbi(IPI_TLBI, mask);
}
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 1737d02137fa..8bb4ac6cff0b 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1793,6 +1793,28 @@ static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
gic_write_sgi1r(val);
}
+void gic_ipi_send_mask_tlbi(int hwirq, const struct cpumask *mask)
+{
+ int cpu;
+
+ /*
+ * Ensure that stores to Normal memory are visible to the
+ * other CPUs before issuing the IPI.
+ */
+ wmb();
+
+ for_each_cpu(cpu, mask) {
+ u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
+ u16 tlist;
+
+ tlist = gic_compute_target_list(&cpu, mask, cluster_id);
+ gic_send_sgi(cluster_id, tlist, hwirq);
+ }
+
+ /* Force the above writes to ICC_SGI1R_EL1 to be executed */
+ isb();
+}
+
static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
{
int cpu;
diff --git a/kernel/smp.c b/kernel/smp.c
index b05edcb7a488..55b8fe89457f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -372,10 +372,11 @@ void generic_smp_call_function_single_interrupt(void)
static void flush_smp_call_function_queue_tlbi(bool warn_cpu_offline)
{
call_single_data_t *csd, *csd_next;
- struct llist_node *entry, *prev;
+ //struct llist_node *entry, *prev;
+ struct llist_node *entry;
struct llist_head *head;
- lockdep_assert_irqs_disabled();
+// lockdep_assert_irqs_disabled();
head = this_cpu_ptr(&call_single_queue_tlbi);
entry = llist_del_all(head);
@@ -385,26 +386,26 @@ static void flush_smp_call_function_queue_tlbi(bool warn_cpu_offline)
/*
* First; run all SYNC callbacks, people are waiting for us.
*/
- prev = NULL;
+// prev = NULL;
llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
/* Do we wait until *after* callback? */
- if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+// if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
smp_call_func_t func = csd->func;
void *info = csd->info;
- if (prev) {
- prev->next = &csd_next->node.llist;
- } else {
+// if (prev) {
+// prev->next = &csd_next->node.llist;
+// } else {
entry = &csd_next->node.llist;
- }
+// }
- csd_lock_record(csd);
+// csd_lock_record(csd);
func(info);
csd_unlock(csd);
- csd_lock_record(NULL);
- } else {
- prev = &csd->node.llist;
- }
+// csd_lock_record(NULL);
+// } else {
+// prev = &csd->node.llist;
+// }
}
}
@@ -814,17 +815,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
}
static void smp_call_function_many_cond_tlbi(const struct cpumask *mask,
- smp_call_func_t func, void *info,
- unsigned int scf_flags,
- smp_cond_func_t cond_func)
+ smp_call_func_t func, void *info)
{
-#if 0
- ipi_tlbi_func = func;
- ipi_tlbi_info = info;
-
- arch_send_call_function_ipi_mask_tlbi(mask);
-#endif
-
int cpu, this_cpu = smp_processor_id();
struct call_function_data *cfd;
bool run_remote = false;
@@ -847,7 +839,7 @@ static void smp_call_function_many_cond_tlbi(const struct cpumask *mask,
call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
csd_lock(csd);
- csd->node.u_flags |= CSD_TYPE_SYNC;
+ //csd->node.u_flags |= CSD_TYPE_SYNC;
csd->func = func;
csd->info = info;
llist_add(&csd->node.llist, &per_cpu(call_single_queue_tlbi, cpu));
@@ -1044,7 +1036,7 @@ void on_each_cpu_cond_mask_tlbi(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask)
{
preempt_disable();
- smp_call_function_many_cond_tlbi(mask, func, info, 0, cond_func);
+ smp_call_function_many_cond_tlbi(mask, func, info);
preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond_mask_tlbi);
--