hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8929 ---------------------------------------- SMT QoS leverage lightweight dedicated IPIs to expedite WFI sleep for offline tasks, ensuring they are promptly preempted upon online task arrival. Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com> --- arch/arm64/include/asm/cpufeature.h | 5 ++ arch/arm64/include/asm/xint.h | 15 ++++++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/entry-common.c | 16 ++++++ arch/arm64/kernel/entry.S | 14 +++-- arch/arm64/kernel/smp.c | 23 ++++++++ arch/arm64/kernel/smt_qos.c | 84 +++++++++++++++++++++++++++++ arch/arm64/kernel/xcall/entry.S | 78 +++++++++++++++++++++++++++ drivers/irqchip/irq-gic-v3.c | 43 +++++++++++++++ kernel/sched/fair.c | 44 +++++++++++++++ 10 files changed, 320 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/include/asm/xint.h create mode 100644 arch/arm64/kernel/smt_qos.c diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6f73a51d2422..fed81fd3baf2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -851,6 +851,11 @@ static __always_inline bool system_uses_xcall_xint(void) cpus_have_const_cap(ARM64_HAS_HW_XCALL_XINT); } +static __always_inline bool system_uses_xint(void) +{ + return IS_ENABLED(CONFIG_FAST_IRQ) && cpus_have_const_cap(ARM64_HAS_XINT); +} + static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && diff --git a/arch/arm64/include/asm/xint.h b/arch/arm64/include/asm/xint.h new file mode 100644 index 000000000000..00ab27b327fa --- /dev/null +++ b/arch/arm64/include/asm/xint.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_XINT_H +#define __ASM_XINT_H + +#define NR_IPI_USER 7 // SGI + +#ifndef __ASSEMBLY__ +#include <linux/topology.h> + +extern void gic_handle_irq_noack(struct pt_regs *regs); +extern void gic_handle_nmi_noack(struct pt_regs 
*regs); +extern void arch_smp_send_ipi_user(int cpu); +extern bool should_restrict(void); +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_XINT_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 300bfcb8a890..b61c72715daa 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -80,6 +80,7 @@ obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o obj-$(CONFIG_ARM64_ILP32) += vdso-ilp32/ obj-$(CONFIG_FAST_SYSCALL) += xcall/ +obj-$(CONFIG_SMT_QOS) += smt_qos.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o obj-$(CONFIG_IPI_AS_NMI) += ipi_nmi.o obj-$(CONFIG_HISI_VIRTCCA_GUEST) += virtcca_cvm_guest.o virtcca_cvm_tsi.o diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index c72993bb4563..55f416ea0303 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -26,6 +26,7 @@ #include <asm/stacktrace.h> #include <asm/sysreg.h> #include <asm/system_misc.h> +#include <asm/xint.h> /* * Handle IRQ/context state management when entering from kernel mode. @@ -945,6 +946,21 @@ static void noinstr __el0_irq_handler_common(struct pt_regs *regs) el0_interrupt(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); } +#ifdef CONFIG_FAST_IRQ +DECLARE_PER_CPU(u32, cpu_iar); +/* + * The generic exception handler for SPI and LPI taken from EL0 on early CPUs + * before 920G. Most of the code comes from el0_interrupt(), except that it passes + * irqnr to the GIC driver via a per-cpu variable because the IRQ Ack is completed + * in entry code.
+ */ +asmlinkage void noinstr el0t_64_acked_irq_handler(struct pt_regs *regs, u32 irqnr) +{ + this_cpu_write(cpu_iar, irqnr); + el0_interrupt(regs, ISR_EL1_IS, gic_handle_irq_noack, gic_handle_nmi_noack); +} +#endif /* CONFIG_FAST_IRQ */ + asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs) { __el0_irq_handler_common(regs); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 039ec8d40899..d0518a31b689 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -554,7 +554,7 @@ tsk .req x28 // current thread_info .text -#ifdef CONFIG_FAST_SYSCALL +#if defined(CONFIG_FAST_SYSCALL) || defined(CONFIG_FAST_IRQ) #include "xcall/entry.S" #endif @@ -579,8 +579,12 @@ SYM_CODE_START(vectors) sync_ventry // Synchronous 64-bit EL0 #else kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0 -#endif +#endif /* CONFIG_FAST_SYSCALL */ +#ifdef CONFIG_FAST_IRQ + irq_ventry // XINT 64-bit EL0 +#else kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0 +#endif kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0 kernel_ventry 0, t, 64, error // Error 64-bit EL0 @@ -607,8 +611,12 @@ SYM_CODE_START(vectors_xcall_xint) sync_ventry // Synchronous 64-bit EL0 #else kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0 -#endif +#endif /* CONFIG_FAST_SYSCALL */ +#ifdef CONFIG_FAST_IRQ + irq_ventry // XINT 64-bit EL0 +#else kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0 +#endif kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0 kernel_ventry 0, t, 64, error // Error 64-bit EL0 diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dfdc7b2b3c3f..e0f450aea847 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -55,6 +55,7 @@ #include <asm/tlbflush.h> #include <asm/ptrace.h> #include <asm/virt.h> +#include <asm/xint.h> #include <trace/events/ipi.h> @@ -78,6 +79,9 @@ enum ipi_msg_type { IPI_TIMER, IPI_IRQ_WORK, IPI_WAKEUP, +#ifdef CONFIG_SMT_QOS + IPI_USER, +#endif NR_IPI }; @@ -806,6 +810,9 @@ static const char *ipi_types[NR_IPI] 
__tracepoint_string = { [IPI_TIMER] = "Timer broadcast interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_WAKEUP] = "CPU wake-up interrupts", +#ifdef CONFIG_SMT_QOS + [IPI_USER] = "Userspace IPI", +#endif }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); @@ -961,6 +968,11 @@ static void do_handle_IPI(int ipinr) cpu); break; #endif +#ifdef CONFIG_SMT_QOS + case IPI_USER: + /* Do nothing */ + break; +#endif default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); @@ -1062,6 +1074,17 @@ void tick_broadcast(const struct cpumask *mask) } #endif +#ifdef CONFIG_SMT_QOS +void arch_smp_send_ipi_user(int cpu) +{ + struct irq_desc *desc = ipi_desc[IPI_USER]; + struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + chip->ipi_send_mask(data, cpumask_of(cpu)); +} +#endif + /* * The number of CPUs online, not counting this CPU (which may not be * fully online and so not counted in num_online_cpus()). 
diff --git a/arch/arm64/kernel/smt_qos.c b/arch/arm64/kernel/smt_qos.c new file mode 100644 index 000000000000..4e97d1fea7b7 --- /dev/null +++ b/arch/arm64/kernel/smt_qos.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) "smt_qos: " fmt + +#include <linux/module.h> + +#include <asm/arch_gicv3.h> +#include <asm/cpuidle.h> +#include <asm/daifflags.h> +#include <asm/timex.h> +#include <asm/xint.h> + +#include <vdso/time64.h> + +static unsigned int sysctl_sched_wfi_timeout = 50; +static DEFINE_STATIC_KEY_TRUE(split_mode); + +static void irq_complete(u32 irqnr) +{ + if (static_branch_likely(&split_mode)) + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); +} + +static void irq_deactive(u32 irqnr) +{ + if (static_branch_likely(&split_mode)) { + gic_write_dir(irqnr); + } else { + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); + } +} + +static __always_inline void throttle_offline(void) +{ + cycles_t start, end; + u64 delta_us = 0; + + local_daif_restore(DAIF_PROCCTX); + + start = get_cycles(); + while (delta_us < sysctl_sched_wfi_timeout && should_restrict()) { + cpu_do_idle(); + end = get_cycles(); + delta_us = (end - start) * USEC_PER_SEC / arch_timer_get_cntfrq(); + } + + local_daif_mask(); +} + +asmlinkage void el0_xint_ipi_handler(struct pt_regs *regs) +{ + irq_complete(NR_IPI_USER); + irq_deactive(NR_IPI_USER); + throttle_offline(); +} + +static struct ctl_table sched_wfi_timeout_sysctl_table[] = { + { + .procname = "sched_wfi_timeout_us", + .data = &sysctl_sched_wfi_timeout, + .maxlen = sizeof(sysctl_sched_wfi_timeout), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_THOUSAND, + }, + {} +}; + +static int __init xint_init(void) +{ + if (!system_uses_xint()) + return 0; + + register_sysctl_init("kernel", sched_wfi_timeout_sysctl_table); + + if (!is_hyp_mode_available()) + static_branch_disable(&split_mode); + + pr_info("GIC split mode enabled: %d\n", static_key_enabled(&split_mode)); + return 
0; +} +module_init(xint_init); diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S index d5ed68db1547..eb5994352bd1 100644 --- a/arch/arm64/kernel/xcall/entry.S +++ b/arch/arm64/kernel/xcall/entry.S @@ -291,3 +291,81 @@ alternative_else_nop_endif br x20 .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm + +#ifdef CONFIG_FAST_IRQ +#include <asm/xint.h> + +SYM_CODE_START_LOCAL(el0t_64_irq_entry) + ldp x20, x21, [sp, #16 * 10] + kernel_entry 0, 64 + mov x0, sp + ldr x1, [sp, #(S_SYSCALLNO - 8)] + bl el0t_64_acked_irq_handler + b ret_to_user +SYM_CODE_END(el0t_64_irq_entry) + +#ifdef CONFIG_SMT_QOS +SYM_CODE_START_LOCAL(el0_xint_ipi) + ldp x20, x21, [sp, #16 * 10] + hw_xcall_save_base_regs + mov x0, sp + bl el0_xint_ipi_handler + hw_xcall_restore_base_regs +SYM_CODE_END(el0_xint_ipi) +#endif + +SYM_CODE_START_LOCAL(el0t_64_irq_table) + /* Add more of SGIs or PPIs handled in el0 here */ + .rept NR_IPI_USER + .word el0t_64_irq_table - el0t_64_irq_entry + .endr +#ifdef CONFIG_SMT_QOS + .word el0t_64_irq_table - el0_xint_ipi +#else + .word el0t_64_irq_table - el0t_64_irq_entry +#endif + .rept 31 - NR_IPI_USER + .word el0t_64_irq_table - el0t_64_irq_entry + .endr +SYM_CODE_END(el0t_64_irq_table) + + .macro irq_ventry + .align 7 +.Lventry_start\@: + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. 
+ */ + b .Lskip_tramp_vectors_cleanup\@ + mrs x30, tpidrro_el0 + msr tpidrro_el0, xzr +.Lskip_tramp_vectors_cleanup\@: + sub sp, sp, #PT_REGS_SIZE +alternative_if_not ARM64_HAS_XINT + b el0t_64_irq +alternative_else_nop_endif + stp x20, x21, [sp, #16 * 10] +alternative_if ARM64_USES_NMI + mrs x21, isr_el1 + tbz x21, ISR_EL1_IS_SHIFT, 0f + mrs_s x21, SYS_ICC_NMIAR1_EL1 + dsb sy + b 1f +alternative_else_nop_endif +0: + mrs x21, icc_iar1_el1 + dsb sy +1: + /* Save irqnr for use later */ + str x21, [sp, #(S_SYSCALLNO - 8)] + /* All SPI and LPI back to kernel native entry */ + cmp x21, 32 + b.ge el0t_64_irq_entry + /* Using jump table for different SGIs and PPIs */ + adr x20, el0t_64_irq_table + ldr w21, [x20, x21, lsl #2] + sub x20, x20, x21 + br x20 +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? + .endm +#endif /* CONFIG_FAST_IRQ */ diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 849d2e0db4fd..83f836609c13 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1003,6 +1003,49 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs } #ifdef CONFIG_FAST_IRQ +#include <asm/xint.h> + +/* + * Since the IRQ is taken from EL0 and IRQ Ack completed in entry code, + * So no need to read IAR here, Most of code comes from + * __gic_handle_irq_from_irqson(). 
+ */ +DEFINE_PER_CPU(u32, cpu_iar); +asmlinkage void __exception_irq_entry gic_handle_irq_noack(struct pt_regs *regs) +{ + bool is_nmi; + u32 irqnr; + + irqnr = this_cpu_read(cpu_iar); + + is_nmi = gic_rpr_is_nmi_prio(); + + if (is_nmi) { + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); + } + + if (gic_prio_masking_enabled()) { + gic_pmr_mask_irqs(); + gic_arch_enable_irqs(); + } else if (has_v3_3_nmi()) { +#ifdef CONFIG_ARM64_NMI + _allint_clear(); +#endif + } + + if (!is_nmi) + __gic_handle_irq(irqnr, regs); +} + +asmlinkage void __exception_irq_entry gic_handle_nmi_noack(struct pt_regs *regs) +{ + u32 irqnr = this_cpu_read(cpu_iar); + + __gic_handle_nmi(irqnr, regs); +} + DECLARE_BITMAP(irqnr_xint_map, 1024); static bool can_set_xint(unsigned int hwirq) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3e9f0b8070b8..d79892871835 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9322,6 +9322,8 @@ static int wake_soft_domain(struct task_struct *p, int target) #endif #ifdef CONFIG_SMT_QOS +#include <asm/xint.h> + static DEFINE_PER_CPU_ALIGNED(cpumask_t, smt_prefer_cpus); static unsigned long numa_smt_util[MAX_NUMNODES]; /* @@ -9412,6 +9414,45 @@ static inline void smt_qos_restore_task_select_cpus(struct task_struct *p, p->select_cpus = backup_select_cpus; } +bool should_restrict(void) +{ + int this_cpu = smp_processor_id(); + int cpu; + + if (idle_cpu(this_cpu)) + return false; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + /* SMT master CPU is idle, need not throttle */ + if (idle_cpu(cpu)) + return false; + + /* SMT master CPU has finished online task */ + if (per_cpu(qos_smt_status, cpu) < QOS_LEVEL_ONLINE) + return false; + } + + return true; +} + +static void send_ipi_throttle_smt(int this_cpu) +{ + int cpu; + + if (!system_uses_xint()) + return; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + arch_smp_send_ipi_user(cpu); + } +} + static inline 
void smt_qos_update_qos_level(int cpu, struct task_struct *p) { int new_status; @@ -9425,6 +9466,9 @@ static inline void smt_qos_update_qos_level(int cpu, struct task_struct *p) return; __this_cpu_write(qos_smt_status, new_status); + + if (cpumask_test_cpu(cpu, &master_smt_cpumask)) + send_ipi_throttle_smt(cpu); } static inline bool is_slave_to_master(int src_cpu, int dst_cpu) -- 2.34.1