hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IB6JLE
--------------------------------
Introduce the xint software solution for the kernel. It provides a
lightweight interrupt processing framework for latency-sensitive
interrupts, and can be enabled dynamically per irq through the
/proc/irq/<irq>/xint interface.
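As a usage sketch (the IRQ number 45 is only illustrative, and "xint"
must be passed on the kernel command line, since the ARM64_HAS_XINT
capability is detected only when that parameter is present):

  echo 1 > /proc/irq/45/xint    # handle irq 45 on the xint fast path
  cat /proc/irq/45/xint         # query the current state (0 or 1)
  echo 0 > /proc/irq/45/xint    # switch irq 45 back to the normal path

Writing the state the interrupt is already in fails with -EBUSY;
interrupts whose hwirq is outside the supported SGI/SPI ranges fail
with -EPERM.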
The main implementation schemes are as follows:

1. A small number of latency-sensitive interrupts can be configured
   into the xint state, so that they are processed by the xint
   framework instead of the kernel's generic interrupt framework,
   which improves performance by removing unnecessary processing. It
   is not recommended to configure too many interrupts as xint, since
   doing so affects system stability to some extent.

2. Because the irq numbers of SGI/PPI/SPI interrupts are consecutive
   and limited, a bitmap is used to check whether a hwirq is an xint,
   as shown in the sketch below.
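For reference, the assembly check done by check_xint_pre_kernel_entry
before kernel_entry amounts to the following C sketch (hwirq_is_xint()
is an illustrative helper, not something this patch adds; on
little-endian arm64 the byte-wise test in assembly and the word-wise
test_bit() agree):

  /* Would this hwirq be taken on the xint fast path? */
  static bool hwirq_is_xint(unsigned long hwirq)
  {
  	/* xint hwirq can not exceed 1020 */
  	if (hwirq >= 1020)
  		return false;
  	/* bit (hwirq % 8) of byte (hwirq / 8) in irqnr_xint_map */
  	return test_bit(hwirq, irqnr_xint_map);
  }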
Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/Kconfig                       |  25 +++++-
 arch/arm64/Kconfig                 |   1 +
 arch/arm64/kernel/cpufeature.c     |  23 ++++++
 arch/arm64/kernel/entry-common.c   | 113 ++++++++++++++++++++++++++++
 arch/arm64/kernel/entry.S          |  57 ++++++++++++++
 arch/arm64/tools/cpucaps           |   1 +
 drivers/irqchip/irq-gic-v3.c       | 117 +++++++++++++++++++++++++++++
 include/linux/hardirq.h            |   5 ++
 include/linux/irqchip/arm-gic-v3.h |  13 ++++
 kernel/irq/proc.c                  |   6 ++
 kernel/softirq.c                   |  47 ++++++++++++
 11 files changed, 406 insertions(+), 2 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index d2da20c06454..98116fbfcff6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1549,9 +1549,30 @@ config FAST_SYSCALL
 	  exception handling path that only considers necessary features
 	  such as security, context saving, and recovery.
 
+config ARCH_SUPPORTS_FAST_IRQ
+	bool
+
+config FAST_IRQ
+	bool "Fast irq support"
+	depends on ARCH_SUPPORTS_FAST_IRQ
+	default n
+	help
+	  The irq handling process, which includes auxiliary
+	  functions for debug/trace and core functions like
+	  KPTI, interrupt time record, interrupt processing as
+	  a random number source, interrupt affinity
+	  modification and interrupt processing race, as well as
+	  spurious and unhandled interrupt debugging, has been
+	  identified as overly "lengthy".
+	  To address this, we introduce the concept of fast irq,
+	  a fast interrupt handling path that only considers
+	  necessary features such as security, context saving
+	  and recovery, which adds a lightweight interrupt processing
+	  framework for latency-sensitive interrupts.
+
 config DEBUG_FEATURE_BYPASS
 	bool "Bypass debug feature in fast syscall"
-	depends on FAST_SYSCALL
+	depends on FAST_SYSCALL || FAST_IRQ
 	default y
 	help
 	  This to bypass debug feature in fast syscall.
@@ -1563,7 +1584,7 @@ config DEBUG_FEATURE_BYPASS
 config SECURITY_FEATURE_BYPASS
 	bool "Bypass security feature in fast syscall"
-	depends on FAST_SYSCALL
+	depends on FAST_SYSCALL || FAST_IRQ
 	default y
 	help
 	  This to bypass security feature in fast syscall.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2fd2a1712875..14eb378b1a47 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -262,6 +262,7 @@ config ARM64
 	select TRACE_IRQFLAGS_NMI_SUPPORT
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select ARCH_SUPPORTS_FAST_SYSCALL if !ARM64_MTE && !KASAN_HW_TAGS
+	select ARCH_SUPPORTS_FAST_IRQ if ARM_GIC_V3 && !ARM64_MTE && !KASAN_HW_TAGS
 	help
 	  ARM 64-bit (AArch64) Linux support.
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6f0ce5b830f8..8f6bb2754592 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2395,6 +2395,21 @@ static bool has_xcall_support(const struct arm64_cpu_capabilities *entry, int __
 }
 #endif
 
+#ifdef CONFIG_FAST_IRQ
+bool is_xint_support;
+static int __init xint_setup(char *str)
+{
+	is_xint_support = true;
+	return 1;
+}
+__setup("xint", xint_setup);
+
+static bool has_xint_support(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+	return is_xint_support;
+}
+#endif
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.capability = ARM64_ALWAYS_BOOT,
@@ -2919,6 +2934,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_xcall_support,
 	},
+#endif
+#ifdef CONFIG_FAST_IRQ
+	{
+		.desc = "Xint Support",
+		.capability = ARM64_HAS_XINT,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_xint_support,
+	},
 #endif
 	{},
 };
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 4bd7feae98e9..9026ab4bb251 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -512,6 +512,119 @@ static __always_inline void __el1_pnmi(struct pt_regs *regs,
 	arm64_exit_nmi(regs);
 }
 
+#ifdef CONFIG_FAST_IRQ
+static __always_inline void __el1_xint(struct pt_regs *regs,
+				       void (*handler)(struct pt_regs *))
+{
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	enter_from_kernel_mode(regs);
+#endif
+
+	xint_enter_rcu();
+	do_interrupt_handler(regs, handler);
+	xint_exit_rcu();
+
+	arm64_preempt_schedule_irq();
+
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	exit_to_kernel_mode(regs);
+#endif
+}
+
+static void noinstr el1_xint(struct pt_regs *regs, u64 nmi_flag,
+			     void (*handler)(struct pt_regs *),
+			     void (*nmi_handler)(struct pt_regs *))
+{
+	/* Is there a NMI to handle? */
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	if (system_uses_nmi() && (read_sysreg(isr_el1) & nmi_flag)) {
+		__el1_nmi(regs, nmi_handler);
+		return;
+	}
+#endif
+
+	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+		__el1_pnmi(regs, handler);
+	else
+		__el1_xint(regs, handler);
+}
+
+asmlinkage void noinstr el1h_64_xint_handler(struct pt_regs *regs)
+{
+	el1_xint(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq);
+}
+
+static __always_inline void xint_exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	local_daif_mask();
+
+	flags = read_thread_flags();
+	if (unlikely(flags & _TIF_WORK_MASK))
+		do_notify_resume(regs, flags);
+
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	lockdep_sys_exit();
+#endif
+}
+
+static __always_inline void xint_exit_to_user_mode(struct pt_regs *regs)
+{
+	xint_exit_to_user_mode_prepare(regs);
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	mte_check_tfsr_exit();
+	__exit_to_user_mode();
+#endif
+}
+
+static void noinstr el0_xint(struct pt_regs *regs, u64 nmi_flag,
+			     void (*handler)(struct pt_regs *),
+			     void (*nmi_handler)(struct pt_regs *))
+{
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	enter_from_user_mode(regs);
+
+	/* Is there a NMI to handle? */
+	if (system_uses_nmi() && (read_sysreg(isr_el1) & nmi_flag)) {
+		/*
+		 * Any system with FEAT_NMI should have FEAT_CSV2 and
+		 * not be affected by Spectre v2 so we don't mitigate
+		 * here.
+		 */
+
+		arm64_enter_nmi(regs);
+		do_interrupt_handler(regs, nmi_handler);
+		arm64_exit_nmi(regs);
+
+		exit_to_user_mode(regs);
+		return;
+	}
+#endif
+
+	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+#ifndef CONFIG_SECURITY_FEATURE_BYPASS
+	if (regs->pc & BIT(55))
+		arm64_apply_bp_hardening();
+#endif
+
+	xint_enter_rcu();
+	do_interrupt_handler(regs, handler);
+	xint_exit_rcu();
+
+	xint_exit_to_user_mode(regs);
+}
+
+asmlinkage void noinstr el0t_64_xint_handler(struct pt_regs *regs)
+{
+	el0_xint(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq);
+}
+#endif /* CONFIG_FAST_IRQ */
+
 static __always_inline void __el1_irq(struct pt_regs *regs,
 				      void (*handler)(struct pt_regs *))
 {
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 3ac054d1c5e8..752272286e99 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -485,7 +485,9 @@ alternative_else_nop_endif
 	add	sp, sp, #PT_REGS_SIZE		// restore sp
 
 	/* Ensure any device/NC reads complete */
+	.if \fast_mode == std
 	alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
+	.endif
 
 	eret
 	.endif
@@ -653,6 +655,52 @@ SYM_CODE_END(__bad_stack)
 	.endm
 #endif
 
+#ifdef CONFIG_FAST_IRQ
+.macro check_xint_pre_kernel_entry el:req ht:req
+	stp	x0, x1, [sp, #0]
+	stp	x2, x3, [sp, #16]
+
+	ldr	x0, =irqnr_xint_map
+	/* get hpp irqnr */
+	mrs_s	x1, SYS_ICC_HPPIR1_EL1
+
+	/* xint hwirq can not exceed 1020 */
+	cmp	x1, 1020
+	b.ge	.Lskip_xint\@
+
+	/* x2 = irqnr % 8 */
+	and	x2, x1, #7
+	/* x3 = irqnr / 8 */
+	lsr	x3, x1, #3
+	/* x1 is the byte of irqnr in irqnr_xint_map */
+	ldr	x1, [x0, x3]
+
+	/* Get the check mask */
+	mov	x3, #1
+	/* x3 = 1 << (irqnr % 8) */
+	lsl	x3, x3, x2
+
+	/* x1 = x1 & x3 */
+	ands	x1, x1, x3
+	b.eq	.Lskip_xint\@
+
+	ldp	x0, x1, [sp, #0]
+	ldp	x2, x3, [sp, #16]
+#ifdef CONFIG_SECURITY_FEATURE_BYPASS
+	kernel_entry \el, 64, xint
+#else
+	kernel_entry \el, 64
+#endif
+	mov	x0, sp
+	bl	el\el\ht()_64_xint_handler
+	kernel_exit \el xint
+
+.Lskip_xint\@:
+	ldp	x0, x1, [sp, #0]
+	ldp	x2, x3, [sp, #16]
+.endm
+#endif
+
 .macro entry_handler el:req, ht:req, regsize:req, label:req
 SYM_CODE_START_LOCAL(el\el\ht()_\regsize()_\label)
 #ifdef CONFIG_FAST_SYSCALL
@@ -664,6 +712,15 @@ SYM_CODE_START_LOCAL(el\el\ht()_\regsize()_\label)
 	check_xcall_pre_kernel_entry
 .Lret_to_kernel_entry\@:
 	.endif
+#endif
+#ifdef CONFIG_FAST_IRQ
+	.if \regsize == 64 && \label == irq && ((\el == 0 && \ht == t) || (\el == 1 && \ht == h))
+	alternative_if_not ARM64_HAS_XINT
+	b	.Lskip_check_xint\@
+	alternative_else_nop_endif
+	check_xint_pre_kernel_entry \el \ht
+.Lskip_check_xint\@:
+	.endif
 #endif
 	kernel_entry \el, \regsize
 	mov	x0, sp
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 1f662b0bc1f7..2bcc0aec6b6d 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -54,6 +54,7 @@ HAS_TWED
 HAS_VIRT_HOST_EXTN
 HAS_WFXT
 HAS_XCALL
+HAS_XINT
 HW_DBM
 KVM_HVHE
 KVM_PROTECTED_MODE
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 0145495e7714..1430a7182a6e 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -982,6 +982,123 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 	__gic_handle_irq_from_irqson(regs);
 }
 
+#ifdef CONFIG_FAST_IRQ
+DECLARE_BITMAP(irqnr_xint_map, 1024);
+
+static bool can_set_xint(unsigned int hwirq)
+{
+	if (__get_intid_range(hwirq) == SGI_RANGE ||
+	    __get_intid_range(hwirq) == SPI_RANGE)
+		return true;
+
+	return false;
+}
+
+static bool xint_transform(int irqno, enum xint_op op)
+{
+	struct irq_data *data = irq_get_irq_data(irqno);
+	int hwirq;
+
+	while (data->parent_data)
+		data = data->parent_data;
+
+	hwirq = data->hwirq;
+
+	if (!can_set_xint(hwirq))
+		return false;
+
+	switch (op) {
+	case IRQ_TO_XINT:
+		set_bit(hwirq, irqnr_xint_map);
+		return true;
+	case XINT_TO_IRQ:
+		clear_bit(hwirq, irqnr_xint_map);
+		return false;
+	case XINT_SET_CHECK:
+		return test_bit(hwirq, irqnr_xint_map);
+	case XINT_RANGE_CHECK:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static ssize_t xint_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
+{
+	int irq = (int)(long)pde_data(file_inode(file));
+	bool xint_state = false;
+	unsigned long val;
+	char *buf = NULL;
+
+	if (!xint_transform(irq, XINT_RANGE_CHECK))
+		return -EPERM;
+
+	buf = memdup_user_nul(buffer, count);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	if (kstrtoul(buf, 0, &val) || (val != 0 && val != 1)) {
+		kfree(buf);
+		return -EINVAL;
+	}
+
+	xint_state = xint_transform(irq, XINT_SET_CHECK);
+	if (xint_state == val) {
+		kfree(buf);
+		return -EBUSY;
+	}
+
+	local_irq_disable();
+	disable_irq(irq);
+
+	xint_transform(irq, xint_state ? XINT_TO_IRQ : IRQ_TO_XINT);
+
+	enable_irq(irq);
+	local_irq_enable();
+
+	kfree(buf);
+
+	return count;
+}
+
+static int xint_proc_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%d\n", xint_transform((long)m->private, XINT_SET_CHECK));
+	return 0;
+}
+
+static int xint_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xint_proc_show, pde_data(inode));
+}
+
+static const struct proc_ops xint_proc_ops = {
+	.proc_open = xint_proc_open,
+	.proc_read = seq_read,
+	.proc_lseek = seq_lseek,
+	.proc_release = single_release,
+	.proc_write = xint_proc_write,
+};
+
+void register_irqchip_proc(struct irq_desc *desc, void *irqp)
+{
+	if (!is_xint_support)
+		return;
+
+	/* create /proc/irq/<irq>/xint */
+	proc_create_data("xint", 0644, desc->dir, &xint_proc_ops, irqp);
+}
+
+void unregister_irqchip_proc(struct irq_desc *desc)
+{
+	if (!is_xint_support)
+		return;
+
+	remove_proc_entry("xint", desc->dir);
+}
+#endif /* CONFIG_FAST_IRQ */
+
 static u32 gic_get_pribits(void)
 {
 	u32 pribits;
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d57cab4d4c06..bfac616e6142 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -87,6 +87,11 @@ void irq_exit(void);
  */
 void irq_exit_rcu(void);
 
+#ifdef CONFIG_FAST_IRQ
+void xint_enter_rcu(void);
+void xint_exit_rcu(void);
+#endif
+
 #ifndef arch_nmi_enter
 #define arch_nmi_enter()	do { } while (0)
 #define arch_nmi_exit()		do { } while (0)
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index ccf464326be7..548b8a5c46cf 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -721,6 +721,19 @@ static inline enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq)
 	}
 }
 
+#ifdef CONFIG_FAST_IRQ
+extern bool is_xint_support;
+
+enum xint_op {
+	XINT_TO_IRQ,
+	IRQ_TO_XINT,
+	XINT_SET_CHECK,
+	XINT_RANGE_CHECK,
+};
+
+void register_irqchip_proc(struct irq_desc *desc, void *irqp);
+void unregister_irqchip_proc(struct irq_desc *desc);
+#endif
 #endif
 
 #endif
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 623b8136e9af..0fdfde11ab81 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -331,6 +331,9 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 	action->dir = proc_mkdir(name, desc->dir);
 }
 
+void __weak register_irqchip_proc(struct irq_desc *desc, void *irqp) { }
+void __weak unregister_irqchip_proc(struct irq_desc *desc) { }
+
 #undef MAX_NAMELEN
 
 #define MAX_NAMELEN 10
@@ -385,6 +388,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 #endif
 	proc_create_single_data("spurious", 0444, desc->dir,
 			irq_spurious_proc_show, (void *)(long)irq);
+	register_irqchip_proc(desc, irqp);
 
 out_unlock:
 	mutex_unlock(&register_lock);
@@ -408,6 +412,8 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
 #endif
 	remove_proc_entry("spurious", desc->dir);
 
+	unregister_irqchip_proc(desc);
+
 	sprintf(name, "%u", irq);
 	remove_proc_entry(name, root_irq_dir);
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bd9716d7bb63..6dc0ea5baf06 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -663,6 +663,53 @@ void irq_exit(void)
 	lockdep_hardirq_exit();
 }
 
+#ifdef CONFIG_FAST_IRQ
+void xint_enter_rcu(void)
+{
+	preempt_count_add(HARDIRQ_OFFSET);
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	lockdep_hardirq_enter();
+
+	if (tick_nohz_full_cpu(smp_processor_id()) ||
+	    (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
+		tick_irq_enter();
+
+	account_hardirq_enter(current);
+#endif
+}
+
+static inline void __xint_exit_rcu(void)
+{
+#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
+	local_irq_disable();
+#else
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	lockdep_assert_irqs_disabled();
+#endif
+#endif
+
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	account_hardirq_exit(current);
+#endif
+	preempt_count_sub(HARDIRQ_OFFSET);
+	if (!in_interrupt() && local_softirq_pending())
+		invoke_softirq();
+
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	tick_irq_exit();
+#endif
+}
+
+void xint_exit_rcu(void)
+{
+	__xint_exit_rcu();
+	/* must be last! */
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	lockdep_hardirq_exit();
+#endif
+}
+#endif /* CONFIG_FAST_IRQ */
+
 /*
  * This function must run with irqs disabled!
  */