From: Dong Kai <dongkai11@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4F3V1
CVE: NA
--------------------------------
When pmu events are used as the nmi source, the pmu clock is stopped in wfi/wfe mode, so the nmi cannot fire periodically. To minimize misjudgment caused by wfi/wfe, we adopt a simple method: disable wfi/wfe at the right time, using the watchdog hrtimer as a baseline.
The watchdog hrtimer is based on the generic timer and runs at a higher frequency than the nmi. If the watchdog hrtimer stops working, we disable wfi/wfe mode; the pmu nmi should then always respond as long as the cpu core is not suspended.
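In rough terms (a simplified sketch of the barrier.h change in the diff below, not the exact patch text), the wait instructions become conditional on a flag owned by the watchdog:

  /*
   * close_wfi_wfe is raised when a detected cpu's hrtimer stops
   * ticking; while it is set, wfe()/wfi() fall through, so the pmu
   * clock keeps running and the nmi keeps firing.
   */
  extern unsigned int close_wfi_wfe;
  #define wfe()						\
	do {						\
		if (likely(close_wfi_wfe == 0))		\
			asm volatile("wfe" : : : "memory");	\
	} while (0)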
Signed-off-by: Dong Kai <dongkai11@huawei.com>
Reviewed-by: Kuohai Xu <xukuohai@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/include/asm/barrier.h | 15 ++++++++
 include/linux/nmi.h              |  2 +
 kernel/watchdog.c                | 12 ++++++
 kernel/watchdog_hld.c            | 63 ++++++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 3cae78c1ce33b..519a30346e176 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -24,8 +24,23 @@
 #define nops(n)		asm volatile(__nops(n))

 #define sev()		asm volatile("sev" : : : "memory")
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+extern unsigned int close_wfi_wfe;
+#define wfe()						\
+	do {						\
+		if (likely(close_wfi_wfe == 0))		\
+			asm volatile("wfe" : : : "memory");	\
+	} while (0)
+#define wfi()						\
+	do {						\
+		if (likely(close_wfi_wfe == 0))		\
+			asm volatile("wfi" : : : "memory");	\
+	} while (0)
+
+#else
 #define wfe()		asm volatile("wfe" : : : "memory")
 #define wfi()		asm volatile("wfi" : : : "memory")
+#endif

 #define isb()		asm volatile("isb" : : : "memory")
 #define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 745d66c36e244..6cfb36e889fa4 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -128,6 +128,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 extern void corelockup_detector_init(void);
 extern void corelockup_detector_online_cpu(unsigned int cpu);
 extern void corelockup_detector_offline_cpu(unsigned int cpu);
+extern void watchdog_check_hrtimer(void);
+extern unsigned long watchdog_hrtimer_interrupts(unsigned int cpu);
 #endif

 void watchdog_nmi_stop(void);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 8b54fd30a597f..9cea1ef8b41bc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -355,6 +355,13 @@ static int softlockup_fn(void *data)
 	return 0;
 }

+#ifdef CONFIG_CORELOCKUP_DETECTOR
+unsigned long watchdog_hrtimer_interrupts(unsigned int cpu)
+{
+	return per_cpu(hrtimer_interrupts, cpu);
+}
+#endif
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -366,6 +373,11 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	if (!watchdog_enabled)
 		return HRTIMER_NORESTART;

+#ifdef CONFIG_CORELOCKUP_DETECTOR
+	/* check hrtimer of detector cpu */
+	watchdog_check_hrtimer();
+#endif
+
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();

diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index e965c31958203..58d7acec4269d 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -60,16 +60,37 @@ EXPORT_SYMBOL(arch_touch_nmi_watchdog);
  * The detection chain is as following:
  * cpu0->cpu1->...->cpuN->cpu0
  *
+ * When pmu events are used as the nmi source, the pmu clock is
+ * stopped in wfi/wfe mode, so the nmi cannot fire periodically.
+ * To minimize misjudgment caused by wfi/wfe, we adopt a simple
+ * method: disable wfi/wfe at the right time, using the watchdog
+ * hrtimer as a baseline.
+ *
+ * The watchdog hrtimer is based on the generic timer and runs at
+ * a higher frequency than the nmi. If the watchdog hrtimer stops
+ * working, we disable wfi/wfe mode; the pmu nmi should then
+ * always respond as long as the cpu core is not suspended.
+ *
  * detector_cpu: the target cpu to detector of current cpu
  * nmi_interrupts: the nmi counts of current cpu
  * nmi_cnt_saved: saved nmi counts of detector_cpu
  * nmi_cnt_missed: the nmi consecutive miss counts of detector_cpu
+ * hrint_saved: saved hrtimer interrupt count of detector_cpu
+ * hrint_missed: consecutive hrtimer miss count of detector_cpu
+ * corelockup_cpumask/close_wfi_wfe:
+ *    a cpu's bit is set in the mask if that cpu may have fallen into
+ *    suspend; wfi/wfe mode is closed while any bit is set
 */
static DEFINE_PER_CPU(unsigned int, detector_cpu);
static DEFINE_PER_CPU(unsigned long, nmi_interrupts);
static DEFINE_PER_CPU(unsigned long, nmi_cnt_saved);
static DEFINE_PER_CPU(unsigned long, nmi_cnt_missed);
static DEFINE_PER_CPU(bool, core_watchdog_warn);
+static DEFINE_PER_CPU(unsigned long, hrint_saved);
+static DEFINE_PER_CPU(unsigned long, hrint_missed);
+struct cpumask corelockup_cpumask __read_mostly;
+unsigned int close_wfi_wfe;
+static bool pmu_based_nmi;

 static void watchdog_nmi_interrupts(void)
 {
@@ -80,6 +101,8 @@ static void corelockup_status_copy(unsigned int from, unsigned int to)
 {
 	per_cpu(nmi_cnt_saved, to) = per_cpu(nmi_cnt_saved, from);
 	per_cpu(nmi_cnt_missed, to) = per_cpu(nmi_cnt_missed, from);
+	per_cpu(hrint_saved, to) = per_cpu(hrint_saved, from);
+	per_cpu(hrint_missed, to) = per_cpu(hrint_missed, from);

 	/* always update detector cpu at the end */
 	per_cpu(detector_cpu, to) = per_cpu(detector_cpu, from);
@@ -93,6 +116,8 @@ static void corelockup_status_init(unsigned int cpu, unsigned int target)
 	 */
 	per_cpu(nmi_cnt_saved, cpu) = ULONG_MAX;
 	per_cpu(nmi_cnt_missed, cpu) = 0;
+	per_cpu(hrint_saved, cpu) = ULONG_MAX;
+	per_cpu(hrint_missed, cpu) = 0;

 	/* always update detector cpu at the end */
 	per_cpu(detector_cpu, cpu) = target;
@@ -113,6 +138,38 @@ void __init corelockup_detector_init(void)
 	}
 }

+void watchdog_check_hrtimer(void)
+{
+	unsigned int cpu = __this_cpu_read(detector_cpu);
+	unsigned long hrint = watchdog_hrtimer_interrupts(cpu);
+
+	/*
+	 * The hrtimer runs at a higher frequency than the nmi, and
+	 * the core cannot be hung if the hrtimer is still working.
+	 * So also update the nmi interrupt count here to improve
+	 * the robustness of the nmi count check.
+	 */
+	watchdog_nmi_interrupts();
+
+	if (!pmu_based_nmi)
+		return;
+
+	if (__this_cpu_read(hrint_saved) != hrint) {
+		__this_cpu_write(hrint_saved, hrint);
+		__this_cpu_write(hrint_missed, 0);
+		cpumask_clear_cpu(cpu, &corelockup_cpumask);
+	} else {
+		__this_cpu_inc(hrint_missed);
+		if (__this_cpu_read(hrint_missed) > 2)
+			cpumask_set_cpu(cpu, &corelockup_cpumask);
+	}
+
+	if (likely(cpumask_empty(&corelockup_cpumask)))
+		close_wfi_wfe = 0;
+	else
+		close_wfi_wfe = 1;
+}
+
 /*
  * Before: first->next
  * After: first->[new]->next
@@ -141,6 +198,9 @@ void corelockup_detector_offline_cpu(unsigned int cpu)
 	unsigned int prev = nr_cpu_ids;
 	unsigned int i;

+	/* clear bitmap */
+	cpumask_clear_cpu(cpu, &corelockup_cpumask);
+
 	/* found prev cpu */
 	for_each_cpu_and(i, &watchdog_cpumask, cpu_online_mask) {
 		if (per_cpu(detector_cpu, i) == cpu) {
@@ -476,6 +536,9 @@ int __init hardlockup_detector_perf_init(void)
 		perf_event_release_kernel(this_cpu_read(watchdog_ev));
 		this_cpu_write(watchdog_ev, NULL);
 	}
+#ifdef CONFIG_CORELOCKUP_DETECTOR
+	pmu_based_nmi = true;
+#endif
 	return ret;
 }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR_PERF */
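For clarity, the miss-counting policy implemented by watchdog_check_hrtimer() can be exercised in isolation. Below is a self-contained userspace sketch (illustration only, not part of the patch; it collapses the per-cpu state and the corelockup cpumask down to a single detected cpu, and names such as check_hrtimer and MISS_THRESHOLD are made up for the sketch): the detected cpu's hrtimer count is sampled at every watchdog hrtimer tick on the detecting cpu, more than two consecutive unchanged samples close wfi/wfe, and one fresh tick reopens it.

  #include <stdio.h>
  #include <limits.h>

  #define MISS_THRESHOLD 2	/* same "> 2" threshold as the patch */

  static unsigned long hrint_saved = ULONG_MAX;	/* not yet sampled */
  static unsigned long hrint_missed;
  static unsigned int close_wfi_wfe;

  /* Called from the detecting cpu's watchdog hrtimer callback with the
   * detected cpu's current hrtimer interrupt count. */
  static void check_hrtimer(unsigned long hrint)
  {
  	if (hrint_saved != hrint) {
  		/* hrtimer advanced: the detected core is alive */
  		hrint_saved = hrint;
  		hrint_missed = 0;
  		close_wfi_wfe = 0;
  	} else if (++hrint_missed > MISS_THRESHOLD) {
  		/* too many consecutive misses: stop using wfi/wfe */
  		close_wfi_wfe = 1;
  	}
  }

  int main(void)
  {
  	/* detected cpu's hrtimer counts seen at successive ticks:
  	 * the count stalls at 12, then resumes */
  	unsigned long samples[] = { 10, 11, 12, 12, 12, 12, 13 };
  	int i;

  	for (i = 0; i < (int)(sizeof(samples) / sizeof(samples[0])); i++) {
  		check_hrtimer(samples[i]);
  		printf("tick %d: hrint=%lu close_wfi_wfe=%u\n",
  		       i, samples[i], close_wfi_wfe);
  	}
  	return 0;
  }

With the samples above, close_wfi_wfe turns on only after the third consecutive unchanged reading (miss count 3 > 2) and drops back to 0 as soon as the count advances to 13.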