Don't trap into the kernel to read the counter register; apply the counter workaround in userspace (in the vDSO) instead to improve performance.
Yang Yingliang (3): arm64: arch_timer: Disable CNTVCT_EL0 trap if workaround is enabled vdso: do cntvct workaround in the VDSO arm64: arch_timer: disable CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE
arch/arm64/configs/openeuler_defconfig | 1 + arch/arm64/include/asm/vdso/gettimeofday.h | 32 ++++++++++++++++++++++ drivers/clocksource/Kconfig | 5 ++++ drivers/clocksource/arm_arch_timer.c | 27 ++++++++++++++++++ include/linux/clocksource.h | 4 +++ include/vdso/datapage.h | 8 +++++- kernel/time/vsyscall.c | 16 +++++++++++ 7 files changed, 92 insertions(+), 1 deletion(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LFBU CVE: NA
--------------------------------------------------
Reading CNTVCT_EL0 is very slow when a cntvct workaround is enabled, because userspace accesses to CNTVCT_EL0 are then trapped into the kernel. To reduce the read time, disable the CNTVCT_EL0 trap and introduce vdso_fix and vdso_shift so that the cntvct workaround can be applied in the VDSO.
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/clocksource/Kconfig | 5 +++++ drivers/clocksource/arm_arch_timer.c | 27 +++++++++++++++++++++++++++ include/linux/clocksource.h | 4 ++++ include/vdso/datapage.h | 8 +++++++- kernel/time/vsyscall.c | 16 ++++++++++++++++ 5 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 0ba0dc4ecf06..27ac530f6ee5 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -732,4 +732,9 @@ config GOLDFISH_TIMER help Support for the timer/counter of goldfish-rtc
+config ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + bool "Enable arch timer workaround in vdso" + default y + depends on ARM_ARCH_TIMER_OOL_WORKAROUND + endmenu diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 071b04f1ee73..8a8b579185dc 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -92,6 +92,10 @@ static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER; #else static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_NONE; #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE +static bool vdso_fix; +static u16 vdso_shift; +#endif
static cpumask_t evtstrm_available = CPU_MASK_NONE; static bool evtstrm_enable __ro_after_init = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); @@ -599,8 +603,23 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa * change both the default value and the vdso itself. */ if (wa->read_cntvct_el0) { +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + if (!strncmp(wa->desc, "HiSilicon erratum 161010101", + strlen("HiSilicon erratum 161010101"))) { + vdso_fix = true; + vdso_shift = 5; + } else if (!strncmp(wa->desc, "Freescale erratum a005858", + strlen("Freescale erratum a005858"))) { + vdso_fix = true; + vdso_shift = 0; + } else { + clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE; + vdso_default = VDSO_CLOCKMODE_NONE; + } +#else clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE; vdso_default = VDSO_CLOCKMODE_NONE; +#endif } else if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) { vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT; clocksource_counter.vdso_clock_mode = vdso_default; @@ -973,7 +992,11 @@ static void arch_counter_set_user_access(void) * need to be workaround. The vdso may have been already * disabled though. */ +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + if (arch_timer_this_cpu_has_cntvct_wa() && !vdso_fix) +#else if (arch_timer_this_cpu_has_cntvct_wa()) +#endif pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id()); else cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; @@ -1129,6 +1152,10 @@ static void __init arch_counter_register(unsigned type)
arch_timer_read_counter = rd; clocksource_counter.vdso_clock_mode = vdso_default; +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + clocksource_counter.vdso_fix = vdso_fix; + clocksource_counter.vdso_shift = vdso_shift; +#endif } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; scr = arch_counter_get_cntvct_mem; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1d42d4b17327..6c1989c7bb81 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -110,6 +110,10 @@ struct clocksource { int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + u16 vdso_fix; + u16 vdso_shift; +#endif unsigned long flags;
int (*enable)(struct clocksource *cs); diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 73eb622e7663..f28f51c2c8f8 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -69,7 +69,8 @@ struct vdso_timestamp { * @tz_minuteswest: minutes west of Greenwich * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution - * @__unused: unused + * @vdso_fix: avoid the clock bug in VDSO + * @vdso_shift: count of bit to be ignored * @arch_data: architecture specific data (optional, defaults * to an empty struct) * @@ -104,7 +105,12 @@ struct vdso_data { s32 tz_minuteswest; s32 tz_dsttime; u32 hrtimer_res; +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + u16 vdso_fix; + u16 vdso_shift; +#else u32 __unused; +#endif
struct arch_vdso_data arch_data; }; diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index f0d5062d9cbc..031b14633484 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -74,14 +74,30 @@ void update_vsyscall(struct timekeeper *tk) struct vdso_data *vdata = __arch_get_k_vdso_data(); struct vdso_timestamp *vdso_ts; s32 clock_mode; +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + u16 vdso_fix; + u16 vdso_shift; +#endif u64 nsec;
/* copy vsyscall data */ vdso_write_begin(vdata);
clock_mode = tk->tkr_mono.clock->vdso_clock_mode; +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + vdso_fix = tk->tkr_mono.clock->vdso_fix; + vdso_shift = tk->tkr_mono.clock->vdso_shift; +#endif vdata[CS_HRES_COARSE].clock_mode = clock_mode; +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + vdata[CS_HRES_COARSE].vdso_fix = vdso_fix; + vdata[CS_HRES_COARSE].vdso_shift = vdso_shift; +#endif vdata[CS_RAW].clock_mode = clock_mode; +#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + vdata[CS_RAW].vdso_fix = vdso_fix; + vdata[CS_RAW].vdso_shift = vdso_shift; +#endif
/* CLOCK_REALTIME also required for time() */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LFBU CVE: NA
--------------------------------------------------
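If a cntvct workaround is enabled (vdso_fix is set), read CNTVCT_EL0 twice in the VDSO to avoid the clock bug: the pair of reads is repeated, up to 50 times, while the difference between the two values shifted right by vdso_shift is non-zero.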
Without this patch on Kunpeng916: ./gettimeofday -E -C 200 -L -S -W -N "gettimeofday" Running: gettimeofday# ./gettimeofday -E -C 200 -L -S -W -N gettimeofday prc thr usecs/call samples errors cnt/samp gettimeofday 1 1 0.31753 198 0 20000
With this patch on Kunpeng916: ./gettimeofday -E -C 200 -L -S -W -N "gettimeofday" Running: gettimeofday# ./gettimeofday -E -C 200 -L -S -W -N gettimeofday prc thr usecs/call samples errors cnt/samp gettimeofday 1 1 0.05244 198 0 20000
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/vdso/gettimeofday.h | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+)
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 764d13e2916c..23579c33822e 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -94,6 +94,38 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, : : "memory");
+#ifdef CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE + if (vd->vdso_fix) { + u64 new; + int retries = 50; + + asm volatile( + ALTERNATIVE("mrs %0, cntvct_el0", + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (new) + : + : "memory"); + while (unlikely((new - res) >> vd->vdso_shift) && retries) { + asm volatile( + ALTERNATIVE("mrs %0, cntvct_el0", + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (res) + : + : "memory"); + + asm volatile( + ALTERNATIVE("mrs %0, cntvct_el0", + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (new) + : + : "memory"); + retries--; + } + } +#endif arch_counter_enforce_ordering(res);
return res;
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LFBU CVE: NA
--------------------------------------------------
Disable CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE in openeuler_defconfig; it is only enabled on Hi1616.
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 9d2f717c1f7c..c366d70cb895 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6403,6 +6403,7 @@ CONFIG_FSL_ERRATUM_A008585=y CONFIG_HISILICON_ERRATUM_161010101=y CONFIG_ARM64_ERRATUM_858921=y CONFIG_ARM_TIMER_SP804=y +# CONFIG_ARM_ARCH_TIMER_WORKAROUND_IN_USERSPACE is not set # end of Clock Source drivers
CONFIG_MAILBOX=y
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3306 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Y...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3306 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Y...