Don't trap into the kernel to read the counter register; do the read in userspace to improve performance.
Yang Yingliang (2): arm64: arch_timer: Disable CNTVCT_EL0 trap if workaround is enabled vdso: do cntvct workaround in the VDSO
arch/arm64/include/asm/vdso/gettimeofday.h | 30 ++++++++++++++++++++++ drivers/clocksource/arm_arch_timer.c | 20 ++++++++++++--- include/linux/clocksource.h | 2 ++ include/vdso/datapage.h | 6 +++-- kernel/time/vsyscall.c | 8 ++++++ 5 files changed, 61 insertions(+), 5 deletions(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LFBU CVE: NA
--------------------------------------------------
Reading CNTVCT_EL0 is very slow when a cntvct workaround is enabled and CNTVCT_EL0 accesses are trapped. To reduce the read time, disable the CNTVCT_EL0 trap and introduce vdso_fix and vdso_shift so that the cntvct workaround can be done in the VDSO.
Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/clocksource/arm_arch_timer.c | 20 +++++++++++++++++--- include/linux/clocksource.h | 2 ++ include/vdso/datapage.h | 6 ++++-- kernel/time/vsyscall.c | 8 ++++++++ 4 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 071b04f1ee73..f47b2a59020f 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -92,6 +92,8 @@ static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER; #else static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_NONE; #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ +static bool vdso_fix; +static u16 vdso_shift;
static cpumask_t evtstrm_available = CPU_MASK_NONE; static bool evtstrm_enable __ro_after_init = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); @@ -599,8 +601,18 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa * change both the default value and the vdso itself. */ if (wa->read_cntvct_el0) { - clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE; - vdso_default = VDSO_CLOCKMODE_NONE; + if (!strncmp(wa->desc, "HiSilicon erratum 161010101", + strlen("HiSilicon erratum 161010101"))) { + vdso_fix = true; + vdso_shift = 5; + } else if (!strncmp(wa->desc, "Freescale erratum a005858", + strlen("Freescale erratum a005858"))) { + vdso_fix = true; + vdso_shift = 0; + } else { + clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE; + vdso_default = VDSO_CLOCKMODE_NONE; + } } else if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) { vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT; clocksource_counter.vdso_clock_mode = vdso_default; @@ -973,7 +985,7 @@ static void arch_counter_set_user_access(void) * need to be workaround. The vdso may have been already * disabled though. */ - if (arch_timer_this_cpu_has_cntvct_wa()) + if (arch_timer_this_cpu_has_cntvct_wa() && !vdso_fix) pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id()); else cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; @@ -1129,6 +1141,8 @@ static void __init arch_counter_register(unsigned type)
arch_timer_read_counter = rd; clocksource_counter.vdso_clock_mode = vdso_default; + clocksource_counter.vdso_fix = vdso_fix; + clocksource_counter.vdso_shift = vdso_shift; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; scr = arch_counter_get_cntvct_mem; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1d42d4b17327..e23eea881d0f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -110,6 +110,8 @@ struct clocksource { int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; + u16 vdso_fix; + u16 vdso_shift; unsigned long flags;
int (*enable)(struct clocksource *cs); diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 73eb622e7663..6bbca70342f3 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -69,7 +69,8 @@ struct vdso_timestamp { * @tz_minuteswest: minutes west of Greenwich * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution - * @__unused: unused + * @vdso_fix: avoid the clock bug in VDSO + * @vdso_shift: count of bit to be ignored * @arch_data: architecture specific data (optional, defaults * to an empty struct) * @@ -104,7 +105,8 @@ struct vdso_data { s32 tz_minuteswest; s32 tz_dsttime; u32 hrtimer_res; - u32 __unused; + u16 vdso_fix; + u16 vdso_shift;
struct arch_vdso_data arch_data; }; diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index f0d5062d9cbc..8045de999586 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -74,14 +74,22 @@ void update_vsyscall(struct timekeeper *tk) struct vdso_data *vdata = __arch_get_k_vdso_data(); struct vdso_timestamp *vdso_ts; s32 clock_mode; + u16 vdso_fix; + u16 vdso_shift; u64 nsec;
/* copy vsyscall data */ vdso_write_begin(vdata);
clock_mode = tk->tkr_mono.clock->vdso_clock_mode; + vdso_fix = tk->tkr_mono.clock->vdso_fix; + vdso_shift = tk->tkr_mono.clock->vdso_shift; vdata[CS_HRES_COARSE].clock_mode = clock_mode; + vdata[CS_HRES_COARSE].vdso_fix = vdso_fix; + vdata[CS_HRES_COARSE].vdso_shift = vdso_shift; vdata[CS_RAW].clock_mode = clock_mode; + vdata[CS_RAW].vdso_fix = vdso_fix; + vdata[CS_RAW].vdso_shift = vdso_shift;
/* CLOCK_REALTIME also required for time() */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
On 2023/12/4 10:27, Yang Yingliang wrote:
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LFBU CVE: NA
Reading CNTVCT_EL0 is very slow when a cntvct workaround is enabled and CNTVCT_EL0 accesses are trapped. To reduce the read time, disable the CNTVCT_EL0 trap and introduce vdso_fix and vdso_shift so that the cntvct workaround can be done in the VDSO.
Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com
这个要移除掉,只保留第一个;
. 下面的strncmp strlen是不是太重了 可以直接判断 wa->read_cntvct_el0 是否跟hisi_161010101_read_cntvct_el0相等吧;
drivers/clocksource/arm_arch_timer.c | 20 +++++++++++++++++--- include/linux/clocksource.h | 2 ++ include/vdso/datapage.h | 6 ++++-- kernel/time/vsyscall.c | 8 ++++++++ 4 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 071b04f1ee73..f47b2a59020f 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -92,6 +92,8 @@ static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_ARCHTIMER; #else static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_NONE; #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ +static bool vdso_fix; +static u16 vdso_shift;
static cpumask_t evtstrm_available = CPU_MASK_NONE; static bool evtstrm_enable __ro_after_init = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); @@ -599,8 +601,18 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa * change both the default value and the vdso itself. */ if (wa->read_cntvct_el0) {
- clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE;
- vdso_default = VDSO_CLOCKMODE_NONE;
+ if (!strncmp(wa->desc, "HiSilicon erratum 161010101",
+ strlen("HiSilicon erratum 161010101"))) {
+ vdso_fix = true;
+ vdso_shift = 5;
+ } else if (!strncmp(wa->desc, "Freescale erratum a005858",
+ strlen("Freescale erratum a005858"))) {
+ vdso_fix = true;
+ vdso_shift = 0;
+ } else {
+ clocksource_counter.vdso_clock_mode = VDSO_CLOCKMODE_NONE;
+ vdso_default = VDSO_CLOCKMODE_NONE;
+ }
} else if (wa->disable_compat_vdso && vdso_default != VDSO_CLOCKMODE_NONE) { vdso_default = VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT; clocksource_counter.vdso_clock_mode = vdso_default;
@@ -973,7 +985,7 @@ static void arch_counter_set_user_access(void) * need to be workaround. The vdso may have been already * disabled though. */
- if (arch_timer_this_cpu_has_cntvct_wa())
+ if (arch_timer_this_cpu_has_cntvct_wa() && !vdso_fix) pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id()); else cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
@@ -1129,6 +1141,8 @@ static void __init arch_counter_register(unsigned type)
arch_timer_read_counter = rd; clocksource_counter.vdso_clock_mode = vdso_default;
+ clocksource_counter.vdso_fix = vdso_fix;
+ clocksource_counter.vdso_shift = vdso_shift;
} else { arch_timer_read_counter = arch_counter_get_cntvct_mem; scr = arch_counter_get_cntvct_mem;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1d42d4b17327..e23eea881d0f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -110,6 +110,8 @@ struct clocksource { int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode;
+ u16 vdso_fix;
+ u16 vdso_shift;
unsigned long flags;
int (*enable)(struct clocksource *cs);
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 73eb622e7663..6bbca70342f3 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -69,7 +69,8 @@ struct vdso_timestamp {
* @tz_minuteswest: minutes west of Greenwich
* @tz_dsttime: type of DST correction
* @hrtimer_res: hrtimer resolution
- * @__unused: unused
+ * @vdso_fix: avoid the clock bug in VDSO
+ * @vdso_shift: count of bit to be ignored
* @arch_data: architecture specific data (optional, defaults
* to an empty struct)
@@ -104,7 +105,8 @@ struct vdso_data { s32 tz_minuteswest; s32 tz_dsttime; u32 hrtimer_res;
- u32 __unused;
+ u16 vdso_fix;
+ u16 vdso_shift;
struct arch_vdso_data arch_data; };
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index f0d5062d9cbc..8045de999586 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -74,14 +74,22 @@ void update_vsyscall(struct timekeeper *tk) struct vdso_data *vdata = __arch_get_k_vdso_data(); struct vdso_timestamp *vdso_ts; s32 clock_mode;
+ u16 vdso_fix;
+ u16 vdso_shift;
u64 nsec;
/* copy vsyscall data */ vdso_write_begin(vdata);
clock_mode = tk->tkr_mono.clock->vdso_clock_mode;
+ vdso_fix = tk->tkr_mono.clock->vdso_fix;
+ vdso_shift = tk->tkr_mono.clock->vdso_shift;
vdata[CS_HRES_COARSE].clock_mode = clock_mode;
+ vdata[CS_HRES_COARSE].vdso_fix = vdso_fix;
+ vdata[CS_HRES_COARSE].vdso_shift = vdso_shift;
vdata[CS_RAW].clock_mode = clock_mode;
+ vdata[CS_RAW].vdso_fix = vdso_fix;
+ vdata[CS_RAW].vdso_shift = vdso_shift;
/* CLOCK_REALTIME also required for time() */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LFBU CVE: NA
--------------------------------------------------
If a cntvct workaround is enabled, read CNTVCT_EL0 twice in VDSO to avoid the clock bug.
Without this patch on Kunpeng916: ./gettimeofday -E -C 200 -L -S -W -N "gettimeofday" Running: gettimeofday# ./gettimeofday -E -C 200 -L -S -W -N gettimeofday prc thr usecs/call samples errors cnt/samp gettimeofday 1 1 0.31753 198 0 20000
With this patch on Kunpeng916: ./gettimeofday -E -C 200 -L -S -W -N "gettimeofday" Running: gettimeofday# ./gettimeofday -E -C 200 -L -S -W -N gettimeofday prc thr usecs/call samples errors cnt/samp gettimeofday 1 1 0.05244 198 0 20000
Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/vdso/gettimeofday.h | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 764d13e2916c..833b33e6c5bf 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -94,6 +94,36 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, : : "memory");
+ if (vd->vdso_fix) { + u64 new; + int retries = 50; + + asm volatile( + ALTERNATIVE("mrs %0, cntvct_el0", + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (new) + : + : "memory"); + while (unlikely((new - res) >> vd->vdso_shift) && retries) { + asm volatile( + ALTERNATIVE("mrs %0, cntvct_el0", + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (res) + : + : "memory"); + + asm volatile( + ALTERNATIVE("mrs %0, cntvct_el0", + __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (new) + : + : "memory"); + retries--; + } + } arch_counter_enforce_ordering(res);
return res;
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3144 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/T...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3144 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/T...