[PATCH OLK-5.10 0/2] KVM: arm64: pvsched bugfix and module param

Jia Qingtong (1):
  KVM: arm64: save & restore pvsched's base addr when migrate

Yanan Wang (1):
  KVM: arm64: Add a module param to enable/disable pv_preempted dynamically

 arch/arm64/include/asm/kvm_host.h  |  5 +-
 arch/arm64/kvm/arm.c               | 27 ++++++++-
 arch/arm64/kvm/pvsched.c           |  2 +
 arch/arm64/kvm/vgic/vgic-its.c     | 92 ++++++++++++++++++++++++++++++
 arch/arm64/kvm/vgic/vgic-mmio-v3.c | 24 +++++++-
 include/linux/irqchip/arm-gic-v3.h |  1 +
 6 files changed, 145 insertions(+), 6 deletions(-)

--
2.33.0

From: Jia Qingtong <jiaqingtong@huawei.com>

virt inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IBNSBL

--------------------------------

On migration, the target host's KVM loses the pvsched GPA that the guest
registered with KVM at boot time. KVM therefore assumes the guest has not
enabled pvsched, while the guest still assumes pvsched is enabled and keeps
using the pvsched info in its scheduler.

Solve this by saving and restoring the pvsched GPA across migration. We hook
into the GIC ITS save/restore process, which already runs on migration (QEMU
invokes the save/restore ioctls). Only the pvsched GPA is saved; it is stored
at offset 0x0 in the first 1KB of the LPI pending table, which is initialized
to zero.

A new kernel (one carrying this patch) sets bit 31 of GICR_WAKER before
migrating. The destination checks this bit to decide whether the pvsched GPA
info can be restored, so we never restore pvsched info migrated from an old
kernel.

Signed-off-by: Jia Qingtong <jiaqingtong@huawei.com>
Signed-off-by: Dongxu Sun <sundongxu3@huawei.com>
---
 arch/arm64/kvm/vgic/vgic-its.c     | 92 ++++++++++++++++++++++++++++++
 arch/arm64/kvm/vgic/vgic-mmio-v3.c | 24 +++++++-
 include/linux/irqchip/arm-gic-v3.h |  1 +
 3 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 4805a12e0616..201015e0ebd1 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -2746,6 +2746,96 @@ static int vgic_its_has_attr(struct kvm_device *dev,
 	return -ENXIO;
 }
 
+/* We use the pending table RAM just at offset 0x0. */
+#define PVSCHED_GPA_OFFSET 0x0
+static void pvsched_save_all_gpa(struct kvm *kvm)
+{
+	int i, ret;
+	struct kvm_vcpu *vcpu;
+	gpa_t pendbase, ptr, val;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+		/*
+		 * Quote the GIC spec - "Changing GICR_PENDBASER with
+		 * GICR_CTLR.EnableLPIs == 1 is UNPREDICTABLE." We're pretty
+		 * sure 'pendbaser' is valid with that.
+		 */
+		if (!vgic_cpu->lpis_enabled)
+			continue;
+
+		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+		ptr = pendbase + PVSCHED_GPA_OFFSET;
+
+		ret = kvm_read_guest_lock(kvm, ptr, &val, sizeof(gpa_t));
+		if (ret) {
+			kvm_err("%s: read from addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret);
+			continue;
+		}
+		/*
+		 * The GIC spec states that "Behavior is UNPREDICTABLE if the
+		 * LPI Pending table contains non-zero values." Feel free to
+		 * go ahead and corrupt the insane guest.
+		 */
+		if (val != 0) {
+			kvm_err("%s: read[%p] != 0 from addr[%p]", __func__, (void *)val, (void *)ptr);
+		}
+
+		kvm_debug("%s: origin addr[%p]", __func__, (void *)val);
+
+		/*
+		 * We save the 'pvsched.base' as a generic state, regardless of
+		 * whether it is valid or not.
+		 */
+		ret = kvm_write_guest_lock(kvm, ptr, &vcpu->arch.pvsched.base, sizeof(vcpu->arch.pvsched.base));
+		kvm_debug("%s: save pvsched.base[%p] into addr[%p], ret = %d\n", __func__, (void *)vcpu->arch.pvsched.base, (void *)ptr, ret);
+	}
+}
+
+static void pvsched_restore_all_gpa(struct kvm *kvm)
+{
+	int i, ret;
+	struct kvm_vcpu *vcpu;
+	gpa_t pendbase, ptr, val;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+		if (!vgic_cpu->lpis_enabled)
+			continue;
+
+		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+		ptr = pendbase + PVSCHED_GPA_OFFSET;
+
+		ret = kvm_read_guest_lock(kvm, ptr, &val, sizeof(gpa_t));
+		if (ret) {
+			kvm_err("%s: read from addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret);
+			continue;
+		}
+
+		kvm_debug("%s: restore [%p] into pvsched.base[%p] from addr[%p]", __func__, (void *)val, (void *)vcpu->arch.pvsched.base, (void *)ptr);
+
+		if (val == 0)
+			continue;
+
+		/* Here vcpu->arch.pvsched.pv_unhalted means the pvsched info saved in the pending table is valid. */
+		if (vcpu->arch.pvsched.pv_unhalted) {
+			vcpu->arch.pvsched.base = val;
+			kvm_debug("%s: migrated from a new version, the pending table's pvsched info is valid. Restore it.\n", __func__);
+		}
+
+		val = 0;
+		ret = kvm_write_guest_lock(kvm, ptr, &val, sizeof(vcpu->arch.pvsched.base));
+		if (ret)
+			kvm_err("%s: restore 0 into addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret);
+	}
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/* Now vcpu->arch.pvsched.pv_unhalted means the original pvsched.pv_unhalted again. */
+		vcpu->arch.pvsched.pv_unhalted = false;
+	}
+}
+
 static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
 {
 	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
@@ -2769,9 +2859,11 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
 		break;
 	case KVM_DEV_ARM_ITS_SAVE_TABLES:
 		ret = abi->save_tables(its);
+		pvsched_save_all_gpa(kvm);
 		break;
 	case KVM_DEV_ARM_ITS_RESTORE_TABLES:
 		ret = abi->restore_tables(its);
+		pvsched_restore_all_gpa(kvm);
 		break;
 	}
 
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index b6bdf22abab1..526fc05f8878 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -552,6 +552,25 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
 			     pendbaser) != old_pendbaser);
 }
 
+static unsigned long vgic_mmio_uaccess_read_waker(struct kvm_vcpu *vcpu,
+						  gpa_t addr, unsigned int len)
+{
+	return GICR_WAKER_PVSCHED_VALID;
+}
+
+/* Record whether the pvsched info stored in the pending table is valid.
+ * A kernel from before this patch does not save pvsched info on migration,
+ * so we reuse vcpu->arch.pvsched.pv_unhalted as the flag and clear it in
+ * pvsched_restore_all_gpa().
+ */
+static int vgic_mmio_uaccess_write_waker(struct kvm_vcpu *vcpu,
+					 gpa_t addr, unsigned int len,
+					 unsigned long val)
+{
+	vcpu->arch.pvsched.pv_unhalted = !!(val & GICR_WAKER_PVSCHED_VALID);
+
+	return 0;
+}
+
 /*
  * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
  * redistributors, while SPIs are covered by registers in the distributor
@@ -648,8 +667,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
 		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
 		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
-		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_WAKER,
+		vgic_mmio_read_raz, vgic_mmio_write_wi,
+		vgic_mmio_uaccess_read_waker, vgic_mmio_uaccess_write_waker, 4,
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
 		vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8,
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 88b02e3b81da..3847c2de7694 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -147,6 +147,7 @@
 
 #define GICR_WAKER_ProcessorSleep	(1U << 1)
 #define GICR_WAKER_ChildrenAsleep	(1U << 2)
+#define GICR_WAKER_PVSCHED_VALID	(1U << 31)
 
 #define GIC_BASER_CACHE_nCnB		0ULL
 #define GIC_BASER_CACHE_SameAsInner	0ULL
--
2.33.0

From: Yanan Wang <wangyanan55@huawei.com>

virt inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IBNSBL

--------------------------------

pv_preempted cannot guarantee a performance improvement in every scenario,
so add a module parameter that lets us enable/disable pv_preempted
dynamically when it is not needed.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Dongxu Sun <sundongxu3@huawei.com>
---
 arch/arm64/include/asm/kvm_host.h |  5 ++++-
 arch/arm64/kvm/arm.c              | 27 ++++++++++++++++++++++++---
 arch/arm64/kvm/pvsched.c          |  2 ++
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f171ab3d0d37..aa69338f6628 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -412,6 +412,7 @@ struct kvm_vcpu_arch {
 	/* Guest PV sched state */
 	struct {
 		bool pv_unhalted;
+		bool preempted;
 		gpa_t base;
 	} pvsched;
 
@@ -645,12 +646,14 @@ long kvm_hypercall_pvsched_features(struct kvm_vcpu *vcpu);
 void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted);
 long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu);
 
+extern bool pv_preempted_enable;
 static inline void kvm_arm_pvsched_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
 {
 	vcpu_arch->pvsched.base = GPA_INVALID;
+	vcpu_arch->pvsched.preempted = false;
 }
 
-static inline bool kvm_arm_is_pvsched_enabled(struct kvm_vcpu_arch *vcpu_arch)
+static inline bool kvm_arm_is_pvsched_valid(struct kvm_vcpu_arch *vcpu_arch)
 {
 	return (vcpu_arch->pvsched.base != GPA_INVALID);
 }
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 240edaa9eb50..d6962ea83e18 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -84,6 +84,15 @@ unsigned int twedel = 0;
 module_param(twedel, uint, S_IRUGO | S_IWUSR);
 #endif
 
+static const struct kernel_param_ops pv_preempted_enable_ops = {
+	.set = param_set_bool,
+	.get = param_get_bool,
+};
+
+bool pv_preempted_enable = true;
+MODULE_PARM_DESC(pv_preempted_enable, "bool");
+module_param_cb(pv_preempted_enable, &pv_preempted_enable_ops, &pv_preempted_enable, S_IRUGO | S_IWUSR);
+
 static int vcpu_req_reload_wfi_traps(const char *val, const struct kernel_param *kp);
 
 static const struct kernel_param_ops force_wfi_trap_ops = {
@@ -575,8 +584,20 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (vcpu_has_ptrauth(vcpu))
 		vcpu_ptrauth_disable(vcpu);
 
-	if (kvm_arm_is_pvsched_enabled(&vcpu->arch))
-		kvm_update_pvsched_preempted(vcpu, 0);
+	/*
+	 * When pv_preempted is changed from enabled to disabled, the preempted
+	 * state is no longer updated in kvm_arch_vcpu_put/load. So we must
+	 * update the preempted state to 0 for every vCPU in case some vCPUs'
+	 * preempted state stays 1 forever.
+	 */
+	if (kvm_arm_is_pvsched_valid(&vcpu->arch)) {
+		if (pv_preempted_enable)
+			kvm_update_pvsched_preempted(vcpu, 0);
+		else {
+			if (vcpu->arch.pvsched.preempted)
+				kvm_update_pvsched_preempted(vcpu, 0);
+		}
+	}
 
 #ifdef CONFIG_KVM_HISI_VIRT
 	kvm_hisi_dvmbm_load(vcpu);
@@ -600,7 +621,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
 	vcpu->cpu = -1;
 
-	if (kvm_arm_is_pvsched_enabled(&vcpu->arch))
+	if (kvm_arm_is_pvsched_valid(&vcpu->arch) && pv_preempted_enable)
 		kvm_update_pvsched_preempted(vcpu, 1);
 
 #ifdef CONFIG_KVM_HISI_VIRT
diff --git a/arch/arm64/kvm/pvsched.c b/arch/arm64/kvm/pvsched.c
index dc1768815467..9693415226d1 100644
--- a/arch/arm64/kvm/pvsched.c
+++ b/arch/arm64/kvm/pvsched.c
@@ -34,6 +34,8 @@ void kvm_update_pvsched_preempted(struct kvm_vcpu *vcpu, u32 preempted)
 	srcu_read_unlock(&kvm->srcu, idx);
 
 	pagefault_enable();
+
+	vcpu->arch.pvsched.preempted = !!preempted;
 }
 
 long kvm_pvsched_kick_vcpu(struct kvm_vcpu *vcpu)
--
2.33.0
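For reference, the parameter is registered with S_IRUGO | S_IWUSR, so it
should be readable by everyone and writable by root at runtime. Below is a
small sketch of flipping it from a C program, assuming the parameter is
exposed under /sys/module/kvm/parameters on the running kernel; the helper
name set_pv_preempted is illustrative.

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/*
	 * Sketch only: toggle the pv_preempted_enable module parameter at
	 * runtime, equivalent to
	 *   echo 0 > /sys/module/kvm/parameters/pv_preempted_enable
	 * The sysfs path assumes the parameter is exposed by the kvm module
	 * on the target system.
	 */
	static int set_pv_preempted(int enable)
	{
		const char *path = "/sys/module/kvm/parameters/pv_preempted_enable";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return -1;
		}
		if (write(fd, enable ? "1" : "0", 1) != 1) {
			perror("write");
			close(fd);
			return -1;
		}
		return close(fd);
	}

Once disabled, kvm_arch_vcpu_put() stops marking vCPUs preempted, and the
vcpu_load path above clears any vCPU whose preempted state was still 1.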

FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/15206
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/B...