
From: Jia Qingtong <jiaqingtong@huawei.com> virt inclusion category: performance bugzilla: https://gitee.com/openeuler/kernel/issues/IBNSBL -------------------------------- During migration, the target host's KVM loses the pvsched GPA that the guest registered with KVM while booting, before the migration. As a result, KVM assumes the guest has not enabled pvsched, while the guest still assumes pvsched is enabled and keeps using pvsched info in its scheduler. Solve the problem by saving and restoring the pvsched GPA across migration. We hook into the GIC ITS save & restore process, which already runs during migration (QEMU calls the ioctl to save & restore). We only save the pvsched GPA, and store it at offset 0x0 of the first 1K area of the GIC pending table, which was initialized to 0. A new-version kernel (a kernel with this patch) will set bit 31 of GICR_WAKER and migrate to the destination. The destination checks this bit to decide whether the pvsched GPA info can be restored. Thus we won't restore any pvsched info migrated from an old kernel. Signed-off-by: Jia Qingtong <jiaqingtong@huawei.com> Signed-off-by: Dongxu Sun <sundongxu3@huawei.com> --- arch/arm64/kvm/vgic/vgic-its.c | 92 ++++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic-mmio-v3.c | 24 +++++++- include/linux/irqchip/arm-gic-v3.h | 1 + 3 files changed, 115 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 4805a12e0616..201015e0ebd1 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2746,6 +2746,96 @@ static int vgic_its_has_attr(struct kvm_device *dev, return -ENXIO; } +/* The pvsched GPA is stored at offset 0x0 of the pending table */ +#define PVSCHED_GPA_OFFSET 0x0 +static void pvsched_save_all_gpa(struct kvm *kvm) +{ + int i, ret; + struct kvm_vcpu *vcpu; + gpa_t pendbase, ptr, val; + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + /* + * Quote the GIC spec - "Changing GICR_PENDBASER with + * GICR_CTLR.EnableLPIs == 1 is UNPREDICTABLE." 
We're pretty + * sure 'pendbaser' is valid with that. + */ + if (!vgic_cpu->lpis_enabled) + continue; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + ptr = pendbase + PVSCHED_GPA_OFFSET; + + ret = kvm_read_guest_lock(kvm, ptr, &val, sizeof(gpa_t)); + if (ret) { + kvm_err("%s: read from addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret); + continue; + } + /* + * The GIC spec states that behavior is UNPREDICTABLE if the LPI + * Pending tables contain non-zero values. Feel free to + * go ahead and corrupt the insane guest: the value is only logged, then overwritten below. + */ + if (val != 0) { + kvm_err("%s: read[%p] != 0 from addr[%p]", __func__, (void *)val, (void *)ptr); + } + + kvm_debug("%s: origin addr[%p]", __func__, (void *)val); + + /* + * We save the 'pvsched.base' as a generic state, regardless of + * whether it is valid or not. The write result is only reported via kvm_debug. + */ + ret = kvm_write_guest_lock(kvm, ptr, &vcpu->arch.pvsched.base, sizeof(vcpu->arch.pvsched.base)); + kvm_debug("%s: save pvsched.base[%p] into addr[%p], ret = %d\n", __func__, (void *)vcpu->arch.pvsched.base, (void *)ptr, ret); + } +} + +static void pvsched_restore_all_gpa(struct kvm *kvm) +{ + int i, ret; + struct kvm_vcpu *vcpu; + gpa_t pendbase, ptr, val; + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + if (!vgic_cpu->lpis_enabled) + continue; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + ptr = pendbase + PVSCHED_GPA_OFFSET; + + ret = kvm_read_guest_lock(kvm, ptr, &val, sizeof(gpa_t)); + if (ret) { + kvm_err("%s: read from addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret); + continue; + } + + kvm_debug("%s: restore [%p] into pvsched.base[%p] from addr[%p]", __func__, (void *)val, (void *)vcpu->arch.pvsched.base, (void *)ptr); + + if (val == 0) + continue; + + /* Here vcpu->arch.pvsched.pv_unhalted means the pvsched info saved in the pending table is valid. 
*/ + if (vcpu->arch.pvsched.pv_unhalted) { + vcpu->arch.pvsched.base = val; + kvm_debug("%s: migrate from new version, pending table's pvsched info is valid. Restore it .\n", __func__); + } + + val = 0; + ret = kvm_write_guest_lock(kvm, ptr, &val, sizeof(vcpu->arch.pvsched.base)); + if (ret) + kvm_err("%s: restore 0 into addr[%p] failed. ret[%d]", __func__, (void *)ptr, ret); + } + kvm_for_each_vcpu(i, vcpu, kvm) { + /* From now on vcpu->arch.pvsched.pv_unhalted has its original meaning again */ + vcpu->arch.pvsched.pv_unhalted = false; + } +} + static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); @@ -2769,9 +2859,11 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) break; case KVM_DEV_ARM_ITS_SAVE_TABLES: ret = abi->save_tables(its); + pvsched_save_all_gpa(kvm); break; case KVM_DEV_ARM_ITS_RESTORE_TABLES: ret = abi->restore_tables(its); + pvsched_restore_all_gpa(kvm); break; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index b6bdf22abab1..526fc05f8878 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -552,6 +552,25 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, pendbaser) != old_pendbaser); } +static unsigned long vgic_mmio_uaccess_read_waker(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return GICR_WAKER_PVSCHED_VALID; +} + +/* Record whether the pvsched info stored in the pending table is valid. + * When migrating from an old version, the kernel did not save pvsched info. 
+ * Here we just reuse vcpu->arch.pvsched.pv_unhalted as the flag, and clear it in pvsched_restore_all_gpa. + */ +static int vgic_mmio_uaccess_write_waker(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + vcpu->arch.pvsched.pv_unhalted = !!(val & GICR_WAKER_PVSCHED_VALID); + + return 0; +} + /* * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the * redistributors, while SPIs are covered by registers in the distributor @@ -648,8 +667,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_WAKER, - vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_WAKER, + vgic_mmio_read_raz, vgic_mmio_write_wi, + vgic_mmio_uaccess_read_waker, vgic_mmio_uaccess_write_waker, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 88b02e3b81da..3847c2de7694 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -147,6 +147,7 @@ #define GICR_WAKER_ProcessorSleep (1U << 1) #define GICR_WAKER_ChildrenAsleep (1U << 2) +#define GICR_WAKER_PVSCHED_VALID (1U << 31) #define GIC_BASER_CACHE_nCnB 0ULL #define GIC_BASER_CACHE_SameAsInner 0ULL -- 2.33.0