[PATCH v1 OLK-6.6 0/5] Support the FEAT_HDBSS introduced in Armv9.5

From: eillon <yezhenyu2@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IB103D CVE: NA This series of patches adds support for the Hardware Dirty state tracking Structure (HDBSS) feature, which is introduced by the ARM architecture in the DDI0601 (ID121123) version. The HDBSS feature is an extension to the architecture that enhances tracking translation table descriptors' dirty state, identified as FEAT_HDBSS. The goal of this feature is to reduce the cost of surveying for dirtied granules, with minimal effect on recording when a granule has been dirtied. The purpose of this feature is to make the execution overhead of live migration lower for both the guest and the host, compared to existing approaches (write-protect or search stage 2 tables). After these patches, users (such as qemu) can use the KVM_CAP_ARM_HW_DIRTY_STATE_TRACK ioctl to enable or disable the HDBSS feature before and after the live migration. See patches for details. Thanks. eillon (5): arm64/sysreg: add HDBSS related register information arm64/kvm: support set the DBM attr during memory abort arm64/kvm: using ioctl to enable/disable the HDBSS feature arm64/kvm: support to handle the HDBSSF event arm64/config: add config to control whether enable HDBSS feature arch/arm64/Kconfig | 12 ++++ arch/arm64/include/asm/cpufeature.h | 14 +++++ arch/arm64/include/asm/esr.h | 2 + arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/include/asm/kvm_host.h | 8 +++ arch/arm64/include/asm/kvm_mmu.h | 14 +++++ arch/arm64/include/asm/kvm_pgtable.h | 1 + arch/arm64/include/asm/sysreg.h | 14 +++++ arch/arm64/kvm/arm.c | 88 ++++++++++++++++++++++++++++ arch/arm64/kvm/handle_exit.c | 50 ++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 12 ++++ arch/arm64/kvm/hyp/vhe/switch.c | 3 + arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 ++ arch/arm64/kvm/mmu.c | 17 +++++- arch/arm64/kvm/reset.c | 9 +++ arch/arm64/tools/sysreg | 28 +++++++++ include/linux/kvm_host.h | 3 + include/uapi/linux/kvm.h | 
2 + tools/include/uapi/linux/kvm.h | 2 + 19 files changed, 283 insertions(+), 1 deletion(-) -- 2.39.3

From: eillon <yezhenyu2@huawei.com> The ARM architecture added the HDBSS feature and descriptions of related registers (HDBSSBR/HDBSSPROD) in the DDI0601(ID121123) version, add them to Linux. Signed-off-by: eillon <yezhenyu2@huawei.com> --- arch/arm64/include/asm/esr.h | 2 ++ arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/tools/sysreg | 28 ++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d5c30ee0985f..276eb39ed9a7 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -148,6 +148,8 @@ #define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) /* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_HDBSSF_SHIFT (11) +#define ESR_ELx_HDBSSF (UL(1) << ESR_ELx_HDBSSF_SHIFT) #define ESR_ELx_TnD_SHIFT (10) #define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) #define ESR_ELx_TagAccess_SHIFT (9) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 9385293e6d45..1f38f8b6cc2d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -128,6 +128,7 @@ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_HDBSS (1UL << 45) #define VTCR_EL2_RES1 (1U << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index baded26ab7d7..963177485fd8 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2477,6 +2477,34 @@ Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg HDBSSBR_EL2 3 4 2 3 2 +Res0 63:56 +Field 55:12 BADDR +Res0 11:4 +Enum 3:0 SZ + 0b0001 8KB + 0b0010 16KB + 0b0011 32KB + 0b0100 64KB + 0b0101 128KB + 0b0110 256KB + 0b0111 512KB + 0b1000 1MB + 0b1001 2MB +EndEnum +EndSysreg + +Sysreg HDBSSPROD_EL2 3 4 2 3 3 +Res0 63:32 +Enum 31:26 FSC + 0b000000 OK + 0b010000 ExternalAbort + 0b101000 GPF +EndEnum +Res0 25:19 +Field 18:0 INDEX +EndSysreg + Sysreg 
DACR32_EL2 3 4 3 0 0 Res0 63:32 Field 31:30 D15 -- 2.39.3

From: eillon <yezhenyu2@huawei.com> Since the ARMv8, the page entry has supported the DBM attribute. Support set the attr during user_mem_abort(). Signed-off-by: eillon <yezhenyu2@huawei.com> --- arch/arm64/include/asm/kvm_pgtable.h | 1 + arch/arm64/kvm/hyp/pgtable.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..4a03e4801127 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -180,6 +180,7 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_R = BIT(2), KVM_PGTABLE_PROT_DEVICE = BIT(3), + KVM_PGTABLE_PROT_DBM = BIT(4), KVM_PGTABLE_PROT_SW0 = BIT(55), KVM_PGTABLE_PROT_SW1 = BIT(56), diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index d79feecc2f51..056b21a1ca92 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -44,6 +44,8 @@ #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) +#define KVM_PTE_LEAF_ATTR_HI_S2_DBM BIT(51) + #define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) #define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ @@ -711,6 +713,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p if (prot & KVM_PGTABLE_PROT_W) attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; + if (prot & KVM_PGTABLE_PROT_DBM) + attr |= KVM_PTE_LEAF_ATTR_HI_S2_DBM; + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; @@ -1315,6 +1320,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_W) set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; + if (prot & KVM_PGTABLE_PROT_DBM) + set |= KVM_PTE_LEAF_ATTR_HI_S2_DBM; + if (prot & KVM_PGTABLE_PROT_X) clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; -- 2.39.3

From: eillon <yezhenyu2@huawei.com> In ARM64, the buffer size corresponding to the HDBSS feature is configurable. Therefore, we cannot enable the HDBSS feature during KVM initialization, but we should enable it when triggering a live migration, where the buffer size can be configured by the user. The KVM_CAP_ARM_HW_DIRTY_STATE_TRACK ioctl is added to enable/disable this feature. Users (such as qemu) can invoke the ioctl to enable HDBSS at the beginning of the migration and disable the feature by invoking the ioctl again at the end of the migration with size set to 0. Signed-off-by: eillon <yezhenyu2@huawei.com> --- arch/arm64/include/asm/cpufeature.h | 12 +++++ arch/arm64/include/asm/kvm_host.h | 5 +++ arch/arm64/include/asm/kvm_mmu.h | 12 +++++ arch/arm64/include/asm/sysreg.h | 12 +++++ arch/arm64/kvm/arm.c | 70 +++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/vhe/switch.c | 1 + arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 2 + arch/arm64/kvm/mmu.c | 3 ++ arch/arm64/kvm/reset.c | 7 +++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 2 + tools/include/uapi/linux/kvm.h | 2 + 12 files changed, 129 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d68307a50d13..d2b9771b4a82 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -752,6 +752,18 @@ static __always_inline bool system_supports_fpsimd(void) return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); } +static inline bool system_supports_hdbss(void) +{ + u64 mmfr1; + u32 val; + + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + val = cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_EL1_HAFDBS_SHIFT); + + return val == ID_AA64MMFR1_EL1_HAFDBS_HDBSS; +} + static inline bool system_uses_hw_pan(void) { return IS_ENABLED(CONFIG_ARM64_PAN) && diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac4e59256f8e..51f99f3d824a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ 
b/arch/arm64/include/asm/kvm_host.h @@ -641,6 +641,11 @@ struct kvm_vcpu_arch { #ifdef CONFIG_HISI_VIRTCCA_HOST struct virtcca_cvm_tec tec; #endif + /* HDBSS registers info */ + struct { + u64 br_el2; + u64 prod_el2; + } hdbss; }; /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d698ce35deb8..a76bc71010e7 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -310,6 +310,18 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } +static __always_inline void __load_hdbss(struct kvm_vcpu *vcpu) +{ + if (!vcpu->kvm->enable_hdbss) + return; + + write_sysreg_s(vcpu->arch.hdbss.br_el2, SYS_HDBSSBR_EL2); + write_sysreg_s(vcpu->arch.hdbss.prod_el2, SYS_HDBSSPROD_EL2); + + dsb(sy); + isb(); +} + static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 435634a703c6..8494aac11824 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1031,6 +1031,18 @@ #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +/* + * Definitions for the HDBSS feature + */ +#define HDBSS_MAX_SIZE HDBSSBR_EL2_SZ_2MB + +#define HDBSSBR_EL2(baddr, sz) (((baddr) & GENMASK(55, 12 + sz)) | \ + ((sz) << HDBSSBR_EL2_SZ_SHIFT)) +#define HDBSSBR_BADDR(br) ((br) & GENMASK(55, (12 + HDBSSBR_SZ(br)))) +#define HDBSSBR_SZ(br) (((br) & HDBSSBR_EL2_SZ_MASK) >> HDBSSBR_EL2_SZ_SHIFT) + +#define HDBSSPROD_IDX(prod) (((prod) & HDBSSPROD_EL2_INDEX_MASK) >> HDBSSPROD_EL2_INDEX_SHIFT) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. 
*/ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5ba336d1efad..d72c6c72e60a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -129,6 +129,70 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } +static int kvm_cap_arm_enable_hdbss(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + unsigned long i; + struct kvm_vcpu *vcpu; + struct page *hdbss_pg; + int size = cap->args[0]; + + if (!system_supports_hdbss()) { + kvm_err("This system does not support HDBSS!\n"); + return -EINVAL; + } + + if (size < 0 || size > HDBSS_MAX_SIZE) { + kvm_err("Invalid HDBSS buffer size: %d!\n", size); + return -EINVAL; + } + + /* Enable the HDBSS feature if size > 0, otherwise disable it. */ + if (size) { + kvm->enable_hdbss = true; + kvm->arch.vtcr |= VTCR_EL2_HD | VTCR_EL2_HDBSS; + + kvm_for_each_vcpu(i, vcpu, kvm) { + hdbss_pg = alloc_pages(GFP_KERNEL, size); + if (!hdbss_pg) { + kvm_err("Alloc HDBSS buffer failed!\n"); + return -EINVAL; + } + + vcpu->arch.hdbss.br_el2 = HDBSSBR_EL2(page_to_phys(hdbss_pg), size); + vcpu->arch.hdbss.prod_el2 = 0; + + /* + * We should kick vcpus out of guest mode here to + * load new vtcr value to vtcr_el2 register when + * re-enter guest mode. + */ + kvm_vcpu_kick(vcpu); + } + + kvm_info("Enable HDBSS success, HDBSS buffer size: %d\n", size); + } else if (kvm->enable_hdbss) { + kvm->arch.vtcr &= ~(VTCR_EL2_HD | VTCR_EL2_HDBSS); + + kvm_for_each_vcpu(i, vcpu, kvm) { + /* Kick vcpus to flush hdbss buffer. 
*/ + kvm_vcpu_kick(vcpu); + + hdbss_pg = phys_to_page(HDBSSBR_BADDR(vcpu->arch.hdbss.br_el2)); + if (hdbss_pg) + __free_pages(hdbss_pg, HDBSSBR_SZ(vcpu->arch.hdbss.br_el2)); + + vcpu->arch.hdbss.br_el2 = 0; + vcpu->arch.hdbss.prod_el2 = 0; + } + + kvm->enable_hdbss = false; + kvm_info("Disable HDBSS success\n"); + } + + return 0; +} + int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -183,6 +247,9 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = kvm_cvm_enable_cap(kvm, cap); break; #endif + case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: + r = kvm_cap_arm_enable_hdbss(kvm, cap); + break; default: r = -EINVAL; break; @@ -436,6 +503,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = static_key_enabled(&virtcca_cvm_is_available); break; #endif + case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: + r = system_supports_hdbss(); + break; default: r = 0; } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 821e366b54f2..3d6f5891010e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -225,6 +225,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * __activate_traps clear HCR_EL2.TGE (among other things). 
*/ __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); + __load_hdbss(vcpu); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 5cb4b70e0aef..236d07c1b0b8 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -92,6 +92,8 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) __sysreg_restore_el1_state(guest_ctxt); __mpam_guest_load(); + __load_hdbss(vcpu); + vcpu_set_flag(vcpu, SYSREGS_ON_CPU); activate_traps_vhe_load(vcpu); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 65554248cb7f..ea7ab43154cc 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1586,6 +1586,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; + if (kvm->enable_hdbss && logging_active) + prot |= KVM_PGTABLE_PROT_DBM; + if (exec_fault) prot |= KVM_PGTABLE_PROT_X; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d38e74db97c2..806080553bc1 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -162,6 +162,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { void *sve_state = vcpu->arch.sve_state; + struct page *hdbss_pg; kvm_vcpu_unshare_task_fp(vcpu); kvm_unshare_hyp(vcpu, vcpu + 1); @@ -173,6 +174,12 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) if (vcpu_is_tec(vcpu)) kvm_destroy_tec(vcpu); #endif + + if (vcpu->arch.hdbss.br_el2) { + hdbss_pg = phys_to_page(HDBSSBR_BADDR(vcpu->arch.hdbss.br_el2)); + if (hdbss_pg) + __free_pages(hdbss_pg, HDBSSBR_SZ(vcpu->arch.hdbss.br_el2)); + } } static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c848eb86c556..7235e88c726f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -842,6 +842,7 @@ struct kvm { struct notifier_block pm_notifier; #endif char 
stats_id[KVM_STATS_NAME_SIZE]; + bool enable_hdbss; }; #define kvm_err(fmt, ...) \ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1f67e4d6ff7b..52695dea196e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1210,6 +1210,8 @@ struct kvm_ppc_resize_hpt { /* support request to inject secret to CSV3 guest */ #define KVM_CAP_HYGON_COCO_EXT_CSV3_INJ_SECRET (1 << 2) +#define KVM_CAP_ARM_HW_DIRTY_STATE_TRACK 502 + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 3a2b617b6429..bd1a496b5448 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1193,6 +1193,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 +#define KVM_CAP_ARM_HW_DIRTY_STATE_TRACK 502 + #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing_irqchip { -- 2.39.3

From: eillon <yezhenyu2@huawei.com> Updating the dirty bitmap based on the HDBSS buffer. Similar to the implementation of the x86 pml feature, KVM flushes the buffers on all VM-Exits, thus we only need to kick running vCPUs to force a VM-Exit. Signed-off-by: eillon <yezhenyu2@huawei.com> --- arch/arm64/kvm/arm.c | 10 ++++++++ arch/arm64/kvm/handle_exit.c | 47 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/mmu.c | 10 +++++++- 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d72c6c72e60a..f7206ab91c9c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1837,7 +1837,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { + /* + * Flush all CPUs' dirty log buffers to the dirty_bitmap. Called + * before reporting dirty_bitmap to userspace. KVM flushes the buffers + * on all VM-Exits, thus we only need to kick running vCPUs to force a + * VM-Exit. 
+ */ + struct kvm_vcpu *vcpu; + unsigned long i; + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); } static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 90959b8b6228..c9b3ce381018 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -292,6 +292,50 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) return arm_exit_handlers[esr_ec]; } +#define HDBSS_ENTRY_VALID_SHIFT 0 +#define HDBSS_ENTRY_VALID_MASK (1UL << HDBSS_ENTRY_VALID_SHIFT) +#define HDBSS_ENTRY_IPA_SHIFT 12 +#define HDBSS_ENTRY_IPA_MASK GENMASK_ULL(55, HDBSS_ENTRY_IPA_SHIFT) + +static void kvm_flush_hdbss_buffer(struct kvm_vcpu *vcpu) +{ + int idx, curr_idx; + u64 *hdbss_buf; + + if (!vcpu->kvm->enable_hdbss) + return; + + dsb(sy); + isb(); + curr_idx = HDBSSPROD_IDX(read_sysreg_s(SYS_HDBSSPROD_EL2)); + + /* Do nothing if HDBSS buffer is empty or br_el2 is NULL */ + if (curr_idx == 0 || vcpu->arch.hdbss.br_el2 == 0) + return; + + hdbss_buf = page_address(phys_to_page(HDBSSBR_BADDR(vcpu->arch.hdbss.br_el2))); + if (!hdbss_buf) { + kvm_err("Enter flush hdbss buffer with buffer == NULL!"); + return; + } + + for (idx = 0; idx < curr_idx; idx++) { + u64 gpa; + + gpa = hdbss_buf[idx]; + if (!(gpa & HDBSS_ENTRY_VALID_MASK)) + continue; + + gpa = gpa & HDBSS_ENTRY_IPA_MASK; + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); + } + + /* reset HDBSS index */ + write_sysreg_s(0, SYS_HDBSSPROD_EL2); + dsb(sy); + isb(); +} + /* * We may be single-stepping an emulated instruction. If the emulation * has been completed in the kernel, we can return to userspace with a @@ -327,6 +371,9 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; + if (vcpu->kvm->enable_hdbss) + kvm_flush_hdbss_buffer(vcpu); + if (ARM_SERROR_PENDING(exception_index)) { /* * The SError is handled by handle_exit_early(). 
If the guest diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index ea7ab43154cc..0347ab14cf11 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1660,7 +1660,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; - unsigned long hva; + unsigned long hva, iss2; bool is_iabt, write_fault, writable; gfn_t gfn; int ret, idx; @@ -1670,6 +1670,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + /* + * HDBSS buffer already flushed when enter handle_trap_exceptions(). + * Nothing to do here. + */ + iss2 = ESR_ELx_ISS2(kvm_vcpu_get_esr(vcpu)); + if (fault_status == ESR_ELx_FSC_PERM && (iss2 & ESR_ELx_HDBSSF)) + return 1; + if (fault_status == ESR_ELx_FSC_FAULT) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { -- 2.39.3

From: eillon <yezhenyu2@huawei.com> The HDBSS feature introduces new assembly registers (HDBSSBR_EL2 and HDBSSPROD_EL2), which depends on the armv9.5-a compilation support. So add ARM64_HDBSS config to control whether enable the HDBSS feature. Signed-off-by: eillon <yezhenyu2@huawei.com> --- arch/arm64/Kconfig | 12 ++++++++++++ arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_mmu.h | 2 ++ arch/arm64/include/asm/sysreg.h | 2 ++ arch/arm64/kvm/arm.c | 8 ++++++++ arch/arm64/kvm/handle_exit.c | 5 ++++- arch/arm64/kvm/hyp/pgtable.c | 4 ++++ arch/arm64/kvm/hyp/vhe/switch.c | 2 ++ arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 2 ++ arch/arm64/kvm/mmu.c | 4 ++++ arch/arm64/kvm/reset.c | 2 ++ include/linux/kvm_host.h | 2 ++ 13 files changed, 49 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9da9d58f1c02..7cbdd0b6259e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2376,6 +2376,18 @@ config ARM64_HAFT endmenu # "ARMv8.8 architectural features" +menu "ARMv9.5 architectural features" + +config ARM64_HDBSS + bool "Enable support for Hardware Dirty state tracking Structure (HDBSS)" + default y + help + Hardware Dirty state tracking Structure(HDBSS) enhances tracking + translation table descriptors’ dirty state to reduce the cost of + surveying for dirtied granules. 
+ +endmenu # "ARMv9.5 architectural features" + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d2b9771b4a82..a58c8b332b21 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -752,6 +752,7 @@ static __always_inline bool system_supports_fpsimd(void) return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); } +#ifdef CONFIG_ARM64_HDBSS static inline bool system_supports_hdbss(void) { u64 mmfr1; @@ -763,6 +764,7 @@ static inline bool system_supports_hdbss(void) return val == ID_AA64MMFR1_EL1_HAFDBS_HDBSS; } +#endif static inline bool system_uses_hw_pan(void) { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 51f99f3d824a..7c7e994cf12c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -641,11 +641,14 @@ struct kvm_vcpu_arch { #ifdef CONFIG_HISI_VIRTCCA_HOST struct virtcca_cvm_tec tec; #endif + +#ifdef CONFIG_ARM64_HDBSS /* HDBSS registers info */ struct { u64 br_el2; u64 prod_el2; } hdbss; +#endif }; /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a76bc71010e7..822d958b55e6 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -310,6 +310,7 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } +#ifdef CONFIG_ARM64_HDBSS static __always_inline void __load_hdbss(struct kvm_vcpu *vcpu) { if (!vcpu->kvm->enable_hdbss) @@ -321,6 +322,7 @@ static __always_inline void __load_hdbss(struct kvm_vcpu *vcpu) dsb(sy); isb(); } +#endif static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 8494aac11824..0d36f20ecd33 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ 
-1031,6 +1031,7 @@ #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +#ifdef CONFIG_ARM64_HDBSS /* * Definitions for the HDBSS feature */ @@ -1042,6 +1043,7 @@ #define HDBSSBR_SZ(br) (((br) & HDBSSBR_EL2_SZ_MASK) >> HDBSSBR_EL2_SZ_SHIFT) #define HDBSSPROD_IDX(prod) (((prod) & HDBSSPROD_EL2_INDEX_MASK) >> HDBSSPROD_EL2_INDEX_SHIFT) +#endif #define ARM64_FEATURE_FIELD_BITS 4 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f7206ab91c9c..d0d4e6bdc06b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -129,6 +129,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } +#ifdef CONFIG_ARM64_HDBSS static int kvm_cap_arm_enable_hdbss(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -192,6 +193,7 @@ static int kvm_cap_arm_enable_hdbss(struct kvm *kvm, return 0; } +#endif int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) @@ -247,9 +249,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = kvm_cvm_enable_cap(kvm, cap); break; #endif +#ifdef CONFIG_ARM64_HDBSS case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: r = kvm_cap_arm_enable_hdbss(kvm, cap); break; +#endif default: r = -EINVAL; break; @@ -503,9 +507,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = static_key_enabled(&virtcca_cvm_is_available); break; #endif +#ifdef CONFIG_ARM64_HDBSS case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: r = system_supports_hdbss(); break; +#endif default: r = 0; } @@ -1837,6 +1843,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { +#ifdef CONFIG_ARM64_HDBSS /* * Flush all CPUs' dirty log buffers to the dirty_bitmap. Called * before reporting dirty_bitmap to userspace. 
KVM flushes the buffers @@ -1848,6 +1855,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) kvm_for_each_vcpu(i, vcpu, kvm) kvm_vcpu_kick(vcpu); +#endif } static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index c9b3ce381018..6919c3858193 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -292,6 +292,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) return arm_exit_handlers[esr_ec]; } +#ifdef CONFIG_ARM64_HDBSS #define HDBSS_ENTRY_VALID_SHIFT 0 #define HDBSS_ENTRY_VALID_MASK (1UL << HDBSS_ENTRY_VALID_SHIFT) #define HDBSS_ENTRY_IPA_SHIFT 12 @@ -335,6 +336,7 @@ static void kvm_flush_hdbss_buffer(struct kvm_vcpu *vcpu) dsb(sy); isb(); } +#endif /* * We may be single-stepping an emulated instruction. If the emulation @@ -371,9 +373,10 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; +#ifdef CONFIG_ARM64_HDBSS if (vcpu->kvm->enable_hdbss) kvm_flush_hdbss_buffer(vcpu); - +#endif if (ARM_SERROR_PENDING(exception_index)) { /* * The SError is handled by handle_exit_early(). 
If the guest diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 056b21a1ca92..874244df723e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -713,8 +713,10 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p if (prot & KVM_PGTABLE_PROT_W) attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; +#ifdef CONFIG_ARM64_HDBSS if (prot & KVM_PGTABLE_PROT_DBM) attr |= KVM_PTE_LEAF_ATTR_HI_S2_DBM; +#endif attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; @@ -1320,8 +1322,10 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_W) set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; +#ifdef CONFIG_ARM64_HDBSS if (prot & KVM_PGTABLE_PROT_DBM) set |= KVM_PTE_LEAF_ATTR_HI_S2_DBM; +#endif if (prot & KVM_PGTABLE_PROT_X) clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 3d6f5891010e..9d315bd54e00 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -225,7 +225,9 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * __activate_traps clear HCR_EL2.TGE (among other things). 
*/ __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); +#ifdef CONFIG_ARM64_HDBSS __load_hdbss(vcpu); +#endif __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 236d07c1b0b8..283e19127591 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -92,7 +92,9 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) __sysreg_restore_el1_state(guest_ctxt); __mpam_guest_load(); +#ifdef CONFIG_ARM64_HDBSS __load_hdbss(vcpu); +#endif vcpu_set_flag(vcpu, SYSREGS_ON_CPU); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0347ab14cf11..3830aa0b07a0 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1586,8 +1586,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; +#ifdef CONFIG_ARM64_HDBSS if (kvm->enable_hdbss && logging_active) prot |= KVM_PGTABLE_PROT_DBM; +#endif if (exec_fault) prot |= KVM_PGTABLE_PROT_X; @@ -1670,6 +1672,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); +#ifdef CONFIG_ARM64_HDBSS /* * HDBSS buffer already flushed when enter handle_trap_exceptions(). * Nothing to do here. 
@@ -1677,6 +1680,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) iss2 = ESR_ELx_ISS2(kvm_vcpu_get_esr(vcpu)); if (fault_status == ESR_ELx_FSC_PERM && (iss2 & ESR_ELx_HDBSSF)) return 1; +#endif if (fault_status == ESR_ELx_FSC_FAULT) { /* Beyond sanitised PARange (which is the IPA limit) */ diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 806080553bc1..0de1094d4e19 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -175,11 +175,13 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_destroy_tec(vcpu); #endif +#ifdef CONFIG_ARM64_HDBSS if (vcpu->arch.hdbss.br_el2) { hdbss_pg = phys_to_page(HDBSSBR_BADDR(vcpu->arch.hdbss.br_el2)); if (hdbss_pg) __free_pages(hdbss_pg, HDBSSBR_SZ(vcpu->arch.hdbss.br_el2)); } +#endif } static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7235e88c726f..560ff9dd2b27 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -842,7 +842,9 @@ struct kvm { struct notifier_block pm_notifier; #endif char stats_id[KVM_STATS_NAME_SIZE]; +#ifdef CONFIG_ARM64_HDBSS bool enable_hdbss; +#endif }; #define kvm_err(fmt, ...) \ -- 2.39.3

反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/15734 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/MIP... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/15734 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/MIP...
participants (2)
-
patchwork bot
-
Zhenyu Ye