From: Sean Christopherson <sean.j.christopherson@intel.com>
mainline inclusion
from mainline-v5.4-rc1
commit 4af7715110a2617fc40ac2c1232f664019269f3a
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4MKP4
CVE: NA
--------------------------------
When shadow paging is enabled, KVM tracks the allowed access type for MMIO SPTEs so that it can do a permission check on an MMIO GVA cache hit without having to walk the guest's page tables. The tracking is done by retaining the WRITE and USER bits of the access when inserting the MMIO SPTE (read access is implicitly allowed), which allows the MMIO page fault handler to retrieve and cache the WRITE/USER bits from the SPTE.
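As a rough standalone sketch of that scheme (illustrative C, not the kernel code; mmio_spte_with_access() and cached_mmio_access() are hypothetical names, and the bit values mirror KVM's ACC_WRITE_MASK and ACC_USER_MASK):

#include <stdint.h>

#define ACC_WRITE_MASK (1ULL << 1)	/* x86 PTE.W   */
#define ACC_USER_MASK  (1ULL << 2)	/* x86 PTE.U/S */

/* On insertion: retain only WRITE/USER; read access is implicit. */
static uint64_t mmio_spte_with_access(uint64_t spte, uint64_t access)
{
	return spte | (access & (ACC_WRITE_MASK | ACC_USER_MASK));
}

/* On an MMIO fault: recover the cached bits without a guest walk. */
static uint64_t cached_mmio_access(uint64_t spte)
{
	return spte & (ACC_WRITE_MASK | ACC_USER_MASK);
}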
Unfortunately for EPT, the mask used to retain the WRITE/USER bits is hardcoded using the x86 paging versions of the bits. This funkiness happens to work because KVM uses a completely different mask/value for MMIO SPTEs when EPT is enabled, and the EPT mask/value just happens to overlap exactly with the x86 WRITE/USER bits[*].
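The coincidence is visible directly in the mask values (taken from KVM's x86 access macros and the VMX EPT definitions); a compile-time check makes the overlap explicit:

#define ACC_WRITE_MASK          (1ULL << 1)	/* x86 PTE.W,   bit 1 */
#define ACC_USER_MASK           (1ULL << 2)	/* x86 PTE.U/S, bit 2 */
#define VMX_EPT_WRITABLE_MASK   0x2ull		/* EPT.W, bit 1 */
#define VMX_EPT_EXECUTABLE_MASK 0x4ull		/* EPT.X, bit 2 */
#define VMX_EPT_MISCONFIG_WX_VALUE \
	(VMX_EPT_WRITABLE_MASK | VMX_EPT_EXECUTABLE_MASK)	/* 110b */

/* Masking with the x86 W/U bits happens to keep EPT's WX value intact. */
_Static_assert((ACC_WRITE_MASK | ACC_USER_MASK) == VMX_EPT_MISCONFIG_WX_VALUE,
	       "x86 WRITE/USER and EPT WX occupy the same SPTE bits");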
Explicitly define the access mask for MMIO SPTEs to accurately reflect that EPT does not want to incorporate any access bits into the SPTE, and so that KVM isn't subtly relying on EPT's WX bits always being set in MMIO SPTEs, e.g. attempting to use other bits for experimentation breaks horribly.
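A minimal self-contained model of the post-patch behavior (illustrative C, not the kernel code; set_mmio_access_mask() and mmio_spte_access() are hypothetical stand-ins for the setup and lookup paths):

#include <assert.h>
#include <stdint.h>

#define ACC_WRITE_MASK (1ULL << 1)	/* x86 PTE.W   */
#define ACC_USER_MASK  (1ULL << 2)	/* x86 PTE.U/S */

/* Models shadow_mmio_access_mask: configured once at setup time. */
static uint64_t shadow_mmio_access_mask;

static void set_mmio_access_mask(uint64_t access_mask)
{
	shadow_mmio_access_mask = access_mask;
}

/* Models get_mmio_spte_access(): the mask decides what is recovered. */
static uint64_t mmio_spte_access(uint64_t spte)
{
	return spte & shadow_mmio_access_mask;
}

int main(void)
{
	/* An MMIO SPTE whose low bits happen to look like W/U (or EPT WX). */
	uint64_t spte = ACC_WRITE_MASK | ACC_USER_MASK;

	/* Shadow paging: WRITE/USER are cached and recoverable. */
	set_mmio_access_mask(ACC_WRITE_MASK | ACC_USER_MASK);
	assert(mmio_spte_access(spte) == (ACC_WRITE_MASK | ACC_USER_MASK));

	/* EPT: access_mask == 0, so no access bits are ever cached. */
	set_mmio_access_mask(0);
	assert(mmio_spte_access(spte) == 0);
	return 0;
}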
Note, vcpu_match_mmio_gva() explicitly prevents matching GVA==0, and all TDP flows explicitly set mmio_gva to 0, i.e. zeroing vcpu->arch.access for EPT has no (known) functional impact.
[*] Using WX to generate EPT misconfigurations (equivalent to a reserved bit page fault) ensures KVM can employ its MMIO page fault tricks even on platforms without reserved address bits.
Fixes: ce88decffd17 ("KVM: MMU: mmio page fault support")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jackie Liu <liuyun01@kylinos.cn> #openEuler_contributor
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/x86/kvm/mmu.c | 15 +++++++++------
 arch/x86/kvm/mmu.h |  2 +-
 arch/x86/kvm/vmx.c |  2 +-
 3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 357f34904cee3..e04e0195d0241 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -239,6 +239,7 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
 static u64 __read_mostly shadow_mmio_value;
+static u64 __read_mostly shadow_mmio_access_mask;
 static u64 __read_mostly shadow_present_mask;
 static u64 __read_mostly shadow_me_mask;
 
@@ -296,11 +297,13 @@ kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
 
 #include "mmutrace.h"
 
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
 {
+	BUG_ON((u64)(unsigned)access_mask != access_mask);
 	BUG_ON((mmio_mask & mmio_value) != mmio_value);
 	shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
 	shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
+	shadow_mmio_access_mask = access_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 
@@ -394,7 +397,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
 	u64 mask = generation_mmio_spte_mask(gen);
 	u64 gpa = gfn << PAGE_SHIFT;
 
-	access &= ACC_WRITE_MASK | ACC_USER_MASK;
+	access &= shadow_mmio_access_mask;
 	mask |= shadow_mmio_value | access;
 	mask |= gpa | shadow_nonpresent_or_rsvd_mask;
 	mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
@@ -421,8 +424,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask;
-	return (spte & ~mask) & ~PAGE_MASK;
+	return spte & shadow_mmio_access_mask;
 }
 
 static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -3328,7 +3330,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	}
 
 	if (unlikely(is_noslot_pfn(pfn)))
-		vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+		vcpu_cache_mmio_info(vcpu, gva, gfn,
+				     access & shadow_mmio_access_mask);
 
 	return false;
 }
@@ -6086,7 +6089,7 @@ static void kvm_set_mmio_spte_mask(void)
 	if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
 		mask &= ~1ull;
 
-	kvm_mmu_set_mmio_spte_mask(mask, mask);
+	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
 }
 
 int kvm_mmu_module_init(void)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index f7b2de7b6382f..a8b75ef4499e4 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -56,7 +56,7 @@ static inline u64 rsvd_bits(int s, int e)
 	return ((1ULL << (e - s + 1)) - 1) << s;
 }
 
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask);
 
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4a79d878931a0..98663dc600397 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6593,7 +6593,7 @@ static void ept_set_mmio_spte_mask(void)
 	 * of an EPT paging-structure entry is 110b (write/execute).
 	 */
 	kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
-				   VMX_EPT_MISCONFIG_WX_VALUE);
+				   VMX_EPT_MISCONFIG_WX_VALUE, 0);
 }
 
 #define VMX_XSS_EXIT_BITMAP 0