From: Sean Christopherson sean.j.christopherson@intel.com
mainline inclusion from mainline-v5.4-rc1 commit 4af7715110a2617fc40ac2c1232f664019269f3a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4MKP4 CVE: NA
--------------------------------
When shadow paging is enabled, KVM tracks the allowed access type for MMIO SPTEs so that it can do a permission check on a MMIO GVA cache hit without having to walk the guest's page tables. The tracking is done by retaining the WRITE and USER bits of the access when inserting the MMIO SPTE (read access is implicitly allowed), which allows the MMIO page fault handler to retrieve and cache the WRITE/USER bits from the SPTE.
Unfortunately for EPT, the mask used to retain the WRITE/USER bits is hardcoded using the x86 paging versions of the bits. This funkiness happens to work because KVM uses a completely different mask/value for MMIO SPTEs when EPT is enabled, and the EPT mask/value just happens to overlap exactly with the x86 WRITE/USER bits[*].
Explicitly define the access mask for MMIO SPTEs to accurately reflect that EPT does not want to incorporate any access bits into the SPTE, and so that KVM isn't subtly relying on EPT's WX bits always being set in MMIO SPTEs, e.g. attempting to use other bits for experimentation breaks horribly.
Note, vcpu_match_mmio_gva() explicits prevents matching GVA==0, and all TDP flows explicit set mmio_gva to 0, i.e. zeroing vcpu->arch.access for EPT has no (known) functional impact.
[*] Using WX to generate EPT misconfigurations (equivalent to reserved bit page fault) ensures KVM can employ its MMIO page fault tricks even platforms without reserved address bits.
Fixes: ce88decffd17 ("KVM: MMU: mmio page fault support") Signed-off-by: Sean Christopherson sean.j.christopherson@intel.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Jackie Liu liuyun01@kylinos.cn Signed-off-by: Laibin Qiu qiulaibin@huawei.com --- arch/x86/kvm/mmu.c | 15 +++++++++------ arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/vmx.c | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 357f34904cee..e04e0195d024 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -239,6 +239,7 @@ static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; static u64 __read_mostly shadow_mmio_value; +static u64 __read_mostly shadow_mmio_access_mask; static u64 __read_mostly shadow_present_mask; static u64 __read_mostly shadow_me_mask;
@@ -296,11 +297,13 @@ kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); #include "mmutrace.h"
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) { + BUG_ON((u64)(unsigned)access_mask != access_mask); BUG_ON((mmio_mask & mmio_value) != mmio_value); shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; + shadow_mmio_access_mask = access_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
@@ -394,7 +397,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, u64 mask = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT;
- access &= ACC_WRITE_MASK | ACC_USER_MASK; + access &= shadow_mmio_access_mask; mask |= shadow_mmio_value | access; mask |= gpa | shadow_nonpresent_or_rsvd_mask; mask |= (gpa & shadow_nonpresent_or_rsvd_mask) @@ -421,8 +424,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
static unsigned get_mmio_spte_access(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask; - return (spte & ~mask) & ~PAGE_MASK; + return spte & shadow_mmio_access_mask; }
static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, @@ -3328,7 +3330,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, }
if (unlikely(is_noslot_pfn(pfn))) - vcpu_cache_mmio_info(vcpu, gva, gfn, access); + vcpu_cache_mmio_info(vcpu, gva, gfn, + access & shadow_mmio_access_mask);
return false; } @@ -6086,7 +6089,7 @@ static void kvm_set_mmio_spte_mask(void) if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) mask &= ~1ull;
- kvm_mmu_set_mmio_spte_mask(mask, mask); + kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); }
int kvm_mmu_module_init(void) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index f7b2de7b6382..a8b75ef4499e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -56,7 +56,7 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; }
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask);
void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 347d045a5567..6faae26176bb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6593,7 +6593,7 @@ static void ept_set_mmio_spte_mask(void) * of an EPT paging-structure entry is 110b (write/execute). */ kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE); + VMX_EPT_MISCONFIG_WX_VALUE, 0); }
#define VMX_XSS_EXIT_BITMAP 0