From: Jingxian He <hejingxian@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9CC0X
CVE: N/A
Reference: N/A
--------------------------------
Add bounce buffer feature for cvm guest OS:
1) The memory mapped by the cvm guest is secure memory.
2) Qemu/kvm cannot access the secure memory.
3) Use the bounce buffer as memory shared by the cvm guest and qemu/kvm.
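
As an illustration of the intended data path only (a minimal sketch, not part
of this patch; the "cvm-dma-demo" driver name and probe function below are
hypothetical, and the device is assumed to use the dma-direct path): when the
guest boots with cvm_guest=1, a plain dma_alloc_coherent() call is routed
through __dma_direct_alloc_pages() to swiotlb_alloc(), so the buffer comes
from the swiotlb pool that swiotlb_update_mem_attributes() marked as
decrypted (shared) at boot and is therefore accessible to qemu/kvm. Existing
drivers need no changes.

// Hypothetical demo module (not part of this patch).
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int cvm_dma_demo_probe(struct platform_device *pdev)
{
	dma_addr_t dma_handle;
	void *buf;

	/*
	 * With CONFIG_CVM_GUEST=y and cvm_guest=1 on the command line,
	 * this allocation is served from the shared (decrypted) swiotlb
	 * bounce buffer instead of secure guest memory.
	 */
	buf = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &dma_handle, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	dev_info(&pdev->dev, "shared DMA buffer at %pad\n", &dma_handle);
	dma_free_coherent(&pdev->dev, PAGE_SIZE, buf, dma_handle);
	return 0;
}

static struct platform_driver cvm_dma_demo_driver = {
	.probe = cvm_dma_demo_probe,
	.driver = { .name = "cvm-dma-demo" },
};
module_platform_driver(cvm_dma_demo_driver);
MODULE_LICENSE("GPL");
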
Signed-off-by: Jingxian He <hejingxian@huawei.com>
---
 arch/arm64/configs/defconfig           |  1 +
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/arm64/include/asm/cvm_guest.h     | 21 ++++++
 arch/arm64/kvm/Kconfig                 |  8 +++
 arch/arm64/kvm/Makefile                |  1 +
 arch/arm64/kvm/cvm_guest.c             | 91 +++++++++++++++++++++++++
 arch/arm64/mm/mmu.c                    | 12 +++-
 arch/arm64/mm/pageattr.c               |  5 ++
 include/linux/swiotlb.h                | 11 +++
 kernel/dma/direct.c                    | 39 +++++++++++
 kernel/dma/swiotlb.c                   | 94 +++++++++++++++++++++++++-
 11 files changed, 282 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/cvm_guest.h
 create mode 100644 arch/arm64/kvm/cvm_guest.c
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index ace2bf4ad..0ba4538d9 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -111,6 +111,7 @@ CONFIG_ACPI_APEI_EINJ=y
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM=y
 CONFIG_CVM_HOST=y
+CONFIG_CVM_GUEST=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index e298ca7e5..25a5fa5c7 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -763,6 +763,7 @@ CONFIG_IRQ_BYPASS_MANAGER=y
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM=y
 CONFIG_CVM_HOST=y
+CONFIG_CVM_GUEST=y
 CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_HAVE_KVM_IRQFD=y
 CONFIG_HAVE_KVM_IRQ_ROUTING=y
diff --git a/arch/arm64/include/asm/cvm_guest.h b/arch/arm64/include/asm/cvm_guest.h
new file mode 100644
index 000000000..b62685c1f
--- /dev/null
+++ b/arch/arm64/include/asm/cvm_guest.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
+ */
+#ifndef __ASM_CVM_GUEST_H
+#define __ASM_CVM_GUEST_H
+
+#ifdef CONFIG_CVM_GUEST
+static inline bool cvm_mem_encrypt_active(void)
+{
+	return false;
+}
+
+int set_cvm_memory_encrypted(unsigned long addr, int numpages);
+
+int set_cvm_memory_decrypted(unsigned long addr, int numpages);
+
+bool is_cvm_world(void);
+
+#endif
+#endif
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index dd7c0b1de..60259b979 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -54,6 +54,14 @@ config CVM_HOST
 
 	  If unsure, say N.
 
+config CVM_GUEST
+	bool "Enable cvm guest run"
+	depends on KVM && SWIOTLB && ARM64
+	help
+	  Support CVM guest based on S-EL2
+
+	  If unsure, say N.
+
 if KVM
 
 source "virt/kvm/Kconfig"
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 5fd1b8be1..8bb4d4ef1 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -30,3 +30,4 @@ kvm-$(CONFIG_CVM_HOST) += cvm_exit.o
 kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o
 
 obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/
+obj-$(CONFIG_CVM_GUEST) += cvm_guest.o
diff --git a/arch/arm64/kvm/cvm_guest.c b/arch/arm64/kvm/cvm_guest.c
new file mode 100644
index 000000000..f2d367214
--- /dev/null
+++ b/arch/arm64/kvm/cvm_guest.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+
+#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
+#include <asm/tlbflush.h>
+
+#define CVM_PTE_NS_MASK 0x20
+static bool cvm_guest_enable __read_mostly;
+
+static int __init setup_cvm_guest(char *str)
+{
+	int ret;
+	unsigned int val;
+
+	if (!str)
+		return 0;
+
+	cvm_guest_enable = false;
+	ret = kstrtouint(str, 10, &val);
+	if (ret) {
+		pr_warn("Unable to parse cvm_guest.\n");
+	} else {
+		if (val)
+			cvm_guest_enable = true;
+	}
+	return ret;
+}
+early_param("cvm_guest", setup_cvm_guest);
+
+bool is_cvm_world(void)
+{
+	return cvm_guest_enable;
+}
+
+static int change_page_range_cvm(pte_t *ptep, unsigned long addr, void *data)
+{
+	bool encrypt = (bool)data;
+	pte_t pte = READ_ONCE(*ptep);
+
+	if (encrypt) {
+		if (!(pte.pte & CVM_PTE_NS_MASK))
+			return 0;
+		pte.pte = pte.pte & (~CVM_PTE_NS_MASK);
+	} else {
+		if (pte.pte & CVM_PTE_NS_MASK)
+			return 0;
+		/* Set NS BIT */
+		pte.pte = pte.pte | CVM_PTE_NS_MASK;
+	}
+	set_pte(ptep, pte);
+
+	return 0;
+}
+
+static int __change_memory_common_cvm(unsigned long start, unsigned long size, bool encrypt)
+{
+	int ret;
+
+	ret = apply_to_page_range(&init_mm, start, size, change_page_range_cvm, (void *)encrypt);
+	flush_tlb_kernel_range(start, start + size);
+	return ret;
+}
+
+static int __set_memory_encrypted(unsigned long addr,
+				  int numpages,
+				  bool encrypt)
+{
+	if (!is_cvm_world())
+		return 0;
+
+	WARN_ON(!__is_lm_address(addr));
+	return __change_memory_common_cvm(addr, PAGE_SIZE * numpages, encrypt);
+}
+
+int set_cvm_memory_encrypted(unsigned long addr, int numpages)
+{
+	return __set_memory_encrypted(addr, numpages, true);
+}
+
+int set_cvm_memory_decrypted(unsigned long addr, int numpages)
+{
+	return __set_memory_encrypted(addr, numpages, false);
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 804d5197c..d5b6a9c19 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -38,6 +38,7 @@
 #include <asm/ptdump.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
+#include <asm/cvm_guest.h>
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
@@ -497,6 +498,11 @@ static void __init map_mem(pgd_t *pgdp)
 	if (rodata_full || debug_pagealloc_enabled())
 		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
+#ifdef CONFIG_CVM_GUEST
+	if (is_cvm_world())
+		flags |= (NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+#endif
+
 #ifdef CONFIG_KFENCE
 	/*
 	 * KFENCE requires linear map to be mapped at page granularity, so
@@ -1513,10 +1519,14 @@ int arch_add_memory(int nid, u64 start, u64 size,
 		return -EINVAL;
 	}
 
-
 	if (rodata_full || debug_pagealloc_enabled())
 		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
+#ifdef CONFIG_CVM_GUEST
+	if (is_cvm_world())
+		flags |= (NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+#endif
+
 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
 			     size, params->pgprot, __pgd_pgtable_alloc,
 			     flags);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0bc12dbf2..6773f9250 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -11,6 +11,7 @@
 #include <asm/cacheflush.h>
 #include <asm/set_memory.h>
 #include <asm/tlbflush.h>
+#include <asm/cvm_guest.h>
 
 struct page_change_data {
 	pgprot_t set_mask;
@@ -188,6 +189,10 @@ int set_direct_map_default_noflush(struct page *page)
 
 void __kernel_map_pages(struct page *page, int numpages, int enable)
 {
+#ifdef CONFIG_CVM_GUEST
+	if (is_cvm_world())
+		return;
+#endif
 	if (!debug_pagealloc_enabled() && !rodata_full)
 		return;
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5d2dbe7e0..f4d2de097 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/limits.h>
+#include <asm/cvm_guest.h>
 
 struct device;
 struct page;
@@ -75,6 +76,16 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 	return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
 
+#ifdef CONFIG_CVM_GUEST
+static inline bool is_swiotlb_for_alloc(struct device *dev)
+{
+	return is_cvm_world();
+}
+
+struct page *swiotlb_alloc(struct device *dev, size_t size);
+bool swiotlb_free(struct device *dev, struct page *page, size_t size);
+#endif
+
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 2922250f9..075e85cfb 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -75,6 +75,20 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
 }
 
+#ifdef CONFIG_CVM_GUEST
+static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size)
+{
+	struct page *page = swiotlb_alloc(dev, size);
+
+	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+		swiotlb_free(dev, page, size);
+		return NULL;
+	}
+
+	return page;
+}
+#endif
+
 static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 		gfp_t gfp)
 {
@@ -84,6 +98,11 @@
 
 	WARN_ON_ONCE(!PAGE_ALIGNED(size));
 
+#ifdef CONFIG_CVM_GUEST
+	if (is_swiotlb_for_alloc(dev))
+		return dma_direct_alloc_swiotlb(dev, size);
+#endif
+
 	gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
 					   &phys_limit);
 	page = dma_alloc_contiguous(dev, size, gfp);
@@ -237,6 +256,11 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 		return NULL;
 	}
 out_free_pages:
+#ifdef CONFIG_CVM_GUEST
+	if (is_swiotlb_for_alloc(dev) &&
+	    swiotlb_free(dev, page, size))
+		return NULL;
+#endif
 	dma_free_contiguous(dev, page, size);
 	return NULL;
 }
@@ -271,6 +295,11 @@ void dma_direct_free(struct device *dev, size_t size,
 	else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
 		arch_dma_clear_uncached(cpu_addr, size);
 
+#ifdef CONFIG_CVM_GUEST
+	if (is_swiotlb_for_alloc(dev) &&
+	    swiotlb_free(dev, dma_direct_to_page(dev, dma_addr), size))
+		return;
+#endif
 	dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
 }
 
@@ -307,6 +336,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
 	*dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
 	return page;
 out_free_pages:
+#ifdef CONFIG_CVM_GUEST
+	if (is_swiotlb_for_alloc(dev) &&
+	    swiotlb_free(dev, page, size))
+		return NULL;
+#endif
 	dma_free_contiguous(dev, page, size);
 	return NULL;
 }
@@ -325,6 +359,11 @@ void dma_direct_free_pages(struct device *dev, size_t size,
 	if (force_dma_unencrypted(dev))
 		set_memory_encrypted((unsigned long)vaddr, PFN_UP(size));
 
+#ifdef CONFIG_CVM_GUEST
+	if (is_swiotlb_for_alloc(dev) &&
+	    swiotlb_free(dev, page, size))
+		return;
+#endif
 	dma_free_contiguous(dev, page, size);
 }
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d897d1613..254489f50 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -46,6 +46,7 @@
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/iommu-helper.h>
+#include <asm/cvm_guest.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/swiotlb.h>
@@ -183,6 +184,19 @@ static inline unsigned long nr_slots(u64 val)
 	return DIV_ROUND_UP(val, IO_TLB_SIZE);
 }
 
+#ifdef CONFIG_CVM_GUEST
+void __init cvm_swiotlb_update_mem_attributes(void)
+{
+	void *vaddr;
+	unsigned long bytes;
+
+	vaddr = phys_to_virt(io_tlb_start);
+	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
+	set_cvm_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+	memset(vaddr, 0, bytes);
+}
+#endif
+
 /*
  * Early SWIOTLB allocation may be too early to allow an architecture to
  * perform the desired operations. This function allows the architecture to
@@ -194,6 +208,12 @@ void __init swiotlb_update_mem_attributes(void)
 	void *vaddr;
 	unsigned long bytes;
 
+#ifdef CONFIG_CVM_GUEST
+	if (is_cvm_world()) {
+		cvm_swiotlb_update_mem_attributes();
+		return;
+	}
+#endif
 	if (no_iotlb_memory || late_alloc)
 		return;
 
@@ -265,8 +285,13 @@ swiotlb_init(int verbose)
 
 	/* Get IO TLB memory from the low pages */
 	vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) {
+#ifdef CONFIG_CVM_GUEST
+		if (is_cvm_world())
+			swiotlb_update_mem_attributes();
+#endif
 		return;
+	}
 
 	if (io_tlb_start) {
 		memblock_free_early(io_tlb_start,
@@ -772,3 +797,70 @@ static int __init swiotlb_create_debugfs(void)
 late_initcall(swiotlb_create_debugfs);
 
 #endif
+
+#ifdef CONFIG_CVM_GUEST
+struct page *swiotlb_alloc(struct device *dev, size_t size)
+{
+	phys_addr_t tlb_addr;
+	int index;
+
+	index = find_slots(dev, 0, size);
+	if (index == -1)
+		return NULL;
+
+	tlb_addr = slot_addr(io_tlb_start, index);
+	return pfn_to_page(PFN_DOWN(tlb_addr));
+}
+
+static void swiotlb_release_slots(struct device *hwdev, phys_addr_t tlb_addr,
+				  size_t alloc_size)
+{
+	unsigned long flags;
+	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+	int i, count, nslots = nr_slots(alloc_size + offset);
+	int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
+
+	/*
+	 * Return the buffer to the free list by setting the corresponding
+	 * entries to indicate the number of contiguous entries available.
+	 * While returning the entries to the free list, we merge the entries
+	 * with slots below and above the pool being returned.
+	 */
+	spin_lock_irqsave(&io_tlb_lock, flags);
+	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+		count = io_tlb_list[index + nslots];
+	else
+		count = 0;
+
+	/*
+	 * Step 1: return the slots to the free list, merging the slots with
+	 * superceeding slots
+	 */
+	for (i = index + nslots - 1; i >= index; i--) {
+		io_tlb_list[i] = ++count;
+		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	}
+
+	/*
+	 * Step 2: merge the returned slots with the preceding slots, if
+	 * available (non zero)
+	 */
+	for (i = index - 1;
+	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
+	     i--)
+		io_tlb_list[i] = ++count;
+	io_tlb_used -= nslots;
+	spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+
+bool swiotlb_free(struct device *dev, struct page *page, size_t size)
+{
+	phys_addr_t tlb_addr = page_to_phys(page);
+
+	if (!is_swiotlb_buffer(tlb_addr))
+		return false;
+
+	swiotlb_release_slots(dev, tlb_addr, size);
+	return true;
+}
+#endif