From: wanghaibin wanghaibin.wang@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8K8HP CVE: NA
--------------------------------
Implement the shadow device, which establishes the relationship between a virtual device and its back-end virtual platform device.
Signed-off-by: wanghaibin wanghaibin.wang@huawei.com Signed-off-by: Zenghui Yu yuzenghui@huawei.com Signed-off-by: Kunkun Jiang jiangkunkun@huawei.com Signed-off-by: Dongxu Sun sundongxu3@huawei.com --- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/arm.c | 38 ++++ arch/arm64/kvm/vgic/shadow_dev.c | 327 +++++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic-init.c | 3 + include/kvm/arm_vgic.h | 24 +++ include/uapi/linux/kvm.h | 9 + 6 files changed, 402 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/vgic/shadow_dev.c
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 6dc8c914c99b..395d65165367 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -22,7 +22,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ - vgic/vgic-its.o vgic/vgic-debug.o + vgic/vgic-its.o vgic/shadow_dev.o vgic/vgic-debug.o
kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o
 obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 885975fcb918..3f28965095c4 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -259,6 +259,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_STEAL_TIME:
 		r = kvm_arm_pvtime_supported();
 		break;
+	case KVM_CAP_ARM_VIRT_MSI_BYPASS:
+		r = sdev_enable;
+		break;
 	default:
 		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
 		break;
@@ -1444,6 +1447,41 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 		return 0;
 	}
+	case KVM_CREATE_SHADOW_DEV: {
+		struct kvm_master_dev_info *mdi;
+		u32 nvectors;
+		int ret;
+
+		if (get_user(nvectors, (const u32 __user *)argp))
+			return -EFAULT;
+		if (!nvectors)
+			return -EINVAL;
+
+		mdi = memdup_user(argp, sizeof(*mdi) + nvectors * sizeof(mdi->msi[0]));
+		if (IS_ERR(mdi))
+			return PTR_ERR(mdi);
+
+		/*
+		 * nvectors was fetched separately above and userspace may have
+		 * changed it since.  Pin the copy to the value that sized the
+		 * buffer so that nothing can read past the end of msi[].
+		 */
+		mdi->nvectors = nvectors;
+
+		ret = kvm_shadow_dev_create(kvm, mdi);
+		kfree(mdi);
+
+		return ret;
+	}
+	case KVM_DEL_SHADOW_DEV: {
+		u32 devid;
+
+		if (get_user(devid, (const u32 __user *)argp))
+			return -EFAULT;
+
+		kvm_shadow_dev_delete(kvm, devid);
+		return 0;
+	}
 	default:
 		return -EINVAL;
 	}
@@ -1885,6 +1923,12 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
 	kvm_arm_resume_guest(irqfd->kvm);
 }
 
+/* Tear down every shadow device still attached to @kvm. */
+void kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+	kvm_shadow_dev_delete_all(kvm);
+}
+
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
@@ -1950,6 +1994,8 @@ int kvm_arch_init(void *opaque)
 	else
 		kvm_info("Hyp mode initialized successfully\n");
 
+	kvm_shadow_dev_init();
+
 	return 0;
out_hyp:
diff --git a/arch/arm64/kvm/vgic/shadow_dev.c b/arch/arm64/kvm/vgic/shadow_dev.c
new file mode 100644
index 000000000000..6774801e6024
--- /dev/null
+++ b/arch/arm64/kvm/vgic/shadow_dev.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019-2020 HUAWEI TECHNOLOGIES CO., LTD., All Rights Reserved.
+ * Author: Wanghaibin wanghaibin.wang@huawei.com
+ */
+
+#include <linux/irq.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+
+static struct workqueue_struct *sdev_cleanup_wq;
+static bool virt_msi_bypass;
+bool sdev_enable;
+
+static void shadow_dev_destroy(struct work_struct *work);
+static void sdev_virt_pdev_delete(struct platform_device *pdev);
+
+/*
+ * Inject the MSI described by @e by marking its backing host interrupt
+ * pending.
+ *
+ * @e->cache.data is taken to be the shadow device and @e->msi.data is used
+ * unchecked as the vector index, so the entry must already have been
+ * validated (NOTE(review): presumably via kvm_shadow_dev_get() -- the call
+ * sites are not part of this file, confirm).
+ *
+ * Returns the result of irq_set_irqchip_state().
+ */
+int shadow_dev_virq_bypass_inject(struct kvm *kvm,
+				  struct kvm_kernel_irq_routing_entry *e)
+{
+	struct shadow_dev *sdev = e->cache.data;
+	u32 vec = e->msi.data;
+	u32 host_irq = sdev->host_irq[vec];
+	int ret;
+
+	ret = irq_set_irqchip_state(host_irq, IRQCHIP_STATE_PENDING, true);
+	WARN_RATELIMIT(ret, "IRQ %u\n", host_irq);
+
+	return ret;
+}
+
+/*
+ * Find the shadow device that backs @msi.
+ *
+ * Returns the device whose devid equals @msi->devid, provided @msi->data is
+ * a vector below nvecs whose bit is set in the enable bitmap; NULL otherwise
+ * or when shadow devices are disabled.
+ *
+ * Must be called with the dist->sdev_list_lock held.
+ */
+struct shadow_dev *kvm_shadow_dev_get(struct kvm *kvm, struct kvm_msi *msi)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct shadow_dev *sdev;
+
+	if (!sdev_enable)
+		return NULL;
+
+	list_for_each_entry(sdev, &dist->sdev_list_head, entry) {
+		if (sdev->devid != msi->devid)
+			continue;
+
+		if (sdev->nvecs <= msi->data ||
+		    !test_bit(msi->data, sdev->enable))
+			break;
+
+		return sdev;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate and register the "virt_plat_dev" platform device backing @sdev.
+ *
+ * The vector count is published through drvdata.  It points at sdev->nvecs
+ * rather than at a local variable, so the pointer stays valid for as long as
+ * the platform device exists (NOTE(review): the driver consuming drvdata is
+ * not visible here; this assumes it reads a u32 through the pointer).
+ *
+ * Returns the new device or an ERR_PTR().
+ */
+static struct platform_device *sdev_virt_pdev_add(struct shadow_dev *sdev)
+{
+	struct platform_device *virtdev;
+	int ret = -ENOMEM;
+
+	virtdev = platform_device_alloc("virt_plat_dev", PLATFORM_DEVID_AUTO);
+	if (!virtdev) {
+		kvm_err("Allocate virtual platform device failed\n");
+		goto out;
+	}
+
+	dev_set_drvdata(&virtdev->dev, &sdev->nvecs);
+
+	ret = platform_device_add(virtdev);
+	if (ret) {
+		kvm_err("Add virtual platform device failed (%d)\n", ret);
+		goto put_device;
+	}
+
+	return virtdev;
+
+put_device:
+	platform_device_put(virtdev);
+out:
+	return ERR_PTR(ret);
+}
+
+/* Fill @e, zeroed first, with the MSI of vector @vec of @sdev. */
+static void sdev_fill_irq_entry(struct shadow_dev *sdev, u32 vec,
+				struct kvm_kernel_irq_routing_entry *e)
+{
+	memset(e, 0, sizeof(*e));
+	e->msi.address_lo = sdev->msi[vec].address_lo;
+	e->msi.address_hi = sdev->msi[vec].address_hi;
+	e->msi.data = sdev->msi[vec].data;
+	e->msi.flags = sdev->msi[vec].flags;
+	e->msi.devid = sdev->msi[vec].devid;
+}
+
+/*
+ * Set up forwarding for the MSIs of the platform device, one per vector.
+ *
+ * A vector whose forwarding cannot be set up is logged and left cleared in
+ * sdev->enable; that is not treated as fatal.
+ *
+ * Returns 0, or -ENOMEM if the per-vector bookkeeping cannot be allocated.
+ */
+static int sdev_virq_bypass_active(struct kvm *kvm, struct shadow_dev *sdev)
+{
+	struct kvm_kernel_irq_routing_entry irq_entry;
+	struct msi_desc *desc;
+	u32 vec = 0;
+
+	sdev->host_irq = kcalloc(sdev->nvecs, sizeof(*sdev->host_irq),
+				 GFP_KERNEL);
+	sdev->enable = bitmap_zalloc(sdev->nvecs, GFP_KERNEL);
+	if (!sdev->enable || !sdev->host_irq) {
+		kfree(sdev->host_irq);
+		bitmap_free(sdev->enable);
+		sdev->host_irq = NULL;
+		sdev->enable = NULL;
+		return -ENOMEM;
+	}
+
+	for_each_msi_entry(desc, &sdev->pdev->dev) {
+		/* Never index past the nvecs slots allocated above. */
+		if (vec >= sdev->nvecs)
+			break;
+
+		sdev_fill_irq_entry(sdev, vec, &irq_entry);
+		if (!kvm_vgic_v4_set_forwarding(kvm, desc->irq,
+						&irq_entry)) {
+			set_bit(vec, sdev->enable);
+			sdev->host_irq[vec] = desc->irq;
+		} else {
+			/*
+			 * Can not use shadow device for direct injection,
+			 * though not fatal...
+			 */
+			kvm_err("Shadow device set (%d) forwarding failed\n",
+				desc->irq);
+		}
+		vec++;
+	}
+
+	return 0;
+}
+
+/* Copy the sdev->nvecs MSI descriptions of @mdi into sdev->msi. */
+static void sdev_msi_entry_init(struct kvm_master_dev_info *mdi,
+				struct shadow_dev *sdev)
+{
+	u32 i;
+
+	for (i = 0; i < sdev->nvecs; i++) {
+		sdev->msi[i].address_lo = mdi->msi[i].address_lo;
+		sdev->msi[i].address_hi = mdi->msi[i].address_hi;
+		sdev->msi[i].data = mdi->msi[i].data;
+		sdev->msi[i].flags = mdi->msi[i].flags;
+		sdev->msi[i].devid = mdi->msi[i].devid;
+	}
+}
+
+/*
+ * Create a shadow device for @kvm from @mdi and add it to the VM's list.
+ *
+ * @mdi must carry mdi->nvectors msi[] entries; the guest device id is taken
+ * from msi[0].devid.
+ *
+ * Returns 0 on success, -EINVAL if shadow devices are disabled or @mdi has no
+ * vectors, or another negative errno on failure.
+ */
+int kvm_shadow_dev_create(struct kvm *kvm, struct kvm_master_dev_info *mdi)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct shadow_dev *sdev;
+	struct kvm_msi *msi;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * This is reachable from the KVM_CREATE_SHADOW_DEV ioctl, so bad input
+	 * is rejected quietly rather than with a WARN.  msi[0] is read below,
+	 * which requires at least one vector.
+	 */
+	if (!sdev_enable || !mdi->nvectors)
+		return -EINVAL;
+
+	ret = -ENOMEM;
+	sdev = kzalloc(sizeof(struct shadow_dev), GFP_KERNEL);
+	if (!sdev)
+		return ret;
+
+	sdev->nvecs = mdi->nvectors;
+
+	msi = kcalloc(sdev->nvecs, sizeof(struct kvm_msi), GFP_KERNEL);
+	if (!msi)
+		goto free_sdev;
+
+	sdev->msi = msi;
+	sdev_msi_entry_init(mdi, sdev);
+	sdev->devid = sdev->msi[0].devid;
+
+	sdev->pdev = sdev_virt_pdev_add(sdev);
+	if (IS_ERR(sdev->pdev)) {
+		ret = PTR_ERR(sdev->pdev);
+		goto free_sdev_msi;
+	}
+
+	ret = sdev_virq_bypass_active(kvm, sdev);
+	if (ret)
+		goto delete_virtdev;
+
+	sdev->kvm = kvm;
+	INIT_WORK(&sdev->destroy, shadow_dev_destroy);
+
+	raw_spin_lock_irqsave(&dist->sdev_list_lock, flags);
+	list_add_tail(&sdev->entry, &dist->sdev_list_head);
+	raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags);
+
+	kvm_info("Create shadow device: 0x%x\n", sdev->devid);
+	return ret;
+
+delete_virtdev:
+	sdev_virt_pdev_delete(sdev->pdev);
+free_sdev_msi:
+	kfree(sdev->msi);
+free_sdev:
+	kfree(sdev);
+	return ret;
+}
+
+/* Unregister the platform device created by sdev_virt_pdev_add(). */
+static void sdev_virt_pdev_delete(struct platform_device *pdev)
+{
+	platform_device_unregister(pdev);
+}
+
+/*
+ * Undo sdev_virq_bypass_active(): unset forwarding for each vector and free
+ * the per-vector bookkeeping.
+ *
+ * The routing entry is built on the stack for every vector so that teardown
+ * does not depend on a memory allocation that could fail and leave both the
+ * forwarding and the host_irq/enable arrays behind.
+ */
+static void sdev_virq_bypass_deactive(struct kvm *kvm, struct shadow_dev *sdev)
+{
+	struct kvm_kernel_irq_routing_entry irq_entry;
+	struct msi_desc *desc;
+	u32 vec = 0;
+
+	for_each_msi_entry(desc, &sdev->pdev->dev) {
+		if (vec >= sdev->nvecs)
+			break;
+
+		sdev_fill_irq_entry(sdev, vec, &irq_entry);
+		if (!kvm_vgic_v4_unset_forwarding(kvm, desc->irq,
+						  &irq_entry)) {
+			clear_bit(vec, sdev->enable);
+			sdev->host_irq[vec] = 0;
+		} else {
+			kvm_err("Shadow device unset (%d) forwarding failed\n",
+				desc->irq);
+		}
+		vec++;
+	}
+
+	kfree(sdev->host_irq);
+	bitmap_free(sdev->enable);
+	sdev->host_irq = NULL;
+	sdev->enable = NULL;
+
+	/* FIXME: no error handling */
+}
+
+/* Work handler: tear down and free a shadow device already off the list. */
+static void shadow_dev_destroy(struct work_struct *work)
+{
+	struct shadow_dev *sdev = container_of(work, struct shadow_dev, destroy);
+	struct kvm *kvm = sdev->kvm;
+
+	sdev_virq_bypass_deactive(kvm, sdev);
+	sdev_virt_pdev_delete(sdev->pdev);
+
+	sdev->nvecs = 0;
+	kfree(sdev->msi);
+	kfree(sdev);
+}
+
+/*
+ * Remove the shadow device with guest device id @devid from @kvm, if there
+ * is one, and wait for its teardown to finish.
+ *
+ * This is reachable from the KVM_DEL_SHADOW_DEV ioctl, so a disabled feature
+ * or an unknown @devid is ignored quietly rather than with a WARN.
+ */
+void kvm_shadow_dev_delete(struct kvm *kvm, u32 devid)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct shadow_dev *sdev, *tmp;
+	unsigned long flags;
+
+	if (!sdev_enable)
+		return;
+
+	raw_spin_lock_irqsave(&dist->sdev_list_lock, flags);
+
+	list_for_each_entry_safe(sdev, tmp, &dist->sdev_list_head, entry) {
+		if (sdev->devid != devid)
+			continue;
+
+		list_del(&sdev->entry);
+		queue_work(sdev_cleanup_wq, &sdev->destroy);
+		break;
+	}
+
+	raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags);
+
+	flush_workqueue(sdev_cleanup_wq);
+}
+
+/* Remove every shadow device of @kvm and wait for the teardown to finish. */
+void kvm_shadow_dev_delete_all(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct shadow_dev *sdev, *tmp;
+	unsigned long flags;
+
+	if (!sdev_enable)
+		return;
+
+	raw_spin_lock_irqsave(&dist->sdev_list_lock, flags);
+
+	list_for_each_entry_safe(sdev, tmp, &dist->sdev_list_head, entry) {
+		list_del(&sdev->entry);
+		queue_work(sdev_cleanup_wq, &sdev->destroy);
+	}
+
+	raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags);
+
+	flush_workqueue(sdev_cleanup_wq);
+}
+
+/* Parse the "kvm-arm.virt_msi_bypass" boolean early parameter. */
+static int __init early_virt_msi_bypass(char *buf)
+{
+	return strtobool(buf, &virt_msi_bypass);
+}
+early_param("kvm-arm.virt_msi_bypass", early_virt_msi_bypass);
+
+/*
+ * Decide whether shadow devices are available and set up the cleanup
+ * workqueue.  The workqueue is only needed, and only allocated, when the
+ * feature is enabled.
+ */
+void kvm_shadow_dev_init(void)
+{
+	/*
+	 * FIXME: Ideally shadow device should only rely on a GICv4.0
+	 * capable ITS, but we should also take the reserved device ID
+	 * pools into account.
+	 */
+	sdev_enable = kvm_vgic_global_state.has_gicv4 && virt_msi_bypass;
+
+	if (sdev_enable) {
+		sdev_cleanup_wq = alloc_workqueue("kvm-sdev-cleanup", 0, 0);
+		if (!sdev_cleanup_wq)
+			sdev_enable = false;
+	}
+
+	kvm_info("Shadow device %sabled\n", sdev_enable ? "en" : "dis");
+}
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index e150b8dd2538..e711c878e563 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -62,6 +62,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
 		raw_spin_lock_init(lpi_lock);
 	}
 	raw_spin_lock_init(&dist->lpi_list_lock);
+
+	INIT_LIST_HEAD(&dist->sdev_list_head);
+	raw_spin_lock_init(&dist->sdev_list_lock);
 }
/* CREATION */ diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 699cedb92f40..a8717362d409 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -33,6 +33,21 @@ #define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \ (irq) <= VGIC_MAX_SPI)
+struct shadow_dev {			/* one shadow device of a VM */
+	struct kvm *kvm;		/* VM this device was created for */
+	struct list_head entry;		/* link in vgic_dist.sdev_list_head */
+
+	u32 devid; /* guest visible device id */
+	u32 nvecs;			/* number of MSI vectors */
+	unsigned long *enable;		/* bitmap: vector has forwarding set up */
+	int *host_irq;			/* host IRQ per vector, 0 when unset */
+	struct kvm_msi *msi;		/* nvecs MSI descriptions from userspace */
+
+	struct platform_device *pdev;	/* backing "virt_plat_dev" device */
+
+	struct work_struct destroy;	/* deferred teardown, see shadow_dev_destroy() */
+};
+
 /* Information about HiSilicon implementation of vtimer (GICv4.1-based) */
 struct vtimer_info {
 	u32 intid;
@@ -297,6 +312,9 @@ struct vgic_dist {
 	 * else.
 	 */
 	struct its_vm its_vm;
+
+	raw_spinlock_t sdev_list_lock;	/* protects sdev_list_head */
+	struct list_head sdev_list_head;	/* shadow devices of this VM */
 };
struct vgic_v2_cpu_if { @@ -448,4 +466,10 @@ int kvm_vgic_config_vtimer_irqbypass(struct kvm_vcpu *vcpu, u32 vintid, bool (*get_as)(struct kvm_vcpu *, int), void (*set_as)(struct kvm_vcpu *, int, bool));
+extern bool sdev_enable; + +void kvm_shadow_dev_init(void); +int kvm_shadow_dev_create(struct kvm *kvm, struct kvm_master_dev_info *mdi); +void kvm_shadow_dev_delete(struct kvm *kvm, u32 devid); +void kvm_shadow_dev_delete_all(struct kvm *kvm); #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 90b2e4a3198d..771ea7882b15 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1095,6 +1095,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_CPU_FEATURE 555
+#define KVM_CAP_ARM_VIRT_MSI_BYPASS 799
 #define KVM_CAP_LOONGARCH_FPU 800
 #define KVM_CAP_LOONGARCH_LSX 801
 #define KVM_CAP_LOONGARCH_VZ 802
@@ -1347,6 +1348,16 @@ struct id_registers {
 	__u64 num;
 };
 
+/*
+ * Argument of KVM_CREATE_SHADOW_DEV: a vector count followed by that many
+ * MSI descriptions.  The guest device id of the shadow device is taken from
+ * msi[0].devid.
+ */
+struct kvm_master_dev_info {
+	__u32 nvectors;
+	struct kvm_msi msi[];
+};
+
 /*
  * ioctls for VM fds
  */
@@ -1464,6 +1475,10 @@ struct kvm_s390_ucas_mapping {
 #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr)
 #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr)
 
+/* Create a shadow device / delete the one with the given guest device id. */
+#define KVM_CREATE_SHADOW_DEV _IOW(KVMIO, 0xf0, struct kvm_master_dev_info)
+#define KVM_DEL_SHADOW_DEV _IOW(KVMIO, 0xf1, __u32)
+
 /*
  * ioctls for vcpu fds
  */