From: Anup Patel anup.patel@wdc.com
euleros inclusion category: feature bugzilla: NA CVE: NA
This patch implements VCPU interrupts and requests which are both asynchronous events.
The VCPU interrupts can be set/unset using KVM_INTERRUPT ioctl from user-space. In future, the in-kernel IRQCHIP emulation will use kvm_riscv_vcpu_set_interrupt() and kvm_riscv_vcpu_unset_interrupt() functions to set/unset VCPU interrupts.
Important VCPU requests implemented by this patch are: KVM_REQ_SLEEP - set whenever VCPU itself goes to sleep state KVM_REQ_VCPU_RESET - set whenever VCPU reset is requested
The WFI trap-n-emulate (added later) will use KVM_REQ_SLEEP request and kvm_riscv_vcpu_has_interrupt() function.
The KVM_REQ_VCPU_RESET request will be used by SBI emulation (added later) to power-up a VCPU in power-off state. The user-space can use the GET_MPSTATE/SET_MPSTATE ioctls to get/set power state of a VCPU.
Link: https://gitee.com/openeuler/kernel/issues/I1RR1Y Signed-off-by: Anup Patel anup.patel@wdc.com Acked-by: Paolo Bonzini pbonzini@redhat.com Reviewed-by: Paolo Bonzini pbonzini@redhat.com Reviewed-by: Alexander Graf graf@amazon.com Signed-off-by: Mingwang Li limingwang@huawei.com Reviewed-by: Yifei Jiang jiangyifei@huawei.com Signed-off-by: Xie XiuQi xiexiuqi@huawei.com --- arch/riscv/include/asm/kvm_host.h | 23 ++++ arch/riscv/include/uapi/asm/kvm.h | 3 + arch/riscv/kvm/vcpu.c | 182 +++++++++++++++++++++++++++--- 3 files changed, 195 insertions(+), 13 deletions(-)
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index e0aeed7d5144..7dc52559b4fc 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -132,6 +132,21 @@ struct kvm_vcpu_arch { /* CPU CSR context upon Guest VCPU reset */ struct kvm_vcpu_csr guest_reset_csr;
+ /* + * VCPU interrupts + * + * We have a lockless approach for tracking pending VCPU interrupts + * implemented using atomic bitops. The irqs_pending bitmap represent + * pending interrupts whereas irqs_pending_mask represent bits changed + * in irqs_pending. Our approach is modeled around multiple producer + * and single consumer problem where the consumer is the VCPU itself. + */ + unsigned long irqs_pending; + unsigned long irqs_pending_mask; + + /* VCPU power-off state */ + bool power_off; + /* Don't run the VCPU (blocked) */ bool pause;
@@ -156,4 +171,12 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {}
+int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); +bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask); +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); + #endif /* __RISCV_KVM_HOST_H__ */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index d15875818b6e..6dbc056d58ba 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -18,6 +18,9 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_INTERRUPT_SET -1U +#define KVM_INTERRUPT_UNSET -2U + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { }; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 84deeddbffbe..7acb2e622597 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -11,6 +11,7 @@ #include <linux/err.h> #include <linux/kdebug.h> #include <linux/module.h> +#include <linux/percpu.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/sched/signal.h> @@ -54,6 +55,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(csr, reset_csr, sizeof(*csr));
memcpy(cntx, reset_cntx, sizeof(*cntx)); + + WRITE_ONCE(vcpu->arch.irqs_pending, 0); + WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); }
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) @@ -102,8 +106,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - /* TODO: */ - return 0; + return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER); }
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) @@ -116,20 +119,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - /* TODO: */ - return 0; + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && + !vcpu->arch.power_off && !vcpu->arch.pause); }
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { - /* TODO: */ - return 0; + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; }
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { - /* TODO: */ - return false; + return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false; }
bool kvm_arch_has_vcpu_debugfs(void) @@ -150,7 +151,21 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - /* TODO; */ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + if (ioctl == KVM_INTERRUPT) { + struct kvm_interrupt irq; + + if (copy_from_user(&irq, argp, sizeof(irq))) + return -EFAULT; + + if (irq.irq == KVM_INTERRUPT_SET) + return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT); + else + return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); + } + return -ENOIOCTLCMD; }
@@ -199,18 +214,121 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; }
+void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long mask, val; + + if (READ_ONCE(vcpu->arch.irqs_pending_mask)) { + mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0); + val = READ_ONCE(vcpu->arch.irqs_pending) & mask; + + csr->hvip &= ~mask; + csr->hvip |= val; + } +} + +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) +{ + unsigned long hvip; + struct kvm_vcpu_arch *v = &vcpu->arch; + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + /* Read current HVIP and HIE CSRs */ + hvip = csr_read(CSR_HVIP); + csr->hie = csr_read(CSR_HIE); + + /* Sync-up HVIP.VSSIP bit changes does by Guest */ + if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { + if (hvip & (1UL << IRQ_VS_SOFT)) { + if (!test_and_set_bit(IRQ_VS_SOFT, + &v->irqs_pending_mask)) + set_bit(IRQ_VS_SOFT, &v->irqs_pending); + } else { + if (!test_and_set_bit(IRQ_VS_SOFT, + &v->irqs_pending_mask)) + clear_bit(IRQ_VS_SOFT, &v->irqs_pending); + } + } +} + +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) +{ + if (irq != IRQ_VS_SOFT && + irq != IRQ_VS_TIMER && + irq != IRQ_VS_EXT) + return -EINVAL; + + set_bit(irq, &vcpu->arch.irqs_pending); + smp_mb__before_atomic(); + set_bit(irq, &vcpu->arch.irqs_pending_mask); + + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) +{ + if (irq != IRQ_VS_SOFT && + irq != IRQ_VS_TIMER && + irq != IRQ_VS_EXT) + return -EINVAL; + + clear_bit(irq, &vcpu->arch.irqs_pending); + smp_mb__before_atomic(); + set_bit(irq, &vcpu->arch.irqs_pending_mask); + + return 0; +} + +bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask) +{ + return (READ_ONCE(vcpu->arch.irqs_pending) & + vcpu->arch.guest_csr.hie & mask) ? true : false; +} + +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = false; + kvm_vcpu_wake_up(vcpu); +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - /* TODO: */ + if (vcpu->arch.power_off) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + return 0; }
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - /* TODO: */ - return 0; + int ret = 0; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.power_off = false; + break; + case KVM_MP_STATE_STOPPED: + kvm_riscv_vcpu_power_off(vcpu); + break; + default: + ret = -EINVAL; + } + + return ret; }
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -234,7 +352,33 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) { - /* TODO: */ + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); + + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + rcuwait_wait_event(wait, + (!vcpu->arch.power_off) && (!vcpu->arch.pause), + TASK_INTERRUPTIBLE); + + if (vcpu->arch.power_off || vcpu->arch.pause) { + /* + * Awaken to handle a signal, request to + * sleep again later. + */ + kvm_make_request(KVM_REQ_SLEEP, vcpu); + } + } + + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_riscv_reset_vcpu(vcpu); + } +} + +static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + csr_write(CSR_HVIP, csr->hvip); }
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) @@ -298,6 +442,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); smp_mb__after_srcu_read_unlock();
+ /* + * We might have got VCPU interrupts updated asynchronously + * so update it in HW. + */ + kvm_riscv_vcpu_flush_interrupts(vcpu); + + /* Update HVIP CSR for current CPU */ + kvm_riscv_update_hvip(vcpu); + if (ret <= 0 || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; @@ -325,6 +478,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trap.htval = csr_read(CSR_HTVAL); trap.htinst = csr_read(CSR_HTINST);
+ /* Syncup interrupts state with HW */ + kvm_riscv_vcpu_sync_interrupts(vcpu); + /* * We may have taken a host interrupt in VS/VU-mode (i.e. * while executing the guest). This interrupt is still