diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 4a900cdbc62e9..46ca84600dcab 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1476,14 +1476,43 @@ Possible values are: [s390] KVM_MP_STATE_LOAD the vcpu is in a special load/startup state [s390] + KVM_MP_STATE_SUSPENDED the vcpu is in a suspend state and is waiting + for a wakeup event [arm64] ========================== =============================================== On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. -For arm64/riscv: -^^^^^^^^^^^^^^^^ +For arm64: +^^^^^^^^^^ + +If a vCPU is in the KVM_MP_STATE_SUSPENDED state, KVM will emulate the +architectural execution of a WFI instruction. + +If a wakeup event is recognized, KVM will exit to userspace with a +KVM_SYSTEM_EVENT exit, where the event type is KVM_SYSTEM_EVENT_WAKEUP. If +userspace wants to honor the wakeup, it must set the vCPU's MP state to +KVM_MP_STATE_RUNNABLE. If it does not, KVM will continue to await a wakeup +event in subsequent calls to KVM_RUN. + +.. warning:: + + If userspace intends to keep the vCPU in a SUSPENDED state, it is + strongly recommended that userspace take action to suppress the + wakeup event (such as masking an interrupt). Otherwise, subsequent + calls to KVM_RUN will immediately exit with a KVM_SYSTEM_EVENT_WAKEUP + event and inadvertently waste CPU cycles. + + Additionally, if userspace takes action to suppress a wakeup event, + it is strongly recommended that it also restores the vCPU to its + original state when the vCPU is made RUNNABLE again. For example, + if userspace masked a pending interrupt to suppress the wakeup, + the interrupt should be unmasked before returning control to the + guest. + +For riscv: +^^^^^^^^^^ The only states that are valid are KVM_MP_STATE_STOPPED and KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. @@ -5985,6 +6014,7 @@ should put the acknowledged interrupt vector into the 'epr' field. #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 + #define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; __u64 data[16]; @@ -6009,6 +6039,9 @@ Valid values for 'type' are: has requested a crash condition maintenance. Userspace can choose to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. + - KVM_SYSTEM_EVENT_WAKEUP -- the exiting vCPU is in a suspended state and + KVM has recognized a wakeup event. Userspace may honor this event by + marking the exiting vCPU as runnable, or deny it and call KVM_RUN again. If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain architecture specific information for the system-level event. Only diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f3f93d48e21a9..46027b9b80cad 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) #define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) +#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index efe54aba5cced..abd32a84ed7a4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -444,6 +444,18 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; } +static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + kvm_vcpu_kick(vcpu); +} + +static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -464,6 +476,9 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, case KVM_MP_STATE_STOPPED: kvm_arm_vcpu_power_off(vcpu); break; + case KVM_MP_STATE_SUSPENDED: + kvm_arm_vcpu_suspend(vcpu); + break; default: ret = -EINVAL; } @@ -648,6 +663,39 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_enable(); } +static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_vcpu_suspended(vcpu)) + return 1; + + kvm_vcpu_wfi(vcpu); + + /* + * The suspend state is sticky; we do not leave it until userspace + * explicitly marks the vCPU as runnable. Request that we suspend again + * later. + */ + kvm_make_request(KVM_REQ_SUSPEND, vcpu); + + /* + * Check to make sure the vCPU is actually runnable. If so, exit to + * userspace informing it of the wakeup condition. + */ + if (kvm_arch_vcpu_runnable(vcpu)) { + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + return 0; + } + + /* + * Otherwise, we were unblocked to process a different event, such as a + * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to + * process the event. + */ + return 1; +} + /** * check_vcpu_requests - check and handle pending vCPU requests * @vcpu: the VCPU pointer @@ -686,6 +734,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) kvm_pmu_handle_pmcr(vcpu, __vcpu_sys_reg(vcpu, PMCR_EL0)); + + if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) + return kvm_vcpu_suspend(vcpu); } return 1; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6a184d260c7f2..7f72fb7b05f27 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -444,6 +444,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; union { @@ -646,6 +647,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 #define KVM_MP_STATE_AP_RESET_HOLD 9 +#define KVM_MP_STATE_SUSPENDED 10 struct kvm_mp_state { __u32 mp_state;