From 9696d9ae016573568dfd65dd2a92d6e8d277b25b Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Mon, 25 Nov 2024 15:24:40 -0800 Subject: [PATCH 01/16] hyperv: Move hv_connection_id to hyperv-tlfs.h This definition is in the wrong file; it is part of the TLFS doc. Signed-off-by: Nuno Das Neves Acked-by: Wei Liu Reviewed-by: Easwar Hariharan Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1732577084-2122-2-git-send-email-nunodasneves@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1732577084-2122-2-git-send-email-nunodasneves@linux.microsoft.com> --- include/asm-generic/hyperv-tlfs.h | 9 +++++++++ include/linux/hyperv.h | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index 814207e7c37fc..52274c9aefefc 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -871,4 +871,13 @@ struct hv_mmio_write_input { u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; } __packed; +/* Define connection identifier type. */ +union hv_connection_id { + u32 asu32; + struct { + u32 id:24; + u32 reserved:8; + } u; +}; + #endif diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 02a226bcf0edc..b0dbba3b9108e 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -768,15 +768,6 @@ struct vmbus_close_msg { struct vmbus_channel_close_channel msg; }; -/* Define connection identifier type. */ -union hv_connection_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, From a3e72548282405056e9f8f8e9f5daba132c38bc4 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Mon, 25 Nov 2024 15:24:41 -0800 Subject: [PATCH 02/16] hyperv: Clean up unnecessary #includes Remove includes of linux/hyperv.h, mshyperv.h, and hyperv-tlfs.h where they are not used. 
Signed-off-by: Nuno Das Neves Acked-by: Wei Liu Reviewed-by: Michael Kelley Reviewed-by: Easwar Hariharan Link: https://lore.kernel.org/r/1732577084-2122-3-git-send-email-nunodasneves@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1732577084-2122-3-git-send-email-nunodasneves@linux.microsoft.com> --- arch/arm64/hyperv/hv_core.c | 1 - arch/x86/hyperv/hv_apic.c | 1 - arch/x86/hyperv/hv_init.c | 1 - arch/x86/hyperv/hv_proc.c | 1 - arch/x86/hyperv/ivm.c | 1 - arch/x86/hyperv/mmu.c | 1 - arch/x86/include/asm/kvm_host.h | 1 - arch/x86/include/asm/mshyperv.h | 1 - arch/x86/mm/pat/set_memory.c | 2 -- 9 files changed, 10 deletions(-) diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c index f1ebc025e1df7..7a746a5a6b42f 100644 --- a/arch/arm64/hyperv/hv_core.c +++ b/arch/arm64/hyperv/hv_core.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 0569f579338b5..f022d5f64fb6b 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 95eada2994e15..3562826915f94 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c index 3fa1f2ee7b0d0..b74c06c04ff1d 100644 --- a/arch/x86/hyperv/hv_proc.c +++ b/arch/x86/hyperv/hv_proc.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 60fc3ed728304..b56d706127346 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 1cc113200ff55..cc8c3bd0e7c29 100644 --- 
a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -1,6 +1,5 @@ #define pr_fmt(fmt) "Hyper-V: " fmt -#include #include #include #include diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e159e44a6a1b6..46f354b124889 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 5f0bc6a6d0255..6f866fb9ffee7 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -9,7 +9,6 @@ #include #include #include -#include /* * Hyper-V always provides a single IO-APIC at this MMIO address. diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 95bc50a8541c6..ef4514d64c052 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -32,8 +32,6 @@ #include #include #include -#include -#include #include "../mm_internal.h" From e68bda71a2384e4463c96bac958912b4c5e58502 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Mon, 25 Nov 2024 15:24:42 -0800 Subject: [PATCH 03/16] hyperv: Add new Hyper-V headers in include/hyperv These headers contain definitions for regular Hyper-V guests (as in hyperv-tlfs.h), as well as interfaces for more privileged guests like the root partition (aka Dom0). These files are derived from headers exported from Hyper-V, rather than being derived from the TLFS document. (Although, to preserve compatibility with existing Linux code, some definitions are copied directly from hyperv-tlfs.h too). The new files follow a naming convention according to their original use: - hdk "host development kit" - gdk "guest development kit" With postfix "_mini" implying userspace-only headers, and "_ext" for extended hypercalls. 
The use of multiple files and their original names is primarily to keep the provenance of exactly where they came from in Hyper-V code, which is helpful for manual maintenance and extension of these definitions. Microsoft maintainers importing new definitions should take care to put them in the right file. However, Linux kernel code that uses any of the definitions need not be aware of the multiple files or assign any meaning to the new names. Linux kernel code should always just include hvhdk.h Note the new headers contain both arm64 and x86_64 definitions. Some are guarded by #ifdefs, and some are instead prefixed with the architecture, e.g. hv_x64_*. These conventions are kept from Hyper-V code as another tactic to simplify the process of importing and maintaining the definitions, rather than splitting them up into their own files in arch/x86/ and arch/arm64/. These headers are a step toward importing headers directly from Hyper-V in the future, similar to Xen public files in include/xen/interface/. 
Signed-off-by: Nuno Das Neves Reviewed-by: Easwar Hariharan Reviewed-by: Michael Kelley Signed-off-by: Roman Kisel Link: https://lore.kernel.org/r/1732577084-2122-4-git-send-email-nunodasneves@linux.microsoft.com Link: https://lore.kernel.org/r/20250108222138.1623703-2-romank@linux.microsoft.com Signed-off-by: Wei Liu --- MAINTAINERS | 5 + include/hyperv/hvgdk.h | 308 ++++++++ include/hyperv/hvgdk_ext.h | 46 ++ include/hyperv/hvgdk_mini.h | 1348 +++++++++++++++++++++++++++++++++++ include/hyperv/hvhdk.h | 733 +++++++++++++++++++ include/hyperv/hvhdk_mini.h | 311 ++++++++ 6 files changed, 2751 insertions(+) create mode 100644 include/hyperv/hvgdk.h create mode 100644 include/hyperv/hvgdk_ext.h create mode 100644 include/hyperv/hvgdk_mini.h create mode 100644 include/hyperv/hvhdk.h create mode 100644 include/hyperv/hvhdk_mini.h diff --git a/MAINTAINERS b/MAINTAINERS index 30cbc3d44cd53..013cbd1af04b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10695,6 +10695,11 @@ F: drivers/video/fbdev/hyperv_fb.c F: include/asm-generic/hyperv-tlfs.h F: include/asm-generic/mshyperv.h F: include/clocksource/hyperv_timer.h +F: include/hyperv/hvgdk.h +F: include/hyperv/hvgdk_ext.h +F: include/hyperv/hvgdk_mini.h +F: include/hyperv/hvhdk.h +F: include/hyperv/hvhdk_mini.h F: include/linux/hyperv.h F: include/net/mana F: include/uapi/linux/hyperv.h diff --git a/include/hyperv/hvgdk.h b/include/hyperv/hvgdk.h new file mode 100644 index 0000000000000..dd6d4939ea29b --- /dev/null +++ b/include/hyperv/hvgdk.h @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Type definitions for the Microsoft Hypervisor. + */ +#ifndef _HV_HVGDK_H +#define _HV_HVGDK_H + +#include "hvgdk_mini.h" +#include "hvgdk_ext.h" + +/* + * The guest OS needs to register the guest ID with the hypervisor. + * The guest ID is a 64 bit entity and the structure of this ID is + * specified in the Hyper-V TLFS specification. 
+ * + * While the current guideline does not specify how Linux guest ID(s) + * need to be generated, our plan is to publish the guidelines for + * Linux and other guest operating systems that currently are hosted + * on Hyper-V. The implementation here conforms to these yet + * unpublished guidelines. + * + * Bit(s) + * 63 - Indicates if the OS is Open Source or not; 1 is Open Source + * 62:56 - OS Type; Linux is 0x1 + * 55:48 - Distro specific identification + * 47:16 - Linux kernel version number + * 15:0 - Distro specific identification + */ + +#define HV_LINUX_VENDOR_ID 0x8100 + +/* HV_VMX_ENLIGHTENED_VMCS */ +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u16 padding16_1; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 
guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16_2[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + 
u32 hv_clean_fields; + u32 padding32_1; + u32 hv_synthetic_controls; + struct { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 reserved:30; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u32 padding32_2; + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 guest_ia32_perf_global_ctrl; + u64 guest_ia32_s_cet; + u64 guest_ssp; + u64 guest_ia32_int_ssp_table_addr; + u64 guest_ia32_lbr_ctl; + u64 padding64_5[2]; + u64 xss_exit_bitmap; + u64 encls_exiting_bitmap; + u64 host_ia32_perf_global_ctrl; + u64 tsc_multiplier; + u64 host_ia32_s_cet; + u64 host_ssp; + u64 host_ia32_int_ssp_table_addr; + u64 padding64_6; +} __packed; +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 + + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF + +/* + * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by + * pairing it with architecturally impossible exit reasons. 
Bit 28 is set only + * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit + * is pending. I.e. it will never be set by hardware for non-SMI exits (there + * are only three), nor will it ever be set unless the VMM is an STM. + */ +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 + +/* + * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose + * SVM enlightenments to guests. This is documented in the TLFS doc. + * Note on naming: SVM_NESTED_ENLIGHTENED_VMCB_FIELDS + */ +struct hv_vmcb_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall : 1; + u32 msr_bitmap : 1; + u32 enlightened_npt_tlb: 1; + u32 reserved : 29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB. + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS 31 + +/* Synthetic VM-Exit */ +#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) + +/* VM_PARTITION_ASSIST_PAGE */ +struct hv_partition_assist_pg { + u32 tlb_lock_count; +}; + +/* Define connection identifier type. */ +union hv_connection_id { + u32 asu32; + struct { + u32 id : 24; + u32 reserved : 8; + } __packed u; +}; + +struct hv_input_unmap_gpa_pages { + u64 target_partition_id; + u64 target_gpa_base; + u32 unmap_flags; + u32 padding; +} __packed; + +#endif /* #ifndef _HV_HVGDK_H */ diff --git a/include/hyperv/hvgdk_ext.h b/include/hyperv/hvgdk_ext.h new file mode 100644 index 0000000000000..641b591ee61f4 --- /dev/null +++ b/include/hyperv/hvgdk_ext.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Type definitions for the Microsoft Hypervisor. 
+ */ +#ifndef _HV_HVGDK_EXT_H +#define _HV_HVGDK_EXT_H + +#include "hvgdk_mini.h" + +/* Extended hypercalls */ +#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 +#define HV_EXT_CALL_MEMORY_HEAT_HINT 0x8003 + +/* Extended hypercalls */ +enum { /* HV_EXT_CALL */ + HV_EXTCALL_QUERY_CAPABILITIES = 0x8001, + HV_EXTCALL_MEMORY_HEAT_HINT = 0x8003, +}; + +/* HV_EXT_OUTPUT_QUERY_CAPABILITIES */ +#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8) + +enum { /* HV_EXT_MEMORY_HEAT_HINT_TYPE */ + HV_EXTMEM_HEAT_HINT_COLD = 0, + HV_EXTMEM_HEAT_HINT_HOT = 1, + HV_EXTMEM_HEAT_HINT_COLD_DISCARD = 2, + HV_EXTMEM_HEAT_HINT_MAX +}; + +/* + * The whole argument should fit in a page to be able to pass to the hypervisor + * in one hypercall. + */ +#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES \ + ((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \ + sizeof(union hv_gpa_page_range)) + +/* HvExtCallMemoryHeatHint hypercall */ +#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD 2 +struct hv_memory_hint { /* HV_EXT_INPUT_MEMORY_HEAT_HINT */ + u64 heat_type : 2; /* HV_EXTMEM_HEAT_HINT_* */ + u64 reserved : 62; + union hv_gpa_page_range ranges[]; +} __packed; + +#endif /* _HV_HVGDK_EXT_H */ diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h new file mode 100644 index 0000000000000..155615175965f --- /dev/null +++ b/include/hyperv/hvgdk_mini.h @@ -0,0 +1,1348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Type definitions for the Microsoft hypervisor. 
+ */ +#ifndef _HV_HVGDK_MINI_H +#define _HV_HVGDK_MINI_H + +#include +#include + +struct hv_u128 { + u64 low_part; + u64 high_part; +} __packed; + +/* NOTE: when adding below, update hv_status_to_string() */ +#define HV_STATUS_SUCCESS 0x0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3 +#define HV_STATUS_INVALID_ALIGNMENT 0x4 +#define HV_STATUS_INVALID_PARAMETER 0x5 +#define HV_STATUS_ACCESS_DENIED 0x6 +#define HV_STATUS_INVALID_PARTITION_STATE 0x7 +#define HV_STATUS_OPERATION_DENIED 0x8 +#define HV_STATUS_UNKNOWN_PROPERTY 0x9 +#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA +#define HV_STATUS_INSUFFICIENT_MEMORY 0xB +#define HV_STATUS_INVALID_PARTITION_ID 0xD +#define HV_STATUS_INVALID_VP_INDEX 0xE +#define HV_STATUS_NOT_FOUND 0x10 +#define HV_STATUS_INVALID_PORT_ID 0x11 +#define HV_STATUS_INVALID_CONNECTION_ID 0x12 +#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13 +#define HV_STATUS_NOT_ACKNOWLEDGED 0x14 +#define HV_STATUS_INVALID_VP_STATE 0x15 +#define HV_STATUS_NO_RESOURCES 0x1D +#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20 +#define HV_STATUS_INVALID_LP_INDEX 0x41 +#define HV_STATUS_INVALID_REGISTER_VALUE 0x50 +#define HV_STATUS_OPERATION_FAILED 0x71 +#define HV_STATUS_TIME_OUT 0x78 +#define HV_STATUS_CALL_PENDING 0x79 +#define HV_STATUS_VTL_ALREADY_ENABLED 0x86 + +/* + * The Hyper-V TimeRefCount register and the TSC + * page provide a guest VM clock with 100ns tick rate + */ +#define HV_CLOCK_HZ (NSEC_PER_SEC / 100) + +#define HV_HYP_PAGE_SHIFT 12 +#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) +#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) + +#define HV_PARTITION_ID_INVALID ((u64)0) +#define HV_PARTITION_ID_SELF ((u64)-1) + +/* Hyper-V specific model specific registers (MSRs) */ + +#if defined(CONFIG_X86) +/* HV_X64_SYNTHETIC_MSR */ +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 +#define HV_X64_MSR_HYPERCALL 0x40000001 +#define HV_X64_MSR_VP_INDEX 0x40000002 +#define HV_X64_MSR_RESET 0x40000003 +#define 
HV_X64_MSR_VP_RUNTIME 0x40000010 +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 +#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 + +/* Define the virtual APIC registers */ +#define HV_X64_MSR_EOI 0x40000070 +#define HV_X64_MSR_ICR 0x40000071 +#define HV_X64_MSR_TPR 0x40000072 +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 + +/* Define synthetic interrupt controller model specific registers. */ +#define HV_X64_MSR_SCONTROL 0x40000080 +#define HV_X64_MSR_SVERSION 0x40000081 +#define HV_X64_MSR_SIEFP 0x40000082 +#define HV_X64_MSR_SIMP 0x40000083 +#define HV_X64_MSR_EOM 0x40000084 +#define HV_X64_MSR_SIRBP 0x40000085 +#define HV_X64_MSR_SINT0 0x40000090 +#define HV_X64_MSR_SINT1 0x40000091 +#define HV_X64_MSR_SINT2 0x40000092 +#define HV_X64_MSR_SINT3 0x40000093 +#define HV_X64_MSR_SINT4 0x40000094 +#define HV_X64_MSR_SINT5 0x40000095 +#define HV_X64_MSR_SINT6 0x40000096 +#define HV_X64_MSR_SINT7 0x40000097 +#define HV_X64_MSR_SINT8 0x40000098 +#define HV_X64_MSR_SINT9 0x40000099 +#define HV_X64_MSR_SINT10 0x4000009A +#define HV_X64_MSR_SINT11 0x4000009B +#define HV_X64_MSR_SINT12 0x4000009C +#define HV_X64_MSR_SINT13 0x4000009D +#define HV_X64_MSR_SINT14 0x4000009E +#define HV_X64_MSR_SINT15 0x4000009F + +/* Define synthetic interrupt controller model specific registers for nested hypervisor */ +#define HV_X64_MSR_NESTED_SCONTROL 0x40001080 +#define HV_X64_MSR_NESTED_SVERSION 0x40001081 +#define HV_X64_MSR_NESTED_SIEFP 0x40001082 +#define HV_X64_MSR_NESTED_SIMP 0x40001083 +#define HV_X64_MSR_NESTED_EOM 0x40001084 +#define HV_X64_MSR_NESTED_SINT0 0x40001090 + +/* + * Synthetic Timer MSRs. Four timers per vcpu. 
+ */ +#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 + +/* Hyper-V guest idle MSR */ +#define HV_X64_MSR_GUEST_IDLE 0x400000F0 + +/* Hyper-V guest crash notification MSR's */ +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 + +#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 +#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) + +#define HV_X64_MSR_CRASH_PARAMS \ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + +#define HV_IPI_LOW_VECTOR 0x10 +#define HV_IPI_HIGH_VECTOR 0xff + +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +/* Hyper-V Enlightened VMCS version mask in nested features CPUID */ +#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff + +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 + +/* Number of XMM registers used in hypercall input/output */ +#define HV_HYPERCALL_MAX_XMM_REGISTERS 6 + +struct hv_reenlightenment_control { + u64 vector : 8; + u64 reserved1 : 8; + u64 enabled : 1; + u64 reserved2 : 15; + u64 target_vp : 32; +} __packed; + +struct hv_tsc_emulation_status { /* HV_TSC_EMULATION_STATUS */ + u64 inprogress : 1; + u64 reserved : 63; +} __packed; + +struct hv_tsc_emulation_control { /* HV_TSC_INVARIANT_CONTROL */ + u64 
enabled : 1; + u64 reserved : 63; +} __packed; + +/* TSC emulation after migration */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 +#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 +#define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0) + +#endif /* CONFIG_X86 */ + +struct hv_get_partition_id { /* HV_OUTPUT_GET_PARTITION_ID */ + u64 partition_id; +} __packed; + +/* HV_CRASH_CTL_REG_CONTENTS */ +#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) +#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) + +union hv_reference_tsc_msr { + u64 as_uint64; + struct { + u64 enable : 1; + u64 reserved : 11; + u64 pfn : 52; + } __packed; +}; + +/* The maximum number of sparse vCPU banks which can be encoded by 'struct hv_vpset' */ +#define HV_MAX_SPARSE_VCPU_BANKS (64) +/* The number of vCPUs in one sparse bank */ +#define HV_VCPUS_PER_SPARSE_BANK (64) + +/* Some of Hyper-V structs do not use hv_vpset where linux uses them */ +struct hv_vpset { /* HV_VP_SET */ + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +} __packed; + +/* + * Version info reported by hypervisor + * Changed to a union for convenience + */ +union hv_hypervisor_version_info { + struct { + u32 build_number; + + u32 minor_version : 16; + u32 major_version : 16; + + u32 service_pack; + + u32 service_number : 24; + u32 service_branch : 8; + }; + struct { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + }; +}; + +/* HV_CPUID_FUNCTION */ +#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 +#define HYPERV_CPUID_INTERFACE 0x40000001 +#define HYPERV_CPUID_VERSION 0x40000002 +#define HYPERV_CPUID_FEATURES 0x40000003 +#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 +#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 +#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A +#define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C + +#define 
HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081 +#define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */ + +#define HYPERV_CPUID_VIRT_STACK_PROPERTIES 0x40000082 +/* Support for the extended IOAPIC RTE format */ +#define HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE BIT(2) + +#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 +#define HYPERV_CPUID_MIN 0x40000005 +#define HYPERV_CPUID_MAX 0x4000ffff + +/* + * HV_X64_HYPERVISOR_FEATURES (EAX), or + * HV_PARTITION_PRIVILEGE_MASK [31-0] + */ +#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0) +#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) +#define HV_MSR_SYNIC_AVAILABLE BIT(2) +#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) +#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4) +#define HV_MSR_HYPERCALL_AVAILABLE BIT(5) +#define HV_MSR_VP_INDEX_AVAILABLE BIT(6) +#define HV_MSR_RESET_AVAILABLE BIT(7) +#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8) +#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) +#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10) +#define HV_ACCESS_FREQUENCY_MSRS BIT(11) +#define HV_ACCESS_REENLIGHTENMENT BIT(13) +#define HV_ACCESS_TSC_INVARIANT BIT(15) + +/* + * HV_X64_HYPERVISOR_FEATURES (EBX), or + * HV_PARTITION_PRIVILEGE_MASK [63-32] + */ +#define HV_CREATE_PARTITIONS BIT(0) +#define HV_ACCESS_PARTITION_ID BIT(1) +#define HV_ACCESS_MEMORY_POOL BIT(2) +#define HV_ADJUST_MESSAGE_BUFFERS BIT(3) +#define HV_POST_MESSAGES BIT(4) +#define HV_SIGNAL_EVENTS BIT(5) +#define HV_CREATE_PORT BIT(6) +#define HV_CONNECT_PORT BIT(7) +#define HV_ACCESS_STATS BIT(8) +#define HV_DEBUGGING BIT(11) +#define HV_CPU_MANAGEMENT BIT(12) +#define HV_ENABLE_EXTENDED_HYPERCALLS BIT(20) +#define HV_ISOLATION BIT(22) + +#if defined(CONFIG_X86) +/* HV_X64_HYPERVISOR_FEATURES (EDX) */ +#define HV_X64_MWAIT_AVAILABLE BIT(0) +#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) +#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) +#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) +#define 
HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) +#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) +#define HV_FEATURE_EXT_GVA_RANGES_FLUSH BIT(14) +/* + * Support for returning hypercall output block via XMM + * registers is available + */ +#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE BIT(15) +/* stimer Direct Mode is available */ +#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) + +/* + * Implementation recommendations. Indicates which behaviors the hypervisor + * recommends the OS implement for optimal performance. + * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits. + */ +/* HV_X64_ENLIGHTENMENT_INFORMATION */ +#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) +#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) +#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) +#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) +#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) +#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) +#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) +#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) +#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) +#define HV_X64_HYPERV_NESTED BIT(12) +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +#define HV_X64_USE_MMIO_HYPERCALLS BIT(21) + +/* + * CPU management features identification. + * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. + */ +#define HV_X64_START_LOGICAL_PROCESSOR BIT(0) +#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1) +#define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2) +#define HV_X64_RESERVED_IDENTITY_BIT BIT(31) + +/* + * Virtual processor will never share a physical core with another virtual + * processor, except for virtual processors that are reported as sibling SMT + * threads. + */ +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) + +/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. 
*/ +#define HV_X64_NESTED_DIRECT_FLUSH BIT(17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) +#define HV_X64_NESTED_MSR_BITMAP BIT(19) + +/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0) + +/* + * This is specific to AMD and specifies that enlightened TLB flush is + * supported. If guest opts in to this feature, ASID invalidations only + * flushes gva -> hpa mapping entries. To flush the TLB entries derived + * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace + * or HvFlushGuestPhysicalAddressList). + */ +#define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22) + +/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */ +#define HV_PARAVISOR_PRESENT BIT(0) + +/* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. */ +#define HV_ISOLATION_TYPE GENMASK(3, 0) +#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5) +#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6) + +enum hv_isolation_type { + HV_ISOLATION_TYPE_NONE = 0, /* HV_PARTITION_ISOLATION_TYPE_NONE */ + HV_ISOLATION_TYPE_VBS = 1, + HV_ISOLATION_TYPE_SNP = 2, + HV_ISOLATION_TYPE_TDX = 3 +}; + +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable : 1; + u64 reserved : 11; + u64 guest_physical_address : 52; + } __packed; +}; +#endif /* CONFIG_X86 */ + +#if defined(CONFIG_ARM64) +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8) +#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) +#endif /* CONFIG_ARM64 */ + +#if defined(CONFIG_X86) +#define HV_MAXIMUM_PROCESSORS 2048 +#elif defined(CONFIG_ARM64) /* CONFIG_X86 */ +#define HV_MAXIMUM_PROCESSORS 320 +#endif /* CONFIG_ARM64 */ + +#define HV_MAX_VP_INDEX (HV_MAXIMUM_PROCESSORS - 1) +#define HV_VP_INDEX_SELF ((u32)-2) +#define HV_ANY_VP ((u32)-1) + +union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ + u64 as_uint64; + struct { + u64 enable : 1; + u64 reserved : 11; + u64 pfn : 52; + } __packed; +}; + +/* Declare the various hypercall operations. 
*/ +/* HV_CALL_CODE */ +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 +#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_SEND_IPI 0x000b +#define HVCALL_ENABLE_VP_VTL 0x000f +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 +#define HVCALL_SEND_IPI_EX 0x0015 +#define HVCALL_CREATE_PARTITION 0x0040 +#define HVCALL_INITIALIZE_PARTITION 0x0041 +#define HVCALL_FINALIZE_PARTITION 0x0042 +#define HVCALL_DELETE_PARTITION 0x0043 +#define HVCALL_GET_PARTITION_PROPERTY 0x0044 +#define HVCALL_SET_PARTITION_PROPERTY 0x0045 +#define HVCALL_GET_PARTITION_ID 0x0046 +#define HVCALL_DEPOSIT_MEMORY 0x0048 +#define HVCALL_WITHDRAW_MEMORY 0x0049 +#define HVCALL_MAP_GPA_PAGES 0x004b +#define HVCALL_UNMAP_GPA_PAGES 0x004c +#define HVCALL_CREATE_VP 0x004e +#define HVCALL_DELETE_VP 0x004f +#define HVCALL_GET_VP_REGISTERS 0x0050 +#define HVCALL_SET_VP_REGISTERS 0x0051 +#define HVCALL_DELETE_PORT 0x0058 +#define HVCALL_DISCONNECT_PORT 0x005b +#define HVCALL_POST_MESSAGE 0x005c +#define HVCALL_SIGNAL_EVENT 0x005d +#define HVCALL_POST_DEBUG_DATA 0x0069 +#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a +#define HVCALL_RESET_DEBUG_SESSION 0x006b +#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 +#define HVCALL_GET_SYSTEM_PROPERTY 0x007b +#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c +#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d +#define HVCALL_RETARGET_INTERRUPT 0x007e +#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b +#define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094 +#define HVCALL_CREATE_PORT 0x0095 +#define HVCALL_CONNECT_PORT 0x0096 +#define HVCALL_START_VP 0x0099 +#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 +#define HVCALL_DISPATCH_VP 0x00c2 +#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db +#define HVCALL_MAP_VP_STATE_PAGE 0x00e1 +#define HVCALL_UNMAP_VP_STATE_PAGE 
0x00e2 +#define HVCALL_GET_VP_STATE 0x00e3 +#define HVCALL_SET_VP_STATE 0x00e4 +#define HVCALL_MMIO_READ 0x0106 +#define HVCALL_MMIO_WRITE 0x0107 + +/* HV_HYPERCALL_INPUT */ +#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) +#define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 +#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17) +#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27) +#define HV_HYPERCALL_NESTED BIT_ULL(31) +#define HV_HYPERCALL_REP_COMP_OFFSET 32 +#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32) +#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) +#define HV_HYPERCALL_RSVD1_MASK GENMASK_ULL(47, 44) +#define HV_HYPERCALL_REP_START_OFFSET 48 +#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) +#define HV_HYPERCALL_RSVD2_MASK GENMASK_ULL(63, 60) +#define HV_HYPERCALL_RSVD_MASK (HV_HYPERCALL_RSVD0_MASK | \ + HV_HYPERCALL_RSVD1_MASK | \ + HV_HYPERCALL_RSVD2_MASK) + +/* HvFlushGuestPhysicalAddressSpace hypercalls */ +struct hv_guest_mapping_flush { + u64 address_space; + u64 flags; +} __packed; + +/* + * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited + * by the bitwidth of "additional_pages" in union hv_gpa_page_range. + */ +#define HV_MAX_FLUSH_PAGES (2048) +#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB 0 +#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB 1 + +#define HV_FLUSH_ALL_PROCESSORS BIT(0) +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) + +/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */ +union hv_gpa_page_range { + u64 address_space; + struct { + u64 additional_pages : 11; + u64 largepage : 1; + u64 basepfn : 52; + } page; + struct { + u64 reserved : 12; + u64 page_size : 1; + u64 reserved1 : 8; + u64 base_large_pfn : 43; + }; +}; + +/* + * All input flush parameters should be in single page. 
The max flush + * count is the number of union hv_gpa_page_range entries that fit in + * the input parameter page after the two u64 header fields. + */ +#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ + sizeof(union hv_gpa_page_range)) + +struct hv_guest_mapping_flush_list { + u64 address_space; + u64 flags; + union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT]; +}; + +struct hv_tlb_flush { /* HV_INPUT_FLUSH_VIRTUAL_ADDRESS_LIST */ + u64 address_space; + u64 flags; + u64 processor_mask; + u64 gva_list[]; +} __packed; + +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ +struct hv_tlb_flush_ex { + u64 address_space; + u64 flags; + struct hv_vpset hv_vp_set; + u64 gva_list[]; +} __packed; + +struct ms_hyperv_tsc_page { /* HV_REFERENCE_TSC_PAGE */ + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; +} __packed; + +/* Define the number of synthetic interrupt sources. */ +#define HV_SYNIC_SINT_COUNT (16) + +/* Define the expected SynIC version. */ +#define HV_SYNIC_VERSION_1 (0x1) +/* Valid SynIC vectors are 16-255. 
*/ +#define HV_SYNIC_FIRST_VALID_VECTOR (16) + +#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) +#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) +#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) +#define HV_SYNIC_SINT_MASKED (1ULL << 16) +#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) +#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) + +# + +/* Hyper-V defined statically assigned SINTs */ +#define HV_SYNIC_INTERCEPTION_SINT_INDEX 0x00000000 +#define HV_SYNIC_IOMMU_FAULT_SINT_INDEX 0x00000001 +#define HV_SYNIC_VMBUS_SINT_INDEX 0x00000002 +#define HV_SYNIC_FIRST_UNUSED_SINT_INDEX 0x00000005 + +/* mshv assigned SINT for doorbell */ +#define HV_SYNIC_DOORBELL_SINT_INDEX HV_SYNIC_FIRST_UNUSED_SINT_INDEX + +enum hv_interrupt_type { + HV_X64_INTERRUPT_TYPE_FIXED = 0x0000, + HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001, + HV_X64_INTERRUPT_TYPE_SMI = 0x0002, + HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003, + HV_X64_INTERRUPT_TYPE_NMI = 0x0004, + HV_X64_INTERRUPT_TYPE_INIT = 0x0005, + HV_X64_INTERRUPT_TYPE_SIPI = 0x0006, + HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007, + HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008, + HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009, + HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A, +}; + +/* Define synthetic interrupt source. 
*/ +union hv_synic_sint { + u64 as_uint64; + struct { + u64 vector : 8; + u64 reserved1 : 8; + u64 masked : 1; + u64 auto_eoi : 1; + u64 polling : 1; + u64 as_intercept : 1; + u64 proxy : 1; + u64 reserved2 : 43; + } __packed; +}; + +union hv_x64_xsave_xfem_register { + u64 as_uint64; + struct { + u32 low_uint32; + u32 high_uint32; + } __packed; + struct { + u64 legacy_x87 : 1; + u64 legacy_sse : 1; + u64 avx : 1; + u64 mpx_bndreg : 1; + u64 mpx_bndcsr : 1; + u64 avx_512_op_mask : 1; + u64 avx_512_zmmhi : 1; + u64 avx_512_zmm16_31 : 1; + u64 rsvd8_9 : 2; + u64 pasid : 1; + u64 cet_u : 1; + u64 cet_s : 1; + u64 rsvd13_16 : 4; + u64 xtile_cfg : 1; + u64 xtile_data : 1; + u64 rsvd19_63 : 45; + } __packed; +}; + +/* Synthetic timer configuration */ +union hv_stimer_config { /* HV_X64_MSR_STIMER_CONFIG_CONTENTS */ + u64 as_uint64; + struct { + u64 enable : 1; + u64 periodic : 1; + u64 lazy : 1; + u64 auto_enable : 1; + u64 apic_vector : 8; + u64 direct_mode : 1; + u64 reserved_z0 : 3; + u64 sintx : 4; + u64 reserved_z1 : 44; + } __packed; +}; + +/* Define the number of synthetic timers */ +#define HV_SYNIC_STIMER_COUNT (4) + +/* Define port identifier type. */ +union hv_port_id { + u32 asu32; + struct { + u32 id : 24; + u32 reserved : 8; + } __packed u; +}; + +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) + +/* Define hypervisor message types. */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* + * Opaque intercept message. The original intercept message is only + * accessible from the mapped intercept message page. 
+ */ + HVMSG_OPAQUE_INTERCEPT = 0x8000003F, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Hypercall intercept */ + HVMSG_HYPERCALL_INTERCEPT = 0x80000050, + + /* SynIC intercepts */ + HVMSG_SYNIC_EVENT_INTERCEPT = 0x80000060, + HVMSG_SYNIC_SINT_INTERCEPT = 0x80000061, + HVMSG_SYNIC_SINT_DELIVERABLE = 0x80000062, + + /* Async call completion intercept */ + HVMSG_ASYNC_CALL_COMPLETION = 0x80000070, + + /* Root scheduler messages */ + HVMSG_SCHEDULER_VP_SIGNAL_BITSET = 0x80000100, + HVMSG_SCHEDULER_VP_SIGNAL_PAIR = 0x80000101, + + /* Platform-specific processor intercept messages. */ + HVMSG_X64_IO_PORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005, + HVMSG_X64_IOMMU_PRQ = 0x80010006, + HVMSG_X64_HALT = 0x80010007, + HVMSG_X64_INTERRUPTION_DELIVERABLE = 0x80010008, + HVMSG_X64_SIPI_INTERCEPT = 0x80010009, +}; + +/* Define the format of the SIMP register */ +union hv_synic_simp { + u64 as_uint64; + struct { + u64 simp_enabled : 1; + u64 preserved : 11; + u64 base_simp_gpa : 52; + } __packed; +}; + +union hv_message_flags { + u8 asu8; + struct { + u8 msg_pending : 1; + u8 reserved : 7; + } __packed; +}; + +struct hv_message_header { + u32 message_type; + u8 payload_size; + union hv_message_flags message_flags; + u8 reserved[2]; + union { + u64 sender; + union hv_port_id port; + }; +} __packed; + +/* + * Message format for notifications delivered via + * intercept message(as_intercept=1) + */ +struct hv_notification_message_payload { + u32 sint_index; +} __packed; + +struct hv_message { + struct hv_message_header header; + union { + u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u; +} __packed; + +/* Define the synthetic interrupt message page layout. 
*/ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +} __packed; + +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + __u32 timer_index; + __u32 reserved; + __u64 expiration_time; /* When the timer expired */ + __u64 delivery_time; /* When the message was delivered */ +} __packed; + +struct hv_x64_segment_register { + u64 base; + u32 limit; + u16 selector; + union { + struct { + u16 segment_type : 4; + u16 non_system_segment : 1; + u16 descriptor_privilege_level : 2; + u16 present : 1; + u16 reserved : 4; + u16 available : 1; + u16 _long : 1; + u16 _default : 1; + u16 granularity : 1; + } __packed; + u16 attributes; + }; +} __packed; + +struct hv_x64_table_register { + u16 pad[3]; + u16 limit; + u64 base; +} __packed; + +union hv_input_vtl { + u8 as_uint8; + struct { + u8 target_vtl : 4; + u8 use_target_vtl : 1; + u8 reserved_z : 3; + }; +} __packed; + +struct hv_init_vp_context { + u64 rip; + u64 rsp; + u64 rflags; + + struct hv_x64_segment_register cs; + struct hv_x64_segment_register ds; + struct hv_x64_segment_register es; + struct hv_x64_segment_register fs; + struct hv_x64_segment_register gs; + struct hv_x64_segment_register ss; + struct hv_x64_segment_register tr; + struct hv_x64_segment_register ldtr; + + struct hv_x64_table_register idtr; + struct hv_x64_table_register gdtr; + + u64 efer; + u64 cr0; + u64 cr3; + u64 cr4; + u64 msr_cr_pat; +} __packed; + +struct hv_enable_vp_vtl { + u64 partition_id; + u32 vp_index; + union hv_input_vtl target_vtl; + u8 mbz0; + u16 mbz1; + struct hv_init_vp_context vp_context; +} __packed; + +struct hv_get_vp_from_apic_id_in { + u64 partition_id; + union hv_input_vtl target_vtl; + u8 res[7]; + u32 apic_ids[]; +} __packed; + +struct hv_nested_enlightenments_control { + struct { + u32 directhypercall : 1; + u32 reserved : 31; + } __packed features; + struct { + u32 inter_partition_comm : 1; + u32 reserved : 31; + } __packed hypercall_controls; +} __packed; + 
+/* Define virtual processor assist page structure. */ +struct hv_vp_assist_page { + u32 apic_assist; + u32 reserved1; + u32 vtl_entry_reason; + u32 vtl_reserved; + u64 vtl_ret_x64rax; + u64 vtl_ret_x64rcx; + struct hv_nested_enlightenments_control nested_control; + u8 enlighten_vmentry; + u8 reserved2[7]; + u64 current_nested_vmcs; + u8 synthetic_time_unhalted_timer_expired; + u8 reserved3[7]; + u8 virtualization_fault_information[40]; + u8 reserved4[8]; + u8 intercept_message[256]; + u8 vtl_ret_actions[256]; +} __packed; + +enum hv_register_name { + /* Suspend Registers */ + HV_REGISTER_EXPLICIT_SUSPEND = 0x00000000, + HV_REGISTER_INTERCEPT_SUSPEND = 0x00000001, + HV_REGISTER_DISPATCH_SUSPEND = 0x00000003, + + /* Version - 128-bit result same as CPUID 0x40000002 */ + HV_REGISTER_HYPERVISOR_VERSION = 0x00000100, + + /* Feature Access (registers are 128 bits) - same as CPUID 0x40000003 - 0x4000000B */ + HV_REGISTER_PRIVILEGES_AND_FEATURES_INFO = 0x00000200, + HV_REGISTER_FEATURES_INFO = 0x00000201, + HV_REGISTER_IMPLEMENTATION_LIMITS_INFO = 0x00000202, + HV_REGISTER_HARDWARE_FEATURES_INFO = 0x00000203, + HV_REGISTER_CPU_MANAGEMENT_FEATURES_INFO = 0x00000204, + HV_REGISTER_SVM_FEATURES_INFO = 0x00000205, + HV_REGISTER_SKIP_LEVEL_FEATURES_INFO = 0x00000206, + HV_REGISTER_NESTED_VIRT_FEATURES_INFO = 0x00000207, + HV_REGISTER_IPT_FEATURES_INFO = 0x00000208, + + /* Guest Crash Registers */ + HV_REGISTER_GUEST_CRASH_P0 = 0x00000210, + HV_REGISTER_GUEST_CRASH_P1 = 0x00000211, + HV_REGISTER_GUEST_CRASH_P2 = 0x00000212, + HV_REGISTER_GUEST_CRASH_P3 = 0x00000213, + HV_REGISTER_GUEST_CRASH_P4 = 0x00000214, + HV_REGISTER_GUEST_CRASH_CTL = 0x00000215, + + /* Misc */ + HV_REGISTER_VP_RUNTIME = 0x00090000, + HV_REGISTER_GUEST_OS_ID = 0x00090002, + HV_REGISTER_VP_INDEX = 0x00090003, + HV_REGISTER_TIME_REF_COUNT = 0x00090004, + HV_REGISTER_CPU_MANAGEMENT_VERSION = 0x00090007, + HV_REGISTER_VP_ASSIST_PAGE = 0x00090013, + HV_REGISTER_VP_ROOT_SIGNAL_COUNT = 0x00090014, + 
HV_REGISTER_REFERENCE_TSC = 0x00090017, + + /* Hypervisor-defined Registers (Synic) */ + HV_REGISTER_SINT0 = 0x000A0000, + HV_REGISTER_SINT1 = 0x000A0001, + HV_REGISTER_SINT2 = 0x000A0002, + HV_REGISTER_SINT3 = 0x000A0003, + HV_REGISTER_SINT4 = 0x000A0004, + HV_REGISTER_SINT5 = 0x000A0005, + HV_REGISTER_SINT6 = 0x000A0006, + HV_REGISTER_SINT7 = 0x000A0007, + HV_REGISTER_SINT8 = 0x000A0008, + HV_REGISTER_SINT9 = 0x000A0009, + HV_REGISTER_SINT10 = 0x000A000A, + HV_REGISTER_SINT11 = 0x000A000B, + HV_REGISTER_SINT12 = 0x000A000C, + HV_REGISTER_SINT13 = 0x000A000D, + HV_REGISTER_SINT14 = 0x000A000E, + HV_REGISTER_SINT15 = 0x000A000F, + HV_REGISTER_SCONTROL = 0x000A0010, + HV_REGISTER_SVERSION = 0x000A0011, + HV_REGISTER_SIEFP = 0x000A0012, + HV_REGISTER_SIMP = 0x000A0013, + HV_REGISTER_EOM = 0x000A0014, + HV_REGISTER_SIRBP = 0x000A0015, + + HV_REGISTER_NESTED_SINT0 = 0x000A1000, + HV_REGISTER_NESTED_SINT1 = 0x000A1001, + HV_REGISTER_NESTED_SINT2 = 0x000A1002, + HV_REGISTER_NESTED_SINT3 = 0x000A1003, + HV_REGISTER_NESTED_SINT4 = 0x000A1004, + HV_REGISTER_NESTED_SINT5 = 0x000A1005, + HV_REGISTER_NESTED_SINT6 = 0x000A1006, + HV_REGISTER_NESTED_SINT7 = 0x000A1007, + HV_REGISTER_NESTED_SINT8 = 0x000A1008, + HV_REGISTER_NESTED_SINT9 = 0x000A1009, + HV_REGISTER_NESTED_SINT10 = 0x000A100A, + HV_REGISTER_NESTED_SINT11 = 0x000A100B, + HV_REGISTER_NESTED_SINT12 = 0x000A100C, + HV_REGISTER_NESTED_SINT13 = 0x000A100D, + HV_REGISTER_NESTED_SINT14 = 0x000A100E, + HV_REGISTER_NESTED_SINT15 = 0x000A100F, + HV_REGISTER_NESTED_SCONTROL = 0x000A1010, + HV_REGISTER_NESTED_SVERSION = 0x000A1011, + HV_REGISTER_NESTED_SIFP = 0x000A1012, + HV_REGISTER_NESTED_SIPP = 0x000A1013, + HV_REGISTER_NESTED_EOM = 0x000A1014, + HV_REGISTER_NESTED_SIRBP = 0x000a1015, + + /* Hypervisor-defined Registers (Synthetic Timers) */ + HV_REGISTER_STIMER0_CONFIG = 0x000B0000, + HV_REGISTER_STIMER0_COUNT = 0x000B0001, + + /* VSM */ + HV_REGISTER_VSM_VP_STATUS = 0x000D0003, +}; + +/* + * Arch compatibility regs for 
use with hv_set/get_register + */ +#if defined(CONFIG_X86) + +/* + * To support arch-generic code calling hv_set/get_register: + * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl + * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall + */ +#define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0) +#define HV_MSR_CRASH_P1 (HV_X64_MSR_CRASH_P1) +#define HV_MSR_CRASH_P2 (HV_X64_MSR_CRASH_P2) +#define HV_MSR_CRASH_P3 (HV_X64_MSR_CRASH_P3) +#define HV_MSR_CRASH_P4 (HV_X64_MSR_CRASH_P4) +#define HV_MSR_CRASH_CTL (HV_X64_MSR_CRASH_CTL) + +#define HV_MSR_VP_INDEX (HV_X64_MSR_VP_INDEX) +#define HV_MSR_TIME_REF_COUNT (HV_X64_MSR_TIME_REF_COUNT) +#define HV_MSR_REFERENCE_TSC (HV_X64_MSR_REFERENCE_TSC) + +#define HV_MSR_SINT0 (HV_X64_MSR_SINT0) +#define HV_MSR_SVERSION (HV_X64_MSR_SVERSION) +#define HV_MSR_SCONTROL (HV_X64_MSR_SCONTROL) +#define HV_MSR_SIEFP (HV_X64_MSR_SIEFP) +#define HV_MSR_SIMP (HV_X64_MSR_SIMP) +#define HV_MSR_EOM (HV_X64_MSR_EOM) +#define HV_MSR_SIRBP (HV_X64_MSR_SIRBP) + +#define HV_MSR_NESTED_SCONTROL (HV_X64_MSR_NESTED_SCONTROL) +#define HV_MSR_NESTED_SVERSION (HV_X64_MSR_NESTED_SVERSION) +#define HV_MSR_NESTED_SIEFP (HV_X64_MSR_NESTED_SIEFP) +#define HV_MSR_NESTED_SIMP (HV_X64_MSR_NESTED_SIMP) +#define HV_MSR_NESTED_EOM (HV_X64_MSR_NESTED_EOM) +#define HV_MSR_NESTED_SINT0 (HV_X64_MSR_NESTED_SINT0) + +#define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG) +#define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT) + +#elif defined(CONFIG_ARM64) /* CONFIG_X86 */ + +#define HV_MSR_CRASH_P0 (HV_REGISTER_GUEST_CRASH_P0) +#define HV_MSR_CRASH_P1 (HV_REGISTER_GUEST_CRASH_P1) +#define HV_MSR_CRASH_P2 (HV_REGISTER_GUEST_CRASH_P2) +#define HV_MSR_CRASH_P3 (HV_REGISTER_GUEST_CRASH_P3) +#define HV_MSR_CRASH_P4 (HV_REGISTER_GUEST_CRASH_P4) +#define HV_MSR_CRASH_CTL (HV_REGISTER_GUEST_CRASH_CTL) + +#define HV_MSR_VP_INDEX (HV_REGISTER_VP_INDEX) +#define HV_MSR_TIME_REF_COUNT (HV_REGISTER_TIME_REF_COUNT) +#define HV_MSR_REFERENCE_TSC 
(HV_REGISTER_REFERENCE_TSC) + +#define HV_MSR_SINT0 (HV_REGISTER_SINT0) +#define HV_MSR_SCONTROL (HV_REGISTER_SCONTROL) +#define HV_MSR_SIEFP (HV_REGISTER_SIEFP) +#define HV_MSR_SIMP (HV_REGISTER_SIMP) +#define HV_MSR_EOM (HV_REGISTER_EOM) +#define HV_MSR_SIRBP (HV_REGISTER_SIRBP) + +#define HV_MSR_STIMER0_CONFIG (HV_REGISTER_STIMER0_CONFIG) +#define HV_MSR_STIMER0_COUNT (HV_REGISTER_STIMER0_COUNT) + +#endif /* CONFIG_ARM64 */ + +union hv_explicit_suspend_register { + u64 as_uint64; + struct { + u64 suspended : 1; + u64 reserved : 63; + } __packed; +}; + +union hv_intercept_suspend_register { + u64 as_uint64; + struct { + u64 suspended : 1; + u64 reserved : 63; + } __packed; +}; + +union hv_dispatch_suspend_register { + u64 as_uint64; + struct { + u64 suspended : 1; + u64 reserved : 63; + } __packed; +}; + +union hv_arm64_pending_interruption_register { + u64 as_uint64; + struct { + u64 interruption_pending : 1; + u64 interruption_type: 1; + u64 reserved : 30; + u64 error_code : 32; + } __packed; +}; + +union hv_arm64_interrupt_state_register { + u64 as_uint64; + struct { + u64 interrupt_shadow : 1; + u64 reserved : 63; + } __packed; +}; + +union hv_arm64_pending_synthetic_exception_event { + u64 as_uint64[2]; + struct { + u8 event_pending : 1; + u8 event_type : 3; + u8 reserved : 4; + u8 rsvd[3]; + u32 exception_type; + u64 context; + } __packed; +}; + +union hv_x64_interrupt_state_register { + u64 as_uint64; + struct { + u64 interrupt_shadow : 1; + u64 nmi_masked : 1; + u64 reserved : 62; + } __packed; +}; + +union hv_x64_pending_interruption_register { + u64 as_uint64; + struct { + u32 interruption_pending : 1; + u32 interruption_type : 3; + u32 deliver_error_code : 1; + u32 instruction_length : 4; + u32 nested_event : 1; + u32 reserved : 6; + u32 interruption_vector : 16; + u32 error_code; + } __packed; +}; + +union hv_register_value { + struct hv_u128 reg128; + u64 reg64; + u32 reg32; + u16 reg16; + u8 reg8; + + struct hv_x64_segment_register segment; + struct 
hv_x64_table_register table; + union hv_explicit_suspend_register explicit_suspend; + union hv_intercept_suspend_register intercept_suspend; + union hv_dispatch_suspend_register dispatch_suspend; +#ifdef CONFIG_ARM64 + union hv_arm64_interrupt_state_register interrupt_state; + union hv_arm64_pending_interruption_register pending_interruption; +#endif +#ifdef CONFIG_X86 + union hv_x64_interrupt_state_register interrupt_state; + union hv_x64_pending_interruption_register pending_interruption; +#endif + union hv_arm64_pending_synthetic_exception_event pending_synthetic_exception_event; +}; + +/* NOTE: Linux helper struct - NOT from Hyper-V code. */ +struct hv_output_get_vp_registers { + DECLARE_FLEX_ARRAY(union hv_register_value, values); +}; + +#if defined(CONFIG_ARM64) +/* HvGetVpRegisters returns an array of these output elements */ +struct hv_get_vp_registers_output { + union { + struct { + u32 a; + u32 b; + u32 c; + u32 d; + } as32 __packed; + struct { + u64 low; + u64 high; + } as64 __packed; + }; +}; + +#endif /* CONFIG_ARM64 */ + +struct hv_register_assoc { + u32 name; /* enum hv_register_name */ + u32 reserved1; + u64 reserved2; + union hv_register_value value; +} __packed; + +struct hv_input_get_vp_registers { + u64 partition_id; + u32 vp_index; + union hv_input_vtl input_vtl; + u8 rsvd_z8; + u16 rsvd_z16; + u32 names[]; +} __packed; + +struct hv_input_set_vp_registers { + u64 partition_id; + u32 vp_index; + union hv_input_vtl input_vtl; + u8 rsvd_z8; + u16 rsvd_z16; + struct hv_register_assoc elements[]; +} __packed; + +#define HV_UNMAP_GPA_LARGE_PAGE 0x2 + +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { /* HV_INPUT_SEND_SYNTHETIC_CLUSTER_IPI */ + u32 vector; + u32 reserved; + u64 cpu_mask; +} __packed; + +#define HV_X64_VTL_MASK GENMASK(3, 0) + +/* Hyper-V memory host visibility */ +enum hv_mem_host_visibility { + VMBUS_PAGE_NOT_VISIBLE = 0, + VMBUS_PAGE_VISIBLE_READ_ONLY = 1, + VMBUS_PAGE_VISIBLE_READ_WRITE = 3 +}; + +/* 
HvCallModifySparseGpaPageHostVisibility hypercall */ +#define HV_MAX_MODIFY_GPA_REP_COUNT ((HV_HYP_PAGE_SIZE / sizeof(u64)) - 2) +struct hv_gpa_range_for_visibility { + u64 partition_id; + u32 host_visibility : 2; + u32 reserved0 : 30; + u32 reserved1; + u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT]; +} __packed; + +#if defined(CONFIG_X86) +union hv_msi_address_register { /* HV_MSI_ADDRESS */ + u32 as_uint32; + struct { + u32 reserved1 : 2; + u32 destination_mode : 1; + u32 redirection_hint : 1; + u32 reserved2 : 8; + u32 destination_id : 8; + u32 msi_base : 12; + }; +} __packed; + +union hv_msi_data_register { /* HV_MSI_ENTRY.Data */ + u32 as_uint32; + struct { + u32 vector : 8; + u32 delivery_mode : 3; + u32 reserved1 : 3; + u32 level_assert : 1; + u32 trigger_mode : 1; + u32 reserved2 : 16; + }; +} __packed; + +union hv_msi_entry { /* HV_MSI_ENTRY */ + + u64 as_uint64; + struct { + union hv_msi_address_register address; + union hv_msi_data_register data; + } __packed; +}; + +#elif defined(CONFIG_ARM64) /* CONFIG_X86 */ + +union hv_msi_entry { + u64 as_uint64[2]; + struct { + u64 address; + u32 data; + u32 reserved; + } __packed; +}; +#endif /* CONFIG_ARM64 */ + +union hv_ioapic_rte { + u64 as_uint64; + + struct { + u32 vector : 8; + u32 delivery_mode : 3; + u32 destination_mode : 1; + u32 delivery_status : 1; + u32 interrupt_polarity : 1; + u32 remote_irr : 1; + u32 trigger_mode : 1; + u32 interrupt_mask : 1; + u32 reserved1 : 15; + + u32 reserved2 : 24; + u32 destination_id : 8; + }; + + struct { + u32 low_uint32; + u32 high_uint32; + }; +} __packed; + +enum hv_interrupt_source { /* HV_INTERRUPT_SOURCE */ + HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */ + HV_INTERRUPT_SOURCE_IOAPIC, +}; + +struct hv_interrupt_entry { /* HV_INTERRUPT_ENTRY */ + u32 source; + u32 reserved1; + union { + union hv_msi_entry msi_entry; + union hv_ioapic_rte ioapic_rte; + }; +} __packed; + +#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 +#define 
HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 + +struct hv_device_interrupt_target { /* HV_DEVICE_INTERRUPT_TARGET */ + u32 vector; + u32 flags; /* HV_DEVICE_INTERRUPT_TARGET_* above */ + union { + u64 vp_mask; + struct hv_vpset vp_set; + }; +} __packed; + +struct hv_retarget_device_interrupt { /* HV_INPUT_RETARGET_DEVICE_INTERRUPT */ + u64 partition_id; /* use "self" */ + u64 device_id; + struct hv_interrupt_entry int_entry; + u64 reserved2; + struct hv_device_interrupt_target int_target; +} __packed __aligned(8); + +/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */ +#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64 + +struct hv_mmio_read_input { /* HV_INPUT_MEMORY_MAPPED_IO_READ */ + u64 gpa; + u32 size; + u32 reserved; +} __packed; + +struct hv_mmio_read_output { + u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; +} __packed; + +struct hv_mmio_write_input { + u64 gpa; + u32 size; + u32 reserved; + u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; +} __packed; + +#endif /* _HV_HVGDK_MINI_H */ diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h new file mode 100644 index 0000000000000..64407c2a38099 --- /dev/null +++ b/include/hyperv/hvhdk.h @@ -0,0 +1,733 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Type definitions for the Microsoft hypervisor. 
+ */ +#ifndef _HV_HVHDK_H +#define _HV_HVHDK_H + +#include + +#include "hvhdk_mini.h" +#include "hvgdk.h" + +/* Bits for dirty mask of hv_vp_register_page */ +#define HV_X64_REGISTER_CLASS_GENERAL 0 +#define HV_X64_REGISTER_CLASS_IP 1 +#define HV_X64_REGISTER_CLASS_XMM 2 +#define HV_X64_REGISTER_CLASS_SEGMENT 3 +#define HV_X64_REGISTER_CLASS_FLAGS 4 + +#define HV_VP_REGISTER_PAGE_VERSION_1 1u + +struct hv_vp_register_page { + u16 version; + u8 isvalid; + u8 rsvdz; + u32 dirty; + union { + struct { + /* General purpose registers + * (HV_X64_REGISTER_CLASS_GENERAL) + */ + union { + struct { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 rsp; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + } __packed; + + u64 gp_registers[16]; + }; + /* Instruction pointer (HV_X64_REGISTER_CLASS_IP) */ + u64 rip; + /* Flags (HV_X64_REGISTER_CLASS_FLAGS) */ + u64 rflags; + } __packed; + + u64 registers[18]; + }; + /* Volatile XMM registers (HV_X64_REGISTER_CLASS_XMM) */ + union { + struct { + struct hv_u128 xmm0; + struct hv_u128 xmm1; + struct hv_u128 xmm2; + struct hv_u128 xmm3; + struct hv_u128 xmm4; + struct hv_u128 xmm5; + } __packed; + + struct hv_u128 xmm_registers[6]; + }; + /* Segment registers (HV_X64_REGISTER_CLASS_SEGMENT) */ + union { + struct { + struct hv_x64_segment_register es; + struct hv_x64_segment_register cs; + struct hv_x64_segment_register ss; + struct hv_x64_segment_register ds; + struct hv_x64_segment_register fs; + struct hv_x64_segment_register gs; + } __packed; + + struct hv_x64_segment_register segment_registers[6]; + }; + /* Misc. 
control registers (cannot be set via this interface) */ + u64 cr0; + u64 cr3; + u64 cr4; + u64 cr8; + u64 efer; + u64 dr7; + union hv_x64_pending_interruption_register pending_interruption; + union hv_x64_interrupt_state_register interrupt_state; + u64 instruction_emulation_hints; +} __packed; + +#define HV_PARTITION_PROCESSOR_FEATURES_BANKS 2 + +union hv_partition_processor_features { + u64 as_uint64[HV_PARTITION_PROCESSOR_FEATURES_BANKS]; + struct { + u64 sse3_support : 1; + u64 lahf_sahf_support : 1; + u64 ssse3_support : 1; + u64 sse4_1_support : 1; + u64 sse4_2_support : 1; + u64 sse4a_support : 1; + u64 xop_support : 1; + u64 pop_cnt_support : 1; + u64 cmpxchg16b_support : 1; + u64 altmovcr8_support : 1; + u64 lzcnt_support : 1; + u64 mis_align_sse_support : 1; + u64 mmx_ext_support : 1; + u64 amd3dnow_support : 1; + u64 extended_amd3dnow_support : 1; + u64 page_1gb_support : 1; + u64 aes_support : 1; + u64 pclmulqdq_support : 1; + u64 pcid_support : 1; + u64 fma4_support : 1; + u64 f16c_support : 1; + u64 rd_rand_support : 1; + u64 rd_wr_fs_gs_support : 1; + u64 smep_support : 1; + u64 enhanced_fast_string_support : 1; + u64 bmi1_support : 1; + u64 bmi2_support : 1; + u64 hle_support_deprecated : 1; + u64 rtm_support_deprecated : 1; + u64 movbe_support : 1; + u64 npiep1_support : 1; + u64 dep_x87_fpu_save_support : 1; + u64 rd_seed_support : 1; + u64 adx_support : 1; + u64 intel_prefetch_support : 1; + u64 smap_support : 1; + u64 hle_support : 1; + u64 rtm_support : 1; + u64 rdtscp_support : 1; + u64 clflushopt_support : 1; + u64 clwb_support : 1; + u64 sha_support : 1; + u64 x87_pointers_saved_support : 1; + u64 invpcid_support : 1; + u64 ibrs_support : 1; + u64 stibp_support : 1; + u64 ibpb_support: 1; + u64 unrestricted_guest_support : 1; + u64 mdd_support : 1; + u64 fast_short_rep_mov_support : 1; + u64 l1dcache_flush_support : 1; + u64 rdcl_no_support : 1; + u64 ibrs_all_support : 1; + u64 skip_l1df_support : 1; + u64 ssb_no_support : 1; + u64 
rsb_a_no_support : 1; + u64 virt_spec_ctrl_support : 1; + u64 rd_pid_support : 1; + u64 umip_support : 1; + u64 mbs_no_support : 1; + u64 mb_clear_support : 1; + u64 taa_no_support : 1; + u64 tsx_ctrl_support : 1; + /* + * N.B. The final processor feature bit in bank 0 is reserved to + * simplify potential downlevel backports. + */ + u64 reserved_bank0 : 1; + + /* N.B. Begin bank 1 processor features. */ + u64 acount_mcount_support : 1; + u64 tsc_invariant_support : 1; + u64 cl_zero_support : 1; + u64 rdpru_support : 1; + u64 la57_support : 1; + u64 mbec_support : 1; + u64 nested_virt_support : 1; + u64 psfd_support : 1; + u64 cet_ss_support : 1; + u64 cet_ibt_support : 1; + u64 vmx_exception_inject_support : 1; + u64 enqcmd_support : 1; + u64 umwait_tpause_support : 1; + u64 movdiri_support : 1; + u64 movdir64b_support : 1; + u64 cldemote_support : 1; + u64 serialize_support : 1; + u64 tsc_deadline_tmr_support : 1; + u64 tsc_adjust_support : 1; + u64 fzlrep_movsb : 1; + u64 fsrep_stosb : 1; + u64 fsrep_cmpsb : 1; + u64 reserved_bank1 : 42; + } __packed; +}; + +union hv_partition_processor_xsave_features { + struct { + u64 xsave_support : 1; + u64 xsaveopt_support : 1; + u64 avx_support : 1; + u64 reserved1 : 61; + } __packed; + u64 as_uint64; +}; + +struct hv_partition_creation_properties { + union hv_partition_processor_features disabled_processor_features; + union hv_partition_processor_xsave_features + disabled_processor_xsave_features; +} __packed; + +#define HV_PARTITION_SYNTHETIC_PROCESSOR_FEATURES_BANKS 1 + +union hv_partition_synthetic_processor_features { + u64 as_uint64[HV_PARTITION_SYNTHETIC_PROCESSOR_FEATURES_BANKS]; + + struct { + u64 hypervisor_present : 1; + /* Support for HV#1: (CPUID leaves 0x40000000 - 0x40000006)*/ + u64 hv1 : 1; + u64 access_vp_run_time_reg : 1; /* HV_X64_MSR_VP_RUNTIME */ + u64 access_partition_reference_counter : 1; /* HV_X64_MSR_TIME_REF_COUNT */ + u64 access_synic_regs : 1; /* SINT-related registers */ + /* + * Access to 
HV_X64_MSR_STIMER0_CONFIG through + * HV_X64_MSR_STIMER3_COUNT. + */ + u64 access_synthetic_timer_regs : 1; + u64 access_intr_ctrl_regs : 1; /* APIC MSRs and VP assist page*/ + /* HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL */ + u64 access_hypercall_regs : 1; + u64 access_vp_index : 1; + u64 access_partition_reference_tsc : 1; + u64 access_guest_idle_reg : 1; + u64 access_frequency_regs : 1; + u64 reserved_z12 : 1; + u64 reserved_z13 : 1; + u64 reserved_z14 : 1; + u64 enable_extended_gva_ranges_for_flush_virtual_address_list : 1; + u64 reserved_z16 : 1; + u64 reserved_z17 : 1; + /* Use fast hypercall output. Corresponds to privilege. */ + u64 fast_hypercall_output : 1; + u64 reserved_z19 : 1; + u64 start_virtual_processor : 1; /* Can start VPs */ + u64 reserved_z21 : 1; + /* Synthetic timers in direct mode. */ + u64 direct_synthetic_timers : 1; + u64 reserved_z23 : 1; + u64 extended_processor_masks : 1; + + /* Enable various hypercalls */ + u64 tb_flush_hypercalls : 1; + u64 synthetic_cluster_ipi : 1; + u64 notify_long_spin_wait : 1; + u64 query_numa_distance : 1; + u64 signal_events : 1; + u64 retarget_device_interrupt : 1; + u64 restore_time : 1; + + /* EnlightenedVmcs nested enlightenment is supported. */ + u64 enlightened_vmcs : 1; + u64 reserved : 31; + } __packed; +}; + +#define HV_MAKE_COMPATIBILITY_VERSION(major_, minor_) \ + ((u32)((major_) << 8 | (minor_))) + +#define HV_COMPATIBILITY_21_H2 HV_MAKE_COMPATIBILITY_VERSION(0X6, 0X9) + +union hv_partition_isolation_properties { + u64 as_uint64; + struct { + u64 isolation_type: 5; + u64 isolation_host_type : 2; + u64 rsvd_z: 5; + u64 shared_gpa_boundary_page_number: 52; + } __packed; +}; + +/* + * Various isolation types supported by MSHV. + */ +#define HV_PARTITION_ISOLATION_TYPE_NONE 0 +#define HV_PARTITION_ISOLATION_TYPE_SNP 2 +#define HV_PARTITION_ISOLATION_TYPE_TDX 3 + +/* + * Various host isolation types supported by MSHV. 
+ */ +#define HV_PARTITION_ISOLATION_HOST_TYPE_NONE 0x0 +#define HV_PARTITION_ISOLATION_HOST_TYPE_HARDWARE 0x1 +#define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2 + +/* Note: Exo partition is enabled by default */ +#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8) +#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13) +#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED BIT(19) +#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE BIT(22) + +struct hv_input_create_partition { + u64 flags; + struct hv_proximity_domain_info proximity_domain_info; + u32 compatibility_version; + u32 padding; + struct hv_partition_creation_properties partition_creation_properties; + union hv_partition_isolation_properties isolation_properties; +} __packed; + +struct hv_output_create_partition { + u64 partition_id; +} __packed; + +struct hv_input_initialize_partition { + u64 partition_id; +} __packed; + +struct hv_input_finalize_partition { + u64 partition_id; +} __packed; + +struct hv_input_delete_partition { + u64 partition_id; +} __packed; + +struct hv_input_get_partition_property { + u64 partition_id; + u32 property_code; /* enum hv_partition_property_code */ + u32 padding; +} __packed; + +struct hv_output_get_partition_property { + u64 property_value; +} __packed; + +struct hv_input_set_partition_property { + u64 partition_id; + u32 property_code; /* enum hv_partition_property_code */ + u32 padding; + u64 property_value; +} __packed; + +enum hv_vp_state_page_type { + HV_VP_STATE_PAGE_REGISTERS = 0, + HV_VP_STATE_PAGE_INTERCEPT_MESSAGE = 1, + HV_VP_STATE_PAGE_COUNT +}; + +struct hv_input_map_vp_state_page { + u64 partition_id; + u32 vp_index; + u32 type; /* enum hv_vp_state_page_type */ +} __packed; + +struct hv_output_map_vp_state_page { + u64 map_location; /* GPA page number */ +} __packed; + +struct hv_input_unmap_vp_state_page { + u64 partition_id; + u32 vp_index; + u32 type; /* enum hv_vp_state_page_type */ +} __packed; + +struct 
hv_opaque_intercept_message { + u32 vp_index; +} __packed; + +enum hv_port_type { + HV_PORT_TYPE_MESSAGE = 1, + HV_PORT_TYPE_EVENT = 2, + HV_PORT_TYPE_MONITOR = 3, + HV_PORT_TYPE_DOORBELL = 4 /* Root Partition only */ +}; + +struct hv_port_info { + u32 port_type; /* enum hv_port_type */ + u32 padding; + union { + struct { + u32 target_sint; + u32 target_vp; + u64 rsvdz; + } message_port_info; + struct { + u32 target_sint; + u32 target_vp; + u16 base_flag_number; + u16 flag_count; + u32 rsvdz; + } event_port_info; + struct { + u64 monitor_address; + u64 rsvdz; + } monitor_port_info; + struct { + u32 target_sint; + u32 target_vp; + u64 rsvdz; + } doorbell_port_info; + }; +} __packed; + +struct hv_connection_info { + u32 port_type; + u32 padding; + union { + struct { + u64 rsvdz; + } message_connection_info; + struct { + u64 rsvdz; + } event_connection_info; + struct { + u64 monitor_address; + } monitor_connection_info; + struct { + u64 gpa; + u64 trigger_value; + u64 flags; + } doorbell_connection_info; + }; +} __packed; + +/* Define synthetic interrupt controller flag constants. */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) +#define HV_EVENT_FLAGS_BYTE_COUNT (256) +#define HV_EVENT_FLAGS32_COUNT (256 / sizeof(u32)) + +/* linux side we create long version of flags to use long bit ops on flags */ +#define HV_EVENT_FLAGS_UL_COUNT (256 / sizeof(ulong)) + +/* Define the synthetic interrupt controller event flags format. 
*/ +union hv_synic_event_flags { + unsigned char flags8[HV_EVENT_FLAGS_BYTE_COUNT]; + u32 flags32[HV_EVENT_FLAGS32_COUNT]; + ulong flags[HV_EVENT_FLAGS_UL_COUNT]; /* linux only */ +}; + +struct hv_synic_event_flags_page { + volatile union hv_synic_event_flags event_flags[HV_SYNIC_SINT_COUNT]; +}; + +#define HV_SYNIC_EVENT_RING_MESSAGE_COUNT 63 + +struct hv_synic_event_ring { + u8 signal_masked; + u8 ring_full; + u16 reserved_z; + u32 data[HV_SYNIC_EVENT_RING_MESSAGE_COUNT]; +} __packed; + +struct hv_synic_event_ring_page { + struct hv_synic_event_ring sint_event_ring[HV_SYNIC_SINT_COUNT]; +}; + +/* Define SynIC control register. */ +union hv_synic_scontrol { + u64 as_uint64; + struct { + u64 enable : 1; + u64 reserved : 63; + } __packed; +}; + +/* Define the format of the SIEFP register */ +union hv_synic_siefp { + u64 as_uint64; + struct { + u64 siefp_enabled : 1; + u64 preserved : 11; + u64 base_siefp_gpa : 52; + } __packed; +}; + +union hv_synic_sirbp { + u64 as_uint64; + struct { + u64 sirbp_enabled : 1; + u64 preserved : 11; + u64 base_sirbp_gpa : 52; + } __packed; +}; + +union hv_interrupt_control { + u64 as_uint64; + struct { + u32 interrupt_type; /* enum hv_interrupt_type */ + u32 level_triggered : 1; + u32 logical_dest_mode : 1; + u32 rsvd : 30; + } __packed; +}; + +struct hv_stimer_state { + struct { + u32 undelivered_msg_pending : 1; + u32 reserved : 31; + } __packed flags; + u32 resvd; + u64 config; + u64 count; + u64 adjustment; + u64 undelivered_exp_time; +} __packed; + +struct hv_synthetic_timers_state { + struct hv_stimer_state timers[HV_SYNIC_STIMER_COUNT]; + u64 reserved[5]; +} __packed; + +union hv_input_delete_vp { + u64 as_uint64[2]; + struct { + u64 partition_id; + u32 vp_index; + u8 reserved[4]; + } __packed; +} __packed; + +struct hv_input_assert_virtual_interrupt { + u64 partition_id; + union hv_interrupt_control control; + u64 dest_addr; /* cpu's apic id */ + u32 vector; + u8 target_vtl; + u8 rsvd_z0; + u16 rsvd_z1; +} __packed; + +struct 
hv_input_create_port { + u64 port_partition_id; + union hv_port_id port_id; + u8 port_vtl; + u8 min_connection_vtl; + u16 padding; + u64 connection_partition_id; + struct hv_port_info port_info; + struct hv_proximity_domain_info proximity_domain_info; +} __packed; + +union hv_input_delete_port { + u64 as_uint64[2]; + struct { + u64 port_partition_id; + union hv_port_id port_id; + u32 reserved; + }; +} __packed; + +struct hv_input_connect_port { + u64 connection_partition_id; + union hv_connection_id connection_id; + u8 connection_vtl; + u8 rsvdz0; + u16 rsvdz1; + u64 port_partition_id; + union hv_port_id port_id; + u32 reserved2; + struct hv_connection_info connection_info; + struct hv_proximity_domain_info proximity_domain_info; +} __packed; + +union hv_input_disconnect_port { + u64 as_uint64[2]; + struct { + u64 connection_partition_id; + union hv_connection_id connection_id; + u32 is_doorbell: 1; + u32 reserved: 31; + } __packed; +} __packed; + +union hv_input_notify_port_ring_empty { + u64 as_uint64; + struct { + u32 sint_index; + u32 reserved; + }; +} __packed; + +struct hv_vp_state_data_xsave { + u64 flags; + union hv_x64_xsave_xfem_register states; +} __packed; + +/* + * For getting and setting VP state, there are two options based on the state type: + * + * 1.) Data that is accessed by PFNs in the input hypercall page. This is used + * for state which may not fit into the hypercall pages. + * 2.) Data that is accessed directly in the input\output hypercall pages. + * This is used for state that will always fit into the hypercall pages. + * + * In the future this could be dynamic based on the size if needed. 
+ * + * Note these hypercalls have an 8-byte aligned variable header size as per the tlfs + */ + +#define HV_GET_SET_VP_STATE_TYPE_PFN BIT(31) + +enum hv_get_set_vp_state_type { + /* HvGetSetVpStateLocalInterruptControllerState - APIC/GIC state */ + HV_GET_SET_VP_STATE_LAPIC_STATE = 0 | HV_GET_SET_VP_STATE_TYPE_PFN, + HV_GET_SET_VP_STATE_XSAVE = 1 | HV_GET_SET_VP_STATE_TYPE_PFN, + HV_GET_SET_VP_STATE_SIM_PAGE = 2 | HV_GET_SET_VP_STATE_TYPE_PFN, + HV_GET_SET_VP_STATE_SIEF_PAGE = 3 | HV_GET_SET_VP_STATE_TYPE_PFN, + HV_GET_SET_VP_STATE_SYNTHETIC_TIMERS = 4, +}; + +struct hv_vp_state_data { + u32 type; + u32 rsvd; + struct hv_vp_state_data_xsave xsave; +} __packed; + +struct hv_input_get_vp_state { + u64 partition_id; + u32 vp_index; + u8 input_vtl; + u8 rsvd0; + u16 rsvd1; + struct hv_vp_state_data state_data; + u64 output_data_pfns[]; +} __packed; + +union hv_output_get_vp_state { + struct hv_synthetic_timers_state synthetic_timers_state; +} __packed; + +union hv_input_set_vp_state_data { + u64 pfns; + u8 bytes; +} __packed; + +struct hv_input_set_vp_state { + u64 partition_id; + u32 vp_index; + u8 input_vtl; + u8 rsvd0; + u16 rsvd1; + struct hv_vp_state_data state_data; + union hv_input_set_vp_state_data data[]; +} __packed; + +/* + * Dispatch state for the VP communicated by the hypervisor to the + * VP-dispatching thread in the root on return from HVCALL_DISPATCH_VP. + */ +enum hv_vp_dispatch_state { + HV_VP_DISPATCH_STATE_INVALID = 0, + HV_VP_DISPATCH_STATE_BLOCKED = 1, + HV_VP_DISPATCH_STATE_READY = 2, +}; + +/* + * Dispatch event that caused the current dispatch state on return from + * HVCALL_DISPATCH_VP. 
+ */ +enum hv_vp_dispatch_event { + HV_VP_DISPATCH_EVENT_INVALID = 0x00000000, + HV_VP_DISPATCH_EVENT_SUSPEND = 0x00000001, + HV_VP_DISPATCH_EVENT_INTERCEPT = 0x00000002, +}; + +#define HV_ROOT_SCHEDULER_MAX_VPS_PER_CHILD_PARTITION 1024 +/* The maximum array size of HV_GENERIC_SET (vp_set) buffer */ +#define HV_GENERIC_SET_QWORD_COUNT(max) (((((max) - 1) >> 6) + 1) + 2) + +struct hv_vp_signal_bitset_scheduler_message { + u64 partition_id; + u32 overflow_count; + u16 vp_count; + u16 reserved; + +#define BITSET_BUFFER_SIZE \ + HV_GENERIC_SET_QWORD_COUNT(HV_ROOT_SCHEDULER_MAX_VPS_PER_CHILD_PARTITION) + union { + struct hv_vpset bitset; + u64 bitset_buffer[BITSET_BUFFER_SIZE]; + } vp_bitset; +#undef BITSET_BUFFER_SIZE +} __packed; + +static_assert(sizeof(struct hv_vp_signal_bitset_scheduler_message) <= + (sizeof(struct hv_message) - sizeof(struct hv_message_header))); + +#define HV_MESSAGE_MAX_PARTITION_VP_PAIR_COUNT \ + (((sizeof(struct hv_message) - sizeof(struct hv_message_header)) / \ + (sizeof(u64 /* partition id */) + sizeof(u32 /* vp index */))) - 1) + +struct hv_vp_signal_pair_scheduler_message { + u32 overflow_count; + u8 vp_count; + u8 reserved1[3]; + + u64 partition_ids[HV_MESSAGE_MAX_PARTITION_VP_PAIR_COUNT]; + u32 vp_indexes[HV_MESSAGE_MAX_PARTITION_VP_PAIR_COUNT]; + + u8 reserved2[4]; +} __packed; + +static_assert(sizeof(struct hv_vp_signal_pair_scheduler_message) == + (sizeof(struct hv_message) - sizeof(struct hv_message_header))); + +/* Input and output structures for HVCALL_DISPATCH_VP */ +#define HV_DISPATCH_VP_FLAG_CLEAR_INTERCEPT_SUSPEND 0x1 +#define HV_DISPATCH_VP_FLAG_ENABLE_CALLER_INTERRUPTS 0x2 +#define HV_DISPATCH_VP_FLAG_SET_CALLER_SPEC_CTRL 0x4 +#define HV_DISPATCH_VP_FLAG_SKIP_VP_SPEC_FLUSH 0x8 +#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_SPEC_FLUSH 0x10 +#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_USER_SPEC_FLUSH 0x20 + +struct hv_input_dispatch_vp { + u64 partition_id; + u32 vp_index; + u32 flags; + u64 time_slice; /* in 100ns */ + u64 spec_ctrl; 
+} __packed; + +struct hv_output_dispatch_vp { + u32 dispatch_state; /* enum hv_vp_dispatch_state */ + u32 dispatch_event; /* enum hv_vp_dispatch_event */ +} __packed; + +#endif /* _HV_HVHDK_H */ diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h new file mode 100644 index 0000000000000..f8a39d3e9ce68 --- /dev/null +++ b/include/hyperv/hvhdk_mini.h @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Type definitions for the Microsoft Hypervisor. + */ +#ifndef _HV_HVHDK_MINI_H +#define _HV_HVHDK_MINI_H + +#include "hvgdk_mini.h" + +/* + * Doorbell connection_info flags. + */ +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_MASK 0x00000007 +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY 0x00000000 +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE 0x00000001 +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD 0x00000002 +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD 0x00000003 +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD 0x00000004 +#define HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE 0x80000000 + +/* Each generic set contains 64 elements */ +#define HV_GENERIC_SET_SHIFT (6) +#define HV_GENERIC_SET_MASK (63) + +enum hv_generic_set_format { + HV_GENERIC_SET_SPARSE_4K, + HV_GENERIC_SET_ALL, +}; +#define HV_GENERIC_SET_FORMAT hv_generic_set_format + +enum hv_scheduler_type { + HV_SCHEDULER_TYPE_LP = 1, /* Classic scheduler w/o SMT */ + HV_SCHEDULER_TYPE_LP_SMT = 2, /* Classic scheduler w/ SMT */ + HV_SCHEDULER_TYPE_CORE_SMT = 3, /* Core scheduler */ + HV_SCHEDULER_TYPE_ROOT = 4, /* Root / integrated scheduler */ + HV_SCHEDULER_TYPE_MAX +}; + +enum hv_partition_property_code { + /* Privilege properties */ + HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000, + HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001, + + /* Resource properties */ + HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005, + HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017, + + /* Compatibility properties */ + HV_PARTITION_PROPERTY_PROCESSOR_XSAVE_FEATURES = 0x00060002, + 
HV_PARTITION_PROPERTY_MAX_XSAVE_DATA_SIZE = 0x00060008, + HV_PARTITION_PROPERTY_PROCESSOR_CLOCK_FREQUENCY = 0x00060009, +}; + +enum hv_system_property { + /* Add more values when needed */ + HV_SYSTEM_PROPERTY_SCHEDULER_TYPE = 15, +}; + +struct hv_input_get_system_property { + u32 property_id; /* enum hv_system_property */ + union { + u32 as_uint32; + /* More fields to be filled in when needed */ + }; +} __packed; + +struct hv_output_get_system_property { + union { + u32 scheduler_type; /* enum hv_scheduler_type */ + }; +} __packed; + +struct hv_proximity_domain_flags { + u32 proximity_preferred : 1; + u32 reserved : 30; + u32 proximity_info_valid : 1; +} __packed; + +struct hv_proximity_domain_info { + u32 domain_id; + struct hv_proximity_domain_flags flags; +} __packed; + +/* HvDepositMemory hypercall */ +struct hv_deposit_memory { /* HV_INPUT_DEPOSIT_MEMORY */ + u64 partition_id; + u64 gpa_page_list[]; +} __packed; + +struct hv_input_withdraw_memory { + u64 partition_id; + struct hv_proximity_domain_info proximity_domain_info; +} __packed; + +struct hv_output_withdraw_memory { + DECLARE_FLEX_ARRAY(u64, gpa_page_list); +} __packed; + +/* HV Map GPA (Guest Physical Address) Flags */ +#define HV_MAP_GPA_PERMISSIONS_NONE 0x0 +#define HV_MAP_GPA_READABLE 0x1 +#define HV_MAP_GPA_WRITABLE 0x2 +#define HV_MAP_GPA_KERNEL_EXECUTABLE 0x4 +#define HV_MAP_GPA_USER_EXECUTABLE 0x8 +#define HV_MAP_GPA_EXECUTABLE 0xC +#define HV_MAP_GPA_PERMISSIONS_MASK 0xF +#define HV_MAP_GPA_ADJUSTABLE 0x8000 +#define HV_MAP_GPA_NO_ACCESS 0x10000 +#define HV_MAP_GPA_NOT_CACHED 0x200000 +#define HV_MAP_GPA_LARGE_PAGE 0x80000000 + +struct hv_input_map_gpa_pages { + u64 target_partition_id; + u64 target_gpa_base; + u32 map_flags; + u32 padding; + u64 source_gpa_page_list[]; +} __packed; + +union hv_gpa_page_access_state_flags { + struct { + u64 clear_accessed : 1; + u64 set_accessed : 1; + u64 clear_dirty : 1; + u64 set_dirty : 1; + u64 reserved : 60; + } __packed; + u64 as_uint64; +}; + +struct 
hv_input_get_gpa_pages_access_state { + u64 partition_id; + union hv_gpa_page_access_state_flags flags; + u64 hv_gpa_page_number; +} __packed; + +union hv_gpa_page_access_state { + struct { + u8 accessed : 1; + u8 dirty : 1; + u8 reserved: 6; + }; + u8 as_uint8; +} __packed; + +struct hv_lp_startup_status { + u64 hv_status; + u64 substatus1; + u64 substatus2; + u64 substatus3; + u64 substatus4; + u64 substatus5; + u64 substatus6; +} __packed; + +struct hv_input_add_logical_processor { + u32 lp_index; + u32 apic_id; + struct hv_proximity_domain_info proximity_domain_info; +} __packed; + +struct hv_output_add_logical_processor { + struct hv_lp_startup_status startup_status; +} __packed; + +enum { /* HV_SUBNODE_TYPE */ + HV_SUBNODE_ANY = 0, + HV_SUBNODE_SOCKET, + HV_SUBNODE_CLUSTER, + HV_SUBNODE_L3, + HV_SUBNODE_COUNT, + HV_SUBNODE_INVALID = -1 +}; + +struct hv_create_vp { /* HV_INPUT_CREATE_VP */ + u64 partition_id; + u32 vp_index; + u8 padding[3]; + u8 subnode_type; + u64 subnode_id; + struct hv_proximity_domain_info proximity_domain_info; + u64 flags; +} __packed; + +/* HV_INTERRUPT_TRIGGER_MODE */ +enum hv_interrupt_trigger_mode { + HV_INTERRUPT_TRIGGER_MODE_EDGE = 0, + HV_INTERRUPT_TRIGGER_MODE_LEVEL = 1, +}; + +/* HV_DEVICE_INTERRUPT_DESCRIPTOR */ +struct hv_device_interrupt_descriptor { + u32 interrupt_type; + u32 trigger_mode; + u32 vector_count; + u32 reserved; + struct hv_device_interrupt_target target; +} __packed; + +/* HV_INPUT_MAP_DEVICE_INTERRUPT */ +struct hv_input_map_device_interrupt { + u64 partition_id; + u64 device_id; + u32 flags; + u32 base_irt_idx; + struct hv_interrupt_entry logical_interrupt_entry; + struct hv_device_interrupt_descriptor interrupt_descriptor; +} __packed; + +/* HV_OUTPUT_MAP_DEVICE_INTERRUPT */ +struct hv_output_map_device_interrupt { + struct hv_interrupt_entry interrupt_entry; +} __packed; + +/* HV_INPUT_UNMAP_DEVICE_INTERRUPT */ +struct hv_input_unmap_device_interrupt { + u64 partition_id; + u64 device_id; + struct 
hv_interrupt_entry interrupt_entry; + u32 flags; +} __packed; + +#define HV_SOURCE_SHADOW_NONE 0x0 +#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 + +struct hv_send_ipi_ex { /* HV_INPUT_SEND_SYNTHETIC_CLUSTER_IPI_EX */ + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +} __packed; + +typedef u16 hv_pci_rid; /* HV_PCI_RID */ +typedef u16 hv_pci_segment; /* HV_PCI_SEGMENT */ +typedef u64 hv_logical_device_id; +union hv_pci_bdf { /* HV_PCI_BDF */ + u16 as_uint16; + + struct { + u8 function : 3; + u8 device : 5; + u8 bus; + }; +} __packed; + +union hv_pci_bus_range { + u16 as_uint16; + + struct { + u8 subordinate_bus; + u8 secondary_bus; + }; +} __packed; + +enum hv_device_type { /* HV_DEVICE_TYPE */ + HV_DEVICE_TYPE_LOGICAL = 0, + HV_DEVICE_TYPE_PCI = 1, + HV_DEVICE_TYPE_IOAPIC = 2, + HV_DEVICE_TYPE_ACPI = 3, +}; + +union hv_device_id { /* HV_DEVICE_ID */ + u64 as_uint64; + + struct { + u64 reserved0 : 62; + u64 device_type : 2; + }; + + /* HV_DEVICE_TYPE_LOGICAL */ + struct { + u64 id : 62; + u64 device_type : 2; + } logical; + + /* HV_DEVICE_TYPE_PCI */ + struct { + union { + hv_pci_rid rid; + union hv_pci_bdf bdf; + }; + + hv_pci_segment segment; + union hv_pci_bus_range shadow_bus_range; + + u16 phantom_function_bits : 2; + u16 source_shadow : 1; + + u16 rsvdz0 : 11; + u16 device_type : 2; + } pci; + + /* HV_DEVICE_TYPE_IOAPIC */ + struct { + u8 ioapic_id; + u8 rsvdz0; + u16 rsvdz1; + u16 rsvdz2; + + u16 rsvdz3 : 14; + u16 device_type : 2; + } ioapic; + + /* HV_DEVICE_TYPE_ACPI */ + struct { + u32 input_mapping_base; + u32 input_mapping_count : 30; + u32 device_type : 2; + } acpi; +} __packed; + +#endif /* _HV_HVHDK_MINI_H */ From ef5a3c92a81a1a892ae9edf949625beb68b4bd43 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Mon, 25 Nov 2024 15:24:43 -0800 Subject: [PATCH 04/16] hyperv: Switch from hyperv-tlfs.h to hyperv/hvhdk.h Switch to using hvhdk.h everywhere in the kernel. 
This header includes all the new Hyper-V headers in include/hyperv, which form a superset of the definitions found in hyperv-tlfs.h. This makes it easier to add new Hyper-V interfaces without being restricted to those in the TLFS doc (reflected in hyperv-tlfs.h). To be more consistent with the original Hyper-V code, the names of some definitions are changed slightly. Update those where needed. Update comments in mshyperv.h files to point to include/hyperv for adding new definitions. Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Reviewed-by: Easwar Hariharan Signed-off-by: Roman Kisel Reviewed-by: Easwar Hariharan Link: https://lore.kernel.org/r/1732577084-2122-5-git-send-email-nunodasneves@linux.microsoft.com Link: https://lore.kernel.org/r/20250108222138.1623703-3-romank@linux.microsoft.com Signed-off-by: Wei Liu --- arch/arm64/hyperv/hv_core.c | 2 +- arch/arm64/hyperv/mshyperv.c | 4 ++-- arch/arm64/include/asm/mshyperv.h | 7 +++---- arch/x86/hyperv/hv_init.c | 20 ++++++++++---------- arch/x86/hyperv/hv_proc.c | 2 +- arch/x86/hyperv/nested.c | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/include/asm/mshyperv.h | 2 +- arch/x86/include/asm/svm.h | 2 +- arch/x86/kernel/cpu/mshyperv.c | 2 +- arch/x86/kvm/vmx/hyperv_evmcs.h | 2 +- arch/x86/kvm/vmx/vmx_onhyperv.h | 2 +- drivers/clocksource/hyperv_timer.c | 2 +- drivers/hv/hv_balloon.c | 4 ++-- drivers/hv/hv_common.c | 2 +- drivers/hv/hv_kvp.c | 2 +- drivers/hv/hv_snapshot.c | 2 +- drivers/hv/hyperv_vmbus.h | 2 +- include/asm-generic/mshyperv.h | 7 +++---- include/clocksource/hyperv_timer.h | 2 +- include/linux/hyperv.h | 2 +- net/vmw_vsock/hyperv_transport.c | 6 +++--- 22 files changed, 39 insertions(+), 41 deletions(-) diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c index 7a746a5a6b42f..69004f619c579 100644 --- a/arch/arm64/hyperv/hv_core.c +++ b/arch/arm64/hyperv/hv_core.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include /* diff --git 
a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index b1a4de4eee293..fc49949b7df62 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -49,12 +49,12 @@ static int __init hyperv_init(void) hv_set_vpreg(HV_REGISTER_GUEST_OS_ID, guest_id); /* Get the features and hints from Hyper-V */ - hv_get_vpreg_128(HV_REGISTER_FEATURES, &result); + hv_get_vpreg_128(HV_REGISTER_PRIVILEGES_AND_FEATURES_INFO, &result); ms_hyperv.features = result.as32.a; ms_hyperv.priv_high = result.as32.b; ms_hyperv.misc_features = result.as32.c; - hv_get_vpreg_128(HV_REGISTER_ENLIGHTENMENTS, &result); + hv_get_vpreg_128(HV_REGISTER_FEATURES_INFO, &result); ms_hyperv.hints = result.as32.a; pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n", diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h index a975e1a689ddb..2e2f83bafcfbb 100644 --- a/arch/arm64/include/asm/mshyperv.h +++ b/arch/arm64/include/asm/mshyperv.h @@ -6,9 +6,8 @@ * the ARM64 architecture. See include/asm-generic/mshyperv.h for * definitions are that architecture independent. * - * Definitions that are specified in the Hyper-V Top Level Functional - * Spec (TLFS) should not go in this file, but should instead go in - * hyperv-tlfs.h. + * Definitions that are derived from Hyper-V code or headers should not go in + * this file, but should instead go in the relevant files in include/hyperv. * * Copyright (C) 2021, Microsoft, Inc. 
* @@ -20,7 +19,7 @@ #include #include -#include +#include /* * Declare calls to get and set Hyper-V VP register values on ARM64, which diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 3562826915f94..ba469d6b82506 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -415,24 +415,24 @@ static void __init hv_get_partition_id(void) static u8 __init get_vtl(void) { u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; - struct hv_get_vp_registers_input *input; - struct hv_get_vp_registers_output *output; + struct hv_input_get_vp_registers *input; + struct hv_output_get_vp_registers *output; unsigned long flags; u64 ret; local_irq_save(flags); input = *this_cpu_ptr(hyperv_pcpu_input_arg); - output = (struct hv_get_vp_registers_output *)input; + output = (struct hv_output_get_vp_registers *)input; - memset(input, 0, struct_size(input, element, 1)); - input->header.partitionid = HV_PARTITION_ID_SELF; - input->header.vpindex = HV_VP_INDEX_SELF; - input->header.inputvtl = 0; - input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS; + memset(input, 0, struct_size(input, names, 1)); + input->partition_id = HV_PARTITION_ID_SELF; + input->vp_index = HV_VP_INDEX_SELF; + input->input_vtl.as_uint8 = 0; + input->names[0] = HV_REGISTER_VSM_VP_STATUS; ret = hv_do_hypercall(control, input, output); if (hv_result_success(ret)) { - ret = output->as64.low & HV_X64_VTL_MASK; + ret = output->values[0].reg8 & HV_X64_VTL_MASK; } else { pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); BUG(); diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c index b74c06c04ff1d..ac4c834d44357 100644 --- a/arch/x86/hyperv/hv_proc.c +++ b/arch/x86/hyperv/hv_proc.c @@ -176,7 +176,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) input->partition_id = partition_id; input->vp_index = vp_index; input->flags = 
flags; - input->subnode_type = HvSubnodeAny; + input->subnode_type = HV_SUBNODE_ANY; input->proximity_domain_info = hv_numa_node_to_pxm_info(node); status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); local_irq_restore(irq_flags); diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c index 9dc259fa322e0..1083dc8646f9d 100644 --- a/arch/x86/hyperv/nested.c +++ b/arch/x86/hyperv/nested.c @@ -11,7 +11,7 @@ #include -#include +#include #include #include diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 46f354b124889..e8aeb4b4f868e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,8 +35,8 @@ #include #include #include -#include #include +#include #define __KVM_HAVE_ARCH_VCPU_DEBUGFS diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6f866fb9ffee7..f91ab1e75f9ff 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -6,9 +6,9 @@ #include #include #include -#include #include #include +#include /* * Hyper-V always provides a single IO-APIC at this MMIO address. 
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 2b59b9951c90e..77704eddba548 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -5,7 +5,7 @@ #include #include -#include +#include /* * 32-bit intercept words in the VMCB Control Area, starting diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index dc12fe5ef3caa..f285757618fc8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/vmx/hyperv_evmcs.h b/arch/x86/kvm/vmx/hyperv_evmcs.h index a543fccfc5747..6536290f42747 100644 --- a/arch/x86/kvm/vmx/hyperv_evmcs.h +++ b/arch/x86/kvm/vmx/hyperv_evmcs.h @@ -6,7 +6,7 @@ #ifndef __KVM_X86_VMX_HYPERV_EVMCS_H #define __KVM_X86_VMX_HYPERV_EVMCS_H -#include +#include #include "capabilities.h" #include "vmcs12.h" diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.h b/arch/x86/kvm/vmx/vmx_onhyperv.h index bba24ed99ee6c..cdf8cbb69209d 100644 --- a/arch/x86/kvm/vmx/vmx_onhyperv.h +++ b/arch/x86/kvm/vmx/vmx_onhyperv.h @@ -3,7 +3,7 @@ #ifndef __ARCH_X86_KVM_VMX_ONHYPERV_H__ #define __ARCH_X86_KVM_VMX_ONHYPERV_H__ -#include +#include #include #include diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index b39dee7b93af0..f00019b078a71 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include static struct clock_event_device __percpu *hv_clock_event; diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index a99112e6f0b85..871b73ca3a0fb 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include @@ -1586,7 +1586,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, return -ENOSPC; } - hint->type = 
HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD; + hint->heat_type = HV_EXTMEM_HEAT_HINT_COLD_DISCARD; hint->reserved = 0; for_each_sg(sgl, sg, nents, i) { union hv_gpa_page_range *range; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 7a35c82976e0f..c4fd07d9bf1ae 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include /* diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 7400a5a4d2bd7..62795f6cbb001 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include "hyperv_vmbus.h" #include "hv_utils_transport.h" diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index bde637a96c379..2e7f537d53cf2 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "hyperv_vmbus.h" #include "hv_utils_transport.h" diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 52cb744b4d7fd..fad31e30cd532 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -15,10 +15,10 @@ #include #include #include -#include #include #include #include +#include #include "hv_trace.h" diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 8fe7aaab25990..a7bbe504e4f32 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -6,9 +6,8 @@ * independent. See arch//include/asm/mshyperv.h for definitions * that are specific to architecture . * - * Definitions that are specified in the Hyper-V Top Level Functional - * Spec (TLFS) should not go in this file, but should instead go in - * hyperv-tlfs.h. + * Definitions that are derived from Hyper-V code or headers should not go in + * this file, but should instead go in the relevant files in include/hyperv. * * Copyright (C) 2019, Microsoft, Inc. 
* @@ -25,7 +24,7 @@ #include #include #include -#include +#include #define VTPM_BASE_ADDRESS 0xfed40000 diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index aa5233b1eba97..d48dd4176fd35 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -15,7 +15,7 @@ #include #include -#include +#include #define HV_MAX_MAX_DELTA_TICKS 0xffffffff #define HV_MIN_DELTA_TICKS 1 diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b0dbba3b9108e..4179add2864b4 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #define MAX_PAGE_BUFFER_COUNT 32 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */ diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 56c232cf5b0f4..31342ab502b4f 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -13,12 +13,12 @@ #include #include #include -#include +#include /* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some * stricter requirements on the hv_sock ring buffer size of six 4K pages. - * hyperv-tlfs defines HV_HYP_PAGE_SIZE as 4K. Newer hosts don't have this - * limitation; but, keep the defaults the same for compat. + * HV_HYP_PAGE_SIZE is defined as 4K. Newer hosts don't have this limitation; + * but, keep the defaults the same for compat. */ #define RINGBUFFER_HVS_RCV_SIZE (HV_HYP_PAGE_SIZE * 6) #define RINGBUFFER_HVS_SND_SIZE (HV_HYP_PAGE_SIZE * 6) From 962a4c7ea87884ed44ff48213f00cd5114c357e9 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Mon, 25 Nov 2024 15:24:44 -0800 Subject: [PATCH 05/16] hyperv: Remove the now unused hyperv-tlfs.h files Remove all hyperv-tlfs.h files. These are no longer included anywhere. hyperv/hvhdk.h serves the same role, but with an easier path for adding new definitions. Remove the relevant lines in MAINTAINERS. 
Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Reviewed-by: Easwar Hariharan Link: https://lore.kernel.org/r/1732577084-2122-6-git-send-email-nunodasneves@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1732577084-2122-6-git-send-email-nunodasneves@linux.microsoft.com> --- MAINTAINERS | 3 - arch/arm64/include/asm/hyperv-tlfs.h | 71 --- arch/x86/include/asm/hyperv-tlfs.h | 811 ------------------------ include/asm-generic/hyperv-tlfs.h | 883 --------------------------- 4 files changed, 1768 deletions(-) delete mode 100644 arch/arm64/include/asm/hyperv-tlfs.h delete mode 100644 arch/x86/include/asm/hyperv-tlfs.h delete mode 100644 include/asm-generic/hyperv-tlfs.h diff --git a/MAINTAINERS b/MAINTAINERS index 013cbd1af04b7..090e0c738eddd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10673,10 +10673,8 @@ F: Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml F: Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst F: Documentation/virt/hyperv F: arch/arm64/hyperv -F: arch/arm64/include/asm/hyperv-tlfs.h F: arch/arm64/include/asm/mshyperv.h F: arch/x86/hyperv -F: arch/x86/include/asm/hyperv-tlfs.h F: arch/x86/include/asm/mshyperv.h F: arch/x86/include/asm/trace/hyperv.h F: arch/x86/kernel/cpu/mshyperv.c @@ -10692,7 +10690,6 @@ F: drivers/pci/controller/pci-hyperv.c F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c -F: include/asm-generic/hyperv-tlfs.h F: include/asm-generic/mshyperv.h F: include/clocksource/hyperv_timer.h F: include/hyperv/hvgdk.h diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h deleted file mode 100644 index bc30aadedfe9d..0000000000000 --- a/arch/arm64/include/asm/hyperv-tlfs.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* - * This file contains definitions from the Hyper-V Hypervisor Top-Level - * Functional Specification (TLFS): - * 
https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - * - * Copyright (C) 2021, Microsoft, Inc. - * - * Author : Michael Kelley - */ - -#ifndef _ASM_HYPERV_TLFS_H -#define _ASM_HYPERV_TLFS_H - -#include - -/* - * All data structures defined in the TLFS that are shared between Hyper-V - * and a guest VM use Little Endian byte ordering. This matches the default - * byte ordering of Linux running on ARM64, so no special handling is required. - */ - -/* - * Group C Features. See the asm-generic version of hyperv-tlfs.h - * for a description of Feature Groups. - */ - -/* Crash MSRs available */ -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8) - -/* STIMER direct mode is available */ -#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) - -/* - * To support arch-generic code calling hv_set/get_register: - * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl - * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall - */ -#define HV_MSR_CRASH_P0 (HV_REGISTER_GUEST_CRASH_P0) -#define HV_MSR_CRASH_P1 (HV_REGISTER_GUEST_CRASH_P1) -#define HV_MSR_CRASH_P2 (HV_REGISTER_GUEST_CRASH_P2) -#define HV_MSR_CRASH_P3 (HV_REGISTER_GUEST_CRASH_P3) -#define HV_MSR_CRASH_P4 (HV_REGISTER_GUEST_CRASH_P4) -#define HV_MSR_CRASH_CTL (HV_REGISTER_GUEST_CRASH_CTL) - -#define HV_MSR_VP_INDEX (HV_REGISTER_VP_INDEX) -#define HV_MSR_TIME_REF_COUNT (HV_REGISTER_TIME_REF_COUNT) -#define HV_MSR_REFERENCE_TSC (HV_REGISTER_REFERENCE_TSC) - -#define HV_MSR_SINT0 (HV_REGISTER_SINT0) -#define HV_MSR_SCONTROL (HV_REGISTER_SCONTROL) -#define HV_MSR_SIEFP (HV_REGISTER_SIEFP) -#define HV_MSR_SIMP (HV_REGISTER_SIMP) -#define HV_MSR_EOM (HV_REGISTER_EOM) - -#define HV_MSR_STIMER0_CONFIG (HV_REGISTER_STIMER0_CONFIG) -#define HV_MSR_STIMER0_COUNT (HV_REGISTER_STIMER0_COUNT) - -union hv_msi_entry { - u64 as_uint64[2]; - struct { - u64 address; - u32 data; - u32 reserved; - } __packed; -}; - -#include - -#endif diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
b/arch/x86/include/asm/hyperv-tlfs.h deleted file mode 100644 index 3787d26810c1c..0000000000000 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ /dev/null @@ -1,811 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* - * This file contains definitions from Hyper-V Hypervisor Top-Level Functional - * Specification (TLFS): - * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - */ - -#ifndef _ASM_X86_HYPERV_TLFS_H -#define _ASM_X86_HYPERV_TLFS_H - -#include -#include -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). - */ -#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 -#define HYPERV_CPUID_INTERFACE 0x40000001 -#define HYPERV_CPUID_VERSION 0x40000002 -#define HYPERV_CPUID_FEATURES 0x40000003 -#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 -#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 -#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 -#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A -#define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C - -#define HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081 -#define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */ - -#define HYPERV_CPUID_VIRT_STACK_PROPERTIES 0x40000082 -/* Support for the extended IOAPIC RTE format */ -#define HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE BIT(2) - -#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 -#define HYPERV_CPUID_MIN 0x40000005 -#define HYPERV_CPUID_MAX 0x4000ffff - -/* - * Group D Features. The bit assignments are custom to each architecture. - * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits. 
- */ -/* The MWAIT instruction is available (per section MONITOR / MWAIT) */ -#define HV_X64_MWAIT_AVAILABLE BIT(0) -/* Guest debugging support is available */ -#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) -/* Performance Monitor support is available*/ -#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) -/* Support for physical CPU dynamic partitioning events is available*/ -#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -/* - * Support for passing hypercall input parameter block via XMM - * registers is available - */ -#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) -/* Support for a virtual guest idle state is available */ -#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) -/* Frequency MSRs available */ -#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) -/* Crash MSR available */ -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) -/* Support for debug MSRs available */ -#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) -/* Support for extended gva ranges for flush hypercalls available */ -#define HV_FEATURE_EXT_GVA_RANGES_FLUSH BIT(14) -/* - * Support for returning hypercall output block via XMM - * registers is available - */ -#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE BIT(15) -/* stimer Direct Mode is available */ -#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) - -/* - * Implementation recommendations. Indicates which behaviors the hypervisor - * recommends the OS implement for optimal performance. - * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits. 
- */ -/* - * Recommend using hypercall for address space switches rather - * than MOV to CR3 instruction - */ -#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) -/* Recommend using hypercall for local TLB flushes rather - * than INVLPG or MOV to CR3 instructions */ -#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) -/* - * Recommend using hypercall for remote TLB flushes rather - * than inter-processor interrupts - */ -#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) -/* - * Recommend using MSRs for accessing APIC registers - * EOI, ICR and TPR rather than their memory-mapped counterparts - */ -#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) -/* Recommend using the hypervisor-provided MSR to initiate a system RESET */ -#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) -/* - * Recommend using relaxed timing for this partition. If used, - * the VM should disable any watchdog timeouts that rely on the - * timely delivery of external interrupts - */ -#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) - -/* - * Recommend not using Auto End-Of-Interrupt feature - */ -#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) - -/* - * Recommend using cluster IPI hypercalls. - */ -#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) - -/* Recommend using the newer ExProcessorMasks interface */ -#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) - -/* Indicates that the hypervisor is nested within a Hyper-V partition. */ -#define HV_X64_HYPERV_NESTED BIT(12) - -/* Recommend using enlightened VMCS */ -#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) - -/* Use hypercalls for MMIO config space access */ -#define HV_X64_USE_MMIO_HYPERCALLS BIT(21) - -/* - * CPU management features identification. - * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. 
- */ -#define HV_X64_START_LOGICAL_PROCESSOR BIT(0) -#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1) -#define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2) -#define HV_X64_RESERVED_IDENTITY_BIT BIT(31) - -/* - * Virtual processor will never share a physical core with another virtual - * processor, except for virtual processors that are reported as sibling SMT - * threads. - */ -#define HV_X64_NO_NONARCH_CORESHARING BIT(18) - -/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ -#define HV_X64_NESTED_DIRECT_FLUSH BIT(17) -#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) -#define HV_X64_NESTED_MSR_BITMAP BIT(19) - -/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */ -#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0) - -/* - * This is specific to AMD and specifies that enlightened TLB flush is - * supported. If guest opts in to this feature, ASID invalidations only - * flushes gva -> hpa mapping entries. To flush the TLB entries derived - * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace - * or HvFlushGuestPhysicalAddressList). - */ -#define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22) - -/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */ -#define HV_PARAVISOR_PRESENT BIT(0) - -/* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. */ -#define HV_ISOLATION_TYPE GENMASK(3, 0) -#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5) -#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6) - -enum hv_isolation_type { - HV_ISOLATION_TYPE_NONE = 0, - HV_ISOLATION_TYPE_VBS = 1, - HV_ISOLATION_TYPE_SNP = 2, - HV_ISOLATION_TYPE_TDX = 3 -}; - -/* Hyper-V specific model specific registers (MSRs) */ - -/* MSR used to identify the guest OS. */ -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - -/* MSR used to setup pages used to communicate with the hypervisor. */ -#define HV_X64_MSR_HYPERCALL 0x40000001 - -/* MSR used to provide vcpu index */ -#define HV_X64_MSR_VP_INDEX 0x40000002 - -/* MSR used to reset the guest OS. 
*/ -#define HV_X64_MSR_RESET 0x40000003 - -/* MSR used to provide vcpu runtime in 100ns units */ -#define HV_X64_MSR_VP_RUNTIME 0x40000010 - -/* MSR used to read the per-partition time reference counter */ -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 - -/* A partition's reference time stamp counter (TSC) page */ -#define HV_X64_MSR_REFERENCE_TSC 0x40000021 - -/* MSR used to retrieve the TSC frequency */ -#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 - -/* MSR used to retrieve the local APIC timer frequency */ -#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 - -/* Define the virtual APIC registers */ -#define HV_X64_MSR_EOI 0x40000070 -#define HV_X64_MSR_ICR 0x40000071 -#define HV_X64_MSR_TPR 0x40000072 -#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 - -/* Define synthetic interrupt controller model specific registers. */ -#define HV_X64_MSR_SCONTROL 0x40000080 -#define HV_X64_MSR_SVERSION 0x40000081 -#define HV_X64_MSR_SIEFP 0x40000082 -#define HV_X64_MSR_SIMP 0x40000083 -#define HV_X64_MSR_EOM 0x40000084 -#define HV_X64_MSR_SINT0 0x40000090 -#define HV_X64_MSR_SINT1 0x40000091 -#define HV_X64_MSR_SINT2 0x40000092 -#define HV_X64_MSR_SINT3 0x40000093 -#define HV_X64_MSR_SINT4 0x40000094 -#define HV_X64_MSR_SINT5 0x40000095 -#define HV_X64_MSR_SINT6 0x40000096 -#define HV_X64_MSR_SINT7 0x40000097 -#define HV_X64_MSR_SINT8 0x40000098 -#define HV_X64_MSR_SINT9 0x40000099 -#define HV_X64_MSR_SINT10 0x4000009A -#define HV_X64_MSR_SINT11 0x4000009B -#define HV_X64_MSR_SINT12 0x4000009C -#define HV_X64_MSR_SINT13 0x4000009D -#define HV_X64_MSR_SINT14 0x4000009E -#define HV_X64_MSR_SINT15 0x4000009F - -/* - * Define synthetic interrupt controller model specific registers for - * nested hypervisor. 
- */ -#define HV_X64_MSR_NESTED_SCONTROL 0x40001080 -#define HV_X64_MSR_NESTED_SVERSION 0x40001081 -#define HV_X64_MSR_NESTED_SIEFP 0x40001082 -#define HV_X64_MSR_NESTED_SIMP 0x40001083 -#define HV_X64_MSR_NESTED_EOM 0x40001084 -#define HV_X64_MSR_NESTED_SINT0 0x40001090 - -/* - * Synthetic Timer MSRs. Four timers per vcpu. - */ -#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 -#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 -#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 -#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 -#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 -#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 -#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 -#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 - -/* Hyper-V guest idle MSR */ -#define HV_X64_MSR_GUEST_IDLE 0x400000F0 - -/* Hyper-V guest crash notification MSR's */ -#define HV_X64_MSR_CRASH_P0 0x40000100 -#define HV_X64_MSR_CRASH_P1 0x40000101 -#define HV_X64_MSR_CRASH_P2 0x40000102 -#define HV_X64_MSR_CRASH_P3 0x40000103 -#define HV_X64_MSR_CRASH_P4 0x40000104 -#define HV_X64_MSR_CRASH_CTL 0x40000105 - -/* TSC emulation after migration */ -#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 -#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 -#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 - -/* TSC invariant control */ -#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 - -/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ -#define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0) - -/* - * To support arch-generic code calling hv_set/get_register: - * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl - * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall - */ -#define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0) -#define HV_MSR_CRASH_P1 (HV_X64_MSR_CRASH_P1) -#define HV_MSR_CRASH_P2 (HV_X64_MSR_CRASH_P2) -#define HV_MSR_CRASH_P3 (HV_X64_MSR_CRASH_P3) -#define HV_MSR_CRASH_P4 (HV_X64_MSR_CRASH_P4) -#define HV_MSR_CRASH_CTL (HV_X64_MSR_CRASH_CTL) - -#define HV_MSR_VP_INDEX (HV_X64_MSR_VP_INDEX) -#define 
HV_MSR_TIME_REF_COUNT (HV_X64_MSR_TIME_REF_COUNT) -#define HV_MSR_REFERENCE_TSC (HV_X64_MSR_REFERENCE_TSC) - -#define HV_MSR_SINT0 (HV_X64_MSR_SINT0) -#define HV_MSR_SVERSION (HV_X64_MSR_SVERSION) -#define HV_MSR_SCONTROL (HV_X64_MSR_SCONTROL) -#define HV_MSR_SIEFP (HV_X64_MSR_SIEFP) -#define HV_MSR_SIMP (HV_X64_MSR_SIMP) -#define HV_MSR_EOM (HV_X64_MSR_EOM) - -#define HV_MSR_NESTED_SCONTROL (HV_X64_MSR_NESTED_SCONTROL) -#define HV_MSR_NESTED_SVERSION (HV_X64_MSR_NESTED_SVERSION) -#define HV_MSR_NESTED_SIEFP (HV_X64_MSR_NESTED_SIEFP) -#define HV_MSR_NESTED_SIMP (HV_X64_MSR_NESTED_SIMP) -#define HV_MSR_NESTED_EOM (HV_X64_MSR_NESTED_EOM) -#define HV_MSR_NESTED_SINT0 (HV_X64_MSR_NESTED_SINT0) - -#define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG) -#define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT) - -/* - * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and - * there is not associated MSR address. - */ -#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 -#define HV_X64_VTL_MASK GENMASK(3, 0) - -/* Hyper-V memory host visibility */ -enum hv_mem_host_visibility { - VMBUS_PAGE_NOT_VISIBLE = 0, - VMBUS_PAGE_VISIBLE_READ_ONLY = 1, - VMBUS_PAGE_VISIBLE_READ_WRITE = 3 -}; - -/* HvCallModifySparseGpaPageHostVisibility hypercall */ -#define HV_MAX_MODIFY_GPA_REP_COUNT ((PAGE_SIZE / sizeof(u64)) - 2) -struct hv_gpa_range_for_visibility { - u64 partition_id; - u32 host_visibility:2; - u32 reserved0:30; - u32 reserved1; - u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT]; -} __packed; - -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor. 
- */ -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - } __packed; -}; - -union hv_vp_assist_msr_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 pfn:52; - } __packed; -}; - -struct hv_reenlightenment_control { - __u64 vector:8; - __u64 reserved1:8; - __u64 enabled:1; - __u64 reserved2:15; - __u64 target_vp:32; -} __packed; - -struct hv_tsc_emulation_control { - __u64 enabled:1; - __u64 reserved:63; -} __packed; - -struct hv_tsc_emulation_status { - __u64 inprogress:1; - __u64 reserved:63; -} __packed; - -#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 -#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) - -#define HV_X64_MSR_CRASH_PARAMS \ - (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) - -#define HV_IPI_LOW_VECTOR 0x10 -#define HV_IPI_HIGH_VECTOR 0xff - -#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) - -/* Hyper-V Enlightened VMCS version mask in nested features CPUID */ -#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff - -#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 -#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 - -/* Number of XMM registers used in hypercall input/output */ -#define HV_HYPERCALL_MAX_XMM_REGISTERS 6 - -struct hv_nested_enlightenments_control { - struct { - __u32 directhypercall:1; - __u32 reserved:31; - } features; - struct { - __u32 inter_partition_comm:1; - __u32 reserved:31; - } hypercallControls; -} __packed; - -/* Define virtual processor assist page structure. 
*/ -struct hv_vp_assist_page { - __u32 apic_assist; - __u32 reserved1; - __u32 vtl_entry_reason; - __u32 vtl_reserved; - __u64 vtl_ret_x64rax; - __u64 vtl_ret_x64rcx; - struct hv_nested_enlightenments_control nested_control; - __u8 enlighten_vmentry; - __u8 reserved2[7]; - __u64 current_nested_vmcs; - __u8 synthetic_time_unhalted_timer_expired; - __u8 reserved3[7]; - __u8 virtualization_fault_information[40]; - __u8 reserved4[8]; - __u8 intercept_message[256]; - __u8 vtl_ret_actions[256]; -} __packed; - -struct hv_enlightened_vmcs { - u32 revision_id; - u32 abort; - - u16 host_es_selector; - u16 host_cs_selector; - u16 host_ss_selector; - u16 host_ds_selector; - u16 host_fs_selector; - u16 host_gs_selector; - u16 host_tr_selector; - - u16 padding16_1; - - u64 host_ia32_pat; - u64 host_ia32_efer; - - u64 host_cr0; - u64 host_cr3; - u64 host_cr4; - - u64 host_ia32_sysenter_esp; - u64 host_ia32_sysenter_eip; - u64 host_rip; - u32 host_ia32_sysenter_cs; - - u32 pin_based_vm_exec_control; - u32 vm_exit_controls; - u32 secondary_vm_exec_control; - - u64 io_bitmap_a; - u64 io_bitmap_b; - u64 msr_bitmap; - - u16 guest_es_selector; - u16 guest_cs_selector; - u16 guest_ss_selector; - u16 guest_ds_selector; - u16 guest_fs_selector; - u16 guest_gs_selector; - u16 guest_ldtr_selector; - u16 guest_tr_selector; - - u32 guest_es_limit; - u32 guest_cs_limit; - u32 guest_ss_limit; - u32 guest_ds_limit; - u32 guest_fs_limit; - u32 guest_gs_limit; - u32 guest_ldtr_limit; - u32 guest_tr_limit; - u32 guest_gdtr_limit; - u32 guest_idtr_limit; - - u32 guest_es_ar_bytes; - u32 guest_cs_ar_bytes; - u32 guest_ss_ar_bytes; - u32 guest_ds_ar_bytes; - u32 guest_fs_ar_bytes; - u32 guest_gs_ar_bytes; - u32 guest_ldtr_ar_bytes; - u32 guest_tr_ar_bytes; - - u64 guest_es_base; - u64 guest_cs_base; - u64 guest_ss_base; - u64 guest_ds_base; - u64 guest_fs_base; - u64 guest_gs_base; - u64 guest_ldtr_base; - u64 guest_tr_base; - u64 guest_gdtr_base; - u64 guest_idtr_base; - - u64 padding64_1[3]; - - u64 
vm_exit_msr_store_addr; - u64 vm_exit_msr_load_addr; - u64 vm_entry_msr_load_addr; - - u64 cr3_target_value0; - u64 cr3_target_value1; - u64 cr3_target_value2; - u64 cr3_target_value3; - - u32 page_fault_error_code_mask; - u32 page_fault_error_code_match; - - u32 cr3_target_count; - u32 vm_exit_msr_store_count; - u32 vm_exit_msr_load_count; - u32 vm_entry_msr_load_count; - - u64 tsc_offset; - u64 virtual_apic_page_addr; - u64 vmcs_link_pointer; - - u64 guest_ia32_debugctl; - u64 guest_ia32_pat; - u64 guest_ia32_efer; - - u64 guest_pdptr0; - u64 guest_pdptr1; - u64 guest_pdptr2; - u64 guest_pdptr3; - - u64 guest_pending_dbg_exceptions; - u64 guest_sysenter_esp; - u64 guest_sysenter_eip; - - u32 guest_activity_state; - u32 guest_sysenter_cs; - - u64 cr0_guest_host_mask; - u64 cr4_guest_host_mask; - u64 cr0_read_shadow; - u64 cr4_read_shadow; - u64 guest_cr0; - u64 guest_cr3; - u64 guest_cr4; - u64 guest_dr7; - - u64 host_fs_base; - u64 host_gs_base; - u64 host_tr_base; - u64 host_gdtr_base; - u64 host_idtr_base; - u64 host_rsp; - - u64 ept_pointer; - - u16 virtual_processor_id; - u16 padding16_2[3]; - - u64 padding64_2[5]; - u64 guest_physical_address; - - u32 vm_instruction_error; - u32 vm_exit_reason; - u32 vm_exit_intr_info; - u32 vm_exit_intr_error_code; - u32 idt_vectoring_info_field; - u32 idt_vectoring_error_code; - u32 vm_exit_instruction_len; - u32 vmx_instruction_info; - - u64 exit_qualification; - u64 exit_io_instruction_ecx; - u64 exit_io_instruction_esi; - u64 exit_io_instruction_edi; - u64 exit_io_instruction_eip; - - u64 guest_linear_address; - u64 guest_rsp; - u64 guest_rflags; - - u32 guest_interruptibility_info; - u32 cpu_based_vm_exec_control; - u32 exception_bitmap; - u32 vm_entry_controls; - u32 vm_entry_intr_info_field; - u32 vm_entry_exception_error_code; - u32 vm_entry_instruction_len; - u32 tpr_threshold; - - u64 guest_rip; - - u32 hv_clean_fields; - u32 padding32_1; - u32 hv_synthetic_controls; - struct { - u32 nested_flush_hypercall:1; - 
u32 msr_bitmap:1; - u32 reserved:30; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u32 padding32_2; - u64 hv_vm_id; - u64 partition_assist_page; - u64 padding64_4[4]; - u64 guest_bndcfgs; - u64 guest_ia32_perf_global_ctrl; - u64 guest_ia32_s_cet; - u64 guest_ssp; - u64 guest_ia32_int_ssp_table_addr; - u64 guest_ia32_lbr_ctl; - u64 padding64_5[2]; - u64 xss_exit_bitmap; - u64 encls_exiting_bitmap; - u64 host_ia32_perf_global_ctrl; - u64 tsc_multiplier; - u64 host_ia32_s_cet; - u64 host_ssp; - u64 host_ia32_int_ssp_table_addr; - u64 padding64_6; -} __packed; - -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) - -#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF - -/* - * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by - * pairing it with architecturally impossible exit reasons. Bit 28 is set only - * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit - * is pending. I.e. 
it will never be set by hardware for non-SMI exits (there - * are only three), nor will it ever be set unless the VMM is an STM. - */ -#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 - -/* - * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose - * SVM enlightenments to guests. - */ -struct hv_vmcb_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; - -/* - * Hyper-V uses the software reserved clean bit in VMCB. - */ -#define HV_VMCB_NESTED_ENLIGHTENMENTS 31 - -/* Synthetic VM-Exit */ -#define HV_SVM_EXITCODE_ENL 0xf0000000 -#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) - -struct hv_partition_assist_pg { - u32 tlb_lock_count; -}; - -enum hv_interrupt_type { - HV_X64_INTERRUPT_TYPE_FIXED = 0x0000, - HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001, - HV_X64_INTERRUPT_TYPE_SMI = 0x0002, - HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003, - HV_X64_INTERRUPT_TYPE_NMI = 0x0004, - HV_X64_INTERRUPT_TYPE_INIT = 0x0005, - HV_X64_INTERRUPT_TYPE_SIPI = 0x0006, - HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007, - HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008, - HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009, - HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A, -}; - -union hv_msi_address_register { - u32 as_uint32; - struct { - u32 reserved1:2; - u32 destination_mode:1; - u32 redirection_hint:1; - u32 reserved2:8; - u32 destination_id:8; - u32 msi_base:12; - }; -} __packed; - -union hv_msi_data_register { - u32 as_uint32; - struct { - u32 vector:8; - u32 delivery_mode:3; - u32 reserved1:3; - u32 level_assert:1; - u32 trigger_mode:1; - u32 reserved2:16; - }; -} __packed; - -/* HvRetargetDeviceInterrupt hypercall */ -union hv_msi_entry { - u64 as_uint64; - struct { - union hv_msi_address_register address; - union hv_msi_data_register data; 
- } __packed; -}; - -struct hv_x64_segment_register { - u64 base; - u32 limit; - u16 selector; - union { - struct { - u16 segment_type : 4; - u16 non_system_segment : 1; - u16 descriptor_privilege_level : 2; - u16 present : 1; - u16 reserved : 4; - u16 available : 1; - u16 _long : 1; - u16 _default : 1; - u16 granularity : 1; - } __packed; - u16 attributes; - }; -} __packed; - -struct hv_x64_table_register { - u16 pad[3]; - u16 limit; - u64 base; -} __packed; - -struct hv_init_vp_context { - u64 rip; - u64 rsp; - u64 rflags; - - struct hv_x64_segment_register cs; - struct hv_x64_segment_register ds; - struct hv_x64_segment_register es; - struct hv_x64_segment_register fs; - struct hv_x64_segment_register gs; - struct hv_x64_segment_register ss; - struct hv_x64_segment_register tr; - struct hv_x64_segment_register ldtr; - - struct hv_x64_table_register idtr; - struct hv_x64_table_register gdtr; - - u64 efer; - u64 cr0; - u64 cr3; - u64 cr4; - u64 msr_cr_pat; -} __packed; - -union hv_input_vtl { - u8 as_uint8; - struct { - u8 target_vtl: 4; - u8 use_target_vtl: 1; - u8 reserved_z: 3; - }; -} __packed; - -struct hv_enable_vp_vtl { - u64 partition_id; - u32 vp_index; - union hv_input_vtl target_vtl; - u8 mbz0; - u16 mbz1; - struct hv_init_vp_context vp_context; -} __packed; - -struct hv_get_vp_from_apic_id_in { - u64 partition_id; - union hv_input_vtl target_vtl; - u8 res[7]; - u32 apic_ids[]; -} __packed; - -#include - -#endif diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h deleted file mode 100644 index 52274c9aefefc..0000000000000 --- a/include/asm-generic/hyperv-tlfs.h +++ /dev/null @@ -1,883 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* - * This file contains definitions from Hyper-V Hypervisor Top-Level Functional - * Specification (TLFS): - * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs - */ - -#ifndef _ASM_GENERIC_HYPERV_TLFS_H -#define _ASM_GENERIC_HYPERV_TLFS_H - -#include 
-#include -#include - -/* - * While not explicitly listed in the TLFS, Hyper-V always runs with a page size - * of 4096. These definitions are used when communicating with Hyper-V using - * guest physical pages and guest physical page addresses, since the guest page - * size may not be 4096 on all architectures. - */ -#define HV_HYP_PAGE_SHIFT 12 -#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) -#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) - -/* - * Hyper-V provides two categories of flags relevant to guest VMs. The - * "Features" category indicates specific functionality that is available - * to guests on this particular instance of Hyper-V. The "Features" - * are presented in four groups, each of which is 32 bits. The group A - * and B definitions are common across architectures and are listed here. - * However, not all flags are relevant on all architectures. - * - * Groups C and D vary across architectures and are listed in the - * architecture specific portion of hyperv-tlfs.h. Some of these flags exist - * on multiple architectures, but the bit positions are different so they - * cannot appear in the generic portion of hyperv-tlfs.h. - * - * The "Enlightenments" category provides recommendations on whether to use - * specific enlightenments that are available. The Enlighenments are a single - * group of 32 bits, but they vary across architectures and are listed in - * the architecture specific portion of hyperv-tlfs.h. - */ - -/* - * Group A Features. 
- */ - -/* VP Runtime register available */ -#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0) -/* Partition Reference Counter available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -/* Basic SynIC register available */ -#define HV_MSR_SYNIC_AVAILABLE BIT(2) -/* Synthetic Timer registers available */ -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -/* Virtual APIC assist and VP assist page registers available */ -#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4) -/* Hypercall and Guest OS ID registers available*/ -#define HV_MSR_HYPERCALL_AVAILABLE BIT(5) -/* Access virtual processor index register available*/ -#define HV_MSR_VP_INDEX_AVAILABLE BIT(6) -/* Virtual system reset register available*/ -#define HV_MSR_RESET_AVAILABLE BIT(7) -/* Access statistics page registers available */ -#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8) -/* Partition reference TSC register is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -/* Partition Guest IDLE register is available */ -#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10) -/* Partition local APIC and TSC frequency registers available */ -#define HV_ACCESS_FREQUENCY_MSRS BIT(11) -/* AccessReenlightenmentControls privilege */ -#define HV_ACCESS_REENLIGHTENMENT BIT(13) -/* AccessTscInvariantControls privilege */ -#define HV_ACCESS_TSC_INVARIANT BIT(15) - -/* - * Group B features. - */ -#define HV_CREATE_PARTITIONS BIT(0) -#define HV_ACCESS_PARTITION_ID BIT(1) -#define HV_ACCESS_MEMORY_POOL BIT(2) -#define HV_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_POST_MESSAGES BIT(4) -#define HV_SIGNAL_EVENTS BIT(5) -#define HV_CREATE_PORT BIT(6) -#define HV_CONNECT_PORT BIT(7) -#define HV_ACCESS_STATS BIT(8) -#define HV_DEBUGGING BIT(11) -#define HV_CPU_MANAGEMENT BIT(12) -#define HV_ENABLE_EXTENDED_HYPERCALLS BIT(20) -#define HV_ISOLATION BIT(22) - -/* - * TSC page layout. 
- */ -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; -} __packed; - -union hv_reference_tsc_msr { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 pfn:52; - } __packed; -}; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - -/* - * Crash notification flags. - */ -#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) -#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) - -/* Declare the various hypercall operations. 
*/ -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 -#define HVCALL_ENABLE_VP_VTL 0x000f -#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 -#define HVCALL_SEND_IPI 0x000b -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 -#define HVCALL_SEND_IPI_EX 0x0015 -#define HVCALL_GET_PARTITION_ID 0x0046 -#define HVCALL_DEPOSIT_MEMORY 0x0048 -#define HVCALL_CREATE_VP 0x004e -#define HVCALL_GET_VP_REGISTERS 0x0050 -#define HVCALL_SET_VP_REGISTERS 0x0051 -#define HVCALL_POST_MESSAGE 0x005c -#define HVCALL_SIGNAL_EVENT 0x005d -#define HVCALL_POST_DEBUG_DATA 0x0069 -#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a -#define HVCALL_RESET_DEBUG_SESSION 0x006b -#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 -#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c -#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d -#define HVCALL_RETARGET_INTERRUPT 0x007e -#define HVCALL_START_VP 0x0099 -#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 -#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db -#define HVCALL_MMIO_READ 0x0106 -#define HVCALL_MMIO_WRITE 0x0107 - -/* Extended hypercalls */ -#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 -#define HV_EXT_CALL_MEMORY_HEAT_HINT 0x8003 - -#define HV_FLUSH_ALL_PROCESSORS BIT(0) -#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) -#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) -#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) - -/* Extended capability bits */ -#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8) - -enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARSE_4K, - HV_GENERIC_SET_ALL, -}; - -#define HV_PARTITION_ID_SELF ((u64)-1) -#define HV_VP_INDEX_SELF ((u32)-2) - -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_VARHEAD_MASK 
GENMASK_ULL(26, 17) -#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27) -#define HV_HYPERCALL_NESTED BIT_ULL(31) -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32) -#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_RSVD1_MASK GENMASK_ULL(47, 44) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) -#define HV_HYPERCALL_RSVD2_MASK GENMASK_ULL(63, 60) -#define HV_HYPERCALL_RSVD_MASK (HV_HYPERCALL_RSVD0_MASK | \ - HV_HYPERCALL_RSVD1_MASK | \ - HV_HYPERCALL_RSVD2_MASK) - -/* hypercall status code */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_ACCESS_DENIED 6 -#define HV_STATUS_OPERATION_DENIED 8 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_PORT_ID 17 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 -#define HV_STATUS_TIME_OUT 120 -#define HV_STATUS_VTL_ALREADY_ENABLED 134 - -/* - * The Hyper-V TimeRefCount register and the TSC - * page provide a guest VM clock with 100ns tick rate - */ -#define HV_CLOCK_HZ (NSEC_PER_SEC/100) - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) -/* Valid SynIC vectors are 16-255. */ -#define HV_SYNIC_FIRST_VALID_VECTOR (16) - -#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) -#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) -#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) -#define HV_SYNIC_SINT_MASKED (1ULL << 16) -#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) -#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) - -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define synthetic interrupt controller message constants. 
*/ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) - -/* - * Define hypervisor message types. Some of the message types - * are x86/x64 specific, but there's no good way to separate - * them out into the arch-specific version of hyperv-tlfs.h - * because C doesn't provide a way to extend enum types. - * Keeping them all in the arch neutral hyperv-tlfs.h seems - * the least messy compromise. - */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - __u8 asu8; - struct { - __u8 msg_pending:1; - __u8 reserved:7; - } __packed; -}; - -/* Define port identifier type. */ -union hv_port_id { - __u32 asu32; - struct { - __u32 id:24; - __u32 reserved:8; - } __packed u; -}; - -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - __u32 message_type; - __u8 payload_size; - union hv_message_flags message_flags; - __u8 reserved[2]; - union { - __u64 sender; - union hv_port_id port; - }; -} __packed; - -/* Define synthetic interrupt controller message format. 
*/ -struct hv_message { - struct hv_message_header header; - union { - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u; -} __packed; - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -} __packed; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ -} __packed; - - -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) - -/* - * Synthetic timer configuration. - */ -union hv_stimer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 apic_vector:8; - u64 direct_mode:1; - u64 reserved_z0:3; - u64 sintx:4; - u64 reserved_z1:44; - } __packed; -}; - - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - } __packed; -}; - -/* Define synthetic interrupt source. 
*/ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 polling:1; - u64 reserved2:45; - } __packed; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - } __packed; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - } __packed; -}; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -} __packed; - -/* The maximum number of sparse vCPU banks which can be encoded by 'struct hv_vpset' */ -#define HV_MAX_SPARSE_VCPU_BANKS (64) -/* The number of vCPUs in one sparse bank */ -#define HV_VCPUS_PER_SPARSE_BANK (64) - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -} __packed; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -} __packed; - -/* HvFlushGuestPhysicalAddressSpace hypercalls */ -struct hv_guest_mapping_flush { - u64 address_space; - u64 flags; -} __packed; - -/* - * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited - * by the bitwidth of "additional_pages" in union hv_gpa_page_range. - */ -#define HV_MAX_FLUSH_PAGES (2048) -#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB 0 -#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB 1 - -/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */ -union hv_gpa_page_range { - u64 address_space; - struct { - u64 additional_pages:11; - u64 largepage:1; - u64 basepfn:52; - } page; - struct { - u64 reserved:12; - u64 page_size:1; - u64 reserved1:8; - u64 base_large_pfn:43; - }; -}; - -/* - * All input flush parameters should be in single page. 
The max flush - * count is equal with how many entries of union hv_gpa_page_range can - * be populated into the input parameter page. - */ -#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ - sizeof(union hv_gpa_page_range)) - -struct hv_guest_mapping_flush_list { - u64 address_space; - u64 flags; - union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT]; -}; - -/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ -struct hv_tlb_flush { - u64 address_space; - u64 flags; - u64 processor_mask; - u64 gva_list[]; -} __packed; - -/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ -struct hv_tlb_flush_ex { - u64 address_space; - u64 flags; - struct hv_vpset hv_vp_set; - u64 gva_list[]; -} __packed; - -/* HvGetPartitionId hypercall (output only) */ -struct hv_get_partition_id { - u64 partition_id; -} __packed; - -/* HvDepositMemory hypercall */ -struct hv_deposit_memory { - u64 partition_id; - u64 gpa_page_list[]; -} __packed; - -struct hv_proximity_domain_flags { - u32 proximity_preferred : 1; - u32 reserved : 30; - u32 proximity_info_valid : 1; -} __packed; - -struct hv_proximity_domain_info { - u32 domain_id; - struct hv_proximity_domain_flags flags; -} __packed; - -struct hv_lp_startup_status { - u64 hv_status; - u64 substatus1; - u64 substatus2; - u64 substatus3; - u64 substatus4; - u64 substatus5; - u64 substatus6; -} __packed; - -/* HvAddLogicalProcessor hypercall */ -struct hv_input_add_logical_processor { - u32 lp_index; - u32 apic_id; - struct hv_proximity_domain_info proximity_domain_info; -} __packed; - -struct hv_output_add_logical_processor { - struct hv_lp_startup_status startup_status; -} __packed; - -enum HV_SUBNODE_TYPE -{ - HvSubnodeAny = 0, - HvSubnodeSocket = 1, - HvSubnodeAmdNode = 2, - HvSubnodeL3 = 3, - HvSubnodeCount = 4, - HvSubnodeInvalid = -1 -}; - -/* HvCreateVp hypercall */ -struct hv_create_vp { - u64 partition_id; - u32 vp_index; - u8 padding[3]; - u8 subnode_type; - u64 
subnode_id; - struct hv_proximity_domain_info proximity_domain_info; - u64 flags; -} __packed; - -enum hv_interrupt_source { - HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */ - HV_INTERRUPT_SOURCE_IOAPIC, -}; - -union hv_ioapic_rte { - u64 as_uint64; - - struct { - u32 vector:8; - u32 delivery_mode:3; - u32 destination_mode:1; - u32 delivery_status:1; - u32 interrupt_polarity:1; - u32 remote_irr:1; - u32 trigger_mode:1; - u32 interrupt_mask:1; - u32 reserved1:15; - - u32 reserved2:24; - u32 destination_id:8; - }; - - struct { - u32 low_uint32; - u32 high_uint32; - }; -} __packed; - -struct hv_interrupt_entry { - u32 source; - u32 reserved1; - union { - union hv_msi_entry msi_entry; - union hv_ioapic_rte ioapic_rte; - }; -} __packed; - -/* - * flags for hv_device_interrupt_target.flags - */ -#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 -#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 - -struct hv_device_interrupt_target { - u32 vector; - u32 flags; - union { - u64 vp_mask; - struct hv_vpset vp_set; - }; -} __packed; - -struct hv_retarget_device_interrupt { - u64 partition_id; /* use "self" */ - u64 device_id; - struct hv_interrupt_entry int_entry; - u64 reserved2; - struct hv_device_interrupt_target int_target; -} __packed __aligned(8); - -/* - * These Hyper-V registers provide information equivalent to the CPUID - * instruction on x86/x64. 
- */ -#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ -#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ -#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ - -/* - * Synthetic register definitions equivalent to MSRs on x86/x64 - */ -#define HV_REGISTER_GUEST_CRASH_P0 0x00000210 -#define HV_REGISTER_GUEST_CRASH_P1 0x00000211 -#define HV_REGISTER_GUEST_CRASH_P2 0x00000212 -#define HV_REGISTER_GUEST_CRASH_P3 0x00000213 -#define HV_REGISTER_GUEST_CRASH_P4 0x00000214 -#define HV_REGISTER_GUEST_CRASH_CTL 0x00000215 - -#define HV_REGISTER_GUEST_OS_ID 0x00090002 -#define HV_REGISTER_VP_INDEX 0x00090003 -#define HV_REGISTER_TIME_REF_COUNT 0x00090004 -#define HV_REGISTER_REFERENCE_TSC 0x00090017 - -#define HV_REGISTER_SINT0 0x000A0000 -#define HV_REGISTER_SCONTROL 0x000A0010 -#define HV_REGISTER_SIEFP 0x000A0012 -#define HV_REGISTER_SIMP 0x000A0013 -#define HV_REGISTER_EOM 0x000A0014 - -#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 -#define HV_REGISTER_STIMER0_COUNT 0x000B0001 - -/* HvGetVpRegisters hypercall input with variable size reg name list*/ -struct hv_get_vp_registers_input { - struct { - u64 partitionid; - u32 vpindex; - u8 inputvtl; - u8 padding[3]; - } header; - struct input { - u32 name0; - u32 name1; - } element[]; -} __packed; - -/* HvGetVpRegisters returns an array of these output elements */ -struct hv_get_vp_registers_output { - union { - struct { - u32 a; - u32 b; - u32 c; - u32 d; - } as32 __packed; - struct { - u64 low; - u64 high; - } as64 __packed; - }; -}; - -/* HvSetVpRegisters hypercall with variable size reg name/value list*/ -struct hv_set_vp_registers_input { - struct { - u64 partitionid; - u32 vpindex; - u8 inputvtl; - u8 padding[3]; - } header; - struct { - u32 name; - u32 padding1; - u64 padding2; - u64 valuelow; - u64 valuehigh; - } element[]; -} __packed; - -enum hv_device_type { - HV_DEVICE_TYPE_LOGICAL = 0, - HV_DEVICE_TYPE_PCI = 1, - HV_DEVICE_TYPE_IOAPIC = 2, - HV_DEVICE_TYPE_ACPI = 
3, -}; - -typedef u16 hv_pci_rid; -typedef u16 hv_pci_segment; -typedef u64 hv_logical_device_id; -union hv_pci_bdf { - u16 as_uint16; - - struct { - u8 function:3; - u8 device:5; - u8 bus; - }; -} __packed; - -union hv_pci_bus_range { - u16 as_uint16; - - struct { - u8 subordinate_bus; - u8 secondary_bus; - }; -} __packed; - -union hv_device_id { - u64 as_uint64; - - struct { - u64 reserved0:62; - u64 device_type:2; - }; - - /* HV_DEVICE_TYPE_LOGICAL */ - struct { - u64 id:62; - u64 device_type:2; - } logical; - - /* HV_DEVICE_TYPE_PCI */ - struct { - union { - hv_pci_rid rid; - union hv_pci_bdf bdf; - }; - - hv_pci_segment segment; - union hv_pci_bus_range shadow_bus_range; - - u16 phantom_function_bits:2; - u16 source_shadow:1; - - u16 rsvdz0:11; - u16 device_type:2; - } pci; - - /* HV_DEVICE_TYPE_IOAPIC */ - struct { - u8 ioapic_id; - u8 rsvdz0; - u16 rsvdz1; - u16 rsvdz2; - - u16 rsvdz3:14; - u16 device_type:2; - } ioapic; - - /* HV_DEVICE_TYPE_ACPI */ - struct { - u32 input_mapping_base; - u32 input_mapping_count:30; - u32 device_type:2; - } acpi; -} __packed; - -enum hv_interrupt_trigger_mode { - HV_INTERRUPT_TRIGGER_MODE_EDGE = 0, - HV_INTERRUPT_TRIGGER_MODE_LEVEL = 1, -}; - -struct hv_device_interrupt_descriptor { - u32 interrupt_type; - u32 trigger_mode; - u32 vector_count; - u32 reserved; - struct hv_device_interrupt_target target; -} __packed; - -struct hv_input_map_device_interrupt { - u64 partition_id; - u64 device_id; - u64 flags; - struct hv_interrupt_entry logical_interrupt_entry; - struct hv_device_interrupt_descriptor interrupt_descriptor; -} __packed; - -struct hv_output_map_device_interrupt { - struct hv_interrupt_entry interrupt_entry; -} __packed; - -struct hv_input_unmap_device_interrupt { - u64 partition_id; - u64 device_id; - struct hv_interrupt_entry interrupt_entry; -} __packed; - -#define HV_SOURCE_SHADOW_NONE 0x0 -#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 - -/* - * Version info reported by hypervisor - */ -union 
hv_hypervisor_version_info { - struct { - u32 build_number; - - u32 minor_version : 16; - u32 major_version : 16; - - u32 service_pack; - - u32 service_number : 24; - u32 service_branch : 8; - }; - struct { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - }; -}; - -/* - * The whole argument should fit in a page to be able to pass to the hypervisor - * in one hypercall. - */ -#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES \ - ((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \ - sizeof(union hv_gpa_page_range)) - -/* HvExtCallMemoryHeatHint hypercall */ -#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD 2 -struct hv_memory_hint { - u64 type:2; - u64 reserved:62; - union hv_gpa_page_range ranges[]; -} __packed; - -/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */ -#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64 - -struct hv_mmio_read_input { - u64 gpa; - u32 size; - u32 reserved; -} __packed; - -struct hv_mmio_read_output { - u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; -} __packed; - -struct hv_mmio_write_input { - u64 gpa; - u32 size; - u32 reserved; - u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; -} __packed; - -/* Define connection identifier type. */ -union hv_connection_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u; -}; - -#endif From a7ae41cd808557c1d4e21c4295578fffcba0eb34 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 2 Oct 2024 20:53:29 -0700 Subject: [PATCH 06/16] x86/hyperv: Don't assume cpu_possible_mask is dense Current code allocates the hv_vp_assist_page array with size num_possible_cpus(). This code assumes cpu_possible_mask is dense, which is not true in the general case per [1]. If cpu_possible_mask is sparse, the array might be indexed by a value beyond the size of the array. However, the configurations that Hyper-V provides to guest VMs on x86 hardware, in combination with how x86 code assigns Linux CPU numbers, *does* always produce a dense cpu_possible_mask. 
So the dense assumption is not currently causing failures. But for robustness against future changes in how cpu_possible_mask is populated, update the code to no longer assume dense. The correct approach is to allocate the array with size "nr_cpu_ids". While this leaves unused array entries corresponding to holes in cpu_possible_mask, the holes are assumed to be minimal and hence the amount of memory wasted by unused entries is minimal. [1] https://lore.kernel.org/lkml/SN6PR02MB4157210CC36B2593F8572E5ED4692@SN6PR02MB4157.namprd02.prod.outlook.com/ Signed-off-by: Michael Kelley Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241003035333.49261-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20241003035333.49261-2-mhklinux@outlook.com> --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index ba469d6b82506..f82d1aefaa8a9 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -472,7 +472,7 @@ void __init hyperv_init(void) if (hv_isolation_type_tdx()) hv_vp_assist_page = NULL; else - hv_vp_assist_page = kcalloc(num_possible_cpus(), + hv_vp_assist_page = kcalloc(nr_cpu_ids, sizeof(*hv_vp_assist_page), GFP_KERNEL); if (!hv_vp_assist_page) { From 16b18fdf6bc7292ae0edbf33d2d693af3240e49d Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 2 Oct 2024 20:53:30 -0700 Subject: [PATCH 07/16] Drivers: hv: Don't assume cpu_possible_mask is dense Current code allocates the hv_vp_index array with size num_possible_cpus(). This code assumes cpu_possible_mask is dense, which is not true in the general case per [1]. If cpu_possible_mask is sparse, the array might be indexed by a value beyond the size of the array. However, the configurations that Hyper-V provides to guest VMs on x86 and ARM64 hardware, in combination with how architecture-specific code assigns Linux CPU numbers, *do* always produce a dense cpu_possible_mask.
So the dense assumption is not currently causing failures. But for robustness against future changes in how cpu_possible_mask is populated, update the code to no longer assume dense. The correct approach is to allocate and initialize the array using size "nr_cpu_ids". While this leaves unused array entries corresponding to holes in cpu_possible_mask, the holes are assumed to be minimal and hence the amount of memory wasted by unused entries is minimal. Using nr_cpu_ids also reduces initialization time, in that the loop to initialize the array currently rescans cpu_possible_mask on each iteration. This is n-squared in the number of CPUs, which could be significant for large CPU counts. [1] https://lore.kernel.org/lkml/SN6PR02MB4157210CC36B2593F8572E5ED4692@SN6PR02MB4157.namprd02.prod.outlook.com/ Signed-off-by: Michael Kelley Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241003035333.49261-3-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20241003035333.49261-3-mhklinux@outlook.com> --- drivers/hv/hv_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index c4fd07d9bf1ae..c6ed3ba4bf61e 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -345,14 +345,14 @@ int __init hv_common_init(void) BUG_ON(!hyperv_pcpu_output_arg); } - hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), + hv_vp_index = kmalloc_array(nr_cpu_ids, sizeof(*hv_vp_index), GFP_KERNEL); if (!hv_vp_index) { hv_common_free(); return -ENOMEM; } - for (i = 0; i < num_possible_cpus(); i++) + for (i = 0; i < nr_cpu_ids; i++) hv_vp_index[i] = VP_INVAL; return 0; From 4f6b64f3d3d96fb3796614362c64a4b73ddf3f7a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 2 Oct 2024 20:53:31 -0700 Subject: [PATCH 08/16] iommu/hyper-v: Don't assume cpu_possible_mask is dense Current code gets the APIC IDs for CPUs numbered 255 and lower. 
This code assumes cpu_possible_mask is dense, which is not true in the general case per [1]. If cpu_possible_mask contains holes, num_possible_cpus() is less than nr_cpu_ids, so some CPUs might get skipped. Furthermore, getting the APIC ID of a CPU that isn't in cpu_possible_mask is invalid. However, the configurations that Hyper-V provides to guest VMs on x86 hardware, in combination with how x86 code assigns Linux CPU numbers, *do* always produce a dense cpu_possible_mask. So the dense assumption is not currently causing failures. But for robustness against future changes in how cpu_possible_mask is populated, update the code to no longer assume dense. The correct approach is to determine the range to scan based on nr_cpu_ids, and skip any CPUs that are not in the cpu_possible_mask. [1] https://lore.kernel.org/lkml/SN6PR02MB4157210CC36B2593F8572E5ED4692@SN6PR02MB4157.namprd02.prod.outlook.com/ Signed-off-by: Michael Kelley Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241003035333.49261-4-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20241003035333.49261-4-mhklinux@outlook.com> --- drivers/iommu/hyperv-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 8a5c17b973100..2a86aa5d54c68 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -164,8 +164,8 @@ static int __init hyperv_prepare_irq_remapping(void) * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu * into ioapic_max_cpumask if its APIC ID is less than 256.
*/ - for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--) - if (cpu_physical_id(i) < 256) + for (i = min_t(unsigned int, nr_cpu_ids - 1, 255); i >= 0; i--) + if (cpu_possible(i) && cpu_physical_id(i) < 256) cpumask_set_cpu(i, &ioapic_max_cpumask); return 0; From 5fa1da972fcf503df4fa188a673cd5d09b60b090 Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Thu, 2 Jan 2025 14:52:43 +0000 Subject: [PATCH 09/16] uio_hv_generic: Add a check for HV_NIC for send, receive buffers setup Receive and send buffer allocation was originally introduced to support DPDK's networking use case. These buffer sizes were further increased to meet DPDK performance requirements. However, these large buffers are unnecessary for any other UIO use cases. Restrict the allocation of receive and send buffers only for HV_NIC device type, saving 47 MB of memory per device. While at it, fix some of the syntax related issues in the touched code which are reported by "--strict" option of checkpatch. Signed-off-by: Naman Jain Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20250102145243.2088-1-namjain@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250102145243.2088-1-namjain@linux.microsoft.com> --- drivers/uio/uio_hv_generic.c | 86 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 3976360d0096d..1b19b56474950 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -296,51 +296,51 @@ hv_uio_probe(struct hv_device *dev, pdata->info.mem[MON_PAGE_MAP].size = PAGE_SIZE; pdata->info.mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL; - pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE); - if (pdata->recv_buf == NULL) { - ret = -ENOMEM; - goto fail_free_ring; + if (channel->device_id == HV_NIC) { + pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE); + if (!pdata->recv_buf) { + ret = -ENOMEM; + goto fail_free_ring; + } + + ret = 
vmbus_establish_gpadl(channel, pdata->recv_buf, + RECV_BUFFER_SIZE, &pdata->recv_gpadl); + if (ret) { + if (!pdata->recv_gpadl.decrypted) + vfree(pdata->recv_buf); + goto fail_close; + } + + /* put Global Physical Address Label in name */ + snprintf(pdata->recv_name, sizeof(pdata->recv_name), + "recv:%u", pdata->recv_gpadl.gpadl_handle); + pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name; + pdata->info.mem[RECV_BUF_MAP].addr = (uintptr_t)pdata->recv_buf; + pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE; + pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL; + + pdata->send_buf = vzalloc(SEND_BUFFER_SIZE); + if (!pdata->send_buf) { + ret = -ENOMEM; + goto fail_close; + } + + ret = vmbus_establish_gpadl(channel, pdata->send_buf, + SEND_BUFFER_SIZE, &pdata->send_gpadl); + if (ret) { + if (!pdata->send_gpadl.decrypted) + vfree(pdata->send_buf); + goto fail_close; + } + + snprintf(pdata->send_name, sizeof(pdata->send_name), + "send:%u", pdata->send_gpadl.gpadl_handle); + pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name; + pdata->info.mem[SEND_BUF_MAP].addr = (uintptr_t)pdata->send_buf; + pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE; + pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL; } - ret = vmbus_establish_gpadl(channel, pdata->recv_buf, - RECV_BUFFER_SIZE, &pdata->recv_gpadl); - if (ret) { - if (!pdata->recv_gpadl.decrypted) - vfree(pdata->recv_buf); - goto fail_close; - } - - /* put Global Physical Address Label in name */ - snprintf(pdata->recv_name, sizeof(pdata->recv_name), - "recv:%u", pdata->recv_gpadl.gpadl_handle); - pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name; - pdata->info.mem[RECV_BUF_MAP].addr - = (uintptr_t)pdata->recv_buf; - pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE; - pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL; - - pdata->send_buf = vzalloc(SEND_BUFFER_SIZE); - if (pdata->send_buf == NULL) { - ret = -ENOMEM; - goto fail_close; - } - - ret = vmbus_establish_gpadl(channel, 
pdata->send_buf, - SEND_BUFFER_SIZE, &pdata->send_gpadl); - if (ret) { - if (!pdata->send_gpadl.decrypted) - vfree(pdata->send_buf); - goto fail_close; - } - - snprintf(pdata->send_name, sizeof(pdata->send_name), - "send:%u", pdata->send_gpadl.gpadl_handle); - pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name; - pdata->info.mem[SEND_BUF_MAP].addr - = (uintptr_t)pdata->send_buf; - pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE; - pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL; - pdata->info.priv = pdata; pdata->device = dev; From 113386ca981c3997db6b83272c7ecf47456aeddb Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Thu, 2 Jan 2025 13:07:10 +0000 Subject: [PATCH 10/16] Drivers: hv: vmbus: Wait for boot-time offers during boot and resume Channel offers are requested during VMBus initialization and resume from hibernation. Add support to wait for all boot-time channel offers to be delivered and processed before returning from vmbus_request_offers. This is in analogy to a PCI bus not returning from probe until it has scanned all devices on the bus. Without this, user mode can race with VMBus initialization and miss channel offers. User mode has no way to work around this other than sleeping for a while, since there is no way to know when VMBus has finished processing boot-time offers. With this added functionality, remove earlier logic which keeps track of count of offered channels post resume from hibernation. Once all offers delivered message is received, no further boot-time offers are going to be received. Consequently, logic to prevent suspend from happening after previous resume had missing offers, is also removed. 
Co-developed-by: John Starks Signed-off-by: John Starks Signed-off-by: Naman Jain Reviewed-by: Easwar Hariharan Reviewed-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20250102130712.1661-2-namjain@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250102130712.1661-2-namjain@linux.microsoft.com> --- drivers/hv/channel_mgmt.c | 61 +++++++++++++++++++++++++++++---------- drivers/hv/connection.c | 4 +-- drivers/hv/hyperv_vmbus.h | 14 ++------- drivers/hv/vmbus_drv.c | 16 ---------- 4 files changed, 51 insertions(+), 44 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 3c6011a48dabe..6e084c2074141 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -944,16 +944,6 @@ void vmbus_initiate_unload(bool crash) vmbus_wait_for_unload(); } -static void check_ready_for_resume_event(void) -{ - /* - * If all the old primary channels have been fixed up, then it's safe - * to resume. - */ - if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume)) - complete(&vmbus_connection.ready_for_resume_event); -} - static void vmbus_setup_channel_state(struct vmbus_channel *channel, struct vmbus_channel_offer_channel *offer) { @@ -1109,8 +1099,6 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) /* Add the channel back to the array of channels. */ vmbus_channel_map_relid(oldchannel); - check_ready_for_resume_event(); - mutex_unlock(&vmbus_connection.channel_mutex); return; } @@ -1296,13 +1284,28 @@ EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); /* * vmbus_onoffers_delivered - - * This is invoked when all offers have been delivered. + * The CHANNELMSG_ALLOFFERS_DELIVERED message arrives after all + * boot-time offers are delivered. A boot-time offer is for the primary + * channel for any virtual hardware configured in the VM at the time it boots. 
+ * Boot-time offers include offers for physical devices assigned to the VM + * via Hyper-V's Discrete Device Assignment (DDA) functionality that are + * handled as virtual PCI devices in Linux (e.g., NVMe devices and GPUs). + * Boot-time offers do not include offers for VMBus sub-channels. Because + * devices can be hot-added to the VM after it is booted, additional channel + * offers that aren't boot-time offers can be received at any time after the + * all-offers-delivered message. * - * Nothing to do here. + * SR-IOV NIC Virtual Functions (VFs) assigned to a VM are not considered + * to be assigned to the VM at boot-time, and offers for VFs may occur after + * the all-offers-delivered message. VFs are optional accelerators to the + * synthetic VMBus NIC and are effectively hot-added only after the VMBus + * NIC channel is opened (once it knows the guest can support it, via the + * sriov bit in the netvsc protocol). */ static void vmbus_onoffers_delivered( struct vmbus_channel_message_header *hdr) { + complete(&vmbus_connection.all_offers_delivered_event); } /* @@ -1578,7 +1581,8 @@ void vmbus_onmessage(struct vmbus_channel_message_header *hdr) } /* - * vmbus_request_offers - Send a request to get all our pending offers. + * vmbus_request_offers - Send a request to get all our pending offers + * and wait for all boot-time offers to arrive. */ int vmbus_request_offers(void) { @@ -1596,6 +1600,10 @@ int vmbus_request_offers(void) msg->msgtype = CHANNELMSG_REQUESTOFFERS; + /* + * This REQUESTOFFERS message will result in the host sending an all + * offers delivered message after all the boot-time offers are sent. + */ ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), true); @@ -1607,6 +1615,29 @@ int vmbus_request_offers(void) goto cleanup; } + /* + * Wait for the host to send all boot-time offers. + * Keeping it as a best-effort mechanism, where a warning is + * printed if a timeout occurs, and execution is resumed. 
+ */ + if (!wait_for_completion_timeout(&vmbus_connection.all_offers_delivered_event, + secs_to_jiffies(60))) { + pr_warn("timed out waiting for all boot-time offers to be delivered.\n"); + } + + /* + * Flush handling of offer messages (which may initiate work on + * other work queues). + */ + flush_workqueue(vmbus_connection.work_queue); + + /* + * Flush workqueue for processing the incoming offers. Subchannel + * offers and their processing can happen later, so there is no need to + * flush that workqueue here. + */ + flush_workqueue(vmbus_connection.handle_primary_chan_wq); + cleanup: kfree(msginfo); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index f001ae880e1db..8351360bba161 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -34,8 +34,8 @@ struct vmbus_connection vmbus_connection = { .ready_for_suspend_event = COMPLETION_INITIALIZER( vmbus_connection.ready_for_suspend_event), - .ready_for_resume_event = COMPLETION_INITIALIZER( - vmbus_connection.ready_for_resume_event), + .all_offers_delivered_event = COMPLETION_INITIALIZER( + vmbus_connection.all_offers_delivered_event), }; EXPORT_SYMBOL_GPL(vmbus_connection); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index fad31e30cd532..29780f3a74784 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -287,18 +287,10 @@ struct vmbus_connection { struct completion ready_for_suspend_event; /* - * The number of primary channels that should be "fixed up" - * upon resume: these channels are re-offered upon resume, and some - * fields of the channel offers (i.e. child_relid and connection_id) - * can change, so the old offermsg must be fixed up, before the resume - * callbacks of the VSC drivers start to further touch the channels. + * Completed once the host has offered all boot-time channels. + * Note that some channels may still be under process on a workqueue. 
*/ - atomic_t nr_chan_fixup_on_resume; - /* - * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to - * drop to zero. - */ - struct completion ready_for_resume_event; + struct completion all_offers_delivered_event; }; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2892b8da20a5e..bf5608a740561 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2427,11 +2427,6 @@ static int vmbus_bus_suspend(struct device *dev) if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) wait_for_completion(&vmbus_connection.ready_for_suspend_event); - if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { - pr_err("Can not suspend due to a previous failed resuming\n"); - return -EBUSY; - } - mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { @@ -2456,17 +2451,12 @@ static int vmbus_bus_suspend(struct device *dev) pr_err("Sub-channel not deleted!\n"); WARN_ON_ONCE(1); } - - atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); } mutex_unlock(&vmbus_connection.channel_mutex); vmbus_initiate_unload(false); - /* Reset the event for the next resume. */ - reinit_completion(&vmbus_connection.ready_for_resume_event); - return 0; } @@ -2502,14 +2492,8 @@ static int vmbus_bus_resume(struct device *dev) if (ret != 0) return ret; - WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); - vmbus_request_offers(); - if (wait_for_completion_timeout( - &vmbus_connection.ready_for_resume_event, secs_to_jiffies(10)) == 0) - pr_err("Some vmbus device is missing after suspending?\n"); - /* Reset the event for the next suspend. 
*/ reinit_completion(&vmbus_connection.ready_for_suspend_event); From fcf5203e289ca0ef75a18ce74a9eb716f7f1f569 Mon Sep 17 00:00:00 2001 From: John Starks Date: Thu, 2 Jan 2025 13:07:11 +0000 Subject: [PATCH 11/16] Drivers: hv: vmbus: Log on missing offers if any When resuming from hibernation, log any channels that were present before hibernation but now are gone. In general, the boot-time devices configured for a resuming VM should be the same as the devices in the VM at the time of hibernation. It's uncommon for the configuration to have been changed such that offers are missing. Changing the configuration violates the rules for hibernation anyway. The cleanup of missing channels is not straight-forward and dependent on individual device driver functionality and implementation, so it can be added in future with separate changes. Signed-off-by: John Starks Co-developed-by: Naman Jain Signed-off-by: Naman Jain Reviewed-by: Easwar Hariharan Reviewed-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20250102130712.1661-3-namjain@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250102130712.1661-3-namjain@linux.microsoft.com> --- drivers/hv/vmbus_drv.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index bf5608a740561..0f6cd44fff292 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2462,6 +2462,7 @@ static int vmbus_bus_suspend(struct device *dev) static int vmbus_bus_resume(struct device *dev) { + struct vmbus_channel *channel; struct vmbus_channel_msginfo *msginfo; size_t msgsize; int ret; @@ -2494,6 +2495,22 @@ static int vmbus_bus_resume(struct device *dev) vmbus_request_offers(); + mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (channel->offermsg.child_relid != INVALID_RELID) + continue; + + /* hvsock channels are not expected to be present. 
*/ + if (is_hvsock_channel(channel)) + continue; + + pr_err("channel %pUl/%pUl not present after resume.\n", + &channel->offermsg.offer.if_type, + &channel->offermsg.offer.if_instance); + /* ToDo: Cleanup these channels here */ + } + mutex_unlock(&vmbus_connection.channel_mutex); + /* Reset the event for the next suspend. */ reinit_completion(&vmbus_connection.ready_for_suspend_event); From 1da602ec36a3e208c070ec23895e84cbb621a12e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 7 Jan 2025 10:09:18 -0800 Subject: [PATCH 12/16] hv_balloon: Fallback to generic_online_page() for non-HV hot added mem The Hyper-V balloon driver installs a custom callback for handling page onlining operations performed by the memory hotplug subsystem. This custom callback is global, and overrides the default callback (generic_online_page) that Linux otherwise uses. The custom callback properly handles memory that is hot-added by the balloon driver as part of a Hyper-V hot-add region. But memory can also be hot-added directly by a device driver for a vPCI device, particularly GPUs. In such a case, the custom callback installed by the balloon driver runs, but won't find the page in its hot-add region list and doesn't online it, which could cause driver initialization failures. Fix this by having the balloon custom callback run generic_online_page() when the page isn't part of a Hyper-V hot-add region, thereby doing the default Linux behavior. This allows device driver hot-adds to work properly. Similar cases are handled the same way in the virtio-mem driver. 
Suggested-by: Vikram Sethi Tested-by: Michael Frohlich Reviewed-by: Michael Kelley Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20250107180918.1053933-1-jacob.pan@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250107180918.1053933-1-jacob.pan@linux.microsoft.com> --- drivers/hv/hv_balloon.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 871b73ca3a0fb..fec2f18679e3f 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -766,16 +766,18 @@ static void hv_online_page(struct page *pg, unsigned int order) struct hv_hotadd_state *has; unsigned long pfn = page_to_pfn(pg); - guard(spinlock_irqsave)(&dm_device.ha_lock); - list_for_each_entry(has, &dm_device.ha_region_list, list) { - /* The page belongs to a different HAS. */ - if (pfn < has->start_pfn || - (pfn + (1UL << order) > has->end_pfn)) - continue; + scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { + list_for_each_entry(has, &dm_device.ha_region_list, list) { + /* The page belongs to a different HAS. */ + if (pfn < has->start_pfn || + (pfn + (1UL << order) > has->end_pfn)) + continue; - hv_bring_pgs_online(has, pfn, 1UL << order); - break; + hv_bring_pgs_online(has, pfn, 1UL << order); + return; + } } + generic_online_page(pg, order); } static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) From 9263abc7fd5d753dbf4cd4bf994bcf9c8c999918 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Wed, 8 Jan 2025 14:21:36 -0800 Subject: [PATCH 13/16] hyperv: Enable the hypercall output page for the VTL mode Due to the hypercall page not being allocated in the VTL mode, the code resorts to using a part of the input page. Allocate the hypercall output page in the VTL mode thus enabling it to use it for output and share code with dom0. 
Signed-off-by: Roman Kisel Reviewed-by: Nuno Das Neves Link: https://lore.kernel.org/r/20250108222138.1623703-4-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250108222138.1623703-4-romank@linux.microsoft.com> --- drivers/hv/hv_common.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index c6ed3ba4bf61e..af5d1dc451f67 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -278,6 +278,11 @@ static void hv_kmsg_dump_register(void) } } +static inline bool hv_output_page_exists(void) +{ + return hv_root_partition || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); +} + int __init hv_common_init(void) { int i; @@ -340,7 +345,7 @@ int __init hv_common_init(void) BUG_ON(!hyperv_pcpu_input_arg); /* Allocate the per-CPU state for output arg for root */ - if (hv_root_partition) { + if (hv_output_page_exists()) { hyperv_pcpu_output_arg = alloc_percpu(void *); BUG_ON(!hyperv_pcpu_output_arg); } @@ -435,7 +440,7 @@ int hv_common_cpu_init(unsigned int cpu) void **inputarg, **outputarg; u64 msr_vp_index; gfp_t flags; - int pgcount = hv_root_partition ? 2 : 1; + const int pgcount = hv_output_page_exists() ? 2 : 1; void *mem; int ret; @@ -453,7 +458,7 @@ int hv_common_cpu_init(unsigned int cpu) if (!mem) return -ENOMEM; - if (hv_root_partition) { + if (hv_output_page_exists()) { outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); *outputarg = (char *)mem + HV_HYP_PAGE_SIZE; } From 07412e1f163de6567f5f4a2c8a44ae96a2a05422 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Wed, 8 Jan 2025 14:21:37 -0800 Subject: [PATCH 14/16] hyperv: Do not overlap the hvcall IO areas in get_vtl() The Top-Level Functional Specification for Hyper-V, Section 3.6 [1, 2], disallows overlapping of the input and output hypercall areas, and get_vtl(void) does overlap them. Use the output hypercall page of the current vCPU for the hypercall. 
[1] https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface [2] https://github.com/MicrosoftDocs/Virtualization-Documentation/tree/main/tlfs Fixes: 8387ce06d70b ("x86/hyperv: Set Virtual Trust Level in VMBus init message") Signed-off-by: Roman Kisel Reviewed-by: Tianyu Lan Reviewed-by: Easwar Hariharan Link: https://lore.kernel.org/r/20250108222138.1623703-5-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250108222138.1623703-5-romank@linux.microsoft.com> --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f82d1aefaa8a9..173005e6a95da 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -422,7 +422,7 @@ static u8 __init get_vtl(void) local_irq_save(flags); input = *this_cpu_ptr(hyperv_pcpu_input_arg); - output = (struct hv_output_get_vp_registers *)input; + output = *this_cpu_ptr(hyperv_pcpu_output_arg); memset(input, 0, struct_size(input, names, 1)); input->partition_id = HV_PARTITION_ID_SELF; From f285d995743269aa9f893e5e9a1065604137c1f6 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Wed, 8 Jan 2025 14:21:38 -0800 Subject: [PATCH 15/16] hyperv: Do not overlap the hvcall IO areas in hv_vtl_apicid_to_vp_id() The Top-Level Functional Specification for Hyper-V, Section 3.6 [1, 2], disallows overlapping of the input and output hypercall areas, and hv_vtl_apicid_to_vp_id() overlaps them. Use the output hypercall page of the current vCPU for the hypercall. 
[1] https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface [2] https://github.com/MicrosoftDocs/Virtualization-Documentation/tree/main/tlfs Reported-by: Michael Kelley Closes: https://lore.kernel.org/lkml/SN6PR02MB4157B98CD34781CC87A9D921D40D2@SN6PR02MB4157.namprd02.prod.outlook.com/ Signed-off-by: Roman Kisel Reviewed-by: Easwar Hariharan Reviewed-by: Nuno Das Neves Link: https://lore.kernel.org/r/20250108222138.1623703-6-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250108222138.1623703-6-romank@linux.microsoft.com> --- arch/x86/hyperv/hv_vtl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 04775346369c5..4e1b1e3b56584 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -189,7 +189,7 @@ static int hv_vtl_apicid_to_vp_id(u32 apic_id) input->partition_id = HV_PARTITION_ID_SELF; input->apic_ids[0] = apic_id; - output = (u32 *)input; + output = *this_cpu_ptr(hyperv_pcpu_output_arg); control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; status = hv_do_hypercall(control, input, output); From 2e03358be78b65d28b66e17aca9e0c8700b0df78 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 13 Jan 2025 06:56:45 -0800 Subject: [PATCH 16/16] Documentation: hyperv: Add overview of guest VM hibernation Add documentation on how hibernation works in a guest VM on Hyper-V. Describe how VMBus devices and the VMBus itself are hibernated and resumed, along with various limitations. 
Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20250113145645.1320942-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250113145645.1320942-1-mhklinux@outlook.com> --- Documentation/virt/hyperv/hibernation.rst | 336 ++++++++++++++++++++++ Documentation/virt/hyperv/index.rst | 1 + 2 files changed, 337 insertions(+) create mode 100644 Documentation/virt/hyperv/hibernation.rst diff --git a/Documentation/virt/hyperv/hibernation.rst b/Documentation/virt/hyperv/hibernation.rst new file mode 100644 index 0000000000000..4ff27f4a317a3 --- /dev/null +++ b/Documentation/virt/hyperv/hibernation.rst @@ -0,0 +1,336 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Hibernating Guest VMs +===================== + +Background +---------- +Linux supports the ability to hibernate itself in order to save power. +Hibernation is sometimes called suspend-to-disk, as it writes a memory +image to disk and puts the hardware into the lowest possible power +state. Upon resume from hibernation, the hardware is restarted and the +memory image is restored from disk so that it can resume execution +where it left off. See the "Hibernation" section of +Documentation/admin-guide/pm/sleep-states.rst. + +Hibernation is usually done on devices with a single user, such as a +personal laptop. For example, the laptop goes into hibernation when +the cover is closed, and resumes when the cover is opened again. +Hibernation and resume happen on the same hardware, and Linux kernel +code orchestrating the hibernation steps assumes that the hardware +configuration is not changed while in the hibernated state. + +Hibernation can be initiated within Linux by writing "disk" to +/sys/power/state or by invoking the reboot system call with the +appropriate arguments. This functionality may be wrapped by user space +commands such as "systemctl hibernate" that are run directly from a +command line or in response to events such as the laptop lid closing.
+ +Considerations for Guest VM Hibernation +--------------------------------------- +Linux guests on Hyper-V can also be hibernated, in which case the +hardware is the virtual hardware provided by Hyper-V to the guest VM. +Only the targeted guest VM is hibernated, while other guest VMs and +the underlying Hyper-V host continue to run normally. While the +underlying Windows Hyper-V and physical hardware on which it is +running might also be hibernated using hibernation functionality in +the Windows host, host hibernation and its impact on guest VMs is not +in scope for this documentation. + +Resuming a hibernated guest VM can be more challenging than with +physical hardware because VMs make it very easy to change the hardware +configuration between the hibernation and resume. Even when the resume +is done on the same VM that hibernated, the memory size might be +changed, or virtual NICs or SCSI controllers might be added or +removed. Virtual PCI devices assigned to the VM might be added or +removed. Most such changes cause the resume steps to fail, though +adding a new virtual NIC, SCSI controller, or vPCI device should work. + +Additional complexity can ensue because the disks of the hibernated VM +can be moved to another newly created VM that otherwise has the same +virtual hardware configuration. While it is desirable for resume from +hibernation to succeed after such a move, there are challenges. See +details on this scenario and its limitations in the "Resuming on a +Different VM" section below. + +Hyper-V also provides ways to move a VM from one Hyper-V host to +another. Hyper-V tries to ensure processor model and Hyper-V version +compatibility using VM Configuration Versions, and prevents moves to +a host that isn't compatible. Linux adapts to host and processor +differences by detecting them at boot time, but such detection is not +done when resuming execution in the hibernation image. 
If a VM is +hibernated on one host, then resumed on a host with a different processor +model or Hyper-V version, settings recorded in the hibernation image +may not match the new host. Because Linux does not detect such +mismatches when resuming the hibernation image, undefined behavior +and failures could result. + + +Enabling Guest VM Hibernation +----------------------------- +Hibernation of a Hyper-V guest VM is disabled by default because +hibernation is incompatible with memory hot-add, as provided by the +Hyper-V balloon driver. If hot-add is used and the VM hibernates, it +hibernates with more memory than it started with. But when the VM +resumes from hibernation, Hyper-V gives the VM only the originally +assigned memory, and the memory size mismatch causes resume to fail. + +To enable a Hyper-V VM for hibernation, the Hyper-V administrator must +enable the ACPI virtual S4 sleep state in the ACPI configuration that +Hyper-V provides to the guest VM. Such enablement is accomplished by +modifying a WMI property of the VM, the steps for which are outside +the scope of this documentation but are available on the web. +Enablement is treated as the indicator that the administrator +prioritizes Linux hibernation in the VM over hot-add, so the Hyper-V +balloon driver in Linux disables hot-add. Enablement is indicated if +the contents of /sys/power/disk include "platform" as an option. The +enablement is also visible in /sys/bus/vmbus/hibernation. See function +hv_is_hibernation_supported(). + +Linux supports ACPI sleep states on x86, but not on arm64. So Linux +guest VM hibernation is not available on Hyper-V for arm64. + +Initiating Guest VM Hibernation +------------------------------- +Guest VMs can self-initiate hibernation using the standard Linux +methods of writing "disk" to /sys/power/state or the reboot system +call.
As an additional layer, Linux guests on Hyper-V support the +"Shutdown" integration service, via which a Hyper-V administrator can +tell a Linux VM to hibernate using a command outside the VM. The +command generates a request to the Hyper-V shutdown driver in Linux, +which sends the uevent "EVENT=hibernate". See kernel functions +shutdown_onchannelcallback() and send_hibernate_uevent(). A udev rule +must be provided in the VM that handles this event and initiates +hibernation. + +Handling VMBus Devices During Hibernation & Resume +-------------------------------------------------- +The VMBus bus driver, and the individual VMBus device drivers, +implement suspend and resume functions that are called as part of the +Linux orchestration of hibernation and of resuming from hibernation. +The overall approach is to leave in place the data structures for the +primary VMBus channels and their associated Linux devices, such as +SCSI controllers and others, so that they are captured in the +hibernation image. This approach allows any state associated with the +device to be persisted across the hibernation/resume. When the VM +resumes, the devices are re-offered by Hyper-V and are connected to +the data structures that already exist in the resumed hibernation +image. + +VMBus devices are identified by class and instance GUID. (See section +"VMBus device creation/deletion" in +Documentation/virt/hyperv/vmbus.rst.) Upon resume from hibernation, +the resume functions expect that the devices offered by Hyper-V have +the same class/instance GUIDs as the devices present at the time of +hibernation. Having the same class/instance GUIDs allows the offered +devices to be matched to the primary VMBus channel data structures in +the memory of the now resumed hibernation image. If any devices are +offered that don't match primary VMBus channel data structures that +already exist, they are processed normally as newly added devices. 
If +primary VMBus channels that exist in the resumed hibernation image are +not matched with a device offered in the resumed VM, the resume +sequence waits for 10 seconds, then proceeds. But the unmatched device +is likely to cause errors in the resumed VM. + +When resuming existing primary VMBus channels, the newly offered +relids might be different because relids can change on each VM boot, +even if the VM configuration hasn't changed. The VMBus bus driver +resume function matches the class/instance GUIDs, and updates the +relids in case they have changed. + +VMBus sub-channels are not persisted in the hibernation image. Each +VMBus device driver's suspend function must close any sub-channels +prior to hibernation. Closing a sub-channel causes Hyper-V to send a +RESCIND_CHANNELOFFER message, which Linux processes by freeing the +channel data structures so that all vestiges of the sub-channel are +removed. By contrast, primary channels are marked closed and their +ring buffers are freed, but Hyper-V does not send a rescind message, +so the channel data structure continues to exist. Upon resume, the +device driver's resume function re-allocates the ring buffer and +re-opens the existing channel. It then communicates with Hyper-V to +re-open sub-channels from scratch. + +The Linux ends of Hyper-V sockets are forced closed at the time of +hibernation. The guest can't force closing the host end of the socket, +but any host-side actions on the host end will produce an error. + +VMBus devices use the same suspend function for the "freeze" and the +"poweroff" phases, and the same resume function for the "thaw" and +"restore" phases. See the "Entering Hibernation" section of +Documentation/driver-api/pm/devices.rst for the sequencing of the +phases. + +Detailed Hibernation Sequence +----------------------------- +1. The Linux power management (PM) subsystem prepares for + hibernation by freezing user space processes and allocating + memory to hold the hibernation image. 
+2. As part of the "freeze" phase, Linux PM calls the "suspend" + function for each VMBus device in turn. As described above, this + function removes sub-channels, and leaves the primary channel in + a closed state. +3. Linux PM calls the "suspend" function for the VMBus bus, which + closes any Hyper-V socket channels and unloads the top-level + VMBus connection with the Hyper-V host. +4. Linux PM disables non-boot CPUs, creates the hibernation image in + the previously allocated memory, then re-enables non-boot CPUs. + The hibernation image contains the memory data structures for the + closed primary channels, but no sub-channels. +5. As part of the "thaw" phase, Linux PM calls the "resume" function + for the VMBus bus, which re-establishes the top-level VMBus + connection and requests that Hyper-V re-offer the VMBus devices. + As offers are received for the primary channels, the relids are + updated as previously described. +6. Linux PM calls the "resume" function for each VMBus device. Each + device re-opens its primary channel, and communicates with Hyper-V + to re-establish sub-channels if appropriate. The sub-channels + are re-created as new channels since they were previously removed + entirely in Step 2. +7. With VMBus devices now working again, Linux PM writes the + hibernation image from memory to disk. +8. Linux PM repeats Steps 2 and 3 above as part of the "poweroff" + phase. VMBus channels are closed and the top-level VMBus + connection is unloaded. +9. Linux PM disables non-boot CPUs, and then enters ACPI sleep state + S4. Hibernation is now complete. + +Detailed Resume Sequence +------------------------ +1. The guest VM boots into a fresh Linux OS instance. During boot, + the top-level VMBus connection is established, and synthetic + devices are enabled. This happens via the normal paths that don't + involve hibernation. +2. Linux PM hibernation code reads swap space to find and read + the hibernation image into memory.
If there is no hibernation + image, then this boot becomes a normal boot. +3. If this is a resume from hibernation, the "freeze" phase is used + to shutdown VMBus devices and unload the top-level VMBus + connection in the running fresh OS instance, just like Steps 2 + and 3 in the hibernation sequence. +4. Linux PM disables non-boot CPUs, and transfers control to the + read-in hibernation image. In the now-running hibernation image, + non-boot CPUs are restarted. +5. As part of the "resume" phase, Linux PM repeats Steps 5 and 6 + from the hibernation sequence. The top-level VMBus connection is + re-established, and offers are received and matched to primary + channels in the image. Relids are updated. VMBus device resume + functions re-open primary channels and re-create sub-channels. +6. Linux PM exits the hibernation resume sequence and the VM is now + running normally from the hibernation image. + +Key-Value Pair (KVP) Pseudo-Device Anomalies +-------------------------------------------- +The VMBus KVP device behaves differently from other pseudo-devices +offered by Hyper-V. When the KVP primary channel is closed, Hyper-V +sends a rescind message, which causes all vestiges of the device to be +removed. But Hyper-V then re-offers the device, causing it to be newly +re-created. The removal and re-creation occurs during the "freeze" +phase of hibernation, so the hibernation image contains the re-created +KVP device. Similar behavior occurs during the "freeze" phase of the +resume sequence while still in the fresh OS instance. But in both +cases, the top-level VMBus connection is subsequently unloaded, which +causes the device to be discarded on the Hyper-V side. So no harm is +done and everything still works. + +Virtual PCI devices +------------------- +Virtual PCI devices are physical PCI devices that are mapped directly +into the VM's physical address space so the VM can interact directly +with the hardware. 
vPCI devices include those accessed via what Hyper-V +calls "Discrete Device Assignment" (DDA), as well as SR-IOV NIC +Virtual Functions (VF) devices. See Documentation/virt/hyperv/vpci.rst. + +Hyper-V DDA devices are offered to guest VMs after the top-level VMBus +connection is established, just like VMBus synthetic devices. They are +statically assigned to the VM, and their instance GUIDs don't change +unless the Hyper-V administrator makes changes to the configuration. +DDA devices are represented in Linux as virtual PCI devices that have +a VMBus identity as well as a PCI identity. Consequently, Linux guest +hibernation first handles DDA devices as VMBus devices in order to +manage the VMBus channel. But then they are also handled as PCI +devices using the hibernation functions implemented by their native +PCI driver. + +SR-IOV NIC VFs also have a VMBus identity as well as a PCI +identity, and overall are processed similarly to DDA devices. A +difference is that VFs are not offered to the VM during initial boot +of the VM. Instead, the VMBus synthetic NIC driver first starts +operating and communicates to Hyper-V that it is prepared to accept a +VF, and then the VF offer is made. However, the VMBus connection +might later be unloaded and then re-established without the VM being +rebooted, as happens in Steps 3 and 5 in the Detailed Hibernation +Sequence above and in the Detailed Resume Sequence. In such a case, +the VFs likely became part of the VM during initial boot, so when the +VMBus connection is re-established, the VFs are offered on the +re-established connection without intervention by the synthetic NIC driver. + +UIO Devices +----------- +A VMBus device can be exposed to user space using the Hyper-V UIO +driver (uio_hv_generic.c) so that a user space driver can control and +operate the device. However, the VMBus UIO driver does not support the +suspend and resume operations needed for hibernation. 
If a VMBus +device is configured to use the UIO driver, hibernating the VM fails +and Linux continues to run normally. The most common use of the Hyper-V +UIO driver is for DPDK networking, but there are other uses as well. + +Resuming on a Different VM +-------------------------- +This scenario occurs in the Azure public cloud in that a hibernated +customer VM only exists as saved configuration and disks -- the VM no +longer exists on any Hyper-V host. When the customer VM is resumed, a +new Hyper-V VM with identical configuration is created, likely on a +different Hyper-V host. That new Hyper-V VM becomes the resumed +customer VM, and the steps the Linux kernel takes to resume from the +hibernation image must work in that new VM. + +While the disks and their contents are preserved from the original VM, +the Hyper-V-provided VMBus instance GUIDs of the disk controllers and +other synthetic devices would typically be different. The difference +would cause the resume from hibernation to fail, so several things are +done to solve this problem: + +* For VMBus synthetic devices that support only a single instance, + Hyper-V always assigns the same instance GUIDs. For example, the + Hyper-V mouse, the shutdown pseudo-device, the time sync pseudo + device, etc., always have the same instance GUID, both for local + Hyper-V installs as well as in the Azure cloud. + +* VMBus synthetic SCSI controllers may have multiple instances in a + VM, and in the general case instance GUIDs vary from VM to VM. + However, Azure VMs always have exactly two synthetic SCSI + controllers, and Azure code overrides the normal Hyper-V behavior + so these controllers are always assigned the same two instance + GUIDs. Consequently, when a customer VM is resumed on a newly + created VM, the instance GUIDs match. But this guarantee does not + hold for local Hyper-V installs. + +* Similarly, VMBus synthetic NICs may have multiple instances in a + VM, and the instance GUIDs vary from VM to VM. 
Again, Azure code + overrides the normal Hyper-V behavior so that the instance GUID + of a synthetic NIC in a customer VM does not change, even if the + customer VM is deallocated or hibernated, and then re-constituted + on a newly created VM. As with SCSI controllers, this behavior + does not hold for local Hyper-V installs. + +* vPCI devices do not have the same instance GUIDs when resuming + from hibernation on a newly created VM. Consequently, Azure does + not support hibernation for VMs that have DDA devices such as + NVMe controllers or GPUs. For SR-IOV NIC VFs, Azure removes the + VF from the VM before it hibernates so that the hibernation image + does not contain a VF device. When the VM is resumed it + instantiates a new VF, rather than trying to match against a VF + that is present in the hibernation image. Because Azure must + remove any VFs before initiating hibernation, Azure VM + hibernation must be initiated externally from the Azure Portal or + Azure CLI, which in turn uses the Shutdown integration service to + tell Linux to do the hibernation. If hibernation is self-initiated + within the Azure VM, VFs remain in the hibernation image, and are + not resumed properly. + +In summary, Azure takes special actions to remove VFs and to ensure +that VMBus device instance GUIDs match on a new/different VM, allowing +hibernation to work for most general-purpose Azure VM sizes. While +similar special actions could be taken when resuming on a different VM +on a local Hyper-V install, orchestrating such actions is not provided +out-of-the-box by local Hyper-V and so requires custom scripting. diff --git a/Documentation/virt/hyperv/index.rst b/Documentation/virt/hyperv/index.rst index 79bc4080329ee..c84c40fd61c91 100644 --- a/Documentation/virt/hyperv/index.rst +++ b/Documentation/virt/hyperv/index.rst @@ -11,4 +11,5 @@ Hyper-V Enlightenments vmbus clocks vpci + hibernation coco