From 14388ae24544c4cc09056773c886839a2c8d256b Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Mon, 23 Mar 2020 19:57:03 +0000 Subject: [PATCH 1/5] x86/vmware: Make vmware_select_hypercall() __init vmware_select_hypercall() is used only by the __init functions, and should be annotated with __init as well. Signed-off-by: Alexey Makhalov Signed-off-by: Borislav Petkov Reviewed-by: Thomas Hellstrom Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323195707.31242-2-amakhalov@vmware.com --- arch/x86/kernel/cpu/vmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 46d732696c1c8..d280560fd75e1 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -213,7 +213,7 @@ static void __init vmware_platform_setup(void) vmware_set_capabilities(); } -static u8 vmware_select_hypercall(void) +static u8 __init vmware_select_hypercall(void) { int eax, ebx, ecx, edx; From dd735f4707e603ac5b541b5f30de87c3c7bd60dd Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Mon, 23 Mar 2020 19:57:04 +0000 Subject: [PATCH 2/5] x86/vmware: Remove vmware_sched_clock_setup() Move cyc2ns setup logic to separate function. This separation will allow to use cyc2ns mult/shift pair not only for the sched_clock but also for other clocks such as steal_clock. Signed-off-by: Alexey Makhalov Signed-off-by: Borislav Petkov Reviewed-by: Thomas Hellstrom Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323195707.31242-3-amakhalov@vmware.com --- arch/x86/kernel/cpu/vmware.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d280560fd75e1..efb22fa76ba41 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -122,7 +122,7 @@ static unsigned long long notrace vmware_sched_clock(void) return ns; } -static void __init vmware_sched_clock_setup(void) +static void __init vmware_cyc2ns_setup(void) { struct cyc2ns_data *d = &vmware_cyc2ns; unsigned long long tsc_now = rdtsc(); @@ -132,8 +132,7 @@ static void __init vmware_sched_clock_setup(void) d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul, d->cyc2ns_shift); - pv_ops.time.sched_clock = vmware_sched_clock; - pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset); + pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset); } static void __init vmware_paravirt_ops_setup(void) @@ -141,8 +140,14 @@ static void __init vmware_paravirt_ops_setup(void) pv_info.name = "VMware hypervisor"; pv_ops.cpu.io_delay = paravirt_nop; - if (vmware_tsc_khz && vmw_sched_clock) - vmware_sched_clock_setup(); + if (vmware_tsc_khz == 0) + return; + + vmware_cyc2ns_setup(); + + if (vmw_sched_clock) + pv_ops.time.sched_clock = vmware_sched_clock; + } #else #define vmware_paravirt_ops_setup() do {} while (0) From ab02bb3f55f58e7608a88188000c3353398ebe3b Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Mon, 23 Mar 2020 19:57:05 +0000 Subject: [PATCH 3/5] x86/vmware: Add steal time clock support for VMware guests Steal time is the amount of CPU time needed by a guest virtual machine that is not provided by the host. Steal time occurs when the host allocates this CPU time elsewhere, for example, to another guest. Steal time can be enabled by adding the VM configuration option stealclock.enable = "TRUE". It is supported by VMs that run hardware version 13 or newer. Introduce the VMware steal time infrastructure. The high level code (such as enabling, disabling and hot-plug routines) was derived from KVM. [ Tomer: use READ_ONCE macros and 32bit guests support. ] [ bp: Massage. ] Co-developed-by: Tomer Zeltzer Signed-off-by: Alexey Makhalov Signed-off-by: Tomer Zeltzer Signed-off-by: Borislav Petkov Reviewed-by: Thomas Hellstrom Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323195707.31242-4-amakhalov@vmware.com --- arch/x86/kernel/cpu/vmware.c | 197 +++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index efb22fa76ba41..cc604614f0aea 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -47,6 +49,11 @@ #define VMWARE_CMD_GETVCPU_INFO 68 #define VMWARE_CMD_LEGACY_X2APIC 3 #define VMWARE_CMD_VCPU_RESERVED 31 +#define VMWARE_CMD_STEALCLOCK 91 + +#define STEALCLOCK_NOT_AVAILABLE (-1) +#define STEALCLOCK_DISABLED 0 +#define STEALCLOCK_ENABLED 1 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ __asm__("inl (%%dx), %%eax" : \ @@ -86,6 +93,18 @@ } \ } while (0) +struct vmware_steal_time { + union { + uint64_t clock; /* stolen time counter in units of vtsc */ + struct { + /* only for little-endian */ + uint32_t clock_low; + uint32_t clock_high; + }; + }; + uint64_t reserved[7]; +}; + static unsigned long vmware_tsc_khz __ro_after_init; static u8 vmware_hypercall_mode __ro_after_init; @@ -104,6 +123,8 @@ static unsigned long vmware_get_tsc_khz(void) #ifdef CONFIG_PARAVIRT static struct cyc2ns_data vmware_cyc2ns __ro_after_init; static int vmw_sched_clock __initdata = 1; +static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64); +static bool has_steal_clock; static __init int setup_vmw_sched_clock(char *s) { @@ -135,6 +156,163 @@ static void __init vmware_cyc2ns_setup(void) pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset); } +static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2) +{ + uint32_t result, info; + + asm volatile (VMWARE_HYPERCALL : + "=a"(result), + "=c"(info) : + "a"(VMWARE_HYPERVISOR_MAGIC), + "b"(0), + "c"(VMWARE_CMD_STEALCLOCK), + "d"(0), + "S"(arg1), + "D"(arg2) : + "memory"); + return result; +} + +static bool stealclock_enable(phys_addr_t pa) +{ + return vmware_cmd_stealclock(upper_32_bits(pa), + lower_32_bits(pa)) == STEALCLOCK_ENABLED; +} + +static int __stealclock_disable(void) +{ + return vmware_cmd_stealclock(0, 1); +} + +static void stealclock_disable(void) +{ + __stealclock_disable(); +} + +static bool vmware_is_stealclock_available(void) +{ + return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE; +} + +/** + * vmware_steal_clock() - read the per-cpu steal clock + * @cpu: the cpu number whose steal clock we want to read + * + * The function reads the steal clock if we are on a 64-bit system, otherwise + * reads it in parts, checking that the high part didn't change in the + * meantime. + * + * Return: + * The steal clock reading in ns. + */ +static uint64_t vmware_steal_clock(int cpu) +{ + struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu); + uint64_t clock; + + if (IS_ENABLED(CONFIG_64BIT)) + clock = READ_ONCE(steal->clock); + else { + uint32_t initial_high, low, high; + + do { + initial_high = READ_ONCE(steal->clock_high); + /* Do not reorder initial_high and high readings */ + virt_rmb(); + low = READ_ONCE(steal->clock_low); + /* Keep low reading in between */ + virt_rmb(); + high = READ_ONCE(steal->clock_high); + } while (initial_high != high); + + clock = ((uint64_t)high << 32) | low; + } + + return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul, + vmware_cyc2ns.cyc2ns_shift); +} + +static void vmware_register_steal_time(void) +{ + int cpu = smp_processor_id(); + struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu); + + if (!has_steal_clock) + return; + + if (!stealclock_enable(slow_virt_to_phys(st))) { + has_steal_clock = false; + return; + } + + pr_info("vmware-stealtime: cpu %d, pa %llx\n", + cpu, (unsigned long long) slow_virt_to_phys(st)); +} + +static void vmware_disable_steal_time(void) +{ + if (!has_steal_clock) + return; + + stealclock_disable(); +} + +static void vmware_guest_cpu_init(void) +{ + if (has_steal_clock) + vmware_register_steal_time(); +} + +static void vmware_pv_guest_cpu_reboot(void *unused) +{ + vmware_disable_steal_time(); +} + +static int vmware_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block vmware_pv_reboot_nb = { + .notifier_call = vmware_pv_reboot_notify, +}; + +#ifdef CONFIG_SMP +static void __init vmware_smp_prepare_boot_cpu(void) +{ + vmware_guest_cpu_init(); + native_smp_prepare_boot_cpu(); +} + +static int vmware_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + vmware_guest_cpu_init(); + local_irq_enable(); + return 0; +} + +static int vmware_cpu_down_prepare(unsigned int cpu) +{ + local_irq_disable(); + vmware_disable_steal_time(); + local_irq_enable(); + return 0; +} +#endif + +static __init int activate_jump_labels(void) +{ + if (has_steal_clock) + static_key_slow_inc(¶virt_steal_enabled); + + return 0; +} +arch_initcall(activate_jump_labels); + static void __init vmware_paravirt_ops_setup(void) { pv_info.name = "VMware hypervisor"; @@ -148,6 +326,25 @@ static void __init vmware_paravirt_ops_setup(void) if (vmw_sched_clock) pv_ops.time.sched_clock = vmware_sched_clock; + if (vmware_is_stealclock_available()) { + has_steal_clock = true; + pv_ops.time.steal_clock = vmware_steal_clock; + + /* We use reboot notifier only to disable steal clock */ + register_reboot_notifier(&vmware_pv_reboot_nb); + +#ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = + vmware_smp_prepare_boot_cpu; + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "x86/vmware:online", + vmware_cpu_online, + vmware_cpu_down_prepare) < 0) + pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n"); +#else + vmware_guest_cpu_init(); +#endif + } } #else #define vmware_paravirt_ops_setup() do {} while (0) From e73a8f38f82dd1c41b70a06556bea7dc250cc384 Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Mon, 23 Mar 2020 19:57:06 +0000 Subject: [PATCH 4/5] x86/vmware: Enable steal time accounting Set paravirt_steal_rq_enabled if steal clock present. paravirt_steal_rq_enabled is used in sched/core.c to adjust task progress by offsetting stolen time. Use 'no-steal-acc' off switch (share same name with KVM) to disable steal time accounting. Signed-off-by: Alexey Makhalov Signed-off-by: Borislav Petkov Reviewed-by: Thomas Hellstrom Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323195707.31242-5-amakhalov@vmware.com --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/x86/kernel/cpu/vmware.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c07815d230bcd..863b37d6bd2e5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3174,7 +3174,7 @@ [X86,PV_OPS] Disable paravirtualized VMware scheduler clock and use the default one. - no-steal-acc [X86,KVM,ARM64] Disable paravirtualized steal time + no-steal-acc [X86,PV_OPS,ARM64] Disable paravirtualized steal time accounting. steal time is computed, but won't influence scheduler behaviour diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index cc604614f0aea..e885f73bebd4b 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -125,6 +125,7 @@ static struct cyc2ns_data vmware_cyc2ns __ro_after_init; static int vmw_sched_clock __initdata = 1; static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64); static bool has_steal_clock; +static bool steal_acc __initdata = true; /* steal time accounting */ static __init int setup_vmw_sched_clock(char *s) { @@ -133,6 +134,13 @@ static __init int setup_vmw_sched_clock(char *s) } early_param("no-vmw-sched-clock", setup_vmw_sched_clock); +static __init int parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} +early_param("no-steal-acc", parse_no_stealacc); + static unsigned long long notrace vmware_sched_clock(void) { unsigned long long ns; @@ -306,8 +314,11 @@ static int vmware_cpu_down_prepare(unsigned int cpu) static __init int activate_jump_labels(void) { - if (has_steal_clock) + if (has_steal_clock) { static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + } return 0; } From 8fefe9dacdb0a1347d3dac871bb1bba3cbc32945 Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Mon, 23 Mar 2020 19:57:07 +0000 Subject: [PATCH 5/5] x86/vmware: Use bool type for vmw_sched_clock To be aligned with other bool variables. Signed-off-by: Alexey Makhalov Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323195707.31242-6-amakhalov@vmware.com --- arch/x86/kernel/cpu/vmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index e885f73bebd4b..9b6fafa69be92 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -122,14 +122,14 @@ static unsigned long vmware_get_tsc_khz(void) #ifdef CONFIG_PARAVIRT static struct cyc2ns_data vmware_cyc2ns __ro_after_init; -static int vmw_sched_clock __initdata = 1; +static bool vmw_sched_clock __initdata = true; static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64); static bool has_steal_clock; static bool steal_acc __initdata = true; /* steal time accounting */ static __init int setup_vmw_sched_clock(char *s) { - vmw_sched_clock = 0; + vmw_sched_clock = false; return 0; } early_param("no-vmw-sched-clock", setup_vmw_sched_clock);