x86: kvmclock: abolish PVCLOCK_COUNTS_FROM_ZERO
Newer KVM won't be exposing PVCLOCK_COUNTS_FROM_ZERO anymore.
The purpose of that flag was to start counting system time from 0 when
the KVM clock has been initialized.
We can achieve the same by selecting one read as the initial point.

A simple subtraction will work unless the KVM clock count overflows
earlier (has a smaller width) than the scheduler's cycle count.  We
should be safe till x86_128.
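
The wrap-safety argument is plain unsigned arithmetic: with 64-bit
counters, (now - offset) is correct modulo 2^64 even across a counter
wraparound.  A minimal userspace sketch of this, not kernel code; the
names (fake_clock, sched_clock_offset) are hypothetical stand-ins:

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_clock = UINT64_MAX - 5;	/* counter about to wrap */
static uint64_t sched_clock_offset;

static uint64_t clock_read(void)
{
	return fake_clock;
}

static uint64_t sched_clock_read(void)
{
	/* Unsigned subtraction stays correct modulo 2^64. */
	return clock_read() - sched_clock_offset;
}

int main(void)
{
	sched_clock_offset = clock_read();	/* "counts from zero" from here */
	fake_clock += 10;			/* wraps past UINT64_MAX */
	printf("%llu\n", (unsigned long long)sched_clock_read());	/* prints 10 */
	return 0;
}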

Because PVCLOCK_COUNTS_FROM_ZERO was enabled only on new hypervisors,
setting sched clock as stable based on PVCLOCK_TSC_STABLE_BIT might
regress on older ones.

I presume we don't need to change kvm_clock_read itself and can
introduce kvm_sched_clock_read instead.  A problem could arise if
sched_clock were expected to return the same value as get_cycles, but
in that case we should have merged those clocks already.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Radim Krčmář authored and Paolo Bonzini committed Oct 1, 2015
1 parent 1cea0ce commit 72c930d
Showing 1 changed file with 35 additions and 11 deletions.
46 changes: 35 additions & 11 deletions arch/x86/kernel/kvmclock.c
@@ -32,6 +32,7 @@
 static int kvmclock = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+static cycle_t kvm_sched_clock_offset;
 
 static int parse_no_kvmclock(char *arg)
 {
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
 	return kvm_clock_read();
 }
 
+static cycle_t kvm_sched_clock_read(void)
+{
+	return kvm_clock_read() - kvm_sched_clock_offset;
+}
+
+static inline void kvm_sched_clock_init(bool stable)
+{
+	if (!stable) {
+		pv_time_ops.sched_clock = kvm_clock_read;
+		return;
+	}
+
+	kvm_sched_clock_offset = kvm_clock_read();
+	pv_time_ops.sched_clock = kvm_sched_clock_read;
+	set_sched_clock_stable();
+
+	printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
+			kvm_sched_clock_offset);
+
+	BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
+		sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
+}
+
 /*
  * If we don't do that, there is the possibility that the guest
  * will calibrate under heavy load - thus, getting a lower lpj -
@@ -248,7 +272,17 @@ void __init kvmclock_init(void)
 		memblock_free(mem, size);
 		return;
 	}
-	pv_time_ops.sched_clock = kvm_clock_read;
+
+	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
+		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+
+	cpu = get_cpu();
+	vcpu_time = &hv_clock[cpu].pvti;
+	flags = pvclock_read_flags(vcpu_time);
+
+	kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
+	put_cpu();
+
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
 	x86_platform.get_wallclock = kvm_get_wallclock;
 	x86_platform.set_wallclock = kvm_set_wallclock;
@@ -265,16 +299,6 @@ void __init kvmclock_init(void)
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.name = "KVM";
-
-	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
-		pvclock_set_flags(~0);
-
-	cpu = get_cpu();
-	vcpu_time = &hv_clock[cpu].pvti;
-	flags = pvclock_read_flags(vcpu_time);
-	if (flags & PVCLOCK_COUNTS_FROM_ZERO)
-		set_sched_clock_stable();
-	put_cpu();
 }
 
 int __init kvm_setup_vsyscall_timeinfo(void)
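
The BUILD_BUG_ON added in kvm_sched_clock_init() fails the build if
kvm_sched_clock_offset is ever wider than the pvclock system_time field
it is derived from.  A standalone sketch of that compile-time pattern,
using a local stand-in for the kernel macro and hypothetical names:

#include <stdint.h>

/* Stand-in for the kernel's BUILD_BUG_ON: a negative array size makes
 * compilation fail whenever cond is true. */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

struct time_info {			/* hypothetical pvclock-like struct */
	uint64_t system_time;
};

static uint64_t sched_clock_offset;	/* hypothetical offset variable */

static void check_offset_width(void)
{
	/* Fails to compile if the offset outgrows the source field. */
	BUILD_BUG_ON(sizeof(sched_clock_offset) >
		     sizeof(((struct time_info *)0)->system_time));
}

int main(void)
{
	check_offset_width();	/* compiles only if the widths are compatible */
	return 0;
}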
