From dce382b6c72717b610741340f05e959764b851b8 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Fri, 19 Feb 2016 12:21:26 +0300 Subject: [PATCH 1/9] KVM: s390: Add diag "watchdog functions" to trace event decoding DIAG 0x288 may occur now. Let's add its code to the diag table in sie.h. Signed-off-by: Alexander Yarygin Signed-off-by: Christian Borntraeger --- arch/s390/include/uapi/asm/sie.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index ee69c0854c889..5dbaa72baa640 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -7,6 +7,7 @@ { 0x9c, "DIAG (0x9c) time slice end directed" }, \ { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ { 0x258, "DIAG (0x258) page-reference services" }, \ + { 0x288, "DIAG (0x288) watchdog functions" }, \ { 0x308, "DIAG (0x308) ipl functions" }, \ { 0x500, "DIAG (0x500) KVM virtio functions" }, \ { 0x501, "DIAG (0x501) KVM breakpoint" } From 01a745ac8b6c5d323a37194c242f7c77f3402469 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 12 Feb 2016 20:41:56 +0100 Subject: [PATCH 2/9] KVM: s390: store cpu id in vcpu->cpu when scheduled in By storing the cpu id, we have a way to verify if the current cpu is owning a VCPU. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 28bd5ea1b08f2..bd5edb138479e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1449,10 +1449,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + vcpu->cpu = -1; atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); From 4287f247f6cfaea0ed73b5104e94cd737e1ac0ae Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 15 Feb 2016 09:40:12 +0100 Subject: [PATCH 3/9] KVM: s390: abstract access to the VCPU cpu timer We want to manually step the cpu timer in certain scenarios in the future. Let's abstract any access to the cpu timer, so we can hide the complexity internally. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 5 +++-- arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++++-------- arch/s390/kvm/kvm-s390.h | 2 ++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 87e2d1a89d74e..4604e9accc650 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -182,8 +182,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) { - return (vcpu->arch.sie_block->cputm >> 63) && - cpu_timer_interrupts_enabled(vcpu); + if (!cpu_timer_interrupts_enabled(vcpu)) + return 0; + return kvm_s390_get_cpu_timer(vcpu) >> 63; } static inline int is_ioirq(unsigned long irq_type) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bd5edb138479e..2118a2250ac7a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1429,6 +1429,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +/* set the cpu timer - may only be called from the VCPU thread itself */ +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) +{ + vcpu->arch.sie_block->cputm = cputm; +} + +/* get the cpu timer - can also be called from other VCPU threads */ +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.sie_block->cputm; +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Save host register state */ @@ -1476,7 +1488,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw.mask = 0UL; vcpu->arch.sie_block->gpsw.addr = 0UL; kvm_s390_set_prefix(vcpu, 0); - vcpu->arch.sie_block->cputm = 0UL; + kvm_s390_set_cpu_timer(vcpu, 0); vcpu->arch.sie_block->ckc = 0UL; vcpu->arch.sie_block->todpr = 0; memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); @@ -1723,7 +1735,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = put_user(vcpu->arch.sie_block->cputm, + r = put_user(kvm_s390_get_cpu_timer(vcpu), (u64 __user *)reg->addr); break; case KVM_REG_S390_CLOCK_COMP: @@ -1761,6 +1773,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { int r = -EINVAL; + __u64 val; switch (reg->id) { case KVM_REG_S390_TODPR: @@ -1772,8 +1785,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = get_user(vcpu->arch.sie_block->cputm, - (u64 __user *)reg->addr); + r = get_user(val, (u64 __user *)reg->addr); + if (!r) + kvm_s390_set_cpu_timer(vcpu, val); break; case KVM_REG_S390_CLOCK_COMP: r = get_user(vcpu->arch.sie_block->ckc, @@ -2290,7 +2304,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { - vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; + kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; @@ -2312,7 +2326,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); - kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; + kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; @@ -2383,7 +2397,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) unsigned char archmode = 1; freg_t fprs[NUM_FPRS]; unsigned int px; - u64 clkcomp; + u64 clkcomp, cputm; int rc; px = kvm_s390_get_prefix(vcpu); @@ -2417,8 +2431,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) &vcpu->run->s.regs.fpc, 4); rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); + cputm = kvm_s390_get_cpu_timer(vcpu); rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, - &vcpu->arch.sie_block->cputm, 8); + &cputm, 8); clkcomp = vcpu->arch.sie_block->ckc >> 8; rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, &clkcomp, 8); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1c756c7dd0c2c..9787299d9a290 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -263,6 +263,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); unsigned long kvm_s390_fac_list_mask_size(void); extern unsigned long kvm_s390_fac_list_mask[]; +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); From db0758b29709815d93a963e31e2ec87ecf74f8bd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 15 Feb 2016 09:42:25 +0100 Subject: [PATCH 4/9] KVM: s390: step VCPU cpu timer during kvm_run ioctl Architecturally we should only provide steal time if we are scheduled away, and not if the host interprets a guest exit. We have to step the guest CPU timer in these cases. In the first shot, we will step the VCPU timer only during the kvm_run ioctl. Therefore all time spent e.g. in interception handlers or on irq delivery will be accounted for that VCPU. We have to take care of a few special cases: - Other VCPUs can test for pending irqs. We can only report a consistent value for the VCPU thread itself when adding the delta. - We have to take care of STP sync, therefore we have to extend kvm_clock_sync() and disable preemption accordingly - During any call to disable/enable/start/stop we could get premeempted and therefore get start/stop calls. Therefore we have to make sure we don't get into an inconsistent state. Whenever a VCPU is scheduled out, sleeping, in user space or just about to enter the SIE, the guest cpu timer isn't stepped. Please note that all primitives are prepared to be called from both environments (cpu timer accounting enabled or not), although not completely used in this patch yet (e.g. kvm_s390_set_cpu_timer() will never be called while cpu timer accounting is enabled). Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 2 + arch/s390/kvm/kvm-s390.c | 76 +++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 727e7f7b33fdd..91796dd2a8ece 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -552,6 +552,8 @@ struct kvm_vcpu_arch { unsigned long pfault_token; unsigned long pfault_select; unsigned long pfault_compare; + bool cputm_enabled; + __u64 cputm_start; }; struct kvm_vm_stat { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2118a2250ac7a..76b99149dc652 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -158,6 +158,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, kvm->arch.epoch -= *delta; kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.sie_block->epoch -= *delta; + if (vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start += *delta; } } return NOTIFY_OK; @@ -1429,16 +1431,78 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_start != 0); + vcpu->arch.cputm_start = get_tod_clock_fast(); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_start == 0); + vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; + vcpu->arch.cputm_start = 0; +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_enabled); + vcpu->arch.cputm_enabled = true; + __start_cpu_timer_accounting(vcpu); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(!vcpu->arch.cputm_enabled); + __stop_cpu_timer_accounting(vcpu); + vcpu->arch.cputm_enabled = false; +} + +static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __enable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + +static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __disable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + /* set the cpu timer - may only be called from the VCPU thread itself */ void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) { + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + if (vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start = get_tod_clock_fast(); vcpu->arch.sie_block->cputm = cputm; + preempt_enable(); } -/* get the cpu timer - can also be called from other VCPU threads */ +/* update and get the cpu timer - can also be called from other VCPU threads */ __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) { - return vcpu->arch.sie_block->cputm; + __u64 value; + int me; + + if (unlikely(!vcpu->arch.cputm_enabled)) + return vcpu->arch.sie_block->cputm; + + me = get_cpu(); /* also protects from TOD sync and vcpu_load/put */ + value = vcpu->arch.sie_block->cputm; + if (likely(me == vcpu->cpu)) { + /* the VCPU itself will always read consistent values */ + value -= get_tod_clock_fast() - vcpu->arch.cputm_start; + } + put_cpu(); + return value; } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1461,12 +1525,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + if (vcpu->arch.cputm_enabled) + __start_cpu_timer_accounting(vcpu); vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { vcpu->cpu = -1; + if (vcpu->arch.cputm_enabled) + __stop_cpu_timer_accounting(vcpu); atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); @@ -2277,10 +2345,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) */ local_irq_disable(); __kvm_guest_enter(); + __disable_cpu_timer_accounting(vcpu); local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); local_irq_disable(); + __enable_cpu_timer_accounting(vcpu); __kvm_guest_exit(); local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2358,6 +2428,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } sync_regs(vcpu, kvm_run); + enable_cpu_timer_accounting(vcpu); might_fault(); rc = __vcpu_run(vcpu); @@ -2377,6 +2448,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } + disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); if (vcpu->sigset_active) From 9c23a1318eb12fcf76d9f663d2c3d88598e62a55 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 17 Feb 2016 21:53:33 +0100 Subject: [PATCH 5/9] KVM: s390: protect VCPU cpu timer with a seqcount For now, only the owning VCPU thread (that has loaded the VCPU) can get a consistent cpu timer value when calculating the delta. However, other threads might also be interested in a more recent, consistent value. Of special interest will be the timer callback of a VCPU that executes without having the VCPU loaded and could run in parallel with the VCPU thread. The cpu timer has a nice property: it is only updated by the owning VCPU thread. And speaking about accounting, a consistent value can only be calculated by looking at cputm_start and the cpu timer itself in one shot, otherwise the result might be wrong. As we only have one writing thread at a time (owning VCPU thread), we can use a seqcount instead of a seqlock and retry if the VCPU refreshed its cpu timer. This avoids any heavy locking and only introduces a counter update/check plus a handful of smp_wmb(). The owning VCPU thread should never have to retry on reads, and also for other threads this might be a very rare scenario. Please note that we have to use the raw_* variants for locking the seqcount as lockdep will produce false warnings otherwise. The rq->lock held during vcpu_load/put is also acquired from hardirq context. Lockdep cannot know that we avoid potential deadlocks by disabling preemption and thereby disable concurrent write locking attempts (via vcpu_put/load). Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 8 ++++++++ arch/s390/kvm/kvm-s390.c | 30 ++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 91796dd2a8ece..d61e64555938f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -553,6 +554,13 @@ struct kvm_vcpu_arch { unsigned long pfault_select; unsigned long pfault_compare; bool cputm_enabled; + /* + * The seqcount protects updates to cputm_start and sie_block.cputm, + * this way we can have non-blocking reads with consistent values. + * Only the owning VCPU thread (vcpu->cpu) is allowed to change these + * values and to start/stop/enable/disable cpu timer accounting. + */ + seqcount_t cputm_seqcount; __u64 cputm_start; }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 76b99149dc652..38223c4603c79 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1435,15 +1435,19 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(vcpu->arch.cputm_start != 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); vcpu->arch.cputm_start = get_tod_clock_fast(); + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); } /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(vcpu->arch.cputm_start == 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; vcpu->arch.cputm_start = 0; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); } /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ @@ -1480,28 +1484,37 @@ static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) { preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); if (vcpu->arch.cputm_enabled) vcpu->arch.cputm_start = get_tod_clock_fast(); vcpu->arch.sie_block->cputm = cputm; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); preempt_enable(); } /* update and get the cpu timer - can also be called from other VCPU threads */ __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) { + unsigned int seq; __u64 value; - int me; if (unlikely(!vcpu->arch.cputm_enabled)) return vcpu->arch.sie_block->cputm; - me = get_cpu(); /* also protects from TOD sync and vcpu_load/put */ - value = vcpu->arch.sie_block->cputm; - if (likely(me == vcpu->cpu)) { - /* the VCPU itself will always read consistent values */ - value -= get_tod_clock_fast() - vcpu->arch.cputm_start; - } - put_cpu(); + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + do { + seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); + /* + * If the writer would ever execute a read in the critical + * section, e.g. in irq context, we have a deadlock. + */ + WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); + value = vcpu->arch.sie_block->cputm; + /* if cputm_start is 0, accounting is being started/stopped */ + if (likely(vcpu->arch.cputm_start)) + value -= get_tod_clock_fast() - vcpu->arch.cputm_start; + } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); + preempt_enable(); return value; } @@ -1704,6 +1717,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + seqcount_init(&vcpu->arch.cputm_seqcount); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) From 5ebda31686af6bb70affdcc5777ebc7ed81c0eac Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Feb 2016 13:52:27 +0100 Subject: [PATCH 6/9] KVM: s390: step the VCPU timer while in enabled wait The cpu timer is a mean to measure task execution time. We want to account everything for a VCPU for which it is responsible. Therefore, if the VCPU wants to sleep, it shall be accounted for it. We can easily get this done by not disabling cpu timer accounting when scheduled out while sleeping because of enabled wait. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/s390/kvm/kvm-s390.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 38223c4603c79..b54daed49c2c6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1538,7 +1538,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); - if (vcpu->arch.cputm_enabled) + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __start_cpu_timer_accounting(vcpu); vcpu->cpu = cpu; } @@ -1546,7 +1546,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { vcpu->cpu = -1; - if (vcpu->arch.cputm_enabled) + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __stop_cpu_timer_accounting(vcpu); atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 9787299d9a290..b1f7ee3bd72df 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -54,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; } +static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT; +} + static inline int kvm_is_ucontrol(struct kvm *kvm) { #ifdef CONFIG_KVM_S390_UCONTROL From b3c17f10fa2cfc29cf35e4821275e046e725213e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Feb 2016 14:14:50 +0100 Subject: [PATCH 7/9] KVM: s390: wake up when the VCPU cpu timer expires When the VCPU cpu timer expires, we have to wake up just like when the ckc triggers. For now, setting up a cpu timer in the guest and going into enabled wait will never lead to a wakeup. This patch fixes this problem. Just as for the ckc, we have to take care of waking up too early. We have to recalculate the sleep time and go back to sleep. Please note that the timer callback calls kvm_s390_get_cpu_timer() from interrupt context. As the timer is canceled when leaving handle_wait(), and we don't do any VCPU cpu timer writes/updates in that function, we can be sure that we will never try to read the VCPU cpu timer from the same cpu that is currentyl updating the timer (deadlock). Reported-by: Sascha Silbe Tested-by: Sascha Silbe Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 48 ++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 4604e9accc650..ef84a803433ee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -909,9 +909,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); } +static u64 __calculate_sltime(struct kvm_vcpu *vcpu) +{ + u64 now, cputm, sltime = 0; + + if (ckc_interrupts_enabled(vcpu)) { + now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + /* already expired or overflow? */ + if (!sltime || vcpu->arch.sie_block->ckc <= now) + return 0; + if (cpu_timer_interrupts_enabled(vcpu)) { + cputm = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (cputm >> 63) + return 0; + return min(sltime, tod_to_ns(cputm)); + } + } else if (cpu_timer_interrupts_enabled(vcpu)) { + sltime = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (sltime >> 63) + return 0; + } + return sltime; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { - u64 now, sltime; + u64 sltime; vcpu->stat.exit_wait_state++; @@ -924,22 +950,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; /* disabled wait */ } - if (!ckc_interrupts_enabled(vcpu)) { + if (!ckc_interrupts_enabled(vcpu) && + !cpu_timer_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); __set_cpu_idle(vcpu); goto no_timer; } - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - - /* underflow */ - if (vcpu->arch.sie_block->ckc < now) + sltime = __calculate_sltime(vcpu); + if (!sltime) return 0; __set_cpu_idle(vcpu); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); - VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); + VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); @@ -966,18 +990,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; - u64 now, sltime; + u64 sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + sltime = __calculate_sltime(vcpu); /* * If the monotonic clock runs faster than the tod clock we might be * woken up too early and have to go back to sleep to avoid deadlocks. */ - if (vcpu->arch.sie_block->ckc > now && - hrtimer_forward_now(timer, ns_to_ktime(sltime))) + if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime))) return HRTIMER_RESTART; kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; From 80bc79dc0b18b17510ceb1e2d2d1999104af03c9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 2 Dec 2015 09:43:29 +0100 Subject: [PATCH 8/9] KVM: s390: enable STFLE interpretation only if enabled for the guest Not setting the facility list designation disables STFLE interpretation, this is what we want if the guest was told to not have it. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b54daed49c2c6..b6a065403bdc8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1639,7 +1639,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.cpu_id = model->cpu_id; vcpu->arch.sie_block->ibc = model->ibc; - vcpu->arch.sie_block->fac = (int) (long) model->fac->list; + if (test_kvm_facility(vcpu->kvm, 7)) + vcpu->arch.sie_block->fac = (int) (long) model->fac->list; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) From c54f0d6ae057444453f5167e66ed999e8cf26936 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 2 Dec 2015 08:53:52 +0100 Subject: [PATCH 9/9] KVM: s390: allocate only one DMA page per VM We can fit the 2k for the STFLE interpretation and the crypto control block into one DMA page. As we now only have to allocate one DMA page, we can clean up the code a bit. As a nice side effect, this also fixes a problem with crycbd alignment in case special allocation debug options are enabled, debugged by Sascha Silbe. Acked-by: Christian Borntraeger Reviewed-by: Dominik Dingel Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 23 +++++++----- arch/s390/kvm/kvm-s390.c | 60 ++++++++++++-------------------- arch/s390/kvm/kvm-s390.h | 4 +-- arch/s390/kvm/priv.c | 2 +- 4 files changed, 41 insertions(+), 48 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d61e64555938f..3c254952d3a7c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -600,15 +600,11 @@ struct s390_io_adapter { #define S390_ARCH_FAC_MASK_SIZE_U64 \ (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) -struct kvm_s390_fac { - /* facility list requested by guest */ - __u64 list[S390_ARCH_FAC_LIST_SIZE_U64]; - /* facility mask supported by kvm & hosting machine */ - __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64]; -}; - struct kvm_s390_cpu_model { - struct kvm_s390_fac *fac; + /* facility mask supported by kvm & hosting machine */ + __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; + /* facility list requested by guest (in dma page) */ + __u64 *fac_list; struct cpuid cpu_id; unsigned short ibc; }; @@ -627,6 +623,16 @@ struct kvm_s390_crypto_cb { __u8 reserved80[128]; /* 0x0080 */ }; +/* + * sie_page2 has to be allocated as DMA because fac_list and crycb need + * 31bit addresses in the sie control block. + */ +struct sie_page2 { + __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ + struct kvm_s390_crypto_cb crycb; /* 0x0800 */ + u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ +} __packed; + struct kvm_arch{ void *sca; int use_esca; @@ -647,6 +653,7 @@ struct kvm_arch{ int ipte_lock_count; struct mutex ipte_mutex; spinlock_t start_stop_lock; + struct sie_page2 *sie_page2; struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; u64 epoch; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b6a065403bdc8..c186d55b87ac3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -355,8 +355,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) if (atomic_read(&kvm->online_vcpus)) { r = -EBUSY; } else if (MACHINE_HAS_VX) { - set_kvm_facility(kvm->arch.model.fac->mask, 129); - set_kvm_facility(kvm->arch.model.fac->list, 129); + set_kvm_facility(kvm->arch.model.fac_mask, 129); + set_kvm_facility(kvm->arch.model.fac_list, 129); r = 0; } else r = -EINVAL; @@ -370,8 +370,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) if (atomic_read(&kvm->online_vcpus)) { r = -EBUSY; } else if (test_facility(64)) { - set_kvm_facility(kvm->arch.model.fac->mask, 64); - set_kvm_facility(kvm->arch.model.fac->list, 64); + set_kvm_facility(kvm->arch.model.fac_mask, 64); + set_kvm_facility(kvm->arch.model.fac_list, 64); r = 0; } mutex_unlock(&kvm->lock); @@ -654,7 +654,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, sizeof(struct cpuid)); kvm->arch.model.ibc = proc->ibc; - memcpy(kvm->arch.model.fac->list, proc->fac_list, + memcpy(kvm->arch.model.fac_list, proc->fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); } else ret = -EFAULT; @@ -688,7 +688,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) } memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); proc->ibc = kvm->arch.model.ibc; - memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); + memcpy(&proc->fac_list, kvm->arch.model.fac_list, + S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) ret = -EFAULT; kfree(proc); @@ -708,7 +709,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) } get_cpu_id((struct cpuid *) &mach->cpuid); mach->ibc = sclp.ibc; - memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, + memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); @@ -1085,16 +1086,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) cpu_id->version = 0xff; } -static int kvm_s390_crypto_init(struct kvm *kvm) +static void kvm_s390_crypto_init(struct kvm *kvm) { if (!test_kvm_facility(kvm, 76)) - return 0; - - kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), - GFP_KERNEL | GFP_DMA); - if (!kvm->arch.crypto.crycb) - return -ENOMEM; + return; + kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; kvm_s390_set_crycb_format(kvm); /* Enable AES/DEA protected key functions by default */ @@ -1104,8 +1101,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm) sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); - - return 0; } static void sca_dispose(struct kvm *kvm) @@ -1159,37 +1154,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_err; - /* - * The architectural maximum amount of facilities is 16 kbit. To store - * this amount, 2 kbyte of memory is required. Thus we need a full - * page to hold the guest facility list (arch.model.fac->list) and the - * facility mask (arch.model.fac->mask). Its address size has to be - * 31 bits and word aligned. - */ - kvm->arch.model.fac = - (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!kvm->arch.model.fac) + kvm->arch.sie_page2 = + (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!kvm->arch.sie_page2) goto out_err; /* Populate the facility mask initially. */ - memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, + memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) - kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; + kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; else - kvm->arch.model.fac->mask[i] = 0UL; + kvm->arch.model.fac_mask[i] = 0UL; } /* Populate the facility list initially. */ - memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, + kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; + memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); kvm->arch.model.ibc = sclp.ibc & 0x0fff; - if (kvm_s390_crypto_init(kvm) < 0) - goto out_err; + kvm_s390_crypto_init(kvm); spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) @@ -1225,8 +1213,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; out_err: - kfree(kvm->arch.crypto.crycb); - free_page((unsigned long)kvm->arch.model.fac); + free_page((unsigned long)kvm->arch.sie_page2); debug_unregister(kvm->arch.dbf); sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); @@ -1272,10 +1259,9 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); - free_page((unsigned long)kvm->arch.model.fac); sca_dispose(kvm); debug_unregister(kvm->arch.dbf); - kfree(kvm->arch.crypto.crycb); + free_page((unsigned long)kvm->arch.sie_page2); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); @@ -1640,7 +1626,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.cpu_id = model->cpu_id; vcpu->arch.sie_block->ibc = model->ibc; if (test_kvm_facility(vcpu->kvm, 7)) - vcpu->arch.sie_block->fac = (int) (long) model->fac->list; + vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index b1f7ee3bd72df..8621ab00ec8e1 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -160,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) /* test availability of facility in a kvm instance */ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) { - return __test_facility(nr, kvm->arch.model.fac->mask) && - __test_facility(nr, kvm->arch.model.fac->list); + return __test_facility(nr, kvm->arch.model.fac_mask) && + __test_facility(nr, kvm->arch.model.fac_list); } static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index add9909459864..f218ccf016c87 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -354,7 +354,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * We need to shift the lower 32 facility bits (bit 0-31) from a u64 * into a u32 memory representation. They will remain bits 0-31. */ - fac = *vcpu->kvm->arch.model.fac->list >> 32; + fac = *vcpu->kvm->arch.model.fac_list >> 32; rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc)