From ec6449a9c2296b1c04f6219f7473e0c2fedecfed Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 20 Nov 2017 12:10:15 +0100
Subject: [PATCH 01/28] KVM: arm/arm64: Don't enable/disable physical timer
 access on VHE

After the timer optimization rework we accidentally end up calling
physical timer enable/disable functions on VHE systems, which is neither
needed nor correct, since the CNTHCTL_EL2 register format is
different when HCR_EL2.E2H is set.

CNTHCTL_EL2 is initialized in kvm_timer_init_vhe() when CPUs come
online, so we don't have to call these functions on VHE systems;
dropping the calls also lets us inline the non-VHE functionality.
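
As a hedged illustration (a userspace sketch, not kernel code; the
E2H == 1 bit positions follow the shift-by-10 handling in
kvm_timer_init_vhe()):

    #include <stdio.h>

    #define CNTHCTL_EL1PCTEN (1UL << 0) /* E2H == 0: EL1 phys counter enable */
    #define CNTHCTL_EL1PCEN  (1UL << 1) /* E2H == 0: EL1 phys timer enable */

    int main(void)
    {
        unsigned long nvhe = CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;

        /* With HCR_EL2.E2H == 1 the equivalent enable bits sit 10
         * bits higher (which kvm_timer_init_vhe() accounts for), so
         * writing the E2H == 0 masks on a VHE system flips
         * unrelated bits. */
        printf("E2H=0 mask: 0x%lx\n", nvhe);
        printf("E2H=1 mask: 0x%lx\n", nvhe << 10);
        return 0;
    }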

Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_arch_timer.h |  3 ---
 virt/kvm/arm/arch_timer.c    |  6 -----
 virt/kvm/arm/hyp/timer-sr.c  | 48 +++++++++++++++---------------------
 3 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 01ee473517e25..6e45608b23998 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -93,7 +93,4 @@ void kvm_timer_init_vhe(void);
 #define vcpu_vtimer(v)	(&(v)->arch.timer_cpu.vtimer)
 #define vcpu_ptimer(v)	(&(v)->arch.timer_cpu.ptimer)
 
-void enable_el1_phys_timer_access(void);
-void disable_el1_phys_timer_access(void);
-
 #endif
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 4151250ce8da9..190c99ed1b736 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -479,9 +479,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 
 	vtimer_restore_state(vcpu);
 
-	if (has_vhe())
-		disable_el1_phys_timer_access();
-
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu);
 }
@@ -510,9 +507,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	if (unlikely(!timer->enabled))
 		return;
 
-	if (has_vhe())
-		enable_el1_phys_timer_access();
-
 	vtimer_save_state(vcpu);
 
 	/*
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f39861639f08f..f24404b3c8dff 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,42 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
 	write_sysreg(cntvoff, cntvoff_el2);
 }
 
-void __hyp_text enable_el1_phys_timer_access(void)
-{
-	u64 val;
-
-	/* Allow physical timer/counter access for the host */
-	val = read_sysreg(cnthctl_el2);
-	val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
-	write_sysreg(val, cnthctl_el2);
-}
-
-void __hyp_text disable_el1_phys_timer_access(void)
-{
-	u64 val;
-
-	/*
-	 * Disallow physical timer access for the guest
-	 * Physical counter access is allowed
-	 */
-	val = read_sysreg(cnthctl_el2);
-	val &= ~CNTHCTL_EL1PCEN;
-	val |= CNTHCTL_EL1PCTEN;
-	write_sysreg(val, cnthctl_el2);
-}
-
 void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
 {
 	/*
 	 * We don't need to do this for VHE since the host kernel runs in EL2
 	 * with HCR_EL2.TGE ==1, which makes those bits have no impact.
 	 */
-	if (!has_vhe())
-		enable_el1_phys_timer_access();
+	if (!has_vhe()) {
+		u64 val;
+
+		/* Allow physical timer/counter access for the host */
+		val = read_sysreg(cnthctl_el2);
+		val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+		write_sysreg(val, cnthctl_el2);
+	}
 }
 
 void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
 {
-	if (!has_vhe())
-		disable_el1_phys_timer_access();
+	if (!has_vhe()) {
+		u64 val;
+
+		/*
+		 * Disallow physical timer access for the guest
+		 * Physical counter access is allowed
+		 */
+		val = read_sysreg(cnthctl_el2);
+		val &= ~CNTHCTL_EL1PCEN;
+		val |= CNTHCTL_EL1PCTEN;
+		write_sysreg(val, cnthctl_el2);
+	}
 }

From 285a90e36b138b707c4a9850f2500774b7191c99 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Fri, 17 Nov 2017 17:58:21 +0000
Subject: [PATCH 02/28] KVM: arm/arm64: VGIC: extend !vgic_is_initialized guard

Commit f39d16cbabf9 ("KVM: arm/arm64: Guard kvm_vgic_map_is_active against
!vgic_initialized") introduced a check whether the VGIC has been
initialized before accessing the spinlock and the VGIC data structure.
However the vgic_get_irq() call in the variable declaration sneaked
through the net, so let's make sure that it also gets called only
after we have actually allocated the arrays this function accesses.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic/vgic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index b168a328a9e07..786cce7bd2ec6 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -823,13 +823,14 @@ void vgic_kick_vcpus(struct kvm *kvm)
 
 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 {
-	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+	struct vgic_irq *irq;
 	bool map_is_active;
 	unsigned long flags;
 
 	if (!vgic_initialized(vcpu->kvm))
 		return false;
 
+	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 	spin_lock_irqsave(&irq->irq_lock, flags);
 	map_is_active = irq->hw && irq->active;
 	spin_unlock_irqrestore(&irq->irq_lock, flags);

From 150009e2c70cc3c6e97f00e7595055765d32fb85 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Nov 2017 17:58:15 +0000
Subject: [PATCH 03/28] KVM: arm/arm64: vgic-irqfd: Fix MSI entry allocation

Using the size of the structure we're allocating is a good idea
and avoids any surprise... In this case, we're happily confusing
kvm_kernel_irq_routing_entry and kvm_irq_routing_entry...
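
For illustration, a minimal userspace sketch of the sizeof(*ptr)
idiom the fix adopts; struct entry and the sizes are invented for the
example, not KVM types:

    #include <stdio.h>
    #include <stdlib.h>

    struct entry { long payload[4]; };

    int main(void)
    {
        int nr = 8;
        /* sizeof(*entries) always tracks the type actually pointed
         * to, so the allocation can't silently use the size of a
         * similarly named but different structure. */
        struct entry *entries = calloc(nr, sizeof(*entries));

        if (!entries)
            return 1;
        printf("allocated %zu bytes\n", nr * sizeof(*entries));
        free(entries);
        return 0;
    }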

Fixes: 95b110ab9a09 ("KVM: arm/arm64: Enable irqchip routing")
Cc: stable@vger.kernel.org # 4.8
Reported-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-irqfd.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index b7baf581611ae..99e026d2dade9 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -112,8 +112,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
 	u32 nr = dist->nr_spis;
 	int i, ret;
 
-	entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry),
-			  GFP_KERNEL);
+	entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
 	if (!entries)
 		return -ENOMEM;
 

From ddb4b0102cb9cdd2398d98b3e1e024e08a2f4239 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Nov 2017 17:58:16 +0000
Subject: [PATCH 04/28] KVM: arm/arm64: vgic: Preserve the previous read
 from the pending table

The current pending table parsing code assumes that we keep the
previous read of the pending bits, but it declares that variable
inside the loop body, so the value is discarded on each iteration.

We end up using whatever happens to be on the stack. Who knows, it
might just be the right thing...
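
A compilable toy model of the intended caching pattern; read_byte()
and the values are stand-ins, not KVM code:

    #include <stdio.h>

    static int read_byte(int offset)
    {
        return offset * 3; /* pretend this is an expensive read */
    }

    int main(void)
    {
        int last_offset = -1;
        int val = 0; /* must outlive the loop body to cache the read */

        for (int i = 0; i < 6; i++) {
            int offset = i / 2;

            /* Had 'val' been declared inside this loop, it would be
             * re-created on every iteration and the cached value
             * used below would be whatever happened to be on the
             * stack. */
            if (offset != last_offset) {
                val = read_byte(offset);
                last_offset = offset;
            }
            printf("i=%d uses val=%d\n", i, val);
        }
        return 0;
    }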

Fixes: 280771252c1ba ("KVM: arm64: vgic-v3: KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES")
Cc: stable@vger.kernel.org # 4.12
Reported-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 2f05f732d3fd4..f47e8481fa452 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -327,13 +327,13 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
 	int last_byte_offset = -1;
 	struct vgic_irq *irq;
 	int ret;
+	u8 val;
 
 	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
 		int byte_offset, bit_nr;
 		struct kvm_vcpu *vcpu;
 		gpa_t pendbase, ptr;
 		bool stored;
-		u8 val;
 
 		vcpu = irq->target_vcpu;
 		if (!vcpu)

From 64afe6e9eb4841f35317da4393de21a047a883b3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Nov 2017 17:58:17 +0000
Subject: [PATCH 05/28] KVM: arm/arm64: vgic-its: Preserve the previous
 read from the pending table

The current pending table parsing code assumes that we keep the
previous read of the pending bits, but it declares that variable
inside the loop body, so the value is discarded on each iteration.

We end up using whatever happens to be on the stack. Who knows, it
might just be the right thing...

Fixes: 33d3bc9556a7d ("KVM: arm64: vgic-its: Read initial LPI pending table")
Cc: stable@vger.kernel.org # 4.8
Reported-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-its.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 1f761a9991e7d..cb2d0a2dbe5af 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -421,6 +421,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 	u32 *intids;
 	int nr_irqs, i;
 	unsigned long flags;
+	u8 pendmask;
 
 	nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
 	if (nr_irqs < 0)
@@ -428,7 +429,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < nr_irqs; i++) {
 		int byte_offset, bit_nr;
-		u8 pendmask;
 
 		byte_offset = intids[i] / BITS_PER_BYTE;
 		bit_nr = intids[i] % BITS_PER_BYTE;

From 686f294f2f1ae40705283dd413ca1e4c14f20f93 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Nov 2017 17:58:18 +0000
Subject: [PATCH 06/28] KVM: arm/arm64: vgic-its: Check result of allocation
 before use

We're missing a test against NULL after the allocation.

Fixes: 6d03a68f8054 ("KVM: arm64: vgic-its: Turn device_id validation into generic ID validation")
Cc: stable@vger.kernel.org # 4.8
Reported-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-its.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index cb2d0a2dbe5af..8e633bd9cc1e7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -821,6 +821,8 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
 		return E_ITS_MAPC_COLLECTION_OOR;
 
 	collection = kzalloc(sizeof(*collection), GFP_KERNEL);
+	if (!collection)
+		return -ENOMEM;
 
 	collection->collection_id = coll_id;
 	collection->target_addr = COLLECTION_NOT_MAPPED;

From a05d1c0d03fd60ed487991e73850421e735c0135 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Nov 2017 17:58:19 +0000
Subject: [PATCH 07/28] KVM: arm/arm64: vgic-v4: Only perform an unmap for
 valid vLPIs

Before performing an unmap, let's check that what we have was
really mapped in the first place.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic/vgic-v4.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 53c324aa44efa..4a37292855bc7 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -337,8 +337,10 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
 		goto out;
 
 	WARN_ON(!(irq->hw && irq->host_irq == virq));
-	irq->hw = false;
-	ret = its_unmap_vlpi(virq);
+	if (irq->hw) {
+		irq->hw = false;
+		ret = its_unmap_vlpi(virq);
+	}
 
 out:
 	mutex_unlock(&its->its_lock);

From 26aa7b3b1c0fb3f1a6176a0c1847204ef4355693 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Thu, 16 Nov 2017 17:58:20 +0000
Subject: [PATCH 08/28] arm64: KVM: fix VTTBR_BADDR_MASK BUG_ON off-by-one

VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 47-bit addresses (instead of 48-bit) and also
insufficiently checking the alignment. This patch fixes it.

As an example, with 4k pages, before this patch we have:

  PHYS_MASK_SHIFT = 48
  VTTBR_X = 37 - 24 = 13
  VTTBR_BADDR_SHIFT = 13 - 1 = 12
  VTTBR_BADDR_MASK = ((1 << 35) - 1) << 12 = 0x00007ffffffff000

Which is wrong, because the mask doesn't allow bit 47 of the VTTBR
address to be set, and only requires the address to be 12-bit (4k)
aligned, while it actually needs to be 13-bit (8k) aligned because we
concatenate two 4k tables.

With this patch, the mask becomes 0x0000ffffffffe000, which is what we
want.
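
The arithmetic can be checked in isolation; a small userspace program
reproducing both masks from the 4k-page values quoted above:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long x = 13;    /* VTTBR_X for 4k pages */
        unsigned long long phys = 48; /* PHYS_MASK_SHIFT */

        unsigned long long old_mask =
            ((1ULL << (phys - x)) - 1) << (x - 1); /* off by one */
        unsigned long long new_mask =
            ((1ULL << (phys - x)) - 1) << x;

        printf("old: 0x%016llx\n", old_mask); /* 0x00007ffffffff000 */
        printf("new: 0x%016llx\n", new_mask); /* 0x0000ffffffffe000 */
        return 0;
    }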

Fixes: 0369f6a34b9f ("arm64: KVM: EL2 register definitions")
Cc: <stable@vger.kernel.org> # 3.11.x
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7f069ff37f06c..715d395ef45bb 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -170,8 +170,7 @@
 #define VTCR_EL2_FLAGS			(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
 #define VTTBR_X				(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
 
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  (UL(48))
 #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 

From 5553b142be11e794ebc0805950b2e8313f93d718 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Nov 2017 17:58:21 +0000
Subject: [PATCH 09/28] arm: KVM: Fix VTTBR_BADDR_MASK BUG_ON off-by-one

VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 39-bit addresses (instead of 40-bit) and also
insufficiently checking the alignment. This patch fixes it.

This patch is the 32-bit counterpart of Kristina's arm64 fix, and
she deserves the actual kudos for pinpointing that one.

Fixes: f7ed45be3ba52 ("KVM: ARM: World-switch implementation")
Cc: <stable@vger.kernel.org> # 3.9
Reported-by: Kristina Martsenko <kristina.martsenko@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c8781450905be..3ab8b3781bfec 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
 #else
 #define VTTBR_X		(5 - KVM_T0SZ)
 #endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
 #define VTTBR_VMID_SHIFT  _AC(48, ULL)
 #define VTTBR_VMID_MASK(size)	(_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 

From 696673d192f52c2c5a702224ee21f005318a844b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 16 Nov 2017 15:39:19 +0000
Subject: [PATCH 10/28] KVM: arm/arm64: debug: Introduce helper for single-step
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After emulating instructions we may want to return to user-space to
handle single-step debugging. Introduce a helper function which, if
single-step is enabled, sets up the run structure for the return and
returns true.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |  5 +++++
 arch/arm64/include/asm/kvm_host.h |  1 +
 arch/arm64/kvm/debug.c            | 21 +++++++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 242151ea69087..a9f7d3f47134a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -285,6 +285,11 @@ static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
+					     struct kvm_run *run)
+{
+	return false;
+}
 
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 674912d7a5719..ea6cb5b24258b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -370,6 +370,7 @@ void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index dbadfaf850a7d..fa63b28c65e08 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
 		}
 	}
 }
+
+
+/*
+ * After successfully emulating an instruction, we might want to
+ * return to user space with a KVM_EXIT_DEBUG. We can only do this
+ * once the emulation is complete, though, so for userspace emulations
+ * we have to wait until we have re-entered KVM before calling this
+ * helper.
+ *
+ * Return true (and set exit_reason) to return to userspace or false
+ * if no further action is required.
+ */
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		run->exit_reason = KVM_EXIT_DEBUG;
+		run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
+		return true;
+	}
+	return false;
+}

From 7226bc2e126dfb6d81e85a75f373ea4cc3619406 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 16 Nov 2017 15:39:20 +0000
Subject: [PATCH 11/28] kvm: arm64: handle single-stepping trapped instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we are using guest debug to single-step the guest, we need to ensure
that we exit after emulating the instruction. This only affects
instructions completely emulated by the kernel. For instructions
emulated in userspace, we need to exit and return to complete the
emulation.

The kvm_arm_handle_step_debug() helper sets up the necessary exit
state if needed.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 49 +++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index b712479954692..029c28dd25e96 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -186,6 +186,40 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 	return arm_exit_handlers[hsr_ec];
 }
 
+/*
+ * We may be single-stepping an emulated instruction. If the emulation
+ * has been completed in the kernel, we can return to userspace with a
+ * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
+ * emulation first.
+ */
+static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	int handled;
+
+	/*
+	 * See ARM ARM B1.14.1: "Hyp traps on instructions
+	 * that fail their condition code check"
+	 */
+	if (!kvm_condition_valid(vcpu)) {
+		kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+		handled = 1;
+	} else {
+		exit_handle_fn exit_handler;
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+		handled = exit_handler(vcpu, run);
+	}
+
+	/*
+	 * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
+	 * structure if we need to return to userspace.
+	 */
+	if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
+		handled = 0;
+
+	return handled;
+}
+
 /*
  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
  * proper exit to userspace.
@@ -193,8 +227,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		       int exception_index)
 {
-	exit_handle_fn exit_handler;
-
 	if (ARM_SERROR_PENDING(exception_index)) {
 		u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
 
@@ -222,18 +254,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		kvm_inject_vabt(vcpu);
 		return 1;
 	case ARM_EXCEPTION_TRAP:
-		/*
-		 * See ARM ARM B1.14.1: "Hyp traps on instructions
-		 * that fail their condition code check"
-		 */
-		if (!kvm_condition_valid(vcpu)) {
-			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			return 1;
-		}
-
-		exit_handler = kvm_get_exit_handler(vcpu);
-
-		return exit_handler(vcpu, run);
+		return handle_trap_exceptions(vcpu, run);
 	case ARM_EXCEPTION_HYP_GONE:
 		/*
 		 * EL2 has been reset to the hyp-stub. This happens when a guest

From 1eb591288b956bdd75e464e69b6b8207ffa6e5e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 16 Nov 2017 15:39:21 +0000
Subject: [PATCH 12/28] kvm: arm64: handle single-step of userspace mmio
 instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The system state of KVM when using userspace emulation is not complete
until we return into KVM_RUN. To handle mmio-related updates we wait
until they have been committed and then schedule our KVM_EXIT_DEBUG.

The kvm_arm_handle_step_debug() helper tells us if we need to return
and sets up the exit_reason for us.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index a6524ff27de49..322c570d211e9 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -628,6 +628,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
 		if (ret)
 			return ret;
+		if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
+			return 0;
+
 	}
 
 	if (run->immediate_exit)

From e70dce73befcf96607bc6e24c2c8f84229d6721e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 23 Nov 2017 12:11:33 +0000
Subject: [PATCH 13/28] kvm: arm64: handle single-step during SError exceptions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When an SError arrives during single-step, both the SError and debug
exceptions may be pending when the step is completed, and the
architecture doesn't define the ordering of the two. This means that
we can observe an SError even though we've just completed a step,
without receiving a debug exception. In that case the DBG_SPSR_SS bit
will have flipped as the instruction executed. After handling the
abort in handle_exit() we test whether the bit is clear and we were
single-stepping before deciding if we need to exit to user space.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 029c28dd25e96..304203fa9e330 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -28,6 +28,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_psci.h>
+#include <asm/debug-monitors.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -252,7 +253,12 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
 		kvm_inject_vabt(vcpu);
-		return 1;
+		/* We may still need to return for single-step */
+		if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+			&& kvm_arm_handle_step_debug(vcpu, run))
+			return 0;
+		else
+			return 1;
 	case ARM_EXCEPTION_TRAP:
 		return handle_trap_exceptions(vcpu, run);
 	case ARM_EXCEPTION_HYP_GONE:

From e3feebf81744acd8b581e5eb58a93e8fdcf042a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 23 Nov 2017 12:11:34 +0000
Subject: [PATCH 14/28] kvm: arm64: handle single-step of hyp emulated mmio
 instructions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a fast-path of MMIO emulation inside hyp mode. The handling
of single-step is broadly the same as in kvm_arm_handle_step_debug(),
except we just set up the ESR/HSR values so that handle_exit() does
the correct thing as we exit.

For the case of an emulated illegal access causing an SError we will
exit via the ARM_EXCEPTION_EL1_SERROR path in handle_exit(). We behave
as we would during a real SError and clear the DBG_SPSR_SS bit for the
emulated instruction.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp/switch.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 525c01f488678..f7c651f3a8c0e 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -22,6 +22,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
 
 static bool __hyp_text __fpsimd_enabled_nvhe(void)
 {
@@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
 	return true;
 }
 
-static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
+/* Skip an instruction which has been emulated. Returns true if
+ * execution can continue or false if we need to exit hyp mode because
+ * single-step was in effect.
+ */
+static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 {
 	*vcpu_pc(vcpu) = read_sysreg_el2(elr);
 
@@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 
 	write_sysreg_el2(*vcpu_pc(vcpu), elr);
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		vcpu->arch.fault.esr_el2 =
+			(ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
+		return false;
+	} else {
+		return true;
+	}
 }
 
 int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -342,13 +355,21 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 			int ret = __vgic_v2_perform_cpuif_access(vcpu);
 
 			if (ret == 1) {
-				__skip_instr(vcpu);
-				goto again;
+				if (__skip_instr(vcpu))
+					goto again;
+				else
+					exit_code = ARM_EXCEPTION_TRAP;
 			}
 
 			if (ret == -1) {
-				/* Promote an illegal access to an SError */
-				__skip_instr(vcpu);
+				/* Promote an illegal access to an
+				 * SError. If we would be returning
+				 * due to single-step clear the SS
+				 * bit so handle_exit knows what to
+				 * do after dealing with the error.
+				 */
+				if (!__skip_instr(vcpu))
+					*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
 				exit_code = ARM_EXCEPTION_EL1_SERROR;
 			}
 
@@ -363,8 +384,10 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 		int ret = __vgic_v3_perform_cpuif_access(vcpu);
 
 		if (ret == 1) {
-			__skip_instr(vcpu);
-			goto again;
+			if (__skip_instr(vcpu))
+				goto again;
+			else
+				exit_code = ARM_EXCEPTION_TRAP;
 		}
 
 		/* 0 falls through to be handled out of EL2 */

From 22601127c0faa5db70ab88f23af11cb23c8f6cdf Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 29 Nov 2017 17:05:16 +0100
Subject: [PATCH 15/28] KVM: arm/arm64: Avoid attempting to load timer vgic
 state without a vgic

The timer optimization patches inadvertently changed the logic to
always load the timer state as if we had a vgic, even when we don't.

Fix this by doing the usual irqchip_in_kernel() check and calling the
appropriate load function.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 190c99ed1b736..f9555b1e7f158 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -835,7 +835,10 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
 	preempt_disable();
 	timer->enabled = 1;
-	kvm_timer_vcpu_load_vgic(vcpu);
+	if (!irqchip_in_kernel(vcpu->kvm))
+		kvm_timer_vcpu_load_user(vcpu);
+	else
+		kvm_timer_vcpu_load_vgic(vcpu);
 	preempt_enable();
 
 	return 0;

From 58d0d19a204604ca0da26058828a53558b265da3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 28 Nov 2017 15:18:19 +0000
Subject: [PATCH 16/28] kvm: arm: don't treat unavailable HYP mode as an error

Since it is perfectly legal to run the kernel at EL1, it is not
actually an error if HYP mode is not available when attempting to
initialize KVM, given that KVM support cannot be built as a module.
So demote the kvm_err() to kvm_info(), which prevents the error from
appearing on an otherwise 'quiet' console.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 322c570d211e9..ca65d06b38a8e 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1507,7 +1507,7 @@ int kvm_arch_init(void *opaque)
 	bool in_hyp_mode;
 
 	if (!is_hyp_mode_available()) {
-		kvm_err("HYP mode not available\n");
+		kvm_info("HYP mode not available\n");
 		return -ENODEV;
 	}
 

From 7465894e90e5a47e0e52aa5f1f708653fc40020f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 30 Nov 2017 17:00:30 +0000
Subject: [PATCH 17/28] KVM: arm/arm64: Fix spinlock acquisition in
 vgic_set_owner

vgic_set_owner acquires the irq lock without disabling interrupts,
resulting in a lockdep splat (an interrupt could fire and result
in the same lock being taken if the same virtual irq is to be
injected).

In practice, it is almost impossible to trigger this bug, but
better safe than sorry. Convert the lock acquisition to a
spin_lock_irqsave() and keep lockdep happy.

Reported-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic/vgic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 786cce7bd2ec6..ecb8e25f5fe56 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -492,6 +492,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 {
 	struct vgic_irq *irq;
+	unsigned long flags;
 	int ret = 0;
 
 	if (!vgic_initialized(vcpu->kvm))
@@ -502,12 +503,12 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 		return -EINVAL;
 
 	irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
-	spin_lock(&irq->irq_lock);
+	spin_lock_irqsave(&irq->irq_lock, flags);
 	if (irq->owner && irq->owner != owner)
 		ret = -EEXIST;
 	else
 		irq->owner = owner;
-	spin_unlock(&irq->irq_lock);
+	spin_unlock_irqrestore(&irq->irq_lock, flags);
 
 	return ret;
 }

From 6b2ad81bcfedaf36ceb8e6e71a58ad4ebd716313 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Mon, 27 Nov 2017 19:17:18 +0100
Subject: [PATCH 18/28] KVM: arm/arm64: kvm_arch_destroy_vm cleanups

kvm_vgic_vcpu_destroy already gets called from kvm_vgic_destroy for
each vcpu, so we don't have to call it from kvm_arch_vcpu_free.

Additionally the other architectures set kvm->online_vcpus to zero
after freeing them. We might as well do that for ARM too.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index ca65d06b38a8e..675844c2174a2 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -188,6 +188,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 			kvm->vcpus[i] = NULL;
 		}
 	}
+	atomic_set(&kvm->online_vcpus, 0);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -296,7 +297,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_memory_caches(vcpu);
 	kvm_timer_vcpu_terminate(vcpu);
-	kvm_vgic_vcpu_destroy(vcpu);
 	kvm_pmu_vcpu_destroy(vcpu);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);

From fc396e066318c0a02208c1d3f0b62950a7714999 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sun, 3 Dec 2017 23:54:41 +0100
Subject: [PATCH 19/28] KVM: arm/arm64: Fix broken GICH_ELRSR big endian
 conversion

We are incorrectly rearranging 32-bit words inside a 64-bit typed
value for big-endian systems, which would result in never marking a
virtual interrupt as inactive on big-endian systems (assuming 32 or
fewer LRs on the hardware).  Fix this by not doing any word order
manipulation for the typed values.
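
The underlying point is that byte order affects a u64's in-memory
layout, not its arithmetic value; a tiny stand-alone demonstration
(the values are arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t elrsr0 = 0x11111111, elrsr1 = 0x22222222;
        uint64_t v = ((uint64_t)elrsr1 << 32) | elrsr0;

        /* Bit 0 of 'v' is bit 0 of elrsr0 on both little- and
         * big-endian hosts; shifts and ORs are defined on values,
         * not memory layout, so no #ifdef is needed for a typed u64
         * that is only ever read back as a u64. */
        printf("v = 0x%016llx, LR0 bit = %llu\n",
               (unsigned long long)v, (unsigned long long)(v & 1));
        return 0;
    }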

Cc: <stable@vger.kernel.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/hyp/vgic-v2-sr.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index a3f18d3623661..d7fd46fe9efb3 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -34,11 +34,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
 	else
 		elrsr1 = 0;
 
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
 	cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
 }
 
 static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)

From f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 14 Nov 2017 16:54:23 -0500
Subject: [PATCH 20/28] x86,kvm: move qemu/guest FPU switching out to vcpu_run
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, every time a VCPU is scheduled out, the host kernel will
first save the guest FPU/xstate context, then load the qemu userspace
FPU context, only to then immediately save the qemu userspace FPU
context back to memory. When scheduling in a VCPU, the same extraneous
FPU loads and saves are done.

This could be avoided by moving from a model where the guest FPU is
loaded and stored with preemption disabled, to a model where the
qemu userspace FPU is swapped out for the guest FPU context for
the duration of the KVM_RUN ioctl.

This is done under the VCPU mutex, which is also taken when other
tasks inspect the VCPU FPU context, so the code should already be
safe for this change. That should come as no surprise, given that
s390 already has this optimization.
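
A toy model of the new lifecycle; struct fpu, save() and load() here
are stand-ins, not the kernel's types or helpers:

    #include <stdio.h>

    struct fpu { int regs; };

    static struct fpu hw, user_fpu, guest_fpu;

    static void save(struct fpu *dst)       { *dst = hw; }
    static void load(const struct fpu *src) { hw = *src; }

    /* New model: one swap per KVM_RUN ioctl.  While vcpu_run() is in
     * progress the guest FPU state simply lives in the registers and
     * is context-switched by the scheduler like any other task's FPU
     * state, instead of being swapped on every preemption. */
    static void kvm_run_ioctl(void)
    {
        save(&user_fpu);
        load(&guest_fpu);  /* kvm_load_guest_fpu() */

        /* ... vcpu_run() main loop ... */

        save(&guest_fpu);
        load(&user_fpu);   /* kvm_put_guest_fpu() */
    }

    int main(void)
    {
        kvm_run_ioctl();
        printf("one swap per ioctl, not per preemption\n");
        return 0;
    }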

This can fix a bug where KVM calls get_user_pages while owning the
FPU, and the file system ends up requesting the FPU again:

    [258270.527947]  __warn+0xcb/0xf0
    [258270.527948]  warn_slowpath_null+0x1d/0x20
    [258270.527951]  kernel_fpu_disable+0x3f/0x50
    [258270.527953]  __kernel_fpu_begin+0x49/0x100
    [258270.527955]  kernel_fpu_begin+0xe/0x10
    [258270.527958]  crc32c_pcl_intel_update+0x84/0xb0
    [258270.527961]  crypto_shash_update+0x3f/0x110
    [258270.527968]  crc32c+0x63/0x8a [libcrc32c]
    [258270.527975]  dm_bm_checksum+0x1b/0x20 [dm_persistent_data]
    [258270.527978]  node_prepare_for_write+0x44/0x70 [dm_persistent_data]
    [258270.527985]  dm_block_manager_write_callback+0x41/0x50 [dm_persistent_data]
    [258270.527988]  submit_io+0x170/0x1b0 [dm_bufio]
    [258270.527992]  __write_dirty_buffer+0x89/0x90 [dm_bufio]
    [258270.527994]  __make_buffer_clean+0x4f/0x80 [dm_bufio]
    [258270.527996]  __try_evict_buffer+0x42/0x60 [dm_bufio]
    [258270.527998]  dm_bufio_shrink_scan+0xc0/0x130 [dm_bufio]
    [258270.528002]  shrink_slab.part.40+0x1f5/0x420
    [258270.528004]  shrink_node+0x22c/0x320
    [258270.528006]  do_try_to_free_pages+0xf5/0x330
    [258270.528008]  try_to_free_pages+0xe9/0x190
    [258270.528009]  __alloc_pages_slowpath+0x40f/0xba0
    [258270.528011]  __alloc_pages_nodemask+0x209/0x260
    [258270.528014]  alloc_pages_vma+0x1f1/0x250
    [258270.528017]  do_huge_pmd_anonymous_page+0x123/0x660
    [258270.528021]  handle_mm_fault+0xfd3/0x1330
    [258270.528025]  __get_user_pages+0x113/0x640
    [258270.528027]  get_user_pages+0x4f/0x60
    [258270.528063]  __gfn_to_pfn_memslot+0x120/0x3f0 [kvm]
    [258270.528108]  try_async_pf+0x66/0x230 [kvm]
    [258270.528135]  tdp_page_fault+0x130/0x280 [kvm]
    [258270.528149]  kvm_mmu_page_fault+0x60/0x120 [kvm]
    [258270.528158]  handle_ept_violation+0x91/0x170 [kvm_intel]
    [258270.528162]  vmx_handle_exit+0x1ca/0x1400 [kvm_intel]

No performance changes were detected in quick ping-pong tests on
my 4 socket system, which is expected since an FPU+xstate load is
on the order of 0.1us, while ping-ponging between CPUs is on the
order of 20us, and somewhat noisy.

Cc: stable@vger.kernel.org
Signed-off-by: Rik van Riel <riel@redhat.com>
Suggested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[Fixed a bug where reset_vcpu called put_fpu without a preceding
 load_fpu, which happened from inside the KVM_CREATE_VCPU ioctl. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 13 +++++++++++
 arch/x86/kvm/x86.c              | 39 ++++++++++++++-------------------
 include/linux/kvm_host.h        |  2 +-
 3 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 977de5fb968be..62527e053ee45 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
+	/*
+	 * QEMU userspace and the guest each have their own FPU state.
+	 * In vcpu_run, we switch between the user and guest FPU contexts.
+	 * While running a VCPU, the VCPU thread will have the guest FPU
+	 * context.
+	 *
+	 * Note that while the PKRU state lives inside the fpu registers,
+	 * it is switched out separately at VMENTER and VMEXIT time. The
+	 * "guest_fpu" state here contains the guest FPU context, with the
+	 * host PKRU bits.
+	 */
+	struct fpu user_fpu;
 	struct fpu guest_fpu;
+
 	u64 xcr0;
 	u64 guest_supported_xcr0;
 	u32 guest_xstate_size;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eee8e7faf1af5..c8da1680a7d64 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	pagefault_enable();
 	kvm_x86_ops->vcpu_put(vcpu);
-	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
 }
 
@@ -5254,13 +5253,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
 
 static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
 {
-	preempt_disable();
-	kvm_load_guest_fpu(emul_to_vcpu(ctxt));
 }
 
 static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
 {
-	preempt_enable();
 }
 
 static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
@@ -6952,7 +6948,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
-	kvm_load_guest_fpu(vcpu);
 
 	/*
 	 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
@@ -7297,12 +7292,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
+	kvm_load_guest_fpu(vcpu);
+
 	if (unlikely(vcpu->arch.complete_userspace_io)) {
 		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
 		vcpu->arch.complete_userspace_io = NULL;
 		r = cui(vcpu);
 		if (r <= 0)
-			goto out;
+			goto out_fpu;
 	} else
 		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
@@ -7311,6 +7308,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	else
 		r = vcpu_run(vcpu);
 
+out_fpu:
+	kvm_put_guest_fpu(vcpu);
 out:
 	post_kvm_run_save(vcpu);
 	kvm_sigset_deactivate(vcpu);
@@ -7704,32 +7703,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
 
+/* Swap (qemu) user FPU context for the guest FPU context. */
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->guest_fpu_loaded)
-		return;
-
-	/*
-	 * Restore all possible states in the guest,
-	 * and assume host would use all available bits.
-	 * Guest xcr0 would be loaded later.
-	 */
-	vcpu->guest_fpu_loaded = 1;
-	__kernel_fpu_begin();
+	preempt_disable();
+	copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
 	/* PKRU is separately restored in kvm_x86_ops->run.  */
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
 				~XFEATURE_MASK_PKRU);
+	preempt_enable();
 	trace_kvm_fpu(1);
 }
 
+/* When vcpu_run ends, restore user space FPU context. */
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->guest_fpu_loaded)
-		return;
-
-	vcpu->guest_fpu_loaded = 0;
+	preempt_disable();
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
-	__kernel_fpu_end();
+	copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+	preempt_enable();
 	++vcpu->stat.fpu_reload;
 	trace_kvm_fpu(0);
 }
@@ -7846,7 +7838,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		 * To avoid have the INIT path from kvm_apic_has_events() that be
 		 * called with loaded FPU and does not let userspace fix the state.
 		 */
-		kvm_put_guest_fpu(vcpu);
+		if (init_event)
+			kvm_put_guest_fpu(vcpu);
 		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
 					XFEATURE_MASK_BNDREGS);
 		if (mpx_state_buffer)
@@ -7855,6 +7848,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 					XFEATURE_MASK_BNDCSR);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+		if (init_event)
+			kvm_load_guest_fpu(vcpu);
 	}
 
 	if (!init_event) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 893d6d606cd0a..6bdd4b9f66115 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -232,7 +232,7 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	int guest_fpu_loaded, guest_xcr0_loaded;
+	int guest_xcr0_loaded;
 	struct swait_queue_head wq;
 	struct pid __rcu *pid;
 	int sigset_active;

From 6ab0b9feb82a7ac09956e6761fec73cd47789df5 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 14 Nov 2017 16:54:24 -0500
Subject: [PATCH 21/28] x86,kvm: remove KVM emulator get_fpu / put_fpu

Now that get_fpu and put_fpu do nothing, because the scheduler will
automatically load and restore the guest FPU context for us while we
are in this code (deep inside the vcpu_run main loop), we can get rid
of the get_fpu and put_fpu hooks.

Signed-off-by: Rik van Riel <riel@redhat.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  2 --
 arch/x86/kvm/emulate.c             | 24 ------------------------
 arch/x86/kvm/x86.c                 | 10 ----------
 3 files changed, 36 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 034caa1a084e3..b24b1c8b39798 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -214,8 +214,6 @@ struct x86_emulate_ops {
 	void (*halt)(struct x86_emulate_ctxt *ctxt);
 	void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
 	int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
-	void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
-	void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
 	int (*intercept)(struct x86_emulate_ctxt *ctxt,
 			 struct x86_instruction_info *info,
 			 enum x86_intercept_stage stage);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e7d04d0c8008d..abe74f779f9d7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op)
 
 static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
 	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 #endif
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 			  int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
 	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 #endif
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
 	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 {
-	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
 	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
 	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
 	default: BUG();
 	}
-	ctxt->ops->put_fpu(ctxt);
 }
 
 static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	ctxt->ops->get_fpu(ctxt);
 	asm volatile("fninit");
-	ctxt->ops->put_fpu(ctxt);
 	return X86EMUL_CONTINUE;
 }
 
@@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	ctxt->ops->get_fpu(ctxt);
 	asm volatile("fnstcw %0": "+m"(fcw));
-	ctxt->ops->put_fpu(ctxt);
 
 	ctxt->dst.val = fcw;
 
@@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	ctxt->ops->get_fpu(ctxt);
 	asm volatile("fnstsw %0": "+m"(fsw));
-	ctxt->ops->put_fpu(ctxt);
 
 	ctxt->dst.val = fsw;
 
@@ -4001,12 +3987,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->ops->get_fpu(ctxt);
-
 	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
 
-	ctxt->ops->put_fpu(ctxt);
-
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -4049,8 +4031,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->ops->get_fpu(ctxt);
-
 	if (size < __fxstate_size(16)) {
 		rc = fxregs_fixup(&fx_state, size);
 		if (rc != X86EMUL_CONTINUE)
@@ -4066,8 +4046,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
 
 out:
-	ctxt->ops->put_fpu(ctxt);
-
 	return rc;
 }
 
@@ -5317,9 +5295,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 
-	ctxt->ops->get_fpu(ctxt);
 	rc = asm_safe("fwait");
-	ctxt->ops->put_fpu(ctxt);
 
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8da1680a7d64..6ca747abfa2f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5251,14 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
 	emul_to_vcpu(ctxt)->arch.halt_request = 1;
 }
 
-static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
-{
-}
-
-static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
-{
-}
-
 static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
 			      struct x86_instruction_info *info,
 			      enum x86_intercept_stage stage)
@@ -5336,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = {
 	.halt                = emulator_halt,
 	.wbinvd              = emulator_wbinvd,
 	.fix_hypercall       = emulator_fix_hypercall,
-	.get_fpu             = emulator_get_fpu,
-	.put_fpu             = emulator_put_fpu,
 	.intercept           = emulator_intercept,
 	.get_cpuid           = emulator_get_cpuid,
 	.set_nmi_mask        = emulator_set_nmi_mask,

From d59d51f088014f25c2562de59b9abff4f42a7468 Mon Sep 17 00:00:00 2001
From: Andrew Honig <ahonig@google.com>
Date: Fri, 1 Dec 2017 10:21:09 -0800
Subject: [PATCH 22/28] KVM: VMX: remove I/O port 0x80 bypass on Intel hosts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes CVE-2017-1000407.

KVM allows guests to directly access I/O port 0x80 on Intel hosts.  If
the guest floods this port with writes it generates exceptions and
instability in the host kernel, leading to a crash.  With this change
guest writes to port 0x80 on Intel will behave the same as they
currently behave on AMD systems.

Prevent the flooding by removing the code that sets port 0x80 as a
passthrough port.  This is essentially the same as upstream patch
99f85a28a78e96d28907fe036e1671a218fee597, except that patch was
for AMD chipsets and this patch is for Intel.

Signed-off-by: Andrew Honig <ahonig@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Fixes: fdef3ad1b386 ("KVM: VMX: Enable io bitmaps to avoid IO port 0x80 VMEXITs")
Cc: <stable@vger.kernel.org>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4704aaf6d19e2..2fd9a8cec29a2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6755,12 +6755,7 @@ static __init int hardware_setup(void)
 	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
 
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
 	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
 
 	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 

From 2895db67b01cb875457c9c3f30a14723b6b5dfd5 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 1 Dec 2017 09:57:56 -0800
Subject: [PATCH 23/28] KVM: VMX: fix page leak in hardware_setup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vmx_io_bitmap_b should not be allocated twice.

Fixes: 23611332938d ("KVM: VMX: refactor setup of global page-sized bitmaps")
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2fd9a8cec29a2..8eba631c4dbd5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6751,7 +6751,6 @@ static __init int hardware_setup(void)
 			goto out;
 	}
 
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
 	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
 

From d809aa238744ae5b7520b73ac5411862ccfdc1bc Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 24 Nov 2017 15:00:33 +0100
Subject: [PATCH 24/28] KVM: s390: add SPDX identifiers to the remaining files

It's good to have SPDX identifiers in all files to make it easier to
audit the kernel tree for correct licenses.

Update the arch/s390/kvm/ files with the correct SPDX license
identifier based on the license text in the file itself.  The SPDX
identifier is a legally binding shorthand, which can be used instead of
the full boilerplate text.

This work is based on a script and data from Thomas Gleixner, Philippe
Ombredanne, and Kate Stewart.

Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Message-Id: <20171124140043.10062-3-gregkh@linuxfoundation.org>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/Makefile    | 1 +
 arch/s390/kvm/diag.c      | 1 +
 arch/s390/kvm/gaccess.h   | 1 +
 arch/s390/kvm/guestdbg.c  | 1 +
 arch/s390/kvm/intercept.c | 1 +
 arch/s390/kvm/interrupt.c | 1 +
 arch/s390/kvm/irq.h       | 1 +
 arch/s390/kvm/kvm-s390.c  | 1 +
 arch/s390/kvm/kvm-s390.h  | 1 +
 arch/s390/kvm/priv.c      | 1 +
 arch/s390/kvm/sigp.c      | 1 +
 arch/s390/kvm/vsie.c      | 1 +
 12 files changed, 12 insertions(+)

diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 6048b1c6e5806..2c5a8190d5cef 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 # Makefile for kernel virtual machines on s390
 #
 # Copyright IBM Corp. 2008
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index d93a2c0474bf6..481e1fe6095f3 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling diagnose instructions
  *
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index bec42b852246f..443b7fa82d568 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * access guest memory
  *
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index bcbd86621d018..d6a1bead813cf 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kvm guest debug support
  *
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8fe034beb6232..fc76a91a78209 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * in-kernel handling for sie intercepts
  *
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fa557372d600a..f37a34ea16011 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling kvm guest interrupts
  *
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e4159643df..54a4a59cb4f9c 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * s390 irqchip routines
  *
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 98ad8b9e03609..0d45e32dd868f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * hosting zSeries kernel virtual machines
  *
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 10d65dfbc3067..9777924c4b5ca 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * definition for kvm on s390
  *
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c954ac49eee47..c9d962ac140b2 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling privileged instructions
  *
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 9d592ef4104b0..d12ac5d6e8bb7 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * handling interprocessor communication
  *
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a311938b63b3b..d2b7886da4440 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * kvm nested virtualization support for s390x
  *

From 940f89a5a37789b94f332755767c556a64b004e4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 24 Nov 2017 15:00:39 +0100
Subject: [PATCH 25/28] KVM: s390: Remove redundant license text

Now that the SPDX tag is in all arch/s390/kvm/ files, the license is
identified in a specific and legally-defined manner, so the extra GPL
text can be removed as it is no longer needed.

This is part of an effort to remove the 700+ different ways that files
in the kernel describe the GPL license text, along with unneeded extras
such as the FSF address (which was sometimes incorrect anyway).

No copyright headers or other non-license-description text was removed.
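
As an illustration, a typical header in this directory reduces to the
following after this patch (a sketch based on the diag.c hunk below):

// SPDX-License-Identifier: GPL-2.0
/*
 * handling diagnose instructions
 *
 * Copyright IBM Corp. 2008, 2011
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 */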

Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Message-Id: <20171124140043.10062-9-gregkh@linuxfoundation.org>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/Makefile    | 4 ----
 arch/s390/kvm/diag.c      | 4 ----
 arch/s390/kvm/gaccess.h   | 4 ----
 arch/s390/kvm/guestdbg.c  | 4 ----
 arch/s390/kvm/intercept.c | 4 ----
 arch/s390/kvm/interrupt.c | 4 ----
 arch/s390/kvm/irq.h       | 4 ----
 arch/s390/kvm/kvm-s390.c  | 4 ----
 arch/s390/kvm/kvm-s390.h  | 4 ----
 arch/s390/kvm/priv.c      | 4 ----
 arch/s390/kvm/sigp.c      | 4 ----
 arch/s390/kvm/vsie.c      | 4 ----
 12 files changed, 48 deletions(-)

diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 2c5a8190d5cef..05ee90a5ea08b 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -2,10 +2,6 @@
 # Makefile for kernel virtual machines on s390
 #
 # Copyright IBM Corp. 2008
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License (version 2 only)
-# as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
 common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 481e1fe6095f3..89aa114a2cbad 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2011
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 443b7fa82d568..f4c51756c4623 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  */
 
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index d6a1bead813cf..b5f3e82006d0b 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
  */
 #include <linux/kvm_host.h>
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index fc76a91a78209..9c7d707158622 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f37a34ea16011..024ad8bcc5165 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2015
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  */
 
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index 54a4a59cb4f9c..484608c71dd01 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2014
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  */
 #ifndef __KVM_IRQ_H
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0d45e32dd868f..34375eed93ee2 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2009
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Heiko Carstens <heiko.carstens@de.ibm.com>
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 9777924c4b5ca..5e46ba429bcb4 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2009
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c9d962ac140b2..28b69ab56b7b3 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2013
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index d12ac5d6e8bb7..c1f5cde2c878e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2008, 2013
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index d2b7886da4440..5d6ae0326d9e8 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -4,10 +4,6 @@
  *
  * Copyright IBM Corp. 2016
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
  */
 #include <linux/vmalloc.h>

From bb64da9aba89765fee74b395967b18a7d6c364e9 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 21 Nov 2017 16:02:52 +0100
Subject: [PATCH 26/28] KVM: s390: mark irq_state.flags as non-usable

Old kernels did not check for zero in the irq_state.flags field and old
QEMUs did not zero the flags/reserved fields when calling
KVM_S390_*_IRQ_STATE.  Let's add comments to prevent future use of
these fields.
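
A minimal userspace sketch of the convention these comments preserve
(vcpu_fd is a hypothetical vcpu file descriptor and the buffer size is
arbitrary; needs <linux/kvm.h>, <sys/ioctl.h>, <string.h>, <stdint.h>):

	struct kvm_s390_irq irqs[16];
	struct kvm_s390_irq_state irq_state;
	int ret;

	memset(&irq_state, 0, sizeof(irq_state));  /* flags/reserved stay 0 */
	irq_state.buf = (__u64)(uintptr_t)irqs;
	irq_state.len = sizeof(irqs);
	/* a -ENOBUFS failure means the buffer was too small; retry bigger */
	ret = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);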

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 15 ++++++++++++---
 arch/s390/kvm/kvm-s390.c          |  6 ++++--
 include/uapi/linux/kvm.h          |  4 ++--
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f670e4b9e7f33..57d3ee9e4bde2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2901,14 +2901,19 @@ userspace buffer and its length:
 
 struct kvm_s390_irq_state {
 	__u64 buf;
-	__u32 flags;
+	__u32 flags;        /* will stay unused for compatibility reasons */
 	__u32 len;
-	__u32 reserved[4];
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
 Userspace passes in the above struct and for each pending interrupt a
 struct kvm_s390_irq is copied to the provided buffer.
 
+The structure contains a flags field and a reserved field for future
+extensions. As the kernel never checked for flags == 0 and QEMU never
+pre-zeroed flags and reserved, these fields cannot be used in the future
+without breaking compatibility.
+
 If -ENOBUFS is returned the buffer provided was too small and userspace
 may retry with a bigger buffer.
 
@@ -2932,10 +2937,14 @@ containing a struct kvm_s390_irq_state:
 
 struct kvm_s390_irq_state {
 	__u64 buf;
+	__u32 flags;        /* will stay unused for compatibility reasons */
 	__u32 len;
-	__u32 pad;
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
+The same restrictions for flags and reserved apply as for
+KVM_S390_GET_IRQ_STATE above.
+
 The userspace memory referenced by buf contains a struct kvm_s390_irq
 for each interrupt to be injected into the guest.
 If one of the interrupts could not be injected for some reason the
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 34375eed93ee2..efa439f6ffb39 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * hosting zSeries kernel virtual machines
+ * hosting IBM Z kernel virtual machines (s390x)
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2017
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -3808,6 +3808,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			r = -EINVAL;
 			break;
 		}
+		/* do not use irq_state.flags, it will break old QEMUs */
 		r = kvm_s390_set_irq_state(vcpu,
 					   (void __user *) irq_state.buf,
 					   irq_state.len);
@@ -3823,6 +3824,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			r = -EINVAL;
 			break;
 		}
+		/* do not use irq_state.flags, it will break old QEMUs */
 		r = kvm_s390_get_irq_state(vcpu,
 					   (__u8 __user *)  irq_state.buf,
 					   irq_state.len);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 282d7613fce87..496e59a2738ba 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -630,9 +630,9 @@ struct kvm_s390_irq {
 
 struct kvm_s390_irq_state {
 	__u64 buf;
-	__u32 flags;
+	__u32 flags;        /* will stay unused for compatibility reasons */
 	__u32 len;
-	__u32 reserved[4];
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
 /* for KVM_SET_GUEST_DEBUG */

From ca76ec9ca871e67d8cd0b6caba24aca3d3ac4546 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.vnet.ibm.com>
Date: Mon, 4 Dec 2017 12:19:11 +0100
Subject: [PATCH 27/28] KVM: s390: Fix skey emulation permission check

All skey functions call skey_check_enable at their start, which checks
whether we are in problem state (PSTATE) and injects a privileged
operation exception if we are.

Unfortunately, they continue processing afterwards and perform the
operation anyway, because skey_check_enable does not return an error
when the exception injection succeeded.

Let's move the PSTATE check into the skey functions and return early
when it triggers; this also means we no longer enable skey handling
in such a case.
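
The resulting shape of each handler then matches the hunks below; a
sketch of handle_iske():

	static int handle_iske(struct kvm_vcpu *vcpu)
	{
		int rc;

		/* privileged-operation check first: inject and bail out */
		if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
			return kvm_s390_inject_program_int(vcpu,
							   PGM_PRIVILEGED_OP);

		/* only then enable skeys; -EAGAIN means "retry the insn" */
		rc = try_handle_skey(vcpu);
		if (rc)
			return rc != -EAGAIN ? rc : 0;
		/* ... actual ISKE emulation continues here ... */
	}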

Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: a7e19ab ("KVM: s390: handle missing storage-key facility")
Cc: <stable@vger.kernel.org> # v4.8+
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 28b69ab56b7b3..572496c688cc0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -232,8 +232,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
 		VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
 		return -EAGAIN;
 	}
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 	return 0;
 }
 
@@ -244,6 +242,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 	int reg1, reg2;
 	int rc;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	rc = try_handle_skey(vcpu);
 	if (rc)
 		return rc != -EAGAIN ? rc : 0;
@@ -273,6 +274,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
 	int reg1, reg2;
 	int rc;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	rc = try_handle_skey(vcpu);
 	if (rc)
 		return rc != -EAGAIN ? rc : 0;
@@ -308,6 +312,9 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 	int reg1, reg2;
 	int rc;
 
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
 	rc = try_handle_skey(vcpu);
 	if (rc)
 		return rc != -EAGAIN ? rc : 0;

From b1394e745b9453dcb5b0671c205b770e87dedb87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 30 Nov 2017 19:05:45 +0100
Subject: [PATCH 28/28] KVM: x86: fix APIC page invalidation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The implementation of the unpinned APIC page didn't update the VMCS
address cache when invalidation was done through the range mmu notifiers.
This became a problem when the page notifier was removed.

Re-introduce the arch-specific helper and call it from ...range_start.
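
The helper follows the usual __weak pattern: generic code supplies an
empty default, and x86 overrides it with a strong definition that tests
whether the invalidated [start, end) range covers the cached APIC access
page (both halves appear in the hunks below):

	/* virt/kvm/kvm_main.c: no-op default for other architectures */
	__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
	{
	}

	/* arch/x86/kvm/x86.c: half-open interval test on the page's hva */
	if (start <= apic_address && apic_address < end)
		kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);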

Reported-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Fixes: 38b9917350cb ("kvm: vmx: Implement set_apic_access_page_addr")
Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2")
Cc: <stable@vger.kernel.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Tested-by: Wanpeng Li <wanpeng.li@hotmail.com>
Tested-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/x86.c              | 14 ++++++++++++++
 virt/kvm/kvm_main.c             |  8 ++++++++
 3 files changed, 25 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 62527e053ee45..5167984313282 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1448,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define put_smstate(type, buf, offset, val)                      \
 	*(type *)((buf) + (offset) - 0x7e00) = val
 
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6ca747abfa2f3..faf843c9b916e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6764,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->tlb_flush(vcpu);
 }
 
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end)
+{
+	unsigned long apic_address;
+
+	/*
+	 * The physical address of apic access page is stored in the VMCS.
+	 * Update it when it becomes invalid.
+	 */
+	apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+	if (start <= apic_address && apic_address < end)
+		kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
 	struct page *page = NULL;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c422c10cd1dd1..210bf820385a7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -135,6 +135,11 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
 static unsigned long long kvm_createvm_count;
 static unsigned long long kvm_active_vms;
 
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end)
+{
+}
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
 	if (pfn_valid(pfn))
@@ -360,6 +365,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
+
+	kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+
 	srcu_read_unlock(&kvm->srcu, idx);
 }