From 30b072ce0356e8b141f4ca6da7220486fa3641d9 Mon Sep 17 00:00:00 2001
From: Alexis Dambricourt <alexis.dambricourt@gmail.com>
Date: Mon, 4 Jul 2016 21:05:15 +0200
Subject: [PATCH 01/11] KVM: MTRR: fix kvm_mtrr_check_gfn_range_consistency
 page fault

The following #PF may occurs:
[ 1403.317041] BUG: unable to handle kernel paging request at 0000000200000068
[ 1403.317045] IP: [<ffffffffc04c20b0>] __mtrr_lookup_var_next+0x10/0xa0 [kvm]

[ 1403.317123] Call Trace:
[ 1403.317134]  [<ffffffffc04c2a65>] ? kvm_mtrr_check_gfn_range_consistency+0xc5/0x120 [kvm]
[ 1403.317143]  [<ffffffffc04ac11f>] ? tdp_page_fault+0x9f/0x2c0 [kvm]
[ 1403.317152]  [<ffffffffc0498128>] ? kvm_set_msr_common+0x858/0xc00 [kvm]
[ 1403.317161]  [<ffffffffc04b8883>] ? x86_emulate_insn+0x273/0xd30 [kvm]
[ 1403.317171]  [<ffffffffc04c04e4>] ? kvm_cpuid+0x34/0x190 [kvm]
[ 1403.317180]  [<ffffffffc04a5bb9>] ? kvm_mmu_page_fault+0x59/0xe0 [kvm]
[ 1403.317183]  [<ffffffffc0d729e1>] ? vmx_handle_exit+0x1d1/0x14a0 [kvm_intel]
[ 1403.317185]  [<ffffffffc0d75f3f>] ? atomic_switch_perf_msrs+0x6f/0xa0 [kvm_intel]
[ 1403.317187]  [<ffffffffc0d7621d>] ? vmx_vcpu_run+0x2ad/0x420 [kvm_intel]
[ 1403.317196]  [<ffffffffc04a0962>] ? kvm_arch_vcpu_ioctl_run+0x622/0x1550 [kvm]
[ 1403.317204]  [<ffffffffc049abb9>] ? kvm_arch_vcpu_load+0x59/0x210 [kvm]
[ 1403.317206]  [<ffffffff81036245>] ? __kernel_fpu_end+0x35/0x100
[ 1403.317213]  [<ffffffffc0487eb6>] ? kvm_vcpu_ioctl+0x316/0x5d0 [kvm]
[ 1403.317215]  [<ffffffff81088225>] ? do_sigtimedwait+0xd5/0x220
[ 1403.317217]  [<ffffffff811f84dd>] ? do_vfs_ioctl+0x9d/0x5c0
[ 1403.317224]  [<ffffffffc04928ae>] ? kvm_on_user_return+0x3e/0x70 [kvm]
[ 1403.317225]  [<ffffffff811f8a74>] ? SyS_ioctl+0x74/0x80
[ 1403.317227]  [<ffffffff815bf0b6>] ? entry_SYSCALL_64_fastpath+0x1e/0xa8
[ 1403.317242] RIP  [<ffffffffc04c20b0>] __mtrr_lookup_var_next+0x10/0xa0 [kvm]

At mtrr_lookup_fixed_next(), when the condition
'if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))' becomes true,
mtrr_lookup_var_start() is called with iter->range with gargabe values from the
fixed MTRR union field. Then, list_prepare_entry() do not call list_entry()
initialization, keeping a garbage pointer in iter->range which is accessed in
the following __mtrr_lookup_var_next() call.

Fixes: f571c0973e4b8c888e049b6842e4b4f93b5c609c
Signed-off-by: Alexis Dambricourt <alexis@blade-group.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mtrr.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index c146f3c262c3b..0149ac59c273c 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -539,6 +539,7 @@ static void mtrr_lookup_var_start(struct mtrr_iter *iter)
 
 	iter->fixed = false;
 	iter->start_max = iter->start;
+	iter->range = NULL;
 	iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);
 
 	__mtrr_lookup_var_next(iter);

From 9770404a0061ec46dec6e15c4b07731ce2e2d7bb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Jul 2016 14:57:24 +0200
Subject: [PATCH 02/11] KVM: SVM: fix trashing of MSR_TSC_AUX

I don't know what I was thinking when I wrote commit 46896c73c1a4 ("KVM:
svm: add support for RDTSCP", 2015-11-12); I missed write_rdtscp_aux which
obviously uses MSR_TSC_AUX.

Therefore we do need to save/restore MSR_TSC_AUX in svm_vcpu_run.

Cc: stable@vger.kernel.org
Cc: Borislav Petkov <bp@alien8.de>
Fixes: 46896c73c1a4 ("KVM: svm: add support for RDTSCP")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 16ef31b874522..f931d3e33c7f1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -41,6 +41,7 @@
 #include <asm/desc.h>
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
+#include <asm/vgtod.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -1530,9 +1531,6 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
 		}
 	}
-	/* This assumes that the kernel never uses MSR_TSC_AUX */
-	if (static_cpu_has(X86_FEATURE_RDTSCP))
-		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
 	avic_vcpu_load(vcpu, cpu);
 }
@@ -4474,6 +4472,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
 	clgi();
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
+		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
 	local_irq_enable();
 
@@ -4550,6 +4550,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 		);
 
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
+		wrmsrl(MSR_TSC_AUX, __getcpu());
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else

From 4e59516a12a6ef6dcb660cb3a3f70c64bd60cfec Mon Sep 17 00:00:00 2001
From: Peter Feiner <pfeiner@google.com>
Date: Thu, 7 Jul 2016 14:49:58 -0700
Subject: [PATCH 03/11] kvm: vmx: ensure VMCS is current while enabling PML

Between loading the new VMCS and enabling PML, the CPU was unpinned.
If the vCPU thread were migrated to another CPU in the interim (e.g.,
due to preemption or sleeping alloc_page), then the VMWRITEs to enable
PML would target the wrong VMCS -- or no VMCS at all:

  [ 2087.266950] vmwrite error: reg 200e value 3fe1d52000 (err -506126336)
  [ 2087.267062] vmwrite error: reg 812 value 1ff (err 511)
  [ 2087.267125] vmwrite error: reg 401e value 12229c00 (err 304258048)

This patch ensures that the VMCS remains current while enabling PML by
doing the VMWRITEs while the CPU is pinned. Allocation of the PML buffer
is hoisted out of the critical section.

Signed-off-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 56 ++++++++++++++++++++--------------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f271276a..e34965b37a883 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4979,6 +4979,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	if (vmx_xsaves_supported())
 		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
+	if (enable_pml) {
+		ASSERT(vmx->pml_pg);
+		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+	}
+
 	return 0;
 }
 
@@ -7937,22 +7943,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
-{
-	struct page *pml_pg;
-
-	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!pml_pg)
-		return -ENOMEM;
-
-	vmx->pml_pg = pml_pg;
-
-	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-
-	return 0;
-}
-
 static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
 	if (vmx->pml_pg) {
@@ -8885,14 +8875,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_vcpu;
 
+	err = -ENOMEM;
+
+	/*
+	 * If PML is turned on, failure on enabling PML just results in failure
+	 * of creating the vcpu, therefore we can simplify PML logic (by
+	 * avoiding dealing with cases, such as enabling PML partially on vcpus
+	 * for the guest, etc.
+	 */
+	if (enable_pml) {
+		vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!vmx->pml_pg)
+			goto uninit_vcpu;
+	}
+
 	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
 	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
 		     > PAGE_SIZE);
 
-	err = -ENOMEM;
-	if (!vmx->guest_msrs) {
-		goto uninit_vcpu;
-	}
+	if (!vmx->guest_msrs)
+		goto free_pml;
 
 	vmx->loaded_vmcs = &vmx->vmcs01;
 	vmx->loaded_vmcs->vmcs = alloc_vmcs();
@@ -8936,18 +8938,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->nested.current_vmptr = -1ull;
 	vmx->nested.current_vmcs12 = NULL;
 
-	/*
-	 * If PML is turned on, failure on enabling PML just results in failure
-	 * of creating the vcpu, therefore we can simplify PML logic (by
-	 * avoiding dealing with cases, such as enabling PML partially on vcpus
-	 * for the guest, etc.
-	 */
-	if (enable_pml) {
-		err = vmx_create_pml_buffer(vmx);
-		if (err)
-			goto free_vmcs;
-	}
-
 	return &vmx->vcpu;
 
 free_vmcs:
@@ -8955,6 +8945,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
 	kfree(vmx->guest_msrs);
+free_pml:
+	vmx_destroy_pml_buffer(vmx);
 uninit_vcpu:
 	kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:

From 2f1fe81123f59271bddda673b60116bde9660385 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 8 Jul 2016 15:36:06 -0700
Subject: [PATCH 04/11] KVM: nVMX: Fix memory corruption when using VMCS
 shadowing

When freeing the nested resources of a vcpu, there is an assumption that
the vcpu's vmcs01 is the current VMCS on the CPU that executes
nested_release_vmcs12(). If this assumption is violated, the vcpu's
vmcs01 may be made active on multiple CPUs at the same time, in
violation of Intel's specification. Moreover, since the vcpu's vmcs01 is
not VMCLEARed on every CPU on which it is active, it can linger in a
CPU's VMCS cache after it has been freed and potentially
repurposed. Subsequent eviction from the CPU's VMCS cache on a capacity
miss can result in memory corruption.

It is not sufficient for vmx_free_vcpu() to call vmx_load_vmcs01(). If
the vcpu in question was last loaded on a different CPU, it must be
migrated to the current CPU before calling vmx_load_vmcs01().

Signed-off-by: Jim Mattson <jmattson@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c  | 19 +++++++++++++++++--
 virt/kvm/kvm_main.c |  2 ++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e34965b37a883..5bf203b58f587 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8844,6 +8844,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
 	put_cpu();
 }
 
+/*
+ * Ensure that the current vmcs of the logical processor is the
+ * vmcs01 of the vcpu before calling free_nested().
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int r;
+
+       r = vcpu_load(vcpu);
+       BUG_ON(r);
+       vmx_load_vmcs01(vcpu);
+       free_nested(vmx);
+       vcpu_put(vcpu);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8852,8 +8868,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 		vmx_destroy_pml_buffer(vmx);
 	free_vpid(vmx->vpid);
 	leave_guest_mode(vcpu);
-	vmx_load_vmcs01(vcpu);
-	free_nested(vmx);
+	vmx_free_vcpu_nested(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 48bd520fc702c..dd25346ec356b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -148,6 +148,7 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 	put_cpu();
 	return 0;
 }
+EXPORT_SYMBOL_GPL(vcpu_load);
 
 void vcpu_put(struct kvm_vcpu *vcpu)
 {
@@ -157,6 +158,7 @@ void vcpu_put(struct kvm_vcpu *vcpu)
 	preempt_enable();
 	mutex_unlock(&vcpu->mutex);
 }
+EXPORT_SYMBOL_GPL(vcpu_put);
 
 static void ack_flush(void *_completed)
 {

From 2be5b3f6dc84d30810cc0a94fa66205c27cf5f42 Mon Sep 17 00:00:00 2001
From: Liu Shuo <shuo.a.liu@intel.com>
Date: Tue, 12 Jul 2016 17:38:42 +0800
Subject: [PATCH 05/11] KVM: release anon file in failure path of vm creation

The failure of create debugfs of VM will return directly without release
the anon file. It will leak memory and file descriptors, even through
be not serious.

Signed-off-by: Liu Shuo <shuo.a.liu@intel.com>
Fixes: 536a6f88c49dd739961ffd53774775afed852c83
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd25346ec356b..cc182d9760edc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -49,6 +49,7 @@
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/bsearch.h>
+#include <linux/syscalls.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -3069,6 +3070,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 
 	if (kvm_create_vm_debugfs(kvm, r) < 0) {
 		kvm_put_kvm(kvm);
+		sys_close(r);
 		return -ENOMEM;
 	}
 

From 7964218c7d8fe36a79a5e02312b1f3a16a17ee68 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 14 Jul 2016 18:49:52 +0200
Subject: [PATCH 06/11] Revert "KVM: release anon file in failure path of vm
 creation"

This reverts commit 77ecc085fed1af1000ca719522977b960aa6da52.

Al Viro colorfully says: "You should *NEVER* use sys_close() on failure
exit paths like that.  Moreover, this kvm_put_kvm() becomes a double-put,
since closing the damn file will drop that reference to kvm.  Please,
revert.  anon_inode_getfd() should be used only when there's no possible
failures past its call".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cc182d9760edc..dd25346ec356b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -49,7 +49,6 @@
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/bsearch.h>
-#include <linux/syscalls.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -3070,7 +3069,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 
 	if (kvm_create_vm_debugfs(kvm, r) < 0) {
 		kvm_put_kvm(kvm);
-		sys_close(r);
 		return -ENOMEM;
 	}
 

From 506cfba9e726ab6842883fdd567a9a7c467e7492 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 14 Jul 2016 18:54:17 +0200
Subject: [PATCH 07/11] KVM: don't use anon_inode_getfd() before possible
 failures

Once anon_inode_getfd() has succeeded, it's impossible to undo
in a clean way and no, sys_close() is not usable in such cases.
Use anon_inode_getfile() and get_unused_fd_flags() to get struct file
and descriptor and do *not* install the file into the descriptor table
until after the last possible failure exit.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd25346ec356b..ce3d8e5be73e3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3050,6 +3050,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 {
 	int r;
 	struct kvm *kvm;
+	struct file *file;
 
 	kvm = kvm_create_vm(type);
 	if (IS_ERR(kvm))
@@ -3061,17 +3062,25 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 		return r;
 	}
 #endif
-	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
+	r = get_unused_fd_flags(O_CLOEXEC);
 	if (r < 0) {
 		kvm_put_kvm(kvm);
 		return r;
 	}
+	file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+	if (IS_ERR(file)) {
+		put_unused_fd(r);
+		kvm_put_kvm(kvm);
+		return PTR_ERR(file);
+	}
 
 	if (kvm_create_vm_debugfs(kvm, r) < 0) {
-		kvm_put_kvm(kvm);
+		put_unused_fd(r);
+		fput(file);
 		return -ENOMEM;
 	}
 
+	fd_install(r, file);
 	return r;
 }
 

From 2b23c3a6e3eb2fba77eb35fdfa3d71a9aa3f33b7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 14 Jul 2016 19:08:18 +0200
Subject: [PATCH 08/11] KVM: SVM: do not set MSR_TSC_AUX on 32-bit builds

This is unnecessary---and besides, __getcpu() is not even
available on 32-bit builds.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f931d3e33c7f1..05c08b69cee77 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4550,9 +4550,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 		);
 
+#ifdef CONFIG_X86_64
 	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		wrmsrl(MSR_TSC_AUX, __getcpu());
-#ifdef CONFIG_X86_64
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
 	loadsegment(fs, svm->host.fs);

From 6a907cd0ad31fce30b7a7416576410a7de7c8933 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Fri, 15 Jul 2016 14:38:36 +0200
Subject: [PATCH 09/11] Revert "KVM: SVM: fix trashing of MSR_TSC_AUX"

This reverts commit 9770404a0061ec46dec6e15c4b07731ce2e2d7bb.

The reverted patch is not needed as only userspace uses RDTSCP and
MSR_TSC_AUX is in host_save_user_msrs[] and therefore properly saved in
svm_vcpu_load() and restored in svm_vcpu_put() before every switch to
userspace.

The reverted patch did not allow the kernel to use RDTSCP in the future,
because of missed trashing in svm_set_msr() and 64-bit ifdef.

This reverts commit 2b23c3a6e3eb2fba77eb35fdfa3d71a9aa3f33b7.

2b23c3a6e3eb ("KVM: SVM: do not set MSR_TSC_AUX on 32-bit builds") is a
build fix for 9770404a0061 and reverting them separately would only
break more bisections.

Cc: stable@vger.kernel.org
---
 arch/x86/kvm/svm.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 05c08b69cee77..16ef31b874522 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -41,7 +41,6 @@
 #include <asm/desc.h>
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
-#include <asm/vgtod.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -1531,6 +1530,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
 		}
 	}
+	/* This assumes that the kernel never uses MSR_TSC_AUX */
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
+		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
 	avic_vcpu_load(vcpu, cpu);
 }
@@ -4472,8 +4474,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
 	clgi();
-	if (static_cpu_has(X86_FEATURE_RDTSCP))
-		wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
 
 	local_irq_enable();
 
@@ -4551,8 +4551,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		);
 
 #ifdef CONFIG_X86_64
-	if (static_cpu_has(X86_FEATURE_RDTSCP))
-		wrmsrl(MSR_TSC_AUX, __getcpu());
 	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
 	loadsegment(fs, svm->host.fs);

From b244c9fc251e14a083a1cbf04bef10bd99303a76 Mon Sep 17 00:00:00 2001
From: "Cao, Lei" <Lei.Cao@stratus.com>
Date: Fri, 15 Jul 2016 13:54:04 +0000
Subject: [PATCH 10/11] KVM: VMX: handle PML full VMEXIT that occurs during
 event delivery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With PML enabled, guest will shut down if a PML full VMEXIT occurs during
event delivery. According to Intel SDM 27.2.3, PML full VMEXIT can occur when
event is being delivered through IDT, so KVM should not exit to user space
with error. Instead, it should let EXIT_REASON_PML_FULL go through and the
event will be re-injected on the next VMENTRY.

Signed-off-by: Lei Cao <lei.cao@stratus.com>
Cc: stable@vger.kernel.org
Fixes: 843e4330573c ("KVM: VMX: Add PML support in VMX")
[Shortened the summary and Cc'd stable.]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5bf203b58f587..7758680db20b7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8214,6 +8214,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
 			exit_reason != EXIT_REASON_EPT_VIOLATION &&
+			exit_reason != EXIT_REASON_PML_FULL &&
 			exit_reason != EXIT_REASON_TASK_SWITCH)) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;

From 4c47eb1c18c38b755eb4894a6ca38f834de3ec23 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 19 Jul 2016 13:56:54 +0100
Subject: [PATCH 11/11] arm64: KVM: VHE: Context switch MDSCR_EL1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kprobe enablement work has uncovered that changes made by
a guest to MDSCR_EL1 were propagated to the host when VHE was
enabled, leading to unexpected exception being delivered.

Moving this register to the list of registers that are always
context-switched fixes the issue.

Fixes: 9c6c35683286 ("arm64: KVM: VHE: Split save/restore of registers shared between guest and host")
Cc: stable@vger.kernel.org #4.6
Reported-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
Tested-by: Tirumalesh Chalamarla <Tirumalesh.Chalamarla@cavium.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/arm64/kvm/hyp/sysreg-sr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 0f7c40eb3f53f..9341376478370 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
 /*
  * Non-VHE: Both host and guest must save everything.
  *
- * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and
- * guest must save everything.
+ * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc,
+ * pstate, and guest must save everything.
  */
 
 static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@@ -37,6 +37,7 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 	ctxt->sys_regs[TPIDR_EL0]	= read_sysreg(tpidr_el0);
 	ctxt->sys_regs[TPIDRRO_EL0]	= read_sysreg(tpidrro_el0);
 	ctxt->sys_regs[TPIDR_EL1]	= read_sysreg(tpidr_el1);
+	ctxt->sys_regs[MDSCR_EL1]	= read_sysreg(mdscr_el1);
 	ctxt->gp_regs.regs.sp		= read_sysreg(sp_el0);
 	ctxt->gp_regs.regs.pc		= read_sysreg_el2(elr);
 	ctxt->gp_regs.regs.pstate	= read_sysreg_el2(spsr);
@@ -61,7 +62,6 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
 	ctxt->sys_regs[AMAIR_EL1]	= read_sysreg_el1(amair);
 	ctxt->sys_regs[CNTKCTL_EL1]	= read_sysreg_el1(cntkctl);
 	ctxt->sys_regs[PAR_EL1]		= read_sysreg(par_el1);
-	ctxt->sys_regs[MDSCR_EL1]	= read_sysreg(mdscr_el1);
 
 	ctxt->gp_regs.sp_el1		= read_sysreg(sp_el1);
 	ctxt->gp_regs.elr_el1		= read_sysreg_el1(elr);
@@ -90,6 +90,7 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
 	write_sysreg(ctxt->sys_regs[TPIDR_EL0],	  tpidr_el0);
 	write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
 	write_sysreg(ctxt->sys_regs[TPIDR_EL1],	  tpidr_el1);
+	write_sysreg(ctxt->sys_regs[MDSCR_EL1],	  mdscr_el1);
 	write_sysreg(ctxt->gp_regs.regs.sp,	  sp_el0);
 	write_sysreg_el2(ctxt->gp_regs.regs.pc,	  elr);
 	write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
@@ -114,7 +115,6 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
 	write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1],	amair);
 	write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], 	cntkctl);
 	write_sysreg(ctxt->sys_regs[PAR_EL1],		par_el1);
-	write_sysreg(ctxt->sys_regs[MDSCR_EL1],		mdscr_el1);
 
 	write_sysreg(ctxt->gp_regs.sp_el1,		sp_el1);
 	write_sysreg_el1(ctxt->gp_regs.elr_el1,		elr);