Skip to content

Commit

Permalink
KVM: MMU: Don't use RCU for lockless shadow walking
Browse files Browse the repository at this point in the history
Using RCU for lockless shadow walking can increase the amount of memory
in use by the system, since RCU grace periods are unpredictable.  We also
have an unconditional write to a shared variable (reader_counter), which
isn't good for scaling.

Replace that with a scheme similar to x86's get_user_pages_fast(): disable
interrupts during lockless shadow walk to force the freer
(kvm_mmu_commit_zap_page()) to wait for the TLB flush IPI to find the
processor with interrupts enabled.

We also add a new vcpu->mode, READING_SHADOW_PAGE_TABLES, to prevent
kvm_flush_remote_tlbs() from avoiding the IPI.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
  • Loading branch information
Avi Kivity authored and Marcelo Tosatti committed May 16, 2012
1 parent b2da15a commit c142786
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 49 deletions.
4 changes: 0 additions & 4 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,6 @@ struct kvm_mmu_page {
#endif

int write_flooding_count;

struct rcu_head rcu;
};

struct kvm_pio_request {
Expand Down Expand Up @@ -540,8 +538,6 @@ struct kvm_arch {
u64 hv_guest_os_id;
u64 hv_hypercall;

atomic_t reader_counter;

#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
Expand Down
73 changes: 29 additions & 44 deletions arch/x86/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -551,19 +551,29 @@ static u64 mmu_spte_get_lockless(u64 *sptep)

static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
{
rcu_read_lock();
atomic_inc(&vcpu->kvm->arch.reader_counter);

/* Increase the counter before walking shadow page table */
smp_mb__after_atomic_inc();
/*
* Prevent page table teardown by making any freer wait for the
* kvm_flush_remote_tlbs() IPI to be handled by all active vcpus.
*/
local_irq_disable();
vcpu->mode = READING_SHADOW_PAGE_TABLES;
/*
* Make sure a following spte read is not reordered ahead of the write
* to vcpu->mode.
*/
smp_mb();
}

static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
/* Decrease the counter after walking shadow page table finished */
smp_mb__before_atomic_dec();
atomic_dec(&vcpu->kvm->arch.reader_counter);
rcu_read_unlock();
/*
* Make sure the write to vcpu->mode is not reordered ahead of the
* reads of sptes. If it were, kvm_mmu_commit_zap_page() could see us
* OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
*/
smp_mb();
vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
Expand Down Expand Up @@ -1989,30 +1999,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
return ret;
}

static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
{
struct kvm_mmu_page *sp;

list_for_each_entry(sp, invalid_list, link)
kvm_mmu_isolate_page(sp);
}

static void free_pages_rcu(struct rcu_head *head)
{
struct kvm_mmu_page *next, *sp;

sp = container_of(head, struct kvm_mmu_page, rcu);
while (sp) {
if (!list_empty(&sp->link))
next = list_first_entry(&sp->link,
struct kvm_mmu_page, link);
else
next = NULL;
kvm_mmu_free_page(sp);
sp = next;
}
}

static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list)
{
Expand All @@ -2021,25 +2007,24 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if (list_empty(invalid_list))
return;

kvm_flush_remote_tlbs(kvm);

if (atomic_read(&kvm->arch.reader_counter)) {
kvm_mmu_isolate_pages(invalid_list);
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
list_del_init(invalid_list);
/*
* wmb: make sure everyone sees our modifications to the page tables
* rmb: make sure we see changes to vcpu->mode
*/
smp_mb();

trace_kvm_mmu_delay_free_pages(sp);
call_rcu(&sp->rcu, free_pages_rcu);
return;
}
/*
* Wait for all vcpus to exit guest mode and/or lockless shadow
* page table walks.
*/
kvm_flush_remote_tlbs(kvm);

do {
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
WARN_ON(!sp->role.invalid || sp->root_count);
kvm_mmu_isolate_page(sp);
kvm_mmu_free_page(sp);
} while (!list_empty(invalid_list));

}

/*
Expand Down
3 changes: 2 additions & 1 deletion include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
EXITING_GUEST_MODE
EXITING_GUEST_MODE,
READING_SHADOW_PAGE_TABLES,
};

/*
Expand Down

0 comments on commit c142786

Please sign in to comment.