Commit 0804091

yaml
---
r: 258103
b: refs/heads/master
c: c2a2ac2
h: refs/heads/master
i:
  258101: 5991f43
  258099: a8c589d
  258095: 0d83c34
v: v3
Xiao Guangrong authored and Avi Kivity committed Jul 24, 2011
1 parent 4424404 commit 0804091
Showing 3 changed files with 133 additions and 9 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 603e0651cfc8562b103454d7ded71f3ad1eb3a37
refs/heads/master: c2a2ac2b563ccc3a69540965b5a994c19e3817d7
8 changes: 8 additions & 0 deletions trunk/arch/x86/include/asm/kvm_host.h
@@ -233,6 +233,12 @@ struct kvm_mmu_page {
unsigned int unsync_children;
unsigned long parent_ptes; /* Reverse mapping for parent_pte */
DECLARE_BITMAP(unsync_child_bitmap, 512);

#ifdef CONFIG_X86_32
int clear_spte_count;
#endif

struct rcu_head rcu;
};

struct kvm_pv_mmu_op_buffer {
@@ -486,6 +492,8 @@ struct kvm_arch {
u64 hv_guest_os_id;
u64 hv_hypercall;

atomic_t reader_counter;

#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
132 changes: 124 additions & 8 deletions trunk/arch/x86/kvm/mmu.c
@@ -182,6 +182,12 @@ struct kvm_shadow_walk_iterator {
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))

#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte) \
for (shadow_walk_init(&(_walker), _vcpu, _addr); \
shadow_walk_okay(&(_walker)) && \
({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
__shadow_walk_next(&(_walker), spte))

static struct kmem_cache *pte_list_desc_cache;
static struct kmem_cache *mmu_page_header_cache;
static struct percpu_counter kvm_total_used_mmu_pages;
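
The middle clause of for_each_shadow_entry_lockless() uses a GNU C statement expression: ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }) snapshots the current entry into the caller's variable as a side effect and then evaluates to 1, so it never terminates the loop by itself. A minimal, self-contained illustration of the idiom (toy names; GCC/Clang only, since statement expressions are a GNU extension):

#include <stdio.h>

int main(void)
{
    int shared = 42, snap;

    /* The statement expression copies 'shared' into 'snap' as a side
     * effect and evaluates to 1, so only 'i < 3' can end the loop. */
    for (int i = 0; i < 3 && ({ snap = shared; 1; }); i++)
        printf("iteration %d saw %d\n", i, snap);

    return 0;
}
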
@@ -274,6 +280,11 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
{
return xchg(sptep, spte);
}

static u64 __get_spte_lockless(u64 *sptep)
{
return ACCESS_ONCE(*sptep);
}
#else
union split_spte {
struct {
@@ -283,6 +294,18 @@ union split_spte {
u64 spte;
};

static void count_spte_clear(u64 *sptep, u64 spte)
{
struct kvm_mmu_page *sp = page_header(__pa(sptep));

if (is_shadow_present_pte(spte))
return;

/* Ensure the spte is completely set before we increase the count */
smp_wmb();
sp->clear_spte_count++;
}

static void __set_spte(u64 *sptep, u64 spte)
{
union split_spte *ssptep, sspte;
@@ -318,6 +341,7 @@ static void __update_clear_spte_fast(u64 *sptep, u64 spte)
smp_wmb();

ssptep->spte_high = sspte.spte_high;
count_spte_clear(sptep, spte);
}

static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
@@ -330,9 +354,40 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
/* xchg acts as a barrier before the setting of the high bits */
orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low);
orig.spte_high = ssptep->spte_high = sspte.spte_high;
count_spte_clear(sptep, spte);

return orig.spte;
}

/*
* The idea of getting the spte in this lightweight way on x86_32 comes
* from gup_get_pte() (arch/x86/mm/gup.c).
* The difference is that we cannot catch the spte TLB flush if we leave
* guest mode, so we emulate it by increasing clear_spte_count whenever an
* spte is cleared.
*/
static u64 __get_spte_lockless(u64 *sptep)
{
struct kvm_mmu_page *sp = page_header(__pa(sptep));
union split_spte spte, *orig = (union split_spte *)sptep;
int count;

retry:
count = sp->clear_spte_count;
smp_rmb();

spte.spte_low = orig->spte_low;
smp_rmb();

spte.spte_high = orig->spte_high;
smp_rmb();

if (unlikely(spte.spte_low != orig->spte_low ||
count != sp->clear_spte_count))
goto retry;

return spte.spte;
}
#endif

static bool spte_has_volatile_bits(u64 spte)
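
For readers outside the kernel tree, the retry scheme above can be modelled as a plain C11 program: a 64-bit value is stored as two 32-bit halves plus a clear counter, and the reader retries whenever the low half or the counter changed underneath it. This is only a simplified userspace analogue with made-up names (split64, clear_count), not the kernel code; it uses C11 atomics where the kernel relies on ACCESS_ONCE() and smp_*mb():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct split64 {
    _Atomic uint32_t lo;
    _Atomic uint32_t hi;
    _Atomic unsigned clear_count;   /* plays the role of clear_spte_count */
};

static void split64_clear(struct split64 *s)
{
    atomic_store_explicit(&s->lo, 0, memory_order_release);
    atomic_store_explicit(&s->hi, 0, memory_order_release);
    /* Make the cleared halves visible before bumping the counter. */
    atomic_fetch_add_explicit(&s->clear_count, 1, memory_order_release);
}

static uint64_t split64_read(struct split64 *s)
{
    uint32_t lo, hi;
    unsigned count;

retry:
    count = atomic_load_explicit(&s->clear_count, memory_order_acquire);
    lo = atomic_load_explicit(&s->lo, memory_order_acquire);
    hi = atomic_load_explicit(&s->hi, memory_order_acquire);

    /* A clear raced with us if the low half or the counter changed. */
    if (lo != atomic_load_explicit(&s->lo, memory_order_acquire) ||
        count != atomic_load_explicit(&s->clear_count, memory_order_acquire))
        goto retry;

    return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
    struct split64 s = { 5, 7, 0 };

    printf("before clear: %#llx\n", (unsigned long long)split64_read(&s));
    split64_clear(&s);
    printf("after clear:  %#llx\n", (unsigned long long)split64_read(&s));
    return 0;
}
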
@@ -435,6 +490,28 @@ static void mmu_spte_clear_no_track(u64 *sptep)
__update_clear_spte_fast(sptep, 0ull);
}

static u64 mmu_spte_get_lockless(u64 *sptep)
{
return __get_spte_lockless(sptep);
}

static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
{
rcu_read_lock();
atomic_inc(&vcpu->kvm->arch.reader_counter);

/* Increase the counter before walking the shadow page table */
smp_mb__after_atomic_inc();
}

static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
/* Decrease the counter after the shadow page table walk has finished */
smp_mb__before_atomic_dec();
atomic_dec(&vcpu->kvm->arch.reader_counter);
rcu_read_unlock();
}
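
Taken together, a lockless walk brackets the new iterator with walk_shadow_page_lockless_begin()/walk_shadow_page_lockless_end(), exactly as the kvm_mmu_get_spte_hierarchy() hunk at the end of this diff does. A sketch of the pattern in isolation (the name walk_addr_lockless is invented here, and the snippet assumes the surrounding mmu.c context rather than compiling on its own):

static int walk_addr_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
{
    struct kvm_shadow_walk_iterator iterator;
    u64 spte;
    int nr_sptes = 0;

    /* RCU read side + reader_counter keep the shadow pages alive. */
    walk_shadow_page_lockless_begin(vcpu);
    for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
        sptes[iterator.level - 1] = spte;
        nr_sptes++;
        if (!is_shadow_present_pte(spte))
            break;
    }
    walk_shadow_page_lockless_end(vcpu);

    return nr_sptes;
}
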

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
struct kmem_cache *base_cache, int min)
{
@@ -1597,17 +1674,23 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
return true;
}

static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
u64 spte)
{
if (is_last_spte(*iterator->sptep, iterator->level)) {
if (is_last_spte(spte, iterator->level)) {
iterator->level = 0;
return;
}

iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
iterator->shadow_addr = spte & PT64_BASE_ADDR_MASK;
--iterator->level;
}

static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
{
return __shadow_walk_next(iterator, *iterator->sptep);
}

static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
{
u64 spte;
@@ -1754,6 +1837,30 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
return ret;
}

static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
{
struct kvm_mmu_page *sp;

list_for_each_entry(sp, invalid_list, link)
kvm_mmu_isolate_page(sp);
}

static void free_pages_rcu(struct rcu_head *head)
{
struct kvm_mmu_page *next, *sp;

sp = container_of(head, struct kvm_mmu_page, rcu);
while (sp) {
if (!list_empty(&sp->link))
next = list_first_entry(&sp->link,
struct kvm_mmu_page, link);
else
next = NULL;
kvm_mmu_free_page(sp);
sp = next;
}
}

static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list)
{
@@ -1764,6 +1871,14 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,

kvm_flush_remote_tlbs(kvm);

if (atomic_read(&kvm->arch.reader_counter)) {
kvm_mmu_isolate_pages(invalid_list);
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
list_del_init(invalid_list);
call_rcu(&sp->rcu, free_pages_rcu);
return;
}

do {
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
WARN_ON(!sp->role.invalid || sp->root_count);
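
The branch added above is the writer side of the scheme: once the TLBs are flushed, pages are freed immediately when no lockless walker is active, and handed to call_rcu() otherwise, so a walker that raced with the zap can finish before the memory disappears. The same safety argument can be tried out in userspace with liburcu; the sketch below is only an analogue under stated assumptions (liburcu installed, compile with -lurcu), it is single-threaded for brevity, and it uses the blocking synchronize_rcu() where the patch uses the asynchronous call_rcu():

#include <urcu.h>       /* liburcu: rcu_read_lock(), synchronize_rcu() */
#include <stdio.h>
#include <stdlib.h>

struct node {
    int value;
};

static struct node *shared;     /* RCU-protected pointer */

static void reader(void)
{
    rcu_read_lock();
    struct node *n = rcu_dereference(shared);
    if (n)
        printf("reader saw %d\n", n->value);
    rcu_read_unlock();
}

static void retire(void)
{
    struct node *old = shared;

    rcu_assign_pointer(shared, NULL);   /* unlink: new readers see NULL */
    synchronize_rcu();                  /* wait out pre-existing readers */
    free(old);                          /* no reader can still hold it */
}

int main(void)
{
    rcu_register_thread();

    shared = malloc(sizeof(*shared));
    shared->value = 42;

    reader();
    retire();
    reader();

    rcu_unregister_thread();
    return 0;
}
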
@@ -3784,16 +3899,17 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
{
struct kvm_shadow_walk_iterator iterator;
u64 spte;
int nr_sptes = 0;

spin_lock(&vcpu->kvm->mmu_lock);
for_each_shadow_entry(vcpu, addr, iterator) {
sptes[iterator.level-1] = *iterator.sptep;
walk_shadow_page_lockless_begin(vcpu);
for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
sptes[iterator.level-1] = spte;
nr_sptes++;
if (!is_shadow_present_pte(*iterator.sptep))
if (!is_shadow_present_pte(spte))
break;
}
spin_unlock(&vcpu->kvm->mmu_lock);
walk_shadow_page_lockless_end(vcpu);

return nr_sptes;
}
