Merge branch 'mm-delete-change-gpte' into HEAD
The .change_pte() MMU notifier callback was intended as an optimization
and for this reason it was initially called without a surrounding
mmu_notifier_invalidate_range_{start,end}() pair.  It was only ever
implemented by KVM (which was also the original user of MMU notifiers)
and the rules on when to call set_pte_at_notify() rather than set_pte_at()
have always been pretty obscure.
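
For reference, set_pte_at_notify() is just set_pte_at() followed by a notification
to the MMU notifier chain; a simplified sketch of the helper being removed (in the
real tree it is a macro in include/linux/mmu_notifier.h) looks like this:

static inline void set_pte_at_notify(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep, pte_t pte)
{
	/* Install the new PTE in the primary (host) page table... */
	set_pte_at(mm, addr, ptep, pte);
	/* ...then notify secondary MMUs, i.e. KVM's .change_pte() callback. */
	mmu_notifier_change_pte(mm, addr, pte);
}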

It may seem a miracle that it has never caused any hard-to-trigger
bugs, but there's a good reason for that: KVM's implementation has
been nonfunctional for a good part of its existence.  Already in
2012, commit 6bdb913 ("mm: wrap calls to set_pte_at_notify with
invalidate_range_start and invalidate_range_end", 2012-10-09) changed the
.change_pte() callback to occur within an invalidate_range_start/end()
pair; and because KVM unmaps the sPTEs during .invalidate_range_start(),
.change_pte() has no hope of finding an sPTE to change.
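
In other words, by the time the callback fires, the ordering at every remaining
call site is roughly the following (a sketch of the sequence, not literal kernel
code):

mmu_notifier_invalidate_range_start(&range);	/* KVM zaps the affected sPTEs here */
set_pte_at_notify(mm, addr, ptep, entry);	/* .change_pte() runs, finds no sPTE to update */
mmu_notifier_invalidate_range_end(&range);	/* KVM lets vCPUs fault the page back in */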

Therefore, all the code for .change_pte() can be removed from both KVM
and mm/, and set_pte_at_notify() can be replaced with just set_pte_at().
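
At the mm/ call sites (for example the CoW path in mm/memory.c) the conversion is
mechanical; it follows this pattern, shown here only as an illustration with
generic variable names:

/* Before: install the PTE and notify secondary MMUs via .change_pte() */
set_pte_at_notify(mm, addr, ptep, entry);

/* After: plain install; the surrounding invalidate_range_start/end pair
 * already forces KVM to drop its mapping and re-fault it later. */
set_pte_at(mm, addr, ptep, entry);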

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Paolo Bonzini committed Apr 12, 2024
2 parents 9bc60f7 + f784274 commit 531f520
Showing 26 changed files with 16 additions and 419 deletions.
34 changes: 0 additions & 34 deletions arch/arm64/kvm/mmu.c
@@ -1768,40 +1768,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->arg.pte);

if (!kvm->arch.mmu.pgt)
return false;

WARN_ON(range->end - range->start != 1);

/*
* If the page isn't tagged, defer to user_mem_abort() for sanitising
* the MTE tags. The S2 pte should have been unmapped by
* mmu_notifier_invalidate_range_end().
*/
if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
return false;

/*
* We've moved a page around, probably through CoW, so let's treat
* it just like a translation fault and the map handler will clean
* the cache to the PoC.
*
* The MMU notifiers will have unmapped a huge PMD before calling
* ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
* therefore we never need to clear out a huge PMD through this
* calling path and a memcache is not required.
*/
kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
PAGE_SIZE, __pfn_to_phys(pfn),
KVM_PGTABLE_PROT_R, NULL, 0);

return false;
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
1 change: 0 additions & 1 deletion arch/loongarch/include/asm/kvm_host.h
@@ -203,7 +203,6 @@ void kvm_flush_tlb_all(void);
void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
32 changes: 0 additions & 32 deletions arch/loongarch/kvm/mmu.c
@@ -494,38 +494,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
range->end << PAGE_SHIFT, &ctx);
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
unsigned long prot_bits;
kvm_pte_t *ptep;
kvm_pfn_t pfn = pte_pfn(range->arg.pte);
gpa_t gpa = range->start << PAGE_SHIFT;

ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
if (!ptep)
return false;

/* Replacing an absent or old page doesn't need flushes */
if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) {
kvm_set_pte(ptep, 0);
return false;
}

/* Fill new pte if write protected or page migrated */
prot_bits = _PAGE_PRESENT | __READABLE;
prot_bits |= _CACHE_MASK & pte_val(range->arg.pte);

/*
* Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support
* _PAGE_WRITE for map_page_fast if next page write fault
* _PAGE_DIRTY since gpa has already recorded as dirty page
*/
prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte);
kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits)));

return true;
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_ptw_ctx ctx;
30 changes: 0 additions & 30 deletions arch/mips/kvm/mmu.c
@@ -444,36 +444,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return true;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
gpa_t gpa = range->start << PAGE_SHIFT;
pte_t hva_pte = range->arg.pte;
pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
pte_t old_pte;

if (!gpa_pte)
return false;

/* Mapping may need adjusting depending on memslot flags */
old_pte = *gpa_pte;
if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
hva_pte = pte_mkclean(hva_pte);
else if (range->slot->flags & KVM_MEM_READONLY)
hva_pte = pte_wrprotect(hva_pte);

set_pte(gpa_pte, hva_pte);

/* Replacing an absent or old page doesn't need flushes */
if (!pte_present(old_pte) || !pte_young(old_pte))
return false;

/* Pages swapped, aged, moved, or cleaned require flushes */
return !pte_present(hva_pte) ||
!pte_young(hva_pte) ||
pte_pfn(old_pte) != pte_pfn(hva_pte) ||
(pte_dirty(old_pte) && !pte_dirty(hva_pte));
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
1 change: 0 additions & 1 deletion arch/powerpc/include/asm/kvm_ppc.h
@@ -287,7 +287,6 @@ struct kvmppc_ops {
bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
void (*free_memslot)(struct kvm_memory_slot *slot);
int (*init_vm)(struct kvm *kvm);
void (*destroy_vm)(struct kvm *kvm);
5 changes: 0 additions & 5 deletions arch/powerpc/kvm/book3s.c
@@ -899,11 +899,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
return kvm->arch.kvm_ops->set_spte_gfn(kvm, range);
}

int kvmppc_core_init_vm(struct kvm *kvm)
{

1 change: 0 additions & 1 deletion arch/powerpc/kvm/book3s.h
@@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);

extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
12 changes: 0 additions & 12 deletions arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_test_age_rmapp(kvm, range->slot, range->start);
}

bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
WARN_ON(range->start + 1 != range->end);

if (kvm_is_radix(kvm))
kvm_unmap_radix(kvm, range->slot, range->start);
else
kvm_unmap_rmapp(kvm, range->slot, range->start);

return false;
}

static int vcpus_running(struct kvm *kvm)
{
return atomic_read(&kvm->arch.vcpus_running) != 0;
1 change: 0 additions & 1 deletion arch/powerpc/kvm/book3s_hv.c
@@ -6364,7 +6364,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.unmap_gfn_range = kvm_unmap_gfn_range_hv,
.age_gfn = kvm_age_gfn_hv,
.test_age_gfn = kvm_test_age_gfn_hv,
.set_spte_gfn = kvm_set_spte_gfn_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,
7 changes: 0 additions & 7 deletions arch/powerpc/kvm/book3s_pr.c
@@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}

static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* The page will get remapped properly on its next fault */
return do_kvm_unmap_gfn(kvm, range);
}

/*****************************************/

static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
@@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = {
.unmap_gfn_range = kvm_unmap_gfn_range_pr,
.age_gfn = kvm_age_gfn_pr,
.test_age_gfn = kvm_test_age_gfn_pr,
.set_spte_gfn = kvm_set_spte_gfn_pr,
.free_memslot = kvmppc_core_free_memslot_pr,
.init_vm = kvmppc_core_init_vm_pr,
.destroy_vm = kvmppc_core_destroy_vm_pr,
6 changes: 0 additions & 6 deletions arch/powerpc/kvm/e500_mmu_host.c
@@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* The page will get remapped properly on its next fault */
return kvm_e500_mmu_unmap_gfn(kvm, range);
}

/*****************************************/

int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
20 changes: 0 additions & 20 deletions arch/riscv/kvm/mmu.c
@@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
int ret;
kvm_pfn_t pfn = pte_pfn(range->arg.pte);

if (!kvm->arch.pgd)
return false;

WARN_ON(range->end - range->start != 1);

ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
__pfn_to_phys(pfn), PAGE_SIZE, true, true);
if (ret) {
kvm_debug("Failed to map G-stage page (error %d)\n", ret);
return true;
}

return false;
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
pte_t *ptep;
67 changes: 7 additions & 60 deletions arch/x86/kvm/mmu/mmu.c
@@ -432,8 +432,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
* The idea using the light way get the spte on x86_32 guest is from
* gup_get_pte (mm/gup.c).
*
* An spte tlb flush may be pending, because kvm_set_pte_rmap
* coalesces them and we are running out of the MMU lock. Therefore
* An spte tlb flush may be pending, because they are coalesced and
* we are running out of the MMU lock. Therefore
* we need to protect against in-progress updates of the spte.
*
* Reading the spte while an update is in progress may get the old value
@@ -1448,49 +1448,11 @@ static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}

static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
pte_t unused)
struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
return __kvm_zap_rmap(kvm, rmap_head, slot);
}

static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
pte_t pte)
{
u64 *sptep;
struct rmap_iterator iter;
bool need_flush = false;
u64 new_spte;
kvm_pfn_t new_pfn;

WARN_ON_ONCE(pte_huge(pte));
new_pfn = pte_pfn(pte);

restart:
for_each_rmap_spte(rmap_head, &iter, sptep) {
need_flush = true;

if (pte_write(pte)) {
kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
goto restart;
} else {
new_spte = kvm_mmu_changed_pte_notifier_make_spte(
*sptep, new_pfn);

mmu_spte_clear_track_bits(kvm, sptep);
mmu_spte_set(sptep, new_spte);
}
}

if (need_flush && kvm_available_flush_remote_tlbs_range()) {
kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
return false;
}

return need_flush;
}

struct slot_rmap_walk_iterator {
/* input fields. */
const struct kvm_memory_slot *slot;
@@ -1562,7 +1524,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)

typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
int level, pte_t pte);
int level);

static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
struct kvm_gfn_range *range,
@@ -1574,7 +1536,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
range->start, range->end - 1, &iterator)
ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
iterator.level, range->arg.pte);
iterator.level);

return ret;
}
@@ -1596,22 +1558,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return flush;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool flush = false;

if (kvm_memslots_have_rmaps(kvm))
flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap);

if (tdp_mmu_enabled)
flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);

return flush;
}

static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
pte_t unused)
struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1624,8 +1572,7 @@ static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}

static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
int level, pte_t unused)
struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;
16 changes: 0 additions & 16 deletions arch/x86/kvm/mmu/spte.c
@@ -322,22 +322,6 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
return spte;
}

u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
{
u64 new_spte;

new_spte = old_spte & ~SPTE_BASE_ADDR_MASK;
new_spte |= (u64)new_pfn << PAGE_SHIFT;

new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~shadow_host_writable_mask;
new_spte &= ~shadow_mmu_writable_mask;

new_spte = mark_spte_for_access_track(new_spte);

return new_spte;
}

u64 mark_spte_for_access_track(u64 spte)
{
if (spte_ad_enabled(spte))
2 changes: 0 additions & 2 deletions arch/x86/kvm/mmu/spte.h
@@ -496,8 +496,6 @@ static inline u64 restore_acc_track_spte(u64 spte)
return spte;
}

u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);

void __init kvm_mmu_spte_module_init(void);
void kvm_mmu_reset_all_pte_masks(void);
