[SPARC64]: Fix and re-enable dynamic TSB sizing.
This is good for up to a 50% performance improvement in some test
cases.  The problem has been race conditions, and hopefully I've
plugged them all up here.

1) There was a serious race in switch_mm() wrt. lazy TLB
   switching to and from kernel threads.

   We could erroneously skip a tsb_context_switch() and thus
   use a stale TSB across a TSB grow event.

   There is a big comment now in that function describing
   exactly how it can happen.

2) All code paths that do something with the TSB need to be
   guarded with the mm->context.lock spinlock.  This makes
   the page table flushing paths properly synchronize with both
   TSB growing and TLB context changes (see the sketch after
   this list).

3) TSB growing events are moved to the end of successful fault
   processing.  Previously it was in update_mmu_cache() but
   that is deadlock prone.  At the end of do_sparc64_fault()
   we hold no spinlocks that could deadlock the TSB grow
   sequence.  We have also dropped the address space semaphore.
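
To make (2) concrete, the pattern now used everywhere a TSB is
touched is visible in the update_mmu_cache() hunk further down.
A minimal sketch in C, with the declarations simplified for
illustration:

        struct mm_struct *mm = vma->vm_mm;
        struct tsb *tsb;
        unsigned long tag, flags;

        /* mm->context.lock serializes this insert against a
         * concurrent tsb_grow() swapping in a new TSB and against
         * TLB context changes.
         */
        spin_lock_irqsave(&mm->context.lock, flags);

        tsb = &mm->context.tsb[(address >> PAGE_SHIFT) &
                               (mm->context.tsb_nentries - 1UL)];
        tag = (address >> 22UL);
        tsb_insert(tsb, tag, pte_val(pte));

        spin_unlock_irqrestore(&mm->context.lock, flags);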

While we're here, add prefetching to the copy_tsb() routine
and move it into assembler in the tsb.S file.  This piece of
code is quite time critical.  (A rough C equivalent of what it
does is sketched below.)
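
A rough C equivalent of copy_tsb(), simplified for illustration
(the struct layout and function name here are mine, and the real
assembler additionally prefetches ahead and uses the
tsb_phys_patch'd load/store variants):

        /* TSB_TAG_LOCK_BIT and TSB_TAG_INVALID_BIT come from
         * asm/tsb.h; sizes are in bytes, 16 bytes per entry.
         */
        struct tsb_entry {              /* one 16-byte TSB entry */
                unsigned long tag;      /* vaddr >> 22, plus control bits */
                unsigned long tte;
        };

        static void copy_tsb_sketch(struct tsb_entry *old_tsb,
                                    unsigned long old_size,
                                    struct tsb_entry *new_tsb,
                                    unsigned long new_size)
        {
                unsigned long n_old = old_size / 16;
                unsigned long new_mask = (new_size / 16) - 1;
                unsigned long i;

                for (i = 0; i < n_old; i++) {
                        unsigned long tag = old_tsb[i].tag;
                        unsigned long vaddr, hash;

                        /* LOCK or INVALID set?  Skip it. */
                        if (tag & ((1UL << TSB_TAG_LOCK_BIT) |
                                   (1UL << TSB_TAG_INVALID_BIT)))
                                continue;

                        /* The TAG holds vaddr >> 22; the nine bits
                         * between PAGE_SHIFT and bit 22 are recovered
                         * from the entry's index in the old table.
                         */
                        vaddr = (tag << 22) | ((i & 511UL) << PAGE_SHIFT);

                        /* Rehash with the new table's mask and copy
                         * the TAG/TTE pair over.
                         */
                        hash = (vaddr >> PAGE_SHIFT) & new_mask;
                        new_tsb[hash].tag = tag;
                        new_tsb[hash].tte = old_tsb[i].tte;
                }
        }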

There are some small negative side effects to this code which
can be improved upon.  In particular, we now grab mm->context.lock
even for the TSB insert done by update_mmu_cache(), and that's
a bit excessive.  We could get rid of that locking, and the same
lock taking in flush_tsb_user(), by disabling PSTATE_IE around
the whole operation, including the capturing of the TSB pointer
and tsb_nentries value.  That would work because anyone growing
the TSB won't free up the old TSB until all cpus respond to the
TSB change cross call.

I'm not quite confident enough in that optimization to put it in
right now, but eventually we might be able to, and the description
is here for reference (a sketch follows).
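
In C terms that deferred optimization would look roughly like the
following, with local_irq_save() standing in for clearing
PSTATE_IE (a sketch of the idea only, not something this commit
does):

        unsigned long flags, nentries;
        struct tsb *tsb;

        /* With interrupts disabled this cpu cannot ack the TSB
         * change cross call, so a concurrent tsb_grow() cannot free
         * the old TSB while we work on the captured pointer and size.
         */
        local_irq_save(flags);          /* i.e. clear PSTATE_IE */

        tsb = mm->context.tsb;
        nentries = mm->context.tsb_nentries;
        tsb_insert(&tsb[(address >> PAGE_SHIFT) & (nentries - 1UL)],
                   address >> 22UL, pte_val(pte));

        local_irq_restore(flags);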

This code seems very solid now.  It passes several parallel GCC
bootstrap builds, and our favorite "nut cruncher" stress test, which is
a full "make -j8192" build of a "make allmodconfig" kernel.  That puts
about 256 processes on each cpu's run queue, makes lots of process cpu
migrations occur, causes lots of page table and TLB flushing activity,
incurs many context version number changes, and it swaps the machine
real far out to disk even though there is 16GB of ram on this test
system. :-)

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller authored and committed Mar 20, 2006
1 parent a858f1c commit 7a1ac52
Showing 5 changed files with 203 additions and 118 deletions.
71 changes: 70 additions & 1 deletion arch/sparc64/kernel/tsb.S
@@ -34,8 +34,9 @@ tsb_miss_itlb:
ldxa [%g4] ASI_IMMU, %g4

/* At this point we have:
* %g4 -- missing virtual address
* %g1 -- TSB entry address
* %g3 -- FAULT_CODE_{D,I}TLB
* %g4 -- missing virtual address
* %g6 -- TAG TARGET (vaddr >> 22)
*/
tsb_miss_page_table_walk:
@@ -45,6 +46,12 @@
tsb_miss_page_table_walk_sun4v_fastpath:
USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)

/* At this point we have:
* %g1 -- TSB entry address
* %g3 -- FAULT_CODE_{D,I}TLB
* %g5 -- physical address of PTE in Linux page tables
* %g6 -- TAG TARGET (vaddr >> 22)
*/
tsb_reload:
TSB_LOCK_TAG(%g1, %g2, %g7)

@@ -199,6 +206,7 @@ __tsb_insert:
wrpr %o5, %pstate
retl
nop
.size __tsb_insert, .-__tsb_insert

/* Flush the given TSB entry if it has the matching
* tag.
@@ -208,6 +216,7 @@ __tsb_insert:
*/
.align 32
.globl tsb_flush
.type tsb_flush,#function
tsb_flush:
sethi %hi(TSB_TAG_LOCK_HIGH), %g2
1: TSB_LOAD_TAG(%o0, %g1)
@@ -225,6 +234,7 @@ tsb_flush:
nop
2: retl
TSB_MEMBAR
.size tsb_flush, .-tsb_flush

/* Reload MMU related context switch state at
* schedule() time.
@@ -241,6 +251,7 @@ tsb_flush:
*/
.align 32
.globl __tsb_context_switch
.type __tsb_context_switch,#function
__tsb_context_switch:
rdpr %pstate, %o5
wrpr %o5, PSTATE_IE, %pstate
@@ -302,3 +313,61 @@ __tsb_context_switch:

retl
nop
.size __tsb_context_switch, .-__tsb_context_switch

#define TSB_PASS_BITS ((1 << TSB_TAG_LOCK_BIT) | \
(1 << TSB_TAG_INVALID_BIT))

.align 32
.globl copy_tsb
.type copy_tsb,#function
copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
* %o2=new_tsb_base, %o3=new_tsb_size
*/
sethi %uhi(TSB_PASS_BITS), %g7
srlx %o3, 4, %o3
add %o0, %o1, %g1 /* end of old tsb */
sllx %g7, 32, %g7
sub %o3, 1, %o3 /* %o3 == new tsb hash mask */

661: prefetcha [%o0] ASI_N, #one_read
.section .tsb_phys_patch, "ax"
.word 661b
prefetcha [%o0] ASI_PHYS_USE_EC, #one_read
.previous

90: andcc %o0, (64 - 1), %g0
bne 1f
add %o0, 64, %o5

661: prefetcha [%o5] ASI_N, #one_read
.section .tsb_phys_patch, "ax"
.word 661b
prefetcha [%o5] ASI_PHYS_USE_EC, #one_read
.previous

1: TSB_LOAD_QUAD(%o0, %g2) /* %g2/%g3 == TSB entry */
andcc %g2, %g7, %g0 /* LOCK or INVALID set? */
bne,pn %xcc, 80f /* Skip it */
sllx %g2, 22, %o4 /* TAG --> VADDR */

/* This can definitely be computed faster... */
srlx %o0, 4, %o5 /* Build index */
and %o5, 511, %o5 /* Mask index */
sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
or %o4, %o5, %o4 /* Full VADDR. */
srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */
and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */
sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */
TSB_STORE(%o2 + %o4, %g2) /* Store TAG */
add %o4, 0x8, %o4 /* Advance to TTE */
TSB_STORE(%o2 + %o4, %g3) /* Store TTE */

80: add %o0, 16, %o0
cmp %o0, %g1
bne,pt %xcc, 90b
nop

retl
TSB_MEMBAR
.size copy_tsb, .-copy_tsb
8 changes: 7 additions & 1 deletion arch/sparc64/mm/fault.c
@@ -29,6 +29,7 @@
#include <asm/lsu.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/mmu_context.h>

/*
* To debug kernel to catch accesses to certain virtual/physical addresses.
@@ -258,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
struct vm_area_struct *vma;
unsigned int insn = 0;
int si_code, fault_code;
unsigned long address;
unsigned long address, mm_rss;

fault_code = get_thread_fault_code();

@@ -407,6 +408,11 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
}

up_read(&mm->mmap_sem);

mm_rss = get_mm_rss(mm);
if (unlikely(mm_rss >= mm->context.tsb_rss_limit))
tsb_grow(mm, mm_rss);

return;

/*
7 changes: 6 additions & 1 deletion arch/sparc64/mm/init.c
@@ -279,7 +279,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
{
struct mm_struct *mm;
struct tsb *tsb;
unsigned long tag;
unsigned long tag, flags;

if (tlb_type != hypervisor) {
unsigned long pfn = pte_pfn(pte);
@@ -308,10 +308,15 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
}

mm = vma->vm_mm;

spin_lock_irqsave(&mm->context.lock, flags);

tsb = &mm->context.tsb[(address >> PAGE_SHIFT) &
(mm->context.tsb_nentries - 1UL)];
tag = (address >> 22UL);
tsb_insert(tsb, tag, pte_val(pte));

spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_dcache_page(struct page *page)
