gru: preload tlb for bcopy instructions
Add anticipatory TLB dropins for GRU TLB misses that occur on BCOPY
instructions that copy large amounts of data.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Jack Steiner authored and Linus Torvalds committed Dec 16, 2009
1 parent 57ebb03 commit c550222
Showing 9 changed files with 129 additions and 16 deletions.
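The core of the change is the new gru_preload_tlb() in grufault.c below: on a TLB miss taken by a BCOPY instruction, the driver now drops in translations for the pages the copy is about to touch, capped at the per-context tlb_preload_count. A rough, self-contained model of that window arithmetic only (4 KB pages and all names here are illustrative, not driver API):

#include <stdio.h>

#define MODEL_PAGE_SHIFT 12
#define MODEL_PAGE_SIZE  (1UL << MODEL_PAGE_SHIFT)
#define MODEL_PAGE_MASK  (~(MODEL_PAGE_SIZE - 1))

/*
 * Count the extra pages gru_preload_tlb() would drop in for one faulting
 * BCOPY operand: walk backward from the last byte of the transfer toward
 * the faulting page, never exceeding tlb_preload_count pages.
 */
static int model_preload_pages(unsigned long fault_vaddr, unsigned long last_byte,
                               unsigned int tlb_preload_count)
{
        unsigned long vaddr = last_byte & MODEL_PAGE_MASK;
        unsigned long fault_page = fault_vaddr & MODEL_PAGE_MASK;
        unsigned long limit = fault_page + tlb_preload_count * MODEL_PAGE_SIZE;
        int pages = 0;

        if (vaddr > limit)
                vaddr = limit;  /* cap the window at tlb_preload_count pages */

        /*
         * Stop before the faulting page itself; that one is handled by the
         * normal dropin (tfh_write_restart).  The driver does a gru_vtop()
         * translation plus a tfh_write_only() dropin per iteration.
         */
        while (vaddr > fault_page) {
                pages++;
                vaddr -= MODEL_PAGE_SIZE;
        }
        return pages;
}

int main(void)
{
        /* A 64 KB copy faulting on its first byte, with a budget of 3 pages. */
        unsigned long fault = 0x400000UL;

        printf("extra pages preloaded: %d\n",
               model_preload_pages(fault, fault + 64 * 1024 - 1, 3));   /* 3 */
        return 0;
}

With a smaller copy (say 8 KB) the loop stops at the end of the transfer instead, which is the "whichever is smaller" behavior described in the gru_preload_tlb() comment.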
89 changes: 85 additions & 4 deletions drivers/misc/sgi-gru/grufault.c
@@ -289,6 +289,61 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
}


/*
* Flush a CBE from cache. The CBE is clean in the cache. Dirty the
* CBE cacheline so that the line will be written back to home agent.
* Otherwise the line may be silently dropped. This has no impact
* except on performance.
*/
static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
{
if (unlikely(cbe)) {
cbe->cbrexecstatus = 0; /* make CL dirty */
gru_flush_cache(cbe);
}
}

/*
* Preload the TLB with entries that may be required. Currently, preloading
* is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to
* the end of the bcopy transfer, whichever is smaller.
*/
static void gru_preload_tlb(struct gru_state *gru,
struct gru_thread_state *gts, int atomic,
unsigned long fault_vaddr, int asid, int write,
unsigned char tlb_preload_count,
struct gru_tlb_fault_handle *tfh,
struct gru_control_block_extended *cbe)
{
unsigned long vaddr = 0, gpa;
int ret, pageshift;

if (cbe->opccpy != OP_BCOPY)
return;

if (fault_vaddr == cbe->cbe_baddr0)
vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
else if (fault_vaddr == cbe->cbe_baddr1)
vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;

fault_vaddr &= PAGE_MASK;
vaddr &= PAGE_MASK;
vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);

while (vaddr > fault_vaddr) {
ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
GRU_PAGESIZE(pageshift)))
return;
gru_dbg(grudev,
"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
vaddr, asid, write, pageshift, gpa);
vaddr -= PAGE_SIZE;
STAT(tlb_preload_page);
}
}
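The two branches near the top of gru_preload_tlb() above pick whichever BCOPY operand faulted and compute the virtual address of the last byte the instruction will touch: cbe_src_cl cache lines for the operand at cbe_baddr0, or cbe_nelemcur elements of (1 << xtypecpy) bytes each for the operand at cbe_baddr1. A minimal restatement of that arithmetic, with illustrative names and a 64-byte cache line assumed:

#include <stdio.h>

/* Last byte touched by the operand described as a count of cache lines. */
static unsigned long bcopy_last_byte_cl(unsigned long baddr, unsigned int cachelines,
                                        unsigned int cacheline_bytes)
{
        return baddr + (unsigned long)cacheline_bytes * cachelines - 1;
}

/* Last byte touched by the operand described as a count of elements. */
static unsigned long bcopy_last_byte_elem(unsigned long baddr, unsigned int xtype,
                                          unsigned int nelem)
{
        return baddr + (1UL << xtype) * nelem - 1;
}

int main(void)
{
        /* 64 cache lines of 64 bytes and 512 eight-byte elements both end
           4 KB - 1 beyond their base address. */
        printf("0x%lx 0x%lx\n",
               bcopy_last_byte_cl(0x400000UL, 64, 64),
               bcopy_last_byte_elem(0x500000UL, 3, 512));
        return 0;
}

gru_preload_tlb() then page-aligns both addresses and preloads downward from the page containing this last byte toward the faulting page.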

/*
* Drop a TLB entry into the GRU. The fault is described by info in a TFH.
* Input:
@@ -303,6 +358,8 @@ static int gru_try_dropin(struct gru_thread_state *gts,
struct gru_tlb_fault_handle *tfh,
struct gru_instruction_bits *cbk)
{
struct gru_control_block_extended *cbe = NULL;
unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
unsigned long gpa = 0, vaddr = 0;

@@ -313,6 +370,14 @@ static int gru_try_dropin(struct gru_thread_state *gts,
* the dropin is ignored. This eliminates the need for additional locks.
*/

/*
* Prefetch the CBE if doing TLB preloading
*/
if (unlikely(tlb_preload_count)) {
cbe = gru_tfh_to_cbe(tfh);
prefetchw(cbe);
}

/*
* Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
* Might be a hardware race OR a stupid user. Ignore FMM because FMM
@@ -359,6 +424,12 @@ static int gru_try_dropin(struct gru_thread_state *gts,
goto failupm;
}
}

if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
gru_preload_tlb(gts->ts_gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
gru_flush_cache_cbe(cbe);
}

gru_cb_set_istatus_active(cbk);
tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
GRU_PAGESIZE(pageshift));
@@ -378,25 +449,29 @@ static int gru_try_dropin(struct gru_thread_state *gts,
tfh_user_polling_mode(tfh);
else
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
return -EAGAIN;

failupm:
/* Atomic failure switch CBR to UPM */
tfh_user_polling_mode(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_upm);
gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
return 1;

failfmm:
/* FMM state on UPM call */
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_fmm);
gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
return 0;

failnoexception:
/* TFH status did not show exception pending */
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
if (cbk)
gru_flush_cache(cbk);
STAT(tlb_dropin_fail_no_exception);
@@ -407,6 +482,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
failidle:
/* TFH state was idle - no miss pending */
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
if (cbk)
gru_flush_cache(cbk);
STAT(tlb_dropin_fail_idle);
@@ -416,6 +492,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
failinval:
/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
tfh_exception(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_invalid);
gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
return -EFAULT;
@@ -426,6 +503,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
tfh_user_polling_mode(tfh);
else
gru_flush_cache(tfh);
gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_range_active);
gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
tfh, vaddr);
@@ -627,7 +705,7 @@ int gru_get_exception_detail(unsigned long arg)
excdet.exceptdet1 = cbe->idef3upd;
excdet.cbrstate = cbe->cbrstate;
excdet.cbrexecstatus = cbe->cbrexecstatus;
gru_flush_cache(cbe);
gru_flush_cache_cbe(cbe);
ret = 0;
} else {
ret = -EAGAIN;
@@ -770,9 +848,12 @@ int gru_set_context_option(unsigned long arg)
return -EFAULT;
gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);

gts = gru_alloc_locked_gts(req.gseg);
if (IS_ERR(gts))
return PTR_ERR(gts);
gts = gru_find_lock_gts(req.gseg);
if (!gts) {
gts = gru_alloc_locked_gts(req.gseg);
if (IS_ERR(gts))
return PTR_ERR(gts);
}

switch (req.op) {
case sco_blade_chiplet:
1 change: 1 addition & 0 deletions drivers/misc/sgi-gru/grufile.c
@@ -152,6 +152,7 @@ static int gru_create_new_context(unsigned long arg)
vdata->vd_dsr_au_count =
GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
vdata->vd_tlb_preload_count = req.tlb_preload_count;
ret = 0;
}
up_write(&current->mm->mmap_sem);
11 changes: 7 additions & 4 deletions drivers/misc/sgi-gru/gruhandles.c
@@ -165,17 +165,20 @@ int tgh_invalidate(struct gru_tlb_global_handle *tgh,
return wait_instruction_complete(tgh, tghop_invalidate);
}

void tfh_write_only(struct gru_tlb_fault_handle *tfh,
unsigned long pfn, unsigned long vaddr,
int asid, int dirty, int pagesize)
int tfh_write_only(struct gru_tlb_fault_handle *tfh,
unsigned long paddr, int gaa,
unsigned long vaddr, int asid, int dirty,
int pagesize)
{
tfh->fillasid = asid;
tfh->fillvaddr = vaddr;
tfh->pfn = pfn;
tfh->pfn = paddr >> GRU_PADDR_SHIFT;
tfh->gaa = gaa;
tfh->dirty = dirty;
tfh->pagesize = pagesize;
tfh->opc = TFHOP_WRITE_ONLY;
start_instruction(tfh);
return wait_instruction_complete(tfh, tfhop_write_only);
}

void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
20 changes: 18 additions & 2 deletions drivers/misc/sgi-gru/gruhandles.h
@@ -164,6 +164,16 @@ static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
return vaddr + GRU_SIZE * (2 * pnode + chiplet);
}

static inline struct gru_control_block_extended *gru_tfh_to_cbe(
struct gru_tlb_fault_handle *tfh)
{
unsigned long cbe;

cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE;
return (struct gru_control_block_extended *)cbe;
}




/*
@@ -446,6 +456,12 @@ struct gru_control_block_extended {
unsigned int cbrexecstatus:8;
};

/* CBE fields for active BCOPY instructions */
#define cbe_baddr0 idef1upd
#define cbe_baddr1 idef3upd
#define cbe_src_cl idef6cpy
#define cbe_nelemcur idef5upd

enum gru_cbr_state {
CBRSTATE_INACTIVE,
CBRSTATE_IDLE,
@@ -493,8 +509,8 @@ int cch_interrupt_sync(struct gru_context_configuration_handle *cch);
int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
unsigned long vaddrmask, int asid, int pagesize, int global, int n,
unsigned short ctxbitmap);
void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn,
unsigned long vaddr, int asid, int dirty, int pagesize);
int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
void tfh_restart(struct gru_tlb_fault_handle *tfh);
2 changes: 1 addition & 1 deletion drivers/misc/sgi-gru/grukservices.c
@@ -161,7 +161,7 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
down_write(&bs->bs_kgts_sema);

if (!bs->bs_kgts) {
bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
bs->bs_kgts->ts_user_blade_id = blade_id;
}
kgts = bs->bs_kgts;
1 change: 1 addition & 0 deletions drivers/misc/sgi-gru/grulib.h
@@ -86,6 +86,7 @@ struct gru_create_context_req {
unsigned int control_blocks;
unsigned int maximum_thread_count;
unsigned int options;
unsigned char tlb_preload_count;
};
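tlb_preload_count is the only user-visible knob added by this commit; it is supplied at context-creation time and threaded through vd_tlb_preload_count (grufile.c) to ts_tlb_preload_count (grumain.c). A hypothetical user-side sketch (the /dev/gru path and the GRU_CREATE_CONTEXT ioctl name are assumptions about the libgru interface, not part of this diff):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include "grulib.h"     /* from drivers/misc/sgi-gru; defines the struct above */

int main(void)
{
        struct gru_create_context_req req = {0};
        int fd = open("/dev/gru", O_RDWR);      /* assumed device node */

        if (fd < 0)
                return 1;

        /* Only fields visible in this diff are set; the rest stay 0. */
        req.data_segment_bytes = 8192;
        req.control_blocks = 16;
        req.maximum_thread_count = 1;
        req.tlb_preload_count = 8;      /* preload up to 8 pages per BCOPY miss */

        /* GRU_CREATE_CONTEXT is assumed to be the request code handled by
           gru_create_new_context() in grufile.c. */
        if (ioctl(fd, GRU_CREATE_CONTEXT, &req) < 0)
                perror("GRU_CREATE_CONTEXT");
        return 0;
}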

/*
8 changes: 6 additions & 2 deletions drivers/misc/sgi-gru/grumain.c
@@ -316,7 +316,8 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
* Allocate a thread state structure.
*/
struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
int cbr_au_count, int dsr_au_count, int options, int tsid)
int cbr_au_count, int dsr_au_count,
unsigned char tlb_preload_count, int options, int tsid)
{
struct gru_thread_state *gts;
struct gru_mm_struct *gms;
@@ -334,6 +335,7 @@ struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
mutex_init(&gts->ts_ctxlock);
gts->ts_cbr_au_count = cbr_au_count;
gts->ts_dsr_au_count = dsr_au_count;
gts->ts_tlb_preload_count = tlb_preload_count;
gts->ts_user_options = options;
gts->ts_user_blade_id = -1;
gts->ts_user_chiplet_id = -1;
@@ -403,7 +405,9 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
struct gru_vma_data *vdata = vma->vm_private_data;
struct gru_thread_state *gts, *ngts;

gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count,
gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
vdata->vd_dsr_au_count,
vdata->vd_tlb_preload_count,
vdata->vd_user_options, tsid);
if (IS_ERR(gts))
return gts;
4 changes: 3 additions & 1 deletion drivers/misc/sgi-gru/gruprocfs.c
@@ -76,6 +76,7 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, check_context_retarget_intr);
printstat(s, check_context_unload);
printstat(s, tlb_dropin);
printstat(s, tlb_preload_page);
printstat(s, tlb_dropin_fail_no_asid);
printstat(s, tlb_dropin_fail_upm);
printstat(s, tlb_dropin_fail_invalid);
@@ -127,7 +128,8 @@ static int mcs_statistics_show(struct seq_file *s, void *p)
int op;
unsigned long total, count, max;
static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt",
"cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"};
"cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
"tfh_write_restart", "tgh_invalidate"};

seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
for (op = 0; op < mcsop_last; op++) {
9 changes: 7 additions & 2 deletions drivers/misc/sgi-gru/grutables.h
@@ -202,6 +202,7 @@ struct gru_stats_s {
atomic_long_t check_context_retarget_intr;
atomic_long_t check_context_unload;
atomic_long_t tlb_dropin;
atomic_long_t tlb_preload_page;
atomic_long_t tlb_dropin_fail_no_asid;
atomic_long_t tlb_dropin_fail_upm;
atomic_long_t tlb_dropin_fail_invalid;
@@ -245,7 +246,8 @@ struct gru_stats_s {
};

enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
cchop_deallocate, tghop_invalidate, mcsop_last};
cchop_deallocate, tfhop_write_only, tfhop_write_restart,
tghop_invalidate, mcsop_last};

struct mcs_op_statistic {
atomic_long_t count;
@@ -335,6 +337,7 @@ struct gru_vma_data {
long vd_user_options;/* misc user option flags */
int vd_cbr_au_count;
int vd_dsr_au_count;
unsigned char vd_tlb_preload_count;
};

/*
@@ -350,6 +353,7 @@ struct gru_thread_state {
struct gru_state *ts_gru; /* GRU where the context is
loaded */
struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
unsigned char ts_tlb_preload_count; /* TLB preload pages */
unsigned long ts_cbr_map; /* map of allocated CBRs */
unsigned long ts_dsr_map; /* map of allocated DATA
resources */
@@ -661,7 +665,8 @@ extern int gru_proc_init(void);
extern void gru_proc_exit(void);

extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
int cbr_au_count, int dsr_au_count, int options, int tsid);
int cbr_au_count, int dsr_au_count,
unsigned char tlb_preload_count, int options, int tsid);
extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
int cbr_au_count, char *cbmap);
extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
