Merge branch 'parisc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux

Pull parisc fixes from Helge Deller:
 "We have one important patch from Dave Anglin and myself which fixes
  PTE/TLB race conditions which caused random segmentation faults on our
  debian buildd servers, and one patch from Alex Ivanov which speeds up
  the graphical text console on the STI framebuffer driver"

* 'parisc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results
  stifb: Implement hardware accelerated copyarea
Linus Torvalds committed Jul 10, 2015
2 parents 1604f87 + 01ab605 commit b9243b5
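
The pull message blames random segmentation faults on PTE updates racing with TLB purges. As a rough illustration of the pattern the pgtable.h hunks below introduce (one lock serializing PTE updates and TLB purges, and a purge issued only when the old PTE could have been inserted into the TLB), here is a minimal user-space model. All names, flag values and the printf stand-in for the purge are hypothetical, not the kernel's.

#include <pthread.h>
#include <stdio.h>

#define PAGE_PRESENT  0x1   /* stand-in flag values, not parisc's */
#define PAGE_ACCESSED 0x2

static pthread_mutex_t tlb_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long pte;       /* one fake page-table entry */

static int pte_inserted(unsigned long v)
{
        /* Only PTEs with both bits set could have been loaded by the
         * (modelled) TLB miss handler, so only those need a purge. */
        return (v & (PAGE_PRESENT | PAGE_ACCESSED)) ==
               (PAGE_PRESENT | PAGE_ACCESSED);
}

static void set_pte_at(unsigned long newval)
{
        unsigned long old;

        pthread_mutex_lock(&tlb_lock);
        old = pte;
        pte = newval;
        if (pte_inserted(old))
                printf("purge TLB entry for old pte %#lx\n", old);
        pthread_mutex_unlock(&tlb_lock);
}

int main(void)
{
        set_pte_at(PAGE_PRESENT);   /* old PTE never inserted: no purge */
        pte |= PAGE_ACCESSED;       /* pretend a TLB miss marked it accessed */
        set_pte_at(0);              /* now the stale TLB entry gets purged */
        return 0;
}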
Showing 6 changed files with 250 additions and 170 deletions.
55 changes: 37 additions & 18 deletions arch/parisc/include/asm/pgtable.h
@@ -16,7 +16,7 @@
#include <asm/processor.h>
#include <asm/cache.h>

-extern spinlock_t pa_dbit_lock;
+extern spinlock_t pa_tlb_lock;

/*
* kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
Expand All @@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock;
*/
#define kern_addr_valid(addr) (1)

+/* Purge data and instruction TLB entries. Must be called holding
+* the pa_tlb_lock. The TLB purge instructions are slow on SMP
+* machines since the purge must be broadcast to all CPUs.
+*/
+
+static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+{
+mtsp(mm->context, 1);
+pdtlb(addr);
+if (unlikely(split_tlb))
+pitlb(addr);
+}

/* Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
* hook is made available.
@@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock;
*(pteptr) = (pteval); \
} while(0)

-extern void purge_tlb_entries(struct mm_struct *, unsigned long);
+#define pte_inserted(x) \
+((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \
+== (_PAGE_PRESENT|_PAGE_ACCESSED))

-#define set_pte_at(mm, addr, ptep, pteval) \
-do { \
+#define set_pte_at(mm, addr, ptep, pteval) \
+do { \
+pte_t old_pte; \
unsigned long flags; \
-spin_lock_irqsave(&pa_dbit_lock, flags); \
-set_pte(ptep, pteval); \
-purge_tlb_entries(mm, addr); \
-spin_unlock_irqrestore(&pa_dbit_lock, flags); \
+spin_lock_irqsave(&pa_tlb_lock, flags); \
+old_pte = *ptep; \
+set_pte(ptep, pteval); \
+if (pte_inserted(old_pte)) \
+purge_tlb_entries(mm, addr); \
+spin_unlock_irqrestore(&pa_tlb_lock, flags); \
} while (0)

#endif /* !__ASSEMBLY__ */
@@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page;

#define pte_none(x) (pte_val(x) == 0)
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0)
+#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0))

#define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK)
#define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
if (!pte_young(*ptep))
return 0;

-spin_lock_irqsave(&pa_dbit_lock, flags);
+spin_lock_irqsave(&pa_tlb_lock, flags);
pte = *ptep;
if (!pte_young(pte)) {
-spin_unlock_irqrestore(&pa_dbit_lock, flags);
+spin_unlock_irqrestore(&pa_tlb_lock, flags);
return 0;
}
set_pte(ptep, pte_mkold(pte));
purge_tlb_entries(vma->vm_mm, addr);
-spin_unlock_irqrestore(&pa_dbit_lock, flags);
+spin_unlock_irqrestore(&pa_tlb_lock, flags);
return 1;
}

@@ -453,22 +471,23 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t old_pte;
unsigned long flags;

-spin_lock_irqsave(&pa_dbit_lock, flags);
+spin_lock_irqsave(&pa_tlb_lock, flags);
old_pte = *ptep;
-pte_clear(mm,addr,ptep);
-purge_tlb_entries(mm, addr);
-spin_unlock_irqrestore(&pa_dbit_lock, flags);
+set_pte(ptep, __pte(0));
+if (pte_inserted(old_pte))
+purge_tlb_entries(mm, addr);
+spin_unlock_irqrestore(&pa_tlb_lock, flags);

return old_pte;
}

static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
unsigned long flags;
-spin_lock_irqsave(&pa_dbit_lock, flags);
+spin_lock_irqsave(&pa_tlb_lock, flags);
set_pte(ptep, pte_wrprotect(*ptep));
purge_tlb_entries(mm, addr);
-spin_unlock_irqrestore(&pa_dbit_lock, flags);
+spin_unlock_irqrestore(&pa_tlb_lock, flags);
}

#define pte_same(A,B) (pte_val(A) == pte_val(B))
53 changes: 29 additions & 24 deletions arch/parisc/include/asm/tlbflush.h
@@ -13,6 +13,9 @@
* active at any one time on the Merced bus. This tlb purge
* synchronisation is fairly lightweight and harmless so we activate
* it on all systems not just the N class.
+* It is also used to ensure PTE updates are atomic and consistent
+* with the TLB.
*/
extern spinlock_t pa_tlb_lock;

@@ -24,31 +27,43 @@ extern void flush_tlb_all_local(void *);

#define smp_flush_tlb_all() flush_tlb_all()

+int __flush_tlb_range(unsigned long sid,
+unsigned long start, unsigned long end);
+
+#define flush_tlb_range(vma, start, end) \
+__flush_tlb_range((vma)->vm_mm->context, start, end)
+
+#define flush_tlb_kernel_range(start, end) \
+__flush_tlb_range(0, start, end)

/*
* flush_tlb_mm()
*
-* XXX This code is NOT valid for HP-UX compatibility processes,
-* (although it will probably work 99% of the time). HP-UX
-* processes are free to play with the space id's and save them
-* over long periods of time, etc. so we have to preserve the
-* space and just flush the entire tlb. We need to check the
-* personality in order to do that, but the personality is not
-* currently being set correctly.
-*
-* Of course, Linux processes could do the same thing, but
-* we don't support that (and the compilers, dynamic linker,
-* etc. do not do that).
+* The code to switch to a new context is NOT valid for processes
+* which play with the space id's. Thus, we have to preserve the
+* space and just flush the entire tlb. However, the compilers,
+* dynamic linker, etc, do not manipulate space id's, so there
+* could be a significant performance benefit in switching contexts
+* and not flushing the whole tlb.
*/

static inline void flush_tlb_mm(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm); /* Should never happen */

#if 1 || defined(CONFIG_SMP)
+/* Except for very small threads, flushing the whole TLB is
+* faster than using __flush_tlb_range. The pdtlb and pitlb
+* instructions are very slow because of the TLB broadcast.
+* It might be faster to do local range flushes on all CPUs
+* on PA 2.0 systems.
+*/
flush_tlb_all();
#else
/* FIXME: currently broken, causing space id and protection ids
-* to go out of sync, resulting in faults on userspace accesses.
+* to go out of sync, resulting in faults on userspace accesses.
+* This approach needs further investigation since running many
+* small applications (e.g., GCC testsuite) is faster on HP-UX.
*/
if (mm) {
if (mm->context != 0)
@@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
{
unsigned long flags, sid;

-/* For one page, it's not worth testing the split_tlb variable */

mb();
sid = vma->vm_mm->context;
purge_tlb_start(flags);
mtsp(sid, 1);
pdtlb(addr);
-pitlb(addr);
+if (unlikely(split_tlb))
+pitlb(addr);
purge_tlb_end(flags);
}

-void __flush_tlb_range(unsigned long sid,
-unsigned long start, unsigned long end);
-
-#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
-
-#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)

#endif
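
The declarations above move __flush_tlb_range() into this header and give it an int return value. Below is a small stand-alone model of the decision it makes, based on the cache.c hunk further down (threshold compare, per-page loop, return 1 when the whole TLB was flushed); the helper names, the threshold value and the printf stubs are made up for illustration.

#include <stdio.h>

#define MODEL_PAGE_SIZE 4096UL

/* Stand-in for the boot-time threshold cache.c computes. */
static unsigned long tlb_flush_threshold = 2 * 1024 * 1024;

static void purge_one_page(unsigned long addr)   /* models pdtlb/pitlb under the lock */
{
        printf("purge TLB entry for page %#lx\n", addr);
}

static void flush_everything(void)               /* models flush_tlb_all() */
{
        printf("flush the whole TLB\n");
}

/* Returns 1 if the whole TLB was flushed, 0 if only the range was purged. */
static int model_flush_tlb_range(unsigned long start, unsigned long end)
{
        if (end - start >= tlb_flush_threshold) {
                flush_everything();
                return 1;
        }
        for (; start < end; start += MODEL_PAGE_SIZE)
                purge_one_page(start);
        return 0;
}

int main(void)
{
        model_flush_tlb_range(0x10000, 0x10000 + 3 * MODEL_PAGE_SIZE); /* small: per page */
        model_flush_tlb_range(0, 8 * 1024 * 1024);                     /* large: flush all */
        return 0;
}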
105 changes: 67 additions & 38 deletions arch/parisc/kernel/cache.c
@@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local);
EXPORT_SYMBOL(flush_kernel_icache_range_asm);

#define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
-int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;

+#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */
+static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD;

void __init parisc_setup_cache_timing(void)
{
unsigned long rangetime, alltime;
-unsigned long size;
+unsigned long size, start;

alltime = mfctl(16);
flush_data_cache();
@@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void)
/* Racy, but if we see an intermediate value, it's ok too... */
parisc_cache_flush_threshold = size * alltime / rangetime;

-parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1);
+parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
if (!parisc_cache_flush_threshold)
parisc_cache_flush_threshold = FLUSH_THRESHOLD;

if (parisc_cache_flush_threshold > cache_info.dc_size)
parisc_cache_flush_threshold = cache_info.dc_size;

printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus());
printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
parisc_cache_flush_threshold/1024);

+/* calculate TLB flush threshold */
+
+alltime = mfctl(16);
+flush_tlb_all();
+alltime = mfctl(16) - alltime;
+
+size = PAGE_SIZE;
+start = (unsigned long) _text;
+rangetime = mfctl(16);
+while (start < (unsigned long) _end) {
+flush_tlb_kernel_range(start, start + PAGE_SIZE);
+start += PAGE_SIZE;
+size += PAGE_SIZE;
+}
+rangetime = mfctl(16) - rangetime;
+
+printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
+alltime, size, rangetime);
+
+parisc_tlb_flush_threshold = size * alltime / rangetime;
+parisc_tlb_flush_threshold *= num_online_cpus();
+parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
+if (!parisc_tlb_flush_threshold)
+parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
+
+printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+parisc_tlb_flush_threshold/1024);
}

extern void purge_kernel_dcache_page_asm(unsigned long);
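
The threshold arithmetic above picks the range size at which purging page by page costs roughly as much as one full TLB flush, scaled by the number of online CPUs. A worked example with invented cycle counts (the real values come from the mfctl(16) timer reads done at boot):

#include <stdio.h>

int main(void)
{
        /* Invented measurements, only for illustrating the formula. */
        unsigned long long alltime   = 3000;      /* cycles for one flush_tlb_all() */
        unsigned long long size      = 1 << 20;   /* bytes flushed page by page */
        unsigned long long rangetime = 24000;     /* cycles for those per-page flushes */
        unsigned long long cpus      = 2;         /* num_online_cpus() stand-in */

        /* Same shape as parisc_setup_cache_timing() above; the kernel
         * additionally rounds the result up with PAGE_ALIGN(). */
        unsigned long long threshold = size * alltime / rangetime;
        threshold *= cpus;

        printf("break-even range: %llu kB\n", threshold / 1024);  /* prints 256 */
        return 0;
}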
@@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
}
EXPORT_SYMBOL(copy_user_page);

-void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
-{
-unsigned long flags;
-
-/* Note: purge_tlb_entries can be called at startup with
-no context. */
-
-purge_tlb_start(flags);
-mtsp(mm->context, 1);
-pdtlb(addr);
-pitlb(addr);
-purge_tlb_end(flags);
-}
-EXPORT_SYMBOL(purge_tlb_entries);

-void __flush_tlb_range(unsigned long sid, unsigned long start,
-unsigned long end)
+/* __flush_tlb_range()
+*
+* returns 1 if all TLBs were flushed.
+*/
+int __flush_tlb_range(unsigned long sid, unsigned long start,
+unsigned long end)
{
-unsigned long npages;
+unsigned long flags, size;

-npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */
+size = (end - start);
+if (size >= parisc_tlb_flush_threshold) {
flush_tlb_all();
-else {
-unsigned long flags;
+return 1;
+}

+/* Purge TLB entries for small ranges using the pdtlb and
+pitlb instructions. These instructions execute locally
+but cause a purge request to be broadcast to other TLBs. */
+if (likely(!split_tlb)) {
+while (start < end) {
+purge_tlb_start(flags);
+mtsp(sid, 1);
+pdtlb(start);
+purge_tlb_end(flags);
+start += PAGE_SIZE;
+}
+return 0;
+}
+
+/* split TLB case */
+while (start < end) {
purge_tlb_start(flags);
mtsp(sid, 1);
-if (split_tlb) {
-while (npages--) {
-pdtlb(start);
-pitlb(start);
-start += PAGE_SIZE;
-}
-} else {
-while (npages--) {
-pdtlb(start);
-start += PAGE_SIZE;
-}
-}
+pdtlb(start);
+pitlb(start);
purge_tlb_end(flags);
+start += PAGE_SIZE;
}
+return 0;
}

static void cacheflush_h_tmp_function(void *dummy)
(Diffs for the remaining 3 changed files did not load.)
