From 51c4dc042f7b03d5c92455108f6175b9aa336326 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sat, 15 Aug 2009 07:41:45 +0900 Subject: [PATCH] --- yaml --- r: 163434 b: refs/heads/master c: 7a90e00dda0bae66b5232d5a37155f13a0581369 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/arch/sh/include/asm/bugs.h | 24 +- trunk/arch/sh/include/asm/cacheflush.h | 71 +++-- trunk/arch/sh/include/asm/page.h | 24 +- trunk/arch/sh/include/asm/pgtable.h | 34 +-- trunk/arch/sh/include/asm/processor.h | 13 +- trunk/arch/sh/include/asm/system.h | 14 +- trunk/arch/sh/include/asm/system_32.h | 15 - trunk/arch/sh/include/asm/system_64.h | 10 - trunk/arch/sh/include/asm/types.h | 2 - .../sh/include/cpu-common/cpu/cacheflush.h | 40 +-- .../arch/sh/include/cpu-sh2a/cpu/cacheflush.h | 34 +++ .../arch/sh/include/cpu-sh3/cpu/cacheflush.h | 46 +++ .../arch/sh/include/cpu-sh4/cpu/cacheflush.h | 43 +++ .../arch/sh/include/cpu-sh5/cpu/cacheflush.h | 33 ++ trunk/arch/sh/kernel/Makefile_64 | 7 +- trunk/arch/sh/kernel/cpu/init.c | 2 + trunk/arch/sh/kernel/cpu/sh2/entry.S | 3 +- trunk/arch/sh/kernel/cpu/sh2/probe.c | 1 - trunk/arch/sh/kernel/cpu/sh2a/entry.S | 3 +- trunk/arch/sh/kernel/cpu/sh2a/probe.c | 2 - trunk/arch/sh/kernel/cpu/sh3/entry.S | 32 +- trunk/arch/sh/kernel/cpu/sh3/probe.c | 2 - trunk/arch/sh/kernel/cpu/sh4/probe.c | 10 +- trunk/arch/sh/kernel/cpu/sh5/probe.c | 2 - trunk/arch/sh/kernel/process_64.c | 24 +- trunk/arch/sh/kernel/setup.c | 1 - trunk/arch/sh/kernel/sh_ksyms_32.c | 6 + trunk/arch/sh/kernel/sh_ksyms_64.c | 9 + trunk/arch/sh/kernel/signal_64.c | 38 ++- trunk/arch/sh/lib/Makefile | 2 +- .../sh/lib/{__clear_user.S => clear_page.S} | 46 +++ trunk/arch/sh/lib64/Makefile | 2 +- trunk/arch/sh/lib64/clear_page.S | 54 ++++ trunk/arch/sh/mm/Makefile | 70 +---- trunk/arch/sh/mm/Makefile_32 | 43 +++ trunk/arch/sh/mm/Makefile_64 | 46 +++ trunk/arch/sh/mm/cache-sh2.c | 13 +- trunk/arch/sh/mm/cache-sh2a.c | 17 +- trunk/arch/sh/mm/cache-sh3.c | 25 +- trunk/arch/sh/mm/cache-sh4.c | 230 ++++++++++---- trunk/arch/sh/mm/cache-sh5.c | 277 +++++++++++++++-- trunk/arch/sh/mm/cache-sh7705.c | 40 +-- trunk/arch/sh/mm/cache.c | 287 ------------------ trunk/arch/sh/mm/fault_32.c | 32 +- trunk/arch/sh/mm/fault_64.c | 11 +- trunk/arch/sh/mm/flush-sh4.c | 108 ------- trunk/arch/sh/mm/init.c | 5 +- trunk/arch/sh/mm/kmap.c | 64 ---- trunk/arch/sh/mm/mmap.c | 2 +- trunk/arch/sh/mm/pg-nommu.c | 38 +++ trunk/arch/sh/mm/pg-sh4.c | 146 +++++++++ trunk/arch/sh/mm/pg-sh7705.c | 138 +++++++++ trunk/arch/sh/mm/{nommu.c => tlb-nommu.c} | 44 +-- trunk/arch/sh/mm/tlb-pteaex.c | 28 +- trunk/arch/sh/mm/tlb-sh3.c | 27 +- trunk/arch/sh/mm/tlb-sh4.c | 28 +- trunk/arch/sh/mm/tlb-sh5.c | 21 +- trunk/arch/sh/mm/tlbflush_64.c | 22 +- 59 files changed, 1461 insertions(+), 952 deletions(-) create mode 100644 trunk/arch/sh/include/cpu-sh2a/cpu/cacheflush.h create mode 100644 trunk/arch/sh/include/cpu-sh3/cpu/cacheflush.h create mode 100644 trunk/arch/sh/include/cpu-sh4/cpu/cacheflush.h create mode 100644 trunk/arch/sh/include/cpu-sh5/cpu/cacheflush.h rename trunk/arch/sh/lib/{__clear_user.S => clear_page.S} (76%) create mode 100644 trunk/arch/sh/lib64/clear_page.S create mode 100644 trunk/arch/sh/mm/Makefile_32 create mode 100644 trunk/arch/sh/mm/Makefile_64 delete mode 100644 trunk/arch/sh/mm/cache.c delete mode 100644 trunk/arch/sh/mm/flush-sh4.c delete mode 100644 trunk/arch/sh/mm/kmap.c create mode 100644 trunk/arch/sh/mm/pg-nommu.c create mode 100644 trunk/arch/sh/mm/pg-sh4.c create mode 100644 trunk/arch/sh/mm/pg-sh7705.c rename 
trunk/arch/sh/mm/{nommu.c => tlb-nommu.c} (54%) diff --git a/[refs] b/[refs] index fc45a56cbe9e..f181e969b622 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 1b3edd9745ef3a9ee831fe5a611757686579c9e1 +refs/heads/master: 7a90e00dda0bae66b5232d5a37155f13a0581369 diff --git a/trunk/arch/sh/include/asm/bugs.h b/trunk/arch/sh/include/asm/bugs.h index 46260fcbdf4b..4924ff6f5439 100644 --- a/trunk/arch/sh/include/asm/bugs.h +++ b/trunk/arch/sh/include/asm/bugs.h @@ -21,25 +21,25 @@ static void __init check_bugs(void) current_cpu_data.loops_per_jiffy = loops_per_jiffy; - switch (current_cpu_data.family) { - case CPU_FAMILY_SH2: + switch (current_cpu_data.type) { + case CPU_SH7619: *p++ = '2'; break; - case CPU_FAMILY_SH2A: + case CPU_SH7201 ... CPU_MXG: *p++ = '2'; *p++ = 'a'; break; - case CPU_FAMILY_SH3: + case CPU_SH7705 ... CPU_SH7729: *p++ = '3'; break; - case CPU_FAMILY_SH4: + case CPU_SH7750 ... CPU_SH4_501: *p++ = '4'; break; - case CPU_FAMILY_SH4A: + case CPU_SH7763 ... CPU_SHX3: *p++ = '4'; *p++ = 'a'; break; - case CPU_FAMILY_SH4AL_DSP: + case CPU_SH7343 ... CPU_SH7366: *p++ = '4'; *p++ = 'a'; *p++ = 'l'; @@ -48,15 +48,15 @@ static void __init check_bugs(void) *p++ = 's'; *p++ = 'p'; break; - case CPU_FAMILY_SH5: + case CPU_SH5_101 ... CPU_SH5_103: *p++ = '6'; *p++ = '4'; break; - case CPU_FAMILY_UNKNOWN: + case CPU_SH_NONE: /* - * Specifically use CPU_FAMILY_UNKNOWN rather than - * default:, so we're able to have the compiler whine - * about unhandled enumerations. + * Specifically use CPU_SH_NONE rather than default:, + * so we're able to have the compiler whine about + * unhandled enumerations. */ break; } diff --git a/trunk/arch/sh/include/asm/cacheflush.h b/trunk/arch/sh/include/asm/cacheflush.h index 25b7f46494de..4c5462daa74c 100644 --- a/trunk/arch/sh/include/asm/cacheflush.h +++ b/trunk/arch/sh/include/asm/cacheflush.h @@ -3,18 +3,45 @@ #ifdef __KERNEL__ -#include +#ifdef CONFIG_CACHE_OFF +/* + * Nothing to do when the cache is disabled, initial flush and explicit + * disabling is handled at CPU init time. + * + * See arch/sh/kernel/cpu/init.c:cache_init(). + */ +#define p3_cache_init() do { } while (0) +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_cache_sigtramp(vaddr) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) +#define __flush_wback_region(start, size) do { (void)(start); } while (0) +#define __flush_purge_region(start, size) do { (void)(start); } while (0) +#define __flush_invalidate_region(start, size) do { (void)(start); } while (0) +#else #include -#define ARCH_HAS_FLUSH_ANON_PAGE -extern void __flush_anon_page(struct page *page, unsigned long); +/* + * Consistent DMA requires that the __flush_xxx() primitives must be set + * for any of the enabled non-coherent caches (most of the UP CPUs), + * regardless of PIPT or VIPT cache configurations. 
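The comment above states the contract for the three region primitives this hunk goes on to declare (__flush_wback_region, __flush_purge_region, __flush_invalidate_region). For reference, a minimal user-space sketch of the non-coherent DMA discipline they support, with the primitives stubbed out and the device transfer purely hypothetical:

#include <stdio.h>
#include <string.h>

/* Stubs standing in for the per-CPU primitives declared in this hunk. */
static void __flush_wback_region(void *start, int size)
{
	printf("wback %d bytes at %p\n", size, start);
}

static void __flush_purge_region(void *start, int size)
{
	printf("purge %d bytes at %p\n", size, start);
}

static char dma_buf[256];

int main(void)
{
	/* CPU -> device: fill the buffer, then write back the dirty
	 * lines so the device sees the data in memory. */
	memset(dma_buf, 0xaa, sizeof(dma_buf));
	__flush_wback_region(dma_buf, sizeof(dma_buf));
	/* ... start the (hypothetical) device read here ... */

	/* device -> CPU: purge (write back + invalidate) before the
	 * transfer so stale cache lines cannot mask the device's data. */
	__flush_purge_region(dma_buf, sizeof(dma_buf));
	/* ... start the (hypothetical) device write, then read dma_buf ... */

	return 0;
}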
+ */ -static inline void flush_anon_page(struct vm_area_struct *vma, - struct page *page, unsigned long vmaddr) -{ - if (boot_cpu_data.dcache.n_aliases && PageAnon(page)) - __flush_anon_page(page, vmaddr); -} +/* Flush (write-back only) a region (smaller than a page) */ +extern void __flush_wback_region(void *start, int size); +/* Flush (write-back & invalidate) a region (smaller than a page) */ +extern void __flush_purge_region(void *start, int size); +/* Flush (invalidate only) a region (smaller than a page) */ +extern void __flush_invalidate_region(void *start, int size); +#endif #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE static inline void flush_kernel_dcache_page(struct page *page) @@ -22,6 +49,7 @@ static inline void flush_kernel_dcache_page(struct page *page) flush_dcache_page(page); } +#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_CACHE_OFF) extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len); @@ -29,20 +57,23 @@ extern void copy_to_user_page(struct vm_area_struct *vma, extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len); +#else +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ + do { \ + flush_cache_page(vma, vaddr, page_to_pfn(page));\ + memcpy(dst, src, len); \ + flush_icache_user_range(vma, page, vaddr, len); \ + } while (0) + +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + do { \ + flush_cache_page(vma, vaddr, page_to_pfn(page));\ + memcpy(dst, src, len); \ + } while (0) +#endif #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) - -void kmap_coherent_init(void); -void *kmap_coherent(struct page *page, unsigned long addr); -void kunmap_coherent(void); - -#define PG_dcache_dirty PG_arch_1 - -void cpu_cache_init(void); - #endif /* __KERNEL__ */ #endif /* __ASM_SH_CACHEFLUSH_H */ diff --git a/trunk/arch/sh/include/asm/page.h b/trunk/arch/sh/include/asm/page.h index 81bffc0d6860..49592c780a6e 100644 --- a/trunk/arch/sh/include/asm/page.h +++ b/trunk/arch/sh/include/asm/page.h @@ -50,24 +50,26 @@ extern unsigned long shm_align_mask; extern unsigned long max_low_pfn, min_low_pfn; extern unsigned long memory_start, memory_end; -static inline unsigned long -pages_do_alias(unsigned long addr1, unsigned long addr2) -{ - return (addr1 ^ addr2) & shm_align_mask; -} - - -#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +extern void clear_page(void *to); extern void copy_page(void *to, void *from); +#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \ + (defined(CONFIG_CPU_SH5) || defined(CONFIG_CPU_SH4) || \ + defined(CONFIG_SH7705_CACHE_32KB)) struct page; struct vm_area_struct; - +extern void clear_user_page(void *to, unsigned long address, struct page *page); +extern void copy_user_page(void *to, void *from, unsigned long address, + struct page *page); +#if defined(CONFIG_CPU_SH4) extern void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma); #define __HAVE_ARCH_COPY_USER_HIGHPAGE -extern void clear_user_highpage(struct page *page, unsigned long vaddr); -#define clear_user_highpage clear_user_highpage +#endif +#else +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) 
copy_page(to, from) +#endif /* * These are used to make use of C type-checking.. diff --git a/trunk/arch/sh/include/asm/pgtable.h b/trunk/arch/sh/include/asm/pgtable.h index 4f3efa7d5a64..2a011b18090b 100644 --- a/trunk/arch/sh/include/asm/pgtable.h +++ b/trunk/arch/sh/include/asm/pgtable.h @@ -36,12 +36,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define NEFF_SIGN (1LL << (NEFF - 1)) #define NEFF_MASK (-1LL << NEFF) -static inline unsigned long long neff_sign_extend(unsigned long val) -{ - unsigned long long extended = val; - return (extended & NEFF_SIGN) ? (extended | NEFF_MASK) : extended; -} - #ifdef CONFIG_29BIT #define NPHYS 29 #else @@ -139,25 +133,27 @@ typedef pte_t *pte_addr_t; */ #define pgtable_cache_init() do { } while (0) -struct vm_area_struct; - -extern void __update_cache(struct vm_area_struct *vma, - unsigned long address, pte_t pte); -extern void __update_tlb(struct vm_area_struct *vma, - unsigned long address, pte_t pte); - -static inline void -update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) -{ - __update_cache(vma, address, pte); - __update_tlb(vma, address, pte); -} +#if !defined(CONFIG_CACHE_OFF) && (defined(CONFIG_CPU_SH4) || \ + defined(CONFIG_SH7705_CACHE_32KB)) +struct mm_struct; +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +#endif +struct vm_area_struct; +extern void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte); extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void paging_init(void); extern void page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd); +#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_CPU_SH4) && defined(CONFIG_MMU) +extern void kmap_coherent_init(void); +#else +#define kmap_coherent_init() do { } while (0) +#endif + /* arch/sh/mm/mmap.c */ #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN diff --git a/trunk/arch/sh/include/asm/processor.h b/trunk/arch/sh/include/asm/processor.h index db1a4f3a755f..ff7daaf9a620 100644 --- a/trunk/arch/sh/include/asm/processor.h +++ b/trunk/arch/sh/include/asm/processor.h @@ -44,17 +44,6 @@ enum cpu_type { CPU_SH_NONE }; -enum cpu_family { - CPU_FAMILY_SH2, - CPU_FAMILY_SH2A, - CPU_FAMILY_SH3, - CPU_FAMILY_SH4, - CPU_FAMILY_SH4A, - CPU_FAMILY_SH4AL_DSP, - CPU_FAMILY_SH5, - CPU_FAMILY_UNKNOWN, -}; - /* * TLB information structure * @@ -72,7 +61,7 @@ struct tlb_info { }; struct sh_cpuinfo { - unsigned int type, family; + unsigned int type; int cut_major, cut_minor; unsigned long loops_per_jiffy; unsigned long asid_cache; diff --git a/trunk/arch/sh/include/asm/system.h b/trunk/arch/sh/include/asm/system.h index bf7c4cbde372..ab79e1f4fbe0 100644 --- a/trunk/arch/sh/include/asm/system.h +++ b/trunk/arch/sh/include/asm/system.h @@ -14,6 +14,18 @@ #define AT_VECTOR_SIZE_ARCH 5 /* entries in ARCH_DLINFO */ +#if defined(CONFIG_CPU_SH4A) || defined(CONFIG_CPU_SH5) +#define __icbi() \ +{ \ + unsigned long __addr; \ + __addr = 0xa8000000; \ + __asm__ __volatile__( \ + "icbi %0\n\t" \ + : /* no output */ \ + : "m" (__m(__addr))); \ +} +#endif + /* * A brief note on ctrl_barrier(), the control register write barrier. 
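Earlier in this hunk, pgtable.h keeps the NEFF_SIGN/NEFF_MASK definitions while dropping the neff_sign_extend() helper; the same sign extension is open-coded later in process_64.c and signal_64.c as (x & NEFF_SIGN) ? (x | NEFF_MASK) : x. A standalone check of that arithmetic, assuming NEFF is 32 as on SH-5 (the 0xa8000000 constant is the P2 address used by __icbi() above):

#include <stdio.h>

#define NEFF      32	/* assumed value; not shown in this patch */
#define NEFF_SIGN (1LL << (NEFF - 1))
#define NEFF_MASK (-1LL << NEFF)

int main(void)
{
	/* Address with bit NEFF-1 set: extended with ones. */
	unsigned long long se = 0xa8000000ULL;

	se = (se & NEFF_SIGN) ? (se | NEFF_MASK) : se;
	printf("0xa8000000 -> 0x%016llx\n", se);  /* 0xffffffffa8000000 */

	/* Address with bit NEFF-1 clear: unchanged. */
	se = 0x00400000ULL;
	se = (se & NEFF_SIGN) ? (se | NEFF_MASK) : se;
	printf("0x00400000 -> 0x%016llx\n", se);  /* 0x0000000000400000 */

	return 0;
}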
* @@ -32,7 +44,7 @@ #define mb() __asm__ __volatile__ ("synco": : :"memory") #define rmb() mb() #define wmb() __asm__ __volatile__ ("synco": : :"memory") -#define ctrl_barrier() __icbi(0xa8000000) +#define ctrl_barrier() __icbi() #define read_barrier_depends() do { } while(0) #else #define mb() __asm__ __volatile__ ("": : :"memory") diff --git a/trunk/arch/sh/include/asm/system_32.h b/trunk/arch/sh/include/asm/system_32.h index 5ddd2359f3ef..6c68a51f1cc5 100644 --- a/trunk/arch/sh/include/asm/system_32.h +++ b/trunk/arch/sh/include/asm/system_32.h @@ -63,16 +63,6 @@ do { \ #define __restore_dsp(tsk) do { } while (0) #endif -#if defined(CONFIG_CPU_SH4A) -#define __icbi(addr) __asm__ __volatile__ ( "icbi @%0\n\t" : : "r" (addr)) -#else -#define __icbi(addr) mb() -#endif - -#define __ocbp(addr) __asm__ __volatile__ ( "ocbp @%0\n\t" : : "r" (addr)) -#define __ocbi(addr) __asm__ __volatile__ ( "ocbi @%0\n\t" : : "r" (addr)) -#define __ocbwb(addr) __asm__ __volatile__ ( "ocbwb @%0\n\t" : : "r" (addr)) - struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); @@ -208,11 +198,6 @@ do { \ }) #endif -static inline reg_size_t register_align(void *val) -{ - return (unsigned long)(signed long)val; -} - int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs, struct mem_access *ma); diff --git a/trunk/arch/sh/include/asm/system_64.h b/trunk/arch/sh/include/asm/system_64.h index 8e4a03e7966c..943acf5ea07c 100644 --- a/trunk/arch/sh/include/asm/system_64.h +++ b/trunk/arch/sh/include/asm/system_64.h @@ -37,14 +37,4 @@ do { \ #define jump_to_uncached() do { } while (0) #define back_to_cached() do { } while (0) -#define __icbi(addr) __asm__ __volatile__ ( "icbi %0, 0\n\t" : : "r" (addr)) -#define __ocbp(addr) __asm__ __volatile__ ( "ocbp %0, 0\n\t" : : "r" (addr)) -#define __ocbi(addr) __asm__ __volatile__ ( "ocbi %0, 0\n\t" : : "r" (addr)) -#define __ocbwb(addr) __asm__ __volatile__ ( "ocbwb %0, 0\n\t" : : "r" (addr)) - -static inline reg_size_t register_align(void *val) -{ - return (unsigned long long)(signed long long)(signed long)val; -} - #endif /* __ASM_SH_SYSTEM_64_H */ diff --git a/trunk/arch/sh/include/asm/types.h b/trunk/arch/sh/include/asm/types.h index f8421f7ad63a..c7f3c94837dd 100644 --- a/trunk/arch/sh/include/asm/types.h +++ b/trunk/arch/sh/include/asm/types.h @@ -11,10 +11,8 @@ #ifdef CONFIG_SUPERH32 typedef u16 insn_size_t; -typedef u32 reg_size_t; #else typedef u32 insn_size_t; -typedef u64 reg_size_t; #endif #endif /* __ASSEMBLY__ */ diff --git a/trunk/arch/sh/include/cpu-common/cpu/cacheflush.h b/trunk/arch/sh/include/cpu-common/cpu/cacheflush.h index 8189dbd68f8f..c3db00b73605 100644 --- a/trunk/arch/sh/include/cpu-common/cpu/cacheflush.h +++ b/trunk/arch/sh/include/cpu-common/cpu/cacheflush.h @@ -1,12 +1,14 @@ /* + * include/asm-sh/cpu-sh2/cacheflush.h + * * Copyright (C) 2003 Paul Mundt * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. 
*/ -#ifndef __ASM_CPU_SH_CACHEFLUSH_H -#define __ASM_CPU_SH_CACHEFLUSH_H +#ifndef __ASM_CPU_SH2_CACHEFLUSH_H +#define __ASM_CPU_SH2_CACHEFLUSH_H /* * Cache flushing: @@ -20,23 +22,23 @@ * - flush_dcache_page(pg) flushes(wback&invalidates) a page for dcache * - flush_icache_range(start, end) flushes(invalidates) a range for icache * - flush_icache_page(vma, pg) flushes(invalidates) a page for icache - * - flush_cache_sigtramp(vaddr) flushes the signal trampoline + * + * Caches are indexed (effectively) by physical address on SH-2, so + * we don't need them. */ -extern void (*flush_cache_all)(void); -extern void (*flush_cache_mm)(struct mm_struct *mm); -extern void (*flush_cache_dup_mm)(struct mm_struct *mm); -extern void (*flush_cache_page)(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn); -extern void (*flush_cache_range)(struct vm_area_struct *vma, - unsigned long start, unsigned long end); -extern void (*flush_dcache_page)(struct page *page); -extern void (*flush_icache_range)(unsigned long start, unsigned long end); -extern void (*flush_icache_page)(struct vm_area_struct *vma, - struct page *page); -extern void (*flush_cache_sigtramp)(unsigned long address); +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) +#define flush_cache_sigtramp(vaddr) do { } while (0) -extern void (*__flush_wback_region)(void *start, int size); -extern void (*__flush_purge_region)(void *start, int size); -extern void (*__flush_invalidate_region)(void *start, int size); +#define p3_cache_init() do { } while (0) -#endif /* __ASM_CPU_SH_CACHEFLUSH_H */ +#endif /* __ASM_CPU_SH2_CACHEFLUSH_H */ diff --git a/trunk/arch/sh/include/cpu-sh2a/cpu/cacheflush.h b/trunk/arch/sh/include/cpu-sh2a/cpu/cacheflush.h new file mode 100644 index 000000000000..3d3b9205d2ac --- /dev/null +++ b/trunk/arch/sh/include/cpu-sh2a/cpu/cacheflush.h @@ -0,0 +1,34 @@ +#ifndef __ASM_CPU_SH2A_CACHEFLUSH_H +#define __ASM_CPU_SH2A_CACHEFLUSH_H + +/* + * Cache flushing: + * + * - flush_cache_all() flushes entire cache + * - flush_cache_mm(mm) flushes the specified mm context's cache lines + * - flush_cache_dup mm(mm) handles cache flushing when forking + * - flush_cache_page(mm, vmaddr, pfn) flushes a single page + * - flush_cache_range(vma, start, end) flushes a range of pages + * + * - flush_dcache_page(pg) flushes(wback&invalidates) a page for dcache + * - flush_icache_range(start, end) flushes(invalidates) a range for icache + * - flush_icache_page(vma, pg) flushes(invalidates) a page for icache + * + * Caches are indexed (effectively) by physical address on SH-2, so + * we don't need them. 
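This header previously routed every cacheop through function pointers filled in by a per-CPU *_cache_init() (those assignments are removed from cache-sh2.c and friends later in the patch); the replacement binds each operation at compile time, which for SH-2's physically indexed cache collapses to a no-op. A toy sketch of the two dispatch styles, with illustrative names:

#include <stdio.h>

/* Style being removed: pointer bound at CPU init time. */
static void sh2_flush_cache_all(void) { printf("sh2 flush\n"); }

void (*flush_cache_all_ptr)(void);

static void cache_init(void)
{
	flush_cache_all_ptr = sh2_flush_cache_all;  /* CPU probe picks this */
}

/* Style this patch restores: one direct definition per CPU family,
 * selected at build time; on SH-2 it can be an empty macro. */
#define flush_cache_all_direct() do { } while (0)

int main(void)
{
	cache_init();
	flush_cache_all_ptr();		/* indirect call through the pointer */
	flush_cache_all_direct();	/* compiles away entirely */
	return 0;
}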
+ */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +void flush_icache_range(unsigned long start, unsigned long end); +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) +#define flush_cache_sigtramp(vaddr) do { } while (0) + +#define p3_cache_init() do { } while (0) +#endif /* __ASM_CPU_SH2A_CACHEFLUSH_H */ diff --git a/trunk/arch/sh/include/cpu-sh3/cpu/cacheflush.h b/trunk/arch/sh/include/cpu-sh3/cpu/cacheflush.h new file mode 100644 index 000000000000..1ac27aae6700 --- /dev/null +++ b/trunk/arch/sh/include/cpu-sh3/cpu/cacheflush.h @@ -0,0 +1,46 @@ +/* + * include/asm-sh/cpu-sh3/cacheflush.h + * + * Copyright (C) 1999 Niibe Yutaka + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef __ASM_CPU_SH3_CACHEFLUSH_H +#define __ASM_CPU_SH3_CACHEFLUSH_H + +#if defined(CONFIG_SH7705_CACHE_32KB) +/* SH7705 is an SH3 processor with 32KB cache. This has alias issues like the + * SH4. Unlike the SH4 this is a unified cache so we need to do some work + * in mmap when 'exec'ing a new binary + */ + /* 32KB cache, 4kb PAGE sizes need to check bit 12 */ +#define CACHE_ALIAS 0x00001000 + +#define PG_mapped PG_arch_1 + +void flush_cache_all(void); +void flush_cache_mm(struct mm_struct *mm); +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +void flush_dcache_page(struct page *pg); +void flush_icache_range(unsigned long start, unsigned long end); +void flush_icache_page(struct vm_area_struct *vma, struct page *page); + +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +/* SH3 has unified cache so no special action needed here */ +#define flush_cache_sigtramp(vaddr) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) + +#define p3_cache_init() do { } while (0) + +#else +#include +#endif + +#endif /* __ASM_CPU_SH3_CACHEFLUSH_H */ diff --git a/trunk/arch/sh/include/cpu-sh4/cpu/cacheflush.h b/trunk/arch/sh/include/cpu-sh4/cpu/cacheflush.h new file mode 100644 index 000000000000..065306d376eb --- /dev/null +++ b/trunk/arch/sh/include/cpu-sh4/cpu/cacheflush.h @@ -0,0 +1,43 @@ +/* + * include/asm-sh/cpu-sh4/cacheflush.h + * + * Copyright (C) 1999 Niibe Yutaka + * Copyright (C) 2003 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef __ASM_CPU_SH4_CACHEFLUSH_H +#define __ASM_CPU_SH4_CACHEFLUSH_H + +/* + * Caches are broken on SH-4 (unless we use write-through + * caching; in which case they're only semi-broken), + * so we need them. 
+ */ +void flush_cache_all(void); +void flush_dcache_all(void); +void flush_cache_mm(struct mm_struct *mm); +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn); +void flush_dcache_page(struct page *pg); + +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +void flush_icache_range(unsigned long start, unsigned long end); +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len); + +#define flush_icache_page(vma,pg) do { } while (0) + +/* Initialization of P3 area for copy_user_page */ +void p3_cache_init(void); + +#define PG_mapped PG_arch_1 + +#endif /* __ASM_CPU_SH4_CACHEFLUSH_H */ diff --git a/trunk/arch/sh/include/cpu-sh5/cpu/cacheflush.h b/trunk/arch/sh/include/cpu-sh5/cpu/cacheflush.h new file mode 100644 index 000000000000..5a11f0b7e66a --- /dev/null +++ b/trunk/arch/sh/include/cpu-sh5/cpu/cacheflush.h @@ -0,0 +1,33 @@ +#ifndef __ASM_SH_CPU_SH5_CACHEFLUSH_H +#define __ASM_SH_CPU_SH5_CACHEFLUSH_H + +#ifndef __ASSEMBLY__ + +struct vm_area_struct; +struct page; +struct mm_struct; + +extern void flush_cache_all(void); +extern void flush_cache_mm(struct mm_struct *mm); +extern void flush_cache_sigtramp(unsigned long vaddr); +extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +extern void flush_dcache_page(struct page *pg); +extern void flush_icache_range(unsigned long start, unsigned long end); +extern void flush_icache_user_range(struct vm_area_struct *vma, + struct page *page, unsigned long addr, + int len); + +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) + +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +#define flush_icache_page(vma, page) do { } while (0) +void p3_cache_init(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_SH_CPU_SH5_CACHEFLUSH_H */ + diff --git a/trunk/arch/sh/kernel/Makefile_64 b/trunk/arch/sh/kernel/Makefile_64 index cdfec1e499a0..639ee514266c 100644 --- a/trunk/arch/sh/kernel/Makefile_64 +++ b/trunk/arch/sh/kernel/Makefile_64 @@ -1,9 +1,8 @@ extra-y := head_64.o init_task.o vmlinux.lds -obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \ - machvec.o process_64.o ptrace_64.o setup.o signal_64.o \ - sys_sh.o sys_sh64.o syscalls_64.o time.o topology.o \ - traps.o traps_64.o unwinder.o +obj-y := debugtraps.o idle.o io.o io_generic.o irq.o machvec.o process_64.o \ + ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \ + syscalls_64.o time.o topology.o traps.o traps_64.o unwinder.o obj-y += cpu/ obj-$(CONFIG_SMP) += smp.o diff --git a/trunk/arch/sh/kernel/cpu/init.c b/trunk/arch/sh/kernel/cpu/init.c index c832fa4cf8ed..ad85421099cd 100644 --- a/trunk/arch/sh/kernel/cpu/init.c +++ b/trunk/arch/sh/kernel/cpu/init.c @@ -268,9 +268,11 @@ asmlinkage void __init sh_cpu_init(void) cache_init(); if (raw_smp_processor_id() == 0) { +#ifdef CONFIG_MMU shm_align_mask = max_t(unsigned long, current_cpu_data.dcache.way_size - 1, PAGE_SIZE - 1); +#endif /* Boot CPU sets the cache shape */ detect_cache_shape(); diff --git a/trunk/arch/sh/kernel/cpu/sh2/entry.S b/trunk/arch/sh/kernel/cpu/sh2/entry.S index becc54c45692..c8a4331d9b8d 100644 --- 
a/trunk/arch/sh/kernel/cpu/sh2/entry.S +++ b/trunk/arch/sh/kernel/cpu/sh2/entry.S @@ -227,8 +227,9 @@ ENTRY(sh_bios_handler) mov.l @r15+, r14 add #8,r15 lds.l @r15+, pr + mov.l @r15+,r15 rte - mov.l @r15+,r15 + nop .align 2 1: .long gdb_vbr_vector #endif /* CONFIG_SH_STANDARD_BIOS */ diff --git a/trunk/arch/sh/kernel/cpu/sh2/probe.c b/trunk/arch/sh/kernel/cpu/sh2/probe.c index 1db6d8883888..5916d9096b99 100644 --- a/trunk/arch/sh/kernel/cpu/sh2/probe.c +++ b/trunk/arch/sh/kernel/cpu/sh2/probe.c @@ -29,7 +29,6 @@ int __init detect_cpu_and_cache_system(void) */ boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED; boot_cpu_data.icache = boot_cpu_data.dcache; - boot_cpu_data.family = CPU_FAMILY_SH2; return 0; } diff --git a/trunk/arch/sh/kernel/cpu/sh2a/entry.S b/trunk/arch/sh/kernel/cpu/sh2a/entry.S index ab3903eeda5c..222742ddc0d6 100644 --- a/trunk/arch/sh/kernel/cpu/sh2a/entry.S +++ b/trunk/arch/sh/kernel/cpu/sh2a/entry.S @@ -176,8 +176,9 @@ ENTRY(sh_bios_handler) movml.l @r15+,r14 add #8,r15 lds.l @r15+, pr + mov.l @r15+,r15 rte - mov.l @r15+,r15 + nop .align 2 1: .long gdb_vbr_vector #endif /* CONFIG_SH_STANDARD_BIOS */ diff --git a/trunk/arch/sh/kernel/cpu/sh2a/probe.c b/trunk/arch/sh/kernel/cpu/sh2a/probe.c index 6825d6507164..e098e2f6aa08 100644 --- a/trunk/arch/sh/kernel/cpu/sh2a/probe.c +++ b/trunk/arch/sh/kernel/cpu/sh2a/probe.c @@ -15,8 +15,6 @@ int __init detect_cpu_and_cache_system(void) { - boot_cpu_data.family = CPU_FAMILY_SH2A; - /* All SH-2A CPUs have support for 16 and 32-bit opcodes.. */ boot_cpu_data.flags |= CPU_HAS_OP32; diff --git a/trunk/arch/sh/kernel/cpu/sh3/entry.S b/trunk/arch/sh/kernel/cpu/sh3/entry.S index 854921c6f45b..67ad6467c694 100644 --- a/trunk/arch/sh/kernel/cpu/sh3/entry.S +++ b/trunk/arch/sh/kernel/cpu/sh3/entry.S @@ -113,34 +113,35 @@ OFF_TRA = (16*4+6*4) #if defined(CONFIG_MMU) .align 2 ENTRY(tlb_miss_load) - bra call_handle_tlbmiss + bra call_dpf mov #0, r5 .align 2 ENTRY(tlb_miss_store) - bra call_handle_tlbmiss + bra call_dpf mov #1, r5 .align 2 ENTRY(initial_page_write) - bra call_handle_tlbmiss - mov #2, r5 + bra call_dpf + mov #1, r5 .align 2 ENTRY(tlb_protection_violation_load) - bra call_do_page_fault + bra call_dpf mov #0, r5 .align 2 ENTRY(tlb_protection_violation_store) - bra call_do_page_fault + bra call_dpf mov #1, r5 -call_handle_tlbmiss: +call_dpf: setup_frame_reg mov.l 1f, r0 mov r5, r8 mov.l @r0, r6 + mov r6, r9 mov.l 2f, r0 sts pr, r10 jsr @r0 @@ -151,25 +152,16 @@ call_handle_tlbmiss: lds r10, pr rts nop -0: +0: mov.l 3f, r0 + mov r9, r6 mov r8, r5 -call_do_page_fault: - mov.l 1f, r0 - mov.l @r0, r6 - - sti - - mov.l 3f, r0 - mov.l 4f, r1 - mov r15, r4 jmp @r0 - lds r1, pr + mov r15, r4 .align 2 1: .long MMU_TEA -2: .long handle_tlbmiss +2: .long __do_page_fault 3: .long do_page_fault -4: .long ret_from_exception .align 2 ENTRY(address_error_load) diff --git a/trunk/arch/sh/kernel/cpu/sh3/probe.c b/trunk/arch/sh/kernel/cpu/sh3/probe.c index f9c7df64eb01..10f2a760c5ee 100644 --- a/trunk/arch/sh/kernel/cpu/sh3/probe.c +++ b/trunk/arch/sh/kernel/cpu/sh3/probe.c @@ -107,7 +107,5 @@ int __uses_jump_to_uncached detect_cpu_and_cache_system(void) boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED; boot_cpu_data.icache = boot_cpu_data.dcache; - boot_cpu_data.family = CPU_FAMILY_SH3; - return 0; } diff --git a/trunk/arch/sh/kernel/cpu/sh4/probe.c b/trunk/arch/sh/kernel/cpu/sh4/probe.c index 695026a3f02d..6c78d0a9c857 100644 --- a/trunk/arch/sh/kernel/cpu/sh4/probe.c +++ b/trunk/arch/sh/kernel/cpu/sh4/probe.c @@ -57,12 +57,8 @@ int __init 
detect_cpu_and_cache_system(void) * Setup some generic flags we can probe on SH-4A parts */ if (((pvr >> 16) & 0xff) == 0x10) { - boot_cpu_data.family = CPU_FAMILY_SH4A; - - if ((cvr & 0x10000000) == 0) { + if ((cvr & 0x10000000) == 0) boot_cpu_data.flags |= CPU_HAS_DSP; - boot_cpu_data.family = CPU_FAMILY_SH4AL_DSP; - } boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_PERF_COUNTER; boot_cpu_data.cut_major = pvr & 0x7f; @@ -72,7 +68,6 @@ int __init detect_cpu_and_cache_system(void) } else { /* And some SH-4 defaults.. */ boot_cpu_data.flags |= CPU_HAS_PTEA; - boot_cpu_data.family = CPU_FAMILY_SH4; } /* FPU detection works for everyone */ @@ -177,6 +172,9 @@ int __init detect_cpu_and_cache_system(void) boot_cpu_data.icache.ways = 2; boot_cpu_data.dcache.ways = 2; + break; + default: + boot_cpu_data.type = CPU_SH_NONE; break; } diff --git a/trunk/arch/sh/kernel/cpu/sh5/probe.c b/trunk/arch/sh/kernel/cpu/sh5/probe.c index 521d05b3f7ba..92ad844b5c12 100644 --- a/trunk/arch/sh/kernel/cpu/sh5/probe.c +++ b/trunk/arch/sh/kernel/cpu/sh5/probe.c @@ -34,8 +34,6 @@ int __init detect_cpu_and_cache_system(void) /* CPU.VCR aliased at CIR address on SH5-101 */ boot_cpu_data.type = CPU_SH5_101; - boot_cpu_data.family = CPU_FAMILY_SH5; - /* * First, setup some sane values for the I-cache. */ diff --git a/trunk/arch/sh/kernel/process_64.c b/trunk/arch/sh/kernel/process_64.c index 1192398ef582..24de74214940 100644 --- a/trunk/arch/sh/kernel/process_64.c +++ b/trunk/arch/sh/kernel/process_64.c @@ -425,6 +425,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, struct task_struct *p, struct pt_regs *regs) { struct pt_regs *childregs; + unsigned long long se; /* Sign extension */ #ifdef CONFIG_SH_FPU if(last_task_used_math == current) { @@ -440,19 +441,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, *childregs = *regs; - /* - * Sign extend the edited stack. - * Note that thread.pc and thread.pc will stay - * 32-bit wide and context switch must take care - * of NEFF sign extension. - */ if (user_mode(regs)) { - childregs->regs[15] = neff_sign_extend(usp); + childregs->regs[15] = usp; p->thread.uregs = childregs; } else { - childregs->regs[15] = - neff_sign_extend((unsigned long)task_stack_page(p) + - THREAD_SIZE); + childregs->regs[15] = (unsigned long)task_stack_page(p) + THREAD_SIZE; } childregs->regs[9] = 0; /* Set return value for child */ @@ -461,6 +454,17 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.sp = (unsigned long) childregs; p->thread.pc = (unsigned long) ret_from_fork; + /* + * Sign extend the edited stack. + * Note that thread.pc and thread.pc will stay + * 32-bit wide and context switch must take care + * of NEFF sign extension. + */ + + se = childregs->regs[15]; + se = (se & NEFF_SIGN) ? 
(se | NEFF_MASK) : se; + childregs->regs[15] = se; + return 0; } diff --git a/trunk/arch/sh/kernel/setup.c b/trunk/arch/sh/kernel/setup.c index dc403e42bcab..ceb409bf7741 100644 --- a/trunk/arch/sh/kernel/setup.c +++ b/trunk/arch/sh/kernel/setup.c @@ -49,7 +49,6 @@ struct sh_cpuinfo cpu_data[NR_CPUS] __read_mostly = { [0] = { .type = CPU_SH_NONE, - .family = CPU_FAMILY_UNKNOWN, .loops_per_jiffy = 10000000, }, }; diff --git a/trunk/arch/sh/kernel/sh_ksyms_32.c b/trunk/arch/sh/kernel/sh_ksyms_32.c index 8dbe26b17c44..cec610888e28 100644 --- a/trunk/arch/sh/kernel/sh_ksyms_32.c +++ b/trunk/arch/sh/kernel/sh_ksyms_32.c @@ -101,6 +101,11 @@ EXPORT_SYMBOL(flush_cache_range); EXPORT_SYMBOL(flush_dcache_page); #endif +#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) && \ + (defined(CONFIG_CPU_SH4) || defined(CONFIG_SH7705_CACHE_32KB)) +EXPORT_SYMBOL(clear_user_page); +#endif + #ifdef CONFIG_MCOUNT DECLARE_EXPORT(mcount); #endif @@ -109,6 +114,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic); #ifdef CONFIG_IPV6 EXPORT_SYMBOL(csum_ipv6_magic); #endif +EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(_ebss); diff --git a/trunk/arch/sh/kernel/sh_ksyms_64.c b/trunk/arch/sh/kernel/sh_ksyms_64.c index d008e17eb257..f5bd156ea504 100644 --- a/trunk/arch/sh/kernel/sh_ksyms_64.c +++ b/trunk/arch/sh/kernel/sh_ksyms_64.c @@ -30,6 +30,14 @@ extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(kernel_thread); +#if !defined(CONFIG_CACHE_OFF) && defined(CONFIG_MMU) +EXPORT_SYMBOL(clear_user_page); +#endif + +#ifndef CONFIG_CACHE_OFF +EXPORT_SYMBOL(flush_dcache_page); +#endif + #ifdef CONFIG_VT EXPORT_SYMBOL(screen_info); #endif @@ -44,6 +52,7 @@ EXPORT_SYMBOL(__get_user_asm_l); EXPORT_SYMBOL(__get_user_asm_q); EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(__copy_user); diff --git a/trunk/arch/sh/kernel/signal_64.c b/trunk/arch/sh/kernel/signal_64.c index 026fd1cfe17d..0663a0ee6021 100644 --- a/trunk/arch/sh/kernel/signal_64.c +++ b/trunk/arch/sh/kernel/signal_64.c @@ -561,11 +561,13 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { + DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1; + /* * On SH5 all edited pointers are subject to NEFF */ - DEREF_REG_PR = neff_sign_extend((unsigned long) - ka->sa.sa_restorer | 0x1); + DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ? + (DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR; } else { /* * Different approach on SH5. @@ -578,8 +580,9 @@ static int setup_frame(int sig, struct k_sigaction *ka, * . being code, linker turns ShMedia bit on, always * dereference index -1. */ - DEREF_REG_PR = neff_sign_extend((unsigned long) - frame->retcode | 0x01); + DEREF_REG_PR = (unsigned long) frame->retcode | 0x01; + DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ? + (DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR; if (__copy_to_user(frame->retcode, (void *)((unsigned long)sa_default_restorer & (~1)), 16) != 0) @@ -593,7 +596,9 @@ static int setup_frame(int sig, struct k_sigaction *ka, * Set up registers for signal handler. * All edited pointers are subject to NEFF. */ - regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame); + regs->regs[REG_SP] = (unsigned long) frame; + regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ? 
+ (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP]; regs->regs[REG_ARG1] = signal; /* Arg for signal handler */ /* FIXME: @@ -608,7 +613,8 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->sc; regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->sc; - regs->pc = neff_sign_extend((unsigned long)ka->sa.sa_handler); + regs->pc = (unsigned long) ka->sa.sa_handler; + regs->pc = (regs->pc & NEFF_SIGN) ? (regs->pc | NEFF_MASK) : regs->pc; set_fs(USER_DS); @@ -670,11 +676,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { + DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1; + /* * On SH5 all edited pointers are subject to NEFF */ - DEREF_REG_PR = neff_sign_extend((unsigned long) - ka->sa.sa_restorer | 0x1); + DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ? + (DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR; } else { /* * Different approach on SH5. @@ -687,14 +695,15 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * . being code, linker turns ShMedia bit on, always * dereference index -1. */ - DEREF_REG_PR = neff_sign_extend((unsigned long) - frame->retcode | 0x01); + + DEREF_REG_PR = (unsigned long) frame->retcode | 0x01; + DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ? + (DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR; if (__copy_to_user(frame->retcode, (void *)((unsigned long)sa_default_rt_restorer & (~1)), 16) != 0) goto give_sigsegv; - /* Cohere the trampoline with the I-cache. */ flush_icache_range(DEREF_REG_PR-1, DEREF_REG_PR-1+15); } @@ -702,11 +711,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Set up registers for signal handler. * All edited pointers are subject to NEFF. */ - regs->regs[REG_SP] = neff_sign_extend((unsigned long)frame); + regs->regs[REG_SP] = (unsigned long) frame; + regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ? + (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP]; regs->regs[REG_ARG1] = signal; /* Arg for signal handler */ regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->info; regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext; - regs->pc = neff_sign_extend((unsigned long)ka->sa.sa_handler); + regs->pc = (unsigned long) ka->sa.sa_handler; + regs->pc = (regs->pc & NEFF_SIGN) ? 
(regs->pc | NEFF_MASK) : regs->pc; set_fs(USER_DS); diff --git a/trunk/arch/sh/lib/Makefile b/trunk/arch/sh/lib/Makefile index a969b47c5463..c2b28d8b2dd1 100644 --- a/trunk/arch/sh/lib/Makefile +++ b/trunk/arch/sh/lib/Makefile @@ -23,7 +23,7 @@ obj-y += io.o memcpy-y := memcpy.o memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o -lib-$(CONFIG_MMU) += copy_page.o __clear_user.o +lib-$(CONFIG_MMU) += copy_page.o clear_page.o lib-$(CONFIG_MCOUNT) += mcount.o lib-y += $(memcpy-y) $(udivsi3-y) diff --git a/trunk/arch/sh/lib/__clear_user.S b/trunk/arch/sh/lib/clear_page.S similarity index 76% rename from trunk/arch/sh/lib/__clear_user.S rename to trunk/arch/sh/lib/clear_page.S index bee9817e055d..8342bfbde64c 100644 --- a/trunk/arch/sh/lib/__clear_user.S +++ b/trunk/arch/sh/lib/clear_page.S @@ -8,6 +8,52 @@ #include #include +/* + * clear_page + * @to: P1 address + * + * void clear_page(void *to) + */ + +/* + * r0 --- scratch + * r4 --- to + * r5 --- to + PAGE_SIZE + */ +ENTRY(clear_page) + mov r4,r5 + mov.l .Llimit,r0 + add r0,r5 + mov #0,r0 + ! +1: +#if defined(CONFIG_CPU_SH4) + movca.l r0,@r4 + mov r4,r1 +#else + mov.l r0,@r4 +#endif + add #32,r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 +#if defined(CONFIG_CPU_SH4) + ocbwb @r1 +#endif + cmp/eq r5,r4 + bf/s 1b + add #28,r4 + ! + rts + nop + + .balign 4 +.Llimit: .long (PAGE_SIZE-28) + ENTRY(__clear_user) ! mov #0, r0 diff --git a/trunk/arch/sh/lib64/Makefile b/trunk/arch/sh/lib64/Makefile index 1fee75aa1f98..334bb2da36ea 100644 --- a/trunk/arch/sh/lib64/Makefile +++ b/trunk/arch/sh/lib64/Makefile @@ -11,7 +11,7 @@ # Panic should really be compiled as PIC lib-y := udelay.o dbg.o panic.o memcpy.o memset.o \ - copy_user_memcpy.o copy_page.o strcpy.o strlen.o + copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o # Extracted from libgcc lib-y += udivsi3.o udivdi3.o sdivsi3.o diff --git a/trunk/arch/sh/lib64/clear_page.S b/trunk/arch/sh/lib64/clear_page.S new file mode 100644 index 000000000000..007ab48ecc1c --- /dev/null +++ b/trunk/arch/sh/lib64/clear_page.S @@ -0,0 +1,54 @@ +/* + Copyright 2003 Richard Curnow, SuperH (UK) Ltd. + + This file is subject to the terms and conditions of the GNU General Public + License. See the file "COPYING" in the main directory of this archive + for more details. + + Tight version of memset for the case of just clearing a page. It turns out + that having the alloco's spaced out slightly due to the increment/branch + pair causes them to contend less for access to the cache. Similarly, + keeping the stores apart from the allocos causes less contention. => Do two + separate loops. Do multiple stores per loop to amortise the + increment/branch cost a little. + + Parameters: + r2 : source effective address (start of page) + + Always clears 4096 bytes. + + Note : alloco guarded by synco to avoid TAKum03020 erratum + +*/ + + .section .text..SHmedia32,"ax" + .little + + .balign 8 + .global clear_page +clear_page: + pta/l 1f, tr1 + pta/l 2f, tr2 + ptabs/l r18, tr0 + + movi 4096, r7 + add r2, r7, r7 + add r2, r63, r6 +1: + alloco r6, 0 + synco ! 
TAKum03020 + addi r6, 32, r6 + bgt/l r7, r6, tr1 + + add r2, r63, r6 +2: + st.q r6, 0, r63 + st.q r6, 8, r63 + st.q r6, 16, r63 + st.q r6, 24, r63 + addi r6, 32, r6 + bgt/l r7, r6, tr2 + + blink tr0, r63 + + diff --git a/trunk/arch/sh/mm/Makefile b/trunk/arch/sh/mm/Makefile index 4ca71e9e09eb..9f4bc3d90b1e 100644 --- a/trunk/arch/sh/mm/Makefile +++ b/trunk/arch/sh/mm/Makefile @@ -1,67 +1,5 @@ -# -# Makefile for the Linux SuperH-specific parts of the memory manager. -# - -obj-y := cache.o init.o consistent.o mmap.o - -ifndef CONFIG_CACHE_OFF -cacheops-$(CONFIG_CPU_SH2) := cache-sh2.o -cacheops-$(CONFIG_CPU_SH2A) := cache-sh2a.o -cacheops-$(CONFIG_CPU_SH3) := cache-sh3.o -cacheops-$(CONFIG_CPU_SH4) := cache-sh4.o flush-sh4.o -cacheops-$(CONFIG_CPU_SH5) := cache-sh5.o flush-sh4.o -cacheops-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o +ifeq ($(CONFIG_SUPERH32),y) +include ${srctree}/arch/sh/mm/Makefile_32 +else +include ${srctree}/arch/sh/mm/Makefile_64 endif - -obj-y += $(cacheops-y) - -mmu-y := nommu.o extable_32.o -mmu-$(CONFIG_MMU) := extable_$(BITS).o fault_$(BITS).o \ - ioremap_$(BITS).o kmap.o tlbflush_$(BITS).o - -obj-y += $(mmu-y) -obj-$(CONFIG_DEBUG_FS) += asids-debugfs.o - -ifdef CONFIG_DEBUG_FS -obj-$(CONFIG_CPU_SH4) += cache-debugfs.o -endif - -ifdef CONFIG_MMU -tlb-$(CONFIG_CPU_SH3) := tlb-sh3.o -tlb-$(CONFIG_CPU_SH4) := tlb-sh4.o -tlb-$(CONFIG_CPU_SH5) := tlb-sh5.o -tlb-$(CONFIG_CPU_HAS_PTEAEX) := tlb-pteaex.o -obj-y += $(tlb-y) -endif - -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PMB) += pmb.o -obj-$(CONFIG_PMB_FIXED) += pmb-fixed.o -obj-$(CONFIG_NUMA) += numa.o - -# Special flags for fault_64.o. This puts restrictions on the number of -# caller-save registers that the compiler can target when building this file. -# This is required because the code is called from a context in entry.S where -# very few registers have been saved in the exception handler (for speed -# reasons). -# The caller save registers that have been saved and which can be used are -# r2,r3,r4,r5 : argument passing -# r15, r18 : SP and LINK -# tr0-4 : allow all caller-save TR's. The compiler seems to be able to make -# use of them, so it's probably beneficial to performance to save them -# and have them available for it. -# -# The resources not listed below are callee save, i.e. the compiler is free to -# use any of them and will spill them to the stack itself. - -CFLAGS_fault_64.o += -ffixed-r7 \ - -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \ - -ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \ - -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \ - -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \ - -ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \ - -ffixed-r41 -ffixed-r42 -ffixed-r43 \ - -ffixed-r60 -ffixed-r61 -ffixed-r62 \ - -fomit-frame-pointer - -EXTRA_CFLAGS += -Werror diff --git a/trunk/arch/sh/mm/Makefile_32 b/trunk/arch/sh/mm/Makefile_32 new file mode 100644 index 000000000000..986a1e055834 --- /dev/null +++ b/trunk/arch/sh/mm/Makefile_32 @@ -0,0 +1,43 @@ +# +# Makefile for the Linux SuperH-specific parts of the memory manager. 
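For reference, the structure of the 32-bit clear_page loop above, rendered as portable C: each iteration fills one 32-byte line with eight 32-bit stores (movca.l plus the seven @-r4 pre-decrement stores). This is a sketch only; the SH-4-only ocbwb write-back has no portable equivalent, and the real routine exists precisely because assembly can exploit movca.l/ocbwb.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE     4096
#define L1_CACHE_LINE 32	/* one SH-4 cache line per loop iteration */

static void clear_page_c(void *to)
{
	uint32_t *p = to;
	uint32_t *end = (uint32_t *)((char *)to + PAGE_SIZE);

	while (p < end) {
		/* Eight 32-bit stores cover one 32-byte line. */
		for (int i = 0; i < L1_CACHE_LINE / 4; i++)
			*p++ = 0;
		/* asm version: ocbwb on the just-written line here */
	}
}

static uint32_t page[PAGE_SIZE / 4];

int main(void)
{
	page[123] = 0xdeadbeef;
	clear_page_c(page);
	printf("page[123] = %u\n", page[123]);	/* 0 */
	return 0;
}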
+# + +obj-y := init.o extable_32.o consistent.o mmap.o + +ifndef CONFIG_CACHE_OFF +cache-$(CONFIG_CPU_SH2) := cache-sh2.o +cache-$(CONFIG_CPU_SH2A) := cache-sh2a.o +cache-$(CONFIG_CPU_SH3) := cache-sh3.o +cache-$(CONFIG_CPU_SH4) := cache-sh4.o +cache-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o +endif + +obj-y += $(cache-y) + +mmu-y := tlb-nommu.o pg-nommu.o +mmu-$(CONFIG_MMU) := fault_32.o tlbflush_32.o ioremap_32.o + +obj-y += $(mmu-y) +obj-$(CONFIG_DEBUG_FS) += asids-debugfs.o + +ifdef CONFIG_DEBUG_FS +obj-$(CONFIG_CPU_SH4) += cache-debugfs.o +endif + +ifdef CONFIG_MMU +tlb-$(CONFIG_CPU_SH3) := tlb-sh3.o +tlb-$(CONFIG_CPU_SH4) := tlb-sh4.o +tlb-$(CONFIG_CPU_HAS_PTEAEX) := tlb-pteaex.o +obj-y += $(tlb-y) +ifndef CONFIG_CACHE_OFF +obj-$(CONFIG_CPU_SH4) += pg-sh4.o +obj-$(CONFIG_SH7705_CACHE_32KB) += pg-sh7705.o +endif +endif + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_PMB) += pmb.o +obj-$(CONFIG_PMB_FIXED) += pmb-fixed.o +obj-$(CONFIG_NUMA) += numa.o + +EXTRA_CFLAGS += -Werror diff --git a/trunk/arch/sh/mm/Makefile_64 b/trunk/arch/sh/mm/Makefile_64 new file mode 100644 index 000000000000..2863ffb7006d --- /dev/null +++ b/trunk/arch/sh/mm/Makefile_64 @@ -0,0 +1,46 @@ +# +# Makefile for the Linux SuperH-specific parts of the memory manager. +# + +obj-y := init.o consistent.o mmap.o + +mmu-y := tlb-nommu.o pg-nommu.o extable_32.o +mmu-$(CONFIG_MMU) := fault_64.o ioremap_64.o tlbflush_64.o tlb-sh5.o \ + extable_64.o + +ifndef CONFIG_CACHE_OFF +obj-y += cache-sh5.o +endif + +obj-y += $(mmu-y) +obj-$(CONFIG_DEBUG_FS) += asids-debugfs.o + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_NUMA) += numa.o + +EXTRA_CFLAGS += -Werror + +# Special flags for fault_64.o. This puts restrictions on the number of +# caller-save registers that the compiler can target when building this file. +# This is required because the code is called from a context in entry.S where +# very few registers have been saved in the exception handler (for speed +# reasons). +# The caller save registers that have been saved and which can be used are +# r2,r3,r4,r5 : argument passing +# r15, r18 : SP and LINK +# tr0-4 : allow all caller-save TR's. The compiler seems to be able to make +# use of them, so it's probably beneficial to performance to save them +# and have them available for it. +# +# The resources not listed below are callee save, i.e. the compiler is free to +# use any of them and will spill them to the stack itself. 
+ +CFLAGS_fault_64.o += -ffixed-r7 \ + -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \ + -ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \ + -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \ + -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \ + -ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \ + -ffixed-r41 -ffixed-r42 -ffixed-r43 \ + -ffixed-r60 -ffixed-r61 -ffixed-r62 \ + -fomit-frame-pointer diff --git a/trunk/arch/sh/mm/cache-sh2.c b/trunk/arch/sh/mm/cache-sh2.c index 699a71f46327..c4e80d2b764b 100644 --- a/trunk/arch/sh/mm/cache-sh2.c +++ b/trunk/arch/sh/mm/cache-sh2.c @@ -16,7 +16,7 @@ #include #include -static void sh2__flush_wback_region(void *start, int size) +void __flush_wback_region(void *start, int size) { unsigned long v; unsigned long begin, end; @@ -37,7 +37,7 @@ static void sh2__flush_wback_region(void *start, int size) } } -static void sh2__flush_purge_region(void *start, int size) +void __flush_purge_region(void *start, int size) { unsigned long v; unsigned long begin, end; @@ -51,7 +51,7 @@ static void sh2__flush_purge_region(void *start, int size) CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008); } -static void sh2__flush_invalidate_region(void *start, int size) +void __flush_invalidate_region(void *start, int size) { #ifdef CONFIG_CACHE_WRITEBACK /* @@ -82,10 +82,3 @@ static void sh2__flush_invalidate_region(void *start, int size) CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008); #endif } - -void __init sh2_cache_init(void) -{ - __flush_wback_region = sh2__flush_wback_region; - __flush_purge_region = sh2__flush_purge_region; - __flush_invalidate_region = sh2__flush_invalidate_region; -} diff --git a/trunk/arch/sh/mm/cache-sh2a.c b/trunk/arch/sh/mm/cache-sh2a.c index 96a41872dfd3..24d86a794065 100644 --- a/trunk/arch/sh/mm/cache-sh2a.c +++ b/trunk/arch/sh/mm/cache-sh2a.c @@ -15,7 +15,7 @@ #include #include -static void sh2a__flush_wback_region(void *start, int size) +void __flush_wback_region(void *start, int size) { unsigned long v; unsigned long begin, end; @@ -44,7 +44,7 @@ static void sh2a__flush_wback_region(void *start, int size) local_irq_restore(flags); } -static void sh2a__flush_purge_region(void *start, int size) +void __flush_purge_region(void *start, int size) { unsigned long v; unsigned long begin, end; @@ -65,7 +65,7 @@ static void sh2a__flush_purge_region(void *start, int size) local_irq_restore(flags); } -static void sh2a__flush_invalidate_region(void *start, int size) +void __flush_invalidate_region(void *start, int size) { unsigned long v; unsigned long begin, end; @@ -97,7 +97,7 @@ static void sh2a__flush_invalidate_region(void *start, int size) } /* WBack O-Cache and flush I-Cache */ -static void sh2a_flush_icache_range(unsigned long start, unsigned long end) +void flush_icache_range(unsigned long start, unsigned long end) { unsigned long v; unsigned long flags; @@ -127,12 +127,3 @@ static void sh2a_flush_icache_range(unsigned long start, unsigned long end) back_to_cached(); local_irq_restore(flags); } - -void __init sh2a_cache_init(void) -{ - flush_icache_range = sh2a_flush_icache_range; - - __flush_wback_region = sh2a__flush_wback_region; - __flush_purge_region = sh2a__flush_purge_region; - __flush_invalidate_region = sh2a__flush_invalidate_region; -} diff --git a/trunk/arch/sh/mm/cache-sh3.c b/trunk/arch/sh/mm/cache-sh3.c index faef80c98134..6d1dbec08ad4 100644 --- a/trunk/arch/sh/mm/cache-sh3.c +++ b/trunk/arch/sh/mm/cache-sh3.c @@ -32,7 +32,7 @@ * SIZE: Size of the region. 
*/ -static void sh3__flush_wback_region(void *start, int size) +void __flush_wback_region(void *start, int size) { unsigned long v, j; unsigned long begin, end; @@ -71,7 +71,7 @@ static void sh3__flush_wback_region(void *start, int size) * START: Virtual Address (U0, P1, or P3) * SIZE: Size of the region. */ -static void sh3__flush_purge_region(void *start, int size) +void __flush_purge_region(void *start, int size) { unsigned long v; unsigned long begin, end; @@ -90,16 +90,11 @@ static void sh3__flush_purge_region(void *start, int size) } } -void __init sh3_cache_init(void) -{ - __flush_wback_region = sh3__flush_wback_region; - __flush_purge_region = sh3__flush_purge_region; - - /* - * No write back please - * - * Except I don't think there's any way to avoid the writeback. - * So we just alias it to sh3__flush_purge_region(). dwmw2. - */ - __flush_invalidate_region = sh3__flush_purge_region; -} +/* + * No write back please + * + * Except I don't think there's any way to avoid the writeback. So we + * just alias it to __flush_purge_region(). dwmw2. + */ +void __flush_invalidate_region(void *start, int size) + __attribute__((alias("__flush_purge_region"))); diff --git a/trunk/arch/sh/mm/cache-sh4.c b/trunk/arch/sh/mm/cache-sh4.c index 05cb04bc3940..5cfe08dbb59e 100644 --- a/trunk/arch/sh/mm/cache-sh4.c +++ b/trunk/arch/sh/mm/cache-sh4.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -26,6 +25,13 @@ #define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */ #define MAX_ICACHE_PAGES 32 +static void __flush_dcache_segment_1way(unsigned long start, + unsigned long extent); +static void __flush_dcache_segment_2way(unsigned long start, + unsigned long extent); +static void __flush_dcache_segment_4way(unsigned long start, + unsigned long extent); + static void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset); @@ -37,13 +43,147 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys, static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) = (void (*)(unsigned long, unsigned long))0xdeadbeef; +static void compute_alias(struct cache_info *c) +{ + c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1); + c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0; +} + +static void __init emit_cache_params(void) +{ + printk("PVR=%08x CVR=%08x PRR=%08x\n", + ctrl_inl(CCN_PVR), + ctrl_inl(CCN_CVR), + ctrl_inl(CCN_PRR)); + printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n", + boot_cpu_data.icache.ways, + boot_cpu_data.icache.sets, + boot_cpu_data.icache.way_incr); + printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", + boot_cpu_data.icache.entry_mask, + boot_cpu_data.icache.alias_mask, + boot_cpu_data.icache.n_aliases); + printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n", + boot_cpu_data.dcache.ways, + boot_cpu_data.dcache.sets, + boot_cpu_data.dcache.way_incr); + printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", + boot_cpu_data.dcache.entry_mask, + boot_cpu_data.dcache.alias_mask, + boot_cpu_data.dcache.n_aliases); + + /* + * Emit Secondary Cache parameters if the CPU has a probed L2. 
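The cache-sh3.c hunk above replaces a runtime pointer assignment with a GCC alias attribute, making __flush_invalidate_region and __flush_purge_region two names for one function body. A standalone demonstration of the attribute (toy names; requires GCC or a compatible compiler, and the alias target must be defined in the same translation unit):

#include <stdio.h>

/* The target symbol must be defined here, not merely declared. */
void purge_region(void *start, int size)
{
	printf("purge %d bytes at %p\n", size, start);
}

/* Second entry point bound to the same code at link time; no
 * wrapper call is involved, both names resolve to one body. */
void invalidate_region(void *start, int size)
	__attribute__((alias("purge_region")));

int main(void)
{
	char buf[64];

	purge_region(buf, sizeof(buf));
	invalidate_region(buf, sizeof(buf));	/* same implementation */
	return 0;
}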
+ */ + if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) { + printk("S-cache : n_ways=%d n_sets=%d way_incr=%d\n", + boot_cpu_data.scache.ways, + boot_cpu_data.scache.sets, + boot_cpu_data.scache.way_incr); + printk("S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", + boot_cpu_data.scache.entry_mask, + boot_cpu_data.scache.alias_mask, + boot_cpu_data.scache.n_aliases); + } + + if (!__flush_dcache_segment_fn) + panic("unknown number of cache ways\n"); +} + +/* + * SH-4 has virtually indexed and physically tagged cache. + */ +void __init p3_cache_init(void) +{ + compute_alias(&boot_cpu_data.icache); + compute_alias(&boot_cpu_data.dcache); + compute_alias(&boot_cpu_data.scache); + + switch (boot_cpu_data.dcache.ways) { + case 1: + __flush_dcache_segment_fn = __flush_dcache_segment_1way; + break; + case 2: + __flush_dcache_segment_fn = __flush_dcache_segment_2way; + break; + case 4: + __flush_dcache_segment_fn = __flush_dcache_segment_4way; + break; + default: + __flush_dcache_segment_fn = NULL; + break; + } + + emit_cache_params(); +} + +/* + * Write back the dirty D-caches, but not invalidate them. + * + * START: Virtual Address (U0, P1, or P3) + * SIZE: Size of the region. + */ +void __flush_wback_region(void *start, int size) +{ + unsigned long v; + unsigned long begin, end; + + begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); + end = ((unsigned long)start + size + L1_CACHE_BYTES-1) + & ~(L1_CACHE_BYTES-1); + for (v = begin; v < end; v+=L1_CACHE_BYTES) { + asm volatile("ocbwb %0" + : /* no output */ + : "m" (__m(v))); + } +} + +/* + * Write back the dirty D-caches and invalidate them. + * + * START: Virtual Address (U0, P1, or P3) + * SIZE: Size of the region. + */ +void __flush_purge_region(void *start, int size) +{ + unsigned long v; + unsigned long begin, end; + + begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); + end = ((unsigned long)start + size + L1_CACHE_BYTES-1) + & ~(L1_CACHE_BYTES-1); + for (v = begin; v < end; v+=L1_CACHE_BYTES) { + asm volatile("ocbp %0" + : /* no output */ + : "m" (__m(v))); + } +} + +/* + * No write back please + */ +void __flush_invalidate_region(void *start, int size) +{ + unsigned long v; + unsigned long begin, end; + + begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); + end = ((unsigned long)start + size + L1_CACHE_BYTES-1) + & ~(L1_CACHE_BYTES-1); + for (v = begin; v < end; v+=L1_CACHE_BYTES) { + asm volatile("ocbi %0" + : /* no output */ + : "m" (__m(v))); + } +} + /* * Write back the range of D-cache, and purge the I-cache. * * Called from kernel/module.c:sys_init_module and routine for a.out format, * signal handler code and kprobes code */ -static void sh4_flush_icache_range(unsigned long start, unsigned long end) +void flush_icache_range(unsigned long start, unsigned long end) { int icacheaddr; unsigned long flags, v; @@ -104,16 +244,9 @@ static inline void flush_cache_4096(unsigned long start, * Write back & invalidate the D-cache of the page. 
* (To avoid "alias" issues) */ -static void sh4_flush_dcache_page(struct page *page) +void flush_dcache_page(struct page *page) { - struct address_space *mapping = page_mapping(page); - -#ifndef CONFIG_SMP - if (mapping && !mapping_mapped(mapping)) - set_bit(PG_dcache_dirty, &page->flags); - else -#endif - { + if (test_bit(PG_mapped, &page->flags)) { unsigned long phys = PHYSADDR(page_address(page)); unsigned long addr = CACHE_OC_ADDRESS_ARRAY; int i, n; @@ -149,13 +282,13 @@ static void __uses_jump_to_uncached flush_icache_all(void) local_irq_restore(flags); } -static inline void flush_dcache_all(void) +void flush_dcache_all(void) { (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size); wmb(); } -static void sh4_flush_cache_all(void) +void flush_cache_all(void) { flush_dcache_all(); flush_icache_all(); @@ -247,11 +380,8 @@ static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, * * Caller takes mm->mmap_sem. */ -static void sh4_flush_cache_mm(struct mm_struct *mm) +void flush_cache_mm(struct mm_struct *mm) { - if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT) - return; - /* * If cache is only 4k-per-way, there are never any 'aliases'. Since * the cache is physically tagged, the data can just be left in there. @@ -287,15 +417,12 @@ static void sh4_flush_cache_mm(struct mm_struct *mm) * ADDR: Virtual Address (U0 address) * PFN: Physical page number */ -static void sh4_flush_cache_page(struct vm_area_struct *vma, - unsigned long address, unsigned long pfn) +void flush_cache_page(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn) { unsigned long phys = pfn << PAGE_SHIFT; unsigned int alias_mask; - if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) - return; - alias_mask = boot_cpu_data.dcache.alias_mask; /* We only need to flush D-cache when we have alias */ @@ -335,12 +462,9 @@ static void sh4_flush_cache_page(struct vm_area_struct *vma, * Flushing the cache lines for U0 only isn't enough. * We need to flush for P1 too, which may contain aliases. */ -static void sh4_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { - if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) - return; - /* * If cache is only 4k-per-way, there are never any 'aliases'. Since * the cache is physically tagged, the data can just be left in there. @@ -368,6 +492,20 @@ static void sh4_flush_cache_range(struct vm_area_struct *vma, } } +/* + * flush_icache_user_range + * @vma: VMA of the process + * @page: page + * @addr: U0 address + * @len: length of the range (< page size) + */ +void flush_icache_user_range(struct vm_area_struct *vma, + struct page *page, unsigned long addr, int len) +{ + flush_cache_page(vma, addr, page_to_pfn(page)); + mb(); +} + /** * __flush_cache_4096 * @@ -635,41 +773,3 @@ static void __flush_dcache_segment_4way(unsigned long start, a3 += linesz; } while (a0 < a0e); } - -extern void __weak sh4__flush_region_init(void); - -/* - * SH-4 has virtually indexed and physically tagged cache. 
- */ -void __init sh4_cache_init(void) -{ - printk("PVR=%08x CVR=%08x PRR=%08x\n", - ctrl_inl(CCN_PVR), - ctrl_inl(CCN_CVR), - ctrl_inl(CCN_PRR)); - - switch (boot_cpu_data.dcache.ways) { - case 1: - __flush_dcache_segment_fn = __flush_dcache_segment_1way; - break; - case 2: - __flush_dcache_segment_fn = __flush_dcache_segment_2way; - break; - case 4: - __flush_dcache_segment_fn = __flush_dcache_segment_4way; - break; - default: - panic("unknown number of cache ways\n"); - break; - } - - flush_icache_range = sh4_flush_icache_range; - flush_dcache_page = sh4_flush_dcache_page; - flush_cache_all = sh4_flush_cache_all; - flush_cache_mm = sh4_flush_cache_mm; - flush_cache_dup_mm = sh4_flush_cache_mm; - flush_cache_page = sh4_flush_cache_page; - flush_cache_range = sh4_flush_cache_range; - - sh4__flush_region_init(); -} diff --git a/trunk/arch/sh/mm/cache-sh5.c b/trunk/arch/sh/mm/cache-sh5.c index d4a445c865d7..86762092508c 100644 --- a/trunk/arch/sh/mm/cache-sh5.c +++ b/trunk/arch/sh/mm/cache-sh5.c @@ -20,11 +20,23 @@ #include #include -extern void __weak sh4__flush_region_init(void); - /* Wired TLB entry for the D-cache */ static unsigned long long dtlb_cache_slot; +void __init p3_cache_init(void) +{ + /* Reserve a slot for dcache colouring in the DTLB */ + dtlb_cache_slot = sh64_get_wired_dtlb_entry(); +} + +#ifdef CONFIG_DCACHE_DISABLED +#define sh64_dcache_purge_all() do { } while (0) +#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0) +#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0) +#define sh64_dcache_purge_phy_page(paddr) do { } while (0) +#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0) +#endif + /* * The following group of functions deal with mapping and unmapping a * temporary page into a DTLB slot that has been set aside for exclusive @@ -44,6 +56,7 @@ static inline void sh64_teardown_dtlb_cache_slot(void) local_irq_enable(); } +#ifndef CONFIG_ICACHE_DISABLED static inline void sh64_icache_inv_all(void) { unsigned long long addr, flag, data; @@ -201,6 +214,52 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm, } } +/* + * Invalidate a small range of user context I-cache, not necessarily page + * (or even cache-line) aligned. + * + * Since this is used inside ptrace, the ASID in the mm context typically + * won't match current_asid. We'll have to switch ASID to do this. For + * safety, and given that the range will be small, do all this under cli. + * + * Note, there is a hazard that the ASID in mm->context is no longer + * actually associated with mm, i.e. if the mm->context has started a new + * cycle since mm was last active. However, this is just a performance + * issue: all that happens is that we invalidate lines belonging to + * another mm, so the owning process has to refill them when that mm goes + * live again. mm itself can't have any cache entries because there will + * have been a flush_cache_all when the new mm->context cycle started. + */ +static void sh64_icache_inv_user_small_range(struct mm_struct *mm, + unsigned long start, int len) +{ + unsigned long long eaddr = start; + unsigned long long eaddr_end = start + len; + unsigned long current_asid, mm_asid; + unsigned long flags; + unsigned long long epage_start; + + /* + * Align to start of cache line. Otherwise, suppose len==8 and + * start was at 32N+28 : the last 4 bytes wouldn't get invalidated. 
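+ *
+ * Note that L1_CACHE_ALIGN() rounds *up* to the next line boundary,
+ * which would skip a partial first line; the example above wants the
+ * round-down form (a sketch, with L1_CACHE_BYTES == 32):
+ *
+ *	eaddr = start & ~(L1_CACHE_BYTES - 1);
+ *
+ * Then start = 32N+28, len = 8 gives eaddr = 32N, eaddr_end = 32N+36,
+ * so "icbi" is issued for the lines at 32N and 32N+32 and the whole
+ * range is covered.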
+ */ + eaddr = L1_CACHE_ALIGN(start); + eaddr_end = start + len; + + mm_asid = cpu_asid(smp_processor_id(), mm); + local_irq_save(flags); + current_asid = switch_and_save_asid(mm_asid); + + epage_start = eaddr & PAGE_MASK; + + while (eaddr < eaddr_end) { + __asm__ __volatile__("icbi %0, 0" : : "r" (eaddr)); + eaddr += L1_CACHE_BYTES; + } + switch_and_save_asid(current_asid); + local_irq_restore(flags); +} + static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end) { /* The icbi instruction never raises ITLBMISS. i.e. if there's not a @@ -228,7 +287,9 @@ static void sh64_icache_inv_current_user_range(unsigned long start, unsigned lon addr += L1_CACHE_BYTES; } } +#endif /* !CONFIG_ICACHE_DISABLED */ +#ifndef CONFIG_DCACHE_DISABLED /* Buffer used as the target of alloco instructions to purge data from cache sets by natural eviction. -- RPC */ #define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4)) @@ -479,11 +540,60 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm, } } +/* + * Purge the range of addresses from the D-cache. + * + * The addresses lie in the superpage mapping. There's no harm if we + * overpurge at either end - just a small performance loss. + */ +void __flush_purge_region(void *start, int size) +{ + unsigned long long ullend, addr, aligned_start; + + aligned_start = (unsigned long long)(signed long long)(signed long) start; + addr = L1_CACHE_ALIGN(aligned_start); + ullend = (unsigned long long) (signed long long) (signed long) start + size; + + while (addr <= ullend) { + __asm__ __volatile__ ("ocbp %0, 0" : : "r" (addr)); + addr += L1_CACHE_BYTES; + } +} + +void __flush_wback_region(void *start, int size) +{ + unsigned long long ullend, addr, aligned_start; + + aligned_start = (unsigned long long)(signed long long)(signed long) start; + addr = L1_CACHE_ALIGN(aligned_start); + ullend = (unsigned long long) (signed long long) (signed long) start + size; + + while (addr < ullend) { + __asm__ __volatile__ ("ocbwb %0, 0" : : "r" (addr)); + addr += L1_CACHE_BYTES; + } +} + +void __flush_invalidate_region(void *start, int size) +{ + unsigned long long ullend, addr, aligned_start; + + aligned_start = (unsigned long long)(signed long long)(signed long) start; + addr = L1_CACHE_ALIGN(aligned_start); + ullend = (unsigned long long) (signed long long) (signed long) start + size; + + while (addr < ullend) { + __asm__ __volatile__ ("ocbi %0, 0" : : "r" (addr)); + addr += L1_CACHE_BYTES; + } +} +#endif /* !CONFIG_DCACHE_DISABLED */ + /* * Invalidate the entire contents of both caches, after writing back to * memory any dirty data from the D-cache. */ -static void sh5_flush_cache_all(void) +void flush_cache_all(void) { sh64_dcache_purge_all(); sh64_icache_inv_all(); @@ -510,7 +620,7 @@ static void sh5_flush_cache_all(void) * I-cache. This is similar to the lack of action needed in * flush_tlb_mm - see fault.c. */ -static void sh5_flush_cache_mm(struct mm_struct *mm) +void flush_cache_mm(struct mm_struct *mm) { sh64_dcache_purge_all(); } @@ -522,8 +632,8 @@ static void sh5_flush_cache_mm(struct mm_struct *mm) * * Note, 'end' is 1 byte beyond the end of the range to flush. */ -static void sh5_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { struct mm_struct *mm = vma->vm_mm; @@ -540,8 +650,8 @@ static void sh5_flush_cache_range(struct vm_area_struct *vma, * * Note, this is called with pte lock held. 
 */
-static void sh5_flush_cache_page(struct vm_area_struct *vma,
-		unsigned long eaddr, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr,
+		      unsigned long pfn)
 {
 	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
 
@@ -549,7 +659,7 @@ static void sh5_flush_cache_page(struct vm_area_struct *vma,
 		sh64_icache_inv_user_page(vma, eaddr);
 }
 
-static void sh5_flush_dcache_page(struct page *page)
+void flush_dcache_page(struct page *page)
 {
 	sh64_dcache_purge_phy_page(page_to_phys(page));
 	wmb();
@@ -563,20 +673,39 @@ static void sh5_flush_dcache_page(struct page *page)
  * mapping, therefore it's guaranteed that there are no cache entries for
  * the range in cache sets of the wrong colour.
  */
-static void sh5_flush_icache_range(unsigned long start, unsigned long end)
+void flush_icache_range(unsigned long start, unsigned long end)
 {
 	__flush_purge_region((void *)start, end - start);
 	wmb();
 	sh64_icache_inv_kernel_range(start, end);
 }
 
+/*
+ * Flush the range of user (defined by vma->vm_mm) address space starting
+ * at 'addr' for 'len' bytes from the cache. The range does not straddle
+ * a page boundary, the unique physical page containing the range is
+ * 'page'. This seems to be used mainly for invalidating an address
+ * range following a poke into the program text through the ptrace() call
+ * from another process (e.g. for BRK instruction insertion).
+ */
+void flush_icache_user_range(struct vm_area_struct *vma,
+			struct page *page, unsigned long addr, int len)
+{
+	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
+	mb();
+
+	if (vma->vm_flags & VM_EXEC)
+		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
+}
+
 /*
  * For the address range [start,end), write back the data from the
  * D-cache and invalidate the corresponding region of the I-cache for the
  * current process. Used to flush signal trampolines on the stack to
  * make them executable.
  */
-static void sh5_flush_cache_sigtramp(unsigned long vaddr)
+void flush_cache_sigtramp(unsigned long vaddr)
 {
 	unsigned long end = vaddr + L1_CACHE_BYTES;
 
@@ -585,19 +714,121 @@ static void sh5_flush_cache_sigtramp(unsigned long vaddr)
 	sh64_icache_inv_current_user_range(vaddr, end);
 }
 
-void __init sh5_cache_init(void)
+#ifdef CONFIG_MMU
+/*
+ * These *MUST* lie in an area of virtual address space that's otherwise
+ * unused.
+ */
+#define UNIQUE_EADDR_START 0xe0000000UL
+#define UNIQUE_EADDR_END 0xe8000000UL
+
+/*
+ * Given a physical address paddr, and a user virtual address user_eaddr
+ * which will eventually be mapped to it, create a one-off kernel-private
+ * eaddr mapped to the same paddr. This is used for creating special
+ * destination pages for copy_user_page and clear_user_page.
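+ *
+ * The private eaddr takes its colour bits from user_eaddr
+ * (CACHE_OC_SYN_MASK selects the synonym bits); in essence:
+ *
+ *	coloured = (pointer & ~CACHE_OC_SYN_MASK) |
+ *		   (user_eaddr & CACHE_OC_SYN_MASK);
+ *
+ * so the kernel's writes land in the same cache sets that the eventual
+ * user mapping will read through.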
+ */ +static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, + unsigned long paddr) { - flush_cache_all = sh5_flush_cache_all; - flush_cache_mm = sh5_flush_cache_mm; - flush_cache_dup_mm = sh5_flush_cache_mm; - flush_cache_page = sh5_flush_cache_page; - flush_cache_range = sh5_flush_cache_range; - flush_dcache_page = sh5_flush_dcache_page; - flush_icache_range = sh5_flush_icache_range; - flush_cache_sigtramp = sh5_flush_cache_sigtramp; + static unsigned long current_pointer = UNIQUE_EADDR_START; + unsigned long coloured_pointer; - /* Reserve a slot for dcache colouring in the DTLB */ - dtlb_cache_slot = sh64_get_wired_dtlb_entry(); + if (current_pointer == UNIQUE_EADDR_END) { + sh64_dcache_purge_all(); + current_pointer = UNIQUE_EADDR_START; + } + + coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | + (user_eaddr & CACHE_OC_SYN_MASK); + sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr); + + current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS); + + return coloured_pointer; +} + +static void sh64_copy_user_page_coloured(void *to, void *from, + unsigned long address) +{ + void *coloured_to; + + /* + * Discard any existing cache entries of the wrong colour. These are + * present quite often, if the kernel has recently used the page + * internally, then given it up, then it's been allocated to the user. + */ + sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to); + + coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to)); + copy_page(from, coloured_to); + + sh64_teardown_dtlb_cache_slot(); +} + +static void sh64_clear_user_page_coloured(void *to, unsigned long address) +{ + void *coloured_to; + + /* + * Discard any existing kernel-originated lines of the wrong + * colour (as above) + */ + sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to); - sh4__flush_region_init(); + coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to)); + clear_page(coloured_to); + + sh64_teardown_dtlb_cache_slot(); +} + +/* + * 'from' and 'to' are kernel virtual addresses (within the superpage + * mapping of the physical RAM). 'address' is the user virtual address + * where the copy 'to' will be mapped after. This allows a custom + * mapping to be used to ensure that the new copy is placed in the + * right cache sets for the user to see it without having to bounce it + * out via memory. Note however : the call to flush_page_to_ram in + * (generic)/mm/memory.c:(break_cow) undoes all this good work in that one + * very important case! + * + * TBD : can we guarantee that on every call, any cache entries for + * 'from' are in the same colour sets as 'address' also? i.e. is this + * always used just to deal with COW? (I suspect not). + * + * There are two possibilities here for when the page 'from' was last accessed: + * - by the kernel : this is OK, no purge required. + * - by the/a user (e.g. for break_COW) : need to purge. + * + * If the potential user mapping at 'address' is the same colour as + * 'from' there is no need to purge any cache lines from the 'from' + * page mapped into cache sets of colour 'address'. (The copy will be + * accessing the page through 'from'). 
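+ *
+ * In short, writing C(x) for x & CACHE_OC_SYN_MASK:
+ *
+ *	if (C(address) != C(from))	purge 'from' lines of colour
+ *					C(address) first;
+ *	if (C(address) == C(to))	a plain copy_page() suffices;
+ *	else				bounce through a coloured
+ *					one-off mapping of 'to'.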
+ */ +void copy_user_page(void *to, void *from, unsigned long address, + struct page *page) +{ + if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) + sh64_dcache_purge_coloured_phy_page(__pa(from), address); + + if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) + copy_page(to, from); + else + sh64_copy_user_page_coloured(to, from, address); +} + +/* + * 'to' is a kernel virtual address (within the superpage mapping of the + * physical RAM). 'address' is the user virtual address where the 'to' + * page will be mapped after. This allows a custom mapping to be used to + * ensure that the new copy is placed in the right cache sets for the + * user to see it without having to bounce it out via memory. + */ +void clear_user_page(void *to, unsigned long address, struct page *page) +{ + if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) + clear_page(to); + else + sh64_clear_user_page_coloured(to, address); } +#endif diff --git a/trunk/arch/sh/mm/cache-sh7705.c b/trunk/arch/sh/mm/cache-sh7705.c index f1d5c803c04b..22dacc778823 100644 --- a/trunk/arch/sh/mm/cache-sh7705.c +++ b/trunk/arch/sh/mm/cache-sh7705.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -64,7 +63,7 @@ static inline void cache_wback_all(void) * * Called from kernel/module.c:sys_init_module and routine for a.out format. */ -static void sh7705_flush_icache_range(unsigned long start, unsigned long end) +void flush_icache_range(unsigned long start, unsigned long end) { __flush_wback_region((void *)start, end - start); } @@ -72,7 +71,7 @@ static void sh7705_flush_icache_range(unsigned long start, unsigned long end) /* * Writeback&Invalidate the D-cache of the page */ -static void __flush_dcache_page(unsigned long phys) +static void __uses_jump_to_uncached __flush_dcache_page(unsigned long phys) { unsigned long ways, waysize, addrstart; unsigned long flags; @@ -127,17 +126,13 @@ static void __flush_dcache_page(unsigned long phys) * Write back & invalidate the D-cache of the page. * (To avoid "alias" issues) */ -static void sh7705_flush_dcache_page(struct page *page) +void flush_dcache_page(struct page *page) { - struct address_space *mapping = page_mapping(page); - - if (mapping && !mapping_mapped(mapping)) - set_bit(PG_dcache_dirty, &page->flags); - else + if (test_bit(PG_mapped, &page->flags)) __flush_dcache_page(PHYSADDR(page_address(page))); } -static void sh7705_flush_cache_all(void) +void __uses_jump_to_uncached flush_cache_all(void) { unsigned long flags; @@ -149,7 +144,7 @@ static void sh7705_flush_cache_all(void) local_irq_restore(flags); } -static void sh7705_flush_cache_mm(struct mm_struct *mm) +void flush_cache_mm(struct mm_struct *mm) { /* Is there any good way? */ /* XXX: possibly call flush_cache_range for each vm area */ @@ -165,8 +160,8 @@ static void sh7705_flush_cache_mm(struct mm_struct *mm) * Flushing the cache lines for U0 only isn't enough. * We need to flush for P1 too, which may contain aliases. 
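 *
 * (A page is reachable both through its U0 user mapping and through the
 * kernel's cached P1 window at PA | 0x80000000; e.g. a user mapping at
 * 0x4000a000 of the page at PA 0x0c001000 (P1 alias 0x8c001000) differs
 * in bit 12, which here indexes the cache but lies above the page
 * offset, so the two addresses fall in different cache sets.)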
*/ -static void sh7705_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { /* @@ -184,8 +179,8 @@ static void sh7705_flush_cache_range(struct vm_area_struct *vma, * * ADDRESS: Virtual Address (U0 address) */ -static void sh7705_flush_cache_page(struct vm_area_struct *vma, - unsigned long address, unsigned long pfn) +void flush_cache_page(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn) { __flush_dcache_page(pfn << PAGE_SHIFT); } @@ -198,20 +193,7 @@ static void sh7705_flush_cache_page(struct vm_area_struct *vma, * Not entirely sure why this is necessary on SH3 with 32K cache but * without it we get occasional "Memory fault" when loading a program. */ -static void sh7705_flush_icache_page(struct vm_area_struct *vma, - struct page *page) +void flush_icache_page(struct vm_area_struct *vma, struct page *page) { __flush_purge_region(page_address(page), PAGE_SIZE); } - -void __init sh7705_cache_init(void) -{ - flush_icache_range = sh7705_flush_icache_range; - flush_dcache_page = sh7705_flush_dcache_page; - flush_cache_all = sh7705_flush_cache_all; - flush_cache_mm = sh7705_flush_cache_mm; - flush_cache_dup_mm = sh7705_flush_cache_mm; - flush_cache_range = sh7705_flush_cache_range; - flush_cache_page = sh7705_flush_cache_page; - flush_icache_page = sh7705_flush_icache_page; -} diff --git a/trunk/arch/sh/mm/cache.c b/trunk/arch/sh/mm/cache.c deleted file mode 100644 index d60239460436..000000000000 --- a/trunk/arch/sh/mm/cache.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * arch/sh/mm/pg-mmu.c - * - * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2002 - 2009 Paul Mundt - * - * Released under the terms of the GNU GPL v2.0. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include - -void (*flush_cache_all)(void); -void (*flush_cache_mm)(struct mm_struct *mm); -void (*flush_cache_dup_mm)(struct mm_struct *mm); -void (*flush_cache_page)(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn); -void (*flush_cache_range)(struct vm_area_struct *vma, - unsigned long start, unsigned long end); -void (*flush_dcache_page)(struct page *page); -void (*flush_icache_range)(unsigned long start, unsigned long end); -void (*flush_icache_page)(struct vm_area_struct *vma, - struct page *page); -void (*flush_cache_sigtramp)(unsigned long address); -void (*__flush_wback_region)(void *start, int size); -void (*__flush_purge_region)(void *start, int size); -void (*__flush_invalidate_region)(void *start, int size); - -static inline void noop_flush_cache_all(void) -{ -} - -static inline void noop_flush_cache_mm(struct mm_struct *mm) -{ -} - -static inline void noop_flush_cache_page(struct vm_area_struct *vma, - unsigned long addr, unsigned long pfn) -{ -} - -static inline void noop_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void noop_flush_dcache_page(struct page *page) -{ -} - -static inline void noop_flush_icache_range(unsigned long start, - unsigned long end) -{ -} - -static inline void noop_flush_icache_page(struct vm_area_struct *vma, - struct page *page) -{ -} - -static inline void noop_flush_cache_sigtramp(unsigned long address) -{ -} - -static inline void noop__flush_region(void *start, int size) -{ -} - -void copy_to_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long vaddr, void *dst, const void *src, - unsigned long len) -{ - if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && - !test_bit(PG_dcache_dirty, &page->flags)) { - void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); - memcpy(vto, src, len); - kunmap_coherent(); - } else { - memcpy(dst, src, len); - if (boot_cpu_data.dcache.n_aliases) - set_bit(PG_dcache_dirty, &page->flags); - } - - if (vma->vm_flags & VM_EXEC) - flush_cache_page(vma, vaddr, page_to_pfn(page)); -} - -void copy_from_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long vaddr, void *dst, const void *src, - unsigned long len) -{ - if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && - !test_bit(PG_dcache_dirty, &page->flags)) { - void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); - memcpy(dst, vfrom, len); - kunmap_coherent(); - } else { - memcpy(dst, src, len); - if (boot_cpu_data.dcache.n_aliases) - set_bit(PG_dcache_dirty, &page->flags); - } -} - -void copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr, struct vm_area_struct *vma) -{ - void *vfrom, *vto; - - vto = kmap_atomic(to, KM_USER1); - - if (boot_cpu_data.dcache.n_aliases && page_mapped(from) && - !test_bit(PG_dcache_dirty, &from->flags)) { - vfrom = kmap_coherent(from, vaddr); - copy_page(vto, vfrom); - kunmap_coherent(); - } else { - vfrom = kmap_atomic(from, KM_USER0); - copy_page(vto, vfrom); - kunmap_atomic(vfrom, KM_USER0); - } - - if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) - __flush_wback_region(vto, PAGE_SIZE); - - kunmap_atomic(vto, KM_USER1); - /* Make sure this page is cleared on other CPU's too before using it */ - smp_wmb(); -} -EXPORT_SYMBOL(copy_user_highpage); - -void clear_user_highpage(struct page *page, unsigned long vaddr) -{ - void *kaddr = kmap_atomic(page, KM_USER0); - - clear_page(kaddr); - 
- if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK)) - __flush_wback_region(kaddr, PAGE_SIZE); - - kunmap_atomic(kaddr, KM_USER0); -} -EXPORT_SYMBOL(clear_user_highpage); - -void __update_cache(struct vm_area_struct *vma, - unsigned long address, pte_t pte) -{ - struct page *page; - unsigned long pfn = pte_pfn(pte); - - if (!boot_cpu_data.dcache.n_aliases) - return; - - page = pfn_to_page(pfn); - if (pfn_valid(pfn) && page_mapping(page)) { - int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags); - if (dirty) { - unsigned long addr = (unsigned long)page_address(page); - - if (pages_do_alias(addr, address & PAGE_MASK)) - __flush_wback_region((void *)addr, PAGE_SIZE); - } - } -} - -void __flush_anon_page(struct page *page, unsigned long vmaddr) -{ - unsigned long addr = (unsigned long) page_address(page); - - if (pages_do_alias(addr, vmaddr)) { - if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && - !test_bit(PG_dcache_dirty, &page->flags)) { - void *kaddr; - - kaddr = kmap_coherent(page, vmaddr); - __flush_wback_region((void *)kaddr, PAGE_SIZE); - kunmap_coherent(); - } else - __flush_wback_region((void *)addr, PAGE_SIZE); - } -} - -static void compute_alias(struct cache_info *c) -{ - c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1); - c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0; -} - -static void __init emit_cache_params(void) -{ - printk(KERN_NOTICE "I-cache : n_ways=%d n_sets=%d way_incr=%d\n", - boot_cpu_data.icache.ways, - boot_cpu_data.icache.sets, - boot_cpu_data.icache.way_incr); - printk(KERN_NOTICE "I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", - boot_cpu_data.icache.entry_mask, - boot_cpu_data.icache.alias_mask, - boot_cpu_data.icache.n_aliases); - printk(KERN_NOTICE "D-cache : n_ways=%d n_sets=%d way_incr=%d\n", - boot_cpu_data.dcache.ways, - boot_cpu_data.dcache.sets, - boot_cpu_data.dcache.way_incr); - printk(KERN_NOTICE "D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", - boot_cpu_data.dcache.entry_mask, - boot_cpu_data.dcache.alias_mask, - boot_cpu_data.dcache.n_aliases); - - /* - * Emit Secondary Cache parameters if the CPU has a probed L2. 
- */ - if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) { - printk(KERN_NOTICE "S-cache : n_ways=%d n_sets=%d way_incr=%d\n", - boot_cpu_data.scache.ways, - boot_cpu_data.scache.sets, - boot_cpu_data.scache.way_incr); - printk(KERN_NOTICE "S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", - boot_cpu_data.scache.entry_mask, - boot_cpu_data.scache.alias_mask, - boot_cpu_data.scache.n_aliases); - } -} - -void __init cpu_cache_init(void) -{ - compute_alias(&boot_cpu_data.icache); - compute_alias(&boot_cpu_data.dcache); - compute_alias(&boot_cpu_data.scache); - - flush_cache_all = noop_flush_cache_all; - flush_cache_mm = noop_flush_cache_mm; - flush_cache_dup_mm = noop_flush_cache_mm; - flush_cache_page = noop_flush_cache_page; - flush_cache_range = noop_flush_cache_range; - flush_dcache_page = noop_flush_dcache_page; - flush_icache_range = noop_flush_icache_range; - flush_icache_page = noop_flush_icache_page; - flush_cache_sigtramp = noop_flush_cache_sigtramp; - - __flush_wback_region = noop__flush_region; - __flush_purge_region = noop__flush_region; - __flush_invalidate_region = noop__flush_region; - - if (boot_cpu_data.family == CPU_FAMILY_SH2) { - extern void __weak sh2_cache_init(void); - - sh2_cache_init(); - } - - if (boot_cpu_data.family == CPU_FAMILY_SH2A) { - extern void __weak sh2a_cache_init(void); - - sh2a_cache_init(); - } - - if (boot_cpu_data.family == CPU_FAMILY_SH3) { - extern void __weak sh3_cache_init(void); - - sh3_cache_init(); - - if ((boot_cpu_data.type == CPU_SH7705) && - (boot_cpu_data.dcache.sets == 512)) { - extern void __weak sh7705_cache_init(void); - - sh7705_cache_init(); - } - } - - if ((boot_cpu_data.family == CPU_FAMILY_SH4) || - (boot_cpu_data.family == CPU_FAMILY_SH4A) || - (boot_cpu_data.family == CPU_FAMILY_SH4AL_DSP)) { - extern void __weak sh4_cache_init(void); - - sh4_cache_init(); - } - - if (boot_cpu_data.family == CPU_FAMILY_SH5) { - extern void __weak sh5_cache_init(void); - - sh5_cache_init(); - } - - emit_cache_params(); -} diff --git a/trunk/arch/sh/mm/fault_32.c b/trunk/arch/sh/mm/fault_32.c index f1c93c880ed4..dbbdeba2cee5 100644 --- a/trunk/arch/sh/mm/fault_32.c +++ b/trunk/arch/sh/mm/fault_32.c @@ -318,15 +318,16 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, /* * Called with interrupts disabled. 
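 *
 * This is the software TLB-miss walk, in outline:
 *
 *	pgd = pgd_offset(current->mm, address);   (pgd_offset_k() for P3)
 *	pud = pud_offset(pgd, address);
 *	pmd = pmd_offset(pud, address);
 *	pte = pte_offset_kernel(pmd, address);
 *
 * A missing level at any step returns 1, punting to the full
 * do_page_fault() path; otherwise the pte is marked young (and dirty on
 * a write) and loaded into the TLB via update_mmu_cache().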
*/ -asmlinkage int __kprobes -handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess, - unsigned long address) +asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs, + unsigned long writeaccess, + unsigned long address) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; pte_t entry; + int ret = 1; /* * We don't take page faults for P1, P2, and parts of P4, these @@ -337,41 +338,40 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess, pgd = pgd_offset_k(address); } else { if (unlikely(address >= TASK_SIZE || !current->mm)) - return 1; + goto out; pgd = pgd_offset(current->mm, address); } pud = pud_offset(pgd, address); if (pud_none_or_clear_bad(pud)) - return 1; + goto out; pmd = pmd_offset(pud, address); if (pmd_none_or_clear_bad(pmd)) - return 1; + goto out; pte = pte_offset_kernel(pmd, address); entry = *pte; if (unlikely(pte_none(entry) || pte_not_present(entry))) - return 1; + goto out; if (unlikely(writeaccess && !pte_write(entry))) - return 1; + goto out; if (writeaccess) entry = pte_mkdirty(entry); entry = pte_mkyoung(entry); - set_pte(pte, entry); - #if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP) /* - * SH-4 does not set MMUCR.RC to the corresponding TLB entry in - * the case of an initial page write exception, so we need to - * flush it in order to avoid potential TLB entry duplication. + * ITLB is not affected by "ldtlb" instruction. + * So, we need to flush the entry by ourselves. */ - if (writeaccess == 2) - local_flush_tlb_one(get_asid(), address & PAGE_MASK); + local_flush_tlb_one(get_asid(), address & PAGE_MASK); #endif + set_pte(pte, entry); update_mmu_cache(NULL, address, entry); - return 0; + ret = 0; +out: + return ret; } diff --git a/trunk/arch/sh/mm/fault_64.c b/trunk/arch/sh/mm/fault_64.c index 2b356cec2489..bd63b961b2a9 100644 --- a/trunk/arch/sh/mm/fault_64.c +++ b/trunk/arch/sh/mm/fault_64.c @@ -56,7 +56,16 @@ inline void __do_tlb_refill(unsigned long address, /* * Set PTEH register */ - pteh = neff_sign_extend(address & MMU_VPN_MASK); + pteh = address & MMU_VPN_MASK; + + /* Sign extend based on neff. */ +#if (NEFF == 32) + /* Faster sign extension */ + pteh = (unsigned long long)(signed long long)(signed long)pteh; +#else + /* General case */ + pteh = (pteh & NEFF_SIGN) ? (pteh | NEFF_MASK) : pteh; +#endif /* Set the ASID. */ pteh |= get_asid() << PTEH_ASID_SHIFT; diff --git a/trunk/arch/sh/mm/flush-sh4.c b/trunk/arch/sh/mm/flush-sh4.c deleted file mode 100644 index cef402678f42..000000000000 --- a/trunk/arch/sh/mm/flush-sh4.c +++ /dev/null @@ -1,108 +0,0 @@ -#include -#include -#include - -/* - * Write back the dirty D-caches, but not invalidate them. - * - * START: Virtual Address (U0, P1, or P3) - * SIZE: Size of the region. - */ -static void sh4__flush_wback_region(void *start, int size) -{ - reg_size_t aligned_start, v, cnt, end; - - aligned_start = register_align(start); - v = aligned_start & ~(L1_CACHE_BYTES-1); - end = (aligned_start + size + L1_CACHE_BYTES-1) - & ~(L1_CACHE_BYTES-1); - cnt = (end - v) / L1_CACHE_BYTES; - - while (cnt >= 8) { - __ocbwb(v); v += L1_CACHE_BYTES; - __ocbwb(v); v += L1_CACHE_BYTES; - __ocbwb(v); v += L1_CACHE_BYTES; - __ocbwb(v); v += L1_CACHE_BYTES; - __ocbwb(v); v += L1_CACHE_BYTES; - __ocbwb(v); v += L1_CACHE_BYTES; - __ocbwb(v); v += L1_CACHE_BYTES; - __ocbwb(v); v += L1_CACHE_BYTES; - cnt -= 8; - } - - while (cnt) { - __ocbwb(v); v += L1_CACHE_BYTES; - cnt--; - } -} - -/* - * Write back the dirty D-caches and invalidate them. 
- * - * START: Virtual Address (U0, P1, or P3) - * SIZE: Size of the region. - */ -static void sh4__flush_purge_region(void *start, int size) -{ - reg_size_t aligned_start, v, cnt, end; - - aligned_start = register_align(start); - v = aligned_start & ~(L1_CACHE_BYTES-1); - end = (aligned_start + size + L1_CACHE_BYTES-1) - & ~(L1_CACHE_BYTES-1); - cnt = (end - v) / L1_CACHE_BYTES; - - while (cnt >= 8) { - __ocbp(v); v += L1_CACHE_BYTES; - __ocbp(v); v += L1_CACHE_BYTES; - __ocbp(v); v += L1_CACHE_BYTES; - __ocbp(v); v += L1_CACHE_BYTES; - __ocbp(v); v += L1_CACHE_BYTES; - __ocbp(v); v += L1_CACHE_BYTES; - __ocbp(v); v += L1_CACHE_BYTES; - __ocbp(v); v += L1_CACHE_BYTES; - cnt -= 8; - } - while (cnt) { - __ocbp(v); v += L1_CACHE_BYTES; - cnt--; - } -} - -/* - * No write back please - */ -static void sh4__flush_invalidate_region(void *start, int size) -{ - reg_size_t aligned_start, v, cnt, end; - - aligned_start = register_align(start); - v = aligned_start & ~(L1_CACHE_BYTES-1); - end = (aligned_start + size + L1_CACHE_BYTES-1) - & ~(L1_CACHE_BYTES-1); - cnt = (end - v) / L1_CACHE_BYTES; - - while (cnt >= 8) { - __ocbi(v); v += L1_CACHE_BYTES; - __ocbi(v); v += L1_CACHE_BYTES; - __ocbi(v); v += L1_CACHE_BYTES; - __ocbi(v); v += L1_CACHE_BYTES; - __ocbi(v); v += L1_CACHE_BYTES; - __ocbi(v); v += L1_CACHE_BYTES; - __ocbi(v); v += L1_CACHE_BYTES; - __ocbi(v); v += L1_CACHE_BYTES; - cnt -= 8; - } - - while (cnt) { - __ocbi(v); v += L1_CACHE_BYTES; - cnt--; - } -} - -void __init sh4__flush_region_init(void) -{ - __flush_wback_region = sh4__flush_wback_region; - __flush_invalidate_region = sh4__flush_invalidate_region; - __flush_purge_region = sh4__flush_purge_region; -} diff --git a/trunk/arch/sh/mm/init.c b/trunk/arch/sh/mm/init.c index 0a9b4d855bc9..fe532aeaa16d 100644 --- a/trunk/arch/sh/mm/init.c +++ b/trunk/arch/sh/mm/init.c @@ -210,9 +210,6 @@ void __init mem_init(void) high_memory = node_high_memory; } - /* Set this up early, so we can take care of the zero page */ - cpu_cache_init(); - /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); __flush_wback_region(empty_zero_page, PAGE_SIZE); @@ -233,6 +230,8 @@ void __init mem_init(void) datasize >> 10, initsize >> 10); + p3_cache_init(); + /* Initialize the vDSO */ vsyscall_init(); } diff --git a/trunk/arch/sh/mm/kmap.c b/trunk/arch/sh/mm/kmap.c deleted file mode 100644 index 3eecf0d42f1a..000000000000 --- a/trunk/arch/sh/mm/kmap.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * arch/sh/mm/kmap.c - * - * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2002 - 2009 Paul Mundt - * - * Released under the terms of the GNU GPL v2.0. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)) - -static pte_t *kmap_coherent_pte; - -void __init kmap_coherent_init(void) -{ - unsigned long vaddr; - - if (!boot_cpu_data.dcache.n_aliases) - return; - - /* cache the first coherent kmap pte */ - vaddr = __fix_to_virt(FIX_CMAP_BEGIN); - kmap_coherent_pte = kmap_get_fixmap_pte(vaddr); -} - -void *kmap_coherent(struct page *page, unsigned long addr) -{ - enum fixed_addresses idx; - unsigned long vaddr, flags; - pte_t pte; - - BUG_ON(test_bit(PG_dcache_dirty, &page->flags)); - - inc_preempt_count(); - - idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT; - vaddr = __fix_to_virt(FIX_CMAP_END - idx); - pte = mk_pte(page, PAGE_KERNEL); - - local_irq_save(flags); - flush_tlb_one(get_asid(), vaddr); - local_irq_restore(flags); - - update_mmu_cache(NULL, vaddr, pte); - - set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte); - - return (void *)vaddr; -} - -void kunmap_coherent(void) -{ - dec_preempt_count(); - preempt_check_resched(); -} diff --git a/trunk/arch/sh/mm/mmap.c b/trunk/arch/sh/mm/mmap.c index d2984fa42d3d..1b5fdfb4e0c2 100644 --- a/trunk/arch/sh/mm/mmap.c +++ b/trunk/arch/sh/mm/mmap.c @@ -14,10 +14,10 @@ #include #include +#ifdef CONFIG_MMU unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); -#ifdef CONFIG_MMU /* * To avoid cache aliases, we map the shared page with same color. */ diff --git a/trunk/arch/sh/mm/pg-nommu.c b/trunk/arch/sh/mm/pg-nommu.c new file mode 100644 index 000000000000..91ed4e695ff7 --- /dev/null +++ b/trunk/arch/sh/mm/pg-nommu.c @@ -0,0 +1,38 @@ +/* + * arch/sh/mm/pg-nommu.c + * + * clear_page()/copy_page() implementation for MMUless SH. + * + * Copyright (C) 2003 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include + +void copy_page(void *to, void *from) +{ + memcpy(to, from, PAGE_SIZE); +} + +void clear_page(void *to) +{ + memset(to, 0, PAGE_SIZE); +} + +__kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n) +{ + memcpy(to, from, n); + return 0; +} + +__kernel_size_t __clear_user(void *to, __kernel_size_t n) +{ + memset(to, 0, n); + return 0; +} diff --git a/trunk/arch/sh/mm/pg-sh4.c b/trunk/arch/sh/mm/pg-sh4.c new file mode 100644 index 000000000000..2fe14da1f839 --- /dev/null +++ b/trunk/arch/sh/mm/pg-sh4.c @@ -0,0 +1,146 @@ +/* + * arch/sh/mm/pg-sh4.c + * + * Copyright (C) 1999, 2000, 2002 Niibe Yutaka + * Copyright (C) 2002 - 2007 Paul Mundt + * + * Released under the terms of the GNU GPL v2.0. 
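+ *
+ * The coherent kmap below picks its fixmap slot by page colour so that
+ * kernel-side writes go through the same cache sets as the user's
+ * mapping; e.g. with alias_mask == 0x3000 (four colours):
+ *
+ *	idx   = (addr & 0x3000) >> PAGE_SHIFT;		0..3
+ *	vaddr = __fix_to_virt(FIX_CMAP_END - idx);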
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define CACHE_ALIAS (current_cpu_data.dcache.alias_mask) + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)) + +static pte_t *kmap_coherent_pte; + +void __init kmap_coherent_init(void) +{ + unsigned long vaddr; + + /* cache the first coherent kmap pte */ + vaddr = __fix_to_virt(FIX_CMAP_BEGIN); + kmap_coherent_pte = kmap_get_fixmap_pte(vaddr); +} + +static inline void *kmap_coherent(struct page *page, unsigned long addr) +{ + enum fixed_addresses idx; + unsigned long vaddr, flags; + pte_t pte; + + inc_preempt_count(); + + idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT; + vaddr = __fix_to_virt(FIX_CMAP_END - idx); + pte = mk_pte(page, PAGE_KERNEL); + + local_irq_save(flags); + flush_tlb_one(get_asid(), vaddr); + local_irq_restore(flags); + + update_mmu_cache(NULL, vaddr, pte); + + set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte); + + return (void *)vaddr; +} + +static inline void kunmap_coherent(struct page *page) +{ + dec_preempt_count(); + preempt_check_resched(); +} + +/* + * clear_user_page + * @to: P1 address + * @address: U0 address to be mapped + * @page: page (virt_to_page(to)) + */ +void clear_user_page(void *to, unsigned long address, struct page *page) +{ + __set_bit(PG_mapped, &page->flags); + + clear_page(to); + if ((((address & PAGE_MASK) ^ (unsigned long)to) & CACHE_ALIAS)) + __flush_wback_region(to, PAGE_SIZE); +} + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + void *vto; + + __set_bit(PG_mapped, &page->flags); + + vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(vto, src, len); + kunmap_coherent(vto); + + if (vma->vm_flags & VM_EXEC) + flush_cache_page(vma, vaddr, page_to_pfn(page)); +} + +void copy_from_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + void *vfrom; + + __set_bit(PG_mapped, &page->flags); + + vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(dst, vfrom, len); + kunmap_coherent(vfrom); +} + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *vfrom, *vto; + + __set_bit(PG_mapped, &to->flags); + + vto = kmap_atomic(to, KM_USER1); + vfrom = kmap_coherent(from, vaddr); + copy_page(vto, vfrom); + kunmap_coherent(vfrom); + + if (((vaddr ^ (unsigned long)vto) & CACHE_ALIAS)) + __flush_wback_region(vto, PAGE_SIZE); + + kunmap_atomic(vto, KM_USER1); + /* Make sure this page is cleared on other CPU's too before using it */ + smp_wmb(); +} +EXPORT_SYMBOL(copy_user_highpage); + +/* + * For SH-4, we have our own implementation for ptep_get_and_clear + */ +pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t pte = *ptep; + + pte_clear(mm, addr, ptep); + if (!pte_not_present(pte)) { + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + struct address_space *mapping = page_mapping(page); + if (!mapping || !mapping_writably_mapped(mapping)) + __clear_bit(PG_mapped, &page->flags); + } + } + return pte; +} diff --git a/trunk/arch/sh/mm/pg-sh7705.c b/trunk/arch/sh/mm/pg-sh7705.c new file mode 100644 index 000000000000..eaf25147194c --- /dev/null +++ b/trunk/arch/sh/mm/pg-sh7705.c @@ -0,0 +1,138 @@ +/* + * 
arch/sh/mm/pg-sh7705.c + * + * Copyright (C) 1999, 2000 Niibe Yutaka + * Copyright (C) 2004 Alex Song + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void __flush_purge_virtual_region(void *p1, void *virt, int size) +{ + unsigned long v; + unsigned long begin, end; + unsigned long p1_begin; + + + begin = L1_CACHE_ALIGN((unsigned long)virt); + end = L1_CACHE_ALIGN((unsigned long)virt + size); + + p1_begin = (unsigned long)p1 & ~(L1_CACHE_BYTES - 1); + + /* do this the slow way as we may not have TLB entries + * for virt yet. */ + for (v = begin; v < end; v += L1_CACHE_BYTES) { + unsigned long p; + unsigned long ways, addr; + + p = __pa(p1_begin); + + ways = current_cpu_data.dcache.ways; + addr = CACHE_OC_ADDRESS_ARRAY; + + do { + unsigned long data; + + addr |= (v & current_cpu_data.dcache.entry_mask); + + data = ctrl_inl(addr); + if ((data & CACHE_PHYSADDR_MASK) == + (p & CACHE_PHYSADDR_MASK)) { + data &= ~(SH_CACHE_UPDATED|SH_CACHE_VALID); + ctrl_outl(data, addr); + } + + addr += current_cpu_data.dcache.way_incr; + } while (--ways); + + p1_begin += L1_CACHE_BYTES; + } +} + +/* + * clear_user_page + * @to: P1 address + * @address: U0 address to be mapped + */ +void clear_user_page(void *to, unsigned long address, struct page *pg) +{ + struct page *page = virt_to_page(to); + + __set_bit(PG_mapped, &page->flags); + if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) { + clear_page(to); + __flush_wback_region(to, PAGE_SIZE); + } else { + __flush_purge_virtual_region(to, + (void *)(address & 0xfffff000), + PAGE_SIZE); + clear_page(to); + __flush_wback_region(to, PAGE_SIZE); + } +} + +/* + * copy_user_page + * @to: P1 address + * @from: P1 address + * @address: U0 address to be mapped + */ +void copy_user_page(void *to, void *from, unsigned long address, struct page *pg) +{ + struct page *page = virt_to_page(to); + + + __set_bit(PG_mapped, &page->flags); + if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) { + copy_page(to, from); + __flush_wback_region(to, PAGE_SIZE); + } else { + __flush_purge_virtual_region(to, + (void *)(address & 0xfffff000), + PAGE_SIZE); + copy_page(to, from); + __flush_wback_region(to, PAGE_SIZE); + } +} + +/* + * For SH7705, we have our own implementation for ptep_get_and_clear + * Copied from pg-sh4.c + */ +pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t pte = *ptep; + + pte_clear(mm, addr, ptep); + if (!pte_not_present(pte)) { + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + struct address_space *mapping = page_mapping(page); + if (!mapping || !mapping_writably_mapped(mapping)) + __clear_bit(PG_mapped, &page->flags); + } + } + + return pte; +} + diff --git a/trunk/arch/sh/mm/nommu.c b/trunk/arch/sh/mm/tlb-nommu.c similarity index 54% rename from trunk/arch/sh/mm/nommu.c rename to trunk/arch/sh/mm/tlb-nommu.c index 51b54037216f..71c742b5aee3 100644 --- a/trunk/arch/sh/mm/nommu.c +++ b/trunk/arch/sh/mm/tlb-nommu.c @@ -1,41 +1,20 @@ /* - * arch/sh/mm/nommu.c + * arch/sh/mm/tlb-nommu.c * - * Various helper routines and stubs for MMUless SH. + * TLB Operations for MMUless SH. 
* - * Copyright (C) 2002 - 2009 Paul Mundt + * Copyright (C) 2002 Paul Mundt * * Released under the terms of the GNU GPL v2.0. */ #include -#include -#include #include #include #include -#include -#include /* * Nothing too terribly exciting here .. */ -void copy_page(void *to, void *from) -{ - memcpy(to, from, PAGE_SIZE); -} - -__kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n) -{ - memcpy(to, from, n); - return 0; -} - -__kernel_size_t __clear_user(void *to, __kernel_size_t n) -{ - memset(to, 0, n); - return 0; -} - void local_flush_tlb_all(void) { BUG(); @@ -67,21 +46,8 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) BUG(); } -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) -{ -} - -void __init kmap_coherent_init(void) -{ -} - -void *kmap_coherent(struct page *page, unsigned long addr) -{ - BUG(); - return NULL; -} - -void kunmap_coherent(void) +void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte) { BUG(); } diff --git a/trunk/arch/sh/mm/tlb-pteaex.c b/trunk/arch/sh/mm/tlb-pteaex.c index 409b7c2b4b9d..2aab3ea934d7 100644 --- a/trunk/arch/sh/mm/tlb-pteaex.c +++ b/trunk/arch/sh/mm/tlb-pteaex.c @@ -16,16 +16,34 @@ #include #include -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte) { - unsigned long flags, pteval, vpn; + unsigned long flags; + unsigned long pteval; + unsigned long vpn; - /* - * Handle debugger faulting in for debugee. - */ + /* Ptrace may call this routine. */ if (vma && current->active_mm != vma->vm_mm) return; +#ifndef CONFIG_CACHE_OFF + { + unsigned long pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + if (!test_bit(PG_mapped, &page->flags)) { + unsigned long phys = pte_val(pte) & PTE_PHYS_MASK; + __flush_wback_region((void *)P1SEGADDR(phys), + PAGE_SIZE); + __set_bit(PG_mapped, &page->flags); + } + } + } +#endif + local_irq_save(flags); /* Set PTEH register */ diff --git a/trunk/arch/sh/mm/tlb-sh3.c b/trunk/arch/sh/mm/tlb-sh3.c index ace8e6d2f59d..17cb7c3adf22 100644 --- a/trunk/arch/sh/mm/tlb-sh3.c +++ b/trunk/arch/sh/mm/tlb-sh3.c @@ -27,16 +27,32 @@ #include #include -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte) { - unsigned long flags, pteval, vpn; + unsigned long flags; + unsigned long pteval; + unsigned long vpn; - /* - * Handle debugger faulting in for debugee. - */ + /* Ptrace may call this routine. 
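+	 * An entry loaded here would be tagged with get_asid(), i.e. the
+	 * current ASID, which is the wrong tag for somebody else's mm,
+	 * so just return and let the owning task fault the page in.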
*/ if (vma && current->active_mm != vma->vm_mm) return; +#if defined(CONFIG_SH7705_CACHE_32KB) + { + struct page *page = pte_page(pte); + unsigned long pfn = pte_pfn(pte); + + if (pfn_valid(pfn) && !test_bit(PG_mapped, &page->flags)) { + unsigned long phys = pte_val(pte) & PTE_PHYS_MASK; + + __flush_wback_region((void *)P1SEGADDR(phys), + PAGE_SIZE); + __set_bit(PG_mapped, &page->flags); + } + } +#endif + local_irq_save(flags); /* Set PTEH register */ @@ -77,3 +93,4 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page) for (i = 0; i < ways; i++) ctrl_outl(data, addr + (i << 8)); } + diff --git a/trunk/arch/sh/mm/tlb-sh4.c b/trunk/arch/sh/mm/tlb-sh4.c index 7d3c63e707a5..f0c7b7397fa6 100644 --- a/trunk/arch/sh/mm/tlb-sh4.c +++ b/trunk/arch/sh/mm/tlb-sh4.c @@ -15,16 +15,34 @@ #include #include -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte) { - unsigned long flags, pteval, vpn; + unsigned long flags; + unsigned long pteval; + unsigned long vpn; - /* - * Handle debugger faulting in for debugee. - */ + /* Ptrace may call this routine. */ if (vma && current->active_mm != vma->vm_mm) return; +#ifndef CONFIG_CACHE_OFF + { + unsigned long pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + if (!test_bit(PG_mapped, &page->flags)) { + unsigned long phys = pte_val(pte) & PTE_PHYS_MASK; + __flush_wback_region((void *)P1SEGADDR(phys), + PAGE_SIZE); + __set_bit(PG_mapped, &page->flags); + } + } + } +#endif + local_irq_save(flags); /* Set PTEH register */ diff --git a/trunk/arch/sh/mm/tlb-sh5.c b/trunk/arch/sh/mm/tlb-sh5.c index fdb64e41ec50..dae131243bcc 100644 --- a/trunk/arch/sh/mm/tlb-sh5.c +++ b/trunk/arch/sh/mm/tlb-sh5.c @@ -117,15 +117,26 @@ int sh64_put_wired_dtlb_entry(unsigned long long entry) * Load up a virtual<->physical translation for @eaddr<->@paddr in the * pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry). */ -void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr, - unsigned long asid, unsigned long paddr) +inline void sh64_setup_tlb_slot(unsigned long long config_addr, + unsigned long eaddr, + unsigned long asid, + unsigned long paddr) { unsigned long long pteh, ptel; - pteh = neff_sign_extend(eaddr); + /* Sign extension */ +#if (NEFF == 32) + pteh = (unsigned long long)(signed long long)(signed long) eaddr; +#else +#error "Can't sign extend more than 32 bits yet" +#endif pteh &= PAGE_MASK; pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID; - ptel = neff_sign_extend(paddr); +#if (NEFF == 32) + ptel = (unsigned long long)(signed long long)(signed long) paddr; +#else +#error "Can't sign extend more than 32 bits yet" +#endif ptel &= PAGE_MASK; ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE); @@ -141,5 +152,5 @@ void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr, * * Teardown any existing mapping in the TLB slot @config_addr. 
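 *
 * This is a symbol-level alias rather than a wrapper; the pattern
 *
 *	void f(unsigned long long addr)
 *		__attribute__ ((alias("__flush_tlb_slot")));
 *
 * makes f resolve to __flush_tlb_slot at link time, with no extra call.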
*/ -void sh64_teardown_tlb_slot(unsigned long long config_addr) +inline void sh64_teardown_tlb_slot(unsigned long long config_addr) __attribute__ ((alias("__flush_tlb_slot"))); diff --git a/trunk/arch/sh/mm/tlbflush_64.c b/trunk/arch/sh/mm/tlbflush_64.c index 2dcc48528f7a..3ce40ea34824 100644 --- a/trunk/arch/sh/mm/tlbflush_64.c +++ b/trunk/arch/sh/mm/tlbflush_64.c @@ -329,6 +329,22 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, goto no_context; } +void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t pte) +{ + /* + * This appears to get called once for every pte entry that gets + * established => I don't think it's efficient to try refilling the + * TLBs with the pages - some may not get accessed even. Also, for + * executable pages, it is impossible to determine reliably here which + * TLB they should be mapped into (or both even). + * + * So, just do nothing here and handle faults on demand. In the + * TLBMISS handling case, the refill is now done anyway after the pte + * has been fixed up, so that deals with most useful cases. + */ +} + void local_flush_tlb_one(unsigned long asid, unsigned long page) { unsigned long long match, pteh=0, lpage; @@ -337,7 +353,7 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page) /* * Sign-extend based on neff. */ - lpage = neff_sign_extend(page); + lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page; match = (asid << PTEH_ASID_SHIFT) | PTEH_VALID; match |= lpage; @@ -466,7 +482,3 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) /* FIXME: Optimize this later.. */ flush_tlb_all(); } - -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) -{ -}
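The sh64 paths above open-code NEFF-based sign extension in three places (fault_64.c, tlb-sh5.c and tlbflush_64.c), in two equivalent forms. Below is a small, self-contained sketch showing that the two forms agree for NEFF == 32. NEFF_SIGN and NEFF_MASK are defined locally as plausible stand-ins, since the kernel's own definitions are not part of this patch, and (signed int) stands in for the kernel's (signed long) cast (long is 32-bit on sh64, but may be 64-bit on the host).

#include <assert.h>
#include <stdio.h>

#define NEFF		32
#define NEFF_SIGN	(1ULL << (NEFF - 1))	/* assumed: sign bit of the effective address */
#define NEFF_MASK	(~0ULL << NEFF)		/* assumed: all bits above NEFF */

/* General form, as in tlb-sh5.c/tlbflush_64.c: or in the high bits
 * when the sign bit is set. */
static unsigned long long neff_extend_general(unsigned long long addr)
{
	return (addr & NEFF_SIGN) ? (addr | NEFF_MASK) : addr;
}

/* NEFF == 32 fast path, as in fault_64.c: widening casts do the work. */
static unsigned long long neff_extend_cast(unsigned long addr)
{
	return (unsigned long long)(signed long long)(signed int)addr;
}

int main(void)
{
	static const unsigned long addrs[] = {
		0x00001000UL, 0x7fffffffUL, 0x80000000UL, 0xe0000000UL,
	};
	unsigned int i;

	for (i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++) {
		/* Both idioms must produce the same 64-bit address. */
		assert(neff_extend_general(addrs[i]) ==
		       neff_extend_cast(addrs[i]));
		printf("%08lx -> %016llx\n", addrs[i],
		       neff_extend_general(addrs[i]));
	}
	return 0;
}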