diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 0ac022f800a13..53d9f8e8fac37 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -100,10 +100,10 @@ vfp_support_entry:
 	cmp	r4, #0
 	beq	no_old_VFP_process
 	VFPFMRX	r5, FPSCR		@ current status
-	VFPFMRX	r6, FPINST		@ FPINST (always there, rev0 onwards)
-	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to read?
-	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed - avoids reading
-					@ nonexistant reg on rev0
+	tst	r1, #FPEXC_EX		@ is there additional state to save?
+	VFPFMRX	r6, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
+	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed (and present)
 	VFPFSTMIA r4 			@ save the working registers
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
 					@ and point r4 at the word at the
@@ -117,10 +117,10 @@ no_old_VFP_process:
 	VFPFLDMIA r10	 		@ reload the working registers while
 					@ FPEXC is in a safe state
 	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
-	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to write?
-	VFPFMXR	FPINST2, r8, NE		@ FPINST2 if needed - avoids writing
-					@ nonexistant reg on rev0
-	VFPFMXR	FPINST, r6
+	tst	r1, #FPEXC_EX		@ is there additional state to restore?
+	VFPFMXR	FPINST, r6, NE		@ restore FPINST (only if FPEXC.EX is set)
+	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to write?
+	VFPFMXR	FPINST2, r8, NE		@ FPINST2 if needed (and present)
 	VFPFMXR	FPSCR, r5		@ restore status
 
 check_for_exception:
@@ -136,10 +136,14 @@ check_for_exception:
 
 
 look_for_VFP_exceptions:
-	tst	r1, #FPEXC_EX
+	@ Check for synchronous or asynchronous exception
+	tst	r1, #FPEXC_EX | FPEXC_DEX
 	bne	process_exception
+	@ On some implementations of the VFP subarch 1, setting FPSCR.IXE
+	@ causes all the CDP instructions to be bounced synchronously without
+	@ setting the FPEXC.EX bit
 	VFPFMRX	r5, FPSCR
-	tst	r5, #FPSCR_IXE		@ IXE doesn't set FPEXC_EX !
+	tst	r5, #FPSCR_IXE
 	bne	process_exception
 
 	@ Fall into hand on to next handler - appropriate coproc instr
@@ -150,10 +154,6 @@ look_for_VFP_exceptions:
 
 process_exception:
 	DBGSTR	"bounce"
-	sub	r2, r2, #4
-	str	r2, [sp, #S_PC]		@ retry the instruction on exit from
-					@ the imprecise exception handling in
-					@ the support code
 	mov	r2, sp			@ nothing stacked - regdump is at TOS
 	mov	lr, r9			@ setup for a return to the user code.
 
@@ -161,7 +161,7 @@ process_exception:
 	@   r0 holds the trigger instruction
 	@   r1 holds the FPEXC value
 	@   r2 pointer to register dump
-	b	VFP9_bounce		@ we have handled this - the support
+	b	VFP_bounce		@ we have handled this - the support
 					@ code will raise an exception if
 					@ required. If not, the user code will
 					@ retry the faulted instruction
@@ -175,10 +175,10 @@ vfp_save_state:
 	@ r1 - FPEXC
 	DBGSTR1	"save VFP state %p", r0
 	VFPFMRX	r2, FPSCR		@ current status
-	VFPFMRX	r3, FPINST		@ FPINST (always there, rev0 onwards)
-	tst	r1, #FPEXC_FPV2		@ is there an FPINST2 to read?
-	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed - avoids reading
-					@ nonexistant reg on rev0
+	tst	r1, #FPEXC_EX		@ is there additional state to save?
+	VFPFMRX	r3, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
+	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
+	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed (and present)
 	VFPFSTMIA r0 			@ save the working registers
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
 	mov	pc, lr
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index b4e210df92f2a..32455c633f1c3 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -125,13 +125,13 @@ void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
 	send_sig_info(SIGFPE, &info, current);
 }
 
-static void vfp_panic(char *reason)
+static void vfp_panic(char *reason, u32 inst)
 {
 	int i;
 
 	printk(KERN_ERR "VFP: Error: %s\n", reason);
 	printk(KERN_ERR "VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n",
-		fmrx(FPEXC), fmrx(FPSCR), fmrx(FPINST));
+		fmrx(FPEXC), fmrx(FPSCR), inst);
 	for (i = 0; i < 32; i += 2)
 		printk(KERN_ERR "VFP: s%2u: 0x%08x s%2u: 0x%08x\n",
 		       i, vfp_get_float(i), i+1, vfp_get_float(i+1));
@@ -147,19 +147,16 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
 	pr_debug("VFP: raising exceptions %08x\n", exceptions);
 
 	if (exceptions == VFP_EXCEPTION_ERROR) {
-		vfp_panic("unhandled bounce");
+		vfp_panic("unhandled bounce", inst);
 		vfp_raise_sigfpe(0, regs);
 		return;
 	}
 
 	/*
-	 * If any of the status flags are set, update the FPSCR.
+	 * Update the FPSCR with the additional exception flags.
 	 * Comparison instructions always return at least one of
 	 * these flags set.
 	 */
-	if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
-		fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);
-
 	fpscr |= exceptions;
 
 	fmxr(FPSCR, fpscr);
@@ -220,35 +217,64 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
 /*
  * Package up a bounce condition.
  */
-void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
+void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 {
-	u32 fpscr, orig_fpscr, exceptions, inst;
+	u32 fpscr, orig_fpscr, fpsid, exceptions;
 
 	pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
 
 	/*
-	 * Enable access to the VFP so we can handle the bounce.
+	 * At this point, FPEXC.EX/DEX and FPSCR.IXE can be configured as:
+	 *
+	 *  EX DEX IXE
+	 *  0   1   x   - synchronous exception
+	 *  1   x   0   - asynchronous exception
+	 *  1   x   1   - synchronous on VFP subarch 1 and asynchronous on later
+	 *  0   0   1   - synchronous on VFP9 (non-standard subarch 1
+	 *                implementation), undefined otherwise
+	 *
+	 * Clear various bits and enable access to the VFP so we can
+	 * handle the bounce.
 	 */
-	fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_FPV2|FPEXC_INV|FPEXC_UFC|FPEXC_OFC|FPEXC_IOC));
+	fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK));
 
+	fpsid = fmrx(FPSID);
 	orig_fpscr = fpscr = fmrx(FPSCR);
 
 	/*
-	 * If we are running with inexact exceptions enabled, we need to
-	 * emulate the trigger instruction.  Note that as we're emulating
-	 * the trigger instruction, we need to increment PC.
+	 * Check for the special VFP subarch 1 and FPSCR.IXE bit case
 	 */
-	if (fpscr & FPSCR_IXE) {
-		regs->ARM_pc += 4;
+	if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT)
+	    && (fpscr & FPSCR_IXE)) {
+		/*
+		 * Synchronous exception, emulate the trigger instruction
+		 */
 		goto emulate;
 	}
 
-	barrier();
+	if (fpexc & FPEXC_EX) {
+		/*
+		 * Asynchronous exception. The instruction is read from FPINST
+		 * and the interrupted instruction has to be restarted.
+		 */
+		trigger = fmrx(FPINST);
+		regs->ARM_pc -= 4;
+	} else if (!(fpexc & FPEXC_DEX)) {
+		/*
+		 * Illegal combination of bits. It can be caused by an
+		 * unallocated VFP instruction but with FPSCR.IXE set and not
+		 * on VFP subarch 1.
+		 */
+		 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
+		 return;
+	}
 
 	/*
-	 * Modify fpscr to indicate the number of iterations remaining
+	 * Modify fpscr to indicate the number of iterations remaining.
+	 * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates
+	 * whether FPEXC.VECITR or FPSCR.LEN is used.
 	 */
-	if (fpexc & FPEXC_EX) {
+	if (fpexc & (FPEXC_EX | FPEXC_VV)) {
 		u32 len;
 
 		len = fpexc + (1 << FPEXC_LENGTH_BIT);
@@ -262,15 +288,15 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 * FPEXC bounce reason, but this appears to be unreliable.
 	 * Emulate the bounced instruction instead.
 	 */
-	inst = fmrx(FPINST);
-	exceptions = vfp_emulate_instruction(inst, fpscr, regs);
+	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
 	if (exceptions)
-		vfp_raise_exceptions(exceptions, inst, orig_fpscr, regs);
+		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
 
 	/*
-	 * If there isn't a second FP instruction, exit now.
+	 * If there isn't a second FP instruction, exit now. FPEXC.FP2V is
+	 * only valid when FPEXC.EX is 1, so require both bits to be set.
 	 */
-	if (!(fpexc & FPEXC_FPV2))
+	if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
 		return;
 
 	/*
@@ -279,10 +305,9 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 */
 	barrier();
 	trigger = fmrx(FPINST2);
-	orig_fpscr = fpscr = fmrx(FPSCR);
 
  emulate:
-	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
+	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
 	if (exceptions)
 		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
 }
@@ -306,16 +331,9 @@ static int __init vfp_init(void)
 {
 	unsigned int vfpsid;
 	unsigned int cpu_arch = cpu_architecture();
-	u32 access = 0;
 
-	if (cpu_arch >= CPU_ARCH_ARMv6) {
-		access = get_copro_access();
-
-		/*
-		 * Enable full access to VFP (cp10 and cp11)
-		 */
-		set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11));
-	}
+	if (cpu_arch >= CPU_ARCH_ARMv6)
+		vfp_enable(NULL);
 
 	/*
 	 * First check that there is a VFP that we can use.
@@ -329,15 +347,9 @@ static int __init vfp_init(void)
 	vfp_vector = vfp_null_entry;
 
 	printk(KERN_INFO "VFP support v0.3: ");
-	if (VFP_arch) {
+	if (VFP_arch)
 		printk("not present\n");
-
-		/*
-		 * Restore the copro access register.
-		 */
-		if (cpu_arch >= CPU_ARCH_ARMv6)
-			set_copro_access(access);
-	} else if (vfpsid & FPSID_NODOUBLE) {
+	else if (vfpsid & FPSID_NODOUBLE) {
 		printk("no double precision support\n");
 	} else {
 		smp_call_function(vfp_enable, NULL, 1, 1);
diff --git a/include/asm-arm/vfp.h b/include/asm-arm/vfp.h
index bd6be9d7f7729..9d474d47b2661 100644
--- a/include/asm-arm/vfp.h
+++ b/include/asm-arm/vfp.h
@@ -8,6 +8,8 @@
 #define FPSID			cr0
 #define FPSCR			cr1
 #define FPEXC			cr8
+#define FPINST			cr9
+#define FPINST2			cr10
 
 /* FPSID bits */
 #define FPSID_IMPLEMENTER_BIT	(24)
@@ -28,6 +30,19 @@
 /* FPEXC bits */
 #define FPEXC_EX		(1 << 31)
 #define FPEXC_EN		(1 << 30)
+#define FPEXC_DEX		(1 << 29)
+#define FPEXC_FP2V		(1 << 28)
+#define FPEXC_VV		(1 << 27)
+#define FPEXC_TFV		(1 << 26)
+#define FPEXC_LENGTH_BIT	(8)
+#define FPEXC_LENGTH_MASK	(7 << FPEXC_LENGTH_BIT)
+#define FPEXC_IDF		(1 << 7)
+#define FPEXC_IXF		(1 << 4)
+#define FPEXC_UFF		(1 << 3)
+#define FPEXC_OFF		(1 << 2)
+#define FPEXC_DZF		(1 << 1)
+#define FPEXC_IOF		(1 << 0)
+#define FPEXC_TRAP_MASK		(FPEXC_IDF|FPEXC_IXF|FPEXC_UFF|FPEXC_OFF|FPEXC_DZF|FPEXC_IOF)
 
 /* FPSCR bits */
 #define FPSCR_DEFAULT_NAN	(1<<25)
@@ -55,21 +70,6 @@
 #define FPSCR_IXC		(1<<4)
 #define FPSCR_IDC		(1<<7)
 
-/*
- * VFP9-S specific.
- */
-#define FPINST			cr9
-#define FPINST2			cr10
-
-/* FPEXC bits */
-#define FPEXC_FPV2		(1<<28)
-#define FPEXC_LENGTH_BIT	(8)
-#define FPEXC_LENGTH_MASK	(7 << FPEXC_LENGTH_BIT)
-#define FPEXC_INV		(1 << 7)
-#define FPEXC_UFC		(1 << 3)
-#define FPEXC_OFC		(1 << 2)
-#define FPEXC_IOC		(1 << 0)
-
 /* Bit patterns for decoding the packaged operation descriptors */
 #define VFPOPDESC_LENGTH_BIT	(9)
 #define VFPOPDESC_LENGTH_MASK	(0x07 << VFPOPDESC_LENGTH_BIT)