From 79ea757a8a1684e4f3148fad15466c5c433d9467 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg
Date: Fri, 26 Dec 2008 22:41:18 +0100
Subject: [PATCH]

--- yaml ---
r: 125425
b: refs/heads/master
c: cfb2a494bb7dca9cf8d1632fbed14b34db051980
h: refs/heads/master
i:
  125423: 16ffdefdd71883f86daf5ab2d85ddf28ed699fdb
v: v3
---
 [refs] | 2 +-
 trunk/Documentation/cpu-hotplug.txt | 17 +-
 trunk/MAINTAINERS | 2 +
 trunk/arch/alpha/include/asm/smp.h | 1 +
 trunk/arch/alpha/kernel/irq.c | 2 +-
 trunk/arch/alpha/kernel/process.c | 2 -
 trunk/arch/alpha/kernel/smp.c | 7 +-
 trunk/arch/alpha/kernel/sys_dp264.c | 8 +-
 trunk/arch/alpha/kernel/sys_titan.c | 4 +-
 trunk/arch/arm/common/gic.c | 4 +-
 trunk/arch/arm/kernel/irq.c | 2 +-
 trunk/arch/arm/kernel/smp.c | 10 +
 trunk/arch/arm/mach-at91/at91rm9200_time.c | 3 +-
 trunk/arch/arm/mach-at91/at91sam926x_time.c | 2 +-
 trunk/arch/arm/mach-davinci/time.c | 2 +-
 trunk/arch/arm/mach-imx/time.c | 2 +-
 trunk/arch/arm/mach-ixp4xx/common.c | 2 +-
 trunk/arch/arm/mach-msm/timer.c | 2 +-
 trunk/arch/arm/mach-ns9xxx/time-ns9360.c | 2 +-
 trunk/arch/arm/mach-omap1/time.c | 2 +-
 trunk/arch/arm/mach-omap1/timer32k.c | 2 +-
 trunk/arch/arm/mach-omap2/timer-gp.c | 2 +-
 trunk/arch/arm/mach-pxa/time.c | 2 +-
 trunk/arch/arm/mach-realview/core.c | 2 +-
 trunk/arch/arm/mach-realview/localtimer.c | 4 +-
 trunk/arch/arm/mach-sa1100/time.c | 2 +-
 trunk/arch/arm/mach-versatile/core.c | 2 +-
 trunk/arch/arm/oprofile/op_model_mpcore.c | 4 +-
 trunk/arch/arm/plat-mxc/time.c | 2 +-
 trunk/arch/arm/plat-orion/time.c | 2 +-
 trunk/arch/avr32/kernel/time.c | 2 +-
 trunk/arch/blackfin/kernel/time-ts.c | 2 +-
 trunk/arch/cris/arch-v32/kernel/irq.c | 4 +-
 trunk/arch/cris/arch-v32/kernel/smp.c | 4 +
 trunk/arch/cris/include/asm/smp.h | 1 +
 trunk/arch/ia64/hp/sim/hpsim_irq.c | 2 +-
 trunk/arch/ia64/include/asm/kvm.h | 6 +-
 trunk/arch/ia64/include/asm/kvm_host.h | 196 +--
 trunk/arch/ia64/include/asm/smp.h | 1 +
 trunk/arch/ia64/include/asm/topology.h | 2 +
 trunk/arch/ia64/kernel/iosapic.c | 12 +-
 trunk/arch/ia64/kernel/irq.c | 9 +-
 trunk/arch/ia64/kernel/msi_ia64.c | 12 +-
 trunk/arch/ia64/kernel/smpboot.c | 10 +-
 trunk/arch/ia64/kernel/topology.c | 2 +-
 trunk/arch/ia64/kvm/Makefile | 2 +-
 trunk/arch/ia64/kvm/asm-offsets.c | 11 +-
 trunk/arch/ia64/kvm/kvm-ia64.c | 107 +-
 trunk/arch/ia64/kvm/kvm_lib.c | 15 -
 trunk/arch/ia64/kvm/kvm_minstate.h | 4 +-
 trunk/arch/ia64/kvm/misc.h | 3 +-
 trunk/arch/ia64/kvm/mmio.c | 38 +-
 trunk/arch/ia64/kvm/process.c | 29 +-
 trunk/arch/ia64/kvm/vcpu.c | 76 +-
 trunk/arch/ia64/kvm/vcpu.h | 5 +-
 trunk/arch/ia64/kvm/vmm.c | 29 -
 trunk/arch/ia64/kvm/vmm_ivt.S | 1469 +++++++++--------
 trunk/arch/ia64/kvm/vtlb.c | 4 +-
 trunk/arch/ia64/sn/kernel/irq.c | 6 +-
 trunk/arch/ia64/sn/kernel/msi_sn.c | 7 +-
 trunk/arch/m32r/Kconfig | 1 -
 trunk/arch/m32r/kernel/smpboot.c | 6 +
 trunk/arch/m68k/Kconfig | 1 -
 trunk/arch/m68knommu/platform/coldfire/pit.c | 2 +-
 trunk/arch/mips/include/asm/irq.h | 3 +-
 .../mips/include/asm/mach-ip27/topology.h | 1 +
 trunk/arch/mips/include/asm/smp.h | 3 +
 trunk/arch/mips/jazz/irq.c | 2 +-
 trunk/arch/mips/kernel/cevt-bcm1480.c | 4 +-
 trunk/arch/mips/kernel/cevt-ds1287.c | 2 +-
 trunk/arch/mips/kernel/cevt-gt641xx.c | 2 +-
 trunk/arch/mips/kernel/cevt-r4k.c | 2 +-
 trunk/arch/mips/kernel/cevt-sb1250.c | 4 +-
 trunk/arch/mips/kernel/cevt-smtc.c | 2 +-
 trunk/arch/mips/kernel/cevt-txx9.c | 2 +-
 trunk/arch/mips/kernel/i8253.c | 2 +-
 trunk/arch/mips/kernel/irq-gic.c | 6 +-
 trunk/arch/mips/kernel/smp-cmp.c | 6 +-
 trunk/arch/mips/kernel/smp-mt.c | 2 +-
 trunk/arch/mips/kernel/smp.c | 7 +-
 trunk/arch/mips/kernel/smtc.c | 6 +-
 trunk/arch/mips/mti-malta/malta-smtc.c | 6 +-
 trunk/arch/mips/nxp/pnx8550/common/time.c | 1 -
 trunk/arch/mips/pmc-sierra/yosemite/smp.c | 6 +-
 trunk/arch/mips/sgi-ip27/ip27-smp.c | 2 +-
 trunk/arch/mips/sgi-ip27/ip27-timer.c | 2 +-
 trunk/arch/mips/sibyte/bcm1480/irq.c | 8 +-
 trunk/arch/mips/sibyte/bcm1480/smp.c | 8 +-
 trunk/arch/mips/sibyte/sb1250/irq.c | 8 +-
 trunk/arch/mips/sibyte/sb1250/smp.c | 8 +-
 trunk/arch/mips/sni/time.c | 2 +-
 trunk/arch/parisc/Kconfig | 1 -
 trunk/arch/parisc/kernel/irq.c | 6 +-
 trunk/arch/parisc/kernel/smp.c | 15 +
 trunk/arch/powerpc/include/asm/disassemble.h | 80 -
 trunk/arch/powerpc/include/asm/kvm_44x.h | 61 -
 trunk/arch/powerpc/include/asm/kvm_host.h | 116 +-
 trunk/arch/powerpc/include/asm/kvm_ppc.h | 83 +-
 trunk/arch/powerpc/include/asm/mmu-44x.h | 1 -
 trunk/arch/powerpc/include/asm/topology.h | 1 +
 trunk/arch/powerpc/kernel/asm-offsets.c | 21 +-
 trunk/arch/powerpc/kernel/irq.c | 2 +-
 trunk/arch/powerpc/kernel/smp.c | 4 +
 trunk/arch/powerpc/kernel/time.c | 2 +-
 trunk/arch/powerpc/kvm/44x.c | 228 ---
 trunk/arch/powerpc/kvm/44x_emulate.c | 371 -----
 trunk/arch/powerpc/kvm/44x_tlb.c | 463 ++----
 trunk/arch/powerpc/kvm/44x_tlb.h | 26 +-
 trunk/arch/powerpc/kvm/Kconfig | 28 +-
 trunk/arch/powerpc/kvm/Makefile | 12 +-
 trunk/arch/powerpc/kvm/booke.h | 60 -
 .../powerpc/kvm/{booke.c => booke_guest.c} | 418 ++---
 trunk/arch/powerpc/kvm/booke_host.c | 83 +
 trunk/arch/powerpc/kvm/booke_interrupts.S | 72 +-
 trunk/arch/powerpc/kvm/emulate.c | 447 ++++-
 trunk/arch/powerpc/kvm/powerpc.c | 130 +-
 trunk/arch/powerpc/kvm/timing.c | 239 ---
 trunk/arch/powerpc/kvm/timing.h | 102 --
 trunk/arch/powerpc/platforms/pseries/xics.c | 4 +-
 trunk/arch/powerpc/sysdev/mpic.c | 4 +-
 trunk/arch/powerpc/sysdev/mpic.h | 2 +-
 trunk/arch/s390/Kconfig | 1 -
 trunk/arch/s390/kernel/smp.c | 6 +
 trunk/arch/s390/kernel/time.c | 2 +-
 trunk/arch/s390/kvm/kvm-s390.c | 41 +-
 trunk/arch/sh/include/asm/smp.h | 2 +-
 trunk/arch/sh/include/asm/topology.h | 1 +
 trunk/arch/sh/kernel/smp.c | 10 +-
 trunk/arch/sh/kernel/timers/timer-broadcast.c | 2 +-
 trunk/arch/sh/kernel/timers/timer-tmu.c | 2 +-
 trunk/arch/sparc/include/asm/smp_32.h | 2 +
 trunk/arch/sparc/kernel/irq_64.c | 11 +-
 trunk/arch/sparc/kernel/of_device_64.c | 2 +-
 trunk/arch/sparc/kernel/pci_msi.c | 2 +-
 trunk/arch/sparc/kernel/smp_32.c | 6 +-
 trunk/arch/sparc/kernel/smp_64.c | 4 +
 trunk/arch/sparc/kernel/sparc_ksyms_32.c | 4 +
 trunk/arch/sparc/kernel/time_64.c | 2 +-
 trunk/arch/um/kernel/smp.c | 7 +
 trunk/arch/um/kernel/time.c | 2 +-
 trunk/arch/x86/Kconfig | 13 +-
 trunk/arch/x86/ia32/ia32_signal.c | 3 +-
 trunk/arch/x86/ia32/ipc32.c | 1 -
 trunk/arch/x86/ia32/sys_ia32.c | 2 +-
 trunk/arch/x86/include/asm/apic.h | 3 +-
 trunk/arch/x86/include/asm/bigsmp/apic.h | 32 +-
 trunk/arch/x86/include/asm/bigsmp/ipi.h | 13 +-
 trunk/arch/x86/include/asm/desc.h | 10 +-
 trunk/arch/x86/include/asm/efi.h | 1 -
 trunk/arch/x86/include/asm/es7000/apic.h | 82 +-
 trunk/arch/x86/include/asm/es7000/ipi.h | 12 +-
 trunk/arch/x86/include/asm/genapic_32.h | 13 +-
 trunk/arch/x86/include/asm/genapic_64.h | 14 +-
 trunk/arch/x86/include/asm/ipi.h | 23 +-
 trunk/arch/x86/include/asm/irq.h | 3 +-
 trunk/arch/x86/include/asm/kvm_host.h | 45 +-
 trunk/arch/x86/include/asm/kvm_x86_emulate.h | 11 +-
 .../x86/include/asm/mach-default/mach_apic.h | 28 +-
 .../x86/include/asm/mach-default/mach_ipi.h | 18 +-
 .../x86/include/asm/mach-generic/mach_apic.h | 1 -
 trunk/arch/x86/include/asm/mpspec.h | 2 +-
 trunk/arch/x86/include/asm/mtrr.h | 25 -
 trunk/arch/x86/include/asm/numaq/apic.h | 12 +-
 trunk/arch/x86/include/asm/numaq/ipi.h | 13 +-
 trunk/arch/x86/include/asm/smp.h | 6 +-
 trunk/arch/x86/include/asm/summit/apic.h | 55 +-
 trunk/arch/x86/include/asm/summit/ipi.h | 9 +-
 trunk/arch/x86/include/asm/sys_ia32.h | 101 --
 trunk/arch/x86/include/asm/topology.h | 2 -
 trunk/arch/x86/include/asm/uv/uv_bau.h | 46 +-
 trunk/arch/x86/include/asm/virtext.h | 132 --
 trunk/arch/x86/kernel/amd_iommu.c | 2 +-
 trunk/arch/x86/kernel/amd_iommu_init.c | 4 +-
 trunk/arch/x86/kernel/apic.c | 42 +-
 trunk/arch/x86/kernel/bios_uv.c | 2 +-
 trunk/arch/x86/kernel/cpu/intel_cacheinfo.c | 45 +-
 trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 108 +-
 trunk/arch/x86/kernel/cpu/mtrr/generic.c | 12 +-
 trunk/arch/x86/kernel/cpu/mtrr/main.c | 10 +-
 trunk/arch/x86/kernel/cpu/mtrr/mtrr.h | 18 +-
 trunk/arch/x86/kernel/cpuid.c | 6 +-
 trunk/arch/x86/kernel/crash.c | 18 -
 trunk/arch/x86/kernel/early_printk.c | 2 +-
 trunk/arch/x86/kernel/genapic_flat_64.c | 107 +-
 trunk/arch/x86/kernel/genx2apic_cluster.c | 81 +-
 trunk/arch/x86/kernel/genx2apic_phys.c | 78 +-
 trunk/arch/x86/kernel/genx2apic_uv_x.c | 61 +-
 trunk/arch/x86/kernel/head64.c | 2 +-
 trunk/arch/x86/kernel/hpet.c | 8 +-
 trunk/arch/x86/kernel/i8253.c | 2 +-
 trunk/arch/x86/kernel/io_apic.c | 372 +++--
 trunk/arch/x86/kernel/ipi.c | 28 +-
 trunk/arch/x86/kernel/irq.c | 3 -
 trunk/arch/x86/kernel/irq_32.c | 13 +-
 trunk/arch/x86/kernel/irq_64.c | 15 +-
 trunk/arch/x86/kernel/irqinit_32.c | 16 +-
 trunk/arch/x86/kernel/irqinit_64.c | 13 -
 trunk/arch/x86/kernel/kvmclock.c | 10 +-
 trunk/arch/x86/kernel/ldt.c | 4 +-
 trunk/arch/x86/kernel/mfgpt_32.c | 2 +-
 trunk/arch/x86/kernel/mmconf-fam10h_64.c | 3 +-
 trunk/arch/x86/kernel/mpparse.c | 10 +-
 trunk/arch/x86/kernel/nmi.c | 3 +-
 trunk/arch/x86/kernel/pci-gart_64.c | 2 +-
 trunk/arch/x86/kernel/reboot.c | 69 +-
 trunk/arch/x86/kernel/setup_percpu.c | 19 +-
 trunk/arch/x86/kernel/smp.c | 8 +-
 trunk/arch/x86/kernel/smpboot.c | 33 +-
 trunk/arch/x86/kernel/tlb_32.c | 2 +-
 trunk/arch/x86/kernel/tlb_64.c | 2 +-
 trunk/arch/x86/kernel/tlb_uv.c | 9 +
 trunk/arch/x86/kernel/traps.c | 45 +-
 trunk/arch/x86/kernel/vmiclock_32.c | 2 +-
 trunk/arch/x86/kernel/xsave.c | 2 +-
 trunk/arch/x86/kvm/i8254.c | 19 -
 trunk/arch/x86/kvm/i8259.c | 52 +-
 trunk/arch/x86/kvm/irq.h | 6 -
 trunk/arch/x86/kvm/kvm_svm.h | 2 +-
 trunk/arch/x86/kvm/lapic.c | 58 +-
 trunk/arch/x86/kvm/mmu.c | 444 +----
 trunk/arch/x86/kvm/paging_tmpl.h | 44 +-
 trunk/arch/x86/kvm/svm.c | 48 +-
 trunk/arch/x86/{include/asm => kvm}/svm.h | 0
 trunk/arch/x86/kvm/vmx.c | 350 ++--
 trunk/arch/x86/{include/asm => kvm}/vmx.h | 27 +-
 trunk/arch/x86/kvm/x86.c | 117 +-
 trunk/arch/x86/kvm/x86_emulate.c | 297 ++--
 trunk/arch/x86/lguest/boot.c | 2 +-
 trunk/arch/x86/mach-default/setup.c | 15 +-
 trunk/arch/x86/mach-generic/bigsmp.c | 5 +-
 trunk/arch/x86/mach-generic/es7000.c | 5 +-
 trunk/arch/x86/mach-generic/numaq.c | 5 +-
 trunk/arch/x86/mach-generic/summit.c | 5 +-
 trunk/arch/x86/mach-voyager/voyager_smp.c | 9 +-
 trunk/arch/x86/mm/numa_64.c | 4 +-
 trunk/arch/x86/mm/srat_64.c | 2 +-
 trunk/arch/x86/pci/acpi.c | 2 +-
 trunk/arch/x86/pci/amd_bus.c | 2 +-
 trunk/arch/x86/pci/common.c | 3 +-
 trunk/arch/x86/pci/direct.c | 2 +-
 trunk/arch/x86/pci/early.c | 2 +-
 trunk/arch/x86/pci/fixup.c | 3 +-
 trunk/arch/x86/pci/i386.c | 2 +-
 trunk/arch/x86/pci/init.c | 2 +-
 trunk/arch/x86/pci/irq.c | 3 +-
 trunk/arch/x86/pci/legacy.c | 2 +-
 trunk/arch/x86/pci/mmconfig-shared.c | 3 +-
 trunk/arch/x86/pci/mmconfig_32.c | 2 +-
 trunk/arch/x86/pci/mmconfig_64.c | 3 +-
 trunk/arch/x86/pci/numaq_32.c | 2 +-
 trunk/arch/x86/pci/olpc.c | 2 +-
 trunk/arch/x86/pci/pcbios.c | 5 +-
 .../x86/{include/asm/pci_x86.h => pci/pci.h} | 17 +-
 trunk/arch/x86/pci/visws.c | 3 +-
 trunk/arch/x86/xen/mmu.c | 20 +-
 trunk/arch/x86/xen/smp.c | 27 +-
 trunk/arch/x86/xen/suspend.c | 3 +-
 trunk/arch/x86/xen/time.c | 2 +-
 trunk/arch/x86/xen/xen-ops.h | 2 +-
 trunk/drivers/base/cpu.c | 2 +-
 trunk/drivers/base/node.c | 4 +-
 trunk/drivers/base/topology.c | 4 +-
 trunk/drivers/clocksource/tcb_clksrc.c | 2 +-
 trunk/drivers/lguest/interrupts_and_traps.c | 13 +-
 trunk/drivers/parisc/iosapic.c | 7 +-
 trunk/drivers/pci/hotplug/cpqphp_core.c | 2 +-
 trunk/drivers/pci/hotplug/cpqphp_pci.c | 2 +-
 trunk/drivers/pci/hotplug/ibmphp_core.c | 2 +-
 trunk/drivers/pci/pci-sysfs.c | 4 +-
 trunk/drivers/pci/probe.c | 4 +-
 trunk/drivers/xen/events.c | 6 +-
 trunk/fs/anon_inodes.c | 7 +-
 trunk/include/asm-generic/topology.h | 14 +-
 trunk/include/asm-m32r/smp.h | 2 +
 trunk/include/linux/clockchips.h | 4 +-
 trunk/include/linux/compiler-gcc3.h | 4 -
 trunk/include/linux/cpumask.h | 98 +-
 trunk/include/linux/interrupt.h | 4 +-
 trunk/include/linux/irq.h | 3 +-
 trunk/include/linux/kvm.h | 18 -
 trunk/include/linux/kvm_host.h | 12 +-
 trunk/include/linux/sched.h | 92 +-
 trunk/include/linux/topology.h | 6 +-
 trunk/init/Kconfig | 9 -
 trunk/kernel/cpu.c | 11 +-
 trunk/kernel/cpuset.c | 4 +-
 trunk/kernel/irq/chip.c | 2 +-
 trunk/kernel/irq/manage.c | 22 +-
 trunk/kernel/irq/migration.c | 14 +-
 trunk/kernel/irq/proc.c | 29 +-
 trunk/kernel/profile.c | 4 +-
 trunk/kernel/rcuclassic.c | 2 +-
 trunk/kernel/sched.c | 970 +++++------
 trunk/kernel/sched_cpupri.c | 39 +-
 trunk/kernel/sched_cpupri.h | 5 +-
 trunk/kernel/sched_fair.c | 32 +-
 trunk/kernel/sched_rt.c | 73 +-
 trunk/kernel/sched_stats.h | 3 +-
 trunk/kernel/taskstats.c | 2 +-
 trunk/kernel/time/clockevents.c | 2 -
 trunk/kernel/time/tick-broadcast.c | 2 +-
 trunk/kernel/time/tick-common.c | 12 +-
 trunk/kernel/time/tick-sched.c | 10 +-
 trunk/kernel/trace/trace.c | 4 +-
 trunk/lib/Kconfig | 7 -
 trunk/mm/slub.c | 2 +-
 trunk/virt/kvm/ioapic.c | 8 +-
 trunk/virt/kvm/ioapic.h | 2 -
 trunk/virt/kvm/irq_comm.c | 19 +-
 trunk/virt/kvm/kvm_main.c | 420 ++---
 trunk/virt/kvm/kvm_trace.c | 1 -
 311 files changed, 4159 insertions(+), 7001 deletions(-)
 delete mode 100644 trunk/arch/ia64/kvm/kvm_lib.c
 delete mode 100644 trunk/arch/powerpc/include/asm/disassemble.h
 delete mode 100644 trunk/arch/powerpc/include/asm/kvm_44x.h
 delete mode 100644 trunk/arch/powerpc/kvm/44x.c
 delete mode 100644 trunk/arch/powerpc/kvm/44x_emulate.c
 delete mode 100644 trunk/arch/powerpc/kvm/booke.h
 rename trunk/arch/powerpc/kvm/{booke.c => booke_guest.c} (56%)
 create mode 100644 trunk/arch/powerpc/kvm/booke_host.c
 delete mode 100644 trunk/arch/powerpc/kvm/timing.c
 delete mode 100644 trunk/arch/powerpc/kvm/timing.h
 delete mode 100644 trunk/arch/x86/include/asm/sys_ia32.h
 delete mode 100644 trunk/arch/x86/include/asm/virtext.h
 rename trunk/arch/x86/{include/asm => kvm}/svm.h (100%)
 rename trunk/arch/x86/{include/asm => kvm}/vmx.h (93%)
 rename trunk/arch/x86/{include/asm/pci_x86.h => pci/pci.h} (88%)

diff --git a/[refs] b/[refs]
index a587c0e6a238..092e9bade019 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: a8e782348d9f0dc64f6adb81f5f6959921949f13
+refs/heads/master: cfb2a494bb7dca9cf8d1632fbed14b34db051980
diff --git a/trunk/Documentation/cpu-hotplug.txt b/trunk/Documentation/cpu-hotplug.txt
index 9d620c153b04..94bbc27ddd4f 100644
--- a/trunk/Documentation/cpu-hotplug.txt
+++ b/trunk/Documentation/cpu-hotplug.txt
@@ -50,17 +50,16 @@
 additional_cpus=n (*)	Use this to limit hotpluggable cpus. This option sets
 			cpu_possible_map = cpu_present_map + additional_cpus
 
 (*) Option valid only for following architectures
-- ia64
+- x86_64, ia64
 
-ia64 uses the number of disabled local apics in ACPI tables MADT to
-determine the number of potentially hot-pluggable cpus. The implementation
-should only rely on this to count the # of cpus, but *MUST* not rely
-on the apicid values in those tables for disabled apics. In the event
-BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
-use this parameter "additional_cpus=x" to represent those cpus in the
-cpu_possible_map.
+ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
+to determine the number of potentially hot-pluggable cpus. The implementation
+should only rely on this to count the # of cpus, but *MUST* not rely on the
+apicid values in those tables for disabled apics. In the event BIOS doesn't
+mark such hot-pluggable cpus as disabled entries, one could use this
+parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map.
 
-possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
+possible_cpus=n [s390 only] use this to set hotpluggable cpus.
 			This option sets possible_cpus bits in cpu_possible_map.
 			Thus keeping the numbers of bits set constant even if
 			the machine gets rebooted.
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index befacf07729f..88f43e09aeb3 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -2542,6 +2542,8 @@ W: http://kvm.qumranet.com
 S:	Supported
 
 KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
+P:	Anthony Xu
+M:	anthony.xu@intel.com
 P:	Xiantao Zhang
 M:	xiantao.zhang@intel.com
 L:	kvm-ia64@vger.kernel.org
diff --git a/trunk/arch/alpha/include/asm/smp.h b/trunk/arch/alpha/include/asm/smp.h
index 547e90951cec..544c69af8168 100644
--- a/trunk/arch/alpha/include/asm/smp.h
+++ b/trunk/arch/alpha/include/asm/smp.h
@@ -45,6 +45,7 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern int smp_num_cpus;
+#define cpu_possible_map	cpu_present_map
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi(cpumask_t mask);
diff --git a/trunk/arch/alpha/kernel/irq.c b/trunk/arch/alpha/kernel/irq.c
index d0f1620007f7..c626a821cdcb 100644
--- a/trunk/arch/alpha/kernel/irq.c
+++ b/trunk/arch/alpha/kernel/irq.c
@@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq)
 	last_cpu = cpu;
 
 	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-	irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
+	irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu));
 	return 0;
 }
 #endif /* CONFIG_SMP */
diff --git a/trunk/arch/alpha/kernel/process.c b/trunk/arch/alpha/kernel/process.c
index f238370c907d..351407e07e71 100644
--- a/trunk/arch/alpha/kernel/process.c
+++ b/trunk/arch/alpha/kernel/process.c
@@ -94,7 +94,6 @@ common_shutdown_1(void *generic_ptr)
 		flags |= 0x00040000UL; /* "remain halted" */
 		*pflags = flags;
 		cpu_clear(cpuid, cpu_present_map);
-		cpu_clear(cpuid, cpu_possible_map);
 		halt();
 	}
 #endif
@@ -121,7 +120,6 @@ common_shutdown_1(void *generic_ptr)
 #ifdef CONFIG_SMP
 	/* Wait for the secondaries to halt. */
 	cpu_clear(boot_cpuid, cpu_present_map);
-	cpu_clear(boot_cpuid, cpu_possible_map);
 	while (cpus_weight(cpu_present_map))
 		barrier();
 #endif
diff --git a/trunk/arch/alpha/kernel/smp.c b/trunk/arch/alpha/kernel/smp.c
index d953e510f68d..cf7da10097bb 100644
--- a/trunk/arch/alpha/kernel/smp.c
+++ b/trunk/arch/alpha/kernel/smp.c
@@ -70,6 +70,11 @@ enum ipi_message_type {
 /* Set to a secondary's cpuid when it comes online.  */
 static int smp_secondary_alive __devinitdata = 0;
 
+/* Which cpus ids came online.  */
+cpumask_t cpu_online_map;
+
+EXPORT_SYMBOL(cpu_online_map);
+
 int smp_num_probed;		/* Internal processor count */
 int smp_num_cpus = 1;		/* Number that came online.  */
 EXPORT_SYMBOL(smp_num_cpus);
@@ -435,7 +440,6 @@ setup_smp(void)
 			((char *)cpubase + i*hwrpb->processor_size);
 		if ((cpu->flags & 0x1cc) == 0x1cc) {
 			smp_num_probed++;
-			cpu_set(i, cpu_possible_map);
 			cpu_set(i, cpu_present_map);
 			cpu->pal_revision = boot_cpu_palrev;
 		}
@@ -469,7 +473,6 @@ smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Nothing to do on a UP box, or when told not to.  */
 	if (smp_num_probed == 1 || max_cpus == 0) {
-		cpu_possible_map = cpumask_of_cpu(boot_cpuid);
 		cpu_present_map = cpumask_of_cpu(boot_cpuid);
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		return;
diff --git a/trunk/arch/alpha/kernel/sys_dp264.c b/trunk/arch/alpha/kernel/sys_dp264.c
index ab44c164d9d4..c71b0fd7a61f 100644
--- a/trunk/arch/alpha/kernel/sys_dp264.c
+++ b/trunk/arch/alpha/kernel/sys_dp264.c
@@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
+dp264_set_affinity(unsigned int irq, cpumask_t affinity)
 {
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq, *affinity);
+	cpu_set_irq_affinity(irq, affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
 
 static void
-clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
+clipper_set_affinity(unsigned int irq, cpumask_t affinity)
 {
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq - 16, *affinity);
+	cpu_set_irq_affinity(irq - 16, affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
diff --git a/trunk/arch/alpha/kernel/sys_titan.c b/trunk/arch/alpha/kernel/sys_titan.c
index 27f840a4ad3d..52c91ccc1648 100644
--- a/trunk/arch/alpha/kernel/sys_titan.c
+++ b/trunk/arch/alpha/kernel/sys_titan.c
@@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+titan_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 {
 	spin_lock(&titan_irq_lock);
-	titan_cpu_set_irq_affinity(irq - 16, *affinity);
+	titan_cpu_set_irq_affinity(irq - 16, affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
 }
diff --git a/trunk/arch/arm/common/gic.c b/trunk/arch/arm/common/gic.c
index c6884ba1d5ed..7fc9860a97d7 100644
--- a/trunk/arch/arm/common/gic.c
+++ b/trunk/arch/arm/common/gic.c
@@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static void gic_set_cpu(unsigned int irq, cpumask_t mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
-	unsigned int cpu = cpumask_first(mask_val);
+	unsigned int cpu = first_cpu(mask_val);
 	u32 val;
 
 	spin_lock(&irq_controller_lock);
diff --git a/trunk/arch/arm/kernel/irq.c b/trunk/arch/arm/kernel/irq.c
index 7141cee1fab7..2f3eb795fa6e 100644
--- a/trunk/arch/arm/kernel/irq.c
+++ b/trunk/arch/arm/kernel/irq.c
@@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
 	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->chip->set_affinity(irq, cpumask_of(cpu));
+	desc->chip->set_affinity(irq, cpumask_of_cpu(cpu));
 	spin_unlock_irq(&desc->lock);
 }
diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c
index 55fa7ff96a3e..019237d21622 100644
--- a/trunk/arch/arm/kernel/smp.c
+++ b/trunk/arch/arm/kernel/smp.c
@@ -33,6 +33,16 @@
 #include
 #include
 
+/*
+ * bitmask of present and online CPUs.
+ * The present bitmask indicates that the CPU is physically present.
+ * The online bitmask indicates that the CPU is up and running.
+ */
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
+cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
+
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
diff --git a/trunk/arch/arm/mach-at91/at91rm9200_time.c b/trunk/arch/arm/mach-at91/at91rm9200_time.c
index 1ff1bda0a894..d140eae53ded 100644
--- a/trunk/arch/arm/mach-at91/at91rm9200_time.c
+++ b/trunk/arch/arm/mach-at91/at91rm9200_time.c
@@ -178,6 +178,7 @@ static struct clock_event_device clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 150,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= clkevt32k_next_event,
 	.set_mode	= clkevt32k_mode,
 };
@@ -205,7 +206,7 @@ void __init at91rm9200_timer_init(void)
 	clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
 	clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
 	clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
-	clkevt.cpumask = cpumask_of(0);
+	clkevt.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clkevt);
 
 	/* register clocksource */
diff --git a/trunk/arch/arm/mach-at91/at91sam926x_time.c b/trunk/arch/arm/mach-at91/at91sam926x_time.c
index b63e1d5f1bad..122fd77ed580 100644
--- a/trunk/arch/arm/mach-at91/at91sam926x_time.c
+++ b/trunk/arch/arm/mach-at91/at91sam926x_time.c
@@ -91,6 +91,7 @@ static struct clock_event_device pit_clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 100,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= pit_clkevt_mode,
 };
 
@@ -172,7 +173,6 @@ static void __init at91sam926x_pit_init(void)
 
 	/* Set up and register clockevents */
 	pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift);
-	pit_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&pit_clkevt);
 }
diff --git a/trunk/arch/arm/mach-davinci/time.c b/trunk/arch/arm/mach-davinci/time.c
index f8bcd29d17a6..3b9a296b5c4b 100644
--- a/trunk/arch/arm/mach-davinci/time.c
+++ b/trunk/arch/arm/mach-davinci/time.c
@@ -322,7 +322,7 @@ static void __init davinci_timer_init(void)
 	clockevent_davinci.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_davinci);
 
-	clockevent_davinci.cpumask = cpumask_of(0);
+	clockevent_davinci.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_davinci);
 }
diff --git a/trunk/arch/arm/mach-imx/time.c b/trunk/arch/arm/mach-imx/time.c
index aff0ebcfa847..a11765f5f23b 100644
--- a/trunk/arch/arm/mach-imx/time.c
+++ b/trunk/arch/arm/mach-imx/time.c
@@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate)
 	clockevent_imx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_imx);
 
-	clockevent_imx.cpumask = cpumask_of(0);
+	clockevent_imx.cpumask = cpumask_of_cpu(0);
 
 	clockevents_register_device(&clockevent_imx);
diff --git a/trunk/arch/arm/mach-ixp4xx/common.c b/trunk/arch/arm/mach-ixp4xx/common.c
index f4656d2ac8a8..7766f469456b 100644
--- a/trunk/arch/arm/mach-ixp4xx/common.c
+++ b/trunk/arch/arm/mach-ixp4xx/common.c
@@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void)
 		clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
 	clockevent_ixp4xx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_ixp4xx);
-	clockevent_ixp4xx.cpumask = cpumask_of(0);
+	clockevent_ixp4xx.cpumask = cpumask_of_cpu(0);
 
 	clockevents_register_device(&clockevent_ixp4xx);
 	return 0;
diff --git a/trunk/arch/arm/mach-msm/timer.c b/trunk/arch/arm/mach-msm/timer.c
index 444d9c0f5ca6..345a14cb73c3 100644
--- a/trunk/arch/arm/mach-msm/timer.c
+++ b/trunk/arch/arm/mach-msm/timer.c
@@ -182,7 +182,7 @@ static void __init msm_timer_init(void)
 			clockevent_delta2ns(0xf0000000 >> clock->shift, ce);
 		/* 4 gets rounded down to 3 */
 		ce->min_delta_ns = clockevent_delta2ns(4, ce);
-		ce->cpumask = cpumask_of(0);
+		ce->cpumask = cpumask_of_cpu(0);
 
 		cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
 		res = clocksource_register(cs);
diff --git a/trunk/arch/arm/mach-ns9xxx/time-ns9360.c b/trunk/arch/arm/mach-ns9xxx/time-ns9360.c
index 41df69721769..a63424d083d9 100644
--- a/trunk/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/trunk/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void)
 	ns9360_clockevent_device.min_delta_ns =
 		clockevent_delta2ns(1, &ns9360_clockevent_device);
 
-	ns9360_clockevent_device.cpumask = cpumask_of(0);
+	ns9360_clockevent_device.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&ns9360_clockevent_device);
 
 	setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT,
diff --git a/trunk/arch/arm/mach-omap1/time.c b/trunk/arch/arm/mach-omap1/time.c
index 495a32c287b4..2cf7e32bd293 100644
--- a/trunk/arch/arm/mach-omap1/time.c
+++ b/trunk/arch/arm/mach-omap1/time.c
@@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate)
 	clockevent_mpu_timer1.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_mpu_timer1);
 
-	clockevent_mpu_timer1.cpumask = cpumask_of(0);
+	clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_mpu_timer1);
 }
diff --git a/trunk/arch/arm/mach-omap1/timer32k.c b/trunk/arch/arm/mach-omap1/timer32k.c
index fd3f7396e162..705367ece174 100644
--- a/trunk/arch/arm/mach-omap1/timer32k.c
+++ b/trunk/arch/arm/mach-omap1/timer32k.c
@@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void)
 	clockevent_32k_timer.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_32k_timer);
 
-	clockevent_32k_timer.cpumask = cpumask_of(0);
+	clockevent_32k_timer.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_32k_timer);
 }
diff --git a/trunk/arch/arm/mach-omap2/timer-gp.c b/trunk/arch/arm/mach-omap2/timer-gp.c
index ae6036300f60..589393bedade 100644
--- a/trunk/arch/arm/mach-omap2/timer-gp.c
+++ b/trunk/arch/arm/mach-omap2/timer-gp.c
@@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void)
 	clockevent_gpt.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_gpt);
 
-	clockevent_gpt.cpumask = cpumask_of(0);
+	clockevent_gpt.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&clockevent_gpt);
 }
diff --git a/trunk/arch/arm/mach-pxa/time.c b/trunk/arch/arm/mach-pxa/time.c
index 95656a72268d..001624158519 100644
--- a/trunk/arch/arm/mach-pxa/time.c
+++ b/trunk/arch/arm/mach-pxa/time.c
@@ -122,6 +122,7 @@ static struct clock_event_device ckevt_pxa_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= pxa_osmr0_set_next_event,
 	.set_mode	= pxa_osmr0_set_mode,
 };
@@ -162,7 +163,6 @@ static void __init pxa_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
 	ckevt_pxa_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
-	ckevt_pxa_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_pxa_oscr0.mult =
 		clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --git a/trunk/arch/arm/mach-realview/core.c b/trunk/arch/arm/mach-realview/core.c
index bd2aa4f16141..5f1d55963ced 100644
--- a/trunk/arch/arm/mach-realview/core.c
+++ b/trunk/arch/arm/mach-realview/core.c
@@ -624,7 +624,7 @@ static struct clock_event_device timer0_clockevent = {
 	.set_mode	= timer_set_mode,
 	.set_next_event	= timer_set_next_event,
 	.rating		= 300,
-	.cpumask	= cpu_all_mask,
+	.cpumask	= CPU_MASK_ALL,
 };
 
 static void __init realview_clockevents_init(unsigned int timer_irq)
diff --git a/trunk/arch/arm/mach-realview/localtimer.c b/trunk/arch/arm/mach-realview/localtimer.c
index 67d6d9cc68b2..9019ef2e5611 100644
--- a/trunk/arch/arm/mach-realview/localtimer.c
+++ b/trunk/arch/arm/mach-realview/localtimer.c
@@ -154,7 +154,7 @@ void __cpuinit local_timer_setup(void)
 	clk->set_mode		= local_timer_set_mode;
 	clk->set_next_event	= local_timer_set_next_event;
 	clk->irq		= IRQ_LOCALTIMER;
-	clk->cpumask		= cpumask_of(cpu);
+	clk->cpumask		= cpumask_of_cpu(cpu);
 	clk->shift		= 20;
 	clk->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
 	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
@@ -193,7 +193,7 @@ void __cpuinit local_timer_setup(void)
 	clk->rating		= 200;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of(cpu);
+	clk->cpumask		= cpumask_of_cpu(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/trunk/arch/arm/mach-sa1100/time.c b/trunk/arch/arm/mach-sa1100/time.c
index 711c0295c66f..8c5e727f3b75 100644
--- a/trunk/arch/arm/mach-sa1100/time.c
+++ b/trunk/arch/arm/mach-sa1100/time.c
@@ -73,6 +73,7 @@ static struct clock_event_device ckevt_sa1100_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= sa1100_osmr0_set_next_event,
 	.set_mode	= sa1100_osmr0_set_mode,
 };
@@ -109,7 +110,6 @@ static void __init sa1100_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
 	ckevt_sa1100_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
-	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_sa1100_oscr.mult =
 		clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --git a/trunk/arch/arm/mach-versatile/core.c b/trunk/arch/arm/mach-versatile/core.c
index 1c43494f5c42..df25aa138509 100644
--- a/trunk/arch/arm/mach-versatile/core.c
+++ b/trunk/arch/arm/mach-versatile/core.c
@@ -1005,7 +1005,7 @@ static void __init versatile_timer_init(void)
 	timer0_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xf, &timer0_clockevent);
 
-	timer0_clockevent.cpumask = cpumask_of(0);
+	timer0_clockevent.cpumask = cpumask_of_cpu(0);
 	clockevents_register_device(&timer0_clockevent);
 }
diff --git a/trunk/arch/arm/oprofile/op_model_mpcore.c b/trunk/arch/arm/oprofile/op_model_mpcore.c
index 6d6bd5899240..4de366e8b4c5 100644
--- a/trunk/arch/arm/oprofile/op_model_mpcore.c
+++ b/trunk/arch/arm/oprofile/op_model_mpcore.c
@@ -260,10 +260,10 @@ static void em_stop(void)
 static void em_route_irq(int irq, unsigned int cpu)
 {
 	struct irq_desc *desc = irq_desc + irq;
-	const struct cpumask *mask = cpumask_of(cpu);
+	cpumask_t mask = cpumask_of_cpu(cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->affinity = *mask;
+	desc->affinity = mask;
 	desc->chip->set_affinity(irq, mask);
 	spin_unlock_irq(&desc->lock);
 }
diff --git a/trunk/arch/arm/plat-mxc/time.c b/trunk/arch/arm/plat-mxc/time.c
index 758a1293bcfa..fd28f5194f71 100644
--- a/trunk/arch/arm/plat-mxc/time.c
+++ b/trunk/arch/arm/plat-mxc/time.c
@@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void)
 	clockevent_mxc.min_delta_ns =
 		clockevent_delta2ns(0xff, &clockevent_mxc);
 
-	clockevent_mxc.cpumask = cpumask_of(0);
+	clockevent_mxc.cpumask = cpumask_of_cpu(0);
 
 	clockevents_register_device(&clockevent_mxc);
diff --git a/trunk/arch/arm/plat-orion/time.c b/trunk/arch/arm/plat-orion/time.c
index 6fa2923e6dca..544d6b327f3a 100644
--- a/trunk/arch/arm/plat-orion/time.c
+++ b/trunk/arch/arm/plat-orion/time.c
@@ -149,6 +149,7 @@ static struct clock_event_device orion_clkevt = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 300,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= orion_clkevt_next_event,
 	.set_mode	= orion_clkevt_mode,
 };
@@ -198,6 +199,5 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift);
 	orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt);
 	orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt);
-	orion_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&orion_clkevt);
 }
diff --git a/trunk/arch/avr32/kernel/time.c b/trunk/arch/avr32/kernel/time.c
index 0ff46bf873b0..283481d74a5b 100644
--- a/trunk/arch/avr32/kernel/time.c
+++ b/trunk/arch/avr32/kernel/time.c
@@ -106,6 +106,7 @@ static struct clock_event_device comparator = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 16,
 	.rating		= 50,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= comparator_next_event,
 	.set_mode	= comparator_mode,
 };
@@ -133,7 +134,6 @@ void __init time_init(void)
 	comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift);
 	comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator);
 	comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1;
-	comparator.cpumask = cpumask_of(0);
 
 	sysreg_write(COMPARE, 0);
 	timer_irqaction.dev_id = &comparator;
diff --git a/trunk/arch/blackfin/kernel/time-ts.c b/trunk/arch/blackfin/kernel/time-ts.c
index 0ed2badfd746..e887efc86c29 100644
--- a/trunk/arch/blackfin/kernel/time-ts.c
+++ b/trunk/arch/blackfin/kernel/time-ts.c
@@ -162,6 +162,7 @@ static struct clock_event_device clockevent_bfin = {
 	.name		= "bfin_core_timer",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
+	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event = bfin_timer_set_next_event,
 	.set_mode	= bfin_timer_set_mode,
 };
@@ -192,7 +193,6 @@ static int __init bfin_clockevent_init(void)
 	clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
 	clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin);
 	clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin);
-	clockevent_bfin.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_bfin);
 
 	return 0;
diff --git a/trunk/arch/cris/arch-v32/kernel/irq.c b/trunk/arch/cris/arch-v32/kernel/irq.c
index 295131fee710..173c141ac9ba 100644
--- a/trunk/arch/cris/arch-v32/kernel/irq.c
+++ b/trunk/arch/cris/arch-v32/kernel/irq.c
@@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq)
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
+void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
-	irq_allocations[irq - FIRST_IRQ].mask = *dest;
+	irq_allocations[irq - FIRST_IRQ].mask = dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
 }
diff --git a/trunk/arch/cris/arch-v32/kernel/smp.c b/trunk/arch/cris/arch-v32/kernel/smp.c
index 9dac17334640..52e16c6436f9 100644
--- a/trunk/arch/cris/arch-v32/kernel/smp.c
+++ b/trunk/arch/cris/arch-v32/kernel/smp.c
@@ -29,7 +29,11 @@
 spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
 
 /* CPU masks */
+cpumask_t cpu_online_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_online_map);
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(phys_cpu_present_map);
 
 /* Variables used during SMP boot */
diff --git a/trunk/arch/cris/include/asm/smp.h b/trunk/arch/cris/include/asm/smp.h
index c615a06dd757..dba33aba3e95 100644
--- a/trunk/arch/cris/include/asm/smp.h
+++ b/trunk/arch/cris/include/asm/smp.h
@@ -4,6 +4,7 @@
 #include
 
 extern cpumask_t phys_cpu_present_map;
+extern cpumask_t cpu_possible_map;
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
diff --git a/trunk/arch/ia64/hp/sim/hpsim_irq.c b/trunk/arch/ia64/hp/sim/hpsim_irq.c
index cc0a3182db3c..c2f58ff364e7 100644
--- a/trunk/arch/ia64/hp/sim/hpsim_irq.c
+++ b/trunk/arch/ia64/hp/sim/hpsim_irq.c
@@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq)
 }
 
 static void
-hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
+hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
 {
 }
diff --git a/trunk/arch/ia64/include/asm/kvm.h b/trunk/arch/ia64/include/asm/kvm.h
index 68aa6da807c1..f38472ac2267 100644
--- a/trunk/arch/ia64/include/asm/kvm.h
+++ b/trunk/arch/ia64/include/asm/kvm.h
@@ -166,6 +166,8 @@ struct saved_vpd {
 };
 
 struct kvm_regs {
+	char *saved_guest;
+	char *saved_stack;
 	struct saved_vpd vpd;
 	/*Arch-regs*/
 	int mp_state;
@@ -198,10 +200,6 @@ struct kvm_regs {
 	unsigned long fp_psr;	/*used for lazy float register */
 	unsigned long saved_gp;
 	/*for phycial emulation */
-
-	union context saved_guest;
-
-	unsigned long reserved[64];	/* for future use */
 };
 
 struct kvm_sregs {
diff --git a/trunk/arch/ia64/include/asm/kvm_host.h b/trunk/arch/ia64/include/asm/kvm_host.h
index 0560f3fae538..c60d324da540 100644
--- a/trunk/arch/ia64/include/asm/kvm_host.h
+++ b/trunk/arch/ia64/include/asm/kvm_host.h
@@ -23,6 +23,17 @@
 #ifndef __ASM_KVM_HOST_H
 #define __ASM_KVM_HOST_H
 
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define KVM_MAX_VCPUS 4
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -39,132 +50,70 @@
 #define EXIT_REASON_EXTERNAL_INTERRUPT	6
 #define EXIT_REASON_IPI			7
 #define EXIT_REASON_PTC_G		8
-#define EXIT_REASON_DEBUG		20
 
 /*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
+#define KVM_VMM_SIZE (16UL<<20)
 #define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
+#define KVM_VMM_BASE 0xD000000000000000UL
+#define VMM_SIZE (8UL<<20)
 
 /*
  * Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
+ * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
  */
 #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- *            +----------------------+	-------	KVM_VM_DATA_SIZE
- *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
- *	      |			     |	 |    /			  |
- *	      |	       ..........    |	 |   /vcpu's struct&stack |
- *	      |	       ..........    |	 |  /---------------------|---- 0
- *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
- *	      |	    vcpu[4]'s data   |	 |/-----------------------|
- *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
- *	      |	    vcpu[2]'s data   |	/|------------------------|
- *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
- *	      |	    vcpu[0]'s data   |____________________________|
- *            +----------------------+	 |
- *	      |	   memory dirty log  |	 |
- *            +----------------------+	 |
- *	      |	   vm's data struct  |	 |
- *            +----------------------+	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |	  vm's p2m table  |	 |
- *	      |			     |	 |
- *            |			     |	 |
- *	      |			     |	 |  |
- * vm's data->|			     |   |  |
- *	      +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT	26
-#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE		KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT		16
-#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT		16
-#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT		16
-#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT	16
-#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-#define KVM_STK_OFFSET		VCPU_STRUCT_SIZE
-
-#define KVM_VM_STRUCT_SHIFT	19
-#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT	19
-#define KVM_MEM_DIRTY_LOG_SIZE	(__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
-			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-struct kvm_vcpu_data {
-	char vcpu_vhpt[VHPT_SIZE];
-	char vcpu_vtlb[VTLB_SIZE];
-	char vcpu_vpd[VPD_SIZE];
-	char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
-	char kvm_p2m[KVM_P2M_SIZE];
-	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
-	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
-	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n)	KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, vcpu_data[n])
-#define VM_BASE		KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, kvm_vm_struct)
-#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
-			offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
-			offsetof(struct kvm_vcpu_data, vcpu_struct))
+#define KVM_VM_BUFFER_SIZE (8UL<<20)
+
+/*Define Virtual machine data layout.*/
+#define KVM_VM_DATA_SHIFT  24
+#define KVM_VM_DATA_SIZE   (1UL << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE   (KVM_VMM_BASE + KVM_VMM_SIZE)
+
+
+#define KVM_P2M_BASE    KVM_VM_DATA_BASE
+#define KVM_P2M_OFS     0
+#define KVM_P2M_SIZE    (8UL << 20)
+
+#define KVM_VHPT_BASE   (KVM_P2M_BASE + KVM_P2M_SIZE)
+#define KVM_VHPT_OFS    KVM_P2M_SIZE
+#define KVM_VHPT_BLOCK_SIZE   (2UL << 20)
+#define VHPT_SHIFT 18
+#define VHPT_SIZE  (1UL << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
+
+#define KVM_VTLB_BASE   (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
+#define KVM_VTLB_OFS    (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
+#define KVM_VTLB_BLOCK_SIZE   (1UL<<20)
+#define VTLB_SHIFT 17
+#define VTLB_SIZE (1UL<= nr_cpu_ids)
+		cpus_and(mask, mask, cpu_online_map);
+	if (cpus_empty(mask))
 		return;
 
-	if (irq_prepare_move(irq, cpu))
+	if (irq_prepare_move(irq, first_cpu(mask)))
 		return;
 
-	dest = cpu_physical_id(cpu);
+	dest = cpu_physical_id(first_cpu(mask));
 
 	if (!iosapic_intr_info[irq].count)
 		return;			/* not an IOSAPIC interrupt */
diff --git a/trunk/arch/ia64/kernel/irq.c b/trunk/arch/ia64/kernel/irq.c
index 0b6db53fedcf..7fd18f54c056 100644
--- a/trunk/arch/ia64/kernel/irq.c
+++ b/trunk/arch/ia64/kernel/irq.c
@@ -133,6 +133,7 @@ unsigned int vectors_in_migration[NR_IRQS];
  */
 static void migrate_irqs(void)
 {
+	cpumask_t	mask;
 	irq_desc_t *desc;
 	int 		irq, new_cpu;
 
@@ -151,14 +152,15 @@ static void migrate_irqs(void)
 		if (desc->status == IRQ_PER_CPU)
 			continue;
 
-		if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
-		    >= nr_cpu_ids) {
+		cpus_and(mask, irq_desc[irq].affinity, cpu_online_map);
+		if (any_online_cpu(mask) == NR_CPUS) {
 			/*
 			 * Save it for phase 2 processing
 			 */
 			vectors_in_migration[irq] = irq;
 
 			new_cpu = any_online_cpu(cpu_online_map);
+			mask = cpumask_of_cpu(new_cpu);
 
 			/*
 			 * Al three are essential, currently WARN_ON.. maybe panic?
@@ -166,8 +168,7 @@
 			if (desc->chip && desc->chip->disable &&
 				desc->chip->enable && desc->chip->set_affinity) {
 				desc->chip->disable(irq);
-				desc->chip->set_affinity(irq,
-							 cpumask_of(new_cpu));
+				desc->chip->set_affinity(irq, mask);
 				desc->chip->enable(irq);
 			} else {
 				WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
diff --git a/trunk/arch/ia64/kernel/msi_ia64.c b/trunk/arch/ia64/kernel/msi_ia64.c
index 890339339035..702a09c13238 100644
--- a/trunk/arch/ia64/kernel/msi_ia64.c
+++ b/trunk/arch/ia64/kernel/msi_ia64.c
@@ -49,12 +49,11 @@ static struct irq_chip ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq,
-				      const cpumask_t *cpu_mask)
+static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
 {
 	struct msi_msg msg;
 	u32 addr, data;
-	int cpu = first_cpu(*cpu_mask);
+	int cpu = first_cpu(cpu_mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -167,11 +166,12 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
-	int cpu = cpumask_first(mask);
+	int cpu = first_cpu(mask);
+
 	if (!cpu_online(cpu))
 		return;
@@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = *mask;
+	irq_desc[irq].affinity = mask;
 }
 #endif /* CONFIG_SMP */
diff --git a/trunk/arch/ia64/kernel/smpboot.c b/trunk/arch/ia64/kernel/smpboot.c
index 11463994a7d5..1dcbb85fc4ee 100644
--- a/trunk/arch/ia64/kernel/smpboot.c
+++ b/trunk/arch/ia64/kernel/smpboot.c
@@ -131,6 +131,12 @@ struct task_struct *task_for_booting_cpu;
  */
 DEFINE_PER_CPU(int, cpu_state);
 
+/* Bitmasks of currently online, and possible CPUs */
+cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
+cpumask_t cpu_possible_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_possible_map);
+
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
@@ -682,7 +688,7 @@ int migrate_platform_irqs(unsigned int cpu)
 {
 	int new_cpei_cpu;
 	irq_desc_t *desc = NULL;
-	const struct cpumask *mask;
+	cpumask_t 	mask;
 	int 		retval = 0;
 
 	/*
@@ -695,7 +701,7 @@ int migrate_platform_irqs(unsigned int cpu)
 		 * Now re-target the CPEI to a different processor
 		 */
 		new_cpei_cpu = any_online_cpu(cpu_online_map);
-		mask = cpumask_of(new_cpei_cpu);
+		mask = cpumask_of_cpu(new_cpei_cpu);
 		set_cpei_target_cpu(new_cpei_cpu);
 		desc = irq_desc + ia64_cpe_irq;
 		/*
diff --git a/trunk/arch/ia64/kernel/topology.c b/trunk/arch/ia64/kernel/topology.c
index a8d61a3e9a94..c75b914f2d6b 100644
--- a/trunk/arch/ia64/kernel/topology.c
+++ b/trunk/arch/ia64/kernel/topology.c
@@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
 	cpumask_t shared_cpu_map;
 
 	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
-	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
 	len += sprintf(buf+len, "\n");
 	return len;
 }
diff --git a/trunk/arch/ia64/kvm/Makefile b/trunk/arch/ia64/kvm/Makefile
index 76464dc312e6..92cef66ca268 100644
--- a/trunk/arch/ia64/kvm/Makefile
+++ b/trunk/arch/ia64/kvm/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
 
 CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o kvm_lib.o
+	vtlb.o process.o
 #Add link memcpy and memset to avoid possible structure assignment error
 kvm-intel-objs += memcpy.o memset.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/trunk/arch/ia64/kvm/asm-offsets.c b/trunk/arch/ia64/kvm/asm-offsets.c
index 0c3564a7a033..4e3dc13a619c 100644
--- a/trunk/arch/ia64/kvm/asm-offsets.c
+++ b/trunk/arch/ia64/kvm/asm-offsets.c
@@ -24,10 +24,19 @@
 
 #include
 #include
-#include
 
 #include "vcpu.h"
 
+#define task_struct kvm_vcpu
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : :)
+
+#define OFFSET(_sym, _str, _mem) \
+    DEFINE(_sym, offsetof(_str, _mem));
+
 void foo(void)
 {
 	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
diff --git a/trunk/arch/ia64/kvm/kvm-ia64.c b/trunk/arch/ia64/kvm/kvm-ia64.c
index 0f5ebd948437..af1464f7a6ad 100644
--- a/trunk/arch/ia64/kvm/kvm-ia64.c
+++ b/trunk/arch/ia64/kvm/kvm-ia64.c
@@ -180,6 +180,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_MP_STATE:
 
 		r = 1;
@@ -438,6 +439,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 	expires = div64_u64(itc_diff, cyc_per_usec);
 	kt = ktime_set(0, 1000 * expires);
 
+	down_read(&vcpu->kvm->slots_lock);
 	vcpu->arch.ht_active = 1;
 	hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
 
@@ -450,6 +452,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 		if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	up_read(&vcpu->kvm->slots_lock);
 
 	if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
 		return -EINTR;
@@ -473,13 +476,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
-				struct kvm_run *kvm_run)
-{
-	printk("VMM: %s", vcpu->arch.log_buf);
-	return 1;
-}
-
 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
 		struct kvm_run *kvm_run) = {
 	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
@@ -491,7 +487,6 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
 	[EXIT_REASON_IPI]                   = handle_ipi,
 	[EXIT_REASON_PTC_G]                 = handle_global_purge,
-	[EXIT_REASON_DEBUG]                 = handle_vcpu_debug,
 };
 
@@ -703,24 +698,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return r;
 }
 
+/*
+ * Allocate 16M memory for every vm to hold its specific data.
+ * Its memory map is defined in kvm_host.h.
+ */
 static struct kvm *kvm_alloc_kvm(void)
 {
 
 	struct kvm *kvm;
 	uint64_t  vm_base;
 
-	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
 	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
 
 	if (!vm_base)
 		return ERR_PTR(-ENOMEM);
+	printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
+
 	/* Zero all pages before use! */
 	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-	kvm = (struct kvm *)(vm_base +
-			offsetof(struct kvm_vm_data, kvm_vm_struct));
+
+	kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
 	kvm->arch.vm_base = vm_base;
-	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
 
 	return kvm;
 }
@@ -762,12 +760,21 @@ static void kvm_build_io_pmt(struct kvm *kvm)
 
 static void kvm_init_vm(struct kvm *kvm)
 {
+	long vm_base;
+
 	BUG_ON(!kvm);
 
 	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
 	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
 	kvm->arch.vmm_init_rr = VMM_INIT_RR;
 
+	vm_base = kvm->arch.vm_base;
+	if (vm_base) {
+		kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
+		kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
+		kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
+	}
+
 	/*
 	 *Fill P2M entries for MMIO/IO ranges
 	 */
@@ -831,8 +838,9 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 	int i;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int r;
 
 	vcpu_load(vcpu);
 
@@ -849,7 +857,18 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 	vpd->vpr = regs->vpd.vpr;
 
-	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
+	r = -EFAULT;
+	r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
+						sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_from_user(vcpu + 1, regs->saved_stack +
+			sizeof(struct kvm_vcpu),
+			IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
+	if (r)
+		goto out;
+	vcpu->arch.exit_data =
+		((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
 
 	RESTORE_REGS(mp_state);
 	RESTORE_REGS(vmm_rr);
@@ -883,8 +902,9 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
 	vcpu_put(vcpu);
-
-	return 0;
+	r = 0;
+out:
+	return r;
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -1146,11 +1166,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	/*Set entry address for first run.*/
 	regs->cr_iip = PALE_RESET_ENTRY;
 
-	/*Initialize itc offset for vcpus*/
+	/*Initilize itc offset for vcpus*/
 	itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		v = (struct kvm_vcpu *)((char *)vcpu +
-				sizeof(struct kvm_vcpu_data) * i);
+	for (i = 0; i < MAX_VCPU_NUM; i++) {
+		v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
 		v->arch.itc_offset = itc_offset;
 		v->arch.last_itc = 0;
 	}
@@ -1164,7 +1183,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.apic->vcpu = vcpu;
 
 	p_ctx->gr[1] = 0;
-	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
 	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
 	p_ctx->psr = 0x1008522000UL;
 	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1199,12 +1218,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.hlt_timer.function = hlt_timer_fn;
 
 	vcpu->arch.last_run_cpu = -1;
-	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
+	vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
 	vcpu->arch.vsa_base = kvm_vsa_base;
 	vcpu->arch.__gp = kvm_vmm_gp;
 	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
-	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
 	init_ptce_info(vcpu);
 
 	r = 0;
@@ -1254,22 +1273,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	int r;
 	int cpu;
 
-	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
-	r = -EINVAL;
-	if (id >= KVM_MAX_VCPUS) {
-		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
-				KVM_MAX_VCPUS);
-		goto fail;
-	}
-
 	r = -ENOMEM;
 	if (!vm_base) {
 		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
 		goto fail;
 	}
-	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
-					vcpu_data[id].vcpu_struct));
+	vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
 	vcpu->kvm = kvm;
 
 	cpu = get_cpu();
@@ -1365,9 +1374,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 	int i;
-
+	int r;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 	vcpu_load(vcpu);
 
 	for (i = 0; i < 16; i++) {
@@ -1382,8 +1391,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	regs->vpd.vpsr = vpd->vpsr;
 	regs->vpd.vpr = vpd->vpr;
 
-	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
+	r = -EFAULT;
+	r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
+					sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
+	if (r)
+		goto out;
 	SAVE_REGS(mp_state);
 	SAVE_REGS(vmm_rr);
 	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1411,9 +1426,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	SAVE_REGS(metaphysical_saved_rr4);
 	SAVE_REGS(fp_psr);
 	SAVE_REGS(saved_gp);
-	vcpu_put(vcpu);
-	return 0;
+	vcpu_put(vcpu);
+	r = 0;
+out:
+	return r;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1441,9 +1457,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
 	unsigned long base_gfn = memslot->base_gfn;
 
-	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
-		return -ENOMEM;
-
 	for (i = 0; i < npages; i++) {
 		pfn = gfn_to_pfn(kvm, base_gfn + i);
 		if (!kvm_is_mmio_pfn(pfn)) {
@@ -1618,8 +1631,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int r, i;
 	long n, base;
-	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
-			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+	unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
+					+ KVM_MEM_DIRTY_LOG_OFS);
 
 	r = -EINVAL;
 	if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/trunk/arch/ia64/kvm/kvm_lib.c b/trunk/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644
index a85cb611ecd7..000000000000
--- a/trunk/arch/ia64/kvm/kvm_lib.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * kvm_lib.c: Compile some libraries for kvm-intel module.
- *
- * Just include kernel's library, and disable symbols export.
- * Copyright (C) 2008, Intel Corporation.
- * Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"
diff --git a/trunk/arch/ia64/kvm/kvm_minstate.h b/trunk/arch/ia64/kvm/kvm_minstate.h
index b2bcaa2787aa..2cc41d17cf99 100644
--- a/trunk/arch/ia64/kvm/kvm_minstate.h
+++ b/trunk/arch/ia64/kvm/kvm_minstate.h
@@ -24,8 +24,6 @@
 #include
 #include
 #include
-#include
-
 #include "asm-offsets.h"
 
 #define KVM_MINSTATE_START_SAVE_MIN					\
@@ -35,7 +33,7 @@
 	addl r22 = VMM_RBS_OFFSET,r1;	/* compute base of RBS */	\
 	;;								\
 	lfetch.fault.excl.nt1 [r22];					\
-	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;			\
+	addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1;  /* compute base of memory stack */  \
 	mov r23 = ar.bspstore;		/* save ar.bspstore */		\
 	;;								\
 	mov ar.bspstore = r22;		/* switch to kernel RBS */	\
diff --git a/trunk/arch/ia64/kvm/misc.h b/trunk/arch/ia64/kvm/misc.h
index dd979e00b574..e585c4607344 100644
--- a/trunk/arch/ia64/kvm/misc.h
+++ b/trunk/arch/ia64/kvm/misc.h
@@ -27,8 +27,7 @@
  */
 static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
 {
-	return (uint64_t *)(kvm->arch.vm_base +
-				offsetof(struct kvm_vm_data, kvm_p2m));
+	return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
 }
 
 static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/trunk/arch/ia64/kvm/mmio.c b/trunk/arch/ia64/kvm/mmio.c
index 21f63fffc379..7f1a858bc69f 100644
--- a/trunk/arch/ia64/kvm/mmio.c
+++ b/trunk/arch/ia64/kvm/mmio.c
@@ -66,25 +66,31 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
 
 	switch (addr) {
 	case PIB_OFST_INTA:
-		panic_vm(v, "Undefined write on PIB INTA\n");
+		/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
+		panic_vm(v);
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			vlsapic_write_xtp(v, val);
 		} else {
-			panic_vm(v, "Undefined write on PIB XTP\n");
+			/*panic_domain(NULL,
+				"Undefined write on PIB XTP\n");*/
+			panic_vm(v);
 		}
 		break;
 	default:
 		if (PIB_LOW_HALF(addr)) {
-			/*Lower half */
+			/*lower half */
 			if (length != 8)
-				panic_vm(v, "Can't LHF write with size %ld!\n",
-						length);
+				/*panic_domain(NULL,
+				"Can't LHF write with size %ld!\n",
+				length);*/
+				panic_vm(v);
 			else
 				vlsapic_write_ipi(v, addr, val);
-		} else {	/*Upper half */
-			panic_vm(v, "IPI-UHF write %lx\n", addr);
+		} else {	/* upper half
+				printk("IPI-UHF write %lx\n",addr);*/
+			panic_vm(v);
 		}
 		break;
 	}
@@ -102,18 +108,22 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
 		if (length == 1) /* 1 byte load */
 			; /* There is no i8259, there is no INTA access*/
 		else
-			panic_vm(v, "Undefined read on PIB INTA\n");
+			/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
+			panic_vm(v);
 
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			result = VLSAPIC_XTP(v);
+			/* printk("read xtp %lx\n", result); */
 		} else {
-			panic_vm(v, "Undefined read on PIB XTP\n");
+			/*panic_domain(NULL,
+				"Undefined read on PIB XTP\n");*/
+			panic_vm(v);
 		}
 		break;
 	default:
-		panic_vm(v, "Undefined addr access for lsapic!\n");
+		panic_vm(v);
 		break;
 	}
 	return result;
@@ -152,7 +162,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
 		/* it's necessary to ensure zero extending */
 		*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
 	} else
-		panic_vm(vcpu, "Unhandled mmio access returned!\n");
+		panic_vm(vcpu);
 out:
 	local_irq_restore(psr);
 	return ;
@@ -314,9 +324,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		return;
 	} else {
 		inst_type = -1;
-		panic_vm(vcpu, "Unsupported MMIO access instruction! \
-		Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
-			bundle.i64[0], bundle.i64[1]);
+		panic_vm(vcpu);
 	}
 
 	size = 1 << size;
@@ -327,7 +335,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		if (inst_type == SL_INTEGER)
 			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
 		else
-			panic_vm(vcpu, "Unsupported instruction type!\n");
+			panic_vm(vcpu);
 
 	}
 	vcpu_increment_iip(vcpu);
diff --git a/trunk/arch/ia64/kvm/process.c b/trunk/arch/ia64/kvm/process.c
index 552d07724207..800817307b7b 100644
--- a/trunk/arch/ia64/kvm/process.c
+++ b/trunk/arch/ia64/kvm/process.c
@@ -527,8 +527,7 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
 	vector = vec2off[vec];
 
 	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
-				"with psr.ic = 0\n", vector);
+		panic_vm(vcpu);
 		return;
 	}
 
@@ -587,7 +586,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
 		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
 		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
 	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
+		panic_vm(vcpu);
 }
 
 static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -615,7 +614,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
 		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
 		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
 	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
+		panic_vm(vcpu);
 }
 
 void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -681,7 +680,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
 	vpsr = VCPU(vcpu, vpsr);
 	isr = vpsr & IA64_PSR_RI;
 	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
+		panic_vm(vcpu);
 	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
 }
 
@@ -942,20 +941,8 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
 	ia64_set_pta(vcpu->arch.vhpt.pta.val);
 }
 
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
-		panic_vm(vcpu, "Failed to do vmm sanity check,"
-			"it maybe caused by crashed vmm!!\n\n");
-	}
-}
-
 static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
 {
-	vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
-
 	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
 		vcpu_do_resume(vcpu);
 		return;
@@ -981,11 +968,3 @@ void vmm_transition(struct kvm_vcpu *vcpu)
 						1, 0, 0, 0, 0, 0);
 	kvm_do_resume_op(vcpu);
 }
-
-void vmm_panic_handler(u64 vec)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	vmm_sanity = 0;
-	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
-			vec2off[vec]);
-}
diff --git a/trunk/arch/ia64/kvm/vcpu.c b/trunk/arch/ia64/kvm/vcpu.c
index ecd526b55323..e44027ce5667 100644
--- a/trunk/arch/ia64/kvm/vcpu.c
+++ b/trunk/arch/ia64/kvm/vcpu.c
@@ -816,9 +816,8 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
 	unsigned long vitv = VCPU(vcpu, itv);
 
 	if (vcpu->vcpu_id == 0) {
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
+		for (i = 0; i < MAX_VCPU_NUM; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
 			VMX(v, itc_offset) = itc_offset;
 			VMX(v, last_itc) = 0;
 		}
@@ -1651,8 +1650,7 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
 	 * Otherwise panic
 	 */
 	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
-				& vpsr.is=0\n");
+		panic_vm(vcpu);
 
 	/*
 	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2105,7 +2103,7 @@ void
 kvm_init_all_rr(struct kvm_vcpu *vcpu)
 
 	if (is_physical_mode(vcpu)) {
 		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-			panic_vm(vcpu, "Machine Status conflicts!\n");
+			panic_vm(vcpu);
 
 		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
 		ia64_dv_serialize_data();
@@ -2154,70 +2152,10 @@ int vmm_entry(void)
 	return 0;
 }
 
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
-	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
-	struct kvm_vcpu *vcpu = current_vcpu;
-	if (vcpu != NULL)
-		printk("vcpu 0x%p vcpu %d\n",
-		       vcpu, vcpu->vcpu_id);
-
-	printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
-	       regs->cr_ipsr, regs->cr_ifs, ip);
-
-	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
-	printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
-	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
-	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
-	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
-	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
-	printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
-	       regs->b6, regs->b7);
-	printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
-	       regs->f6.u.bits[1], regs->f6.u.bits[0],
-	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
-	printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
-	       regs->f8.u.bits[1], regs->f8.u.bits[0],
-	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
-	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
-	       regs->f10.u.bits[1], regs->f10.u.bits[0],
-	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
-	printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
-	       regs->r2, regs->r3);
-	printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
-	       regs->r9, regs->r10);
-	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
-	       regs->r12, regs->r13);
-	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
-	       regs->r15, regs->r16);
-	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
-	       regs->r18, regs->r19);
-	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
-	       regs->r21, regs->r22);
-	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
-	       regs->r24, regs->r25);
-	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
-	       regs->r27, regs->r28);
-	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
-	       regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
-	va_list args;
-	char buf[256];
-
-	struct kvm_pt_regs *regs = vcpu_regs(v);
+void panic_vm(struct kvm_vcpu *v)
+{
 	struct exit_ctl_data *p = &v->arch.exit_data;
-	va_start(args, fmt);
-	vsnprintf(buf, sizeof(buf), fmt, args);
-	va_end(args);
-	printk(buf);
-	kvm_show_registers(regs);
+
 	p->exit_reason = EXIT_REASON_VM_PANIC;
 	vmm_transition(v);
 	/*Never to return*/
diff --git a/trunk/arch/ia64/kvm/vcpu.h b/trunk/arch/ia64/kvm/vcpu.h
index b2f12a562bdf..e9b2a4e121c0 100644
--- a/trunk/arch/ia64/kvm/vcpu.h
+++ b/trunk/arch/ia64/kvm/vcpu.h
@@ -737,12 +737,9 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
 void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
+void panic_vm(struct kvm_vcpu *v);
 
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
 #endif
 #endif	/* __VCPU_H__ */
diff --git a/trunk/arch/ia64/kvm/vmm.c b/trunk/arch/ia64/kvm/vmm.c
index 9eee5c04bacc..2275bf4e681a 100644
--- a/trunk/arch/ia64/kvm/vmm.c
+++ b/trunk/arch/ia64/kvm/vmm.c
@@ -20,7 +20,6 @@
  */
 
 
-#include
 #include
 #include
 
@@ -32,8 +31,6 @@ MODULE_LICENSE("GPL");
 extern char kvm_ia64_ivt;
 extern fpswa_interface_t *vmm_fpswa_interface;
 
-long vmm_sanity = 1;
-
 struct kvm_vmm_info vmm_info = {
 	.module			= THIS_MODULE,
 	.vmm_entry		= vmm_entry,
@@ -65,31 +62,5 @@ void vmm_spin_unlock(spinlock_t *lock)
 {
 	_vmm_raw_spin_unlock(lock);
 }
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_DEBUG;
-	vmm_transition(vcpu);
-	local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	va_list args;
-	int r;
-
-	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
-	va_start(args, fmt);
-	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
-	va_end(args);
-	vcpu_debug_exit(vcpu);
-	return r;
-}
-
 module_init(kvm_vmm_init)
 module_exit(kvm_vmm_exit)
diff --git a/trunk/arch/ia64/kvm/vmm_ivt.S b/trunk/arch/ia64/kvm/vmm_ivt.S
index 3ef1a017a318..c1d7251a1480 100644
--- a/trunk/arch/ia64/kvm/vmm_ivt.S
+++ b/trunk/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
 /*
- * arch/ia64/kvm/vmm_ivt.S
+ * /ia64/kvm_ivt.S
  *
  * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
  *	Stephane Eranian
@@ -70,39 +70,32 @@
 # define PSR_DEFAULT_BITS   0
 #endif
 
+
 #define KVM_FAULT(n)    \
-	kvm_fault_##n:;          \
-	mov r19=n;;          \
-	br.sptk.many kvm_vmm_panic;         \
-	;;                  \
+    kvm_fault_##n:;          \
+    mov r19=n;;          \
+    br.sptk.many kvm_fault_##n;         \
+    ;;                  \
+
 
 #define KVM_REFLECT(n)    \
-	mov r31=pr;           \
-	mov r19=n;       /* prepare to save predicates */ \
-	mov r29=cr.ipsr;      \
-	;;      \
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7) br.sptk.many kvm_dispatch_reflection;        \
-	br.sptk.many kvm_vmm_panic;      \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	//(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
+    mov r31=pr;           \
+    mov r19=n;       /* prepare to save predicates */ \
+    mov r29=cr.ipsr;      \
+    ;;      \
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)br.sptk.many kvm_dispatch_reflection;        \
+    br.sptk.many kvm_panic;      \
+
+
+GLOBAL_ENTRY(kvm_panic)
+    br.sptk.many kvm_panic
+    ;;
+END(kvm_panic) + + + + .section .text.ivt,"ax" @@ -112,307 +105,308 @@ kvm_ia64_ivt: /////////////////////////////////////////////////////////////// // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) ENTRY(kvm_vhpt_miss) - KVM_FAULT(0) + KVM_FAULT(0) END(kvm_vhpt_miss) + .org kvm_ia64_ivt+0x400 //////////////////////////////////////////////////////////////// // 0x0400 Entry 1 (size 64 bundles) ITLB (21) ENTRY(kvm_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_itlb_miss - mov r19 = 1 - br.sptk kvm_itlb_miss_dispatch - KVM_FAULT(1); + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; + (p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); END(kvm_itlb_miss) .org kvm_ia64_ivt+0x0800 ////////////////////////////////////////////////////////////////// // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) ENTRY(kvm_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk kvm_alt_dtlb_miss - br.sptk kvm_dtlb_miss_dispatch + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch END(kvm_dtlb_miss) .org kvm_ia64_ivt+0x0c00 //////////////////////////////////////////////////////////////////// // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) ENTRY(kvm_alt_itlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r17,r19 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_itlb_miss) .org kvm_ia64_ivt+0x1000 ///////////////////////////////////////////////////////////////////// // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) ENTRY(kvm_alt_dtlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r19,r17 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_dtlb_miss) .org kvm_ia64_ivt+0x1400 ////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) ENTRY(kvm_nested_dtlb_miss) - KVM_FAULT(5) + KVM_FAULT(5) END(kvm_nested_dtlb_miss) .org kvm_ia64_ivt+0x1800 
///////////////////////////////////////////////////////////////////// // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) ENTRY(kvm_ikey_miss) - KVM_REFLECT(6) + KVM_REFLECT(6) END(kvm_ikey_miss) .org kvm_ia64_ivt+0x1c00 ///////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) ENTRY(kvm_dkey_miss) - KVM_REFLECT(7) + KVM_REFLECT(7) END(kvm_dkey_miss) .org kvm_ia64_ivt+0x2000 //////////////////////////////////////////////////////////////////// // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) ENTRY(kvm_dirty_bit) - KVM_REFLECT(8) + KVM_REFLECT(8) END(kvm_dirty_bit) .org kvm_ia64_ivt+0x2400 //////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) ENTRY(kvm_iaccess_bit) - KVM_REFLECT(9) + KVM_REFLECT(9) END(kvm_iaccess_bit) .org kvm_ia64_ivt+0x2800 /////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) ENTRY(kvm_daccess_bit) - KVM_REFLECT(10) + KVM_REFLECT(10) END(kvm_daccess_bit) .org kvm_ia64_ivt+0x2c00 ///////////////////////////////////////////////////////////////// // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) ENTRY(kvm_break_fault) - mov r31=pr - mov r19=11 - mov r29=cr.ipsr - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!) - mov out0=cr.ifa - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15)ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out1=16,sp - br.call.sptk.many b6=kvm_ia64_handle_break - ;; + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) 
+ mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; END(kvm_break_fault) .org kvm_ia64_ivt+0x3000 ///////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) ENTRY(kvm_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk kvm_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-VMM_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; .mem.offset 0,0; st8.spill [r16]=r8,16 .mem.offset 8,0; st8.spill [r17]=r9,16 ;; .mem.offset 0,0; st8.spill [r16]=r10,24 .mem.offset 8,0; st8.spill [r17]=r11,24 ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds 
r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; .mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ .mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 - /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; .mem.offset 0,0; st8.spill [r16]=r13,16 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - ;; + ;; .mem.offset 0,0; st8.spill [r16]=r15,16 .mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; + dep r14=-1,r0,60,4 + ;; .mem.offset 0,0; st8.spill [r16]=r2,16 .mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 - adds r14 = VMM_VCPU_GP_OFFSET,r13 - ;; - mov r8=ar.ccv - ld8 r14 = [r14] - ;; - mov r1=r14 /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - mov out0=r13 - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; .mem.offset 0,0; st8.spill [r2]=r16,16 .mem.offset 8,0; st8.spill [r3]=r17,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r18,16 .mem.offset 8,0; st8.spill [r3]=r19,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r20,16 .mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; + mov r18=b6 + ;; .mem.offset 0,0; st8.spill [r2]=r22,16 .mem.offset 8,0; st8.spill [r3]=r23,16 - mov r19=b7 - ;; + mov r19=b7 + ;; .mem.offset 0,0; st8.spill [r2]=r24,16 .mem.offset 8,0; st8.spill [r3]=r25,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r26,16 .mem.offset 8,0; st8.spill [r3]=r27,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r28,16 .mem.offset 8,0; st8.spill [r3]=r29,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r30,16 .mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - ;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - addl r14=@gprel(ia64_leave_nested),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_ia64_handle_irq - ;; + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; END(kvm_interrupt) .global kvm_dispatch_vexirq @@ -420,385 +414,387 @@ 
END(kvm_interrupt) ////////////////////////////////////////////////////////////////////// // 0x3400 Entry 13 (size 64 bundles) Reserved ENTRY(kvm_virtual_exirq) - mov r31=pr - mov r19=13 - mov r30 =r0 - ;; + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; kvm_dispatch_vexirq: - cmp.eq p6,p0 = 1,r30 - ;; -(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; -(p6) ld8 r1 = [r29] - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - KVM_SAVE_REST - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_vexirq + cmp.eq p6,p0 = 1,r30 + ;; +(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6)ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq END(kvm_virtual_exirq) .org kvm_ia64_ivt+0x3800 ///////////////////////////////////////////////////////////////////// // 0x3800 Entry 14 (size 64 bundles) Reserved - KVM_FAULT(14) - // this code segment is from 2.6.16.13 + KVM_FAULT(14) + // this code segment is from 2.6.16.13 + .org kvm_ia64_ivt+0x3c00 /////////////////////////////////////////////////////////////////////// // 0x3c00 Entry 15 (size 64 bundles) Reserved - KVM_FAULT(15) + KVM_FAULT(15) + .org kvm_ia64_ivt+0x4000 /////////////////////////////////////////////////////////////////////// // 0x4000 Entry 16 (size 64 bundles) Reserved - KVM_FAULT(16) + KVM_FAULT(16) .org kvm_ia64_ivt+0x4400 ////////////////////////////////////////////////////////////////////// // 0x4400 Entry 17 (size 64 bundles) Reserved - KVM_FAULT(17) + KVM_FAULT(17) .org kvm_ia64_ivt+0x4800 ////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved - KVM_FAULT(18) + KVM_FAULT(18) .org kvm_ia64_ivt+0x4c00 ////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64 bundles) Reserved - KVM_FAULT(19) + KVM_FAULT(19) .org kvm_ia64_ivt+0x5000 ////////////////////////////////////////////////////////////////////// // 0x5000 Entry 20 (size 16 bundles) Page Not Present ENTRY(kvm_page_not_present) - KVM_REFLECT(20) + KVM_REFLECT(20) END(kvm_page_not_present) .org kvm_ia64_ivt+0x5100 /////////////////////////////////////////////////////////////////////// // 0x5100 Entry 21 (size 16 bundles) Key Permission vector ENTRY(kvm_key_permission) - KVM_REFLECT(21) + KVM_REFLECT(21) END(kvm_key_permission) .org kvm_ia64_ivt+0x5200 ////////////////////////////////////////////////////////////////////// // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(kvm_iaccess_rights) - KVM_REFLECT(22) + KVM_REFLECT(22) END(kvm_iaccess_rights) .org kvm_ia64_ivt+0x5300 ////////////////////////////////////////////////////////////////////// // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(kvm_daccess_rights) - KVM_REFLECT(23) + KVM_REFLECT(23) END(kvm_daccess_rights) .org kvm_ia64_ivt+0x5400 ///////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(kvm_general_exception) - KVM_REFLECT(24) - KVM_FAULT(24) + 
KVM_REFLECT(24) + KVM_FAULT(24) END(kvm_general_exception) .org kvm_ia64_ivt+0x5500 ////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) ENTRY(kvm_disabled_fp_reg) - KVM_REFLECT(25) + KVM_REFLECT(25) END(kvm_disabled_fp_reg) .org kvm_ia64_ivt+0x5600 //////////////////////////////////////////////////////////////////// // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(kvm_nat_consumption) - KVM_REFLECT(26) + KVM_REFLECT(26) END(kvm_nat_consumption) .org kvm_ia64_ivt+0x5700 ///////////////////////////////////////////////////////////////////// // 0x5700 Entry 27 (size 16 bundles) Speculation (40) ENTRY(kvm_speculation_vector) - KVM_REFLECT(27) + KVM_REFLECT(27) END(kvm_speculation_vector) .org kvm_ia64_ivt+0x5800 ///////////////////////////////////////////////////////////////////// // 0x5800 Entry 28 (size 16 bundles) Reserved - KVM_FAULT(28) + KVM_FAULT(28) .org kvm_ia64_ivt+0x5900 /////////////////////////////////////////////////////////////////// // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) ENTRY(kvm_debug_vector) - KVM_FAULT(29) + KVM_FAULT(29) END(kvm_debug_vector) .org kvm_ia64_ivt+0x5a00 /////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(kvm_unaligned_access) - KVM_REFLECT(30) + KVM_REFLECT(30) END(kvm_unaligned_access) .org kvm_ia64_ivt+0x5b00 ////////////////////////////////////////////////////////////////////// // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) ENTRY(kvm_unsupported_data_reference) - KVM_REFLECT(31) + KVM_REFLECT(31) END(kvm_unsupported_data_reference) .org kvm_ia64_ivt+0x5c00 //////////////////////////////////////////////////////////////////// // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) ENTRY(kvm_floating_point_fault) - KVM_REFLECT(32) + KVM_REFLECT(32) END(kvm_floating_point_fault) .org kvm_ia64_ivt+0x5d00 ///////////////////////////////////////////////////////////////////// // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) ENTRY(kvm_floating_point_trap) - KVM_REFLECT(33) + KVM_REFLECT(33) END(kvm_floating_point_trap) .org kvm_ia64_ivt+0x5e00 ////////////////////////////////////////////////////////////////////// // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) ENTRY(kvm_lower_privilege_trap) - KVM_REFLECT(34) + KVM_REFLECT(34) END(kvm_lower_privilege_trap) .org kvm_ia64_ivt+0x5f00 ////////////////////////////////////////////////////////////////////// // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) ENTRY(kvm_taken_branch_trap) - KVM_REFLECT(35) + KVM_REFLECT(35) END(kvm_taken_branch_trap) .org kvm_ia64_ivt+0x6000 //////////////////////////////////////////////////////////////////// // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) ENTRY(kvm_single_step_trap) - KVM_REFLECT(36) + KVM_REFLECT(36) END(kvm_single_step_trap) .global kvm_virtualization_fault_back .org kvm_ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault ENTRY(kvm_virtualization_fault) - mov r31=pr - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - st8 [r16] = r1 - adds r17 = VMM_VCPU_GP_OFFSET, r21 - ;; - ld8 r1 = [r17] - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 - cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 - cmp.eq p9,p0=EVENT_RSM,r24 - cmp.eq p10,p0=EVENT_SSM,r24 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 - cmp.eq 
p12,p0=EVENT_THASH,r24 -(p6) br.dptk.many kvm_asm_mov_from_ar -(p7) br.dptk.many kvm_asm_mov_from_rr -(p8) br.dptk.many kvm_asm_mov_to_rr -(p9) br.dptk.many kvm_asm_rsm -(p10) br.dptk.many kvm_asm_ssm -(p11) br.dptk.many kvm_asm_mov_to_psr -(p12) br.dptk.many kvm_asm_thash - ;; + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 + (p6) br.dptk.many kvm_asm_mov_from_ar + (p7) br.dptk.many kvm_asm_mov_from_rr + (p8) br.dptk.many kvm_asm_mov_to_rr + (p9) br.dptk.many kvm_asm_rsm + (p10) br.dptk.many kvm_asm_ssm + (p11) br.dptk.many kvm_asm_mov_to_psr + (p12) br.dptk.many kvm_asm_thash + ;; kvm_virtualization_fault_back: - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 = [r16] - ;; - mov r19=37 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - cmp.ne p6,p0=EVENT_RFI, r24 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] - ;; - adds r18=VMM_VPD_VIFS_OFFSET,r18 - ;; - ld8 r18=[r18] - ;; - tbit.z p6,p0=r18,63 -(p6) br.sptk kvm_dispatch_virtualization_fault - ;; -//if vifs.v=1 desert current register frame - alloc r18=ar.pfs,0,0,0,0 - br.sptk kvm_dispatch_virtualization_fault + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + //if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault END(kvm_virtualization_fault) .org kvm_ia64_ivt+0x6200 ////////////////////////////////////////////////////////////// // 0x6200 Entry 38 (size 16 bundles) Reserved - KVM_FAULT(38) + KVM_FAULT(38) .org kvm_ia64_ivt+0x6300 ///////////////////////////////////////////////////////////////// // 0x6300 Entry 39 (size 16 bundles) Reserved - KVM_FAULT(39) + KVM_FAULT(39) .org kvm_ia64_ivt+0x6400 ///////////////////////////////////////////////////////////////// // 0x6400 Entry 40 (size 16 bundles) Reserved - KVM_FAULT(40) + KVM_FAULT(40) .org kvm_ia64_ivt+0x6500 ////////////////////////////////////////////////////////////////// // 0x6500 Entry 41 (size 16 bundles) Reserved - KVM_FAULT(41) + KVM_FAULT(41) .org kvm_ia64_ivt+0x6600 ////////////////////////////////////////////////////////////////// // 0x6600 Entry 42 (size 16 bundles) Reserved - KVM_FAULT(42) + KVM_FAULT(42) .org kvm_ia64_ivt+0x6700 ////////////////////////////////////////////////////////////////// // 0x6700 Entry 43 (size 16 bundles) Reserved - KVM_FAULT(43) + KVM_FAULT(43) .org kvm_ia64_ivt+0x6800 ////////////////////////////////////////////////////////////////// // 0x6800 Entry 44 (size 16 bundles) Reserved - KVM_FAULT(44) + KVM_FAULT(44) .org kvm_ia64_ivt+0x6900 /////////////////////////////////////////////////////////////////// // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception 
//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) ENTRY(kvm_ia32_exception) - KVM_FAULT(45) + KVM_FAULT(45) END(kvm_ia32_exception) .org kvm_ia64_ivt+0x6a00 //////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) ENTRY(kvm_ia32_intercept) - KVM_FAULT(47) + KVM_FAULT(47) END(kvm_ia32_intercept) .org kvm_ia64_ivt+0x6c00 ///////////////////////////////////////////////////////////////////// // 0x6c00 Entry 48 (size 16 bundles) Reserved - KVM_FAULT(48) + KVM_FAULT(48) .org kvm_ia64_ivt+0x6d00 ////////////////////////////////////////////////////////////////////// // 0x6d00 Entry 49 (size 16 bundles) Reserved - KVM_FAULT(49) + KVM_FAULT(49) .org kvm_ia64_ivt+0x6e00 ////////////////////////////////////////////////////////////////////// // 0x6e00 Entry 50 (size 16 bundles) Reserved - KVM_FAULT(50) + KVM_FAULT(50) .org kvm_ia64_ivt+0x6f00 ///////////////////////////////////////////////////////////////////// // 0x6f00 Entry 51 (size 16 bundles) Reserved - KVM_FAULT(52) + KVM_FAULT(52) .org kvm_ia64_ivt+0x7100 //////////////////////////////////////////////////////////////////// // 0x7100 Entry 53 (size 16 bundles) Reserved - KVM_FAULT(53) + KVM_FAULT(53) .org kvm_ia64_ivt+0x7200 ///////////////////////////////////////////////////////////////////// // 0x7200 Entry 54 (size 16 bundles) Reserved - KVM_FAULT(54) + KVM_FAULT(54) .org kvm_ia64_ivt+0x7300 //////////////////////////////////////////////////////////////////// // 0x7300 Entry 55 (size 16 bundles) Reserved - KVM_FAULT(55) + KVM_FAULT(55) .org kvm_ia64_ivt+0x7400 //////////////////////////////////////////////////////////////////// // 0x7400 Entry 56 (size 16 bundles) Reserved - KVM_FAULT(56) + KVM_FAULT(56) .org kvm_ia64_ivt+0x7500 ///////////////////////////////////////////////////////////////////// // 0x7500 Entry 57 (size 16 bundles) Reserved - KVM_FAULT(57) + KVM_FAULT(57) .org kvm_ia64_ivt+0x7600 ///////////////////////////////////////////////////////////////////// // 0x7600 Entry 58 (size 16 bundles) Reserved - KVM_FAULT(58) + KVM_FAULT(58) .org kvm_ia64_ivt+0x7700 //////////////////////////////////////////////////////////////////// // 0x7700 Entry 59 (size 16 bundles) Reserved - KVM_FAULT(59) + KVM_FAULT(59) .org kvm_ia64_ivt+0x7800 //////////////////////////////////////////////////////////////////// // 0x7800 Entry 60 (size 16 bundles) Reserved - KVM_FAULT(60) + KVM_FAULT(60) .org kvm_ia64_ivt+0x7900 ///////////////////////////////////////////////////////////////////// // 0x7900 Entry 61 (size 16 bundles) Reserved - KVM_FAULT(61) + KVM_FAULT(61) .org kvm_ia64_ivt+0x7a00 ///////////////////////////////////////////////////////////////////// // 0x7a00 Entry 62 (size 16 bundles) Reserved - KVM_FAULT(62) + KVM_FAULT(62) .org kvm_ia64_ivt+0x7b00 ///////////////////////////////////////////////////////////////////// // 0x7b00 Entry 63 (size 16 bundles) Reserved - KVM_FAULT(63) + KVM_FAULT(63) .org kvm_ia64_ivt+0x7c00 //////////////////////////////////////////////////////////////////// // 0x7c00 Entry 64 (size 16 bundles) Reserved - KVM_FAULT(64) + KVM_FAULT(64) .org kvm_ia64_ivt+0x7d00 ///////////////////////////////////////////////////////////////////// // 0x7d00 Entry 65 (size 16 bundles) Reserved - KVM_FAULT(65) + KVM_FAULT(65) .org kvm_ia64_ivt+0x7e00 ///////////////////////////////////////////////////////////////////// // 0x7e00 Entry 66 (size 16 bundles) Reserved - KVM_FAULT(66) + KVM_FAULT(66) .org kvm_ia64_ivt+0x7f00 
//////////////////////////////////////////////////////////////////// // 0x7f00 Entry 67 (size 16 bundles) Reserved - KVM_FAULT(67) + KVM_FAULT(67) .org kvm_ia64_ivt+0x8000 // There is no particular reason for this code to be here, other than that @@ -808,128 +804,132 @@ END(kvm_ia32_intercept) ENTRY(kvm_dtlb_miss_dispatch) - mov r19 = 2 - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_dtlb_miss_dispatch) ENTRY(kvm_itlb_miss_dispatch) - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_itlb_miss_dispatch) ENTRY(kvm_dispatch_reflection) -/* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out4=16,r12 - br.call.sptk.many b6=reflect_interruption + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption END(kvm_dispatch_reflection) ENTRY(kvm_dispatch_virtualization_fault) - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) 
- mov out0=r13 //vcpu - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out1=16,sp //regs - br.call.sptk.many b6=kvm_emulate + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate END(kvm_dispatch_virtualization_fault) ENTRY(kvm_dispatch_interrupt) - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - mov out0=r13 // pass pointer to pt_regs as second arg - br.call.sptk.many b6=kvm_ia64_handle_irq + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + //mov out0=cr.ivr // pass cr.ivr as first arg + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq END(kvm_dispatch_interrupt) + + + GLOBAL_ENTRY(ia64_leave_nested) rsm psr.i ;; @@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested) ;; ldf.fill f11=[r2] // mov r18=r13 -// mov r21=r13 +// mov r21=r13 adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 ;; @@ -1058,135 +1058,138 @@ GLOBAL_ENTRY(ia64_leave_nested) rfi END(ia64_leave_nested) + + GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) -/* - * work.need_resched etc. mustn't get changed - *by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on: - */ - adds r2 = PT(R4)+16,r12 - adds r3 = PT(R5)+16,r12 - adds r8 = PT(EML_UNAT)+16,r12 - ;; - ld8 r8 = [r8] - ;; - mov ar.unat=r8 - ;; - ld8.fill r4=[r2],16 //load r4 - ld8.fill r5=[r3],16 //load r5 - ;; - ld8.fill r6=[r2] //load r6 - ld8.fill r7=[r3] //load r7 - ;; + /* + * work.need_resched etc. 
mustn't get changed + *by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; END(ia64_leave_hypervisor_prepare) //fall through GLOBAL_ENTRY(ia64_leave_hypervisor) - rsm psr.i - ;; - br.call.sptk.many b0=leave_hypervisor_tail - ;; - adds r20=PT(PR)+16,r12 - adds r8=PT(EML_UNAT)+16,r12 - ;; - ld8 r8=[r8] - ;; - mov ar.unat=r8 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r2 = PT(B6)+16,r12 - adds r3 = PT(B7)+16,r12 - ;; - lfetch [r20] - ;; - ld8 r24=[r2],16 /* B6 */ - ld8 r25=[r3],16 /* B7 */ - ;; - ld8 r26=[r2],16 /* ar_csd */ - ld8 r27=[r3],16 /* ar_ssd */ - mov b6 = r24 - ;; - ld8.fill r8=[r2],16 - ld8.fill r9=[r3],16 - mov b7 = r25 - ;; - mov ar.csd = r26 - mov ar.ssd = r27 - ;; - ld8.fill r10=[r2],PT(R15)-PT(R10) - ld8.fill r11=[r3],PT(R14)-PT(R11) - ;; - ld8.fill r15=[r2],PT(R16)-PT(R15) - ld8.fill r14=[r3],PT(R17)-PT(R14) - ;; - ld8.fill r16=[r2],16 - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - ;; - ld8.fill r22=[r2],16 - ld8.fill r23=[r3],16 - ;; - ld8.fill r24=[r2],16 - ld8.fill r25=[r3],16 - ;; - ld8.fill r26=[r2],16 - ld8.fill r27=[r3],16 - ;; - ld8.fill r28=[r2],16 - ld8.fill r29=[r3],16 - ;; - ld8.fill r30=[r2],PT(F6)-PT(R30) - ld8.fill r31=[r3],PT(F7)-PT(R31) - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i // ensure interruption collection is off - ;; - bsw.0 - ;; - adds r16 = PT(CR_IPSR)+16,r12 - adds r17 = PT(CR_IIP)+16,r12 - mov r21=r13 // get current - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat - ld8 r24=[r17],16 // load ar.bspstore - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 //load r1 - ;; - ld8.fill r12=[r16],16 //load r12 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 - ;; - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 - ;; - ld8.fill r3=[r16] //load r3 - ld8 r18=[r17] //load ar_ccv - ;; - mov ar.fpsr=r19 - mov ar.ccv=r18 - shr.u r18=r20,16 - ;; + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill 
r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; kvm_rbs_switch: - mov r19=96 + mov r19=96 kvm_dont_preserve_current_frame: /* @@ -1198,76 +1201,76 @@ kvm_dont_preserve_current_frame: # define pReturn p7 # define Nregs 14 - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r19 - mov in1=0 - ;; - TEXT_ALIGN(32) + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) kvm_rse_clear_invalid: - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 - // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 - // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 (pReturn) br.ret.dptk.many b0 # undef pRecurse # undef pReturn // loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; - mov ar.bspstore=r24 - ;; - mov ar.unat=r28 - mov ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - mov ar.pfs=r27 - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] //vpd - adds r17=VMM_VCPU_ISR_OFFSET,r21 - ;; - ld8 r17=[r17] - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] //vpsr - mov r25=r18 - adds r16= VMM_VCPU_GP_OFFSET,r21 - ;; - ld8 r16= [r16] // Put gp in r24 - movl 
r24=@gprel(ia64_vmm_entry) // calculate return address - ;; - add r24=r24,r16 - ;; - br.sptk.many kvm_vps_sync_write // call the service - ;; + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + br.sptk.many kvm_vps_sync_write // call the service + ;; END(ia64_leave_hypervisor) // fall through GLOBAL_ENTRY(ia64_vmm_entry) @@ -1280,14 +1283,16 @@ GLOBAL_ENTRY(ia64_vmm_entry) * r22:b0 * r23:predicate */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic -(p1) br.cond.sptk.few kvm_vps_resume_normal -(p2) br.cond.sptk.many kvm_vps_resume_handler - ;; + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + (p1) br.cond.sptk.few kvm_vps_resume_normal + (p2) br.cond.sptk.many kvm_vps_resume_handler + ;; END(ia64_vmm_entry) + + /* * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, * u64 arg3, u64 arg4, u64 arg5, @@ -1305,88 +1310,88 @@ psrsave = loc2 entry = loc3 hostret = r24 - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 - ;; - ld8 entry=[entry] -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.i - mov b6=entry - br.cond.sptk b6 // call the service + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service 2: -// Architectural sequence for enabling interrupts if necessary + // Architectural sequence for enabling interrupts if necessary (p7) ssm psr.ic - ;; + ;; (p7) srlz.i - ;; + ;; //(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp END(ia64_call_vsa) #define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) GLOBAL_ENTRY(vmm_reset_entry) - //set up ipsr, iip, vpd.vpsr, dcr - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 - // For DCR: all bits 0 - bsw.0 - ;; - mov r21 =r13 - adds r14=-VMM_PT_REGS_SIZE, r12 - ;; - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 - movl r10=0x8000000000000000 - adds r16=PT(CR_IIP), r14 - adds r20=PT(R1), r14 - ;; - rsm psr.ic | psr.i - ;; - srlz.i - ;; - mov ar.rsc = 0 - ;; - flushrs - ;; - mov ar.bspstore = 0 - // clear BSPSTORE - ;; - mov cr.ipsr=r6 - mov cr.ifs=r10 - ld8 r4 = [r16] // Set init iip for first run. 
- ld8 r1 = [r20] - ;; - mov cr.iip=r4 - adds r16=VMM_VPD_BASE_OFFSET,r13 - ;; - ld8 r18=[r16] - ;; - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] - mov r17=r0 - mov r22=r0 - mov r23=r0 - br.cond.sptk ia64_vmm_entry - br.ret.sptk b0 + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + bsw.0 + ;; + mov r21 =r13 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + adds r16=VMM_VPD_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 END(vmm_reset_entry) diff --git a/trunk/arch/ia64/kvm/vtlb.c b/trunk/arch/ia64/kvm/vtlb.c index 6b6307a3bd55..e22b93361e08 100644 --- a/trunk/arch/ia64/kvm/vtlb.c +++ b/trunk/arch/ia64/kvm/vtlb.c @@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) u64 i, dirty_pages = 1; u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); - void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; - + void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) + + KVM_MEM_DIRTY_LOG_OFS; dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; vmm_spin_lock(lock); diff --git a/trunk/arch/ia64/sn/kernel/irq.c b/trunk/arch/ia64/sn/kernel/irq.c index 66fd705e82c0..0c66dbdd1d72 100644 --- a/trunk/arch/ia64/sn/kernel/irq.c +++ b/trunk/arch/ia64/sn/kernel/irq.c @@ -227,14 +227,14 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, return new_irq_info; } -static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) +static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) { struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; nasid_t nasid; int slice; - nasid = cpuid_to_nasid(cpumask_first(mask)); - slice = cpuid_to_slice(cpumask_first(mask)); + nasid = cpuid_to_nasid(first_cpu(mask)); + slice = cpuid_to_slice(first_cpu(mask)); list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) diff --git a/trunk/arch/ia64/sn/kernel/msi_sn.c b/trunk/arch/ia64/sn/kernel/msi_sn.c index ca553b0429ce..83f190ffe350 100644 --- a/trunk/arch/ia64/sn/kernel/msi_sn.c +++ b/trunk/arch/ia64/sn/kernel/msi_sn.c @@ -151,8 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) } #ifdef CONFIG_SMP -static void sn_set_msi_irq_affinity(unsigned int irq, - const struct cpumask *cpu_mask) +static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) { struct msi_msg msg; int slice; @@ -165,7 +164,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, struct sn_pcibus_provider *provider; unsigned int cpu; - cpu = cpumask_first(cpu_mask); + cpu = first_cpu(cpu_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) return; @@ -205,7 +204,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = *cpu_mask; + irq_desc[irq].affinity = cpu_mask; } #endif /* CONFIG_SMP */ diff --git 
a/trunk/arch/m32r/Kconfig b/trunk/arch/m32r/Kconfig index cabba332cc48..29047d5c259a 100644 --- a/trunk/arch/m32r/Kconfig +++ b/trunk/arch/m32r/Kconfig @@ -10,7 +10,6 @@ config M32R default y select HAVE_IDE select HAVE_OPROFILE - select INIT_ALL_POSSIBLE config SBUS bool diff --git a/trunk/arch/m32r/kernel/smpboot.c b/trunk/arch/m32r/kernel/smpboot.c index 0f06b3722e96..39cb6da72dcb 100644 --- a/trunk/arch/m32r/kernel/smpboot.c +++ b/trunk/arch/m32r/kernel/smpboot.c @@ -73,11 +73,17 @@ static unsigned int bsp_phys_id = -1; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; +/* Bitmask of currently online CPUs */ +cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); + cpumask_t cpu_bootout_map; cpumask_t cpu_bootin_map; static cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); +cpumask_t cpu_possible_map = CPU_MASK_ALL; +EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned; diff --git a/trunk/arch/m68k/Kconfig b/trunk/arch/m68k/Kconfig index 836fb66f080d..c825bde17cb3 100644 --- a/trunk/arch/m68k/Kconfig +++ b/trunk/arch/m68k/Kconfig @@ -280,7 +280,6 @@ config M68060 config MMU_MOTOROLA bool - depends on MMU && !MMU_SUN3 config MMU_SUN3 bool diff --git a/trunk/arch/m68knommu/platform/coldfire/pit.c b/trunk/arch/m68knommu/platform/coldfire/pit.c index 2a12e7fa9748..c5b916700b22 100644 --- a/trunk/arch/m68knommu/platform/coldfire/pit.c +++ b/trunk/arch/m68knommu/platform/coldfire/pit.c @@ -156,7 +156,7 @@ void hw_timer_init(void) { u32 imr; - cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id()); + cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32); cf_pit_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFF, &cf_pit_clockevent); diff --git a/trunk/arch/mips/include/asm/irq.h b/trunk/arch/mips/include/asm/irq.h index abc62aa744ac..a58f0eecc68f 100644 --- a/trunk/arch/mips/include/asm/irq.h +++ b/trunk/arch/mips/include/asm/irq.h @@ -49,8 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq) #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF #include -extern void plat_set_irq_affinity(unsigned int irq, - const struct cpumask *affinity); +extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity); extern void smtc_forward_irq(unsigned int irq); /* diff --git a/trunk/arch/mips/include/asm/mach-ip27/topology.h b/trunk/arch/mips/include/asm/mach-ip27/topology.h index 1fb959f98982..7785bec732f2 100644 --- a/trunk/arch/mips/include/asm/mach-ip27/topology.h +++ b/trunk/arch/mips/include/asm/mach-ip27/topology.h @@ -37,6 +37,7 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; /* sched_domains SD_NODE_INIT for SGI IP27 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/mips/include/asm/smp.h b/trunk/arch/mips/include/asm/smp.h index 86557b5d1b3f..0ff5b523ea77 100644 --- a/trunk/arch/mips/include/asm/smp.h +++ b/trunk/arch/mips/include/asm/smp.h @@ -38,6 +38,9 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ #define SMP_CALL_FUNCTION 0x2 +extern cpumask_t phys_cpu_present_map; +#define cpu_possible_map phys_cpu_present_map + extern void asmlinkage smp_bootstrap(void); /* diff --git a/trunk/arch/mips/jazz/irq.c b/trunk/arch/mips/jazz/irq.c index 03965cb1b252..d7f8a782aae4 100644 --- 
a/trunk/arch/mips/jazz/irq.c
+++ b/trunk/arch/mips/jazz/irq.c
@@ -146,7 +146,7 @@ void __init plat_time_init(void)
 BUG_ON(HZ != 100);
- cd->cpumask = cpumask_of(cpu);
+ cd->cpumask = cpumask_of_cpu(cpu);
 clockevents_register_device(cd);
 action->dev_id = cd;
 setup_irq(JAZZ_TIMER_IRQ, action);
diff --git a/trunk/arch/mips/kernel/cevt-bcm1480.c b/trunk/arch/mips/kernel/cevt-bcm1480.c
index b820661678b0..0a57f86945f1 100644
--- a/trunk/arch/mips/kernel/cevt-bcm1480.c
+++ b/trunk/arch/mips/kernel/cevt-bcm1480.c
@@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void)
 cd->min_delta_ns = clockevent_delta2ns(2, cd);
 cd->rating = 200;
 cd->irq = irq;
- cd->cpumask = cpumask_of(cpu);
+ cd->cpumask = cpumask_of_cpu(cpu);
 cd->set_next_event = sibyte_next_event;
 cd->set_mode = sibyte_set_mode;
 clockevents_register_device(cd);
@@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void)
 action->name = name;
 action->dev_id = cd;
- irq_set_affinity(irq, cpumask_of(cpu));
+ irq_set_affinity(irq, cpumask_of_cpu(cpu));
 setup_irq(irq, action);
 }
diff --git a/trunk/arch/mips/kernel/cevt-ds1287.c b/trunk/arch/mips/kernel/cevt-ds1287.c
index 1ada45ea0700..df4acb68bfb5 100644
--- a/trunk/arch/mips/kernel/cevt-ds1287.c
+++ b/trunk/arch/mips/kernel/cevt-ds1287.c
@@ -88,6 +88,7 @@ static void ds1287_event_handler(struct clock_event_device *dev)
 static struct clock_event_device ds1287_clockevent = {
 .name = "ds1287",
 .features = CLOCK_EVT_FEAT_PERIODIC,
+ .cpumask = CPU_MASK_CPU0,
 .set_next_event = ds1287_set_next_event,
 .set_mode = ds1287_set_mode,
 .event_handler = ds1287_event_handler,
@@ -121,7 +122,6 @@ int __init ds1287_clockevent_init(int irq)
 clockevent_set_clock(cd, 32768);
 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
- cd->cpumask = cpumask_of(0);
 clockevents_register_device(&ds1287_clockevent);
diff --git a/trunk/arch/mips/kernel/cevt-gt641xx.c b/trunk/arch/mips/kernel/cevt-gt641xx.c
index e9b787feedcb..6e2f58520afb 100644
--- a/trunk/arch/mips/kernel/cevt-gt641xx.c
+++ b/trunk/arch/mips/kernel/cevt-gt641xx.c
@@ -96,6 +96,7 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev)
 static struct clock_event_device gt641xx_timer0_clockevent = {
 .name = "gt641xx-timer0",
 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ .cpumask = CPU_MASK_CPU0,
 .irq = GT641XX_TIMER0_IRQ,
 .set_next_event = gt641xx_timer0_set_next_event,
 .set_mode = gt641xx_timer0_set_mode,
@@ -131,7 +132,6 @@ static int __init gt641xx_timer0_clockevent_init(void)
 clockevent_set_clock(cd, gt641xx_base_clock);
 cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
- cd->cpumask = cpumask_of(0);
 clockevents_register_device(&gt641xx_timer0_clockevent);
diff --git a/trunk/arch/mips/kernel/cevt-r4k.c b/trunk/arch/mips/kernel/cevt-r4k.c
index e1ec83b68031..4a4c59f2737a 100644
--- a/trunk/arch/mips/kernel/cevt-r4k.c
+++ b/trunk/arch/mips/kernel/cevt-r4k.c
@@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void)
 cd->rating = 300;
 cd->irq = irq;
- cd->cpumask = cpumask_of(cpu);
+ cd->cpumask = cpumask_of_cpu(cpu);
 cd->set_next_event = mips_next_event;
 cd->set_mode = mips_set_clock_mode;
 cd->event_handler = mips_event_handler;
diff --git a/trunk/arch/mips/kernel/cevt-sb1250.c b/trunk/arch/mips/kernel/cevt-sb1250.c
index a2eebaafda52..63ac3ad462bc 100644
--- a/trunk/arch/mips/kernel/cevt-sb1250.c
+++ b/trunk/arch/mips/kernel/cevt-sb1250.c
@@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void)
cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = sibyte_next_event; cd->set_mode = sibyte_set_mode; clockevents_register_device(cd); @@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void) action->name = name; action->dev_id = cd; - irq_set_affinity(irq, cpumask_of(cpu)); + irq_set_affinity(irq, cpumask_of_cpu(cpu)); setup_irq(irq, action); } diff --git a/trunk/arch/mips/kernel/cevt-smtc.c b/trunk/arch/mips/kernel/cevt-smtc.c index 6d45e24db5bf..5162fe4b5952 100644 --- a/trunk/arch/mips/kernel/cevt-smtc.c +++ b/trunk/arch/mips/kernel/cevt-smtc.c @@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = mips_next_event; cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; diff --git a/trunk/arch/mips/kernel/cevt-txx9.c b/trunk/arch/mips/kernel/cevt-txx9.c index eccf7d6096bd..b5fc4eb412d2 100644 --- a/trunk/arch/mips/kernel/cevt-txx9.c +++ b/trunk/arch/mips/kernel/cevt-txx9.c @@ -112,6 +112,7 @@ static struct clock_event_device txx9tmr_clock_event_device = { .name = "TXx9", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .rating = 200, + .cpumask = CPU_MASK_CPU0, .set_mode = txx9tmr_set_mode, .set_next_event = txx9tmr_set_next_event, }; @@ -149,7 +150,6 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq, clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd); cd->min_delta_ns = clockevent_delta2ns(0xf, cd); cd->irq = irq; - cd->cpumask = cpumask_of(0), clockevents_register_device(cd); setup_irq(irq, &txx9tmr_irq); printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n", diff --git a/trunk/arch/mips/kernel/i8253.c b/trunk/arch/mips/kernel/i8253.c index f4d187825f96..b6ac55162b9a 100644 --- a/trunk/arch/mips/kernel/i8253.c +++ b/trunk/arch/mips/kernel/i8253.c @@ -115,7 +115,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. 
*/ - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); clockevent_set_clock(cd, CLOCK_TICK_RATE); cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd); cd->min_delta_ns = clockevent_delta2ns(0xF, cd); diff --git a/trunk/arch/mips/kernel/irq-gic.c b/trunk/arch/mips/kernel/irq-gic.c index 494a49a317e9..f0a4bb19e096 100644 --- a/trunk/arch/mips/kernel/irq-gic.c +++ b/trunk/arch/mips/kernel/irq-gic.c @@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq) static DEFINE_SPINLOCK(gic_lock); -static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) { cpumask_t tmp = CPU_MASK_NONE; unsigned long flags; @@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) pr_debug(KERN_DEBUG "%s called\n", __func__); irq -= _irqbase; - cpumask_and(&tmp, cpumask, cpu_online_mask); + cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) return; @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = *cpumask; + irq_desc[irq].affinity = cpumask; spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/trunk/arch/mips/kernel/smp-cmp.c b/trunk/arch/mips/kernel/smp-cmp.c index f27beca4b26d..ca476c4f62a5 100644 --- a/trunk/arch/mips/kernel/smp-cmp.c +++ b/trunk/arch/mips/kernel/smp-cmp.c @@ -51,10 +51,10 @@ static int __init allowcpus(char *str) int len; cpus_clear(cpu_allow_map); - if (cpulist_parse(str, &cpu_allow_map) == 0) { + if (cpulist_parse(str, cpu_allow_map) == 0) { cpu_set(0, cpu_allow_map); cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map); - len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map); + len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map); buf[len] = '\0'; pr_debug("Allowable CPUs: %s\n", buf); return 1; @@ -226,7 +226,7 @@ void __init cmp_smp_setup(void) for (i = 1; i < NR_CPUS; i++) { if (amon_cpu_avail(i)) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = ++ncpu; __cpu_logical_map[ncpu] = i; } diff --git a/trunk/arch/mips/kernel/smp-mt.c b/trunk/arch/mips/kernel/smp-mt.c index 6f7ee5ac46ee..87a1816c1f45 100644 --- a/trunk/arch/mips/kernel/smp-mt.c +++ b/trunk/arch/mips/kernel/smp-mt.c @@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0, write_vpe_c0_vpeconf0(tmp); /* Record this as available CPU */ - cpu_set(tc, cpu_possible_map); + cpu_set(tc, phys_cpu_present_map); __cpu_number_map[tc] = ++ncpu; __cpu_logical_map[ncpu] = tc; } diff --git a/trunk/arch/mips/kernel/smp.c b/trunk/arch/mips/kernel/smp.c index 3da94704f816..8bf88faf5afd 100644 --- a/trunk/arch/mips/kernel/smp.c +++ b/trunk/arch/mips/kernel/smp.c @@ -44,10 +44,15 @@ #include #endif /* CONFIG_MIPS_MT_SMTC */ +cpumask_t phys_cpu_present_map; /* Bitmask of available CPUs */ volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ +cpumask_t cpu_online_map; /* Bitmask of currently online CPUs */ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ +EXPORT_SYMBOL(phys_cpu_present_map); +EXPORT_SYMBOL(cpu_online_map); + extern void cpu_idle(void); /* Number of TCs (or siblings in Intel speak) per CPU core */ @@ -190,7 +195,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* preload SMP state for boot cpu */ void __devinit smp_prepare_boot_cpu(void) { - 
cpu_set(0, cpu_possible_map); + cpu_set(0, phys_cpu_present_map); cpu_set(0, cpu_online_map); cpu_set(0, cpu_callin_map); } diff --git a/trunk/arch/mips/kernel/smtc.c b/trunk/arch/mips/kernel/smtc.c index b6cca01ff82b..897fb2b4751c 100644 --- a/trunk/arch/mips/kernel/smtc.c +++ b/trunk/arch/mips/kernel/smtc.c @@ -290,7 +290,7 @@ static void smtc_configure_tlb(void) * possibly leave some TCs/VPEs as "slave" processors. * * Use c0_MVPConf0 to find out how many TCs are available, setting up - * cpu_possible_map and the logical/physical mappings. + * phys_cpu_present_map and the logical/physical mappings. */ int __init smtc_build_cpu_map(int start_cpu_slot) @@ -304,7 +304,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot) */ ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; for (i=start_cpu_slot; i> MVPCONF0_PTC_SHIFT) + 1)) { - cpu_clear(tc, cpu_possible_map); + cpu_clear(tc, phys_cpu_present_map); cpu_clear(tc, cpu_present_map); tc++; } diff --git a/trunk/arch/mips/mti-malta/malta-smtc.c b/trunk/arch/mips/mti-malta/malta-smtc.c index aabd7274507b..f84a46a8ae6e 100644 --- a/trunk/arch/mips/mti-malta/malta-smtc.c +++ b/trunk/arch/mips/mti-malta/malta-smtc.c @@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = { */ -void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) +void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity) { - cpumask_t tmask = *affinity; + cpumask_t tmask = affinity; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) * be made to forward to an offline "CPU". */ - for_each_cpu(cpu, affinity) { + for_each_cpu_mask(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } diff --git a/trunk/arch/mips/nxp/pnx8550/common/time.c b/trunk/arch/mips/nxp/pnx8550/common/time.c index cf293b279098..62f495b57f93 100644 --- a/trunk/arch/mips/nxp/pnx8550/common/time.c +++ b/trunk/arch/mips/nxp/pnx8550/common/time.c @@ -102,7 +102,6 @@ __init void plat_time_init(void) unsigned int p; unsigned int pow2p; - pnx8xxx_clockevent.cpumask = cpu_none_mask; clockevents_register_device(&pnx8xxx_clockevent); clocksource_register(&pnx_clocksource); diff --git a/trunk/arch/mips/pmc-sierra/yosemite/smp.c b/trunk/arch/mips/pmc-sierra/yosemite/smp.c index f78c29b68d77..3a7df647ca77 100644 --- a/trunk/arch/mips/pmc-sierra/yosemite/smp.c +++ b/trunk/arch/mips/pmc-sierra/yosemite/smp.c @@ -141,7 +141,7 @@ static void __cpuinit yos_boot_secondary(int cpu, struct task_struct *idle) } /* - * Detect available CPUs, populate cpu_possible_map before smp_init + * Detect available CPUs, populate phys_cpu_present_map before smp_init * * We don't want to start the secondary CPU yet nor do we have a nice probing * feature in PMON so we just assume presence of the secondary core. 
@@ -150,10 +150,10 @@ static void __init yos_smp_setup(void) { int i; - cpus_clear(cpu_possible_map); + cpus_clear(phys_cpu_present_map); for (i = 0; i < 2; i++) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; } diff --git a/trunk/arch/mips/sgi-ip27/ip27-smp.c b/trunk/arch/mips/sgi-ip27/ip27-smp.c index 5b47d6b65275..ba5cdebeaf0d 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-smp.c +++ b/trunk/arch/mips/sgi-ip27/ip27-smp.c @@ -76,7 +76,7 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && (tot_cpus_found != NR_CPUS)) { - cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, phys_cpu_present_map); alloc_cpupda(cpuid, tot_cpus_found); cpus_found++; tot_cpus_found++; diff --git a/trunk/arch/mips/sgi-ip27/ip27-timer.c b/trunk/arch/mips/sgi-ip27/ip27-timer.c index f024057a35f8..1327c2746fb7 100644 --- a/trunk/arch/mips/sgi-ip27/ip27-timer.c +++ b/trunk/arch/mips/sgi-ip27/ip27-timer.c @@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void) cd->min_delta_ns = clockevent_delta2ns(0x300, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = rt_next_event; cd->set_mode = rt_set_mode; clockevents_register_device(cd); diff --git a/trunk/arch/mips/sibyte/bcm1480/irq.c b/trunk/arch/mips/sibyte/bcm1480/irq.c index 12b465d404df..a35818ed4263 100644 --- a/trunk/arch/mips/sibyte/bcm1480/irq.c +++ b/trunk/arch/mips/sibyte/bcm1480/irq.c @@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq); static void disable_bcm1480_irq(unsigned int irq); static void ack_bcm1480_irq(unsigned int irq); #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); +static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask); #endif #ifdef CONFIG_PCI @@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) +static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask) { int i = 0, old_cpu, cpu, int_on, k; u64 cur_ints; @@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) unsigned long flags; unsigned int irq_dirty; - if (cpumask_weight(mask) != 1) { + if (cpus_weight(mask) != 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } - i = cpumask_first(mask); + i = first_cpu(mask); /* Convert logical CPU to physical CPU */ cpu = cpu_logical_map(i); diff --git a/trunk/arch/mips/sibyte/bcm1480/smp.c b/trunk/arch/mips/sibyte/bcm1480/smp.c index dddfda8e8294..bd9eeb43ed0e 100644 --- a/trunk/arch/mips/sibyte/bcm1480/smp.c +++ b/trunk/arch/mips/sibyte/bcm1480/smp.c @@ -136,7 +136,7 @@ static void __cpuinit bcm1480_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * cpu_possible_map and the logical/physical mappings. + * phys_cpu_present_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? 
* * Common setup before any secondaries are started @@ -145,14 +145,14 @@ static void __init bcm1480_smp_setup(void) { int i, num; - cpus_clear(cpu_possible_map); - cpu_set(0, cpu_possible_map); + cpus_clear(phys_cpu_present_map); + cpu_set(0, phys_cpu_present_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/trunk/arch/mips/sibyte/sb1250/irq.c b/trunk/arch/mips/sibyte/sb1250/irq.c index 808ac2959b8c..a5158483986e 100644 --- a/trunk/arch/mips/sibyte/sb1250/irq.c +++ b/trunk/arch/mips/sibyte/sb1250/irq.c @@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq); static void disable_sb1250_irq(unsigned int irq); static void ack_sb1250_irq(unsigned int irq); #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); +static void sb1250_set_affinity(unsigned int irq, cpumask_t mask); #endif #ifdef CONFIG_SIBYTE_HAS_LDT @@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) +static void sb1250_set_affinity(unsigned int irq, cpumask_t mask) { int i = 0, old_cpu, cpu, int_on; u64 cur_ints; struct irq_desc *desc = irq_desc + irq; unsigned long flags; - i = cpumask_first(mask); + i = first_cpu(mask); - if (cpumask_weight(mask) > 1) { + if (cpus_weight(mask) > 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } diff --git a/trunk/arch/mips/sibyte/sb1250/smp.c b/trunk/arch/mips/sibyte/sb1250/smp.c index 5950a288a7da..0734b933e969 100644 --- a/trunk/arch/mips/sibyte/sb1250/smp.c +++ b/trunk/arch/mips/sibyte/sb1250/smp.c @@ -124,7 +124,7 @@ static void __cpuinit sb1250_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * cpu_possible_map and the logical/physical mappings. + * phys_cpu_present_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? 
* * Common setup before any secondaries are started @@ -133,14 +133,14 @@ static void __init sb1250_smp_setup(void) { int i, num; - cpus_clear(cpu_possible_map); - cpu_set(0, cpu_possible_map); + cpus_clear(phys_cpu_present_map); + cpu_set(0, phys_cpu_present_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, cpu_possible_map); + cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/trunk/arch/mips/sni/time.c b/trunk/arch/mips/sni/time.c index 69f5f88711cc..796e3ce28720 100644 --- a/trunk/arch/mips/sni/time.c +++ b/trunk/arch/mips/sni/time.c @@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void) struct irqaction *action = &a20r_irqaction; unsigned int cpu = smp_processor_id(); - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); clockevents_register_device(cd); action->dev_id = cd; setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction); diff --git a/trunk/arch/parisc/Kconfig b/trunk/arch/parisc/Kconfig index aacf11d33723..644a70b1b04e 100644 --- a/trunk/arch/parisc/Kconfig +++ b/trunk/arch/parisc/Kconfig @@ -11,7 +11,6 @@ config PARISC select HAVE_OPROFILE select RTC_CLASS select RTC_DRV_PARISC - select INIT_ALL_POSSIBLE help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, diff --git a/trunk/arch/parisc/kernel/irq.c b/trunk/arch/parisc/kernel/irq.c index 4cea935e2f99..23ef950df008 100644 --- a/trunk/arch/parisc/kernel/irq.c +++ b/trunk/arch/parisc/kernel/irq.c @@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) return 0; } -static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) +static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest) { - if (cpu_check_affinity(irq, dest)) + if (cpu_check_affinity(irq, &dest)) return; - irq_desc[irq].affinity = *dest; + irq_desc[irq].affinity = dest; } #endif diff --git a/trunk/arch/parisc/kernel/smp.c b/trunk/arch/parisc/kernel/smp.c index 80bc000523fa..d47f3975c9c6 100644 --- a/trunk/arch/parisc/kernel/smp.c +++ b/trunk/arch/parisc/kernel/smp.c @@ -67,6 +67,21 @@ static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is boo static int parisc_max_cpus __read_mostly = 1; +/* online cpus are ones that we've managed to bring up completely + * possible cpus are all valid cpu + * present cpus are all detected cpu + * + * On startup we bring up the "possible" cpus. Since we discover + * CPUs later, we add them as hotplug, so the possible cpu mask is + * empty in the beginning. + */ + +cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; /* Bitmap of online CPUs */ +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; /* Bitmap of Present CPUs */ + +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); + DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED; enum ipi_message_type { diff --git a/trunk/arch/powerpc/include/asm/disassemble.h b/trunk/arch/powerpc/include/asm/disassemble.h deleted file mode 100644 index 9b198d1b3b2b..000000000000 --- a/trunk/arch/powerpc/include/asm/disassemble.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __ASM_PPC_DISASSEMBLE_H__ -#define __ASM_PPC_DISASSEMBLE_H__ - -#include - -static inline unsigned int get_op(u32 inst) -{ - return inst >> 26; -} - -static inline unsigned int get_xop(u32 inst) -{ - return (inst >> 1) & 0x3ff; -} - -static inline unsigned int get_sprn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_dcrn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_rt(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_rs(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_ra(u32 inst) -{ - return (inst >> 16) & 0x1f; -} - -static inline unsigned int get_rb(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_rc(u32 inst) -{ - return inst & 0x1; -} - -static inline unsigned int get_ws(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_d(u32 inst) -{ - return inst & 0xffff; -} - -#endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/trunk/arch/powerpc/include/asm/kvm_44x.h b/trunk/arch/powerpc/include/asm/kvm_44x.h deleted file mode 100644 index f49031b632ca..000000000000 --- a/trunk/arch/powerpc/include/asm/kvm_44x.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __ASM_44X_H__ -#define __ASM_44X_H__ - -#include - -#define PPC44x_TLB_SIZE 64 - -/* If the guest is expecting it, this can be as large as we like; we'd just - * need to find some way of advertising it. */ -#define KVM44x_GUEST_TLB_SIZE 64 - -struct kvmppc_44x_shadow_ref { - struct page *page; - u16 gtlb_index; - u8 writeable; - u8 tid; -}; - -struct kvmppc_vcpu_44x { - /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - - /* References to guest pages in the hardware TLB. */ - struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; - - /* State of the shadow TLB at guest context switch time. 
*/ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); - -#endif /* __ASM_44X_H__ */ diff --git a/trunk/arch/powerpc/include/asm/kvm_host.h b/trunk/arch/powerpc/include/asm/kvm_host.h index c1e436fe7738..34b52b7180cd 100644 --- a/trunk/arch/powerpc/include/asm/kvm_host.h +++ b/trunk/arch/powerpc/include/asm/kvm_host.h @@ -64,58 +64,27 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvmppc_44x_tlbe { +struct tlbe { u32 tid; /* Only the low 8 bits are used. */ u32 word0; u32 word1; u32 word2; }; -enum kvm_exit_types { - MMIO_EXITS, - DCR_EXITS, - SIGNAL_EXITS, - ITLB_REAL_MISS_EXITS, - ITLB_VIRT_MISS_EXITS, - DTLB_REAL_MISS_EXITS, - DTLB_VIRT_MISS_EXITS, - SYSCALL_EXITS, - ISI_EXITS, - DSI_EXITS, - EMULATED_INST_EXITS, - EMULATED_MTMSRWE_EXITS, - EMULATED_WRTEE_EXITS, - EMULATED_MTSPR_EXITS, - EMULATED_MFSPR_EXITS, - EMULATED_MTMSR_EXITS, - EMULATED_MFMSR_EXITS, - EMULATED_TLBSX_EXITS, - EMULATED_TLBWE_EXITS, - EMULATED_RFI_EXITS, - DEC_EXITS, - EXT_INTR_EXITS, - HALT_WAKEUP, - USR_PR_INST, - FP_UNAVAIL, - DEBUG_EXITS, - TIMEINGUEST, - __NUMBER_OF_KVM_EXIT_TYPES -}; - -/* allow access to big endian 32bit upper/lower parts and 64bit var */ -struct kvmppc_exit_timing { - union { - u64 tv64; - struct { - u32 tbu, tbl; - } tv32; - }; -}; - struct kvm_arch { }; struct kvm_vcpu_arch { + /* Unmodified copy of the guest's TLB. */ + struct tlbe guest_tlb[PPC44x_TLB_SIZE]; + /* TLB that's actually used when the guest is running. */ + struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; + /* Pages which are referenced in the shadow TLB. */ + struct page *shadow_pages[PPC44x_TLB_SIZE]; + + /* Track which TLB entries we've modified in the current exit. 
*/ + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + u32 host_stack; u32 host_pid; u32 host_dbcr0; @@ -125,32 +94,32 @@ struct kvm_vcpu_arch { u32 host_msr; u64 fpr[32]; - ulong gpr[32]; + u32 gpr[32]; - ulong pc; + u32 pc; u32 cr; - ulong ctr; - ulong lr; - ulong xer; + u32 ctr; + u32 lr; + u32 xer; - ulong msr; + u32 msr; u32 mmucr; - ulong sprg0; - ulong sprg1; - ulong sprg2; - ulong sprg3; - ulong sprg4; - ulong sprg5; - ulong sprg6; - ulong sprg7; - ulong srr0; - ulong srr1; - ulong csrr0; - ulong csrr1; - ulong dsrr0; - ulong dsrr1; - ulong dear; - ulong esr; + u32 sprg0; + u32 sprg1; + u32 sprg2; + u32 sprg3; + u32 sprg4; + u32 sprg5; + u32 sprg6; + u32 sprg7; + u32 srr0; + u32 srr1; + u32 csrr0; + u32 csrr1; + u32 dsrr0; + u32 dsrr1; + u32 dear; + u32 esr; u32 dec; u32 decar; u32 tbl; @@ -158,7 +127,7 @@ struct kvm_vcpu_arch { u32 tcr; u32 tsr; u32 ivor[16]; - ulong ivpr; + u32 ivpr; u32 pir; u32 shadow_pid; @@ -171,22 +140,9 @@ struct kvm_vcpu_arch { u32 dbcr0; u32 dbcr1; -#ifdef CONFIG_KVM_EXIT_TIMING - struct kvmppc_exit_timing timing_exit; - struct kvmppc_exit_timing timing_last_enter; - u32 last_exit_type; - u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; - u64 timing_last_exit; - struct dentry *debugfs_exit_timing; -#endif - u32 last_inst; - ulong fault_dear; - ulong fault_esr; + u32 fault_dear; + u32 fault_esr; gpa_t paddr_accessed; u8 io_gpr; /* GPR used as IO source/target */ diff --git a/trunk/arch/powerpc/include/asm/kvm_ppc.h b/trunk/arch/powerpc/include/asm/kvm_ppc.h index 36d2a50a8487..bb62ad876de3 100644 --- a/trunk/arch/powerpc/include/asm/kvm_ppc.h +++ b/trunk/arch/powerpc/include/asm/kvm_ppc.h @@ -29,6 +29,11 @@ #include #include +struct kvm_tlb { + struct tlbe guest_tlb[PPC44x_TLB_SIZE]; + struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; +}; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ @@ -36,6 +41,9 @@ enum emulation_result { EMULATE_FAIL, /* can't emulate this instruction */ }; +extern const unsigned char exception_priority[]; +extern const unsigned char priority_exception[]; + extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern char kvmppc_handlers_start[]; extern unsigned long kvmppc_handler_len; @@ -50,44 +58,51 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - u64 asid, u32 flags, u32 max_bytes, - unsigned int gtlb_idx); +extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, + u64 asid, u32 flags); +extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, + gva_t eend, u32 asid); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); -/* Core-specific hooks */ - -extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, - unsigned int id); -extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); -extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); -extern int 
kvmppc_core_check_processor_compat(void); -extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr); - -extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); - -extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); -extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); - -extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); -extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); -extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); - -extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int op, int *advance); -extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); -extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); - -extern int kvmppc_booke_init(void); -extern void kvmppc_booke_exit(void); +/* XXX Book E specific */ +extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); + +extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); + +static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) +{ + unsigned int priority = exception_priority[exception]; + set_bit(priority, &vcpu->arch.pending_exceptions); +} + +static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) +{ + unsigned int priority = exception_priority[exception]; + clear_bit(priority, &vcpu->arch.pending_exceptions); +} + +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ +static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + + vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) + kvm_vcpu_block(vcpu); +} + +static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) +{ + if (vcpu->arch.pid != new_pid) { + vcpu->arch.pid = new_pid; + vcpu->arch.swap_pid = 1; + } +} extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); diff --git a/trunk/arch/powerpc/include/asm/mmu-44x.h b/trunk/arch/powerpc/include/asm/mmu-44x.h index 27cc6fdcd3b7..8a97cfb08b7e 100644 --- a/trunk/arch/powerpc/include/asm/mmu-44x.h +++ b/trunk/arch/powerpc/include/asm/mmu-44x.h @@ -56,7 +56,6 @@ #ifndef __ASSEMBLY__ extern unsigned int tlb_44x_hwater; -extern unsigned int tlb_44x_index; typedef struct { unsigned int id; diff --git a/trunk/arch/powerpc/include/asm/topology.h b/trunk/arch/powerpc/include/asm/topology.h index 373fca394a54..c32da6f97999 100644 --- a/trunk/arch/powerpc/include/asm/topology.h +++ b/trunk/arch/powerpc/include/asm/topology.h @@ -48,6 +48,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/powerpc/kernel/asm-offsets.c b/trunk/arch/powerpc/kernel/asm-offsets.c index 9937fe44555f..661d07d2146b 100644 --- a/trunk/arch/powerpc/kernel/asm-offsets.c +++ b/trunk/arch/powerpc/kernel/asm-offsets.c @@ -23,6 +23,9 @@ #include #include #include +#ifdef CONFIG_KVM +#include +#endif #ifdef CONFIG_PPC64 #include #include @@ -48,9 +51,6 @@ #ifdef CONFIG_PPC_ISERIES #include #endif 
-#ifdef CONFIG_KVM -#include -#endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) #include "head_booke.h" @@ -357,10 +357,12 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); + DEFINE(TLBE_BYTES, sizeof(struct tlbe)); DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); + DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); @@ -383,16 +385,5 @@ int main(void) DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_KVM_EXIT_TIMING - DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbu)); - DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbl)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbu)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbl)); -#endif - return 0; } diff --git a/trunk/arch/powerpc/kernel/irq.c b/trunk/arch/powerpc/kernel/irq.c index 23b8b5e36f98..ac222d0ab12e 100644 --- a/trunk/arch/powerpc/kernel/irq.c +++ b/trunk/arch/powerpc/kernel/irq.c @@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map) mask = map; } if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, &mask); + irq_desc[irq].chip->set_affinity(irq, mask); else if (irq_desc[irq].action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/trunk/arch/powerpc/kernel/smp.c b/trunk/arch/powerpc/kernel/smp.c index 65484b2200b3..8ac3f721d235 100644 --- a/trunk/arch/powerpc/kernel/smp.c +++ b/trunk/arch/powerpc/kernel/smp.c @@ -59,9 +59,13 @@ struct thread_info *secondary_ti; +cpumask_t cpu_possible_map = CPU_MASK_NONE; +cpumask_t cpu_online_map = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c index 99f1ddd68582..e1f3a5140429 100644 --- a/trunk/arch/powerpc/kernel/time.c +++ b/trunk/arch/powerpc/kernel/time.c @@ -844,7 +844,7 @@ static void register_decrementer_clockevent(int cpu) struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; *dec = decrementer_clockevent; - dec->cpumask = cpumask_of(cpu); + dec->cpumask = cpumask_of_cpu(cpu); printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); diff --git a/trunk/arch/powerpc/kvm/44x.c b/trunk/arch/powerpc/kvm/44x.c deleted file mode 100644 index a66bec57265a..000000000000 --- a/trunk/arch/powerpc/kvm/44x.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include - -#include -#include -#include -#include -#include - -#include "44x_tlb.h" - -/* Note: clearing MSR[DE] just means that the debug interrupt will not be - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt - * will be delivered as an "imprecise debug event" (which is indicated by - * DBSR[IDE]. - */ -static void kvm44x_disable_debug_interrupts(void) -{ - mtmsr(mfmsr() & ~MSR_DE); -} - -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ - kvm44x_disable_debug_interrupts(); - - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); - mtmsr(vcpu->arch.host_msr); -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - u32 dbcr0 = 0; - - vcpu->arch.host_msr = mfmsr(); - kvm44x_disable_debug_interrupts(); - - /* Save host debug register state. */ - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); - - /* set registers up for guest */ - - if (dbg->bp[0]) { - mtspr(SPRN_IAC1, dbg->bp[0]); - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; - } - if (dbg->bp[1]) { - mtspr(SPRN_IAC2, dbg->bp[1]); - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; - } - if (dbg->bp[2]) { - mtspr(SPRN_IAC3, dbg->bp[2]); - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; - } - if (dbg->bp[3]) { - mtspr(SPRN_IAC4, dbg->bp[3]); - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; - } - - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBCR1, 0); - mtspr(SPRN_DBCR2, 0); -} - -void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ - kvmppc_44x_tlb_load(vcpu); -} - -void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) -{ - kvmppc_44x_tlb_put(vcpu); -} - -int kvmppc_core_check_processor_compat(void) -{ - int r; - - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) - r = 0; - else - r = -ENOTSUPP; - - return r; -} - -int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; - int i; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. 
*/ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) - vcpu_44x->shadow_refs[i].gtlb_index = -1; - - return 0; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } - - gtlbe = &vcpu_44x->guest_tlb[index]; - - tr->physical_address = tlb_xlate(gtlbe, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; - - return 0; -} - -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) -{ - struct kvmppc_vcpu_44x *vcpu_44x; - struct kvm_vcpu *vcpu; - int err; - - vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_44x) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_44x->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - return vcpu; - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -out: - return ERR_PTR(err); -} - -void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -} - -static int kvmppc_44x_init(void) -{ - int r; - - r = kvmppc_booke_init(); - if (r) - return r; - - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); -} - -static void kvmppc_44x_exit(void) -{ - kvmppc_booke_exit(); -} - -module_init(kvmppc_44x_init); -module_exit(kvmppc_44x_exit); diff --git a/trunk/arch/powerpc/kvm/44x_emulate.c b/trunk/arch/powerpc/kvm/44x_emulate.c deleted file mode 100644 index 82489a743a6f..000000000000 --- a/trunk/arch/powerpc/kvm/44x_emulate.c +++ /dev/null @@ -1,371 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 
2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include "timing.h" - -#include "booke.h" -#include "44x_tlb.h" - -#define OP_RFI 19 - -#define XOP_RFI 50 -#define XOP_MFMSR 83 -#define XOP_WRTEE 131 -#define XOP_MTMSR 146 -#define XOP_WRTEEI 163 -#define XOP_MFDCR 323 -#define XOP_MTDCR 451 -#define XOP_TLBSX 914 -#define XOP_ICCCI 966 -#define XOP_TLBWE 978 - -static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); -} - -int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) -{ - int emulated = EMULATE_DONE; - int dcrn; - int ra; - int rb; - int rc; - int rs; - int rt; - int ws; - - switch (get_op(inst)) { - case OP_RFI: - switch (get_xop(inst)) { - case XOP_RFI: - kvmppc_emul_rfi(vcpu); - kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); - *advance = 0; - break; - - default: - emulated = EMULATE_FAIL; - break; - } - break; - - case 31: - switch (get_xop(inst)) { - - case XOP_MFMSR: - rt = get_rt(inst); - vcpu->arch.gpr[rt] = vcpu->arch.msr; - kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); - break; - - case XOP_MTMSR: - rs = get_rs(inst); - kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); - break; - - case XOP_WRTEE: - rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (vcpu->arch.gpr[rs] & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - - case XOP_WRTEEI: - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (inst & MSR_EE); - kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); - break; - - case XOP_MFDCR: - dcrn = get_dcrn(inst); - rt = get_rt(inst); - - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. 
- */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } - - break; - - case XOP_MTDCR: - dcrn = get_dcrn(inst); - rs = get_rs(inst); - - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = vcpu->arch.gpr[rs]; - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } - - break; - - case XOP_TLBWE: - ra = get_ra(inst); - rs = get_rs(inst); - ws = get_ws(inst); - emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); - break; - - case XOP_TLBSX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - rc = get_rc(inst); - emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); - break; - - case XOP_ICCCI: - break; - - default: - emulated = EMULATE_FAIL; - } - - break; - - default: - emulated = EMULATE_FAIL; - } - - return emulated; -} - -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) -{ - switch (sprn) { - case SPRN_MMUCR: - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; - case SPRN_PID: - kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; - case SPRN_CCR0: - vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; - case SPRN_DEAR: - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; - case SPRN_ESR: - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR0: - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR1: - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; - case SPRN_TSR: - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; - case SPRN_TCR: - vcpu->arch.tcr = vcpu->arch.gpr[rs]; - kvmppc_emulate_dec(vcpu); - break; - - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. 
*/ - case SPRN_SPRG4: - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG5: - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG6: - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG7: - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; - - case SPRN_IVPR: - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR0: - vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR1: - vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR2: - vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR3: - vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR4: - vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR5: - vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR6: - vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR7: - vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR8: - vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR9: - vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR10: - vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR11: - vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR12: - vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR13: - vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR14: - vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; - break; - case SPRN_IVOR15: - vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; - break; - - default: - return EMULATE_FAIL; - } - - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); - return EMULATE_DONE; -} - -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) -{ - switch (sprn) { - /* 440 */ - case SPRN_MMUCR: - vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; - case SPRN_CCR0: - vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; - case SPRN_CCR1: - vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; - - /* Book E */ - case SPRN_PID: - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; - case SPRN_IVPR: - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; - case SPRN_DEAR: - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; - case SPRN_ESR: - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; - case SPRN_DBCR0: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; - case SPRN_DBCR1: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; - - case SPRN_IVOR0: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; - break; - case SPRN_IVOR1: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; - break; - case SPRN_IVOR2: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; - break; - case SPRN_IVOR3: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; - break; - case SPRN_IVOR4: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; - break; - case SPRN_IVOR5: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; - break; - case SPRN_IVOR6: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; - break; - case SPRN_IVOR7: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; - break; - case SPRN_IVOR8: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; - break; - case SPRN_IVOR9: - vcpu->arch.gpr[rt] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; - break; - case SPRN_IVOR10: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; - break; - case SPRN_IVOR11: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; - break; - case SPRN_IVOR12: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; - break; - case SPRN_IVOR13: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; - break; - case SPRN_IVOR14: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; - break; - case SPRN_IVOR15: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; - break; - - default: - return EMULATE_FAIL; - } - - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); - return EMULATE_DONE; -} - diff --git a/trunk/arch/powerpc/kvm/44x_tlb.c b/trunk/arch/powerpc/kvm/44x_tlb.c index 9a34b8edb9e2..ad72c6f9811f 100644 --- a/trunk/arch/powerpc/kvm/44x_tlb.c +++ b/trunk/arch/powerpc/kvm/44x_tlb.c @@ -22,103 +22,20 @@ #include #include #include - -#include #include #include -#include -#include "timing.h" #include "44x_tlb.h" -#ifndef PPC44x_TLBE_SIZE -#define PPC44x_TLBE_SIZE PPC44x_TLB_4K -#endif - -#define PAGE_SIZE_4K (1<<12) -#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) - -#define PPC44x_TLB_UATTR_MASK \ - (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) -#ifdef DEBUG -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_44x_tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - tlbe = &vcpu_44x->guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} -#endif - -static inline void kvmppc_44x_tlbie(unsigned int index) -{ - /* 0 <= index < 64, so the V bit is clear and we can use the index as - * word0. */ - asm volatile( - "tlbwe %[index], %[index], 0\n" - : - : [index] "r"(index) - ); -} - -static inline void kvmppc_44x_tlbre(unsigned int index, - struct kvmppc_44x_tlbe *tlbe) -{ - asm volatile( - "tlbre %[word0], %[index], 0\n" - "mfspr %[tid], %[sprn_mmucr]\n" - "andi. %[tid], %[tid], 0xff\n" - "tlbre %[word1], %[index], 1\n" - "tlbre %[word2], %[index], 2\n" - : [word0] "=r"(tlbe->word0), - [word1] "=r"(tlbe->word1), - [word2] "=r"(tlbe->word2), - [tid] "=r"(tlbe->tid) - : [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - : "cc" - ); -} - -static inline void kvmppc_44x_tlbwe(unsigned int index, - struct kvmppc_44x_tlbe *stlbe) -{ - unsigned long tmp; - - asm volatile( - "mfspr %[tmp], %[sprn_mmucr]\n" - "rlwimi %[tmp], %[tid], 0, 0xff\n" - "mtspr %[sprn_mmucr], %[tmp]\n" - "tlbwe %[word0], %[index], 0\n" - "tlbwe %[word1], %[index], 1\n" - "tlbwe %[word2], %[index], 2\n" - : [tmp] "=&r"(tmp) - : [word0] "r"(stlbe->word0), - [word1] "r"(stlbe->word1), - [word2] "r"(stlbe->word2), - [tid] "r"(stlbe->tid), - [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - ); -} +static unsigned int kvmppc_tlb_44x_pos; static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) { - /* We only care about the guest's permission and user bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; + /* Mask off reserved bits. 
*/ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; if (!usermode) { /* Guest is in supervisor mode, so we need to translate guest @@ -130,60 +47,18 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) /* Make sure host can always access this memory. */ attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; - /* WIMGE = 0b00100 */ - attrib |= PPC44x_TLB_M; - return attrib; } -/* Load shadow TLB back into hardware. */ -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbwe(i, stlbe); - } -} - -static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int i) -{ - vcpu_44x->shadow_tlb_mod[i] = 1; -} - -/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (vcpu_44x->shadow_tlb_mod[i]) - kvmppc_44x_tlbre(i, stlbe); - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbie(i); - } -} - - /* Search the guest TLB for a matching entry. */ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as) { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -208,89 +83,78 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, return -1; } -int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int index; - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; } -int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) +struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int index; - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; } -static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int stlb_index) +static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; - - if (!ref->page) - return; - - /* Discard from the TLB. */ - /* Note: we could actually invalidate a host mapping, if the host overwrote - * this TLB entry since we inserted a guest mapping. */ - kvmppc_44x_tlbie(stlb_index); - - /* Now release the page. */ - if (ref->writeable) - kvm_release_page_dirty(ref->page); - else - kvm_release_page_clean(ref->page); - - ref->page = NULL; + return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); +} - /* XXX set tlb_44x_index to stlb_index? 
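 * (Sketch of what that XXX proposes; note the allocator below does
 * "victim = ++tlb_44x_index", so the naive
 *
 *   tlb_44x_index = stlb_index;
 *
 * would hand out stlb_index + 1 next rather than the slot just freed.
 * Presumably that bookkeeping subtlety is why this is still an open
 * question.)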
*/ +static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, + unsigned int index) +{ + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; + struct page *page = vcpu->arch.shadow_pages[index]; - KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); + if (get_tlb_v(stlbe)) { + if (kvmppc_44x_tlbe_is_writable(stlbe)) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); + } } void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu_44x, i); + kvmppc_44x_shadow_release(vcpu, i); } -/** - * kvmppc_mmu_map -- create a host mapping for guest memory - * - * If the guest wanted a larger page than the host supports, only the first - * host page is mapped here and the rest are demand faulted. - * - * If the guest wanted a smaller page than the host page size, we map only the - * guest-size page (i.e. not a full host page mapping). - * - * Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. - */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, - u32 flags, u32 max_bytes, unsigned int gtlb_index) +void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) +{ + vcpu->arch.shadow_tlb_mod[i] = 1; +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, + u32 flags) { - struct kvmppc_44x_tlbe stlbe; - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_shadow_ref *ref; struct page *new_page; + struct tlbe *stlbe; hpa_t hpaddr; - gfn_t gfn; unsigned int victim; - /* Select TLB entry to clobber. Indirectly guard against races with the TLB - * miss handler by disabling interrupts. */ - local_irq_disable(); - victim = ++tlb_44x_index; - if (victim > tlb_44x_hwater) - victim = 0; - tlb_44x_index = victim; - local_irq_enable(); + /* Future optimization: don't overwrite the TLB entry containing the + * current PC (or stack?). */ + victim = kvmppc_tlb_44x_pos++; + if (kvmppc_tlb_44x_pos > tlb_44x_hwater) + kvmppc_tlb_44x_pos = 0; + stlbe = &vcpu->arch.shadow_tlb[victim]; /* Get reference to new page. */ - gfn = gpaddr >> PAGE_SHIFT; new_page = gfn_to_page(vcpu->kvm, gfn); if (is_error_page(new_page)) { printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); @@ -299,8 +163,10 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, } hpaddr = page_to_phys(new_page); - /* Invalidate any previous shadow mappings. */ - kvmppc_44x_shadow_release(vcpu_44x, victim); + /* Drop reference to old page. */ + kvmppc_44x_shadow_release(vcpu, victim); + + vcpu->arch.shadow_pages[victim] = new_page; /* XXX Make sure (va, size) doesn't overlap any other * entries. 440x6 user manual says the result would be @@ -308,193 +174,78 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, /* XXX what about AS? */ - /* Force TS=1 for all guest mappings. */ - stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; - - if (max_bytes >= PAGE_SIZE) { - /* Guest mapping is larger than or equal to host page size. We can use - * a "native" host mapping. */ - stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; - } else { - /* Guest mapping is smaller than host page size. 
We must restrict the - * size of the mapping to be at most the smaller of the two, but for - * simplicity we fall back to a 4K mapping (this is probably what the - * guest is using anyways). */ - stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; - - /* 'hpaddr' is a host page, which is larger than the mapping we're - * inserting here. To compensate, we must add the in-page offset to the - * sub-page. */ - hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); - } + stlbe->tid = !(asid & 0xff); - stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, + /* Force TS=1 for all guest mappings. */ + /* For now we hardcode 4KB mappings, but it will be important to + * use host large pages in the future. */ + stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS + | PPC44x_TLB_4K; + stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, vcpu->arch.msr & MSR_PR); - stlbe.tid = !(asid & 0xff); - - /* Keep track of the reference so we can properly release it later. */ - ref = &vcpu_44x->shadow_refs[victim]; - ref->page = new_page; - ref->gtlb_index = gtlb_index; - ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); - ref->tid = stlbe.tid; - - /* Insert shadow mapping into hardware TLB. */ - kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); - kvmppc_44x_tlbwe(victim, &stlbe); - KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2, handler); -} + kvmppc_tlbe_set_modified(vcpu, victim); -/* For a particular guest TLB entry, invalidate the corresponding host TLB - * mappings and release the host pages. */ -static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, - unsigned int gtlb_index) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - if (ref->gtlb_index == gtlb_index) - kvmppc_44x_shadow_release(vcpu_44x, i); - } + KVMTRACE_5D(STLB_WRITE, vcpu, victim, + stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, + handler); } -void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, + gva_t eend, u32 asid) { - vcpu->arch.shadow_pid = !usermode; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + unsigned int pid = !(asid & 0xff); int i; - if (unlikely(vcpu->arch.pid == new_pid)) - return; - - vcpu->arch.pid = new_pid; - - /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it - * can't access guest kernel mappings (TID=1). When we switch to a new - * guest PID, which will also use host PID=0, we must discard the old guest - * userspace mappings. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - - if (ref->tid == 0) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvmppc_44x_tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. 
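 * Non-RAM (MMIO) translations must never be pre-mapped in the shadow
 * TLB: the access has to keep faulting so the exit handler can steer
 * it to kvmppc_emulate_mmio(). The tlbwe emulation uses this helper
 * exactly that way:
 *
 *   if (tlbe_is_host_safe(vcpu, tlbe))
 *           kvmppc_mmu_map(...);   // RAM: safe to shadow eagerly
 *   // else: leave unmapped and let TLB-miss exits sort it out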
*/ - return 0; - - return 1; -} - -int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - unsigned int gtlb_index; - - gtlb_index = vcpu->arch.gpr[ra]; - if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { - printk("%s: index %d\n", __func__, gtlb_index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu_44x->guest_tlb[gtlb_index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ - if (tlbe->word0 & PPC44x_TLB_VALID) - kvmppc_44x_invalidate(vcpu, gtlb_index); - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = get_mmucr_stid(vcpu); - tlbe->word0 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = vcpu->arch.gpr[rs]; - break; - - default: - return EMULATE_FAIL; - } + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + unsigned int tid; - if (tlbe_is_host_safe(vcpu, tlbe)) { - u64 asid; - gva_t eaddr; - gpa_t gpaddr; - u32 flags; - u32 bytes; + if (!get_tlb_v(stlbe)) + continue; - eaddr = get_tlb_eaddr(tlbe); - gpaddr = get_tlb_raddr(tlbe); + if (eend < get_tlb_eaddr(stlbe)) + continue; - /* Use the advertised page size to mask effective and real addrs. */ - bytes = get_tlb_bytes(tlbe); - eaddr &= ~(bytes - 1); - gpaddr &= ~(bytes - 1); + if (eaddr > get_tlb_end(stlbe)) + continue; - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - flags = tlbe->word2 & 0xffff; + tid = get_tlb_tid(stlbe); + if (tid && (tid != pid)) + continue; - kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); + kvmppc_44x_shadow_release(vcpu, i); + stlbe->word0 = 0; + kvmppc_tlbe_set_modified(vcpu, i); + KVMTRACE_5D(STLB_INVAL, vcpu, i, + stlbe->tid, stlbe->word0, stlbe->word1, + stlbe->word2, handler); } - - KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, - tlbe->word1, tlbe->word2, handler); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; } -int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) +/* Invalidate all mappings on the privilege switch after PID has been changed. + * The guest always runs with PID=1, so we must clear the entire TLB when + * switching address spaces. */ +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) { - u32 ea; - int gtlb_index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - ea = vcpu->arch.gpr[rb]; - if (ra) - ea += vcpu->arch.gpr[ra]; - - gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - if (gtlb_index < 0) - vcpu->arch.cr &= ~0x20000000; - else - vcpu->arch.cr |= 0x20000000; + int i; + + if (vcpu->arch.swap_pid) { + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + + /* Future optimization: clear only userspace mappings. 
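 * kvmppc_mmu_map assigns shadow TIDs as tid = !(asid & 0xff), so guest
 * kernel translations (guest TID 0, global) become shadow TID 1 while
 * guest user translations (nonzero guest PID) become shadow TID 0.
 * For example:
 *
 *   asid low byte 0x05 (user mapping, guest PID 5)  ->  shadow tid 0
 *   asid low byte 0x00 (guest kernel mapping)       ->  shadow tid 1
 *
 * A userspace-only flush would therefore release just the TID 0
 * entries instead of walking every entry as the loop below does.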
*/ + kvmppc_44x_shadow_release(vcpu, i); + stlbe->word0 = 0; + kvmppc_tlbe_set_modified(vcpu, i); + KVMTRACE_5D(STLB_INVAL, vcpu, i, + stlbe->tid, stlbe->word0, stlbe->word1, + stlbe->word2, handler); + } + vcpu->arch.swap_pid = 0; } - vcpu->arch.gpr[rt] = gtlb_index; - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; + vcpu->arch.shadow_pid = !usermode; } diff --git a/trunk/arch/powerpc/kvm/44x_tlb.h b/trunk/arch/powerpc/kvm/44x_tlb.h index 772191f29e62..2ccd46b6f6b7 100644 --- a/trunk/arch/powerpc/kvm/44x_tlb.h +++ b/trunk/arch/powerpc/kvm/44x_tlb.h @@ -25,52 +25,48 @@ extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as); -extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); -extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); - -extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, - u8 rc); -extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); +extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); +extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); /* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_size(const struct tlbe *tlbe) { return (tlbe->word0 >> 4) & 0xf; } -static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) +static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) { return tlbe->word0 & 0xfffffc00; } -static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) +static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); return 1 << 10 << (pgsize << 1); } -static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) +static inline gva_t get_tlb_end(const struct tlbe *tlbe) { return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; } -static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) +static inline u64 get_tlb_raddr(const struct tlbe *tlbe) { u64 word1 = tlbe->word1; return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); } -static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) { return tlbe->tid & 0xff; } -static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) { return (tlbe->word0 >> 8) & 0x1; } -static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) +static inline unsigned int get_tlb_v(const struct tlbe *tlbe) { return (tlbe->word0 >> 9) & 0x1; } @@ -85,7 +81,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) return (vcpu->arch.mmucr >> 16) & 0x1; } -static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) +static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) { unsigned int pgmask = get_tlb_bytes(tlbe) - 1; diff --git a/trunk/arch/powerpc/kvm/Kconfig b/trunk/arch/powerpc/kvm/Kconfig index 6dbdc4817d80..53aaa66b25e5 100644 --- a/trunk/arch/powerpc/kvm/Kconfig +++ b/trunk/arch/powerpc/kvm/Kconfig @@ -15,33 +15,27 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - bool + bool "Kernel-based Virtual Machine (KVM) support" + depends on 44x && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES - -config KVM_440 - bool "KVM support for PowerPC 440 processors" - depends on EXPERIMENTAL && 44x - select KVM + # We can only run on Book E hosts so far + 
select KVM_BOOKE_HOST ---help--- - Support running unmodified 440 guest kernels in virtual machines on - 440 host processors. + Support hosting virtualized guest machines. You will also + need to select one or more of the processor modules below. This module provides access to the hardware capabilities through a character device node named /dev/kvm. If unsure, say N. -config KVM_EXIT_TIMING - bool "Detailed exit timing" - depends on KVM +config KVM_BOOKE_HOST + bool "KVM host support for Book E PowerPC processors" + depends on KVM && 44x ---help--- - Calculate elapsed time for every exit/enter cycle. A per-vcpu - report is available in debugfs kvm/vm#_vcpu#_timing. - The overhead is relatively small, however it is not recommended for - production environments. - - If unsure, say N. + Provides host support for KVM on Book E PowerPC processors. Currently + this works on 440 processors only. config KVM_TRACE bool "KVM trace support" diff --git a/trunk/arch/powerpc/kvm/Makefile b/trunk/arch/powerpc/kvm/Makefile index df7ba59e6d53..2a5d4397ac4b 100644 --- a/trunk/arch/powerpc/kvm/Makefile +++ b/trunk/arch/powerpc/kvm/Makefile @@ -8,16 +8,10 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) -kvm-objs := $(common-objs-y) powerpc.o emulate.o -obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o +kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o obj-$(CONFIG_KVM) += kvm.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-440-objs := \ - booke.o \ - booke_interrupts.o \ - 44x.o \ - 44x_tlb.o \ - 44x_emulate.o -obj-$(CONFIG_KVM_440) += kvm-440.o +kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o +obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o diff --git a/trunk/arch/powerpc/kvm/booke.h b/trunk/arch/powerpc/kvm/booke.h deleted file mode 100644 index cf7c94ca24bf..000000000000 --- a/trunk/arch/powerpc/kvm/booke.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __KVM_BOOKE_H__ -#define __KVM_BOOKE_H__ - -#include -#include -#include "timing.h" - -/* interrupt priority ordering */ -#define BOOKE_IRQPRIO_DATA_STORAGE 0 -#define BOOKE_IRQPRIO_INST_STORAGE 1 -#define BOOKE_IRQPRIO_ALIGNMENT 2 -#define BOOKE_IRQPRIO_PROGRAM 3 -#define BOOKE_IRQPRIO_FP_UNAVAIL 4 -#define BOOKE_IRQPRIO_SYSCALL 5 -#define BOOKE_IRQPRIO_AP_UNAVAIL 6 -#define BOOKE_IRQPRIO_DTLB_MISS 7 -#define BOOKE_IRQPRIO_ITLB_MISS 8 -#define BOOKE_IRQPRIO_MACHINE_CHECK 9 -#define BOOKE_IRQPRIO_DEBUG 10 -#define BOOKE_IRQPRIO_CRITICAL 11 -#define BOOKE_IRQPRIO_WATCHDOG 12 -#define BOOKE_IRQPRIO_EXTERNAL 13 -#define BOOKE_IRQPRIO_FIT 14 -#define BOOKE_IRQPRIO_DECREMENTER 15 - -/* Helper function for "full" MSR writes. No need to call this if only EE is - * changing. 
*/ -static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) -{ - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - - vcpu->arch.msr = new_msr; - - if (vcpu->arch.msr & MSR_WE) { - kvm_vcpu_block(vcpu); - kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); - }; -} - -#endif /* __KVM_BOOKE_H__ */ diff --git a/trunk/arch/powerpc/kvm/booke.c b/trunk/arch/powerpc/kvm/booke_guest.c similarity index 56% rename from trunk/arch/powerpc/kvm/booke.c rename to trunk/arch/powerpc/kvm/booke_guest.c index 35485dd6927e..7b2591e26bae 100644 --- a/trunk/arch/powerpc/kvm/booke.c +++ b/trunk/arch/powerpc/kvm/booke_guest.c @@ -24,26 +24,21 @@ #include #include #include - #include #include #include -#include "timing.h" -#include -#include -#include "booke.h" #include "44x_tlb.h" -unsigned long kvmppc_booke_handlers; - #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { + { "exits", VCPU_STAT(sum_exits) }, { "mmio", VCPU_STAT(mmio_exits) }, { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, + { "light", VCPU_STAT(light_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, @@ -58,19 +53,103 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +static const u32 interrupt_msr_mask[16] = { + [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, + [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, + [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DEBUG] = MSR_ME, +}; + +const unsigned char exception_priority[] = { + [BOOKE_INTERRUPT_DATA_STORAGE] = 0, + [BOOKE_INTERRUPT_INST_STORAGE] = 1, + [BOOKE_INTERRUPT_ALIGNMENT] = 2, + [BOOKE_INTERRUPT_PROGRAM] = 3, + [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, + [BOOKE_INTERRUPT_SYSCALL] = 5, + [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, + [BOOKE_INTERRUPT_DTLB_MISS] = 7, + [BOOKE_INTERRUPT_ITLB_MISS] = 8, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, + [BOOKE_INTERRUPT_DEBUG] = 10, + [BOOKE_INTERRUPT_CRITICAL] = 11, + [BOOKE_INTERRUPT_WATCHDOG] = 12, + [BOOKE_INTERRUPT_EXTERNAL] = 13, + [BOOKE_INTERRUPT_FIT] = 14, + [BOOKE_INTERRUPT_DECREMENTER] = 15, +}; + +const unsigned char priority_exception[] = { + BOOKE_INTERRUPT_DATA_STORAGE, + BOOKE_INTERRUPT_INST_STORAGE, + BOOKE_INTERRUPT_ALIGNMENT, + BOOKE_INTERRUPT_PROGRAM, + BOOKE_INTERRUPT_FP_UNAVAIL, + BOOKE_INTERRUPT_SYSCALL, + BOOKE_INTERRUPT_AP_UNAVAIL, + BOOKE_INTERRUPT_DTLB_MISS, + BOOKE_INTERRUPT_ITLB_MISS, + BOOKE_INTERRUPT_MACHINE_CHECK, + BOOKE_INTERRUPT_DEBUG, + BOOKE_INTERRUPT_CRITICAL, + BOOKE_INTERRUPT_WATCHDOG, + BOOKE_INTERRUPT_EXTERNAL, + BOOKE_INTERRUPT_FIT, + BOOKE_INTERRUPT_DECREMENTER, +}; + + +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct tlbe *tlbe; + int i; + + printk("vcpu %d TLB 
dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.shadow_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" S%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} + /* TODO: use vcpu_printf() */ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); - printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); + printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { - printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, + printk("gpr%02d: %08x %08x %08x %08x\n", i, vcpu->arch.gpr[i], vcpu->arch.gpr[i+1], vcpu->arch.gpr[i+2], @@ -78,96 +157,69 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) } } -static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, - unsigned int priority) +/* Check if we are ready to deliver the interrupt */ +static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) { - set_bit(priority, &vcpu->arch.pending_exceptions); -} + int r; -void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) -{ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); -} - -void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) -{ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); -} - -int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) -{ - return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); -} - -void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) -{ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); -} - -/* Deliver the interrupt of the corresponding priority, if possible. 
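 * Delivery mimics real Book E exception entry. With the reset-time
 * IVPR of 0x55550000 and a hypothetical IVOR4 of 0x500, delivering an
 * external interrupt amounts to:
 *
 *   srr0 = pc;                         // return address
 *   srr1 = msr;                        // saved context
 *   pc   = 0x55550000 | 0x500;         // vector = IVPR | IVOR4
 *   msr &= MSR_CE | MSR_ME | MSR_DE;   // EE cleared, critical allowed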
*/ -static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, - unsigned int priority) -{ - int allowed = 0; - ulong msr_mask; - - switch (priority) { - case BOOKE_IRQPRIO_PROGRAM: - case BOOKE_IRQPRIO_DTLB_MISS: - case BOOKE_IRQPRIO_ITLB_MISS: - case BOOKE_IRQPRIO_SYSCALL: - case BOOKE_IRQPRIO_DATA_STORAGE: - case BOOKE_IRQPRIO_INST_STORAGE: - case BOOKE_IRQPRIO_FP_UNAVAIL: - case BOOKE_IRQPRIO_AP_UNAVAIL: - case BOOKE_IRQPRIO_ALIGNMENT: - allowed = 1; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + switch (interrupt) { + case BOOKE_INTERRUPT_CRITICAL: + r = vcpu->arch.msr & MSR_CE; break; - case BOOKE_IRQPRIO_CRITICAL: - case BOOKE_IRQPRIO_WATCHDOG: - allowed = vcpu->arch.msr & MSR_CE; - msr_mask = MSR_ME; + case BOOKE_INTERRUPT_MACHINE_CHECK: + r = vcpu->arch.msr & MSR_ME; break; - case BOOKE_IRQPRIO_MACHINE_CHECK: - allowed = vcpu->arch.msr & MSR_ME; - msr_mask = 0; + case BOOKE_INTERRUPT_EXTERNAL: + r = vcpu->arch.msr & MSR_EE; break; - case BOOKE_IRQPRIO_EXTERNAL: - case BOOKE_IRQPRIO_DECREMENTER: - case BOOKE_IRQPRIO_FIT: - allowed = vcpu->arch.msr & MSR_EE; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + case BOOKE_INTERRUPT_DECREMENTER: + r = vcpu->arch.msr & MSR_EE; break; - case BOOKE_IRQPRIO_DEBUG: - allowed = vcpu->arch.msr & MSR_DE; - msr_mask = MSR_ME; + case BOOKE_INTERRUPT_FIT: + r = vcpu->arch.msr & MSR_EE; break; + case BOOKE_INTERRUPT_WATCHDOG: + r = vcpu->arch.msr & MSR_CE; + break; + case BOOKE_INTERRUPT_DEBUG: + r = vcpu->arch.msr & MSR_DE; + break; + default: + r = 1; } - if (allowed) { - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; - kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); + return r; +} - clear_bit(priority, &vcpu->arch.pending_exceptions); +static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +{ + switch (interrupt) { + case BOOKE_INTERRUPT_DECREMENTER: + vcpu->arch.tsr |= TSR_DIS; + break; } - return allowed; + vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; + kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); } /* Check pending exceptions and deliver one, if possible. 
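 * pending_exceptions is indexed by priority, so the lowest set bit is
 * the most urgent exception. For example, with bit 7 (DTLB miss) and
 * bit 13 (external interrupt) pending while the guest has MSR[EE] = 0:
 *
 *   find_first_bit(pending, ...) -> 7   // DTLB miss: always allowed
 *   // bit 7 is cleared on delivery; bit 13 stays latched until the
 *   // guest sets MSR[EE] and a later delivery pass picks it up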
*/ -void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned int exception; unsigned int priority; - priority = __ffs(*pending); + priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); while (priority <= BOOKE_MAX_INTERRUPT) { - if (kvmppc_booke_irqprio_deliver(vcpu, priority)) + exception = priority_exception[priority]; + if (kvmppc_can_deliver_interrupt(vcpu, exception)) { + kvmppc_clear_exception(vcpu, exception); + kvmppc_deliver_interrupt(vcpu, exception); break; + } priority = find_next_bit(pending, BITS_PER_BYTE * sizeof(*pending), @@ -186,9 +238,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, enum emulation_result er; int r = RESUME_HOST; - /* update before a new last_exit_type is rewritten */ - kvmppc_update_timing_stats(vcpu); - local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -202,19 +251,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_EXTERNAL: - kvmppc_account_exit(vcpu, EXT_INTR_EXITS); - if (need_resched()) - cond_resched(); - r = RESUME_GUEST; - break; - case BOOKE_INTERRUPT_DECREMENTER: /* Since we switched IVPR back to the host's value, the host * handled this interrupt the moment we enabled interrupts. * Now we just offer it a chance to reschedule the guest. */ - kvmppc_account_exit(vcpu, DEC_EXITS); + + /* XXX At this point the TLB still holds our shadow TLB, so if + * we do reschedule the host will fault over it. Perhaps we + * should politely restore the host's entries to minimize + * misses before ceding control. */ if (need_resched()) cond_resched(); + if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) + vcpu->stat.dec_exits++; + else + vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -223,19 +274,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Program traps generated by user-level software must be handled * by the guest kernel. */ vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); r = RESUME_GUEST; - kvmppc_account_exit(vcpu, USR_PR_INST); break; } er = kvmppc_emulate_instruction(run, vcpu); switch (er) { case EMULATE_DONE: - /* don't overwrite subtypes, just account kvm_stats */ - kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); /* Future optimization: only reload non-volatiles if * they were actually modified by emulation. */ + vcpu->stat.emulated_inst_exits++; r = RESUME_GUEST_NV; break; case EMULATE_DO_DCR: @@ -244,7 +293,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case EMULATE_FAIL: /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. 
*/ @@ -258,53 +307,48 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_FP_UNAVAIL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); - kvmppc_account_exit(vcpu, FP_UNAVAIL); + kvmppc_queue_exception(vcpu, exit_nr); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); - kvmppc_account_exit(vcpu, DSI_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.dsi_exits++; r = RESUME_GUEST; break; case BOOKE_INTERRUPT_INST_STORAGE: vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); - kvmppc_account_exit(vcpu, ISI_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.isi_exits++; r = RESUME_GUEST; break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); - kvmppc_account_exit(vcpu, SYSCALL_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.syscall_exits++; r = RESUME_GUEST; break; - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; + struct tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; - int gtlb_index; gfn_t gfn; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); - if (gtlb_index < 0) { + gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); + if (!gtlbe) { /* The guest didn't have a mapping for it. */ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); + kvmppc_queue_exception(vcpu, exit_nr); vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); + vcpu->stat.dtlb_real_miss_exits++; r = RESUME_GUEST; break; } - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; @@ -315,45 +359,38 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); - kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); + vcpu->stat.dtlb_virt_miss_exits++; r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ r = kvmppc_emulate_mmio(run, vcpu); - kvmppc_account_exit(vcpu, MMIO_EXITS); } break; } - /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe; + struct tlbe *gtlbe; unsigned long eaddr = vcpu->arch.pc; - gpa_t gpaddr; gfn_t gfn; - int gtlb_index; r = RESUME_GUEST; /* Check the guest TLB. */ - gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); - if (gtlb_index < 0) { + gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); + if (!gtlbe) { /* The guest didn't have a mapping for it. 
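 * An ITLB miss exit resolves in one of three ways, mirroring the
 * branches below:
 *
 *   if (!gtlbe)                         // guest TLB miss
 *           reflect ITLB_MISS to the guest;
 *   else if (kvm_is_visible_gfn(...))   // guest hit, shadow miss, RAM
 *           kvmppc_mmu_map(...);        // refill shadow TLB, resume
 *   else                                // guest hit, but not RAM
 *           queue a machine check;      // can't fetch from MMIO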
*/ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); - kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.itlb_real_miss_exits++; break; } - kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); + vcpu->stat.itlb_virt_miss_exits++; - gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - gpaddr = tlb_xlate(gtlbe, eaddr); - gfn = gpaddr >> PAGE_SHIFT; + gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { /* The guest TLB had a mapping, but the shadow TLB @@ -362,11 +399,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, - gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); } else { /* Guest mapped and leaped at non-RAM! */ - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); + kvmppc_queue_exception(vcpu, + BOOKE_INTERRUPT_MACHINE_CHECK); } break; @@ -383,7 +421,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, mtspr(SPRN_DBSR, dbsr); run->exit_reason = KVM_EXIT_DEBUG; - kvmppc_account_exit(vcpu, DEBUG_EXITS); r = RESUME_HOST; break; } @@ -395,8 +432,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_disable(); - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_check_and_deliver_interrupts(vcpu); + /* Do some exit accounting. */ + vcpu->stat.sum_exits++; if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -404,7 +443,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - kvmppc_account_exit(vcpu, SIGNAL_EXITS); + + vcpu->stat.signal_exits++; + } else { + vcpu->stat.light_exits++; + } + } else { + switch (run->exit_reason) { + case KVM_EXIT_MMIO: + vcpu->stat.mmio_exits++; + break; + case KVM_EXIT_DCR: + vcpu->stat.dcr_exits++; + break; + case KVM_EXIT_INTR: + vcpu->stat.signal_exits++; + break; } } @@ -414,6 +468,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + vcpu->arch.pc = 0; vcpu->arch.msr = 0; vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ @@ -424,9 +492,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) * before it's programmed its own IVPR. */ vcpu->arch.ivpr = 0x55550000; - kvmppc_init_timing_stats(vcpu); + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. 
*/ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - return kvmppc_core_vcpu_setup(vcpu); + return 0; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -465,7 +536,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.ctr = regs->ctr; vcpu->arch.lr = regs->lr; vcpu->arch.xer = regs->xer; - kvmppc_set_msr(vcpu, regs->msr); + vcpu->arch.msr = regs->msr; vcpu->arch.srr0 = regs->srr0; vcpu->arch.srr1 = regs->srr1; vcpu->arch.sprg0 = regs->sprg0; @@ -504,62 +575,31 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { - return kvmppc_core_vcpu_translate(vcpu, tr); -} + struct tlbe *gtlbe; + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } -int kvmppc_booke_init(void) -{ - unsigned long ivor[16]; - unsigned long max_ivor = 0; - int i; + gtlbe = &vcpu->arch.guest_tlb[index]; - /* We install our own exception handlers by hijacking IVPR. IVPR must - * be 16-bit aligned, so we need a 64KB allocation. */ - kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, - VCPU_SIZE_ORDER); - if (!kvmppc_booke_handlers) - return -ENOMEM; - - /* XXX make sure our handlers are smaller than Linux's */ - - /* Copy our interrupt handlers to match host IVORs. That way we don't - * have to swap the IVORs on every guest/host transition. */ - ivor[0] = mfspr(SPRN_IVOR0); - ivor[1] = mfspr(SPRN_IVOR1); - ivor[2] = mfspr(SPRN_IVOR2); - ivor[3] = mfspr(SPRN_IVOR3); - ivor[4] = mfspr(SPRN_IVOR4); - ivor[5] = mfspr(SPRN_IVOR5); - ivor[6] = mfspr(SPRN_IVOR6); - ivor[7] = mfspr(SPRN_IVOR7); - ivor[8] = mfspr(SPRN_IVOR8); - ivor[9] = mfspr(SPRN_IVOR9); - ivor[10] = mfspr(SPRN_IVOR10); - ivor[11] = mfspr(SPRN_IVOR11); - ivor[12] = mfspr(SPRN_IVOR12); - ivor[13] = mfspr(SPRN_IVOR13); - ivor[14] = mfspr(SPRN_IVOR14); - ivor[15] = mfspr(SPRN_IVOR15); - - for (i = 0; i < 16; i++) { - if (ivor[i] > max_ivor) - max_ivor = ivor[i]; - - memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); - } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + tr->physical_address = tlb_xlate(gtlbe, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; return 0; } - -void __exit kvmppc_booke_exit(void) -{ - free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); - kvm_exit(); -} diff --git a/trunk/arch/powerpc/kvm/booke_host.c b/trunk/arch/powerpc/kvm/booke_host.c new file mode 100644 index 000000000000..b480341bc31e --- /dev/null +++ b/trunk/arch/powerpc/kvm/booke_host.c @@ -0,0 +1,83 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include + +unsigned long kvmppc_booke_handlers; + +static int kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; + + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); +} + +static void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} + +module_init(kvmppc_booke_init) +module_exit(kvmppc_booke_exit) diff --git a/trunk/arch/powerpc/kvm/booke_interrupts.S b/trunk/arch/powerpc/kvm/booke_interrupts.S index 084ebcd7dd83..95e165baf85f 100644 --- a/trunk/arch/powerpc/kvm/booke_interrupts.S +++ b/trunk/arch/powerpc/kvm/booke_interrupts.S @@ -107,18 +107,6 @@ _GLOBAL(kvmppc_resume_host) li r6, 1 slw r6, r6, r5 -#ifdef CONFIG_KVM_EXIT_TIMING - /* save exit time */ -1: - mfspr r7, SPRN_TBRU - mfspr r8, SPRN_TBRL - mfspr r9, SPRN_TBRU - cmpw r9, r7 - bne 1b - stw r8, VCPU_TIMING_EXIT_TBL(r4) - stw r9, VCPU_TIMING_EXIT_TBU(r4) -#endif - /* Save the faulting instruction and all GPRs for emulation. */ andi. r7, r6, NEED_INST_MASK beq ..skip_inst_copy @@ -347,6 +335,54 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 + /* Prevent all asynchronous TLB updates. */ + mfmsr r5 + lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h + ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l + andc r6, r5, r6 + mtmsr r6 + + /* Load the guest mappings, leaving the host's "pinned" kernel mappings + * in place. */ + mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ + li r5, PPC44x_TLB_SIZE + lis r5, tlb_44x_hwater@ha + lwz r5, tlb_44x_hwater@l(r5) + mtctr r5 + addi r9, r4, VCPU_SHADOW_TLB + addi r5, r4, VCPU_SHADOW_MOD + li r3, 0 +1: + lbzx r7, r3, r5 + cmpwi r7, 0 + beq 3f + + /* Load guest entry. 
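 * Each shadow entry occupies TLBE_BYTES and is consumed at fixed
 * offsets below (0 = tid, 4 = word0, 8 = word1, 12 = word2). A C view
 * of the layout this loop assumes (a sketch; the real struct tlbe
 * definition is not part of this hunk):
 *
 *   struct tlbe {
 *           u32 tid;     // loaded into MMUCR[STID]
 *           u32 word0;   // PAGEID
 *           u32 word1;   // XLAT
 *           u32 word2;   // ATTRIB
 *   };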
*/ + mulli r11, r3, TLBE_BYTES + add r11, r11, r9 + lwz r7, 0(r11) + mtspr SPRN_MMUCR, r7 + lwz r7, 4(r11) + tlbwe r7, r3, PPC44x_TLB_PAGEID + lwz r7, 8(r11) + tlbwe r7, r3, PPC44x_TLB_XLAT + lwz r7, 12(r11) + tlbwe r7, r3, PPC44x_TLB_ATTRIB +3: + addi r3, r3, 1 /* Increment index. */ + bdnz 1b + + mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ + + /* Clear bitmap of modified TLB entries */ + li r5, PPC44x_TLB_SIZE>>2 + mtctr r5 + addi r5, r4, VCPU_SHADOW_MOD - 4 + li r6, 0 +1: + stwu r6, 4(r5) + bdnz 1b + iccci 0, 0 /* XXX hack */ /* Load some guest volatiles. */ @@ -387,18 +423,6 @@ lightweight_exit: lwz r3, VCPU_SPRG7(r4) mtspr SPRN_SPRG7, r3 -#ifdef CONFIG_KVM_EXIT_TIMING - /* save enter time */ -1: - mfspr r6, SPRN_TBRU - mfspr r7, SPRN_TBRL - mfspr r8, SPRN_TBRU - cmpw r8, r6 - bne 1b - stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) - stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) -#endif - /* Finish loading guest volatiles and jump to guest. */ lwz r3, VCPU_CTR(r4) mtctr r3 diff --git a/trunk/arch/powerpc/kvm/emulate.c b/trunk/arch/powerpc/kvm/emulate.c index d1d38daa93fb..0fce4fbdc20d 100644 --- a/trunk/arch/powerpc/kvm/emulate.c +++ b/trunk/arch/powerpc/kvm/emulate.c @@ -23,14 +23,161 @@ #include #include -#include +#include +#include #include #include #include -#include -#include "timing.h" -void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +#include "44x_tlb.h" + +/* Instruction decoding */ +static inline unsigned int get_op(u32 inst) +{ + return inst >> 26; +} + +static inline unsigned int get_xop(u32 inst) +{ + return (inst >> 1) & 0x3ff; +} + +static inline unsigned int get_sprn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_dcrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_rt(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_rs(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_ra(u32 inst) +{ + return (inst >> 16) & 0x1f; +} + +static inline unsigned int get_rb(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_rc(u32 inst) +{ + return inst & 0x1; +} + +static inline unsigned int get_ws(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_d(u32 inst) +{ + return inst & 0xffff; +} + +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct tlbe *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) +{ + u64 eaddr; + u64 raddr; + u64 asid; + u32 flags; + struct tlbe *tlbe; + unsigned int ra; + unsigned int rs; + unsigned int ws; + unsigned int index; + + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + + index = vcpu->arch.gpr[ra]; + if (index > PPC44x_TLB_SIZE) { + printk("%s: index %d\n", __func__, index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; + } + + tlbe = &vcpu->arch.guest_tlb[index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. 
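 * (tlbwe lands here from the opcode 31 dispatch in
 * kvmppc_emulate_instruction: get_op(inst) == 31 and
 * get_xop(inst) == 978, with operands extracted as
 *
 *   ra = get_ra(inst); rs = get_rs(inst); ws = get_ws(inst);
 *
 * where ws selects which word of the entry is written below.)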
*/ + if (tlbe->word0 & PPC44x_TLB_VALID) { + eaddr = get_tlb_eaddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid); + } + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = vcpu->arch.mmucr & 0xff; + tlbe->word0 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + eaddr = get_tlb_eaddr(tlbe); + raddr = get_tlb_raddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + flags = tlbe->word2 & 0xffff; + + /* Create a 4KB mapping on the host. If the guest wanted a + * large page, only the first 4KB is mapped here and the rest + * are mapped on the fly. */ + kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); + } + + KVMTRACE_5D(GTLB_WRITE, vcpu, index, + tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, + handler); + + return EMULATE_DONE; +} + +static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { if (vcpu->arch.tcr & TCR_DIE) { /* The decrementer ticks at the same rate as the timebase, so @@ -46,6 +193,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + /* XXX to do: * lhax * lhaux @@ -60,30 +213,40 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) * * XXX is_bigendian should depend on MMU mapping or MSR[LE] */ -/* XXX Should probably auto-generate instruction decoding for a particular core - * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = vcpu->arch.last_inst; u32 ea; int ra; int rb; + int rc; int rs; int rt; int sprn; + int dcrn; enum emulation_result emulated = EMULATE_DONE; int advance = 1; - /* this default type might be overwritten by subcategories */ - kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - switch (get_op(inst)) { - case 3: /* trap */ - vcpu->arch.esr |= ESR_PTR; - kvmppc_core_queue_program(vcpu); + case 3: /* trap */ + printk("trap!\n"); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); advance = 0; break; + case 19: + switch (get_xop(inst)) { + case 50: /* rfi */ + kvmppc_emul_rfi(vcpu); + advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + case 31: switch (get_xop(inst)) { @@ -92,11 +255,27 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; + case 83: /* mfmsr */ + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + break; + case 87: /* lbzx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; + case 131: /* wrtee */ + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + break; + + case 146: /* mtmsr */ + rs = get_rs(inst); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + case 151: /* stwx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -104,6 +283,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 1); break; + case 163: /* wrteei */ + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + break; + case 215: /* stbx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -144,6 +328,42 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 
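/* The DCR and SPR numbers handled below arrive split across two 5-bit
 * instruction fields; get_dcrn() and get_sprn() above reassemble them:
 *
 *   n = ((inst >> 16) & 0x1f)    // low five bits
 *     | ((inst >> 6) & 0x3e0);   // high five bits
 *
 * e.g. SRR0 (SPR 26) encodes with low field 26 and high field 0. */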
vcpu->arch.gpr[ra] = ea; break; + case 323: /* mfdcr */ + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + case 339: /* mfspr */ sprn = get_sprn(inst); rt = get_rt(inst); @@ -153,8 +373,26 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; case SPRN_SRR1: vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; + case SPRN_MMUCR: + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_CCR0: + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; + case SPRN_CCR1: + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; case SPRN_PVR: vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. @@ -175,12 +413,42 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Note: SPRG4-7 are user-readable, so we don't get * a trap. 
*/ + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; + default: - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); - if (emulated == EMULATE_FAIL) { - printk("mfspr: unknown spr %x\n", sprn); - vcpu->arch.gpr[rt] = 0; - } + printk("mfspr: unknown spr %x\n", sprn); + vcpu->arch.gpr[rt] = 0; break; } break; @@ -210,6 +478,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; + case 451: /* mtdcr */ + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = vcpu->arch.gpr[rs]; + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + case 467: /* mtspr */ sprn = get_sprn(inst); rs = get_rs(inst); @@ -218,6 +505,22 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; case SPRN_SRR1: vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; + case SPRN_PID: + kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; + case SPRN_CCR0: + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; + case SPRN_CCR1: + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ @@ -229,6 +532,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_emulate_dec(vcpu); break; + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + case SPRN_SPRG0: vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; case SPRN_SPRG1: @@ -238,10 +549,56 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_SPRG3: vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. 
*/ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR0: + vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR1: + vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR2: + vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR3: + vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR4: + vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR5: + vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR6: + vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR7: + vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR8: + vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR9: + vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR10: + vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR11: + vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR12: + vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR13: + vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR14: + vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR15: + vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; + default: - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); - if (emulated == EMULATE_FAIL) - printk("mtspr: unknown spr %x\n", sprn); + printk("mtspr: unknown spr %x\n", sprn); + emulated = EMULATE_FAIL; break; } break; @@ -272,6 +629,36 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 0); break; + case 978: /* tlbwe */ + emulated = kvmppc_emul_tlbwe(vcpu, inst); + break; + + case 914: { /* tlbsx */ + int index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + if (index < 0) + vcpu->arch.cr &= ~0x20000000; + else + vcpu->arch.cr |= 0x20000000; + } + vcpu->arch.gpr[rt] = index; + + } + break; + case 790: /* lhbrx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); @@ -287,9 +674,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 2, 0); break; + case 966: /* iccci */ + break; + default: - /* Attempt core-specific emulation below. */ + printk("unknown: op %d xop %d\n", get_op(inst), + get_xop(inst)); emulated = EMULATE_FAIL; + break; } break; @@ -372,19 +764,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: + printk("unknown op %d\n", get_op(inst)); emulated = EMULATE_FAIL; + break; } - if (emulated == EMULATE_FAIL) { - emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); - if (emulated == EMULATE_FAIL) { - advance = 0; - printk(KERN_ERR "Couldn't emulate instruction 0x%08x " - "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); - } - } - - KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit); + KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); if (advance) vcpu->arch.pc += 4; /* Advance past emulated instruction. 
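
A note on where EMULATE_DO_DCR lands: the run->dcr fields filled in above reach the VMM as a KVM_EXIT_DCR, and for reads the kernel completes the GPR load from run->dcr.data on the next KVM_RUN (see kvmppc_complete_dcr_load in the powerpc.c hunk below). A hypothetical VMM-side sketch; emulate_dcr_read/write stand in for whatever device model the VMM provides:

	extern void emulate_dcr_write(unsigned int dcrn, unsigned int data);	/* placeholder */
	extern unsigned int emulate_dcr_read(unsigned int dcrn);		/* placeholder */

	/* Userspace side of the DCR round trip (sketch, not part of the patch). */
	static void handle_dcr_exit(struct kvm_run *run)
	{
		if (run->dcr.is_write)
			emulate_dcr_write(run->dcr.dcrn, run->dcr.data);
		else
			run->dcr.data = emulate_dcr_read(run->dcr.dcrn);
		/* The next KVM_RUN ioctl re-enters the guest; for a read the
		 * kernel copies run->dcr.data into the GPR recorded in
		 * vcpu->arch.io_gpr. */
	}
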
*/ diff --git a/trunk/arch/powerpc/kvm/powerpc.c b/trunk/arch/powerpc/kvm/powerpc.c index 2822c8ccfaaf..8bef0efcdfe1 100644 --- a/trunk/arch/powerpc/kvm/powerpc.c +++ b/trunk/arch/powerpc/kvm/powerpc.c @@ -28,9 +28,9 @@ #include #include #include -#include "timing.h" #include "../mm/mmu_decl.h" + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; @@ -99,7 +99,14 @@ void kvm_arch_hardware_unsetup(void) void kvm_arch_check_processor_compat(void *rtn) { - *(int *)rtn = kvmppc_core_check_processor_compat(); + int r; + + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + r = 0; + else + r = -ENOTSUPP; + + *(int *)rtn = r; } struct kvm *kvm_arch_create_vm(void) @@ -137,6 +144,9 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { + case KVM_CAP_USER_MEMORY: + r = 1; + break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; @@ -169,15 +179,30 @@ void kvm_arch_flush_shadow(struct kvm *kvm) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - vcpu = kvmppc_core_vcpu_create(kvm, id); - kvmppc_create_vcpu_debugfs(vcpu, id); + int err; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { - kvmppc_remove_vcpu_debugfs(vcpu); - kvmppc_core_vcpu_free(vcpu); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -187,14 +212,16 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvmppc_core_pending_dec(vcpu); + unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; + + return test_bit(priority, &vcpu->arch.pending_exceptions); } static void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_core_queue_dec(vcpu); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); @@ -215,25 +242,96 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_core_destroy_mmu(vcpu); } +/* Note: clearing MSR[DE] just means that the debug interrupt will not be + * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. + * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt + * will be delivered as an "imprecise debug event" (which is indicated by + * DBSR[IDE]. + */ +static void kvmppc_disable_debug_interrupts(void) +{ + mtmsr(mfmsr() & ~MSR_DE); +} + +static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu) +{ + kvmppc_disable_debug_interrupts(); + + mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); + mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); + mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); + mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); + mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); + mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); + mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); + mtmsr(vcpu->arch.host_msr); +} + +static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) +{ + struct kvm_guest_debug *dbg = &vcpu->guest_debug; + u32 dbcr0 = 0; + + vcpu->arch.host_msr = mfmsr(); + kvmppc_disable_debug_interrupts(); + + /* Save host debug register state. 
*/ + vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); + vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); + vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); + vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); + vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); + vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); + vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); + + /* set registers up for guest */ + + if (dbg->bp[0]) { + mtspr(SPRN_IAC1, dbg->bp[0]); + dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; + } + if (dbg->bp[1]) { + mtspr(SPRN_IAC2, dbg->bp[1]); + dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; + } + if (dbg->bp[2]) { + mtspr(SPRN_IAC3, dbg->bp[2]); + dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; + } + if (dbg->bp[3]) { + mtspr(SPRN_IAC4, dbg->bp[3]); + dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; + } + + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBCR1, 0); + mtspr(SPRN_DBCR2, 0); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + int i; + if (vcpu->guest_debug.enabled) - kvmppc_core_load_guest_debugstate(vcpu); + kvmppc_load_guest_debug_registers(vcpu); - kvmppc_core_vcpu_load(vcpu, cpu); + /* Mark every guest entry in the shadow TLB entry modified, so that they + * will all be reloaded on the next vcpu run (instead of being + * demand-faulted). */ + for (i = 0; i <= tlb_44x_hwater; i++) + kvmppc_tlbe_set_modified(vcpu, i); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { if (vcpu->guest_debug.enabled) - kvmppc_core_load_host_debugstate(vcpu); + kvmppc_restore_host_debug_state(vcpu); /* Don't leave guest TLB entries resident when being de-scheduled. */ /* XXX It would be nice to differentiate between heavyweight exit and * sched_out here, since we could avoid the TLB flush for heavyweight * exits. */ _tlbil_all(); - kvmppc_core_vcpu_put(vcpu); } int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, @@ -257,14 +355,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; *gpr = run->dcr.data; } static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; if (run->mmio.len > sizeof(*gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -362,7 +460,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.dcr_needed = 0; } - kvmppc_core_deliver_interrupts(vcpu); + kvmppc_check_and_deliver_interrupts(vcpu); local_irq_disable(); kvm_guest_enter(); @@ -380,7 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_core_queue_external(vcpu, irq); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); diff --git a/trunk/arch/powerpc/kvm/timing.c b/trunk/arch/powerpc/kvm/timing.c deleted file mode 100644 index 47ee603f558e..000000000000 --- a/trunk/arch/powerpc/kvm/timing.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - * Christian Ehrhardt - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "timing.h" - -void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) -{ - int i; - - /* pause guest execution to avoid concurrent updates */ - local_irq_disable(); - mutex_lock(&vcpu->mutex); - - vcpu->arch.last_exit_type = 0xDEAD; - for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { - vcpu->arch.timing_count_type[i] = 0; - vcpu->arch.timing_max_duration[i] = 0; - vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF; - vcpu->arch.timing_sum_duration[i] = 0; - vcpu->arch.timing_sum_quad_duration[i] = 0; - } - vcpu->arch.timing_last_exit = 0; - vcpu->arch.timing_exit.tv64 = 0; - vcpu->arch.timing_last_enter.tv64 = 0; - - mutex_unlock(&vcpu->mutex); - local_irq_enable(); -} - -static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) -{ - u64 old; - - do_div(duration, tb_ticks_per_usec); - if (unlikely(duration > 0xFFFFFFFF)) { - printk(KERN_ERR"%s - duration too big -> overflow" - " duration %lld type %d exit #%d\n", - __func__, duration, type, - vcpu->arch.timing_count_type[type]); - return; - } - - vcpu->arch.timing_count_type[type]++; - - /* sum */ - old = vcpu->arch.timing_sum_duration[type]; - vcpu->arch.timing_sum_duration[type] += duration; - if (unlikely(old > vcpu->arch.timing_sum_duration[type])) { - printk(KERN_ERR"%s - wrap adding sum of durations" - " old %lld new %lld type %d exit # of type %d\n", - __func__, old, vcpu->arch.timing_sum_duration[type], - type, vcpu->arch.timing_count_type[type]); - } - - /* square sum */ - old = vcpu->arch.timing_sum_quad_duration[type]; - vcpu->arch.timing_sum_quad_duration[type] += (duration*duration); - if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) { - printk(KERN_ERR"%s - wrap adding sum of squared durations" - " old %lld new %lld type %d exit # of type %d\n", - __func__, old, - vcpu->arch.timing_sum_quad_duration[type], - type, vcpu->arch.timing_count_type[type]); - } - - /* set min/max */ - if (unlikely(duration < vcpu->arch.timing_min_duration[type])) - vcpu->arch.timing_min_duration[type] = duration; - if (unlikely(duration > vcpu->arch.timing_max_duration[type])) - vcpu->arch.timing_max_duration[type] = duration; -} - -void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) -{ - u64 exit = vcpu->arch.timing_last_exit; - u64 enter = vcpu->arch.timing_last_enter.tv64; - - /* save exit time, used next exit when the reenter time is known */ - vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64; - - if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0)) - return; /* skip incomplete cycle (e.g. 
after reset) */ - - /* update statistics for average and standard deviation */ - add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type); - /* enter -> timing_last_exit is time spent in guest - log this too */ - add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter), - TIMEINGUEST); -} - -static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { - [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", - [SIGNAL_EXITS] = "SIGNAL", - [ITLB_REAL_MISS_EXITS] = "ITLBREAL", - [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", - [DTLB_REAL_MISS_EXITS] = "DTLBREAL", - [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT", - [SYSCALL_EXITS] = "SYSCALL", - [ISI_EXITS] = "ISI", - [DSI_EXITS] = "DSI", - [EMULATED_INST_EXITS] = "EMULINST", - [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT", - [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE", - [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR", - [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR", - [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR", - [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR", - [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX", - [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE", - [EMULATED_RFI_EXITS] = "EMUL_RFI", - [DEC_EXITS] = "DEC", - [EXT_INTR_EXITS] = "EXTINT", - [HALT_WAKEUP] = "HALT", - [USR_PR_INST] = "USR_PR_INST", - [FP_UNAVAIL] = "FP_UNAVAIL", - [DEBUG_EXITS] = "DEBUG", - [TIMEINGUEST] = "TIMEINGUEST" -}; - -static int kvmppc_exit_timing_show(struct seq_file *m, void *private) -{ - struct kvm_vcpu *vcpu = m->private; - int i; - - seq_printf(m, "%s", "type count min max sum sum_squared\n"); - - for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { - seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", - kvm_exit_names[i], - vcpu->arch.timing_count_type[i], - vcpu->arch.timing_min_duration[i], - vcpu->arch.timing_max_duration[i], - vcpu->arch.timing_sum_duration[i], - vcpu->arch.timing_sum_quad_duration[i]); - } - return 0; -} - -/* Write 'c' to clear the timing statistics. */ -static ssize_t kvmppc_exit_timing_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - int err = -EINVAL; - char c; - - if (count > 1) { - goto done; - } - - if (get_user(c, user_buf)) { - err = -EFAULT; - goto done; - } - - if (c == 'c') { - struct seq_file *seqf = (struct seq_file *)file->private_data; - struct kvm_vcpu *vcpu = seqf->private; - /* Write does not affect our buffers previously generated with - * show. 
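
For completeness, the per-exit-type triple that the timing.c file (deleted here) exported is enough to recover mean and standard deviation offline: with n = count, S = sum and Q = sum_squared, mean = S/n and stddev = sqrt(Q/n - (S/n)^2). A small host-side sketch, assuming the values were read from the vmN_vcpuN_timing debugfs file:

	#include <math.h>

	/* Population mean/stddev from the (count, sum, sum_squared) columns
	 * printed per exit type by the debugfs file above. */
	static void exit_timing_stats(unsigned long long n,
				      unsigned long long sum,
				      unsigned long long sum_sq,
				      double *mean, double *stddev)
	{
		*mean = (double)sum / n;
		*stddev = sqrt((double)sum_sq / n - *mean * *mean);
	}
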
seq_file is locked here to prevent races of init with - * a show call */ - mutex_lock(&seqf->lock); - kvmppc_init_timing_stats(vcpu); - mutex_unlock(&seqf->lock); - err = count; - } - -done: - return err; -} - -static int kvmppc_exit_timing_open(struct inode *inode, struct file *file) -{ - return single_open(file, kvmppc_exit_timing_show, inode->i_private); -} - -static struct file_operations kvmppc_exit_timing_fops = { - .owner = THIS_MODULE, - .open = kvmppc_exit_timing_open, - .read = seq_read, - .write = kvmppc_exit_timing_write, - .llseek = seq_lseek, - .release = single_release, -}; - -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) -{ - static char dbg_fname[50]; - struct dentry *debugfs_file; - - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", - current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, - kvm_debugfs_dir, vcpu, - &kvmppc_exit_timing_fops); - - if (!debugfs_file) { - printk(KERN_ERR"%s: error creating debugfs file %s\n", - __func__, dbg_fname); - return; - } - - vcpu->arch.debugfs_exit_timing = debugfs_file; -} - -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.debugfs_exit_timing) { - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; - } -} diff --git a/trunk/arch/powerpc/kvm/timing.h b/trunk/arch/powerpc/kvm/timing.h deleted file mode 100644 index bb13b1f3cd5a..000000000000 --- a/trunk/arch/powerpc/kvm/timing.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 
2008 - * - * Authors: Christian Ehrhardt - */ - -#ifndef __POWERPC_KVM_EXITTIMING_H__ -#define __POWERPC_KVM_EXITTIMING_H__ - -#include -#include - -#ifdef CONFIG_KVM_EXIT_TIMING -void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); - -static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) -{ - vcpu->arch.last_exit_type = type; -} - -#else -/* if exit timing is not configured there is no need to build the c file */ -static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, - unsigned int id) {} -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} -#endif /* CONFIG_KVM_EXIT_TIMING */ - -/* account the exit in kvm_stats */ -static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) -{ - /* type has to be known at build time for optimization */ - BUILD_BUG_ON(__builtin_constant_p(type)); - switch (type) { - case EXT_INTR_EXITS: - vcpu->stat.ext_intr_exits++; - break; - case DEC_EXITS: - vcpu->stat.dec_exits++; - break; - case EMULATED_INST_EXITS: - vcpu->stat.emulated_inst_exits++; - break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; - case DSI_EXITS: - vcpu->stat.dsi_exits++; - break; - case ISI_EXITS: - vcpu->stat.isi_exits++; - break; - case SYSCALL_EXITS: - vcpu->stat.syscall_exits++; - break; - case DTLB_REAL_MISS_EXITS: - vcpu->stat.dtlb_real_miss_exits++; - break; - case DTLB_VIRT_MISS_EXITS: - vcpu->stat.dtlb_virt_miss_exits++; - break; - case MMIO_EXITS: - vcpu->stat.mmio_exits++; - break; - case ITLB_REAL_MISS_EXITS: - vcpu->stat.itlb_real_miss_exits++; - break; - case ITLB_VIRT_MISS_EXITS: - vcpu->stat.itlb_virt_miss_exits++; - break; - case SIGNAL_EXITS: - vcpu->stat.signal_exits++; - break; - } -} - -/* wrapper to set exit time and account for it in kvm_stats */ -static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type) -{ - kvmppc_set_exit_type(vcpu, type); - kvmppc_account_exit_stat(vcpu, type); -} - -#endif /* __POWERPC_KVM_EXITTIMING_H__ */ diff --git a/trunk/arch/powerpc/platforms/pseries/xics.c b/trunk/arch/powerpc/platforms/pseries/xics.c index 84e058f1e1cc..f7a69021b7bf 100644 --- a/trunk/arch/powerpc/platforms/pseries/xics.c +++ b/trunk/arch/powerpc/platforms/pseries/xics.c @@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) +static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) { unsigned int irq; int status; @@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void) /* Reset affinity to all cpus */ irq_desc[virq].affinity = CPU_MASK_ALL; - desc->chip->set_affinity(virq, cpu_all_mask); + desc->chip->set_affinity(virq, CPU_MASK_ALL); unlock: spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/trunk/arch/powerpc/sysdev/mpic.c b/trunk/arch/powerpc/sysdev/mpic.c index 3e0d89dcdba2..c82babb70074 100644 --- a/trunk/arch/powerpc/sysdev/mpic.c +++ b/trunk/arch/powerpc/sysdev/mpic.c @@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) 
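
The affinity hunks here and below all toggle the same calling convention, and the difference is not cosmetic: a cpumask_t is NR_CPUS bits wide, so at NR_CPUS=4096 the by-value form copies a 512-byte bitmap onto the stack at every call, while the pointer form passes 8 bytes. A standalone illustration; the struct layout is schematic, not the kernel's exact definition:

	#define MY_NR_CPUS 4096

	typedef struct {
		unsigned long bits[MY_NR_CPUS / (8 * sizeof(unsigned long))];
	} my_cpumask_t;				/* 512 bytes at 4096 CPUs */

	/* Reverted convention: the whole bitmap travels by value. */
	static void set_affinity_byval(unsigned int irq, my_cpumask_t mask)
	{
		(void)irq; (void)mask;
	}

	/* Newer convention being backed out: only a pointer is passed. */
	static void set_affinity_byptr(unsigned int irq, const my_cpumask_t *mask)
	{
		(void)irq; (void)mask;
	}
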
+void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) } else { cpumask_t tmp; - cpumask_and(&tmp, cpumask, cpu_online_mask); + cpus_and(tmp, cpumask, cpu_online_map); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); diff --git a/trunk/arch/powerpc/sysdev/mpic.h b/trunk/arch/powerpc/sysdev/mpic.h index 3cef2af10f42..6209c62a426d 100644 --- a/trunk/arch/powerpc/sysdev/mpic.h +++ b/trunk/arch/powerpc/sysdev/mpic.h @@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic) extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); extern void mpic_set_vector(unsigned int virq, unsigned int vector); -extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); +extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask); #endif /* _POWERPC_SYSDEV_MPIC_H */ diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig index 19577aeffd7b..8152fefc97b9 100644 --- a/trunk/arch/s390/Kconfig +++ b/trunk/arch/s390/Kconfig @@ -83,7 +83,6 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK - select INIT_ALL_POSSIBLE source "init/Kconfig" diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c index 3ed5c7a83c6c..6fc78541dc57 100644 --- a/trunk/arch/s390/kernel/smp.c +++ b/trunk/arch/s390/kernel/smp.c @@ -55,6 +55,12 @@ struct _lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); +cpumask_t cpu_online_map = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_online_map); + +cpumask_t cpu_possible_map = CPU_MASK_ALL; +EXPORT_SYMBOL(cpu_possible_map); + static struct task_struct *current_set[NR_CPUS]; static u8 smp_cpu_type; diff --git a/trunk/arch/s390/kernel/time.c b/trunk/arch/s390/kernel/time.c index d649600df5b9..5be981a36c3e 100644 --- a/trunk/arch/s390/kernel/time.c +++ b/trunk/arch/s390/kernel/time.c @@ -160,7 +160,7 @@ void init_cpu_timer(void) cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; - cd->cpumask = cpumask_of(cpu); + cd->cpumask = cpumask_of_cpu(cpu); cd->set_next_event = s390_next_event; cd->set_mode = s390_set_mode; diff --git a/trunk/arch/s390/kvm/kvm-s390.c b/trunk/arch/s390/kvm/kvm-s390.c index be8497186b96..8b00eb2ddf57 100644 --- a/trunk/arch/s390/kvm/kvm-s390.c +++ b/trunk/arch/s390/kvm/kvm-s390.c @@ -113,6 +113,8 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { switch (ext) { + case KVM_CAP_USER_MEMORY: + return 1; default: return 0; } @@ -183,6 +185,8 @@ struct kvm *kvm_arch_create_vm(void) debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); + try_module_get(THIS_MODULE); + return kvm; out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -192,33 +196,13 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(rc); } -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - VCPU_EVENT(vcpu, 3, "%s", "free cpu"); - free_page((unsigned long)(vcpu->arch.sie_block)); - kvm_vcpu_uninit(vcpu); - kfree(vcpu); -} - -static void kvm_free_vcpus(struct kvm *kvm) -{ - unsigned int i; - - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - if (kvm->vcpus[i]) { - kvm_arch_vcpu_destroy(kvm->vcpus[i]); - kvm->vcpus[i] = NULL; - } - } -} - void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_free_vcpus(kvm); + debug_unregister(kvm->arch.dbf); kvm_free_physmem(kvm); free_page((unsigned 
long)(kvm->arch.sca)); - debug_unregister(kvm->arch.dbf); kfree(kvm); + module_put(THIS_MODULE); } /* Section: vcpu related */ @@ -229,7 +213,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - /* Nothing todo */ + /* kvm common code refers to this, but does'nt call it */ + BUG(); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -323,6 +308,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); + try_module_get(THIS_MODULE); + return vcpu; out_free_cpu: kfree(vcpu); @@ -330,6 +317,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, return ERR_PTR(rc); } +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kfree(vcpu); + module_put(THIS_MODULE); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ diff --git a/trunk/arch/sh/include/asm/smp.h b/trunk/arch/sh/include/asm/smp.h index c24e9c6a1736..85b660c17eb0 100644 --- a/trunk/arch/sh/include/asm/smp.h +++ b/trunk/arch/sh/include/asm/smp.h @@ -31,7 +31,7 @@ enum { }; void smp_message_recv(unsigned int msg); -void smp_timer_broadcast(const struct cpumask *mask); +void smp_timer_broadcast(cpumask_t mask); void local_timer_interrupt(void); void local_timer_setup(unsigned int cpu); diff --git a/trunk/arch/sh/include/asm/topology.h b/trunk/arch/sh/include/asm/topology.h index 279d9cc4a007..95f0085e098a 100644 --- a/trunk/arch/sh/include/asm/topology.h +++ b/trunk/arch/sh/include/asm/topology.h @@ -5,6 +5,7 @@ /* sched_domains SD_NODE_INIT for sh machines */ #define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/trunk/arch/sh/kernel/smp.c b/trunk/arch/sh/kernel/smp.c index 8f4027412614..3c5ad1660bbc 100644 --- a/trunk/arch/sh/kernel/smp.c +++ b/trunk/arch/sh/kernel/smp.c @@ -31,6 +31,12 @@ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + +cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); + static inline void __init smp_store_cpu_info(unsigned int cpu) { struct sh_cpuinfo *c = cpu_data + cpu; @@ -184,11 +190,11 @@ void arch_send_call_function_single_ipi(int cpu) plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE); } -void smp_timer_broadcast(const struct cpumask *mask) +void smp_timer_broadcast(cpumask_t mask) { int cpu; - for_each_cpu(cpu, mask) + for_each_cpu_mask(cpu, mask) plat_send_ipi(cpu, SMP_MSG_TIMER); } diff --git a/trunk/arch/sh/kernel/timers/timer-broadcast.c b/trunk/arch/sh/kernel/timers/timer-broadcast.c index 96e8eaea1e62..c2317635230f 100644 --- a/trunk/arch/sh/kernel/timers/timer-broadcast.c +++ b/trunk/arch/sh/kernel/timers/timer-broadcast.c @@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu) clk->mult = 1; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of(cpu); + clk->cpumask = cpumask_of_cpu(cpu); clockevents_register_device(clk); } diff --git a/trunk/arch/sh/kernel/timers/timer-tmu.c b/trunk/arch/sh/kernel/timers/timer-tmu.c index 0db3f9510336..3c61ddd4d43e 100644 --- a/trunk/arch/sh/kernel/timers/timer-tmu.c +++ b/trunk/arch/sh/kernel/timers/timer-tmu.c @@ -263,7 +263,7 @@ static int tmu_timer_init(void) 
tmu0_clockevent.min_delta_ns = clockevent_delta2ns(1, &tmu0_clockevent); - tmu0_clockevent.cpumask = cpumask_of(0); + tmu0_clockevent.cpumask = cpumask_of_cpu(0); clockevents_register_device(&tmu0_clockevent); diff --git a/trunk/arch/sparc/include/asm/smp_32.h b/trunk/arch/sparc/include/asm/smp_32.h index 8408d9d2a662..a8180e546a48 100644 --- a/trunk/arch/sparc/include/asm/smp_32.h +++ b/trunk/arch/sparc/include/asm/smp_32.h @@ -29,6 +29,8 @@ */ extern unsigned char boot_cpu_id; +extern cpumask_t phys_cpu_present_map; +#define cpu_possible_map phys_cpu_present_map typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/trunk/arch/sparc/kernel/irq_64.c b/trunk/arch/sparc/kernel/irq_64.c index cab8e0286871..a3ea2bcb95de 100644 --- a/trunk/arch/sparc/kernel/irq_64.c +++ b/trunk/arch/sparc/kernel/irq_64.c @@ -312,8 +312,7 @@ static void sun4u_irq_enable(unsigned int virt_irq) } } -static void sun4u_set_affinity(unsigned int virt_irq, - const struct cpumask *mask) +static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) { sun4u_irq_enable(virt_irq); } @@ -363,8 +362,7 @@ static void sun4v_irq_enable(unsigned int virt_irq) ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, - const struct cpumask *mask) +static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; unsigned long cpuid = irq_choose_cpu(virt_irq); @@ -431,8 +429,7 @@ static void sun4v_virq_enable(unsigned int virt_irq) dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, - const struct cpumask *mask) +static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) { unsigned long cpuid, dev_handle, dev_ino; int err; @@ -854,7 +851,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - &irq_desc[irq].affinity); + irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } diff --git a/trunk/arch/sparc/kernel/of_device_64.c b/trunk/arch/sparc/kernel/of_device_64.c index 322046cdf85f..46e231f7c5ce 100644 --- a/trunk/arch/sparc/kernel/of_device_64.c +++ b/trunk/arch/sparc/kernel/of_device_64.c @@ -780,7 +780,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, if (nid != -1) { cpumask_t numa_mask = node_to_cpumask(nid); - irq_set_affinity(irq, &numa_mask); + irq_set_affinity(irq, numa_mask); } return irq; diff --git a/trunk/arch/sparc/kernel/pci_msi.c b/trunk/arch/sparc/kernel/pci_msi.c index 0d0cd815e83e..2e680f34f727 100644 --- a/trunk/arch/sparc/kernel/pci_msi.c +++ b/trunk/arch/sparc/kernel/pci_msi.c @@ -288,7 +288,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, if (nid != -1) { cpumask_t numa_mask = node_to_cpumask(nid); - irq_set_affinity(irq, &numa_mask); + irq_set_affinity(irq, numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", diff --git a/trunk/arch/sparc/kernel/smp_32.c b/trunk/arch/sparc/kernel/smp_32.c index 1e5ac4e282e1..e396c1f17a92 100644 --- a/trunk/arch/sparc/kernel/smp_32.c +++ b/trunk/arch/sparc/kernel/smp_32.c @@ -39,6 +39,8 @@ volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; unsigned char boot_cpu_id = 0; unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t phys_cpu_present_map = CPU_MASK_NONE; cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* The only guaranteed locking 
primitive available on all Sparc @@ -332,7 +334,7 @@ void __init smp_setup_cpu_possible_map(void) instance = 0; while (!cpu_find_by_instance(instance, NULL, &mid)) { if (mid < NR_CPUS) { - cpu_set(mid, cpu_possible_map); + cpu_set(mid, phys_cpu_present_map); cpu_set(mid, cpu_present_map); } instance++; @@ -352,7 +354,7 @@ void __init smp_prepare_boot_cpu(void) current_thread_info()->cpu = cpuid; cpu_set(cpuid, cpu_online_map); - cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, phys_cpu_present_map); } int __cpuinit __cpu_up(unsigned int cpu) diff --git a/trunk/arch/sparc/kernel/smp_64.c b/trunk/arch/sparc/kernel/smp_64.c index 46329799f346..bfe99d82d458 100644 --- a/trunk/arch/sparc/kernel/smp_64.c +++ b/trunk/arch/sparc/kernel/smp_64.c @@ -49,10 +49,14 @@ int sparc64_multi_core __read_mostly; +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; +cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +EXPORT_SYMBOL(cpu_possible_map); +EXPORT_SYMBOL(cpu_online_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); diff --git a/trunk/arch/sparc/kernel/sparc_ksyms_32.c b/trunk/arch/sparc/kernel/sparc_ksyms_32.c index e1e97639231b..a4d45fc29b21 100644 --- a/trunk/arch/sparc/kernel/sparc_ksyms_32.c +++ b/trunk/arch/sparc/kernel/sparc_ksyms_32.c @@ -112,6 +112,10 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data); #ifdef CONFIG_SMP /* IRQ implementation. */ EXPORT_SYMBOL(synchronize_irq); + +/* CPU online map and active count. */ +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(phys_cpu_present_map); #endif EXPORT_SYMBOL(__udelay); diff --git a/trunk/arch/sparc/kernel/time_64.c b/trunk/arch/sparc/kernel/time_64.c index 9df8f095a8b1..141da3759091 100644 --- a/trunk/arch/sparc/kernel/time_64.c +++ b/trunk/arch/sparc/kernel/time_64.c @@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void) sevt = &__get_cpu_var(sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); - sevt->cpumask = cpumask_of(smp_processor_id()); + sevt->cpumask = cpumask_of_cpu(smp_processor_id()); clockevents_register_device(sevt); } diff --git a/trunk/arch/um/kernel/smp.c b/trunk/arch/um/kernel/smp.c index 98351c78bc81..045772142844 100644 --- a/trunk/arch/um/kernel/smp.c +++ b/trunk/arch/um/kernel/smp.c @@ -25,6 +25,13 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #include "irq_user.h" #include "os.h" +/* CPU online map, set by smp_boot_cpus */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; + +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); + /* Per CPU bogomips and other parameters * The only piece used here is the ipi pipe, which is set before SMP is * started and never changed. 
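
The clockevent hunks in the s390, sh and sparc files above (and the um file below) follow one pattern: the device's cpumask goes back to being stored by value via cpumask_of_cpu() rather than pointed at via cpumask_of(). A minimal registration sketch in the reverted, 2.6.28-era style; the device fields are abbreviated:

	static void __cpuinit register_local_clockevent(struct clock_event_device *cd,
							unsigned int cpu)
	{
		cd->name = "local-timer";
		cd->rating = 250;
		/* Reverted API: .cpumask holds the mask by value. */
		cd->cpumask = cpumask_of_cpu(cpu);
		clockevents_register_device(cd);
	}
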
diff --git a/trunk/arch/um/kernel/time.c b/trunk/arch/um/kernel/time.c index b13a87a3ec95..47f04f4a3464 100644 --- a/trunk/arch/um/kernel/time.c +++ b/trunk/arch/um/kernel/time.c @@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta, static struct clock_event_device itimer_clockevent = { .name = "itimer", .rating = 250, - .cpumask = cpu_all_mask, + .cpumask = CPU_MASK_ALL, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = itimer_set_mode, .set_next_event = itimer_next_event, diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 249d1e0824b5..0f44add3e0b7 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -601,20 +601,19 @@ config IOMMU_HELPER config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL - select CPUMASK_OFFSTACK + depends on X86_64 && SMP && BROKEN default n help Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. config NR_CPUS - int "Maximum number of CPUs" if SMP && !MAXSMP - range 2 512 if SMP && !MAXSMP - default "1" if !SMP + int "Maximum number of CPUs (2-512)" if !MAXSMP + range 2 512 + depends on SMP default "4096" if MAXSMP - default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) - default "8" if SMP + default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 + default "8" help This allows you to specify the maximum number of CPUs which this kernel will support. The maximum supported value is 512 and the diff --git a/trunk/arch/x86/ia32/ia32_signal.c b/trunk/arch/x86/ia32/ia32_signal.c index 9dabd00e9805..b195f85526e3 100644 --- a/trunk/arch/x86/ia32/ia32_signal.c +++ b/trunk/arch/x86/ia32/ia32_signal.c @@ -24,14 +24,15 @@ #include #include #include +#include #include #include #include #include #include #include + #include -#include #define DEBUG_SIG 0 diff --git a/trunk/arch/x86/ia32/ipc32.c b/trunk/arch/x86/ia32/ipc32.c index 29cdcd02ead3..d21991ce606c 100644 --- a/trunk/arch/x86/ia32/ipc32.c +++ b/trunk/arch/x86/ia32/ipc32.c @@ -8,7 +8,6 @@ #include #include #include -#include asmlinkage long sys32_ipc(u32 call, int first, int second, int third, compat_uptr_t ptr, u32 fifth) diff --git a/trunk/arch/x86/ia32/sys_ia32.c b/trunk/arch/x86/ia32/sys_ia32.c index 6c0d7f6231af..2e09dcd3c0a6 100644 --- a/trunk/arch/x86/ia32/sys_ia32.c +++ b/trunk/arch/x86/ia32/sys_ia32.c @@ -44,8 +44,8 @@ #include #include #include +#include #include -#include #define AA(__x) ((unsigned long)(__x)) diff --git a/trunk/arch/x86/include/asm/apic.h b/trunk/arch/x86/include/asm/apic.h index ab1d51a8855e..25caa0738af5 100644 --- a/trunk/arch/x86/include/asm/apic.h +++ b/trunk/arch/x86/include/asm/apic.h @@ -54,6 +54,7 @@ extern int disable_apic; extern int is_vsmp_box(void); extern void xapic_wait_icr_idle(void); extern u32 safe_xapic_wait_icr_idle(void); +extern u64 xapic_icr_read(void); extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); @@ -92,7 +93,7 @@ static inline u32 native_apic_msr_read(u32 reg) } #ifndef CONFIG_X86_32 -extern int x2apic; +extern int x2apic, x2apic_preenabled; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); diff --git a/trunk/arch/x86/include/asm/bigsmp/apic.h b/trunk/arch/x86/include/asm/bigsmp/apic.h index d8dd9f537911..ce547f24a1cd 100644 --- a/trunk/arch/x86/include/asm/bigsmp/apic.h +++ b/trunk/arch/x86/include/asm/bigsmp/apic.h @@ -9,12 +9,12 @@ static inline int 
apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { #ifdef CONFIG_SMP - return &cpu_online_map; + return cpu_online_map; #else - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); #endif } @@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids) + if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); return BAD_APICID; @@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= nr_cpu_ids) + if (cpu >= NR_CPUS) return BAD_APICID; return cpu_physical_id(cpu); } @@ -119,34 +119,16 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; int apicid; - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return cpu_to_logical_apicid(cpu); - - return BAD_APICID; -} - static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/trunk/arch/x86/include/asm/bigsmp/ipi.h b/trunk/arch/x86/include/asm/bigsmp/ipi.h index 27fcd01b3ae6..9404c535b7ec 100644 --- a/trunk/arch/x86/include/asm/bigsmp/ipi.h +++ b/trunk/arch/x86/include/asm/bigsmp/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/trunk/arch/x86/include/asm/desc.h b/trunk/arch/x86/include/asm/desc.h index dc27705f5443..e6b82b17b072 100644 --- a/trunk/arch/x86/include/asm/desc.h +++ b/trunk/arch/x86/include/asm/desc.h @@ -320,14 +320,16 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + extern int first_system_vector; -/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ -extern unsigned long used_vectors[]; +extern char system_vectors[]; static inline void alloc_system_vector(int vector) { - if (!test_bit(vector, used_vectors)) { - set_bit(vector, used_vectors); + if (system_vectors[vector] == 
SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/trunk/arch/x86/include/asm/efi.h b/trunk/arch/x86/include/asm/efi.h index ca5ffb2856b6..a2e545c91c35 100644 --- a/trunk/arch/x86/include/asm/efi.h +++ b/trunk/arch/x86/include/asm/efi.h @@ -90,7 +90,6 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ -extern int add_efi_memmap; extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/trunk/arch/x86/include/asm/es7000/apic.h b/trunk/arch/x86/include/asm/es7000/apic.h index 51ac1230294e..e24ef876915f 100644 --- a/trunk/arch/x86/include/asm/es7000/apic.h +++ b/trunk/arch/x86/include/asm/es7000/apic.h @@ -9,14 +9,14 @@ static inline int apic_id_registered(void) return (1); } -static inline const cpumask_t *target_cpus_cluster(void) +static inline cpumask_t target_cpus_cluster(void) { - return &CPU_MASK_ALL; + return CPU_MASK_ALL; } -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { - return &cpumask_of_cpu(smp_processor_id()); + return cpumask_of_cpu(smp_processor_id()); } #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) @@ -80,10 +80,9 @@ extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*target_cpus())[0]); + "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) @@ -101,7 +100,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) + else if (mps_cpu < NR_CPUS) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -121,9 +120,9 @@ extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif @@ -147,15 +146,14 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int -cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpumask_weight(cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return 0xFF; @@ -163,10 +161,10 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. 
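
The cpus_found/num_bits_set loop above (and its twin in cpu_mask_to_apicid just below) is verifying that every CPU in the mask falls in a single logical APIC cluster before an ID is returned. The same check written as a plain scan, for readers untangling the loop; sketch only, using the helpers already present in these headers:

	/* Returns 1 if all CPUs in the mask share one logical APIC cluster. */
	static int mask_in_one_cluster(cpumask_t mask)
	{
		int cpu, first_apicid = -1;

		for_each_cpu_mask(cpu, mask) {
			int apicid = cpu_to_logical_apicid(cpu);

			if (first_apicid < 0)
				first_apicid = apicid;
			else if (apicid_cluster(apicid) !=
				 apicid_cluster(first_apicid))
				return 0;	/* mask spans clusters */
		}
		return 1;
	}
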
*/ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -181,14 +179,14 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return cpu_to_logical_apicid(0); @@ -196,52 +194,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. 
- */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -253,8 +209,6 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, } cpu++; } -exit: - free_cpumask_var(cpumask); return apicid; } diff --git a/trunk/arch/x86/include/asm/es7000/ipi.h b/trunk/arch/x86/include/asm/es7000/ipi.h index 7e8ed24d4b8a..632a955fcc0a 100644 --- a/trunk/arch/x86/include/asm/es7000/ipi.h +++ b/trunk/arch/x86/include/asm/es7000/ipi.h @@ -1,22 +1,24 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/trunk/arch/x86/include/asm/genapic_32.h b/trunk/arch/x86/include/asm/genapic_32.h index 746f37a7963a..0ac17d33a8c7 100644 --- a/trunk/arch/x86/include/asm/genapic_32.h +++ b/trunk/arch/x86/include/asm/genapic_32.h @@ -24,7 +24,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); + cpumask_t (*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,16 +57,12 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + cpumask_t (*vector_allocation_domain)(int cpu); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif @@ -118,7 +114,6 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ diff --git a/trunk/arch/x86/include/asm/genapic_64.h b/trunk/arch/x86/include/asm/genapic_64.h index adf32fb56aa6..2cae011668b7 100644 --- a/trunk/arch/x86/include/asm/genapic_64.h +++ b/trunk/arch/x86/include/asm/genapic_64.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H -#include - /* * Copyright 2004 James Cleverdon, IBM. 
* Subject to the GNU Public License, v.2 @@ -20,20 +18,16 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - const struct cpumask *(*target_cpus)(void); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + cpumask_t (*target_cpus)(void); + cpumask_t (*vector_allocation_domain)(int cpu); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); + void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/trunk/arch/x86/include/asm/ipi.h b/trunk/arch/x86/include/asm/ipi.h index c745a306f7d3..f89dffb28aa9 100644 --- a/trunk/arch/x86/include/asm/ipi.h +++ b/trunk/arch/x86/include/asm/ipi.h @@ -117,8 +117,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(const struct cpumask *mask, - int vector) +static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; @@ -129,29 +128,11 @@ static inline void send_IPI_mask_sequence(const struct cpumask *mask, * - mbligh */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static inline void send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); -} - #endif /* _ASM_X86_IPI_H */ diff --git a/trunk/arch/x86/include/asm/irq.h b/trunk/arch/x86/include/asm/irq.h index 592688ed04d3..28e409fc73f3 100644 --- a/trunk/arch/x86/include/asm/irq.h +++ b/trunk/arch/x86/include/asm/irq.h @@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include -extern void fixup_irqs(void); +extern void fixup_irqs(cpumask_t map); #endif extern unsigned int do_IRQ(struct pt_regs *regs); @@ -42,6 +42,5 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); -extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/trunk/arch/x86/include/asm/kvm_host.h b/trunk/arch/x86/include/asm/kvm_host.h index 97215a458e5f..8346be87cfa1 100644 --- a/trunk/arch/x86/include/asm/kvm_host.h +++ b/trunk/arch/x86/include/asm/kvm_host.h @@ -21,7 +21,6 @@ #include #include -#include #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -87,7 +86,6 @@ #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 -#define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 extern 
spinlock_t kvm_lock; @@ -182,8 +180,6 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; - struct list_head oos_link; - /* * The following two entries are used to key the shadow page in the * hash table. @@ -194,16 +190,13 @@ struct kvm_mmu_page { u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - /* - * One bit set per slot which has memory - * in this shadow page. - */ - DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + unsigned long slot_bitmap; /* One bit set per slot which has memory + * in this shadow page. + */ int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool global; - unsigned int unsync_children; + bool unsync_children; union { u64 *parent_pte; /* !multimapped */ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ @@ -334,10 +327,8 @@ struct kvm_vcpu_arch { bool nmi_pending; bool nmi_injected; - bool nmi_window_open; - struct mtrr_state_type mtrr_state; - u32 pat; + u64 mtrr[0x100]; }; struct kvm_mem_alias { @@ -359,13 +350,11 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; - struct list_head oos_global_pages; struct dmar_domain *intel_iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; struct hlist_head irq_ack_notifier_list; - int vapics_in_nmi_mode; int round_robin_prev_vcpu; unsigned int tss_addr; @@ -389,7 +378,6 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; - u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -409,7 +397,6 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; - u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; @@ -418,7 +405,6 @@ struct kvm_vcpu_stat { u32 insn_emulation_fail; u32 hypercalls; u32 irq_injections; - u32 nmi_injections; }; struct descriptor_table { @@ -491,7 +477,6 @@ struct kvm_x86_ops { int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); - int (*get_mt_mask_shift)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -505,7 +490,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); @@ -602,14 +587,12 @@ unsigned long segment_base(u16 selector); void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes, - bool guest_initiated); + const u8 *new, int bytes); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); @@ -624,8 +607,6 @@ void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); -struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); - static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> 
PAGE_SHIFT); @@ -721,6 +702,18 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" + #define MSR_IA32_TIME_STAMP_COUNTER 0x010 #define TSS_IOPB_BASE_OFFSET 0x66 diff --git a/trunk/arch/x86/include/asm/kvm_x86_emulate.h b/trunk/arch/x86/include/asm/kvm_x86_emulate.h index 6a159732881a..25179a29f208 100644 --- a/trunk/arch/x86/include/asm/kvm_x86_emulate.h +++ b/trunk/arch/x86/include/asm/kvm_x86_emulate.h @@ -123,7 +123,6 @@ struct decode_cache { u8 ad_bytes; u8 rex_prefix; struct operand src; - struct operand src2; struct operand dst; bool has_seg_override; u8 seg_override; @@ -147,18 +146,22 @@ struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; + /* Linear faulting address (if emulating a page-faulting instruction) */ unsigned long eflags; + /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; + u32 cs_base; /* decode cache */ + struct decode_cache decode; }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -167,7 +170,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ /* Host execution mode. 
 */
-#if defined(CONFIG_X86_32)
+#if defined(__i386__)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
 #elif defined(CONFIG_X86_64)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/trunk/arch/x86/include/asm/mach-default/mach_apic.h b/trunk/arch/x86/include/asm/mach-default/mach_apic.h
index cc09cbbee27e..6cb3a467e067 100644
--- a/trunk/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/trunk/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 #define APIC_DFR_VALUE (APIC_DFR_FLAT)
-static inline const struct cpumask *target_cpus(void)
+static inline cpumask_t target_cpus(void)
 {
 #ifdef CONFIG_SMP
-	return cpu_online_mask;
+	return cpu_online_map;
 #else
-	return cpumask_of(0);
+	return cpumask_of_cpu(0);
 #endif
 }
@@ -28,7 +28,6 @@ static inline const struct cpumask *target_cpus(void)
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
-#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define phys_pkg_id (genapic->phys_pkg_id)
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
@@ -62,19 +61,9 @@ static inline int apic_id_registered(void)
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
-static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 {
-	return cpumask_bits(cpumask)[0];
-}
-
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-						  const struct cpumask *andmask)
-{
-	unsigned long mask1 = cpumask_bits(cpumask)[0];
-	unsigned long mask2 = cpumask_bits(andmask)[0];
-	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
-
-	return (unsigned int)(mask1 & mask2 & mask3);
+	return cpus_addr(cpumask)[0];
 }
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -99,7 +88,7 @@ static inline int apicid_to_node(int logical_apicid)
 #endif
 }
-static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
+static inline cpumask_t vector_allocation_domain(int cpu)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -109,7 +98,7 @@ static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
*/ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } #endif @@ -141,7 +131,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; diff --git a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h index 191312d155da..fabca01ebacf 100644 --- a/trunk/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/trunk/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,8 +4,7 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_bitmask(cpumask_t mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -13,27 +12,28 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include #define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_bitmask(mask, vector); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { - if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask_allbutself(cpu_online_mask, vector); - else + if (no_broadcast || vector == NMI_VECTOR) { + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + send_IPI_mask(mask, vector); + } else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h index 48553e958ad5..e430f47df667 100644 --- a/trunk/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/trunk/arch/x86/include/asm/mach-generic/mach_apic.h @@ -24,7 +24,6 @@ #define check_phys_apicid_present (genapic->check_phys_apicid_present) #define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) diff --git a/trunk/arch/x86/include/asm/mpspec.h b/trunk/arch/x86/include/asm/mpspec.h index 62d14ce3cd00..91885c28f66b 100644 --- a/trunk/arch/x86/include/asm/mpspec.h +++ b/trunk/arch/x86/include/asm/mpspec.h @@ -6,13 +6,13 @@ #include extern int apic_version[MAX_APICS]; -extern int pic_mode; #ifdef CONFIG_X86_32 #include extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; +extern int pic_mode; #ifdef CONFIG_X86_NUMAQ extern int mp_bus_id_to_node[MAX_MP_BUSSES]; diff --git a/trunk/arch/x86/include/asm/mtrr.h b/trunk/arch/x86/include/asm/mtrr.h index cb988aab716d..7c1e4258b31e 100644 --- a/trunk/arch/x86/include/asm/mtrr.h +++ b/trunk/arch/x86/include/asm/mtrr.h @@ -57,31 +57,6 @@ struct mtrr_gentry { }; 
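/*
 * Illustrative sketch (assumptions noted): the MTRRphysBase_MSR() and
 * MTRRphysMask_MSR() macros removed from this header just below (this
 * patch re-adds them in arch/x86/kernel/cpu/mtrr/mtrr.h, further down)
 * address the MSR pair of one variable-range MTRR: range 'reg' owns the
 * two consecutive MSRs 0x200 + 2*reg (base) and 0x200 + 2*reg + 1
 * (mask). The rdmsr() helper is assumed from <asm/msr.h>.
 */
static inline void sketch_read_var_mtrr(unsigned int reg,
					u32 *base_lo, u32 *base_hi,
					u32 *mask_lo, u32 *mask_hi)
{
	rdmsr(0x200 + 2 * reg, *base_lo, *base_hi);	/* MTRRphysBase_MSR(reg) */
	rdmsr(0x200 + 2 * reg + 1, *mask_lo, *mask_hi);	/* MTRRphysMask_MSR(reg) */
}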
#endif /* !__i386__ */ -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - -#define MTRR_NUM_FIXED_RANGES 88 -#define MTRR_MAX_VAR_RANGES 256 - -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - /* These are the various ioctls */ #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) diff --git a/trunk/arch/x86/include/asm/numaq/apic.h b/trunk/arch/x86/include/asm/numaq/apic.h index c80f00d29965..0bf2a06b7a4e 100644 --- a/trunk/arch/x86/include/asm/numaq/apic.h +++ b/trunk/arch/x86/include/asm/numaq/apic.h @@ -7,9 +7,9 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { - return &CPU_MASK_ALL; + return CPU_MASK_ALL; } #define NO_BALANCE_IRQ (1) @@ -122,13 +122,7 @@ static inline void enable_apic_mode(void) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return (int) 0xF; -} - -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { return (int) 0xF; } diff --git a/trunk/arch/x86/include/asm/numaq/ipi.h b/trunk/arch/x86/include/asm/numaq/ipi.h index a8374c652778..935588d286cf 100644 --- a/trunk/arch/x86/include/asm/numaq/ipi.h +++ b/trunk/arch/x86/include/asm/numaq/ipi.h @@ -1,22 +1,25 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void send_IPI_mask_sequence(cpumask_t, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/trunk/arch/x86/include/asm/smp.h b/trunk/arch/x86/include/asm/smp.h index 830b9fcb6427..d12811ce51d9 100644 --- a/trunk/arch/x86/include/asm/smp.h +++ b/trunk/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(const struct cpumask *mask); + void (*send_call_func_ipi)(cpumask_t mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(&mask); + smp_ops.send_call_func_ipi(mask); } void cpu_disable_common(void); @@ -138,7 +138,7 @@ void 
native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(const struct cpumask *mask); +void native_send_call_func_ipi(cpumask_t mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/trunk/arch/x86/include/asm/summit/apic.h b/trunk/arch/x86/include/asm/summit/apic.h index 99327d1be49f..9b3070f1c2ac 100644 --- a/trunk/arch/x86/include/asm/summit/apic.h +++ b/trunk/arch/x86/include/asm/summit/apic.h @@ -14,13 +14,13 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline cpumask_t target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. IRQ balancing will spread load */ - return &cpumask_of_cpu(0); + return cpumask_of_cpu(0); } #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -137,14 +137,14 @@ static inline void enable_apic_mode(void) { } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; @@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(*cpumask); + cpu = first_cpu(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { + if (cpu_isset(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -170,49 +170,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = 0xFF; - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return (int) 0xFF; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } -exit: - free_cpumask_var(cpumask); - return apicid; -} - /* cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. 
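/*
 * Illustrative sketch of the cpumask_t "all but self" IPI idiom this
 * patch restores (see the summit/ipi.h hunk just below, and the numaq
 * variant above): take a by-value copy of cpu_online_map, clear the
 * sending CPU, and only raise the IPI if any CPU remains. Assumes the
 * old cpumask_t helpers cpu_clear()/cpus_empty() and a platform
 * send_IPI_mask() implementation.
 */
static inline void sketch_send_IPI_allbutself(int vector)
{
	cpumask_t mask = cpu_online_map;	/* by-value copy, safe to modify */

	cpu_clear(smp_processor_id(), mask);	/* never IPI ourselves */
	if (!cpus_empty(mask))
		send_IPI_mask(mask, vector);
}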
diff --git a/trunk/arch/x86/include/asm/summit/ipi.h b/trunk/arch/x86/include/asm/summit/ipi.h index a8a2c24f50cc..53bd1e7bd7b4 100644 --- a/trunk/arch/x86/include/asm/summit/ipi.h +++ b/trunk/arch/x86/include/asm/summit/ipi.h @@ -1,10 +1,9 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void send_IPI_mask_sequence(cpumask_t mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -15,12 +14,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + send_IPI_mask(mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + send_IPI_mask(cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/trunk/arch/x86/include/asm/sys_ia32.h b/trunk/arch/x86/include/asm/sys_ia32.h deleted file mode 100644 index ffb08be2a530..000000000000 --- a/trunk/arch/x86/include/asm/sys_ia32.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * sys_ia32.h - Linux ia32 syscall interfaces - * - * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#ifndef _ASM_X86_SYS_IA32_H -#define _ASM_X86_SYS_IA32_H - -#include -#include -#include -#include -#include -#include - -/* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); -asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); - -asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); -asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, char __user *, - struct stat64 __user *, int); -struct mmap_arg_struct; -asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); -asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); - -asmlinkage long sys32_pipe(int __user *); -struct sigaction32; -struct old_sigaction32; -asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, - struct sigaction32 __user *, unsigned int); -asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, - struct old_sigaction32 __user *); -asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, - compat_sigset_t __user *, unsigned int); -asmlinkage long sys32_alarm(unsigned int); - -struct sel_arg_struct; -asmlinkage long sys32_old_select(struct sel_arg_struct __user *); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); -asmlinkage long sys32_sysfs(int, u32, u32); - -asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, - struct compat_timespec __user *); -asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); -asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); - -#ifdef CONFIG_SYSCTL_SYSCALL -struct sysctl_ia32; -asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); -#endif - -asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); - -asmlinkage long sys32_personality(unsigned long); -asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); - -asmlinkage long 
sys32_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - -struct oldold_utsname; -struct old_utsname; -asmlinkage long sys32_olduname(struct oldold_utsname __user *); -long sys32_uname(struct old_utsname __user *); - -long sys32_ustat(unsigned, struct ustat32 __user *); - -asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, - compat_uptr_t __user *, struct pt_regs *); -asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); - -long sys32_lseek(unsigned int, int, unsigned int); -long sys32_kill(int, int); -long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); -long sys32_vm86_warning(void); -long sys32_lookup_dcookie(u32, u32, char __user *, size_t); - -asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); -asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, - unsigned, unsigned, int); -asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); -asmlinkage long sys32_fallocate(int, int, unsigned, - unsigned, unsigned, unsigned); - -/* ia32/ia32_signal.c */ -asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); -asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, - stack_ia32_t __user *, struct pt_regs *); -asmlinkage long sys32_sigreturn(struct pt_regs *); -asmlinkage long sys32_rt_sigreturn(struct pt_regs *); - -/* ia32/ipc32.c */ -asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); -#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/trunk/arch/x86/include/asm/topology.h b/trunk/arch/x86/include/asm/topology.h index 79e31e9dcdda..ff386ff50ed7 100644 --- a/trunk/arch/x86/include/asm/topology.h +++ b/trunk/arch/x86/include/asm/topology.h @@ -226,8 +226,6 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes diff --git a/trunk/arch/x86/include/asm/uv/uv_bau.h b/trunk/arch/x86/include/asm/uv/uv_bau.h index 50423c7b56b2..e2363253bbbf 100644 --- a/trunk/arch/x86/include/asm/uv/uv_bau.h +++ b/trunk/arch/x86/include/asm/uv/uv_bau.h @@ -133,61 +133,61 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. 
*/ struct bau_msg_header { - unsigned int dest_subnodeid:6; /* must be zero */ + int dest_subnodeid:6; /* must be zero */ /* bits 5:0 */ - unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ - /* bits 20:6 */ /* first bit in node_map */ - unsigned int command:8; /* message type */ + int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ + /* bits 20:6 */ + int command:8; /* message type */ /* bits 28:21 */ /* 0x38: SN3net EndPoint Message */ - unsigned int rsvd_1:3; /* must be zero */ + int rsvd_1:3; /* must be zero */ /* bits 31:29 */ /* int will align on 32 bits */ - unsigned int rsvd_2:9; /* must be zero */ + int rsvd_2:9; /* must be zero */ /* bits 40:32 */ /* Suppl_A is 56-41 */ - unsigned int payload_2a:8;/* becomes byte 16 of msg */ + int payload_2a:8; /* becomes byte 16 of msg */ /* bits 48:41 */ /* not currently using */ - unsigned int payload_2b:8;/* becomes byte 17 of msg */ + int payload_2b:8; /* becomes byte 17 of msg */ /* bits 56:49 */ /* not currently using */ /* Address field (96:57) is never used as an address (these are address bits 42:3) */ - unsigned int rsvd_3:1; /* must be zero */ + int rsvd_3:1; /* must be zero */ /* bit 57 */ /* address bits 27:4 are payload */ /* these 24 bits become bytes 12-14 of msg */ - unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ + int replied_to:1; /* sent as 0 by the source to byte 12 */ /* bit 58 */ - unsigned int payload_1a:5;/* not currently used */ + int payload_1a:5; /* not currently used */ /* bits 63:59 */ - unsigned int payload_1b:8;/* not currently used */ + int payload_1b:8; /* not currently used */ /* bits 71:64 */ - unsigned int payload_1c:8;/* not currently used */ + int payload_1c:8; /* not currently used */ /* bits 79:72 */ - unsigned int payload_1d:2;/* not currently used */ + int payload_1d:2; /* not currently used */ /* bits 81:80 */ - unsigned int rsvd_4:7; /* must be zero */ + int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - unsigned int sw_ack_flag:1;/* software acknowledge flag */ + int sw_ack_flag:1; /* software acknowledge flag */ /* bit 89 */ /* INTD trasactions at destination are to wait for software acknowledge */ - unsigned int rsvd_5:6; /* must be zero */ + int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - unsigned int rsvd_6:5; /* must be zero */ + int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ + int int_both:1; /* if 1, interrupt both sockets on the blade */ /* bit 101*/ - unsigned int fairness:3;/* usually zero */ + int fairness:3; /* usually zero */ /* bits 104:102 */ - unsigned int multilevel:1; /* multi-level multicast format */ + int multilevel:1; /* multi-level multicast format */ /* bit 105 */ /* 0 for TLB: endpoint multi-unicast messages */ - unsigned int chaining:1;/* next descriptor is part of this activation*/ + int chaining:1; /* next descriptor is part of this activation*/ /* bit 106 */ - unsigned int rsvd_7:21; /* must be zero */ + int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; diff --git a/trunk/arch/x86/include/asm/virtext.h b/trunk/arch/x86/include/asm/virtext.h deleted file mode 100644 index 593636275238..000000000000 --- a/trunk/arch/x86/include/asm/virtext.h +++ /dev/null @@ -1,132 +0,0 @@ -/* CPU virtualization extensions handling - * - * This should carry the code for handling CPU virtualization extensions - * that needs to live in the kernel core. - * - * Author: Eduardo Habkost - * - * Copyright (C) 2008, Red Hat Inc. 
- * - * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ -#ifndef _ASM_X86_VIRTEX_H -#define _ASM_X86_VIRTEX_H - -#include -#include - -#include -#include - -/* - * VMX functions: - */ - -static inline int cpu_has_vmx(void) -{ - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ -} - - -/** Disable VMX on the current CPU - * - * vmxoff causes a undefined-opcode exception if vmxon was not run - * on the CPU previously. Only call this function if you know VMX - * is enabled. - */ -static inline void cpu_vmxoff(void) -{ - asm volatile (ASM_VMX_VMXOFF : : : "cc"); - write_cr4(read_cr4() & ~X86_CR4_VMXE); -} - -static inline int cpu_vmx_enabled(void) -{ - return read_cr4() & X86_CR4_VMXE; -} - -/** Disable VMX if it is enabled on the current CPU - * - * You shouldn't call this if cpu_has_vmx() returns 0. - */ -static inline void __cpu_emergency_vmxoff(void) -{ - if (cpu_vmx_enabled()) - cpu_vmxoff(); -} - -/** Disable VMX if it is supported and enabled on the current CPU - */ -static inline void cpu_emergency_vmxoff(void) -{ - if (cpu_has_vmx()) - __cpu_emergency_vmxoff(); -} - - - - -/* - * SVM functions: - */ - -/** Check if the CPU has SVM support - * - * You can use the 'msg' arg to get a message describing the problem, - * if the function returns zero. Simply pass NULL if you are not interested - * on the messages; gcc should take care of not generating code for - * the messages on this case. - */ -static inline int cpu_has_svm(const char **msg) -{ - uint32_t eax, ebx, ecx, edx; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - if (msg) - *msg = "not amd"; - return 0; - } - - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - if (eax < SVM_CPUID_FUNC) { - if (msg) - *msg = "can't execute cpuid_8000000a"; - return 0; - } - - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { - if (msg) - *msg = "svm not available"; - return 0; - } - return 1; -} - - -/** Disable SVM on the current CPU - * - * You should call this only if cpu_has_svm() returned true. - */ -static inline void cpu_svm_disable(void) -{ - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); -} - -/** Makes sure SVM is disabled, if it is supported on the CPU - */ -static inline void cpu_emergency_svm_disable(void) -{ - if (cpu_has_svm(NULL)) - cpu_svm_disable(); -} - -#endif /* _ASM_X86_VIRTEX_H */ diff --git a/trunk/arch/x86/kernel/amd_iommu.c b/trunk/arch/x86/kernel/amd_iommu.c index 658e29e0f49b..2e2da717b350 100644 --- a/trunk/arch/x86/kernel/amd_iommu.c +++ b/trunk/arch/x86/kernel/amd_iommu.c @@ -1296,7 +1296,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) * we don't need to preallocate the protection domains anymore. * For now we have to. 
*/ -static void prealloc_protection_domains(void) +void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; diff --git a/trunk/arch/x86/kernel/amd_iommu_init.c b/trunk/arch/x86/kernel/amd_iommu_init.c index fb85e8d466cc..c625800c55ca 100644 --- a/trunk/arch/x86/kernel/amd_iommu_init.c +++ b/trunk/arch/x86/kernel/amd_iommu_init.c @@ -243,7 +243,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) } /* Function to enable the hardware */ -static void __init iommu_enable(struct amd_iommu *iommu) +void __init iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " "at %02x:%02x.%x cap 0x%hx\n", @@ -256,7 +256,7 @@ static void __init iommu_enable(struct amd_iommu *iommu) } /* Function to enable IOMMU event logging and event interrupts */ -static void __init iommu_enable_event_logging(struct amd_iommu *iommu) +void __init iommu_enable_event_logging(struct amd_iommu *iommu) { iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); diff --git a/trunk/arch/x86/kernel/apic.c b/trunk/arch/x86/kernel/apic.c index d652515e2855..b5229affb953 100644 --- a/trunk/arch/x86/kernel/apic.c +++ b/trunk/arch/x86/kernel/apic.c @@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer); #ifdef HAVE_X2APIC int x2apic; /* x2apic enabled before OS handover */ -static int x2apic_preenabled; -static int disable_x2apic; +int x2apic_preenabled; +int disable_x2apic; static __init int setup_nox2apic(char *str) { disable_x2apic = 1; @@ -119,6 +119,8 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); int first_system_vector = 0xfe; +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ @@ -140,7 +142,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(const cpumask_t *mask); +static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); /* @@ -226,7 +228,7 @@ void xapic_icr_write(u32 low, u32 id) apic_write(APIC_ICR, low); } -static u64 xapic_icr_read(void) +u64 xapic_icr_read(void) { u32 icr1, icr2; @@ -266,7 +268,7 @@ void x2apic_icr_write(u32 low, u32 id) wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } -static u64 x2apic_icr_read(void) +u64 x2apic_icr_read(void) { unsigned long val; @@ -453,7 +455,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(const cpumask_t *mask) +static void lapic_timer_broadcast(cpumask_t mask) { #ifdef CONFIG_SMP send_IPI_mask(mask, LOCAL_TIMER_VECTOR); @@ -469,7 +471,7 @@ static void __cpuinit setup_APIC_timer(void) struct clock_event_device *levt = &__get_cpu_var(lapic_events); memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of(smp_processor_id()); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); clockevents_register_device(levt); } @@ -1805,32 +1807,28 @@ void disconnect_bsp_APIC(int virt_wire_setup) void __cpuinit generic_processor_info(int apicid, int version) { int cpu; + cpumask_t tmp_map; /* * Validate version */ if (version == 0x0) { pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); + "fixing up to 0x10. 
(tell your hw vendor)\n", + version); version = 0x10; } apic_version[apicid] = version; - if (num_processors >= nr_cpu_ids) { - int max = nr_cpu_ids; - int thiscpu = max + disabled_cpus; - - pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." - " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; + if (num_processors >= NR_CPUS) { + pr_warning("WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); return; } num_processors++; - cpu = cpumask_next_zero(-1, cpu_present_mask); + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { @@ -1880,8 +1878,8 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif - set_cpu_possible(cpu, true); - set_cpu_present(cpu, true); + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } #ifdef CONFIG_X86_64 @@ -2083,7 +2081,7 @@ __cpuinit int apic_is_clustered_box(void) bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { /* are we being called early in kernel startup? */ if (bios_cpu_apicid) { id = bios_cpu_apicid[i]; diff --git a/trunk/arch/x86/kernel/bios_uv.c b/trunk/arch/x86/kernel/bios_uv.c index f63882728d91..2a0a2a3cac26 100644 --- a/trunk/arch/x86/kernel/bios_uv.c +++ b/trunk/arch/x86/kernel/bios_uv.c @@ -25,7 +25,7 @@ #include #include -static struct uv_systab uv_systab; +struct uv_systab uv_systab; s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c index c6ecda64f5f1..68b5d8681cbb 100644 --- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -534,16 +534,31 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } -static void get_cpu_leaves(void *_retval) +static int __cpuinit detect_cache_attributes(unsigned int cpu) { - int j, *retval = _retval, cpu = smp_processor_id(); + struct _cpuid4_info *this_leaf; + unsigned long j; + int retval; + cpumask_t oldmask; + + if (num_cache_leaves == 0) + return -ENOENT; + + per_cpu(cpuid4_info, cpu) = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (per_cpu(cpuid4_info, cpu) == NULL) + return -ENOMEM; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + if (retval) + goto out; /* Do cpuid and store the results */ for (j = 0; j < num_cache_leaves; j++) { - struct _cpuid4_info *this_leaf; this_leaf = CPUID4_INFO_IDX(cpu, j); - *retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(*retval < 0)) { + retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(retval < 0)) { int i; for (i = 0; i < j; i++) @@ -552,21 +567,9 @@ static void get_cpu_leaves(void *_retval) } cache_shared_cpu_map_setup(cpu, j); } -} - -static int __cpuinit detect_cache_attributes(unsigned int cpu) -{ - int retval; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) - return -ENOMEM; + set_cpus_allowed_ptr(current, &oldmask); - smp_call_function_single(cpu, get_cpu_leaves, &retval, true); +out: if (retval) { kfree(per_cpu(cpuid4_info, cpu)); per_cpu(cpuid4_info, cpu) = NULL; @@ -623,8 +626,8 
@@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, cpumask_t *mask = &this_leaf->shared_cpu_map; n = type? - cpulist_scnprintf(buf, len-2, mask) : - cpumask_scnprintf(buf, len-2, mask); + cpulist_scnprintf(buf, len-2, *mask): + cpumask_scnprintf(buf, len-2, *mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index a5a5e0530370..748c8f9e7a05 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -83,41 +83,34 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ * CPU Initialization */ -struct thresh_restart { - struct threshold_block *b; - int reset; - u16 old_limit; -}; - /* must be called with correct cpu affinity */ -static long threshold_restart_bank(void *_tr) +static void threshold_restart_bank(struct threshold_block *b, + int reset, u16 old_limit) { - struct thresh_restart *tr = _tr; u32 mci_misc_hi, mci_misc_lo; - rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + rdmsr(b->address, mci_misc_lo, mci_misc_hi); - if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + reset = 1; /* limit cannot be lower than err count */ - if (tr->reset) { /* reset err count and overflow bit */ + if (reset) { /* reset err count and overflow bit */ mci_misc_hi = (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); - } else if (tr->old_limit) { /* change limit w/o reset */ + (THRESHOLD_MAX - b->threshold_limit); + } else if (old_limit) { /* change limit w/o reset */ int new_count = (mci_misc_hi & THRESHOLD_MAX) + - (tr->old_limit - tr->b->threshold_limit); + (old_limit - b->threshold_limit); mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } - tr->b->interrupt_enable ? + b->interrupt_enable ? 
(mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - return 0; + wrmsr(b->address, mci_misc_lo, mci_misc_hi); } /* cpu init entry point, called from mce.c with preempt off */ @@ -127,7 +120,6 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u8 lvt_off; u32 low = 0, high = 0, address = 0; - struct thresh_restart tr; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -170,10 +162,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) wrmsr(address, low, high); threshold_defaults.address = address; - tr.b = &threshold_defaults; - tr.reset = 0; - tr.old_limit = 0; - threshold_restart_bank(&tr); + threshold_restart_bank(&threshold_defaults, 0, 0); } } } @@ -262,6 +251,20 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; +static void affinity_set(unsigned int cpu, cpumask_t *oldmask, + cpumask_t *newmask) +{ + *oldmask = current->cpus_allowed; + cpus_clear(*newmask); + cpu_set(cpu, *newmask); + set_cpus_allowed_ptr(current, newmask); +} + +static void affinity_restore(const cpumask_t *oldmask) +{ + set_cpus_allowed_ptr(current, oldmask); +} + #define SHOW_FIELDS(name) \ static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ { \ @@ -274,16 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - tr.b = b; - tr.reset = 0; - tr.old_limit = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, 0); + affinity_restore(&oldmask); return end - buf; } @@ -292,7 +294,8 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - struct thresh_restart tr; + cpumask_t oldmask, newmask; + u16 old; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; @@ -300,36 +303,34 @@ static ssize_t store_threshold_limit(struct threshold_block *b, new = THRESHOLD_MAX; if (new < 1) new = 1; - tr.old_limit = b->threshold_limit; + old = b->threshold_limit; b->threshold_limit = new; - tr.b = b; - tr.reset = 0; - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + affinity_set(b->cpu, &oldmask, &newmask); + threshold_restart_bank(b, 0, old); + affinity_restore(&oldmask); return end - buf; } -static long local_error_count(void *_b) -{ - struct threshold_block *b = _b; - u32 low, high; - - rdmsr(b->address, low, high); - return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); -} - static ssize_t show_error_count(struct threshold_block *b, char *buf) { - return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); + u32 high, low; + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, &newmask); + rdmsr(b->address, low, high); + affinity_restore(&oldmask); + return sprintf(buf, "%x\n", + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); } static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - - work_on_cpu(b->cpu, threshold_restart_bank, &tr); + cpumask_t oldmask, newmask; + affinity_set(b->cpu, &oldmask, 
&newmask); + threshold_restart_bank(b, 1, 0); + affinity_restore(&oldmask); return 1; } @@ -462,19 +463,12 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, return err; } -static long local_allocate_threshold_blocks(void *_bank) -{ - unsigned int *bank = _bank; - - return allocate_threshold_blocks(smp_processor_id(), *bank, 0, - MSR_IA32_MC0_MISC + *bank * 4); -} - /* symlinks sibling shared banks to first core. first core owns dir/files. */ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; + cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -525,7 +519,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); + affinity_set(cpu, &oldmask, &newmask); + err = allocate_threshold_blocks(cpu, bank, 0, + MSR_IA32_MC0_MISC + bank * 4); + affinity_restore(&oldmask); + if (err) goto out_free; diff --git a/trunk/arch/x86/kernel/cpu/mtrr/generic.c b/trunk/arch/x86/kernel/cpu/mtrr/generic.c index b59ddcc88cd8..4e8d77f01eeb 100644 --- a/trunk/arch/x86/kernel/cpu/mtrr/generic.c +++ b/trunk/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,6 +14,14 @@ #include #include "mtrr.h" +struct mtrr_state { + struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; + mtrr_type fixed_ranges[NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + struct fixed_range_block { int base_msr; /* start address of an MTRR block */ int ranges; /* number of MTRRs in this block */ @@ -27,12 +35,10 @@ static struct fixed_range_block fixed_range_blocks[] = { }; static unsigned long smp_changes_mask; +static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; u64 mtrr_tom2; -struct mtrr_state_type mtrr_state = {}; -EXPORT_SYMBOL_GPL(mtrr_state); - #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." 
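/*
 * Illustrative sketch of how the fixed_range_blocks[] table in the
 * generic.c hunk above is meant to be walked: each block names the
 * first MSR of a run of fixed-range MTRR MSRs plus the run length.
 * The zero-terminated table layout and the rdmsr() usage are
 * assumptions made for this example only.
 */
static void sketch_walk_fixed_ranges(const struct fixed_range_block *blocks)
{
	const struct fixed_range_block *block;
	u32 lo, hi;
	int i;

	for (block = blocks; block->ranges; block++)	/* a zeroed entry ends the table */
		for (i = 0; i < block->ranges; i++)
			rdmsr(block->base_msr + i, lo, hi);	/* one fixed-range MTRR MSR */
}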
diff --git a/trunk/arch/x86/kernel/cpu/mtrr/main.c b/trunk/arch/x86/kernel/cpu/mtrr/main.c index d259e5d2e054..1159e269e596 100644 --- a/trunk/arch/x86/kernel/cpu/mtrr/main.c +++ b/trunk/arch/x86/kernel/cpu/mtrr/main.c @@ -49,7 +49,7 @@ u32 num_var_ranges = 0; -unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; +unsigned int mtrr_usage_table[MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; @@ -574,7 +574,7 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; +static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { @@ -824,14 +824,16 @@ static int enable_mtrr_cleanup __initdata = static int __init disable_mtrr_cleanup_setup(char *str) { - enable_mtrr_cleanup = 0; + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; return 0; } early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); static int __init enable_mtrr_cleanup_setup(char *str) { - enable_mtrr_cleanup = 1; + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; return 0; } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); diff --git a/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h b/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h index ffd60409cc6d..2dc4ec656b23 100644 --- a/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/trunk/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -8,6 +8,11 @@ #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + +#define NUM_FIXED_RANGES 88 +#define MAX_VAR_RANGES 256 #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -24,7 +29,11 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; struct mtrr_ops { u32 vendor; @@ -61,6 +70,13 @@ struct set_mtrr_context { u32 ccr3; }; +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); diff --git a/trunk/arch/x86/kernel/cpuid.c b/trunk/arch/x86/kernel/cpuid.c index 85d28d53f5d3..72cefd1e649b 100644 --- a/trunk/arch/x86/kernel/cpuid.c +++ b/trunk/arch/x86/kernel/cpuid.c @@ -39,10 +39,10 @@ #include #include #include -#include #include #include +#include #include static struct class *cpuid_class; @@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) } static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t * ppos) { char __user *tmp = buf; struct cpuid_regs cmd; @@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; int ret = 0; - + lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); diff --git a/trunk/arch/x86/kernel/crash.c b/trunk/arch/x86/kernel/crash.c index c689d19e35ab..d84a852e4cd7 100644 --- a/trunk/arch/x86/kernel/crash.c +++ b/trunk/arch/x86/kernel/crash.c @@ -26,7 +26,6 @@ #include #include #include -#include #include @@ -50,15 +49,6 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) #endif 
 	crash_save_cpu(regs, cpu);
-	/* Disable VMX or SVM if needed.
-	 *
-	 * We need to disable virtualization on all CPUs.
-	 * Having VMX or SVM enabled on any CPU may break rebooting
-	 * after the kdump kernel has finished its task.
-	 */
-	cpu_emergency_vmxoff();
-	cpu_emergency_svm_disable();
-
 	disable_local_APIC();
 }
@@ -90,14 +80,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	local_irq_disable();
 	kdump_nmi_shootdown_cpus();
-
-	/* Booting kdump kernel with VMX or SVM enabled won't work,
-	 * because (among other limitations) we can't disable paging
-	 * with the virt flags.
-	 */
-	cpu_emergency_vmxoff();
-	cpu_emergency_svm_disable();
-
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
diff --git a/trunk/arch/x86/kernel/early_printk.c b/trunk/arch/x86/kernel/early_printk.c
index 504ad198e4ad..23b138e31e9c 100644
--- a/trunk/arch/x86/kernel/early_printk.c
+++ b/trunk/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
 	va_list ap;
 	va_start(ap, fmt);
-	n = vscnprintf(buf, sizeof(buf), fmt, ap);
+	n = vscnprintf(buf, 512, fmt, ap);
 	early_console->write(early_console, buf, n);
 	va_end(ap);
 }
diff --git a/trunk/arch/x86/kernel/genapic_flat_64.c b/trunk/arch/x86/kernel/genapic_flat_64.c
index 34185488e4fb..c0262791bda4 100644
--- a/trunk/arch/x86/kernel/genapic_flat_64.c
+++ b/trunk/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
-static const struct cpumask *flat_target_cpus(void)
+static cpumask_t flat_target_cpus(void)
 {
-	return cpu_online_mask;
+	return cpu_online_map;
 }
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static cpumask_t flat_vector_allocation_domain(int cpu)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt destination.
*/ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } /* @@ -69,8 +69,9 @@ static void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void _flat_send_IPI_mask(unsigned long mask, int vector) +static void flat_send_IPI_mask(cpumask_t cpumask, int vector) { + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -78,41 +79,20 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) local_irq_restore(flags); } -static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - - _flat_send_IPI_mask(mask, vector); -} - -static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - int cpu = smp_processor_id(); - - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); - _flat_send_IPI_mask(mask, vector); -} - static void flat_send_IPI_allbutself(int vector) { - int cpu = smp_processor_id(); #ifdef CONFIG_HOTPLUG_CPU int hotplug = 1; #else int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { - unsigned long mask = cpumask_bits(cpu_online_mask)[0]; + cpumask_t allbutme = cpu_online_map; - if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + cpu_clear(smp_processor_id(), allbutme); - _flat_send_IPI_mask(mask, vector); - } + if (!cpus_empty(allbutme)) + flat_send_IPI_mask(allbutme, vector); } else if (num_online_cpus() > 1) { __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } @@ -121,7 +101,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(cpu_online_mask, vector); + flat_send_IPI_mask(cpu_online_map, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -155,18 +135,9 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; -} - -static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) { - unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; - unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; - - return mask1 & mask2; + return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; } static unsigned int phys_pkg_id(int index_msb) @@ -186,10 +157,8 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, @@ -219,39 +188,35 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const struct cpumask *physflat_target_cpus(void) +static cpumask_t physflat_target_cpus(void) { - return cpu_online_mask; + return cpu_online_map; } -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t physflat_vector_allocation_domain(int cpu) { - 
cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + return cpumask_of_cpu(cpu); } -static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) +static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) -{ - send_IPI_mask_allbutself(cpumask, vector); -} - static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + cpumask_t allbutme = cpu_online_map; + + cpu_clear(smp_processor_id(), allbutme); + physflat_send_IPI_mask(allbutme, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_mask, vector); + physflat_send_IPI_mask(cpu_online_map, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -259,31 +224,13 @@ static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - struct genapic apic_physflat = { .name = "physical flat", .acpi_madt_oem_check = physflat_acpi_madt_oem_check, @@ -296,10 +243,8 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_cluster.c b/trunk/arch/x86/kernel/genx2apic_cluster.c index 6ce497cc372d..f6a2c8eb48a6 100644 --- a/trunk/arch/x86/kernel/genx2apic_cluster.c +++ b/trunk/arch/x86/kernel/genx2apic_cluster.c @@ -22,18 +22,19 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -55,53 +56,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. 
*/ -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); -} - -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } local_irq_restore(flags); } static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -109,38 +89,21 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. + * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. 
- */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -187,10 +150,8 @@ struct genapic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_phys.c b/trunk/arch/x86/kernel/genx2apic_phys.c index 21bcc0e098ba..d042211768b7 100644 --- a/trunk/arch/x86/kernel/genx2apic_phys.c +++ b/trunk/arch/x86/kernel/genx2apic_phys.c @@ -29,15 +29,16 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static const struct cpumask *x2apic_target_cpus(void) +static cpumask_t x2apic_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t x2apic_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -53,54 +54,32 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu(query_cpu, mask) { + for_each_cpu_mask(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) -{ - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; - unsigned long this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - local_irq_restore(flags); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + x2apic_send_IPI_mask(cpu_online_map, vector); } static int x2apic_apic_id_registered(void) @@ -108,7 +87,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -116,30 +95,13 @@ static unsigned int 
x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -161,12 +123,12 @@ static unsigned int phys_pkg_id(int index_msb) return current_cpu_data.initial_apicid >> index_msb; } -static void x2apic_send_IPI_self(int vector) +void x2apic_send_IPI_self(int vector) { apic_write(APIC_SELF_IPI, vector); } -static void init_x2apic_ldr(void) +void init_x2apic_ldr(void) { return; } @@ -183,10 +145,8 @@ struct genapic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/genx2apic_uv_x.c b/trunk/arch/x86/kernel/genx2apic_uv_x.c index b193e082f6ce..dece17289731 100644 --- a/trunk/arch/x86/kernel/genx2apic_uv_x.c +++ b/trunk/arch/x86/kernel/genx2apic_uv_x.c @@ -79,15 +79,16 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ -static const struct cpumask *uv_target_cpus(void) +static cpumask_t uv_target_cpus(void) { - return cpumask_of(0); + return cpumask_of_cpu(0); } -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) +static cpumask_t uv_vector_allocation_domain(int cpu) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -126,37 +127,28 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(const struct cpumask *mask, int vector) +static void uv_send_IPI_mask(cpumask_t mask, int vector) { unsigned int cpu; - for_each_cpu(cpu, mask) - uv_send_IPI_one(cpu, vector); -} - -static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); - - for_each_cpu(cpu, mask) - if (cpu != this_cpu) + for_each_possible_cpu(cpu) + if (cpu_isset(cpu, mask)) uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_allbutself(int vector) { - unsigned int cpu; - unsigned int this_cpu = smp_processor_id(); + cpumask_t mask = cpu_online_map; - for_each_online_cpu(cpu) - if (cpu != this_cpu) - uv_send_IPI_one(cpu, vector); + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + uv_send_IPI_mask(mask, vector); } static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(cpu_online_mask, vector); + uv_send_IPI_mask(cpu_online_map, vector); } static int uv_apic_id_registered(void) @@ -168,7 +160,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) +static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -176,30 +168,13 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + cpu = first_cpu(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. 
- */ - for_each_cpu_and(cpu, cpumask, andmask) - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - return BAD_APICID; -} - static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -247,10 +222,8 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/trunk/arch/x86/kernel/head64.c b/trunk/arch/x86/kernel/head64.c index b9a4d8c4b935..388e05a5fc17 100644 --- a/trunk/arch/x86/kernel/head64.c +++ b/trunk/arch/x86/kernel/head64.c @@ -27,7 +27,7 @@ #include /* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda; +static struct x8664_pda _boot_cpu_pda __read_mostly; #ifdef CONFIG_SMP /* diff --git a/trunk/arch/x86/kernel/hpet.c b/trunk/arch/x86/kernel/hpet.c index cd759ad90690..845ea097383e 100644 --- a/trunk/arch/x86/kernel/hpet.c +++ b/trunk/arch/x86/kernel/hpet.c @@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void) * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ - hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); + hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); clockevents_register_device(&hpet_clockevent); global_clock_event = &hpet_clockevent; printk(KERN_DEBUG "hpet clockevent registered\n"); @@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode, struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); hpet_setup_msi_irq(hdev->irq); disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); + irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); enable_irq(hdev->irq); } break; @@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) return -1; disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); + irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); enable_irq(dev->irq); printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", @@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) /* 5 usec minimum reprogramming delta. */ evt->min_delta_ns = 5000; - evt->cpumask = cpumask_of(hdev->cpu); + evt->cpumask = cpumask_of_cpu(hdev->cpu); clockevents_register_device(evt); } diff --git a/trunk/arch/x86/kernel/i8253.c b/trunk/arch/x86/kernel/i8253.c index 10f92fb532f3..c1b5e3ece1f2 100644 --- a/trunk/arch/x86/kernel/i8253.c +++ b/trunk/arch/x86/kernel/i8253.c @@ -114,7 +114,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. 
*/ - pit_clockevent.cpumask = cpumask_of(smp_processor_id()); + pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_clockevent.shift); pit_clockevent.max_delta_ns = diff --git a/trunk/arch/x86/kernel/io_apic.c b/trunk/arch/x86/kernel/io_apic.c index 69911722b9d3..74917658b004 100644 --- a/trunk/arch/x86/kernel/io_apic.c +++ b/trunk/arch/x86/kernel/io_apic.c @@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) struct irq_cfg { struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; + cpumask_t domain; + cpumask_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; @@ -152,22 +152,22 @@ static struct irq_cfg irq_cfgx[] = { #else static struct irq_cfg irq_cfgx[NR_IRQS] = { #endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; int __init arch_early_irq_init(void) @@ -183,10 +183,6 @@ int __init arch_early_irq_init(void) for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_bootmem_cpumask_var(&cfg[i].domain); - alloc_bootmem_cpumask_var(&cfg[i].old_domain); - if (i < NR_IRQS_LEGACY) - cpumask_setall(cfg[i].domain); } return 0; @@ -213,20 +209,6 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); - if (cfg) { - /* FIXME: needs alloc_cpumask_var_node() */ - if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { - kfree(cfg); - cfg = NULL; - } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { - free_cpumask_var(cfg->domain); - kfree(cfg); - cfg = NULL; - } else { - cpumask_clear(cfg->domain); - cpumask_clear(cfg->old_domain); - } - } printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); return cfg; @@ -351,14 +333,13 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) } } -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +static void set_extra_move_desc(struct 
irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg = desc->chip_data; if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpus_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -373,8 +354,7 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } #endif @@ -505,26 +485,6 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; @@ -560,55 +520,41 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); -/* - * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid - * of that, or returns BAD_APICID and leaves desc->affinity untouched. - */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; + return; - cpumask_and(&desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); -} - -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - unsigned int irq; - irq = desc->irq; - cfg = desc->chip_data; + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. 
*/ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } + __target_IO_APIC_irq(irq, dest, cfg); + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_desc *desc; @@ -706,7 +652,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) } #ifdef CONFIG_X86_64 -static void io_apic_sync(struct irq_pin_list *entry) +void io_apic_sync(struct irq_pin_list *entry) { /* * Synchronize the IO-APIC and the CPU by doing @@ -1276,8 +1222,7 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int -__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1292,49 +1237,49 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; - int cpu, err; - cpumask_var_t tmp_mask; + int cpu; if ((cfg->move_in_progress) || cfg->move_cleanup_count) return -EBUSY; - if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) - return -ENOMEM; + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); old_vector = cfg->vector; if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { - free_cpumask_var(tmp_mask); + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) return 0; - } } - /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; int new_cpu; int vector, offset; - vector_allocation_domain(cpu, tmp_mask); + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); vector = current_vector; offset = current_offset; next: vector += 8; if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. */ + /* If we run out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; - - if (test_bit(vector, used_vectors)) +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) goto next; - - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif + for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! 
*/ @@ -1342,21 +1287,18 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) current_offset = offset; if (old_vector) { cfg->move_in_progress = 1; - cpumask_copy(cfg->old_domain, cfg->domain); + cfg->old_domain = cfg->domain; } - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) + for_each_cpu_mask_nr(new_cpu, new_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; - cpumask_copy(cfg->domain, tmp_mask); - err = 0; - break; + cfg->domain = domain; + return 0; } - free_cpumask_var(tmp_mask); - return err; + return -ENOSPC; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; @@ -1369,20 +1311,23 @@ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { + cpumask_t mask; int cpu, vector; BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cpumask_clear(cfg->domain); + cpus_clear(cfg->domain); if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + cpus_and(mask, cfg->old_domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -1405,7 +1350,7 @@ void __setup_vector_irq(int cpu) /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; @@ -1417,7 +1362,7 @@ void __setup_vector_irq(int cpu) continue; cfg = irq_cfg(irq); - if (!cpumask_test_cpu(cpu, cfg->domain)) + if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1553,17 +1498,18 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - unsigned int dest; + cpumask_t mask; if (!IO_APIC_IRQ(irq)) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + mask = TARGET_CPUS; + if (assign_irq_vector(irq, cfg, mask)) return; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(mask, cfg->domain, mask); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1573,7 +1519,8 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - dest, trigger, polarity, cfg->vector)) { + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); __clear_irq_vector(irq, cfg); @@ -2293,7 +2240,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2342,17 +2289,18 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to 
IO-APIC RTE. */ -static void -migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; + cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; irq = desc->irq; @@ -2365,7 +2313,8 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) set_extra_move_desc(desc, mask); - dest = cpu_mask_to_apicid_and(cfg->domain, mask); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -2382,10 +2331,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) */ modify_irte(irq, &irte); - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } - cpumask_copy(&desc->affinity, mask); + desc->affinity = mask; } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2407,11 +2360,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everything is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpus_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2436,7 +2389,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, &desc->pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2445,20 +2398,18 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. 
*/ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, - const struct cpumask *mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + desc->pending_mask = mask; migrate_irq_remapped_level_desc(desc); return; } migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, - const struct cpumask *mask) +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -2493,7 +2444,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (!cfg->move_cleanup_count) goto unlock; - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) goto unlock; __get_cpu_var(vector_irq)[vector] = -1; @@ -2530,14 +2481,20 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) - send_cleanup_vector(cfg); + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } } #else static inline void irq_complete_move(struct irq_desc **descp) {} @@ -3259,13 +3216,16 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms struct irq_cfg *cfg; int err; unsigned dest; + cpumask_t tmp; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3319,18 +3279,26 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); read_msi_msg_desc(desc, &msg); @@ -3340,27 +3308,37 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + desc->affinity = mask; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. 
*/ -static void -ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg = desc->chip_data; + struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp, cleanup_mask; struct irte irte; + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + if (get_irte(irq, &irte)) return; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3374,8 +3352,14 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) * at the new destination. So, time to cleanup the previous * vector allocation. */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc->affinity = mask; } #endif @@ -3566,18 +3550,26 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@ -3587,6 +3579,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3620,18 +3613,26 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@ -3641,6 +3642,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3695,19 +3697,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; + cpumask_t tmp; - dest = set_desc_affinity(desc, 
mask); - if (dest == BAD_APICID) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return; + + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); + desc->affinity = mask; } #endif @@ -3727,14 +3738,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; int err; + cpumask_t tmp; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3770,7 +3784,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset) { - const struct cpumask *eligible_cpu = cpumask_of(cpu); + const cpumask_t *eligible_cpu = get_cpu_mask(cpu); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3780,7 +3794,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, eligible_cpu); + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@ -3799,7 +3813,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(eligible_cpu); + entry->dest = cpu_mask_to_apicid(*eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -4010,7 +4024,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; struct irq_desc *desc; struct irq_cfg *cfg; - const struct cpumask *mask; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -4041,7 +4055,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; diff --git a/trunk/arch/x86/kernel/ipi.c b/trunk/arch/x86/kernel/ipi.c index 285bbf8831fa..f1c688e46f35 100644 --- a/trunk/arch/x86/kernel/ipi.c +++ b/trunk/arch/x86/kernel/ipi.c @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. 
*/ -void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) { - unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); __send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +void send_IPI_mask_sequence(cpumask_t mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,24 +139,12 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); - local_irq_restore(flags); -} - -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) + for_each_possible_cpu(query_cpu) { + if (cpu_isset(query_cpu, mask)) { __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + } + } local_irq_restore(flags); } diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index bce53e1352a0..3f1d9d18df67 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -9,7 +9,6 @@ #include #include #include -#include atomic_t irq_err_count; @@ -191,5 +190,3 @@ u64 arch_irq_stat(void) #endif return sum; } - -EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c index 9dc5588f336a..119fc9c8ff7f 100644 --- a/trunk/arch/x86/kernel/irq_32.c +++ b/trunk/arch/x86/kernel/irq_32.c @@ -233,28 +233,27 @@ unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU #include -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; + cpumask_t mask; if (!desc) continue; if (irq == 2) continue; - affinity = &desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; + mask = map; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/trunk/arch/x86/kernel/irq_64.c b/trunk/arch/x86/kernel/irq_64.c index 6383d50f82ea..a174a217eb1a 100644 --- a/trunk/arch/x86/kernel/irq_64.c +++ b/trunk/arch/x86/kernel/irq_64.c @@ -80,17 +80,16 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
*/ -void fixup_irqs(void) +void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { + cpumask_t mask; int break_affinity = 0; int set_affinity = 1; - const struct cpumask *affinity; if (!desc) continue; @@ -100,23 +99,23 @@ void fixup_irqs(void) /* interrupts are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { + cpus_equal(desc->affinity, map)) { spin_unlock(&desc->lock); continue; } - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + cpus_and(mask, desc->affinity, map); + if (cpus_empty(mask)) { break_affinity = 1; - affinity = cpu_all_mask; + mask = map; } if (desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); + desc->chip->set_affinity(irq, mask); else if (!(warned++)) set_affinity = 0; diff --git a/trunk/arch/x86/kernel/irqinit_32.c b/trunk/arch/x86/kernel/irqinit_32.c index 84723295f88a..203384ed2b5d 100644 --- a/trunk/arch/x86/kernel/irqinit_32.c +++ b/trunk/arch/x86/kernel/irqinit_32.c @@ -110,18 +110,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -158,12 +146,10 @@ void __init native_init_IRQ(void) alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* IPI for single call function */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/trunk/arch/x86/kernel/irqinit_64.c b/trunk/arch/x86/kernel/irqinit_64.c index 31ebfe38e96c..6190e6ef546c 100644 --- a/trunk/arch/x86/kernel/irqinit_64.c +++ b/trunk/arch/x86/kernel/irqinit_64.c @@ -69,18 +69,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... 
NR_VECTORS - 1] = -1 }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - void __init init_ISA_irqs(void) { int i; @@ -133,7 +121,6 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif } diff --git a/trunk/arch/x86/kernel/kvmclock.c b/trunk/arch/x86/kernel/kvmclock.c index 652fce6d2cce..e169ae9b6a62 100644 --- a/trunk/arch/x86/kernel/kvmclock.c +++ b/trunk/arch/x86/kernel/kvmclock.c @@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) */ static unsigned long kvm_get_tsc_khz(void) { - struct pvclock_vcpu_time_info *src; - src = &per_cpu(hv_clock, 0); - return pvclock_tsc_khz(src); + return preset_lpj; } static void kvm_get_preset_lpj(void) { + struct pvclock_vcpu_time_info *src; unsigned long khz; u64 lpj; - khz = kvm_get_tsc_khz(); + src = &per_cpu(hv_clock, 0); + khz = pvclock_tsc_khz(src); lpj = ((u64)khz * 1000); do_div(lpj, HZ); @@ -194,7 +194,5 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register(&kvm_clock); - pv_info.paravirt_enabled = 1; - pv_info.name = "KVM"; } } diff --git a/trunk/arch/x86/kernel/ldt.c b/trunk/arch/x86/kernel/ldt.c index 71f1d99a635d..eee32b43fee3 100644 --- a/trunk/arch/x86/kernel/ldt.c +++ b/trunk/arch/x86/kernel/ldt.c @@ -12,8 +12,8 @@ #include #include #include -#include +#include #include #include #include @@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) if (err < 0) return err; - for (i = 0; i < old->size; i++) + for(i = 0; i < old->size; i++) write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } diff --git a/trunk/arch/x86/kernel/mfgpt_32.c b/trunk/arch/x86/kernel/mfgpt_32.c index c12314c9e86f..3b599518c322 100644 --- a/trunk/arch/x86/kernel/mfgpt_32.c +++ b/trunk/arch/x86/kernel/mfgpt_32.c @@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = { .set_mode = mfgpt_set_mode, .set_next_event = mfgpt_next_event, .rating = 250, - .cpumask = cpu_all_mask, + .cpumask = CPU_MASK_ALL, .shift = 32 }; diff --git a/trunk/arch/x86/kernel/mmconf-fam10h_64.c b/trunk/arch/x86/kernel/mmconf-fam10h_64.c index 666e43df51f9..efc2f361fe85 100644 --- a/trunk/arch/x86/kernel/mmconf-fam10h_64.c +++ b/trunk/arch/x86/kernel/mmconf-fam10h_64.c @@ -13,7 +13,8 @@ #include #include #include -#include + +#include "../pci/pci.h" struct pci_hostbridge_probe { u32 bus; diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c index c5c5b8df1dbc..45e3b69808ba 100644 --- a/trunk/arch/x86/kernel/mpparse.c +++ b/trunk/arch/x86/kernel/mpparse.c @@ -16,14 +16,14 @@ #include #include #include -#include -#include +#include #include #include #include #include #include +#include #include #include #include @@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #endif if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { @@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) x86_quirks->mpc_oem_pci_bus(m); clear_bit(m->mpc_busid, 
mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +#if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; diff --git a/trunk/arch/x86/kernel/nmi.c b/trunk/arch/x86/kernel/nmi.c index 45a09ccdc214..8bd1bf9622a7 100644 --- a/trunk/arch/x86/kernel/nmi.c +++ b/trunk/arch/x86/kernel/nmi.c @@ -26,10 +26,11 @@ #include #include #include -#include #include #include +#include +#include #include #include diff --git a/trunk/arch/x86/kernel/pci-gart_64.c b/trunk/arch/x86/kernel/pci-gart_64.c index 00c2bcd41463..a35eaa379ff6 100644 --- a/trunk/arch/x86/kernel/pci-gart_64.c +++ b/trunk/arch/x86/kernel/pci-gart_64.c @@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ * to trigger bugs with some popular PCI cards, in particular 3ware (but * has also been seen with Qlogic at least). */ -static int iommu_fullflush = 1; +int iommu_fullflush = 1; /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index bf088c61fa40..61f718df6eec 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include #ifdef CONFIG_X86_32 # include @@ -25,6 +23,7 @@ #include + /* * Power off function, if any */ @@ -40,12 +39,6 @@ int reboot_force; static int reboot_cpu = -1; #endif -/* This is set if we need to go through the 'emergency' path. * When machine_emergency_restart() is called, we may be on * an inconsistent state and won't be able to do a clean cleanup */ -static int reboot_emergency; - /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ bool port_cf9_safe = false; @@ -375,48 +368,6 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct die_args *args) -{ - cpu_emergency_vmxoff(); -} - -/* Use NMIs as IPIs to tell all CPUs to disable virtualization - */ -static void emergency_vmx_disable_all(void) -{ - /* Just make sure we won't change CPUs while doing this */ - local_irq_disable(); - - /* We need to disable VMX on all CPUs before rebooting, otherwise - * we risk hanging up the machine, because the CPU ignore INIT - * signals when VMX is enabled. - * - * We can't take any locks and we may be on an inconsistent - * state, so we use NMIs as IPIs to tell the other CPUs to disable - * VMX and halt. - * - * For safety, we will avoid running the nmi_shootdown_cpus() - * stuff unnecessarily, but we don't have a way to check - * if other CPUs have VMX enabled. So we will call it only if the - * CPU we are running on has VMX enabled. - * - * We will miss cases where VMX is not enabled on all CPUs. This - * shouldn't do much harm because KVM always enable VMX on all - * CPUs anyway. But we can miss it on the small window where KVM - * is still enabling VMX. - */ - if (cpu_has_vmx() && cpu_vmx_enabled()) { - /* Disable VMX on this CPU. 
- */ - cpu_vmxoff(); - - /* Halt and disable VMX on the other CPUs */ - nmi_shootdown_cpus(vmxoff_nmi); - - } -} - - void __attribute__((weak)) mach_reboot_fixups(void) { } @@ -425,9 +376,6 @@ static void native_machine_emergency_restart(void) { int i; - if (reboot_emergency) - emergency_vmx_disable_all(); - /* Tell the BIOS if we want cold or warm reboot */ *((unsigned short *)__va(0x472)) = reboot_mode; @@ -534,19 +482,13 @@ void native_machine_shutdown(void) #endif } -static void __machine_emergency_restart(int emergency) -{ - reboot_emergency = emergency; - machine_ops.emergency_restart(); -} - static void native_machine_restart(char *__unused) { printk("machine restart\n"); if (!reboot_force) machine_shutdown(); - __machine_emergency_restart(0); + machine_emergency_restart(); } static void native_machine_halt(void) @@ -590,7 +532,7 @@ void machine_shutdown(void) void machine_emergency_restart(void) { - __machine_emergency_restart(1); + machine_ops.emergency_restart(); } void machine_restart(char *cmd) @@ -650,7 +592,10 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(NMI_VECTOR); + cpumask_t mask = cpu_online_map; + cpu_clear(safe_smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/trunk/arch/x86/kernel/setup_percpu.c b/trunk/arch/x86/kernel/setup_percpu.c index 0b63b08e7530..ae0c0d3bb770 100644 --- a/trunk/arch/x86/kernel/setup_percpu.c +++ b/trunk/arch/x86/kernel/setup_percpu.c @@ -152,11 +152,6 @@ void __init setup_per_cpu_areas(void) old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); size = roundup(old_size, align); - - printk(KERN_INFO - "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", - NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -173,24 +168,24 @@ void __init setup_per_cpu_areas(void) "cpu %d has no node %d or node-local memory\n", cpu, node); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d at %016lx\n", + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, __pa(MAX_DMA_ADDRESS)); if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d on node%d " - "at %016lx\n", - cpu, node, __pa(ptr)); + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } + printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", + NR_CPUS, nr_cpu_ids, nr_node_ids); + /* Setup percpu data maps */ setup_per_cpu_maps(); @@ -287,7 +282,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) else cpu_clear(cpu, *mask); - cpulist_scnprintf(buf, sizeof(buf), mask); + cpulist_scnprintf(buf, sizeof(buf), *mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", enable? 
"numa_add_cpu":"numa_remove_cpu", cpu, node, buf); } diff --git a/trunk/arch/x86/kernel/smp.c b/trunk/arch/x86/kernel/smp.c index beea2649a240..7e558db362c1 100644 --- a/trunk/arch/x86/kernel/smp.c +++ b/trunk/arch/x86/kernel/smp.c @@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(const struct cpumask *mask) +void native_send_call_func_ipi(cpumask_t mask) { cpumask_t allbutself; allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - if (cpus_equal(*mask, allbutself) && + if (cpus_equal(mask, allbutself) && cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index 31869bf5fabd..f8500c969442 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -102,8 +102,14 @@ EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; +/* bitmap of online cpus */ +cpumask_t cpu_online_map __read_mostly; +EXPORT_SYMBOL(cpu_online_map); + cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; +cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); @@ -1254,15 +1260,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -static int __initdata setup_possible_cpus = -1; -static int __init _setup_possible_cpus(char *str) -{ - get_option(&str, &setup_possible_cpus); - return 0; -} -early_param("possible_cpus", _setup_possible_cpus); - - /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1275,7 +1272,7 @@ early_param("possible_cpus", _setup_possible_cpus); * * Three ways to find out the number of additional hotplug CPUs: * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM + * - The user can overwrite it with additional_cpus=NUM * - Otherwise don't reserve additional CPUs. * We do this because additional CPUs waste a lot of memory. 
* -AK @@ -1288,17 +1285,9 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - if (setup_possible_cpus == -1) - possible = num_processors + disabled_cpus; - else - possible = setup_possible_cpus; - - if (possible > CONFIG_NR_CPUS) { - printk(KERN_WARNING - "%d Processors exceeds NR_CPUS limit of %d\n", - possible, CONFIG_NR_CPUS); - possible = CONFIG_NR_CPUS; - } + possible = num_processors + disabled_cpus; + if (possible > NR_CPUS) + possible = NR_CPUS; printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); @@ -1363,7 +1352,7 @@ void cpu_disable_common(void) lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); - fixup_irqs(); + fixup_irqs(cpu_online_map); } int native_cpu_disable(void) diff --git a/trunk/arch/x86/kernel/tlb_32.c b/trunk/arch/x86/kernel/tlb_32.c index ce5054642247..8da059f949be 100644 --- a/trunk/arch/x86/kernel/tlb_32.c +++ b/trunk/arch/x86/kernel/tlb_32.c @@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ diff --git a/trunk/arch/x86/kernel/tlb_64.c b/trunk/arch/x86/kernel/tlb_64.c index f8be6f1d2e48..29887d7081a9 100644 --- a/trunk/arch/x86/kernel/tlb_64.c +++ b/trunk/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpus_empty(f->flush_cpumask)) cpu_relax(); diff --git a/trunk/arch/x86/kernel/tlb_uv.c b/trunk/arch/x86/kernel/tlb_uv.c index f885023167e0..6a00e5faaa74 100644 --- a/trunk/arch/x86/kernel/tlb_uv.c +++ b/trunk/arch/x86/kernel/tlb_uv.c @@ -582,6 +582,7 @@ static int __init uv_ptc_init(void) static struct bau_control * __init uv_table_bases_init(int blade, int node) { int i; + int *ip; struct bau_msg_status *msp; struct bau_control *bau_tabp; @@ -598,6 +599,13 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) bau_cpubits_clear(&msp->seen_by, (int) uv_blade_nr_possible_cpus(blade)); + bau_tabp->watching = + kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); + BUG_ON(!bau_tabp->watching); + + for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) + *ip = 0; + uv_bau_table_bases[blade] = bau_tabp; return bau_tabp; @@ -620,6 +628,7 @@ uv_table_bases_finish(int blade, int node, int cur_cpu, bcp->bau_msg_head = bau_tablesp->va_queue_first; bcp->va_queue_first = bau_tablesp->va_queue_first; bcp->va_queue_last = bau_tablesp->va_queue_last; + bcp->watching = bau_tablesp->watching; bcp->msg_statuses = bau_tablesp->msg_statuses; bcp->descriptor_base = adp; } diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index ce6650eb64e9..141907ab6e22 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -72,6 +72,9 @@ #include "cpu/mcheck/mce.h" +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? 
*/ @@ -86,9 +89,6 @@ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - static int ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) @@ -292,10 +292,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_no = 8; - /* - * This is always a kernel trap and never fixable (and thus must - * never return). - */ + /* This is always a kernel trap and never fixable (and thus must + never return). */ for (;;) die(str, regs, error_code); } @@ -522,11 +520,9 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) } #ifdef CONFIG_X86_64 -/* - * Help handler running on IST stack to switch back to user stack - * for scheduling or signal handling. The actual stack switch is done in - * entry.S - */ +/* Help handler running on IST stack to switch back to user stack + for scheduling or signal handling. The actual stack switch is done in + entry.S */ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; @@ -536,10 +532,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) /* Exception from user space */ else if (user_mode(eregs)) regs = task_pt_regs(current); - /* - * Exception from kernel and interrupts are enabled. Move to - * kernel process stack. - */ + /* Exception from kernel and interrupts are enabled. Move to + kernel process stack. */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -691,7 +685,12 @@ void math_error(void __user *ip) cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - err = swd & ~cwd; + err = swd & ~cwd & 0x3f; + +#ifdef CONFIG_X86_32 + if (!err) + return; +#endif if (err & 0x001) { /* Invalid op */ /* @@ -709,11 +708,7 @@ void math_error(void __user *ip) } else if (err & 0x020) { /* Precision */ info.si_code = FPE_FLTRES; } else { - /* - * If we're using IRQ 13, or supposedly even some trap 16 - * implementations, it's possible we get a spurious trap... - */ - return; /* Spurious trap, no error */ + info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ } force_sig_info(SIGFPE, &info, task); } @@ -946,7 +941,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { +#ifdef CONFIG_X86_32 int i; +#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -1003,15 +1000,11 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); -#endif /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); -#ifdef CONFIG_X86_64 - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else set_bit(SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/trunk/arch/x86/kernel/vmiclock_32.c b/trunk/arch/x86/kernel/vmiclock_32.c index c4c1f9e09402..254ee07f8635 100644 --- a/trunk/arch/x86/kernel/vmiclock_32.c +++ b/trunk/arch/x86/kernel/vmiclock_32.c @@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) /* Upper bound is clockevent's use of ulong for cycle deltas. */ evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); evt->min_delta_ns = clockevent_delta2ns(1, evt); - evt->cpumask = cpumask_of(cpu); + evt->cpumask = cpumask_of_cpu(cpu); printk(KERN_WARNING "vmi: registering clock event %s. 
mult=%lu shift=%u\n", evt->name, evt->mult, evt->shift); diff --git a/trunk/arch/x86/kernel/xsave.c b/trunk/arch/x86/kernel/xsave.c index 2b54fe002e94..15c3e6999182 100644 --- a/trunk/arch/x86/kernel/xsave.c +++ b/trunk/arch/x86/kernel/xsave.c @@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf) * Restore the extended state if present. Otherwise, restore the FP/SSE * state. */ -static int restore_user_xstate(void __user *buf) +int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; u64 mask; diff --git a/trunk/arch/x86/kvm/i8254.c b/trunk/arch/x86/kvm/i8254.c index e665d1c623ca..59ebd37ad79e 100644 --- a/trunk/arch/x86/kvm/i8254.c +++ b/trunk/arch/x86/kvm/i8254.c @@ -603,29 +603,10 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { - struct kvm_vcpu *vcpu; - int i; - mutex_lock(&kvm->lock); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); - - /* - * Provides NMI watchdog support via Virtual Wire mode. - * The route is: PIT -> PIC -> LVT0 in NMI mode. - * - * Note: Our Virtual Wire implementation is simplified, only - * propagating PIT interrupts to all VCPUs when they have set - * LVT0 to NMI delivery. Other PIC interrupts are just sent to - * VCPU0, and only if its LVT0 is in EXTINT mode. - */ - if (kvm->arch.vapics_in_nmi_mode > 0) - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (vcpu) - kvm_apic_nmi_wd_deliver(vcpu); - } } void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) diff --git a/trunk/arch/x86/kvm/i8259.c b/trunk/arch/x86/kvm/i8259.c index 179dcb0103fd..17e41e165f1a 100644 --- a/trunk/arch/x86/kvm/i8259.c +++ b/trunk/arch/x86/kvm/i8259.c @@ -26,40 +26,10 @@ * Port from Qemu. 
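The __inject_pit_timer_intr() hunk above drops the virtual-wire NMI watchdog fan-out described in the removed comment (PIT -> PIC -> LVT0 in NMI mode): while any vAPIC has LVT0 programmed for NMI delivery, each PIT tick is offered to every vcpu as an NMI. A compile-only sketch of that loop; the vcpu table and deliver_nmi() hook are hypothetical stand-ins:

    #define MAX_VCPUS 16

    struct vcpu;                               /* opaque here */
    extern struct vcpu *vcpus[MAX_VCPUS];      /* hypothetical table */
    extern void deliver_nmi(struct vcpu *v);   /* hypothetical hook */

    static void pit_tick_nmi_fanout(int vapics_in_nmi_mode)
    {
        if (vapics_in_nmi_mode <= 0)
            return;                 /* nobody uses the NMI watchdog */
        for (int i = 0; i < MAX_VCPUS; i++)
            if (vcpus[i])
                deliver_nmi(vcpus[i]);
    }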
*/ #include -#include #include "irq.h" #include -static void pic_lock(struct kvm_pic *s) -{ - spin_lock(&s->lock); -} - -static void pic_unlock(struct kvm_pic *s) -{ - struct kvm *kvm = s->kvm; - unsigned acks = s->pending_acks; - bool wakeup = s->wakeup_needed; - struct kvm_vcpu *vcpu; - - s->pending_acks = 0; - s->wakeup_needed = false; - - spin_unlock(&s->lock); - - while (acks) { - kvm_notify_acked_irq(kvm, __ffs(acks)); - acks &= acks - 1; - } - - if (wakeup) { - vcpu = s->kvm->vcpus[0]; - if (vcpu) - kvm_vcpu_kick(vcpu); - } -} - static void pic_clear_isr(struct kvm_kpic_state *s, int irq) { s->isr &= ~(1 << irq); @@ -166,21 +136,17 @@ static void pic_update_irq(struct kvm_pic *s) void kvm_pic_update_irq(struct kvm_pic *s) { - pic_lock(s); pic_update_irq(s); - pic_unlock(s); } void kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; - pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); } - pic_unlock(s); } /* @@ -206,7 +172,6 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); - pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { pic_intack(&s->pics[0], irq); @@ -231,7 +196,6 @@ int kvm_pic_read_irq(struct kvm *kvm) intno = s->pics[0].irq_base + irq; } pic_update_irq(s); - pic_unlock(s); kvm_notify_acked_irq(kvm, irq); return intno; @@ -239,7 +203,7 @@ int kvm_pic_read_irq(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s) { - int irq, irqbase, n; + int irq, irqbase; struct kvm *kvm = s->pics_state->irq_request_opaque; struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; @@ -250,10 +214,8 @@ void kvm_pic_reset(struct kvm_kpic_state *s) for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) - if (s->irr & (1 << irq) || s->isr & (1 << irq)) { - n = irq + irqbase; - s->pics_state->pending_acks |= 1 << n; - } + if (s->irr & (1 << irq) || s->isr & (1 << irq)) + kvm_notify_acked_irq(kvm, irq+irqbase); } s->last_irr = 0; s->irr = 0; @@ -444,7 +406,6 @@ static void picdev_write(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte write\n"); return; } - pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -457,7 +418,6 @@ static void picdev_write(struct kvm_io_device *this, elcr_ioport_write(&s->pics[addr & 1], addr, data); break; } - pic_unlock(s); } static void picdev_read(struct kvm_io_device *this, @@ -471,7 +431,6 @@ static void picdev_read(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte read\n"); return; } - pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -485,7 +444,6 @@ static void picdev_read(struct kvm_io_device *this, break; } *(unsigned char *)val = data; - pic_unlock(s); } /* @@ -501,7 +459,7 @@ static void pic_irq_request(void *opaque, int level) s->output = level; if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { s->pics[0].isr_ack &= ~(1 << irq); - s->wakeup_needed = true; + kvm_vcpu_kick(vcpu); } } @@ -511,8 +469,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); if (!s) return NULL; - spin_lock_init(&s->lock); - s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; s->pics[1].elcr_mask = 0xde; s->irq_request = pic_irq_request; diff --git a/trunk/arch/x86/kvm/irq.h b/trunk/arch/x86/kvm/irq.h index 2bf32a03ceec..f17c8f5bbf31 100644 --- a/trunk/arch/x86/kvm/irq.h +++ b/trunk/arch/x86/kvm/irq.h @@ -25,7 +25,6 @@ #include #include #include -#include #include "iodev.h" #include "ioapic.h" @@ -60,10 +59,6 @@ struct 
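The removed pic_lock()/pic_unlock() pair above follows a classic deferral pattern: side effects that might re-enter the PIC (irq-ack notification, vcpu wakeup) are only recorded under the spinlock and carried out after it drops. A pthread rendering of the same shape, with printf() standing in for kvm_notify_acked_irq() and kvm_vcpu_kick():

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct pic {
        pthread_mutex_t lock;
        unsigned pending_acks;   /* bit n set: irq n needs an ack */
        bool wakeup_needed;
    };

    static void pic_unlock(struct pic *s)
    {
        unsigned acks = s->pending_acks;
        bool wakeup = s->wakeup_needed;

        s->pending_acks = 0;
        s->wakeup_needed = false;
        pthread_mutex_unlock(&s->lock);  /* now safe to call out */

        while (acks) {
            printf("ack irq %d\n", __builtin_ctz(acks));
            acks &= acks - 1;            /* clear lowest set bit */
        }
        if (wakeup)
            printf("kick vcpu0\n");
    }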
kvm_kpic_state { }; struct kvm_pic { - spinlock_t lock; - bool wakeup_needed; - unsigned pending_acks; - struct kvm *kvm; struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ irq_request_func *irq_request; void *irq_request_opaque; @@ -92,7 +87,6 @@ void kvm_pic_reset(struct kvm_kpic_state *s); void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); -void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); diff --git a/trunk/arch/x86/kvm/kvm_svm.h b/trunk/arch/x86/kvm/kvm_svm.h index 8e5ee99551f6..65ef0fc2c036 100644 --- a/trunk/arch/x86/kvm/kvm_svm.h +++ b/trunk/arch/x86/kvm/kvm_svm.h @@ -7,7 +7,7 @@ #include #include -#include +#include "svm.h" static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 diff --git a/trunk/arch/x86/kvm/lapic.c b/trunk/arch/x86/kvm/lapic.c index afac68c0815c..0fc3cab48943 100644 --- a/trunk/arch/x86/kvm/lapic.c +++ b/trunk/arch/x86/kvm/lapic.c @@ -130,11 +130,6 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic) return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; } -static inline int apic_lvt_nmi_mode(u32 lvt_val) -{ - return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; -} - static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ @@ -359,7 +354,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_NMI: kvm_inject_nmi(vcpu); - kvm_vcpu_kick(vcpu); break; case APIC_DM_INIT: @@ -386,14 +380,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } break; - case APIC_DM_EXTINT: - /* - * Should only be called by kvm_apic_local_deliver() with LVT0, - * before NMI watchdog was enabled. Already handled by - * kvm_apic_accept_pic_intr(). 
- */ - break; - default: printk(KERN_ERR "TODO: unsupported delivery mode %x\n", delivery_mode); @@ -677,20 +663,6 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->timer.period))); } -static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) -{ - int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); - - if (apic_lvt_nmi_mode(lvt0_val)) { - if (!nmi_wd_enabled) { - apic_debug("Receive NMI setting on APIC_LVT0 " - "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; - } - } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; -} - static void apic_mmio_write(struct kvm_io_device *this, gpa_t address, int len, const void *data) { @@ -771,11 +743,10 @@ static void apic_mmio_write(struct kvm_io_device *this, apic_set_reg(apic, APIC_ICR2, val & 0xff000000); break; - case APIC_LVT0: - apic_manage_nmi_watchdog(apic, val); case APIC_LVTT: case APIC_LVTTHMR: case APIC_LVTPC: + case APIC_LVT0: case APIC_LVT1: case APIC_LVTERR: /* TODO: Check vector */ @@ -990,26 +961,12 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) -{ - u32 reg = apic_get_reg(apic, lvt_type); - int vector, mode, trig_mode; - - if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { - vector = reg & APIC_VECTOR_MASK; - mode = reg & APIC_MODE_MASK; - trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode); - } - return 0; -} - -void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) +static int __inject_apic_timer_irq(struct kvm_lapic *apic) { - struct kvm_lapic *apic = vcpu->arch.apic; + int vector; - if (apic) - kvm_apic_local_deliver(apic, APIC_LVT0); + vector = apic_lvt_vector(apic, APIC_LVTT); + return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); } static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) @@ -1104,8 +1061,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && atomic_read(&apic->timer.pending) > 0) { - if (kvm_apic_local_deliver(apic, APIC_LVTT)) + if (apic && apic_lvt_enabled(apic, APIC_LVTT) && + atomic_read(&apic->timer.pending) > 0) { + if (__inject_apic_timer_irq(apic)) atomic_dec(&apic->timer.pending); } } diff --git a/trunk/arch/x86/kvm/mmu.c b/trunk/arch/x86/kvm/mmu.c index 83f11c7474a1..410ddbc1aa2e 100644 --- a/trunk/arch/x86/kvm/mmu.c +++ b/trunk/arch/x86/kvm/mmu.c @@ -17,6 +17,7 @@ * */ +#include "vmx.h" #include "mmu.h" #include @@ -32,7 +33,6 @@ #include #include #include -#include /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -168,7 +168,6 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; -static u64 __read_mostly shadow_mt_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -184,14 +183,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte) EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; - shadow_mt_mask = mt_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -386,9 +384,7 @@ static void 
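The apic_manage_nmi_watchdog() logic removed above keys off a single predicate: an LVT entry drives the NMI watchdog iff it is unmasked and set to NMI delivery. A self-contained version of that test; the constants mirror the Linux APIC register layout (mask bit 16, delivery-mode field 0x700, NMI mode 0x400) and should be checked against apicdef.h:

    #include <stdint.h>
    #include <stdbool.h>

    #define LVT_MASKED    (1u << 16)
    #define LVT_MODE_MASK 0x700u   /* delivery mode field */
    #define LVT_DM_NMI    0x400u   /* NMI delivery mode */

    static bool lvt_nmi_mode(uint32_t lvt)
    {
        /* Unmasked and mode == NMI, exactly apic_lvt_nmi_mode(). */
        return (lvt & (LVT_MODE_MASK | LVT_MASKED)) == LVT_DM_NMI;
    }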
account_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - gfn = unalias_gfn(kvm, gfn); - write_count = slot_largepage_idx(gfn, - gfn_to_memslot_unaliased(kvm, gfn)); + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); *write_count += 1; } @@ -396,20 +392,16 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - gfn = unalias_gfn(kvm, gfn); - write_count = slot_largepage_idx(gfn, - gfn_to_memslot_unaliased(kvm, gfn)); + write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); *write_count -= 1; WARN_ON(*write_count < 0); } static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) { - struct kvm_memory_slot *slot; + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); int *largepage_idx; - gfn = unalias_gfn(kvm, gfn); - slot = gfn_to_memslot_unaliased(kvm, gfn); if (slot) { largepage_idx = slot_largepage_idx(gfn, slot); return *largepage_idx; @@ -621,7 +613,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) return NULL; } -static int rmap_write_protect(struct kvm *kvm, u64 gfn) +static void rmap_write_protect(struct kvm *kvm, u64 gfn) { unsigned long *rmapp; u64 *spte; @@ -667,7 +659,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) spte = rmap_next(kvm, rmapp, spte); } - return write_protected; + if (write_protected) + kvm_flush_remote_tlbs(kvm); } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) @@ -793,11 +786,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&sp->oos_link); ASSERT(is_empty_shadow_page(sp->spt)); - bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); + sp->slot_bitmap = 0; sp->multimapped = 0; - sp->global = 1; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; return sp; @@ -909,9 +900,8 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte) struct kvm_mmu_page *sp = page_header(__pa(spte)); index = spte - sp->spt; - if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) - sp->unsync_children++; - WARN_ON(!sp->unsync_children); + __set_bit(index, sp->unsync_child_bitmap); + sp->unsync_children = 1; } static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) @@ -938,6 +928,7 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { + sp->unsync_children = 1; kvm_mmu_update_parents_unsync(sp); return 1; } @@ -968,66 +959,38 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { } -#define KVM_PAGE_ARRAY_NR 16 - -struct kvm_mmu_pages { - struct mmu_page_and_offset { - struct kvm_mmu_page *sp; - unsigned int idx; - } page[KVM_PAGE_ARRAY_NR]; - unsigned int nr; -}; - #define for_each_unsync_children(bitmap, idx) \ for (idx = find_first_bit(bitmap, 512); \ idx < 512; \ idx = find_next_bit(bitmap, 512, idx+1)) -int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, - int idx) +static int mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_unsync_walk *walker) { - int i; - - if (sp->unsync) - for (i=0; i < pvec->nr; i++) - if (pvec->page[i].sp == sp) - return 0; + int i, ret; - pvec->page[pvec->nr].sp = sp; - pvec->page[pvec->nr].idx = idx; - pvec->nr++; - return (pvec->nr == KVM_PAGE_ARRAY_NR); -} - -static int __mmu_unsync_walk(struct kvm_mmu_page *sp, - struct 
kvm_mmu_pages *pvec) -{ - int i, ret, nr_unsync_leaf = 0; + if (!sp->unsync_children) + return 0; for_each_unsync_children(sp->unsync_child_bitmap, i) { u64 ent = sp->spt[i]; - if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { + if (is_shadow_present_pte(ent)) { struct kvm_mmu_page *child; child = page_header(ent & PT64_BASE_ADDR_MASK); if (child->unsync_children) { - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; - - ret = __mmu_unsync_walk(child, pvec); - if (!ret) - __clear_bit(i, sp->unsync_child_bitmap); - else if (ret > 0) - nr_unsync_leaf += ret; - else + ret = mmu_unsync_walk(child, walker); + if (ret) return ret; + __clear_bit(i, sp->unsync_child_bitmap); } if (child->unsync) { - nr_unsync_leaf++; - if (mmu_pages_add(pvec, child, i)) - return -ENOSPC; + ret = walker->entry(child, walker); + __clear_bit(i, sp->unsync_child_bitmap); + if (ret) + return ret; } } } @@ -1035,17 +998,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) sp->unsync_children = 0; - return nr_unsync_leaf; -} - -static int mmu_unsync_walk(struct kvm_mmu_page *sp, - struct kvm_mmu_pages *pvec) -{ - if (!sp->unsync_children) - return 0; - - mmu_pages_add(pvec, sp, 0); - return __mmu_unsync_walk(sp, pvec); + return 0; } static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) @@ -1068,18 +1021,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) return NULL; } -static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - list_del(&sp->oos_link); - --kvm->stat.mmu_unsync_global; -} - static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); sp->unsync = 0; - if (sp->global) - kvm_unlink_unsync_global(kvm, sp); --kvm->stat.mmu_unsync; } @@ -1092,8 +1037,7 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 1; } - if (rmap_write_protect(vcpu->kvm, sp->gfn)) - kvm_flush_remote_tlbs(vcpu->kvm); + rmap_write_protect(vcpu->kvm, sp->gfn); kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); @@ -1104,89 +1048,30 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 0; } -struct mmu_page_path { - struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; - unsigned int idx[PT64_ROOT_LEVEL-1]; +struct sync_walker { + struct kvm_vcpu *vcpu; + struct kvm_unsync_walk walker; }; -#define for_each_sp(pvec, sp, parents, i) \ - for (i = mmu_pages_next(&pvec, &parents, -1), \ - sp = pvec.page[i].sp; \ - i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ - i = mmu_pages_next(&pvec, &parents, i)) - -int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, - int i) +static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) { - int n; + struct sync_walker *sync_walk = container_of(walk, struct sync_walker, + walker); + struct kvm_vcpu *vcpu = sync_walk->vcpu; - for (n = i+1; n < pvec->nr; n++) { - struct kvm_mmu_page *sp = pvec->page[n].sp; - - if (sp->role.level == PT_PAGE_TABLE_LEVEL) { - parents->idx[0] = pvec->page[n].idx; - return n; - } - - parents->parent[sp->role.level-2] = sp; - parents->idx[sp->role.level-1] = pvec->page[n].idx; - } - - return n; -} - -void mmu_pages_clear_parents(struct mmu_page_path *parents) -{ - struct kvm_mmu_page *sp; - unsigned int level = 0; - - do { - unsigned int idx = parents->idx[level]; - - sp = parents->parent[level]; - if (!sp) - return; - - 
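The kvm_mmu_pages/mmu_pages_add() machinery being removed above is a bounded-batch accumulator: the walker fills a fixed array and signals the caller to drain it when full, so the walk itself never allocates. The shape reduced to its core (KVM's array also records a per-entry index, omitted here):

    #define BATCH_NR 16

    struct page_batch {
        void *page[BATCH_NR];
        int nr;
    };

    /* Returns nonzero when the batch is full and must be drained. */
    static int batch_add(struct page_batch *b, void *p)
    {
        b->page[b->nr++] = p;
        return b->nr == BATCH_NR;
    }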
--sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); - __clear_bit(idx, sp->unsync_child_bitmap); - level++; - } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); -} - -static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, - struct mmu_page_path *parents, - struct kvm_mmu_pages *pvec) -{ - parents->parent[parent->role.level-1] = NULL; - pvec->nr = 0; + kvm_sync_page(vcpu, sp); + return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); } -static void mmu_sync_children(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *parent) +static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - int i; - struct kvm_mmu_page *sp; - struct mmu_page_path parents; - struct kvm_mmu_pages pages; - - kvm_mmu_pages_init(parent, &parents, &pages); - while (mmu_unsync_walk(parent, &pages)) { - int protected = 0; - - for_each_sp(pages, sp, parents, i) - protected |= rmap_write_protect(vcpu->kvm, sp->gfn); - - if (protected) - kvm_flush_remote_tlbs(vcpu->kvm); + struct sync_walker walker = { + .walker = { .entry = mmu_sync_fn, }, + .vcpu = vcpu, + }; - for_each_sp(pages, sp, parents, i) { - kvm_sync_page(vcpu, sp); - mmu_pages_clear_parents(&parents); - } + while (mmu_unsync_walk(sp, &walker.walker)) cond_resched_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_pages_init(parent, &parents, &pages); - } } static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, @@ -1244,8 +1129,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp->role = role; hlist_add_head(&sp->hash_link, bucket); if (!metaphysical) { - if (rmap_write_protect(vcpu->kvm, gfn)) - kvm_flush_remote_tlbs(vcpu->kvm); + rmap_write_protect(vcpu->kvm, gfn); account_shadowed(vcpu->kvm, gfn); } if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) @@ -1269,8 +1153,6 @@ static int walk_shadow(struct kvm_shadow_walk *walker, if (level == PT32E_ROOT_LEVEL) { shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; shadow_addr &= PT64_BASE_ADDR_MASK; - if (!shadow_addr) - return 1; --level; } @@ -1355,29 +1237,33 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) } } -static int mmu_zap_unsync_children(struct kvm *kvm, - struct kvm_mmu_page *parent) -{ - int i, zapped = 0; - struct mmu_page_path parents; - struct kvm_mmu_pages pages; - - if (parent->role.level == PT_PAGE_TABLE_LEVEL) - return 0; +struct zap_walker { + struct kvm_unsync_walk walker; + struct kvm *kvm; + int zapped; +}; - kvm_mmu_pages_init(parent, &parents, &pages); - while (mmu_unsync_walk(parent, &pages)) { - struct kvm_mmu_page *sp; +static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) +{ + struct zap_walker *zap_walk = container_of(walk, struct zap_walker, + walker); + kvm_mmu_zap_page(zap_walk->kvm, sp); + zap_walk->zapped = 1; + return 0; +} - for_each_sp(pages, sp, parents, i) { - kvm_mmu_zap_page(kvm, sp); - mmu_pages_clear_parents(&parents); - } - zapped += pages.nr; - kvm_mmu_pages_init(parent, &parents, &pages); - } +static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + struct zap_walker walker = { + .walker = { .entry = mmu_zap_fn, }, + .kvm = kvm, + .zapped = 0, + }; - return zapped; + if (sp->role.level == PT_PAGE_TABLE_LEVEL) + return 0; + mmu_unsync_walk(sp, &walker.walker); + return walker.zapped; } static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -1476,7 +1362,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) int slot = memslot_id(kvm, gfn_to_memslot(kvm, 
gfn)); struct kvm_mmu_page *sp = page_header(__pa(pte)); - __set_bit(slot, sp->slot_bitmap); + __set_bit(slot, &sp->slot_bitmap); } static void mmu_convert_notrap(struct kvm_mmu_page *sp) @@ -1507,110 +1393,6 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) return page; } -/* - * The function is based on mtrr_type_lookup() in - * arch/x86/kernel/cpu/mtrr/generic.c - */ -static int get_mtrr_type(struct mtrr_state_type *mtrr_state, - u64 start, u64 end) -{ - int i; - u64 base, mask; - u8 prev_match, curr_match; - int num_var_ranges = KVM_NR_VAR_MTRR; - - if (!mtrr_state->enabled) - return 0xFF; - - /* Make end inclusive end, instead of exclusive */ - end--; - - /* Look in fixed ranges. Just return the type as per start */ - if (mtrr_state->have_fixed && (start < 0x100000)) { - int idx; - - if (start < 0x80000) { - idx = 0; - idx += (start >> 16); - return mtrr_state->fixed_ranges[idx]; - } else if (start < 0xC0000) { - idx = 1 * 8; - idx += ((start - 0x80000) >> 14); - return mtrr_state->fixed_ranges[idx]; - } else if (start < 0x1000000) { - idx = 3 * 8; - idx += ((start - 0xC0000) >> 12); - return mtrr_state->fixed_ranges[idx]; - } - } - - /* - * Look in variable ranges - * Look of multiple ranges matching this address and pick type - * as per MTRR precedence - */ - if (!(mtrr_state->enabled & 2)) - return mtrr_state->def_type; - - prev_match = 0xFF; - for (i = 0; i < num_var_ranges; ++i) { - unsigned short start_state, end_state; - - if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) - continue; - - base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + - (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); - mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + - (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); - - start_state = ((start & mask) == (base & mask)); - end_state = ((end & mask) == (base & mask)); - if (start_state != end_state) - return 0xFE; - - if ((start & mask) != (base & mask)) - continue; - - curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; - if (prev_match == 0xFF) { - prev_match = curr_match; - continue; - } - - if (prev_match == MTRR_TYPE_UNCACHABLE || - curr_match == MTRR_TYPE_UNCACHABLE) - return MTRR_TYPE_UNCACHABLE; - - if ((prev_match == MTRR_TYPE_WRBACK && - curr_match == MTRR_TYPE_WRTHROUGH) || - (prev_match == MTRR_TYPE_WRTHROUGH && - curr_match == MTRR_TYPE_WRBACK)) { - prev_match = MTRR_TYPE_WRTHROUGH; - curr_match = MTRR_TYPE_WRTHROUGH; - } - - if (prev_match != curr_match) - return MTRR_TYPE_UNCACHABLE; - } - - if (prev_match != 0xFF) - return prev_match; - - return mtrr_state->def_type; -} - -static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) -{ - u8 mtrr; - - mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, - (gfn << PAGE_SHIFT) + PAGE_SIZE); - if (mtrr == 0xfe || mtrr == 0xff) - mtrr = MTRR_TYPE_WRBACK; - return mtrr; -} - static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { unsigned index; @@ -1627,15 +1409,9 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (s->role.word != sp->role.word) return 1; } + kvm_mmu_mark_parents_unsync(vcpu, sp); ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; - - if (sp->global) { - list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); - ++vcpu->kvm->stat.mmu_unsync_global; - } else - kvm_mmu_mark_parents_unsync(vcpu, sp); - mmu_convert_notrap(sp); return 0; } @@ -1661,24 +1437,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 
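The removed get_mtrr_type() above resolves a guest physical address against the variable-range MTRRs: a range matches iff (addr & mask) == (base & mask), and overlaps resolve by precedence (UC beats everything, a WB/WT overlap yields WT). A simplified sketch that collapses every conflicting overlap to UC, which is stricter than the real precedence rules:

    #include <stdint.h>

    #define MTRR_TYPE_UC 0  /* uncachable */

    struct var_mtrr { uint64_t base, mask; uint8_t type; };

    /* Returns 0xFF when no range matches; caller uses the default type. */
    static uint8_t mtrr_var_lookup(const struct var_mtrr *r, int n,
                                   uint64_t addr)
    {
        uint8_t match = 0xff;

        for (int i = 0; i < n; i++) {
            if ((addr & r[i].mask) != (r[i].base & r[i].mask))
                continue;                /* not inside range i */
            if (match == 0xff)
                match = r[i].type;
            else if (match != r[i].type)
                return MTRR_TYPE_UC;     /* conflicting overlap */
        }
        return match;
    }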
unsigned pte_access, int user_fault, int write_fault, int dirty, int largepage, - int global, gfn_t gfn, pfn_t pfn, bool speculative, + gfn_t gfn, pfn_t pfn, bool speculative, bool can_unsync) { u64 spte; int ret = 0; - u64 mt_mask = shadow_mt_mask; - struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); - - if (!(vcpu->arch.cr4 & X86_CR4_PGE)) - global = 0; - if (!global && sp->global) { - sp->global = 0; - if (sp->unsync) { - kvm_unlink_unsync_global(vcpu->kvm, sp); - kvm_mmu_mark_parents_unsync(vcpu, sp); - } - } - /* * We don't set the accessed bit, since we sometimes want to see * whether the guest actually used the pte (in order to detect @@ -1697,11 +1460,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; - if (mt_mask) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); - spte |= mt_mask; - } spte |= (u64)pfn << PAGE_SHIFT; @@ -1716,15 +1474,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= PT_WRITABLE_MASK; - /* - * Optimization: for pte sync, if spte was writable the hash - * lookup is unnecessary (and expensive). Write protection - * is responsibility of mmu_get_page / kvm_sync_page. - * Same reasoning can be applied to dirty page accounting. - */ - if (!can_unsync && is_writeble_pte(*shadow_pte)) - goto set_pte; - if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { pgprintk("%s: found shadow page for %lx, marking ro\n", __func__, gfn); @@ -1746,8 +1495,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, - int *ptwrite, int largepage, int global, - gfn_t gfn, pfn_t pfn, bool speculative) + int *ptwrite, int largepage, gfn_t gfn, + pfn_t pfn, bool speculative) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); @@ -1780,7 +1529,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, - dirty, largepage, global, gfn, pfn, speculative, true)) { + dirty, largepage, gfn, pfn, speculative, true)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1837,7 +1586,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk, || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 0, walk->write, 1, &walk->pt_write, - walk->largepage, 0, gfn, walk->pfn, false); + walk->largepage, gfn, walk->pfn, false); ++vcpu->stat.pf_fixed; return 1; } @@ -2024,15 +1773,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) } } -static void mmu_sync_global(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = vcpu->kvm; - struct kvm_mmu_page *sp, *n; - - list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) - kvm_sync_page(vcpu, sp); -} - void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->kvm->mmu_lock); @@ -2040,13 +1780,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->kvm->mmu_lock); } -void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) -{ - spin_lock(&vcpu->kvm->mmu_lock); - mmu_sync_global(vcpu); - spin_unlock(&vcpu->kvm->mmu_lock); -} - static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) { return vaddr; @@ -2445,8 +2178,7 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes, - bool 
guest_initiated) + const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; @@ -2472,17 +2204,15 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, "pre pte write"); - if (guest_initiated) { - if (gfn == vcpu->arch.last_pt_write_gfn - && !last_updated_pte_accessed(vcpu)) { - ++vcpu->arch.last_pt_write_count; - if (vcpu->arch.last_pt_write_count >= 3) - flooded = 1; - } else { - vcpu->arch.last_pt_write_gfn = gfn; - vcpu->arch.last_pt_write_count = 1; - vcpu->arch.last_pte_updated = NULL; - } + if (gfn == vcpu->arch.last_pt_write_gfn + && !last_updated_pte_accessed(vcpu)) { + ++vcpu->arch.last_pt_write_count; + if (vcpu->arch.last_pt_write_count >= 3) + flooded = 1; + } else { + vcpu->arch.last_pt_write_gfn = gfn; + vcpu->arch.last_pt_write_count = 1; + vcpu->arch.last_pte_updated = NULL; } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; @@ -2622,7 +2352,9 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { + spin_lock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.invlpg(vcpu, gva); + spin_unlock(&vcpu->kvm->mmu_lock); kvm_mmu_flush_tlb(vcpu); ++vcpu->stat.invlpg; } @@ -2719,7 +2451,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) int i; u64 *pt; - if (!test_bit(slot, sp->slot_bitmap)) + if (!test_bit(slot, &sp->slot_bitmap)) continue; pt = sp->spt; @@ -3128,8 +2860,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) if (sp->role.metaphysical) continue; + slot = gfn_to_memslot(vcpu->kvm, sp->gfn); gfn = unalias_gfn(vcpu->kvm, sp->gfn); - slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); rmapp = &slot->rmap[gfn - slot->base_gfn]; if (*rmapp) printk(KERN_ERR "%s: (%s) shadow page has writable" diff --git a/trunk/arch/x86/kvm/paging_tmpl.h b/trunk/arch/x86/kvm/paging_tmpl.h index 9fd78b6e17ad..84eee43bbe74 100644 --- a/trunk/arch/x86/kvm/paging_tmpl.h +++ b/trunk/arch/x86/kvm/paging_tmpl.h @@ -82,7 +82,6 @@ struct shadow_walker { int *ptwrite; pfn_t pfn; u64 *sptep; - gpa_t pte_gpa; }; static gfn_t gpte_to_gfn(pt_element_t gpte) @@ -223,7 +222,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, if (ret) goto walk; pte |= PT_DIRTY_MASK; - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); + kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); walker->ptes[walker->level - 1] = pte; } @@ -275,8 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, largepage, - gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), + gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), pfn, true); } @@ -303,9 +301,8 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, sw->user_fault, sw->write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, - sw->ptwrite, sw->largepage, - gw->ptes[gw->level-1] & PT_GLOBAL_MASK, - gw->gfn, sw->pfn, false); + sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, + false); sw->sptep = sptep; return 1; } @@ -469,22 +466,10 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, struct kvm_vcpu *vcpu, u64 addr, u64 *sptep, int level) { - struct shadow_walker *sw = - container_of(_sw, struct shadow_walker, walker); - /* FIXME: properly handle invlpg on large guest pages */ - if (level == PT_PAGE_TABLE_LEVEL || - 
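The kvm_mmu_pte_write() hunk above keeps the write-flood heuristic but no longer gates it on guest-initiated writes: three consecutive writes to the same guest page-table frame (with the last updated PTE never accessed, a check omitted below) mark the page as flooded and a candidate for zapping. The counter logic in isolation:

    #include <stdbool.h>
    #include <stdint.h>

    struct flood_state { uint64_t last_gfn; int count; };

    static bool pt_write_flooded(struct flood_state *s, uint64_t gfn)
    {
        if (gfn == s->last_gfn)
            return ++s->count >= 3;  /* third hit in a row: flooded */
        s->last_gfn = gfn;           /* new target, restart count */
        s->count = 1;
        return false;
    }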
((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - sw->pte_gpa = (sp->gfn << PAGE_SHIFT); - sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); - - if (is_shadow_present_pte(*sptep)) { + if (level == PT_PAGE_TABLE_LEVEL) { + if (is_shadow_present_pte(*sptep)) rmap_remove(vcpu->kvm, sptep); - if (is_large_pte(*sptep)) - --vcpu->kvm->stat.lpages; - } set_shadow_pte(sptep, shadow_trap_nonpresent_pte); return 1; } @@ -495,26 +480,11 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { - pt_element_t gpte; struct shadow_walker walker = { .walker = { .entry = FNAME(shadow_invlpg_entry), }, - .pte_gpa = -1, }; - spin_lock(&vcpu->kvm->mmu_lock); walk_shadow(&walker.walker, vcpu, gva); - spin_unlock(&vcpu->kvm->mmu_lock); - if (walker.pte_gpa == -1) - return; - if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, - sizeof(pt_element_t))) - return; - if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { - if (mmu_topup_memory_caches(vcpu)) - return; - kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, - sizeof(pt_element_t), 0); - } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) @@ -610,7 +580,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, - is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, + is_dirty_pte(gpte), 0, gfn, spte_to_pfn(sp->spt[i]), true, false); } diff --git a/trunk/arch/x86/kvm/svm.c b/trunk/arch/x86/kvm/svm.c index 1452851ae258..9c4ce657d963 100644 --- a/trunk/arch/x86/kvm/svm.c +++ b/trunk/arch/x86/kvm/svm.c @@ -28,8 +28,6 @@ #include -#include - #define __ex(x) __kvm_handle_fault_on_reboot(x) MODULE_AUTHOR("Qumranet"); @@ -247,19 +245,34 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static int has_svm(void) { - const char *msg; + uint32_t eax, ebx, ecx, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + printk(KERN_INFO "has_svm: not amd\n"); + return 0; + } - if (!cpu_has_svm(&msg)) { - printk(KERN_INFO "has_svn: %s\n", msg); + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if (eax < SVM_CPUID_FUNC) { + printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); return 0; } + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + printk(KERN_DEBUG "has_svm: svm not available\n"); + return 0; + } return 1; } static void svm_hardware_disable(void *garbage) { - cpu_svm_disable(); + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); } static void svm_hardware_enable(void *garbage) @@ -759,22 +772,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; - - /* - * SVM always stores 0 for the 'G' bit in the CS selector in - * the VMCB on a VMEXIT. This hurts cross-vendor migration: - * Intel's VMENTRY has a check on the 'G' bit. 
- */ - if (seg == VCPU_SREG_CS) - var->g = s->limit > 0xfffff; - - /* - * Work around a bug where the busy flag in the tr selector - * isn't exposed - */ - if (seg == VCPU_SREG_TR) - var->type |= 0x2; - var->unusable = !var->present; } @@ -1102,7 +1099,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) rep = (io_info & SVM_IOIO_REP_MASK) != 0; down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; - skip_emulated_instruction(&svm->vcpu); return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } @@ -1916,11 +1912,6 @@ static int get_npt_level(void) #endif } -static int svm_get_mt_mask_shift(void) -{ - return 0; -} - static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1976,7 +1967,6 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, - .get_mt_mask_shift = svm_get_mt_mask_shift, }; static int __init svm_init(void) diff --git a/trunk/arch/x86/include/asm/svm.h b/trunk/arch/x86/kvm/svm.h similarity index 100% rename from trunk/arch/x86/include/asm/svm.h rename to trunk/arch/x86/kvm/svm.h diff --git a/trunk/arch/x86/kvm/vmx.c b/trunk/arch/x86/kvm/vmx.c index 6259d7467648..a4018b01e1f9 100644 --- a/trunk/arch/x86/kvm/vmx.c +++ b/trunk/arch/x86/kvm/vmx.c @@ -16,6 +16,7 @@ */ #include "irq.h" +#include "vmx.h" #include "mmu.h" #include @@ -30,8 +31,6 @@ #include #include -#include -#include #define __ex(x) __kvm_handle_fault_on_reboot(x) @@ -91,11 +90,6 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; - - /* Support for vnmi-less CPUs */ - int soft_vnmi_blocked; - ktime_t entry_time; - s64 vnmi_blocked_time; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -128,7 +122,7 @@ static struct vmcs_config { u32 vmentry_ctrl; } vmcs_config; -static struct vmx_capability { +struct vmx_capability { u32 ept; u32 vpid; } vmx_capability; @@ -963,13 +957,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); break; - case MSR_IA32_CR_PAT: - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - vmcs_write64(GUEST_IA32_PAT, data); - vcpu->arch.pat = data; - break; - } - /* Otherwise falls through to kvm_set_msr_common */ default: vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); @@ -1045,7 +1032,8 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) static __init int cpu_has_kvm_support(void) { - return cpu_has_vmx(); + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ } static __init int vmx_disabled_by_bios(void) @@ -1091,20 +1079,11 @@ static void vmclear_local_vcpus(void) __vcpu_clear(vmx); } - -/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() - * tricks. 
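The open-coded has_svm() restored above probes CPUID directly instead of going through cpu_has_svm(). The same probe is easy to reproduce from user space; bit 2 of ECX in leaf 0x8000_0001 is AMD's SVM feature flag:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
            return 1;                    /* leaf not supported */
        printf("SVM %s\n", (ecx & (1u << 2)) ? "present" : "absent");
        return 0;
    }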
- */ -static void kvm_cpu_vmxoff(void) -{ - asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); - write_cr4(read_cr4() & ~X86_CR4_VMXE); -} - static void hardware_disable(void *garbage) { vmclear_local_vcpus(); - kvm_cpu_vmxoff(); + asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); + write_cr4(read_cr4() & ~X86_CR4_VMXE); } static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, @@ -1197,13 +1176,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; + opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; - min = 0; - opt = VM_ENTRY_LOAD_IA32_PAT; + min = opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, &_vmentry_control) < 0) return -EIO; @@ -2109,9 +2087,8 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) */ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { - u32 host_sysenter_cs, msr_low, msr_high; + u32 host_sysenter_cs; u32 junk; - u64 host_pat; unsigned long a; struct descriptor_table dt; int i; @@ -2199,20 +2176,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) rdmsrl(MSR_IA32_SYSENTER_EIP, a); vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ - if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { - rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); - host_pat = msr_low | ((u64) msr_high << 32); - vmcs_write64(HOST_IA32_PAT, host_pat); - } - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); - host_pat = msr_low | ((u64) msr_high << 32); - /* Write the default value follow host pat */ - vmcs_write64(GUEST_IA32_PAT, host_pat); - /* Keep arch.pat sync with GUEST_IA32_PAT */ - vmx->vcpu.arch.pat = host_pat; - } - for (i = 0; i < NR_VMX_MSR; ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; @@ -2267,8 +2230,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.rmode.active = 0; - vmx->soft_vnmi_blocked = 0; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; @@ -2374,29 +2335,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) return ret; } -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - if (!cpu_has_virtual_nmis()) { - enable_irq_window(vcpu); - return; - } - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2420,54 +2358,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) static void vmx_inject_nmi(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!cpu_has_virtual_nmis()) { - /* - * Tracking the NMI-blocked state in software is built upon - * finding the next open IRQ window. This, in turn, depends on - * well-behaving guests: They have to keep IRQs disabled at - * least as long as the NMI handler runs. Otherwise we may - * cause NMI nesting, maybe breaking the guest. 
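The setup_vmcs_config() hunks above shrink the opt masks handed to adjust_vmx_controls(), whose job is to clamp a desired control word against a VMX capability MSR: the MSR's low dword holds bits the CPU forces to 1, its high dword the bits the CPU allows to be 1. A sketch of that clamp, under my reading of the allowed-0/allowed-1 convention:

    #include <stdint.h>

    /* Returns 0 and writes the clamped control word, or -1 if the
     * CPU refuses a bit listed as required in 'min'. */
    static int adjust_controls(uint32_t min, uint32_t opt,
                               uint64_t cap_msr, uint32_t *out)
    {
        uint32_t allowed0 = (uint32_t)cap_msr;         /* must-be-1 */
        uint32_t allowed1 = (uint32_t)(cap_msr >> 32); /* may-be-1 */
        uint32_t ctl = (min | opt) & allowed1;

        ctl |= allowed0;
        if ((ctl & min) != min)
            return -1;
        *out = ctl;
        return 0;
    }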
But as this is - * highly unlikely, we can live with the residual risk. - */ - vmx->soft_vnmi_blocked = 1; - vmx->vnmi_blocked_time = 0; - } - - ++vcpu->stat.nmi_injections; - if (vcpu->arch.rmode.active) { - vmx->rmode.irq.pending = true; - vmx->rmode.irq.vector = NMI_VECTOR; - vmx->rmode.irq.rip = kvm_rip_read(vcpu); - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - NMI_VECTOR | INTR_TYPE_SOFT_INTR | - INTR_INFO_VALID_MASK); - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); - kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); - return; - } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } -static void vmx_update_window_states(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - - vcpu->arch.nmi_window_open = - !(guest_intr & (GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_NMI)); - if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) - vcpu->arch.nmi_window_open = 0; - - vcpu->arch.interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(guest_intr & (GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS))); -} - static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2480,49 +2374,40 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) kvm_queue_interrupt(vcpu, irq); } + static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vmx_update_window_states(vcpu); + u32 cpu_based_vm_exec_control; - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vcpu->arch.nmi_window_open) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_nmi_window(vcpu); - return; - } - } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (vcpu->arch.irq_summary - || kvm_run->request_interrupt_window) - enable_irq_window(vcpu); - return; - } + vcpu->arch.interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); - if (vcpu->arch.interrupt_window_open) { - if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) - kvm_do_inject_irq(vcpu); + if (vcpu->arch.interrupt_window_open && + vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) + kvm_do_inject_irq(vcpu); - if (vcpu->arch.interrupt.pending) - vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - } + if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) + vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); if (!vcpu->arch.interrupt_window_open && (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) - enable_irq_window(vcpu); + /* + * Interrupts blocked. Wait for unblock. 
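The do_interrupt_requests() rewrite above goes back to toggling CPU_BASED_VIRTUAL_INTR_PENDING by hand: with that bit set in the primary execution controls, the CPU exits as soon as the guest can accept an interrupt, which is how pending injections get retried. Modeled on a plain variable rather than a live VMCS; the bit position follows the SDM's primary processor-based controls:

    #include <stdint.h>
    #include <stdbool.h>

    #define CPU_BASED_VIRTUAL_INTR_PENDING (1u << 2)

    static void set_irq_window(uint32_t *exec_control, bool want_exit)
    {
        if (want_exit)
            *exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
        else
            *exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
    }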
+ */ + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + else + cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; struct kvm_userspace_memory_region tss_mem = { - .slot = TSS_PRIVATE_MEMSLOT, + .slot = 8, .guest_phys_addr = addr, .memory_size = PAGE_SIZE * 3, .flags = 0, @@ -2607,7 +2492,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); } - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -2696,7 +2581,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; - skip_emulated_instruction(vcpu); return kvm_emulate_pio(vcpu, kvm_run, in, size, port); } @@ -2883,7 +2767,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); KVMTRACE_0D(PEND_INTR, vcpu, handler); - ++vcpu->stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as @@ -2892,6 +2775,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, if (kvm_run->request_interrupt_window && !vcpu->arch.irq_summary) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; + ++vcpu->stat.irq_window_exits; return 0; } return 1; @@ -2948,7 +2832,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; int reason; @@ -2956,15 +2839,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && - (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) - == INTR_TYPE_NMI_INTR) { - vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - } tss_selector = exit_qualification; return kvm_task_switch(vcpu, tss_selector, reason); @@ -3053,12 +2927,16 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, while (!guest_state_valid(vcpu)) { err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); - if (err == EMULATE_DO_MMIO) - break; - - if (err != EMULATE_DONE) { - kvm_report_emulation_failure(vcpu, "emulation failure"); - return; + switch (err) { + case EMULATE_DONE: + break; + case EMULATE_DO_MMIO: + kvm_report_emulation_failure(vcpu, "mmio"); + /* TODO: Handle MMIO */ + return; + default: + kvm_report_emulation_failure(vcpu, "emulation failure"); + return; } if (signal_pending(current)) @@ -3070,10 +2948,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); - /* Guest state should be valid now except if we need to - * emulate an MMIO */ - if (guest_state_valid(vcpu)) - vmx->emulation_required = 0; + /* Guest state should be valid now, no more emulation should be needed */ + vmx->emulation_required = 0; } /* @@ -3120,11 +2996,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); - /* If we need to emulate an MMIO from handle_invalid_guest_state - * we just return 0 */ - if (vmx->emulation_required && emulate_invalid_guest_state) - return 0; - /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ if (vm_need_ept() && is_paging(vcpu)) { @@ -3141,32 +3012,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION && - exit_reason != EXIT_REASON_TASK_SWITCH)) - printk(KERN_WARNING "%s: unexpected, valid vectoring info " - "(0x%x) and exit reason is 0x%x\n", - __func__, vectoring_info, exit_reason); - - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { - if (vcpu->arch.interrupt_window_open) { - vmx->soft_vnmi_blocked = 0; - vcpu->arch.nmi_window_open = 1; - } else if (vmx->vnmi_blocked_time > 1000000000LL && - vcpu->arch.nmi_pending) { - /* - * This CPU don't support us in finding the end of an - * NMI-blocked window if the guest runs with IRQs - * disabled. So we pull the trigger after 1 s of - * futile waiting, but inform the user about this. - */ - printk(KERN_WARNING "%s: Breaking out of NMI-blocked " - "state on VCPU %d after 1 s timeout\n", - __func__, vcpu->vcpu_id); - vmx->soft_vnmi_blocked = 0; - vmx->vcpu.arch.nmi_window_open = 1; - } - } - + exit_reason != EXIT_REASON_EPT_VIOLATION)) + printk(KERN_WARNING "%s: unexpected, valid vectoring info and " + "exit reason is 0x%x\n", __func__, exit_reason); if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); @@ -3194,6 +3042,51 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? 
tpr >> 4 : max_irr >> 4); } +static void enable_irq_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + if (!cpu_has_virtual_nmis()) + return; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return !(guest_intr & (GUEST_INTR_STATE_NMI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)); +} + +static int vmx_irq_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)) && + (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); +} + +static void enable_intr_window(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); +} + static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -3216,9 +3109,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) if (unblock_nmi && vector != DF_VECTOR) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - } else if (unlikely(vmx->soft_vnmi_blocked)) - vmx->vnmi_blocked_time += - ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); + } idt_vectoring_info = vmx->idt_vectoring_info; idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; @@ -3256,29 +3147,26 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) { update_tpr_threshold(vcpu); - vmx_update_window_states(vcpu); - - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vcpu->arch.nmi_window_open) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_nmi_window(vcpu); + if (cpu_has_virtual_nmis()) { + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vmx_nmi_enabled(vcpu)) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_intr_window(vcpu); + return; + } + } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + enable_intr_window(vcpu); return; } } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); - return; - } if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { - if (vcpu->arch.interrupt_window_open) + if (vmx_irq_enabled(vcpu)) kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); else enable_irq_window(vcpu); @@ -3286,8 +3174,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); - if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); } } @@ -3327,10 +3213,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 intr_info; - /* Record the guest's net vcpu time for 
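The vmx_nmi_enabled()/vmx_irq_enabled() helpers added above read GUEST_INTERRUPTIBILITY_INFO and decide whether an injection window is open: NMIs are blocked by the STI, MOV-SS, and NMI shadows, IRQs by the STI and MOV-SS shadows plus a clear RFLAGS.IF. Pure-function equivalents, with bit values taken from the VMX interruptibility-state encoding:

    #include <stdint.h>
    #include <stdbool.h>

    #define INTR_STATE_STI    1u        /* blocked by STI */
    #define INTR_STATE_MOV_SS 2u        /* blocked by MOV SS / POP SS */
    #define INTR_STATE_NMI    8u        /* blocked by NMI */
    #define RFLAGS_IF         (1u << 9)

    static bool nmi_window_open(uint32_t intr_state)
    {
        return !(intr_state &
                 (INTR_STATE_STI | INTR_STATE_MOV_SS | INTR_STATE_NMI));
    }

    static bool irq_window_open(uint32_t intr_state, uint64_t rflags)
    {
        return (rflags & RFLAGS_IF) &&
               !(intr_state & (INTR_STATE_STI | INTR_STATE_MOV_SS));
    }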
enforced NMI injections. */ - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) - vmx->entry_time = ktime_get(); - /* Handle invalid guest state instead of entering VMX */ if (vmx->emulation_required && emulate_invalid_guest_state) { handle_invalid_guest_state(vcpu, kvm_run); @@ -3445,7 +3327,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - vmx_update_window_states(vcpu); + vcpu->arch.interrupt_window_open = + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; @@ -3453,7 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && (intr_info & INTR_INFO_VALID_MASK)) { KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); @@ -3571,11 +3455,6 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } -static int vmx_get_mt_mask_shift(void) -{ - return VMX_EPT_MT_EPTE_SHIFT; -} - static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -3631,7 +3510,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, - .get_mt_mask_shift = vmx_get_mt_mask_shift, }; static int __init vmx_init(void) @@ -3688,10 +3566,10 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, - VMX_EPT_EXECUTABLE_MASK, - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); } else kvm_disable_tdp(); diff --git a/trunk/arch/x86/include/asm/vmx.h b/trunk/arch/x86/kvm/vmx.h similarity index 93% rename from trunk/arch/x86/include/asm/vmx.h rename to trunk/arch/x86/kvm/vmx.h index d0238e6151d8..ec5edc339da6 100644 --- a/trunk/arch/x86/include/asm/vmx.h +++ b/trunk/arch/x86/kvm/vmx.h @@ -63,13 +63,10 @@ #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -#define VM_EXIT_SAVE_IA32_PAT 0x00040000 -#define VM_EXIT_LOAD_IA32_PAT 0x00080000 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 -#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 /* VMCS Encodings */ enum vmcs_field { @@ -115,8 +112,6 @@ enum vmcs_field { VMCS_LINK_POINTER_HIGH = 0x00002801, GUEST_IA32_DEBUGCTL = 0x00002802, GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, - GUEST_IA32_PAT = 0x00002804, - GUEST_IA32_PAT_HIGH = 0x00002805, GUEST_PDPTR0 = 0x0000280a, GUEST_PDPTR0_HIGH = 0x0000280b, GUEST_PDPTR1 = 0x0000280c, @@ -125,8 +120,6 @@ enum vmcs_field { GUEST_PDPTR2_HIGH = 0x0000280f, GUEST_PDPTR3 = 0x00002810, GUEST_PDPTR3_HIGH = 0x00002811, - HOST_IA32_PAT = 0x00002c00, - HOST_IA32_PAT_HIGH = 0x00002c01, PIN_BASED_VM_EXEC_CONTROL = 0x00004000, CPU_BASED_VM_EXEC_CONTROL = 0x00004002, EXCEPTION_BITMAP = 0x00004004, @@ -338,9 +331,8 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) 
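The vmx.h memslot hunk at this point swaps the symbolic private-slot macros for the literals 9 and 10, matching the hard-coded .slot = 8 in vmx_set_tss_addr() earlier in the patch. The two forms agree only while KVM_MEMORY_SLOTS is 8, which appears to be the value of this era and is assumed below:

    #include <assert.h>

    #define KVM_MEMORY_SLOTS 8  /* assumption; check kvm_host.h */

    static_assert(KVM_MEMORY_SLOTS + 0 == 8,  "TSS slot");
    static_assert(KVM_MEMORY_SLOTS + 1 == 9,  "APIC access page slot");
    static_assert(KVM_MEMORY_SLOTS + 2 == 10, "identity pagetable slot");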
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 @@ -364,19 +356,4 @@ enum vmcs_field { #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul - -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - - - #endif diff --git a/trunk/arch/x86/kvm/x86.c b/trunk/arch/x86/kvm/x86.c index 0e6aa8141dcd..f1f8ff2f1fa2 100644 --- a/trunk/arch/x86/kvm/x86.c +++ b/trunk/arch/x86/kvm/x86.c @@ -39,7 +39,6 @@ #include #include #include -#include #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -87,7 +86,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, - { "request_nmi", VCPU_STAT(request_nmi_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, { "efer_reload", VCPU_STAT(efer_reload) }, @@ -95,7 +93,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "insn_emulation", VCPU_STAT(insn_emulation) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, { "irq_injections", VCPU_STAT(irq_injections) }, - { "nmi_injections", VCPU_STAT(nmi_injections) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -104,7 +101,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, - { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages) }, { NULL } @@ -316,7 +312,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops->set_cr0(vcpu, cr0); vcpu->arch.cr0 = cr0; - kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); return; } @@ -360,7 +355,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; - kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -455,7 +449,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT + MSR_IA32_PERF_STATUS, }; static unsigned num_msrs_to_save; @@ -654,38 +648,10 @@ static bool msr_mtrr_valid(unsigned msr) static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!msr_mtrr_valid(msr)) return 1; - if (msr == MSR_MTRRdefType) { - vcpu->arch.mtrr_state.def_type = data; - vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; - } else if (msr == MSR_MTRRfix64K_00000) - p[0] = data; - else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) - p[1 + msr - 
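[Aside: the ASM_VMX_* macros dropped from vmx.h above hand-encode the VMX instructions as raw opcode bytes so the code assembles even with binutils too old to know the vmx mnemonics. As a sketch of the correspondence: ASM_VMX_VMREAD_RDX_RAX is VMREAD's "0F 78 /r" with ModRM 0xd0 (mod=11, reg=rdx, rm=rax), i.e. "vmread %rdx, %rax". A typical use, illustrative only:

static inline unsigned long vmcs_readl_sketch(unsigned long field)
{
	unsigned long value;

	/* ".byte 0x0f, 0x78, 0xd0" == vmread %rdx, %rax: read the
	 * VMCS field whose encoding is in rdx into rax.
	 */
	asm volatile(".byte 0x0f, 0x78, 0xd0"
		     : "=a" (value) : "d" (field) : "cc");
	return value;
}
]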
MSR_MTRRfix16K_80000] = data; - else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) - p[3 + msr - MSR_MTRRfix4K_C0000] = data; - else if (msr == MSR_IA32_CR_PAT) - vcpu->arch.pat = data; - else { /* Variable MTRRs */ - int idx, is_mtrr_mask; - u64 *pt; - - idx = (msr - 0x200) / 2; - is_mtrr_mask = msr - 0x200 - 2 * idx; - if (!is_mtrr_mask) - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; - else - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; - *pt = data; - } - - kvm_mmu_reset_context(vcpu); + vcpu->arch.mtrr[msr - 0x200] = data; return 0; } @@ -781,37 +747,10 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { - u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!msr_mtrr_valid(msr)) return 1; - if (msr == MSR_MTRRdefType) - *pdata = vcpu->arch.mtrr_state.def_type + - (vcpu->arch.mtrr_state.enabled << 10); - else if (msr == MSR_MTRRfix64K_00000) - *pdata = p[0]; - else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) - *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; - else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) - *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; - else if (msr == MSR_IA32_CR_PAT) - *pdata = vcpu->arch.pat; - else { /* Variable MTRRs */ - int idx, is_mtrr_mask; - u64 *pt; - - idx = (msr - 0x200) / 2; - is_mtrr_mask = msr - 0x200 - 2 * idx; - if (!is_mtrr_mask) - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; - else - pt = - (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; - *pdata = *pt; - } - + *pdata = vcpu->arch.mtrr[msr - 0x200]; return 0; } @@ -964,6 +903,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: case KVM_CAP_HLT: case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: + case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: case KVM_CAP_CLOCKSOURCE: @@ -1248,7 +1188,6 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, int t, times = entry->eax & 0xff; entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; - entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; for (t = 1; t < times && *nent < maxnent; ++t) { do_cpuid_1_ent(&entry[t], function, 0); entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; @@ -1279,7 +1218,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until level_type is zero */ for (i = 1; *nent < maxnent; ++i) { - level_type = entry[i - 1].ecx & 0xff00; + level_type = entry[i - 1].ecx & 0xff; if (!level_type) break; do_cpuid_1_ent(&entry[i], function, i); @@ -1379,15 +1318,6 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return 0; } -static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) -{ - vcpu_load(vcpu); - kvm_inject_nmi(vcpu); - vcpu_put(vcpu); - - return 0; -} - static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -1447,13 +1377,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_NMI: { - r = kvm_vcpu_ioctl_nmi(vcpu); - if (r) - goto out; - r = 0; - break; - } case KVM_SET_CPUID: { struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; @@ -2045,7 +1968,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) return 0; - kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); + kvm_mmu_pte_write(vcpu, gpa, val, bytes); return 1; } @@ -2481,6 +2404,8 @@ int 
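[Aside: the mtrr_state code stripped out of set_msr_mtrr()/get_msr_mtrr() above packs the eleven fixed-range MTRR MSRs into one flat u64 array, which is where the offsets 0, "1 + ..." and "3 + ..." come from. A sketch of that mapping, assuming the architectural MSR numbers:

static int fixed_mtrr_index(u32 msr)
{
	if (msr == 0x250)			/* MSR_MTRRfix64K_00000: 0-512K   */
		return 0;
	if (msr == 0x258 || msr == 0x259)	/* MSR_MTRRfix16K_*: 512K-768K    */
		return 1 + (msr - 0x258);
	if (msr >= 0x268 && msr <= 0x26f)	/* MSR_MTRRfix4K_*:  768K-1M      */
		return 3 + (msr - 0x268);
	return -1;				/* not a fixed-range MTRR MSR     */
}

Each of the eleven u64s encodes eight sub-ranges, one memory type per byte, so the three groups together cover exactly the first megabyte.]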
kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, val = kvm_register_read(vcpu, VCPU_REGS_RAX); memcpy(vcpu->arch.pio_data, &val, 4); + kvm_x86_ops->skip_emulated_instruction(vcpu); + pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); @@ -2616,7 +2541,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_nonpresent_ptes(0ull, 0ull); kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + PT_DIRTY_MASK, PT64_NX_MASK, 0); return 0; out: @@ -2804,7 +2729,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; /* when no next entry is found, the current entry[i] is reselected */ - for (j = i + 1; ; j = (j + 1) % nent) { + for (j = i + 1; j == i; j = (j + 1) % nent) { struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; if (ej->function == e->function) { ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; @@ -3048,7 +2973,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_arch_vcpu_reset(vcpu); + r = kvm_x86_ops->vcpu_reset(vcpu); if (r) return r; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -3350,9 +3275,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, kvm_desct->padding = 0; } -static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, - u16 selector, - struct descriptor_table *dtable) +static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, + u16 selector, + struct descriptor_table *dtable) { if (selector & 1 << 2) { struct kvm_segment kvm_seg; @@ -3377,7 +3302,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descriptor_dtable(vcpu, selector, &dtable); + get_segment_descritptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) { kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); @@ -3396,7 +3321,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descriptor_dtable(vcpu, selector, &dtable); + get_segment_descritptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) return 1; @@ -3975,7 +3900,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) /* We do fxsave: this must be aligned. 
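[Aside: the descriptor-table helpers above lean on the x86 segment-selector layout: bit 2 (TI) selects the LDT over the GDT, and the upper 13 bits index 8-byte descriptors, which is where the "index * 8 + 7" limit check comes from. Spelled out as illustrative helpers, not from the patch:

static inline int selector_in_ldt(u16 selector)
{
	return selector & (1 << 2);	/* TI bit: 1 = LDT, 0 = GDT */
}

static inline unsigned int selector_index(u16 selector)
{
	return selector >> 3;		/* bits 1:0 are the RPL */
}

/* Descriptor N occupies bytes N*8 .. N*8+7, so its last byte must lie
 * within the table limit; "dtable.limit < index * 8 + 7" rejects any
 * selector whose descriptor would run past the end.
 */
]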
*/ BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); - vcpu->arch.mtrr_state.have_fixed = 1; vcpu_load(vcpu); r = kvm_arch_vcpu_reset(vcpu); if (r == 0) @@ -4001,9 +3925,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = false; - return kvm_x86_ops->vcpu_reset(vcpu); } @@ -4091,7 +4012,6 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&kvm->arch.oos_global_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ @@ -4128,8 +4048,8 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_free_all_assigned_devices(kvm); kvm_iommu_unmap_guest(kvm); + kvm_free_all_assigned_devices(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); @@ -4207,8 +4127,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED - || vcpu->arch.nmi_pending; + || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; } static void vcpu_kick_intr(void *info) diff --git a/trunk/arch/x86/kvm/x86_emulate.c b/trunk/arch/x86/kvm/x86_emulate.c index d174db7a3370..ea051173b0da 100644 --- a/trunk/arch/x86/kvm/x86_emulate.c +++ b/trunk/arch/x86/kvm/x86_emulate.c @@ -58,7 +58,6 @@ #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ #define SrcImm (5<<4) /* Immediate operand. */ #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ -#define SrcOne (7<<4) /* Implied '1' */ #define SrcMask (7<<4) /* Generic ModRM decode. 
*/ #define ModRM (1<<7) @@ -71,23 +70,17 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define GroupMask 0xff /* Group number stored in bits 0:7 */ -/* Source 2 operand type */ -#define Src2None (0<<29) -#define Src2CL (1<<29) -#define Src2ImmByte (2<<29) -#define Src2One (3<<29) -#define Src2Mask (7<<29) enum { Group1_80, Group1_81, Group1_82, Group1_83, Group1A, Group3_Byte, Group3, Group4, Group5, Group7, }; -static u32 opcode_table[256] = { +static u16 opcode_table[256] = { /* 0x00 - 0x07 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, + 0, 0, 0, 0, /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -202,7 +195,7 @@ static u32 opcode_table[256] = { ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, }; -static u32 twobyte_table[256] = { +static u16 twobyte_table[256] = { /* 0x00 - 0x0F */ 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, @@ -237,14 +230,9 @@ static u32 twobyte_table[256] = { /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, - DstMem | SrcReg | Src2ImmByte | ModRM, - DstMem | SrcReg | Src2CL | ModRM, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, - DstMem | SrcReg | Src2ImmByte | ModRM, - DstMem | SrcReg | Src2CL | ModRM, - ModRM, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, DstMem | SrcReg | ModRM | BitOp, @@ -265,7 +253,7 @@ static u32 twobyte_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -static u32 group_table[] = { +static u16 group_table[] = { [Group1_80*8] = ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, @@ -309,9 +297,9 @@ static u32 group_table[] = { SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, }; -static u32 group2_table[] = { +static u16 group2_table[] = { [Group7*8] = - SrcNone | ModRM, 0, 0, SrcNone | ModRM, + SrcNone | ModRM, 0, 0, 0, SrcNone | ModRM | DstMem | Mov, 0, SrcMem16 | ModRM | Mov, 0, }; @@ -371,48 +359,49 @@ static u32 group2_table[] = { "andl %"_msk",%"_LO32 _tmp"; " \ "orl %"_LO32 _tmp",%"_sav"; " -#ifdef CONFIG_X86_64 -#define ON64(x) x -#else -#define ON64(x) -#endif - -#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ - do { \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op _suffix " %"_x"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : _y ((_src).val), "i" (EFLAGS_MASK)); \ - } while (0) - - /* Raw emulation: instruction has two explicit operands. 
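[Aside: the opcode tables in this hunk shrink from u32 back to u16 entries because the Src2* decode flags being removed live in bits 29-31, which cannot fit in 16 bits; everything that remains sits below bit 16. A sketch of the bit budget, using only values visible in the defines above:

/* With Src2 present, decode flags span bit 0 through bit 31,
 * forcing 32-bit table entries:
 */
#define ModRM		(1 << 7)	/* fits in u16             */
#define GroupDual	(1 << 15)	/* still fits in u16       */
#define Src2Mask	(7u << 29)	/* needs u32 table entries */

Dropping the Src2 flags lets every remaining entry fit in a u16 again, halving the tables.]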
*/ #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ - do { \ - unsigned long _tmp; \ - \ - switch ((_dst).bytes) { \ - case 2: \ - ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ - break; \ - case 4: \ - ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \ - break; \ - case 8: \ - ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \ - break; \ - } \ + do { \ + unsigned long _tmp; \ + \ + switch ((_dst).bytes) { \ + case 2: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"w %"_wx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _wy ((_src).val), "i" (EFLAGS_MASK)); \ + break; \ + case 4: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"l %"_lx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _ly ((_src).val), "i" (EFLAGS_MASK)); \ + break; \ + case 8: \ + __emulate_2op_8byte(_op, _src, _dst, \ + _eflags, _qx, _qy); \ + break; \ + } \ } while (0) #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long _tmp; \ + unsigned long __tmp; \ switch ((_dst).bytes) { \ case 1: \ - ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"b %"_bx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (__tmp) \ + : _by ((_src).val), "i" (EFLAGS_MASK)); \ break; \ default: \ __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ @@ -436,69 +425,72 @@ static u32 group2_table[] = { __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ "w", "r", _LO32, "r", "", "r") -/* Instruction has three operands and one operand is stored in ECX register */ -#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ - do { \ - unsigned long _tmp; \ - _type _clv = (_cl).val; \ - _type _srcv = (_src).val; \ - _type _dstv = (_dst).val; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "5", "2") \ - _op _suffix " %4,%1 \n" \ - _POST_EFLAGS("0", "5", "2") \ - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ - ); \ - \ - (_cl).val = (unsigned long) _clv; \ - (_src).val = (unsigned long) _srcv; \ - (_dst).val = (unsigned long) _dstv; \ - } while (0) - -#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ - do { \ - switch ((_dst).bytes) { \ - case 2: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "w", unsigned short); \ - break; \ - case 4: \ - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "l", unsigned int); \ - break; \ - case 8: \ - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ - "q", unsigned long)); \ - break; \ - } \ - } while (0) - -#define __emulate_1op(_op, _dst, _eflags, _suffix) \ - do { \ - unsigned long _tmp; \ - \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op _suffix " %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "+m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - } while (0) - /* Instruction has only one explicit operand (no source operand). 
*/ #define emulate_1op(_op, _dst, _eflags) \ do { \ + unsigned long _tmp; \ + \ switch ((_dst).bytes) { \ - case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ - case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ - case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ - case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ + case 1: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"b %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + break; \ + case 2: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"w %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + break; \ + case 4: \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"l %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + break; \ + case 8: \ + __emulate_1op_8byte(_op, _dst, _eflags); \ + break; \ } \ } while (0) +/* Emulate an instruction with quadword operands (x86/64 only). */ +#if defined(CONFIG_X86_64) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op"q %"_qx"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : _qy ((_src).val), "i" (EFLAGS_MASK)); \ + } while (0) + +#define __emulate_1op_8byte(_op, _dst, _eflags) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op"q %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ + } while (0) + +#elif defined(__i386__) +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) +#define __emulate_1op_8byte(_op, _dst, _eflags) +#endif /* __i386__ */ + /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _size, _eip) \ ({ unsigned long _x; \ @@ -1049,33 +1041,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.bytes = 1; c->src.val = insn_fetch(s8, 1, c->eip); break; - case SrcOne: - c->src.bytes = 1; - c->src.val = 1; - break; - } - - /* - * Decode and fetch the second source operand: register, memory - * or immediate. - */ - switch (c->d & Src2Mask) { - case Src2None: - break; - case Src2CL: - c->src2.bytes = 1; - c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; - break; - case Src2ImmByte: - c->src2.type = OP_IMM; - c->src2.ptr = (unsigned long *)c->eip; - c->src2.bytes = 1; - c->src2.val = insn_fetch(u8, 1, c->eip); - break; - case Src2One: - c->src2.bytes = 1; - c->src2.val = 1; - break; } /* Decode and fetch the destination operand: register or memory. 
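[Aside: all of these emulate_*op macros follow one pattern: splice the guest's saved EFLAGS into the host flags, execute the real instruction on the operands, then harvest the resulting flags back into the guest value. Stripped of the macro plumbing and the _PRE_EFLAGS/_POST_EFLAGS bookkeeping, the core idea is roughly this -- a standalone sketch, not the kernel's exact code:

/* Run an 8-bit add on host silicon and capture the arithmetic flags
 * it produces, the way the emulator derives guest EFLAGS.
 */
static unsigned long emulate_addb_sketch(unsigned char *dst,
					 unsigned char src)
{
	unsigned long flags;

	__asm__ __volatile__(
		"addb	%2, %0\n\t"
		"pushf\n\t"
		"pop	%1"
		: "+m" (*dst), "=r" (flags)
		: "q" (src)
		: "cc");
	return flags;
}

The real macros additionally mask the harvested value with EFLAGS_MASK and merge it into the saved guest flags, since only the arithmetic bits are allowed to change.]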
*/ @@ -1135,33 +1100,20 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) c->regs[VCPU_REGS_RSP]); } -static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) -{ - struct decode_cache *c = &ctxt->decode; - int rc; - - rc = ops->read_emulated(register_address(c, ss_base(ctxt), - c->regs[VCPU_REGS_RSP]), - &c->src.val, c->src.bytes, ctxt->vcpu); - if (rc != 0) - return rc; - - register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); - return rc; -} - static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; int rc; - c->src.bytes = c->dst.bytes; - rc = emulate_pop(ctxt, ops); + rc = ops->read_std(register_address(c, ss_base(ctxt), + c->regs[VCPU_REGS_RSP]), + &c->dst.val, c->dst.bytes, ctxt->vcpu); if (rc != 0) return rc; - c->dst.val = c->src.val; + + register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); + return 0; } @@ -1463,15 +1415,24 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) emulate_1op("dec", c->dst, ctxt->eflags); break; case 0x50 ... 0x57: /* push reg */ - emulate_push(ctxt); + c->dst.type = OP_MEM; + c->dst.bytes = c->op_bytes; + c->dst.val = c->src.val; + register_address_increment(c, &c->regs[VCPU_REGS_RSP], + -c->op_bytes); + c->dst.ptr = (void *) register_address( + c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - c->src.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ops); - if (rc != 0) + if ((rc = ops->read_std(register_address(c, ss_base(ctxt), + c->regs[VCPU_REGS_RSP]), c->dst.ptr, + c->op_bytes, ctxt->vcpu)) != 0) goto done; - c->dst.val = c->src.val; + + register_address_increment(c, &c->regs[VCPU_REGS_RSP], + c->op_bytes); + c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) @@ -1630,9 +1591,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) emulate_push(ctxt); break; case 0x9d: /* popf */ - c->dst.type = OP_REG; c->dst.ptr = (unsigned long *) &ctxt->eflags; - c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xa0 ... 0xa1: /* mov */ c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; @@ -1730,9 +1689,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) emulate_grp2(ctxt); break; case 0xc3: /* ret */ - c->dst.type = OP_REG; c->dst.ptr = &c->eip; - c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ mov: @@ -1821,7 +1778,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->eip = saved_eip; goto cannot_emulate; } - break; + return 0; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; break; @@ -2042,20 +1999,12 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); break; - case 0xa4: /* shld imm8, r, r/m */ - case 0xa5: /* shld cl, r, r/m */ - emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); - break; case 0xab: bts: /* bts */ /* only subword offset */ c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); break; - case 0xac: /* shrd imm8, r, r/m */ - case 0xad: /* shrd cl, r, r/m */ - emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); - break; case 0xae: /* clflush */ break; case 0xb0 ... 
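[Aside: among the removals in the hunks that follow are the SHLD/SHRD opcode entries (0F A4/A5 and 0F AC/AD) and their emulate_2op_cl() implementation; the Src2CL/Src2ImmByte decode flags existed to carry the third operand, the shift count. The instruction's effect in plain C -- a sketch, assuming 0 < count < width (hardware masks the count, and 16-bit overshift is undefined):

/* shld: shift dst left by count, refilling the vacated low bits from
 * the top of src; shrd is the mirror image in the other direction.
 */
static unsigned long shld_sketch(unsigned long dst, unsigned long src,
				 unsigned int count, unsigned int width)
{
	return (dst << count) | (src >> (width - count));
}
]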
0xb1: /* cmpxchg */ diff --git a/trunk/arch/x86/lguest/boot.c b/trunk/arch/x86/lguest/boot.c index a7ed208f81e3..50a779264bb1 100644 --- a/trunk/arch/x86/lguest/boot.c +++ b/trunk/arch/x86/lguest/boot.c @@ -738,7 +738,7 @@ static void lguest_time_init(void) /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ - lguest_clockevent.cpumask = cpumask_of(0); + lguest_clockevent.cpumask = cpumask_of_cpu(0); clockevents_register_device(&lguest_clockevent); /* Finally, we unblock the timer interrupt. */ diff --git a/trunk/arch/x86/mach-default/setup.c b/trunk/arch/x86/mach-default/setup.c index df167f265622..37b9ae4d44c5 100644 --- a/trunk/arch/x86/mach-default/setup.c +++ b/trunk/arch/x86/mach-default/setup.c @@ -133,28 +133,29 @@ void __init time_init_hook(void) **/ void mca_nmi_hook(void) { - /* - * If I recall correctly, there's a whole bunch of other things that + /* If I recall correctly, there's a whole bunch of other things that * we can do to check for NMI problems, but that's all I know about * at the moment. */ - pr_warning("NMI generated from unknown source!\n"); + + printk("NMI generated from unknown source!\n"); } #endif static __init int no_ipi_broadcast(char *str) { get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); + printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : + "IPI Broadcast"); return 1; } + __setup("no_ipi_broadcast=", no_ipi_broadcast); static int __init print_ipi_mode(void) { - pr_info("Using IPI %s mode\n", - no_broadcast ? "No-Shortcut" : "Shortcut"); + printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : + "Shortcut"); return 0; } diff --git a/trunk/arch/x86/mach-generic/bigsmp.c b/trunk/arch/x86/mach-generic/bigsmp.c index bc4c7840b2a8..3624a364b7f3 100644 --- a/trunk/arch/x86/mach-generic/bigsmp.c +++ b/trunk/arch/x86/mach-generic/bigsmp.c @@ -42,10 +42,9 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { - cpus_clear(*retmask); - cpu_set(cpu, *retmask); + return cpumask_of_cpu(cpu); } static int probe_bigsmp(void) diff --git a/trunk/arch/x86/mach-generic/es7000.c b/trunk/arch/x86/mach-generic/es7000.c index 4ba5ccaa1584..7b4e6d0d1690 100644 --- a/trunk/arch/x86/mach-generic/es7000.c +++ b/trunk/arch/x86/mach-generic/es7000.c @@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -97,7 +97,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. 
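[Aside: the bigsmp and es7000 hunks above, and the numaq/summit ones that follow, all make the same signature change: the newer kernel fills a caller-supplied mask through a pointer, while the code restored here returns a cpumask_t by value. With NR_CPUS in the thousands a cpumask_t runs to hundreds of bytes, which is the stack copy the out-parameter form was avoiding. Side by side as a sketch, using the era's APIs (the second name is illustrative):

/* Restored form: the whole mask is returned -- and copied -- by value. */
static cpumask_t vector_allocation_domain(int cpu)
{
	return cpumask_of_cpu(cpu);
}

/* Newer form: the caller owns the storage; nothing large is copied. */
static void vector_allocation_domain_ptr(int cpu, cpumask_t *retmask)
{
	cpus_clear(*retmask);
	cpu_set(cpu, *retmask);
}
]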
*/ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/trunk/arch/x86/mach-generic/numaq.c b/trunk/arch/x86/mach-generic/numaq.c index 511d7941364f..71a309b122e6 100644 --- a/trunk/arch/x86/mach-generic/numaq.c +++ b/trunk/arch/x86/mach-generic/numaq.c @@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -48,7 +48,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/trunk/arch/x86/mach-generic/summit.c b/trunk/arch/x86/mach-generic/summit.c index 2821ffc188b5..2c6d234e0009 100644 --- a/trunk/arch/x86/mach-generic/summit.c +++ b/trunk/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static cpumask_t vector_allocation_domain(int cpu) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -34,7 +34,8 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; } struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/trunk/arch/x86/mach-voyager/voyager_smp.c b/trunk/arch/x86/mach-voyager/voyager_smp.c index a5bc05492b1e..52145007bd7e 100644 --- a/trunk/arch/x86/mach-voyager/voyager_smp.c +++ b/trunk/arch/x86/mach-voyager/voyager_smp.c @@ -63,6 +63,11 @@ static int voyager_extended_cpus = 1; /* Used for the invalidate map that's also checked in the spinlock */ static volatile unsigned long smp_invalidate_needed; +/* Bitmask of currently online CPUs - used by setup.c for + /proc/cpuinfo, visible externally but still physical */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_online_map); + /* Bitmask of CPUs present in the system - exported by i386_syms.c, used * by scheduler but indexed physically */ cpumask_t phys_cpu_present_map = CPU_MASK_NONE; @@ -213,6 +218,8 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* This is for the new dynamic CPU boot code */ cpumask_t cpu_callin_map = CPU_MASK_NONE; cpumask_t cpu_callout_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_possible_map); /* The per processor IRQ masks (these are usually kept in sync) */ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; @@ -672,7 +679,7 @@ void __init smp_boot_cpus(void) /* loop over all the extended VIC CPUs and boot them. 
The * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) continue; do_boot_cpu(i); diff --git a/trunk/arch/x86/mm/numa_64.c b/trunk/arch/x86/mm/numa_64.c index 71a14f89f89e..cebcbf152d46 100644 --- a/trunk/arch/x86/mm/numa_64.c +++ b/trunk/arch/x86/mm/numa_64.c @@ -278,7 +278,7 @@ void __init numa_init_array(void) int rr, i; rr = first_node(node_online_map); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); @@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) memnodemap[0] = 0; node_set_online(0); node_set(0, node_possible_map); - for (i = 0; i < nr_cpu_ids; i++) + for (i = 0; i < NR_CPUS; i++) numa_set_node(i, 0); e820_register_active_regions(0, start_pfn, last_pfn); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); diff --git a/trunk/arch/x86/mm/srat_64.c b/trunk/arch/x86/mm/srat_64.c index 09737c8af074..51c0a2fc14fe 100644 --- a/trunk/arch/x86/mm/srat_64.c +++ b/trunk/arch/x86/mm/srat_64.c @@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (!node_online(i)) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { int node = early_cpu_to_node(i); if (node == NUMA_NO_NODE) diff --git a/trunk/arch/x86/pci/acpi.c b/trunk/arch/x86/pci/acpi.c index 9e5752fe4d15..1d88d2b39771 100644 --- a/trunk/arch/x86/pci/acpi.c +++ b/trunk/arch/x86/pci/acpi.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include "pci.h" struct pci_root_info { char *name; diff --git a/trunk/arch/x86/pci/amd_bus.c b/trunk/arch/x86/pci/amd_bus.c index 9bb09823b362..22e057665e55 100644 --- a/trunk/arch/x86/pci/amd_bus.c +++ b/trunk/arch/x86/pci/amd_bus.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include "pci.h" #ifdef CONFIG_X86_64 #include diff --git a/trunk/arch/x86/pci/common.c b/trunk/arch/x86/pci/common.c index 62ddb73e09ed..bb1a01f089e2 100644 --- a/trunk/arch/x86/pci/common.c +++ b/trunk/arch/x86/pci/common.c @@ -14,7 +14,8 @@ #include #include #include -#include + +#include "pci.h" unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; diff --git a/trunk/arch/x86/pci/direct.c b/trunk/arch/x86/pci/direct.c index bd13c3e4c6db..9a5af6c8fbe9 100644 --- a/trunk/arch/x86/pci/direct.c +++ b/trunk/arch/x86/pci/direct.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include "pci.h" /* * Functions for accessing PCI base (first 256 bytes) and extended diff --git a/trunk/arch/x86/pci/early.c b/trunk/arch/x86/pci/early.c index f6adf2c6d751..86631ccbc25a 100644 --- a/trunk/arch/x86/pci/early.c +++ b/trunk/arch/x86/pci/early.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include "pci.h" /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. 
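[Aside: the voyager, numa_64 and srat_64 loops above (and the xen ones later in the patch) all move the bound back from nr_cpu_ids to NR_CPUS. NR_CPUS is the compile-time array size; nr_cpu_ids is one past the highest CPU id that can actually exist on the booted machine, so bounding scans by it skips the dead tail of per-cpu arrays. A sketch of the difference:

/* On a distro kernel NR_CPUS might be 4096 while nr_cpu_ids is 8 on
 * the machine at hand; the nr_cpu_ids bound does 8 iterations where
 * the NR_CPUS bound does 4096.
 */
static int count_possible_cpus_sketch(void)
{
	int i, n = 0;

	for (i = 0; i < nr_cpu_ids; i++)	/* vs. i < NR_CPUS */
		if (cpu_possible(i))
			n++;
	return n;
}
]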
*/ diff --git a/trunk/arch/x86/pci/fixup.c b/trunk/arch/x86/pci/fixup.c index 7d388d5cf548..2051dc96b8e9 100644 --- a/trunk/arch/x86/pci/fixup.c +++ b/trunk/arch/x86/pci/fixup.c @@ -6,7 +6,8 @@ #include #include #include -#include +#include "pci.h" + static void __devinit pci_fixup_i450nx(struct pci_dev *d) { diff --git a/trunk/arch/x86/pci/i386.c b/trunk/arch/x86/pci/i386.c index e51bf2cda4b0..844df0cbbd3e 100644 --- a/trunk/arch/x86/pci/i386.c +++ b/trunk/arch/x86/pci/i386.c @@ -34,8 +34,8 @@ #include #include -#include +#include "pci.h" static int skip_isa_ioresource_align(struct pci_dev *dev) { diff --git a/trunk/arch/x86/pci/init.c b/trunk/arch/x86/pci/init.c index bec3b048e72b..d6c950f81858 100644 --- a/trunk/arch/x86/pci/init.c +++ b/trunk/arch/x86/pci/init.c @@ -1,6 +1,6 @@ #include #include -#include +#include "pci.h" /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. */ diff --git a/trunk/arch/x86/pci/irq.c b/trunk/arch/x86/pci/irq.c index 373b9afe6d44..bf69dbe08bff 100644 --- a/trunk/arch/x86/pci/irq.c +++ b/trunk/arch/x86/pci/irq.c @@ -16,7 +16,8 @@ #include #include #include -#include + +#include "pci.h" #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 diff --git a/trunk/arch/x86/pci/legacy.c b/trunk/arch/x86/pci/legacy.c index f1065b129e9c..b722dd481b39 100644 --- a/trunk/arch/x86/pci/legacy.c +++ b/trunk/arch/x86/pci/legacy.c @@ -3,7 +3,7 @@ */ #include #include -#include +#include "pci.h" /* * Discover remaining PCI buses in case there are peer host bridges. diff --git a/trunk/arch/x86/pci/mmconfig-shared.c b/trunk/arch/x86/pci/mmconfig-shared.c index 89bf9242c80a..654a2234f8f3 100644 --- a/trunk/arch/x86/pci/mmconfig-shared.c +++ b/trunk/arch/x86/pci/mmconfig-shared.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "pci.h" /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) diff --git a/trunk/arch/x86/pci/mmconfig_32.c b/trunk/arch/x86/pci/mmconfig_32.c index 8b2d561046a3..f3c761dce695 100644 --- a/trunk/arch/x86/pci/mmconfig_32.c +++ b/trunk/arch/x86/pci/mmconfig_32.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include "pci.h" /* Assume systems with more busses have correct MCFG */ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) diff --git a/trunk/arch/x86/pci/mmconfig_64.c b/trunk/arch/x86/pci/mmconfig_64.c index 30007ffc8e11..a1994163c99d 100644 --- a/trunk/arch/x86/pci/mmconfig_64.c +++ b/trunk/arch/x86/pci/mmconfig_64.c @@ -10,7 +10,8 @@ #include #include #include -#include + +#include "pci.h" /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { diff --git a/trunk/arch/x86/pci/numaq_32.c b/trunk/arch/x86/pci/numaq_32.c index 2089354968a2..1177845d3186 100644 --- a/trunk/arch/x86/pci/numaq_32.c +++ b/trunk/arch/x86/pci/numaq_32.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include "pci.h" #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. 
*/ diff --git a/trunk/arch/x86/pci/olpc.c b/trunk/arch/x86/pci/olpc.c index b889d824f7c6..e11e9e803d5f 100644 --- a/trunk/arch/x86/pci/olpc.c +++ b/trunk/arch/x86/pci/olpc.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include "pci.h" /* * In the tables below, the first two line (8 longwords) are the diff --git a/trunk/arch/x86/pci/pcbios.c b/trunk/arch/x86/pci/pcbios.c index b82cae970dfd..37472fc6f729 100644 --- a/trunk/arch/x86/pci/pcbios.c +++ b/trunk/arch/x86/pci/pcbios.c @@ -6,8 +6,9 @@ #include #include #include -#include -#include +#include "pci.h" +#include "pci-functions.h" + /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/trunk/arch/x86/include/asm/pci_x86.h b/trunk/arch/x86/pci/pci.h similarity index 88% rename from trunk/arch/x86/include/asm/pci_x86.h rename to trunk/arch/x86/pci/pci.h index e60fd3e14bdf..1959018aac02 100644 --- a/trunk/arch/x86/include/asm/pci_x86.h +++ b/trunk/arch/x86/pci/pci.h @@ -57,8 +57,7 @@ extern struct pci_ops pci_root_ops; struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { - u8 link; /* IRQ line ID, chipset dependent, - 0 = not routed */ + u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ u16 bitmap; /* Available IRQs */ } __attribute__((packed)) irq[4]; u8 slot; /* Slot number, 0=onboard */ @@ -70,13 +69,11 @@ struct irq_routing_table { u16 version; /* PIRQ_VERSION */ u16 size; /* Table size in bytes */ u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to - PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of - interrupt router */ + u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ u32 miniport_data; /* Crap */ u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give 0 */ + u8 checksum; /* Modulo 256 checksum must give zero */ struct irq_info slots[0]; } __attribute__((packed)); @@ -151,15 +148,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos) static inline void mmio_config_writeb(void __iomem *pos, u8 val) { - asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); + asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writew(void __iomem *pos, u16 val) { - asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); + asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writel(void __iomem *pos, u32 val) { - asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); + asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); } diff --git a/trunk/arch/x86/pci/visws.c b/trunk/arch/x86/pci/visws.c index 16d0c0eb0d19..42f4cb19faca 100644 --- a/trunk/arch/x86/pci/visws.c +++ b/trunk/arch/x86/pci/visws.c @@ -9,10 +9,11 @@ #include #include -#include #include #include +#include "pci.h" + static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } static void pci_visws_disable_irq(struct pci_dev *dev) { } diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index 503c240e26c7..773d68d3e912 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info) static void xen_drop_mm_ref(struct mm_struct *mm) { - cpumask_var_t mask; + cpumask_t mask; unsigned cpu; if (current->active_mm == mm) { @@ -1094,16 +1094,7 @@ static void 
xen_drop_mm_ref(struct mm_struct *mm) } /* Get the "official" set of cpus referring to our pagetable. */ - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) - && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) - continue; - smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); - } - return; - } - cpumask_copy(mask, &mm->cpu_vm_mask); + mask = mm->cpu_vm_mask; /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed @@ -1112,12 +1103,11 @@ static void xen_drop_mm_ref(struct mm_struct *mm) if needed. */ for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) - cpumask_set_cpu(cpu, mask); + cpu_set(cpu, mask); } - if (!cpumask_empty(mask)) - smp_call_function_many(mask, drop_other_mm_ref, mm, 1); - free_cpumask_var(mask); + if (!cpus_empty(mask)) + smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); } #else static void xen_drop_mm_ref(struct mm_struct *mm) diff --git a/trunk/arch/x86/xen/smp.c b/trunk/arch/x86/xen/smp.c index c44e2069c7c7..acd9b6705e02 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -33,7 +33,7 @@ #include "xen-ops.h" #include "mmu.h" -cpumask_var_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); @@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) { int i, rc; - for (i = 0; i < nr_cpu_ids; i++) { + for (i = 0; i < NR_CPUS; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; @@ -192,14 +192,11 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); - if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) - panic("could not allocate xen_cpu_initialized_map\n"); - - cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); + xen_cpu_initialized_map = cpumask_of_cpu(0); /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) + for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } @@ -224,7 +221,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct vcpu_guest_context *ctxt; struct desc_struct *gdt; - if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) + if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); @@ -411,23 +408,24 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(const struct cpumask *mask, - enum ipi_vector vector) +static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) { unsigned cpu; - for_each_cpu_and(cpu, mask, cpu_online_mask) + cpus_and(mask, mask, cpu_online_map); + + for_each_cpu_mask_nr(cpu, mask) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(const struct cpumask *mask) +static void xen_smp_send_call_function_ipi(cpumask_t mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. 
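[Aside: the xen_drop_mm_ref() hunk above trades cpumask_var_t, which under CONFIG_CPUMASK_OFFSTACK is heap-allocated and can fail, for a plain on-stack cpumask_t. The allocation discipline being removed looks roughly like this in general -- a sketch, with the IPI comment standing in for the real body:

static void drop_refs_sketch(struct mm_struct *mm)
{
	cpumask_var_t mask;

	/* Off-stack builds kmalloc() nr_cpu_ids bits here and can fail
	 * under GFP_ATOMIC; on-stack builds make this a no-op that
	 * always succeeds. Either way callers need a fallback path.
	 */
	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
		/* degrade: walk the online CPUs one by one instead */
		return;
	}

	cpumask_copy(mask, &mm->cpu_vm_mask);
	/* ... send IPIs to the CPUs left in mask ... */
	free_cpumask_var(mask);
}
]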
*/ - for_each_cpu(cpu, mask) { + for_each_cpu_mask_nr(cpu, mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -437,8 +435,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of(cpu), - XEN_CALL_FUNCTION_SINGLE_VECTOR); + xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) diff --git a/trunk/arch/x86/xen/suspend.c b/trunk/arch/x86/xen/suspend.c index 212ffe012b76..2a234db5949b 100644 --- a/trunk/arch/x86/xen/suspend.c +++ b/trunk/arch/x86/xen/suspend.c @@ -35,8 +35,7 @@ void xen_post_suspend(int suspend_cancelled) pfn_to_mfn(xen_start_info->console.domU.mfn); } else { #ifdef CONFIG_SMP - BUG_ON(xen_cpu_initialized_map == NULL); - cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); + xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); } diff --git a/trunk/arch/x86/xen/time.c b/trunk/arch/x86/xen/time.c index 65d75a6be0ba..c9f7cda48ed7 100644 --- a/trunk/arch/x86/xen/time.c +++ b/trunk/arch/x86/xen/time.c @@ -437,7 +437,7 @@ void xen_setup_timer(int cpu) evt = &per_cpu(xen_clock_events, cpu); memcpy(evt, xen_clockevent, sizeof(*evt)); - evt->cpumask = cpumask_of(cpu); + evt->cpumask = cpumask_of_cpu(cpu); evt->irq = irq; setup_runstate_info(cpu); diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h index c1f8faf0a2c5..9e1afae8461f 100644 --- a/trunk/arch/x86/xen/xen-ops.h +++ b/trunk/arch/x86/xen/xen-ops.h @@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); __cpuinit void xen_init_lock_cpu(int cpu); void xen_uninit_lock_cpu(int cpu); -extern cpumask_var_t xen_cpu_initialized_map; +extern cpumask_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} #endif diff --git a/trunk/drivers/base/cpu.c b/trunk/drivers/base/cpu.c index 4259072f5bd0..64f5d54f7edc 100644 --- a/trunk/drivers/base/cpu.c +++ b/trunk/drivers/base/cpu.c @@ -109,7 +109,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); */ static ssize_t print_cpus_map(char *buf, cpumask_t *map) { - int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map); + int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map); buf[n++] = '\n'; buf[n] = '\0'; diff --git a/trunk/drivers/base/node.c b/trunk/drivers/base/node.c index 91636cd8b6c9..f5207090885a 100644 --- a/trunk/drivers/base/node.c +++ b/trunk/drivers/base/node.c @@ -30,8 +30,8 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); len = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, mask) : - cpumask_scnprintf(buf, PAGE_SIZE-2, mask); + cpulist_scnprintf(buf, PAGE_SIZE-2, *mask): + cpumask_scnprintf(buf, PAGE_SIZE-2, *mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/trunk/drivers/base/topology.c b/trunk/drivers/base/topology.c index a8bc1cbcfa7c..199cd97e32e6 100644 --- a/trunk/drivers/base/topology.c +++ b/trunk/drivers/base/topology.c @@ -49,8 +49,8 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) if (len > 1) { n = type? 
- cpulist_scnprintf(buf, len-2, mask) : - cpumask_scnprintf(buf, len-2, mask); + cpulist_scnprintf(buf, len-2, *mask): + cpumask_scnprintf(buf, len-2, *mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/trunk/drivers/clocksource/tcb_clksrc.c b/trunk/drivers/clocksource/tcb_clksrc.c index 254f1064d973..f450588e5858 100644 --- a/trunk/drivers/clocksource/tcb_clksrc.c +++ b/trunk/drivers/clocksource/tcb_clksrc.c @@ -154,6 +154,7 @@ static struct tc_clkevt_device clkevt = { .shift = 32, /* Should be lower than at91rm9200's system timer */ .rating = 125, + .cpumask = CPU_MASK_CPU0, .set_next_event = tc_next_event, .set_mode = tc_mode, }, @@ -194,7 +195,6 @@ static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) clkevt.clkevt.max_delta_ns = clockevent_delta2ns(0xffff, &clkevt.clkevt); clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1; - clkevt.clkevt.cpumask = cpumask_of(0); setup_irq(irq, &tc_irqaction); diff --git a/trunk/drivers/lguest/interrupts_and_traps.c b/trunk/drivers/lguest/interrupts_and_traps.c index 415fab0125ac..a1039068f95c 100644 --- a/trunk/drivers/lguest/interrupts_and_traps.c +++ b/trunk/drivers/lguest/interrupts_and_traps.c @@ -222,16 +222,11 @@ bool check_syscall_vector(struct lguest *lg) int init_interrupts(void) { /* If they want some strange system call vector, reserve it now */ - if (syscall_vector != SYSCALL_VECTOR) { - if (test_bit(syscall_vector, used_vectors) || - vector_used_by_percpu_irq(syscall_vector)) { - printk(KERN_ERR "lg: couldn't reserve syscall %u\n", - syscall_vector); - return -EBUSY; - } - set_bit(syscall_vector, used_vectors); + if (syscall_vector != SYSCALL_VECTOR + && test_and_set_bit(syscall_vector, used_vectors)) { + printk("lg: couldn't reserve syscall %u\n", syscall_vector); + return -EBUSY; } - return 0; } diff --git a/trunk/drivers/parisc/iosapic.c b/trunk/drivers/parisc/iosapic.c index 9dedbbd218c3..7beffcab2745 100644 --- a/trunk/drivers/parisc/iosapic.c +++ b/trunk/drivers/parisc/iosapic.c @@ -704,17 +704,16 @@ static unsigned int iosapic_startup_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void iosapic_set_affinity_irq(unsigned int irq, - const struct cpumask *dest) +static void iosapic_set_affinity_irq(unsigned int irq, cpumask_t dest) { struct vector_info *vi = iosapic_get_vector(irq); u32 d0, d1, dummy_d0; unsigned long flags; - if (cpu_check_affinity(irq, dest)) + if (cpu_check_affinity(irq, &dest)) return; - vi->txn_addr = txn_affinity_addr(irq, cpumask_first(dest)); + vi->txn_addr = txn_affinity_addr(irq, first_cpu(dest)); spin_lock_irqsave(&iosapic_lock, flags); /* d1 contains the destination CPU, so only want to set that diff --git a/trunk/drivers/pci/hotplug/cpqphp_core.c b/trunk/drivers/pci/hotplug/cpqphp_core.c index c2e1bcbb28a7..8514c3a1746a 100644 --- a/trunk/drivers/pci/hotplug/cpqphp_core.c +++ b/trunk/drivers/pci/hotplug/cpqphp_core.c @@ -45,7 +45,7 @@ #include "cpqphp.h" #include "cpqphp_nvram.h" -#include +#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ /* Global variables */ diff --git a/trunk/drivers/pci/hotplug/cpqphp_pci.c b/trunk/drivers/pci/hotplug/cpqphp_pci.c index df146be9d2e9..09021930589f 100644 --- a/trunk/drivers/pci/hotplug/cpqphp_pci.c +++ b/trunk/drivers/pci/hotplug/cpqphp_pci.c @@ -37,7 +37,7 @@ #include "../pci.h" #include "cpqphp.h" #include "cpqphp_nvram.h" -#include +#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... 
*/ u8 cpqhp_nic_irq; diff --git a/trunk/drivers/pci/hotplug/ibmphp_core.c b/trunk/drivers/pci/hotplug/ibmphp_core.c index dd18f857dfb0..633e743442ac 100644 --- a/trunk/drivers/pci/hotplug/ibmphp_core.c +++ b/trunk/drivers/pci/hotplug/ibmphp_core.c @@ -35,7 +35,7 @@ #include #include #include "../pci.h" -#include /* for struct irq_routing_table */ +#include "../../../arch/x86/pci/pci.h" /* for struct irq_routing_table */ #include "ibmphp.h" #define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON) diff --git a/trunk/drivers/pci/pci-sysfs.c b/trunk/drivers/pci/pci-sysfs.c index c88485860a0a..5d72866897a8 100644 --- a/trunk/drivers/pci/pci-sysfs.c +++ b/trunk/drivers/pci/pci-sysfs.c @@ -74,7 +74,7 @@ static ssize_t local_cpus_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask); + len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; return len; @@ -88,7 +88,7 @@ static ssize_t local_cpulist_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask); + len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/trunk/drivers/pci/probe.c b/trunk/drivers/pci/probe.c index 5b3f5937ecf5..003a9b3c293f 100644 --- a/trunk/drivers/pci/probe.c +++ b/trunk/drivers/pci/probe.c @@ -55,8 +55,8 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev, cpumask = pcibus_to_cpumask(to_pci_bus(dev)); ret = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) : - cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask); + cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask): + cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask); buf[ret++] = '\n'; buf[ret] = '\0'; return ret; diff --git a/trunk/drivers/xen/events.c b/trunk/drivers/xen/events.c index eb0dfdeaa949..e26733a9df21 100644 --- a/trunk/drivers/xen/events.c +++ b/trunk/drivers/xen/events.c @@ -585,7 +585,7 @@ void rebind_evtchn_irq(int evtchn, int irq) spin_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ - irq_set_affinity(irq, cpumask_of(0)); + irq_set_affinity(irq, cpumask_of_cpu(0)); /* Unmask the event channel. 
*/ enable_irq(irq); @@ -614,9 +614,9 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) } -static void set_affinity_irq(unsigned irq, const struct cpumask *dest) +static void set_affinity_irq(unsigned irq, cpumask_t dest) { - unsigned tcpu = cpumask_first(dest); + unsigned tcpu = first_cpu(dest); rebind_irq_to_cpu(irq, tcpu); } diff --git a/trunk/fs/anon_inodes.c b/trunk/fs/anon_inodes.c index 3bbdb9d02376..c16d9be1b017 100644 --- a/trunk/fs/anon_inodes.c +++ b/trunk/fs/anon_inodes.c @@ -79,12 +79,9 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, if (IS_ERR(anon_inode_inode)) return -ENODEV; - if (fops->owner && !try_module_get(fops->owner)) - return -ENOENT; - error = get_unused_fd_flags(flags); if (error < 0) - goto err_module; + return error; fd = error; /* @@ -131,8 +128,6 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, dput(dentry); err_put_unused_fd: put_unused_fd(fd); -err_module: - module_put(fops->owner); return error; } EXPORT_SYMBOL_GPL(anon_inode_getfd); diff --git a/trunk/include/asm-generic/topology.h b/trunk/include/asm-generic/topology.h index 0e9e2bc0ee96..54bbf6e04ee8 100644 --- a/trunk/include/asm-generic/topology.h +++ b/trunk/include/asm-generic/topology.h @@ -40,9 +40,6 @@ #ifndef node_to_cpumask #define node_to_cpumask(node) ((void)node, cpu_online_map) #endif -#ifndef cpumask_of_node -#define cpumask_of_node(node) ((void)node, cpu_online_mask) -#endif #ifndef node_to_first_cpu #define node_to_first_cpu(node) ((void)(node),0) #endif @@ -57,18 +54,9 @@ ) #endif -#ifndef cpumask_of_pcibus -#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ - cpu_all_mask : \ - cpumask_of_node(pcibus_to_node(bus))) -#endif - #endif /* CONFIG_NUMA */ -/* - * returns pointer to cpumask for specified node - * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" - */ +/* returns pointer to cpumask for specified node */ #ifndef node_to_cpumask_ptr #define node_to_cpumask_ptr(v, node) \ diff --git a/trunk/include/asm-m32r/smp.h b/trunk/include/asm-m32r/smp.h index b96a6d2ffbc3..c5dd66916692 100644 --- a/trunk/include/asm-m32r/smp.h +++ b/trunk/include/asm-m32r/smp.h @@ -63,6 +63,8 @@ extern volatile int cpu_2_physid[NR_CPUS]; #define raw_smp_processor_id() (current_thread_info()->cpu) extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_possible_map; +extern cpumask_t cpu_present_map; static __inline__ int hard_smp_processor_id(void) { diff --git a/trunk/include/linux/clockchips.h b/trunk/include/linux/clockchips.h index cea153697ec7..ed3a5d473e52 100644 --- a/trunk/include/linux/clockchips.h +++ b/trunk/include/linux/clockchips.h @@ -82,13 +82,13 @@ struct clock_event_device { int shift; int rating; int irq; - const struct cpumask *cpumask; + cpumask_t cpumask; int (*set_next_event)(unsigned long evt, struct clock_event_device *); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); void (*event_handler)(struct clock_event_device *); - void (*broadcast)(const struct cpumask *mask); + void (*broadcast)(cpumask_t mask); struct list_head list; enum clock_event_mode mode; ktime_t next_event; diff --git a/trunk/include/linux/compiler-gcc3.h b/trunk/include/linux/compiler-gcc3.h index 8005effc04f1..2befe6513ce4 100644 --- a/trunk/include/linux/compiler-gcc3.h +++ b/trunk/include/linux/compiler-gcc3.h @@ -2,10 +2,6 @@ #error "Please don't include directly, include instead." #endif -#if __GNUC_MINOR__ < 2 -# error Sorry, your compiler is too old - please upgrade it. 
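[Aside: the clockchips.h hunk above is the type change driving the earlier lguest, tcb_clksrc and xen/time.c edits: clock_event_device.cpumask reverts from a const struct cpumask pointer to a by-value cpumask_t. At a registration site the two variants look like this (sketch; each line only compiles against its own version of the struct):

/* restored by-value form: the whole cpumask_t is copied in */
evt->cpumask = cpumask_of_cpu(cpu);

/* newer pointer form: points at a static per-cpu mask, copies nothing */
/* evt->cpumask = cpumask_of(cpu); */
]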
-#endif - #if __GNUC_MINOR__ >= 3 # define __used __attribute__((__used__)) #else diff --git a/trunk/include/linux/cpumask.h b/trunk/include/linux/cpumask.h index d4bf52603e6b..21e1dd43e52a 100644 --- a/trunk/include/linux/cpumask.h +++ b/trunk/include/linux/cpumask.h @@ -339,6 +339,36 @@ extern cpumask_t cpu_mask_all; #endif #define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) +#define cpumask_scnprintf(buf, len, src) \ + __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpumask_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nbits); +} + +#define cpumask_parse_user(ubuf, ulen, dst) \ + __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse_user(const char __user *buf, int len, + cpumask_t *dstp, int nbits) +{ + return bitmap_parse_user(buf, len, dstp->bits, nbits); +} + +#define cpulist_scnprintf(buf, len, src) \ + __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpulist_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); +} + +#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) +static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) +{ + return bitmap_parselist(buf, dstp->bits, nbits); +} + #define cpu_remap(oldbit, old, new) \ __cpu_remap((oldbit), &(old), &(new), NR_CPUS) static inline int __cpu_remap(int oldbit, @@ -510,6 +540,9 @@ extern cpumask_t cpu_active_map; [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } +/* This produces more efficient code. */ +#define nr_cpumask_bits NR_CPUS + #else /* NR_CPUS > BITS_PER_LONG */ #define CPU_BITS_ALL \ @@ -517,15 +550,9 @@ extern cpumask_t cpu_active_map; [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } -#endif /* NR_CPUS > BITS_PER_LONG */ -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ #define nr_cpumask_bits nr_cpu_ids -#else -#define nr_cpumask_bits NR_CPUS -#endif +#endif /* NR_CPUS > BITS_PER_LONG */ /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) @@ -918,63 +945,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) -/** - * cpumask_scnprintf - print a cpumask into a string as comma-separated hex - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpumask_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits); -} - -/** - * cpumask_parse_user - extract a cpumask from a user string - * @buf: the buffer to extract from - * @len: the length of the buffer - * @dstp: the cpumask to set. - * - * Returns -errno, or 0 for success. - */ -static inline int cpumask_parse_user(const char __user *buf, int len, - struct cpumask *dstp) -{ - return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits); -} - -/** - * cpulist_scnprintf - print a cpumask into a string as comma-separated list - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. 
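[Aside: the cpumask.h hunk above reinstates the macro forms of cpumask_scnprintf()/cpulist_scnprintf() that take the mask by value and add the "&" themselves, which is why every caller earlier in this patch (drivers/base, drivers/pci) flips between passing "mask" and "&mask". At a call site, as a sketch:

char buf[128];
cpumask_t mask = CPU_MASK_NONE;

/* macro form restored here: pass the mask object itself */
cpumask_scnprintf(buf, sizeof(buf), mask);

/* newer inline-function form takes a pointer instead: */
/* cpumask_scnprintf(buf, sizeof(buf), &mask); */
]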
Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpulist_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits); -} - -/** - * cpulist_parse_user - extract a cpumask from a user string of ranges - * @buf: the buffer to extract from - * @len: the length of the buffer - * @dstp: the cpumask to set. - * - * Returns -errno, or 0 for success. - */ -static inline int cpulist_parse(const char *buf, struct cpumask *dstp) -{ - return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits); -} - /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap diff --git a/trunk/include/linux/interrupt.h b/trunk/include/linux/interrupt.h index 990355fbc54e..8cc8ef47f5b6 100644 --- a/trunk/include/linux/interrupt.h +++ b/trunk/include/linux/interrupt.h @@ -111,13 +111,13 @@ extern void enable_irq(unsigned int irq); extern cpumask_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); +extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ -static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) +static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask) { return -EINVAL; } diff --git a/trunk/include/linux/irq.h b/trunk/include/linux/irq.h index f899b502f186..d64a6d49bdef 100644 --- a/trunk/include/linux/irq.h +++ b/trunk/include/linux/irq.h @@ -113,8 +113,7 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, - const struct cpumask *dest); + void (*set_affinity)(unsigned int irq, cpumask_t dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); diff --git a/trunk/include/linux/kvm.h b/trunk/include/linux/kvm.h index 35525ac63337..f18b86fa8655 100644 --- a/trunk/include/linux/kvm.h +++ b/trunk/include/linux/kvm.h @@ -83,7 +83,6 @@ struct kvm_irqchip { #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 -#define KVM_EXIT_NMI 16 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -388,14 +387,6 @@ struct kvm_trace_rec { #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -#if defined(CONFIG_X86) -#define KVM_CAP_DEVICE_MSI 20 -#endif -/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ -#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#if defined(CONFIG_X86) -#define KVM_CAP_USER_NMI 22 -#endif /* * ioctls for VM fds @@ -467,8 +458,6 @@ struct kvm_trace_rec { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) -/* Available with KVM_CAP_NMI */ -#define KVM_NMI _IO(KVMIO, 0x9a) #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) @@ -511,17 +500,10 @@ struct kvm_assigned_irq { __u32 guest_irq; __u32 flags; union { - struct { - __u32 addr_lo; - __u32 addr_hi; - __u32 data; - } guest_msi; __u32 reserved[12]; }; }; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) - #endif diff --git a/trunk/include/linux/kvm_host.h b/trunk/include/linux/kvm_host.h index eafabd5c66b2..bb92be2153bc 100644 --- 
a/trunk/include/linux/kvm_host.h +++ b/trunk/include/linux/kvm_host.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -307,14 +306,8 @@ struct kvm_assigned_dev_kernel { int host_busnr; int host_devfn; int host_irq; - bool host_irq_disabled; int guest_irq; - struct msi_msg guest_msi; -#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) -#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) -#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) -#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) - unsigned long irq_requested_type; + int irq_requested; int irq_source_id; struct pci_dev *dev; struct kvm *kvm; @@ -323,7 +316,8 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 158d53d07765..8395e715809d 100644 --- a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -250,7 +250,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); extern void task_rq_unlock_wait(struct task_struct *p); -extern cpumask_var_t nohz_cpu_mask; +extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); #else @@ -758,51 +758,20 @@ enum cpu_idle_type { #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ -enum powersavings_balance_level { - POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ - POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package - * first for long running threads - */ - POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle - * cpu package for power savings - */ - MAX_POWERSAVINGS_BALANCE_LEVELS -}; +#define BALANCE_FOR_MC_POWER \ + (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) -extern int sched_mc_power_savings, sched_smt_power_savings; +#define BALANCE_FOR_PKG_POWER \ + ((sched_mc_power_savings || sched_smt_power_savings) ? \ + SD_POWERSAVINGS_BALANCE : 0) -static inline int sd_balance_for_mc_power(void) -{ - if (sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; +#define test_sd_parent(sd, flag) ((sd->parent && \ + (sd->parent->flags & flag)) ? 1 : 0) - return 0; -} - -static inline int sd_balance_for_package_power(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_POWERSAVINGS_BALANCE; - - return 0; -} - -/* - * Optimise SD flags for power savings: - * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings. 
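
The sched.h hunk above also swaps nohz_cpu_mask from cpumask_var_t back to a plain cpumask_t. As a rough sketch of the difference between the two storage disciplines; toy types, not the kernel's implementation, which additionally depends on CONFIG_CPUMASK_OFFSTACK:

#include <stdlib.h>

typedef struct { unsigned long bits[64]; } toy_cpumask_t;
typedef toy_cpumask_t *toy_cpumask_var_t;	/* "offstack" flavour */

static int toy_alloc_cpumask_var(toy_cpumask_var_t *p)
{
	*p = calloc(1, sizeof(**p));		/* may fail at runtime */
	return *p != NULL;
}

static void toy_free_cpumask_var(toy_cpumask_var_t p)
{
	free(p);
}

int main(void)
{
	toy_cpumask_t on_stack = { { 0 } };	/* no failure path, big frame */
	toy_cpumask_var_t dynamic;

	if (!toy_alloc_cpumask_var(&dynamic))
		return 1;
	dynamic->bits[0] = on_stack.bits[0];
	toy_free_cpumask_var(dynamic);
	return 0;
}

The plain cpumask_t form removes every alloc-failure branch, at the cost of NR_CPUS/8 bytes wherever one is declared.
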
- * Keep default SD flags if sched_{smt,mc}_power_saving=0 - */ - -static inline int sd_power_saving_flags(void) -{ - if (sched_mc_power_savings | sched_smt_power_savings) - return SD_BALANCE_NEWIDLE; - - return 0; -} struct sched_group { struct sched_group *next; /* Must be a circular list */ + cpumask_t cpumask; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -815,15 +784,8 @@ struct sched_group { * (see include/linux/reciprocal_div.h) */ u32 reciprocal_cpu_power; - - unsigned long cpumask[]; }; -static inline struct cpumask *sched_group_cpus(struct sched_group *sg) -{ - return to_cpumask(sg->cpumask); -} - enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, @@ -847,6 +809,7 @@ struct sched_domain { struct sched_domain *parent; /* top domain must be null terminated */ struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ + cpumask_t span; /* span of all CPUs in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ @@ -901,35 +864,18 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif - - /* span of all CPUs in this domain */ - unsigned long span[]; }; -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); -/* Test a flag in parent sched domain */ -static inline int test_sd_parent(struct sched_domain *sd, int flag) -{ - if (sd->parent && (sd->parent->flags & flag)) - return 1; - - return 0; -} - #else /* CONFIG_SMP */ struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { } @@ -980,7 +926,7 @@ struct sched_class { void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); + const cpumask_t *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); @@ -1633,12 +1579,12 @@ extern cputime_t task_gtime(struct task_struct *p); #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); + const cpumask_t *new_mask); #else static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - if (!cpumask_test_cpu(0, new_mask)) + if (!cpu_isset(0, *new_mask)) return -EINVAL; return 0; } @@ -2249,8 +2195,10 @@ __trace_special(void *__tr, void *__data, } #endif -extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); -extern long sched_getaffinity(pid_t pid, struct cpumask *mask); +extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); +extern long sched_getaffinity(pid_t pid, cpumask_t *mask); + +extern int sched_mc_power_savings, sched_smt_power_savings; extern void normalize_rt_tasks(void); diff --git a/trunk/include/linux/topology.h b/trunk/include/linux/topology.h index e632d29f0544..0c5b5ac36d8e 100644 --- a/trunk/include/linux/topology.h +++ b/trunk/include/linux/topology.h @@ -125,8 +125,7 @@ int 
arch_update_cpu_topology(void); | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ - | sd_balance_for_mc_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -151,8 +150,7 @@ int arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ - | sd_balance_for_package_power()\ - | sd_power_saving_flags(),\ + | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index f6281711166d..13627191a60d 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -924,15 +924,6 @@ config KMOD endif # MODULES -config INIT_ALL_POSSIBLE - bool - help - Back when each arch used to define their own cpu_online_map and - cpu_possible_map, some of them chose to initialize cpu_possible_map - with all 1s, and others with all 0s. When they were centralised, - it was better to provide this option than to break all the archs - and have several arch maintainers persuing me down dark alleys. - config STOP_MACHINE bool default y diff --git a/trunk/kernel/cpu.c b/trunk/kernel/cpu.c index bae131a1211b..8ea32e8d68b0 100644 --- a/trunk/kernel/cpu.c +++ b/trunk/kernel/cpu.c @@ -24,20 +24,19 @@ cpumask_t cpu_present_map __read_mostly; EXPORT_SYMBOL(cpu_present_map); +#ifndef CONFIG_SMP + /* * Represents all cpu's that are currently online. */ -cpumask_t cpu_online_map __read_mostly; +cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; EXPORT_SYMBOL(cpu_online_map); -#ifdef CONFIG_INIT_ALL_POSSIBLE cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; -#else -cpumask_t cpu_possible_map __read_mostly; -#endif EXPORT_SYMBOL(cpu_possible_map); -#ifdef CONFIG_SMP +#else /* CONFIG_SMP */ + /* Serializes the updates to cpu_online_map, cpu_present_map */ static DEFINE_MUTEX(cpu_add_remove_lock); diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c index 39c1a4c1c5a9..96c0ba13b8cd 100644 --- a/trunk/kernel/cpuset.c +++ b/trunk/kernel/cpuset.c @@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf) if (!*buf) { cpus_clear(trialcs.cpus_allowed); } else { - retval = cpulist_parse(buf, &trialcs.cpus_allowed); + retval = cpulist_parse(buf, trialcs.cpus_allowed); if (retval < 0) return retval; @@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) mask = cs->cpus_allowed; mutex_unlock(&callback_mutex); - return cpulist_scnprintf(page, PAGE_SIZE, &mask); + return cpulist_scnprintf(page, PAGE_SIZE, mask); } static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) diff --git a/trunk/kernel/irq/chip.c b/trunk/kernel/irq/chip.c index f63c706d25e1..6eb3c7952b64 100644 --- a/trunk/kernel/irq/chip.c +++ b/trunk/kernel/irq/chip.c @@ -46,7 +46,7 @@ void dynamic_irq_init(unsigned int irq) desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpumask_setall(&desc->affinity); + cpus_setall(desc->affinity); #endif spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/trunk/kernel/irq/manage.c b/trunk/kernel/irq/manage.c index 61c4a9b62165..540f6c49f3fa 100644 --- a/trunk/kernel/irq/manage.c +++ b/trunk/kernel/irq/manage.c @@ -79,7 +79,7 @@ int irq_can_set_affinity(unsigned int irq) * @cpumask: cpumask * */ -int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) +int irq_set_affinity(unsigned int irq, cpumask_t cpumask) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -91,14 +91,14 @@ int irq_set_affinity(unsigned int irq, const struct 
cpumask *cpumask) #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - cpumask_copy(&desc->affinity, cpumask); + desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, cpumask); + desc->pending_mask = cpumask; } #else - cpumask_copy(&desc->affinity, cpumask); + desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@ -112,24 +112,26 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) */ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) { + cpumask_t mask; + if (!irq_can_set_affinity(irq)) return 0; + cpus_and(mask, cpu_online_map, irq_default_affinity); + /* * Preserve an userspace affinity setup, but make sure that * one of the targets is online. */ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpumask_any_and(&desc->affinity, cpu_online_mask) - < nr_cpu_ids) - goto set_affinity; + if (cpus_intersects(desc->affinity, cpu_online_map)) + mask = desc->affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity); -set_affinity: - desc->chip->set_affinity(irq, &desc->affinity); + desc->affinity = mask; + desc->chip->set_affinity(irq, mask); return 0; } diff --git a/trunk/kernel/irq/migration.c b/trunk/kernel/irq/migration.c index bd72329e630c..9db681d95814 100644 --- a/trunk/kernel/irq/migration.c +++ b/trunk/kernel/irq/migration.c @@ -4,6 +4,7 @@ void move_masked_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); + cpumask_t tmp; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -18,7 +19,7 @@ void move_masked_irq(int irq) desc->status &= ~IRQ_MOVE_PENDING; - if (unlikely(cpumask_empty(&desc->pending_mask))) + if (unlikely(cpus_empty(desc->pending_mask))) return; if (!desc->chip->set_affinity) @@ -26,6 +27,8 @@ void move_masked_irq(int irq) assert_spin_locked(&desc->lock); + cpus_and(tmp, desc->pending_mask, cpu_online_map); + /* * If there was a valid mask to work with, please * do the disable, re-program, enable sequence. @@ -38,13 +41,10 @@ void move_masked_irq(int irq) * For correct operation this depends on the caller * masking the irqs. 
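
A condensed restatement of the do_irq_select_affinity() logic restored above, shrunk to one-word masks so it runs standalone; the values are made up:

#include <stdio.h>

int main(void)
{
	unsigned long online = 0x0fUL;	/* CPUs 0-3 online */
	unsigned long deflt  = 0xffUL;	/* irq_default_affinity */
	unsigned long user   = 0x30UL;	/* user bound the irq to CPUs 4-5 */
	unsigned long mask   = online & deflt;	/* cpus_and() */

	/* preserve a user setting only while a target is still online */
	if (user & online)
		mask = user;
	printf("effective affinity %#lx\n", mask);	/* 0xf: user mask dropped */
	return 0;
}
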
*/ - if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) { - cpumask_and(&desc->affinity, - &desc->pending_mask, cpu_online_mask); - desc->chip->set_affinity(irq, &desc->affinity); + if (likely(!cpus_empty(tmp))) { + desc->chip->set_affinity(irq,tmp); } - cpumask_clear(&desc->pending_mask); + cpus_clear(desc->pending_mask); } void move_native_irq(int irq) diff --git a/trunk/kernel/irq/proc.c b/trunk/kernel/irq/proc.c index d2c0e5ee53c5..f6b3440f05bc 100644 --- a/trunk/kernel/irq/proc.c +++ b/trunk/kernel/irq/proc.c @@ -40,42 +40,33 @@ static ssize_t irq_affinity_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; - cpumask_var_t new_value; + cpumask_t new_value; int err; if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || irq_balancing_disabled(irq)) return -EIO; - if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) - return -ENOMEM; - err = cpumask_parse_user(buffer, count, new_value); if (err) - goto free_cpumask; + return err; - if (!is_affinity_mask_valid(*new_value)) { - err = -EINVAL; - goto free_cpumask; - } + if (!is_affinity_mask_valid(new_value)) + return -EINVAL; /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ - if (!cpumask_intersects(new_value, cpu_online_mask)) { + if (!cpus_intersects(new_value, cpu_online_map)) /* Special case for empty set - allow the architecture code to set default SMP affinity. */ - err = irq_select_affinity_usr(irq) ? -EINVAL : count; - } else { - irq_set_affinity(irq, new_value); - err = count; - } + return irq_select_affinity_usr(irq) ? -EINVAL : count; + + irq_set_affinity(irq, new_value); -free_cpumask: - free_cpumask_var(new_value); - return err; + return count; } static int irq_affinity_proc_open(struct inode *inode, struct file *file) @@ -104,7 +95,7 @@ static ssize_t default_affinity_write(struct file *file, cpumask_t new_value; int err; - err = cpumask_parse_user(buffer, count, &new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/trunk/kernel/profile.c b/trunk/kernel/profile.c index 4cb7d68fed82..60adefb59b5e 100644 --- a/trunk/kernel/profile.c +++ b/trunk/kernel/profile.c @@ -442,7 +442,7 @@ void profile_tick(int type) static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_scnprintf(page, count, (cpumask_t *)data); + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file, unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse_user(buffer, count, &new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/trunk/kernel/rcuclassic.c b/trunk/kernel/rcuclassic.c index c03ca3e61919..e503a002f330 100644 --- a/trunk/kernel/rcuclassic.c +++ b/trunk/kernel/rcuclassic.c @@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) * unnecessarily. 
*/ smp_mb(); - cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask); + cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); rcp->signaled = 0; } diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index 27ba1d642f0f..fff1c4a20b65 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -498,26 +498,18 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; - cpumask_var_t span; - cpumask_var_t online; + cpumask_t span; + cpumask_t online; /* * The "RT overload" flag: it gets set if a CPU has more than * one runnable RT task. */ - cpumask_var_t rto_mask; + cpumask_t rto_mask; atomic_t rto_count; #ifdef CONFIG_SMP struct cpupri cpupri; #endif -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - /* - * Preferred wake up cpu nominated by sched_mc balance that will be - * used when most cpus are idle in the system indicating overall very - * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) - */ - unsigned int sched_mc_preferred_wakeup_cpu; -#endif }; /* @@ -1522,7 +1514,7 @@ static int tg_shares_up(struct task_group *tg, void *data) struct sched_domain *sd = data; int i; - for_each_cpu(i, sched_domain_span(sd)) { + for_each_cpu_mask(i, sd->span) { /* * If there are currently no tasks on the cpu pretend there * is one of average load so that when a new task gets to @@ -1543,7 +1535,7 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) shares = tg->shares; - for_each_cpu(i, sched_domain_span(sd)) + for_each_cpu_mask(i, sd->span) update_group_shares_cpu(tg, i, shares, rq_weight); return 0; @@ -2109,17 +2101,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int i; /* Skip over this group if it has no CPUs allowed */ - if (!cpumask_intersects(sched_group_cpus(group), - &p->cpus_allowed)) + if (!cpus_intersects(group->cpumask, p->cpus_allowed)) continue; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); /* Tally up the load of all CPUs in the group */ avg_load = 0; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { /* Bias balancing toward cpus of our domain */ if (local_group) load = source_load(i, load_idx); @@ -2151,14 +2141,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) * find_idlest_cpu - find the idlest cpu among the cpus in group. 
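
The rcu_start_batch() hunk above goes back to cpus_andnot() to exclude tick-stopped CPUs from the grace-period mask. The operation itself, as a one-word standalone sketch with invented values:

#include <stdio.h>

int main(void)
{
	unsigned long online = 0xffUL;	/* CPUs 0-7 online */
	unsigned long nohz   = 0xc0UL;	/* CPUs 6-7 stopped their tick */
	unsigned long wait   = online & ~nohz;	/* cpus_andnot() */

	printf("grace period waits on %#lx\n", wait);	/* 0x3f */
	return 0;
}
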
*/ static int -find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) +find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, + cpumask_t *tmp) { unsigned long load, min_load = ULONG_MAX; int idlest = -1; int i; /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { + cpus_and(*tmp, group->cpumask, p->cpus_allowed); + + for_each_cpu_mask_nr(i, *tmp) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -2200,6 +2193,7 @@ static int sched_balance_self(int cpu, int flag) update_shares(sd); while (sd) { + cpumask_t span, tmpmask; struct sched_group *group; int new_cpu, weight; @@ -2208,13 +2202,14 @@ static int sched_balance_self(int cpu, int flag) continue; } + span = sd->span; group = find_idlest_group(sd, t, cpu); if (!group) { sd = sd->child; continue; } - new_cpu = find_idlest_cpu(group, t, cpu); + new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); if (new_cpu == -1 || new_cpu == cpu) { /* Now try balancing at a lower domain level of cpu */ sd = sd->child; @@ -2223,10 +2218,10 @@ static int sched_balance_self(int cpu, int flag) /* Now try balancing at a lower domain level of new_cpu */ cpu = new_cpu; - weight = cpumask_weight(sched_domain_span(sd)); sd = NULL; + weight = cpus_weight(span); for_each_domain(cpu, tmp) { - if (weight <= cpumask_weight(sched_domain_span(tmp))) + if (weight <= cpus_weight(tmp->span)) break; if (tmp->flags & flag) sd = tmp; @@ -2271,7 +2266,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) cpu = task_cpu(p); for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { update_shares(sd); break; } @@ -2320,7 +2315,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) else { struct sched_domain *sd; for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (cpu_isset(cpu, sd->span)) { schedstat_inc(sd, ttwu_wake_remote); break; } @@ -2851,7 +2846,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) struct rq *rq; rq = task_rq_lock(p, &flags); - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) + if (!cpu_isset(dest_cpu, p->cpus_allowed) || unlikely(!cpu_active(dest_cpu))) goto out; @@ -2916,7 +2911,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. 
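
find_idlest_cpu() above regrows a caller-supplied scratch mask ('cpumask_t *tmp') rather than iterating an and-ed view in place. A sketch of that scratch-mask convention, assuming 64-bit longs and with invented toy_ helpers:

typedef struct { unsigned long bits[8]; } toy_cpumask_t;

static void toy_and(toy_cpumask_t *dst, const toy_cpumask_t *a,
		    const toy_cpumask_t *b)
{
	for (unsigned int i = 0; i < 8; i++)
		dst->bits[i] = a->bits[i] & b->bits[i];
}

static int toy_find_idlest_cpu(const toy_cpumask_t *group,
			       const toy_cpumask_t *allowed,
			       toy_cpumask_t *tmp)	/* caller's scratch */
{
	toy_and(tmp, group, allowed);	/* traverse only the allowed CPUs */
	for (unsigned int i = 0; i < 8 * 64; i++)
		if (tmp->bits[i / 64] & (1UL << (i % 64)))
			return (int)i;	/* stand-in for the real load scan */
	return -1;
}

int main(void)
{
	toy_cpumask_t group = { { 0x6 } }, allowed = { { 0x4 } }, tmp;

	return toy_find_idlest_cpu(&group, &allowed, &tmp) == 2 ? 0 : 1;
}

Passing the temporary down keeps each frame small while every callee reuses the same storage.
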
*/ - if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { + if (!cpu_isset(this_cpu, p->cpus_allowed)) { schedstat_inc(p, se.nr_failed_migrations_affine); return 0; } @@ -3091,7 +3086,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long *imbalance, enum cpu_idle_type idle, - int *sd_idle, const struct cpumask *cpus, int *balance) + int *sd_idle, const cpumask_t *cpus, int *balance) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -3127,11 +3122,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long sum_avg_load_per_task; unsigned long avg_load_per_task; - local_group = cpumask_test_cpu(this_cpu, - sched_group_cpus(group)); + local_group = cpu_isset(this_cpu, group->cpumask); if (local_group) - balance_cpu = cpumask_first(sched_group_cpus(group)); + balance_cpu = first_cpu(group->cpumask); /* Tally up the load of all CPUs in the group */ sum_weighted_load = sum_nr_running = avg_load = 0; @@ -3140,8 +3134,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, max_cpu_load = 0; min_cpu_load = ~0UL; - for_each_cpu_and(i, sched_group_cpus(group), cpus) { - struct rq *rq = cpu_rq(i); + for_each_cpu_mask_nr(i, group->cpumask) { + struct rq *rq; + + if (!cpu_isset(i, *cpus)) + continue; + + rq = cpu_rq(i); if (*sd_idle && rq->nr_running) *sd_idle = 0; @@ -3252,8 +3251,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ if ((sum_nr_running < min_nr_running) || (sum_nr_running == min_nr_running && - cpumask_first(sched_group_cpus(group)) > - cpumask_first(sched_group_cpus(group_min)))) { + first_cpu(group->cpumask) < + first_cpu(group_min->cpumask))) { group_min = group; min_nr_running = sum_nr_running; min_load_per_task = sum_weighted_load / @@ -3268,8 +3267,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (sum_nr_running <= group_capacity - 1) { if (sum_nr_running > leader_nr_running || (sum_nr_running == leader_nr_running && - cpumask_first(sched_group_cpus(group)) < - cpumask_first(sched_group_cpus(group_leader)))) { + first_cpu(group->cpumask) > + first_cpu(group_leader->cpumask))) { group_leader = group; leader_nr_running = sum_nr_running; } @@ -3395,10 +3394,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (this == group_leader && group_leader != group_min) { *imbalance = min_load_per_task; - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = - cpumask_first(sched_group_cpus(group_leader)); - } return group_min; } #endif @@ -3412,16 +3407,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ static struct rq * find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, - unsigned long imbalance, const struct cpumask *cpus) + unsigned long imbalance, const cpumask_t *cpus) { struct rq *busiest = NULL, *rq; unsigned long max_load = 0; int i; - for_each_cpu(i, sched_group_cpus(group)) { + for_each_cpu_mask_nr(i, group->cpumask) { unsigned long wl; - if (!cpumask_test_cpu(i, cpus)) + if (!cpu_isset(i, *cpus)) continue; rq = cpu_rq(i); @@ -3451,7 +3446,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, - int *balance, struct cpumask *cpus) + int *balance, cpumask_t *cpus) { 
int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; @@ -3459,7 +3454,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct rq *busiest; unsigned long flags; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3519,8 +3514,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; goto out_balanced; } @@ -3537,8 +3532,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - &busiest->curr->cpus_allowed)) { + if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; goto out_one_pinned; @@ -3613,7 +3607,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, */ static int load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, - struct cpumask *cpus) + cpumask_t *cpus) { struct sched_group *group; struct rq *busiest = NULL; @@ -3622,7 +3616,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, int sd_idle = 0; int all_pinned = 0; - cpumask_setall(cpus); + cpus_setall(*cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3666,71 +3660,17 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, double_unlock_balance(this_rq, busiest); if (unlikely(all_pinned)) { - cpumask_clear_cpu(cpu_of(busiest), cpus); - if (!cpumask_empty(cpus)) + cpu_clear(cpu_of(busiest), *cpus); + if (!cpus_empty(*cpus)) goto redo; } } if (!ld_moved) { - int active_balance = 0; - schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - - if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) - return -1; - - if (sd->nr_balance_failed++ < 2) - return -1; - - /* - * The only task running in a non-idle cpu can be moved to this - * cpu in an attempt to completely freeup the other CPU - * package. The same method used to move task in load_balance() - * have been extended for load_balance_newidle() to speedup - * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2) - * - * The package power saving logic comes from - * find_busiest_group(). If there are no imbalance, then - * f_b_g() will return NULL. However when sched_mc={1,2} then - * f_b_g() will select a group from which a running task may be - * pulled to this cpu in order to make the other package idle. - * If there is no opportunity to make a package idle and if - * there are no imbalance, then f_b_g() will return NULL and no - * action will be taken in load_balance_newidle(). - * - * Under normal task pull operation due to imbalance, there - * will be more than one task in the source run queue and - * move_tasks() will succeed. ld_moved will be true and this - * active balance code will not be triggered. 
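
The all_pinned handling in load_balance() above reverts to the shrink-and-retry idiom: drop the fully pinned busiest CPU from the candidate mask and redo the search. A one-word sketch (gcc builtins used for brevity; values invented):

#include <stdio.h>

int main(void)
{
	unsigned long cpus = 0x0fUL;	/* candidate CPUs 0-3 */
	int busiest;

	while (cpus) {
		busiest = __builtin_ctzl(cpus);	/* pretend it is busiest */
		/* all its tasks were pinned: drop it and redo the search */
		cpus &= ~(1UL << busiest);
		printf("cpu %d pinned, %d candidates left\n",
		       busiest, __builtin_popcountl(cpus));
	}
	return 0;
}
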
- */ - - /* Lock busiest in correct order while this_rq is held */ - double_lock_balance(this_rq, busiest); - - /* - * don't kick the migration_thread, if the curr - * task on busiest cpu can't be moved to this_cpu - */ - if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { - double_unlock_balance(this_rq, busiest); - all_pinned = 1; - return ld_moved; - } - - if (!busiest->active_balance) { - busiest->active_balance = 1; - busiest->push_cpu = this_cpu; - active_balance = 1; - } - - double_unlock_balance(this_rq, busiest); - if (active_balance) - wake_up_process(busiest->migration_thread); - } else sd->nr_balance_failed = 0; @@ -3756,10 +3696,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; unsigned long next_balance = jiffies + HZ; - cpumask_var_t tmpmask; - - if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) - return; + cpumask_t tmpmask; for_each_domain(this_cpu, sd) { unsigned long interval; @@ -3770,7 +3707,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) if (sd->flags & SD_BALANCE_NEWIDLE) /* If we've pulled tasks over stop searching: */ pulled_task = load_balance_newidle(this_cpu, this_rq, - sd, tmpmask); + sd, &tmpmask); interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) @@ -3785,7 +3722,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) */ this_rq->next_balance = next_balance; } - free_cpumask_var(tmpmask); } /* @@ -3823,7 +3759,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* Search for an sd spanning us and the target CPU. */ for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && - cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + cpu_isset(busiest_cpu, sd->span)) break; } @@ -3842,9 +3778,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) #ifdef CONFIG_NO_HZ static struct { atomic_t load_balancer; - cpumask_var_t cpu_mask; + cpumask_t cpu_mask; } nohz ____cacheline_aligned = { .load_balancer = ATOMIC_INIT(-1), + .cpu_mask = CPU_MASK_NONE, }; /* @@ -3872,7 +3809,7 @@ int select_nohz_load_balancer(int stop_tick) int cpu = smp_processor_id(); if (stop_tick) { - cpumask_set_cpu(cpu, nohz.cpu_mask); + cpu_set(cpu, nohz.cpu_mask); cpu_rq(cpu)->in_nohz_recently = 1; /* @@ -3886,7 +3823,7 @@ int select_nohz_load_balancer(int stop_tick) } /* time for ilb owner also to sleep */ - if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { if (atomic_read(&nohz.load_balancer) == cpu) atomic_set(&nohz.load_balancer, -1); return 0; @@ -3899,10 +3836,10 @@ int select_nohz_load_balancer(int stop_tick) } else if (atomic_read(&nohz.load_balancer) == cpu) return 1; } else { - if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) + if (!cpu_isset(cpu, nohz.cpu_mask)) return 0; - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); if (atomic_read(&nohz.load_balancer) == cpu) if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) @@ -3930,11 +3867,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) unsigned long next_balance = jiffies + 60*HZ; int update_next_balance = 0; int need_serialize; - cpumask_var_t tmp; - - /* Fails alloc? Rebalancing probably not a priority right now. 
*/ - if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) - return; + cpumask_t tmp; for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -3959,7 +3892,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } if (time_after_eq(jiffies, sd->last_balance + interval)) { - if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { + if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { /* * We've pulled tasks over so either we're no * longer idle, or one of our SMT siblings is @@ -3993,8 +3926,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) */ if (likely(update_next_balance)) rq->next_balance = next_balance; - - free_cpumask_var(tmp); } /* @@ -4019,13 +3950,12 @@ static void run_rebalance_domains(struct softirq_action *h) */ if (this_rq->idle_at_tick && atomic_read(&nohz.load_balancer) == this_cpu) { + cpumask_t cpus = nohz.cpu_mask; struct rq *rq; int balance_cpu; - for_each_cpu(balance_cpu, nohz.cpu_mask) { - if (balance_cpu == this_cpu) - continue; - + cpu_clear(this_cpu, cpus); + for_each_cpu_mask_nr(balance_cpu, cpus) { /* * If this cpu gets work to do, stop the load balancing * work being done for other cpus. Next load @@ -4063,7 +3993,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) rq->in_nohz_recently = 0; if (atomic_read(&nohz.load_balancer) == cpu) { - cpumask_clear_cpu(cpu, nohz.cpu_mask); + cpu_clear(cpu, nohz.cpu_mask); atomic_set(&nohz.load_balancer, -1); } @@ -4076,7 +4006,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * TBD: Traverse the sched domains and nominate * the nearest cpu in the nohz.cpu_mask. */ - int ilb = cpumask_first(nohz.cpu_mask); + int ilb = first_cpu(nohz.cpu_mask); if (ilb < nr_cpu_ids) resched_cpu(ilb); @@ -4088,7 +4018,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * cpus with ticks stopped, is it time for that to stop? 
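
run_rebalance_domains() above returns to copying nohz.cpu_mask and clearing this_cpu in the copy, instead of skipping this_cpu inside the loop. The idiom in miniature, with a one-word stand-in mask:

#include <stdio.h>

int main(void)
{
	unsigned long nohz_mask = 0x1bUL;	/* CPUs 0,1,3,4 tick-stopped */
	int this_cpu = 3;
	unsigned long cpus = nohz_mask & ~(1UL << this_cpu);	/* copy, then clear */
	int cpu;

	for (cpu = 0; cpu < 64; cpu++)
		if (cpus & (1UL << cpu))
			printf("balance on behalf of cpu %d\n", cpu);
	return 0;
}
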
*/ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && - cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { + cpus_weight(nohz.cpu_mask) == num_online_cpus()) { resched_cpu(cpu); return; } @@ -4098,7 +4028,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * someone else, then no need raise the SCHED_SOFTIRQ */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && - cpumask_test_cpu(cpu, nohz.cpu_mask)) + cpu_isset(cpu, nohz.cpu_mask)) return; #endif if (time_after_eq(jiffies, rq->next_balance)) @@ -5471,9 +5401,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) return retval; } -long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) { - cpumask_var_t cpus_allowed, new_mask; + cpumask_t cpus_allowed; + cpumask_t new_mask = *in_mask; struct task_struct *p; int retval; @@ -5495,14 +5426,6 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); - if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_put_task; - } - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { - retval = -ENOMEM; - goto out_free_cpus_allowed; - } retval = -EPERM; if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; @@ -5511,41 +5434,37 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_unlock; - cpuset_cpus_allowed(p, cpus_allowed); - cpumask_and(new_mask, in_mask, cpus_allowed); + cpuset_cpus_allowed(p, &cpus_allowed); + cpus_and(new_mask, new_mask, cpus_allowed); again: - retval = set_cpus_allowed_ptr(p, new_mask); + retval = set_cpus_allowed_ptr(p, &new_mask); if (!retval) { - cpuset_cpus_allowed(p, cpus_allowed); - if (!cpumask_subset(new_mask, cpus_allowed)) { + cpuset_cpus_allowed(p, &cpus_allowed); + if (!cpus_subset(new_mask, cpus_allowed)) { /* * We must have raced with a concurrent cpuset * update. Just reset the cpus_allowed to the * cpuset's cpus_allowed */ - cpumask_copy(new_mask, cpus_allowed); + new_mask = cpus_allowed; goto again; } } out_unlock: - free_cpumask_var(new_mask); -out_free_cpus_allowed: - free_cpumask_var(cpus_allowed); -out_put_task: put_task_struct(p); put_online_cpus(); return retval; } static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, - struct cpumask *new_mask) + cpumask_t *new_mask) { - if (len < cpumask_size()) - cpumask_clear(new_mask); - else if (len > cpumask_size()) - len = cpumask_size(); - + if (len < sizeof(cpumask_t)) { + memset(new_mask, 0, sizeof(cpumask_t)); + } else if (len > sizeof(cpumask_t)) { + len = sizeof(cpumask_t); + } return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; } @@ -5558,20 +5477,17 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { - cpumask_var_t new_mask; + cpumask_t new_mask; int retval; - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) - return -ENOMEM; + retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); + if (retval) + return retval; - retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); - if (retval == 0) - retval = sched_setaffinity(pid, new_mask); - free_cpumask_var(new_mask); - return retval; + return sched_setaffinity(pid, &new_mask); } -long sched_getaffinity(pid_t pid, struct cpumask *mask) +long sched_getaffinity(pid_t pid, cpumask_t *mask) { struct task_struct *p; int retval; @@ -5588,7 +5504,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) if (retval) goto out_unlock; - cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); + cpus_and(*mask, p->cpus_allowed, cpu_online_map); out_unlock: read_unlock(&tasklist_lock); @@ -5607,24 +5523,19 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { int ret; - cpumask_var_t mask; + cpumask_t mask; - if (len < cpumask_size()) + if (len < sizeof(cpumask_t)) return -EINVAL; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + ret = sched_getaffinity(pid, &mask); + if (ret < 0) + return ret; - ret = sched_getaffinity(pid, mask); - if (ret == 0) { - if (copy_to_user(user_mask_ptr, mask, cpumask_size())) - ret = -EFAULT; - else - ret = cpumask_size(); - } - free_cpumask_var(mask); + if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) + return -EFAULT; - return ret; + return sizeof(cpumask_t); } /** @@ -5966,7 +5877,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->prio = idle->normal_prio = MAX_PRIO; - cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + idle->cpus_allowed = cpumask_of_cpu(cpu); __set_task_cpu(idle, cpu); rq->curr = rq->idle = idle; @@ -5993,9 +5904,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * indicates which cpus entered this state. This is used * in the rcu update to wait only for active cpus. For system * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_BITS_NONE. + * always be CPU_MASK_NONE. */ -cpumask_var_t nohz_cpu_mask; +cpumask_t nohz_cpu_mask = CPU_MASK_NONE; /* * Increase the granularity value when there are more CPUs, @@ -6050,7 +5961,7 @@ static inline void sched_init_granularity(void) * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. 
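
get_user_cpu_mask() above zero-fills a short user buffer and truncates a long one before copying, now keyed off sizeof(cpumask_t) rather than cpumask_size(). A userspace-flavoured sketch with memcpy standing in for copy_from_user (toy types):

#include <stdio.h>
#include <string.h>

typedef struct { unsigned long bits[4]; } toy_cpumask_t;

static int toy_get_user_cpu_mask(const void *user, size_t len,
				 toy_cpumask_t *mask)
{
	if (len < sizeof(*mask))
		memset(mask, 0, sizeof(*mask));	/* short buffer: clear the tail */
	else if (len > sizeof(*mask))
		len = sizeof(*mask);		/* long buffer: ignore the tail */
	memcpy(mask, user, len);
	return 0;
}

int main(void)
{
	unsigned long word = 0x5UL;
	toy_cpumask_t mask;

	toy_get_user_cpu_mask(&word, sizeof(word), &mask);
	printf("%lx %lx\n", mask.bits[0], mask.bits[1]);	/* 5 0 */
	return 0;
}
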
*/ -int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) { struct migration_req req; unsigned long flags; @@ -6058,13 +5969,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) int ret = 0; rq = task_rq_lock(p, &flags); - if (!cpumask_intersects(new_mask, cpu_online_mask)) { + if (!cpus_intersects(*new_mask, cpu_online_map)) { ret = -EINVAL; goto out; } if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && - !cpumask_equal(&p->cpus_allowed, new_mask))) { + !cpus_equal(p->cpus_allowed, *new_mask))) { ret = -EINVAL; goto out; } @@ -6072,15 +5983,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + p->cpus_allowed = *new_mask; + p->rt.nr_cpus_allowed = cpus_weight(*new_mask); } /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) + if (cpu_isset(task_cpu(p), *new_mask)) goto out; - if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { + if (migrate_task(p, any_online_cpu(*new_mask), &req)) { /* Need help from migration thread: drop lock and wait. */ task_rq_unlock(rq, &flags); wake_up_process(rq->migration_thread); @@ -6122,7 +6033,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (task_cpu(p) != src_cpu) goto done; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!cpu_isset(dest_cpu, p->cpus_allowed)) goto fail; on_rq = p->se.on_rq; @@ -6219,43 +6130,50 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { + unsigned long flags; + cpumask_t mask; + struct rq *rq; int dest_cpu; - /* FIXME: Use cpumask_of_node here. */ - cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu)); - const struct cpumask *nodemask = &_nodemask; - -again: - /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) - goto move; - - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); - if (dest_cpu < nr_cpu_ids) - goto move; - - /* No more Mr. Nice Guy. */ - if (dest_cpu >= nr_cpu_ids) { - cpuset_cpus_allowed_locked(p, &p->cpus_allowed); - dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); - } - } + do { + /* On same node? */ + mask = node_to_cpumask(cpu_to_node(dead_cpu)); + cpus_and(mask, mask, p->cpus_allowed); + dest_cpu = any_online_cpu(mask); -move: - /* It can have affinity changed while we were choosing. */ - if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) - goto again; + /* On any allowed CPU? */ + if (dest_cpu >= nr_cpu_ids) + dest_cpu = any_online_cpu(p->cpus_allowed); + + /* No more Mr. Nice Guy. 
*/ + if (dest_cpu >= nr_cpu_ids) { + cpumask_t cpus_allowed; + + cpuset_cpus_allowed_locked(p, &cpus_allowed); + /* + * Try to stay on the same cpuset, where the + * current cpuset may be a subset of all cpus. + * The cpuset_cpus_allowed_locked() variant of + * cpuset_cpus_allowed() will not block. It must be + * called within calls to cpuset_lock/cpuset_unlock. + */ + rq = task_rq_lock(p, &flags); + p->cpus_allowed = cpus_allowed; + dest_cpu = any_online_cpu(p->cpus_allowed); + task_rq_unlock(rq, &flags); + + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, dead_cpu); + } + } + } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); } /* @@ -6267,7 +6185,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) */ static void migrate_nr_uninterruptible(struct rq *rq_src) { - struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); + struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); unsigned long flags; local_irq_save(flags); @@ -6557,7 +6475,7 @@ static void set_rq_online(struct rq *rq) if (!rq->online) { const struct sched_class *class; - cpumask_set_cpu(rq->cpu, rq->rd->online); + cpu_set(rq->cpu, rq->rd->online); rq->online = 1; for_each_class(class) { @@ -6577,7 +6495,7 @@ static void set_rq_offline(struct rq *rq) class->rq_offline(rq); } - cpumask_clear_cpu(rq->cpu, rq->rd->online); + cpu_clear(rq->cpu, rq->rd->online); rq->online = 0; } } @@ -6618,7 +6536,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_online(rq); } @@ -6632,7 +6550,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; /* Unbind it from offline cpu so it can run. Fall thru. 
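
The move_task_off_dead_cpu() rework above restores a fallback ladder: prefer the dead CPU's node, then any allowed online CPU, then widen the allowed set via the cpuset. Sketched with one-word masks and made-up values:

#include <stdio.h>

static int first_cpu_of(unsigned long mask)
{
	return mask ? __builtin_ctzl(mask) : 64;	/* 64 == none found */
}

int main(void)
{
	unsigned long online  = 0x0eUL;	/* CPU 0 just went down */
	unsigned long node    = 0x03UL;	/* dead CPU's node holds CPUs 0-1 */
	unsigned long allowed = 0x01UL;	/* task was bound to CPU 0 */
	int dest;

	dest = first_cpu_of(node & allowed & online);	/* same node? */
	if (dest >= 64)
		dest = first_cpu_of(allowed & online);	/* any allowed? */
	if (dest >= 64) {
		allowed = ~0UL;	/* "No more Mr. Nice Guy": widen via cpuset */
		dest = first_cpu_of(allowed & online);
	}
	printf("moved to cpu %d\n", dest);	/* 1 */
	return 0;
}
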
*/ kthread_bind(cpu_rq(cpu)->migration_thread, - cpumask_any(cpu_online_mask)); + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; @@ -6682,7 +6600,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + BUG_ON(!cpu_isset(cpu, rq->rd->span)); set_rq_offline(rq); } spin_unlock_irqrestore(&rq->lock, flags); @@ -6721,13 +6639,13 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, - struct cpumask *groupmask) + cpumask_t *groupmask) { struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); - cpumask_clear(groupmask); + cpulist_scnprintf(str, sizeof(str), sd->span); + cpus_clear(*groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@ -6741,11 +6659,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_CONT "span %s level %s\n", str, sd->name); - if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { + if (!cpu_isset(cpu, sd->span)) { printk(KERN_ERR "ERROR: domain->span does not contain " "CPU%d\n", cpu); } - if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { + if (!cpu_isset(cpu, group->cpumask)) { printk(KERN_ERR "ERROR: domain->groups does not contain" " CPU%d\n", cpu); } @@ -6765,32 +6683,31 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!cpumask_weight(sched_group_cpus(group))) { + if (!cpus_weight(group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: empty group\n"); break; } - if (cpumask_intersects(groupmask, sched_group_cpus(group))) { + if (cpus_intersects(*groupmask, group->cpumask)) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: repeated CPUs\n"); break; } - cpumask_or(groupmask, groupmask, sched_group_cpus(group)); + cpus_or(*groupmask, *groupmask, group->cpumask); - cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); + cpulist_scnprintf(str, sizeof(str), group->cpumask); printk(KERN_CONT " %s", str); group = group->next; } while (group != sd->groups); printk(KERN_CONT "\n"); - if (!cpumask_equal(sched_domain_span(sd), groupmask)) + if (!cpus_equal(sd->span, *groupmask)) printk(KERN_ERR "ERROR: groups don't span domain->span\n"); - if (sd->parent && - !cpumask_subset(groupmask, sched_domain_span(sd->parent))) + if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) printk(KERN_ERR "ERROR: parent span is not a superset " "of domain->span\n"); return 0; @@ -6798,7 +6715,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, static void sched_domain_debug(struct sched_domain *sd, int cpu) { - cpumask_var_t groupmask; + cpumask_t *groupmask; int level = 0; if (!sd) { @@ -6808,7 +6725,8 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); - if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) { + groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!groupmask) { printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); return; } @@ -6821,7 +6739,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!sd) break; } - free_cpumask_var(groupmask); + kfree(groupmask); } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) @@ -6829,7 
+6747,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) static int sd_degenerate(struct sched_domain *sd) { - if (cpumask_weight(sched_domain_span(sd)) == 1) + if (cpus_weight(sd->span) == 1) return 1; /* Following flags need at least 2 groups */ @@ -6860,7 +6778,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) if (sd_degenerate(parent)) return 1; - if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) + if (!cpus_equal(sd->span, parent->span)) return 0; /* Does parent contain flags not in child? */ @@ -6884,16 +6802,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } -static void free_rootdomain(struct root_domain *rd) -{ - cpupri_cleanup(&rd->cpupri); - - free_cpumask_var(rd->rto_mask); - free_cpumask_var(rd->online); - free_cpumask_var(rd->span); - kfree(rd); -} - static void rq_attach_root(struct rq *rq, struct root_domain *rd) { unsigned long flags; @@ -6903,63 +6811,38 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) if (rq->rd) { struct root_domain *old_rd = rq->rd; - if (cpumask_test_cpu(rq->cpu, old_rd->online)) + if (cpu_isset(rq->cpu, old_rd->online)) set_rq_offline(rq); - cpumask_clear_cpu(rq->cpu, old_rd->span); + cpu_clear(rq->cpu, old_rd->span); if (atomic_dec_and_test(&old_rd->refcount)) - free_rootdomain(old_rd); + kfree(old_rd); } atomic_inc(&rd->refcount); rq->rd = rd; - cpumask_set_cpu(rq->cpu, rd->span); - if (cpumask_test_cpu(rq->cpu, cpu_online_mask)) + cpu_set(rq->cpu, rd->span); + if (cpu_isset(rq->cpu, cpu_online_map)) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); } -static int init_rootdomain(struct root_domain *rd, bool bootmem) +static void init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); - if (bootmem) { - alloc_bootmem_cpumask_var(&def_root_domain.span); - alloc_bootmem_cpumask_var(&def_root_domain.online); - alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); - cpupri_init(&rd->cpupri, true); - return 0; - } - - if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) - goto free_rd; - if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) - goto free_span; - if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) - goto free_online; - - if (cpupri_init(&rd->cpupri, false) != 0) - goto free_rto_mask; - return 0; + cpus_clear(rd->span); + cpus_clear(rd->online); -free_rto_mask: - free_cpumask_var(rd->rto_mask); -free_online: - free_cpumask_var(rd->online); -free_span: - free_cpumask_var(rd->span); -free_rd: - kfree(rd); - return -ENOMEM; + cpupri_init(&rd->cpupri); } static void init_defrootdomain(void) { - init_rootdomain(&def_root_domain, true); - + init_rootdomain(&def_root_domain); atomic_set(&def_root_domain.refcount, 1); } @@ -6971,10 +6854,7 @@ static struct root_domain *alloc_rootdomain(void) if (!rd) return NULL; - if (init_rootdomain(rd, false) != 0) { - kfree(rd); - return NULL; - } + init_rootdomain(rd); return rd; } @@ -7016,12 +6896,19 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) } /* cpus with isolated domains */ -static cpumask_var_t cpu_isolated_map; +static cpumask_t cpu_isolated_map = CPU_MASK_NONE; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { - cpulist_parse(str, cpu_isolated_map); + static int __initdata ints[NR_CPUS]; + int i; + + str = get_options(str, ARRAY_SIZE(ints), ints); + cpus_clear(cpu_isolated_map); + for (i = 1; i <= ints[0]; i++) + if (ints[i] < NR_CPUS) + cpu_set(ints[i], 
cpu_isolated_map); return 1; } @@ -7030,43 +6917,42 @@ __setup("isolcpus=", isolated_cpu_setup); /* * init_sched_build_groups takes the cpumask we wish to span, and a pointer * to a function which identifies what group(along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids - * (due to the fact that we keep track of groups covered with a struct cpumask). + * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS + * (due to the fact that we keep track of groups covered with a cpumask_t). * * init_sched_build_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ static void -init_sched_build_groups(const struct cpumask *span, - const struct cpumask *cpu_map, - int (*group_fn)(int cpu, const struct cpumask *cpu_map, +init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, + int (*group_fn)(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - struct cpumask *tmpmask), - struct cpumask *covered, struct cpumask *tmpmask) + cpumask_t *tmpmask), + cpumask_t *covered, cpumask_t *tmpmask) { struct sched_group *first = NULL, *last = NULL; int i; - cpumask_clear(covered); + cpus_clear(*covered); - for_each_cpu(i, span) { + for_each_cpu_mask_nr(i, *span) { struct sched_group *sg; int group = group_fn(i, cpu_map, &sg, tmpmask); int j; - if (cpumask_test_cpu(i, covered)) + if (cpu_isset(i, *covered)) continue; - cpumask_clear(sched_group_cpus(sg)); + cpus_clear(sg->cpumask); sg->__cpu_power = 0; - for_each_cpu(j, span) { + for_each_cpu_mask_nr(j, *span) { if (group_fn(j, cpu_map, NULL, tmpmask) != group) continue; - cpumask_set_cpu(j, covered); - cpumask_set_cpu(j, sched_group_cpus(sg)); + cpu_set(j, *covered); + cpu_set(j, sg->cpumask); } if (!first) first = sg; @@ -7130,10 +7016,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. */ -static void sched_domain_node_span(int node, struct cpumask *span) +static void sched_domain_node_span(int node, cpumask_t *span) { nodemask_t used_nodes; - /* FIXME: use cpumask_of_node() */ node_to_cpumask_ptr(nodemask, node); int i; @@ -7154,34 +7039,19 @@ static void sched_domain_node_span(int node, struct cpumask *span) int sched_smt_power_savings = 0, sched_mc_power_savings = 0; -/* - * The cpus mask in sched_group and sched_domain hangs off the end. - * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space - * for nr_cpu_ids < CONFIG_NR_CPUS. 
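
isolated_cpu_setup() above drops cpulist_parse() (range syntax such as "1-3,5") in favour of get_options(), i.e. a plain comma list of CPU numbers. A minimal standalone reading of the comma-list form, with strtol in place of get_options():

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *arg = "1,2,5";	/* as in isolcpus=1,2,5 */
	unsigned long isolated = 0;
	char *end;

	for (;;) {
		long cpu = strtol(arg, &end, 10);

		if (end == arg)
			break;
		if (cpu >= 0 && cpu < 64)
			isolated |= 1UL << cpu;
		if (*end != ',')
			break;
		arg = end + 1;
	}
	printf("isolated mask: %#lx\n", isolated);	/* 0x26 */
	return 0;
}
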
- */ -struct static_sched_group { - struct sched_group sg; - DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); -}; - -struct static_sched_domain { - struct sched_domain sd; - DECLARE_BITMAP(span, CONFIG_NR_CPUS); -}; - /* * SMT sched-domains: */ #ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); static int -cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_cpus, cpu).sg; + *sg = &per_cpu(sched_group_cpus, cpu); return cpu; } #endif /* CONFIG_SCHED_SMT */ @@ -7190,55 +7060,56 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, * multi-core sched-domains: */ #ifdef CONFIG_SCHED_MC -static DEFINE_PER_CPU(struct static_sched_domain, core_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); +static DEFINE_PER_CPU(struct sched_domain, core_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_core); #endif /* CONFIG_SCHED_MC */ #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); if (sg) - *sg = &per_cpu(sched_group_core, group).sg; + *sg = &per_cpu(sched_group_core, group); return group; } #elif defined(CONFIG_SCHED_MC) static int -cpu_to_core_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *unused) +cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *unused) { if (sg) - *sg = &per_cpu(sched_group_core, cpu).sg; + *sg = &per_cpu(sched_group_core, cpu); return cpu; } #endif -static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_group, sched_group_phys); static int -cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, struct cpumask *mask) +cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, + cpumask_t *mask) { int group; #ifdef CONFIG_SCHED_MC - /* FIXME: Use cpu_coregroup_mask. 
*/ *mask = cpu_coregroup_map(cpu); cpus_and(*mask, *mask, *cpu_map); - group = cpumask_first(mask); + group = first_cpu(*mask); #elif defined(CONFIG_SCHED_SMT) - cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); - group = cpumask_first(mask); + *mask = per_cpu(cpu_sibling_map, cpu); + cpus_and(*mask, *mask, *cpu_map); + group = first_cpu(*mask); #else group = cpu; #endif if (sg) - *sg = &per_cpu(sched_group_phys, group).sg; + *sg = &per_cpu(sched_group_phys, group); return group; } @@ -7252,21 +7123,19 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); static struct sched_group ***sched_group_nodes_bycpu; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); +static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); -static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, - struct sched_group **sg, - struct cpumask *nodemask) +static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg, cpumask_t *nodemask) { int group; - /* FIXME: use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu)); - cpumask_and(nodemask, pnodemask, cpu_map); - group = cpumask_first(nodemask); + *nodemask = node_to_cpumask(cpu_to_node(cpu)); + cpus_and(*nodemask, *nodemask, *cpu_map); + group = first_cpu(*nodemask); if (sg) - *sg = &per_cpu(sched_group_allnodes, group).sg; + *sg = &per_cpu(sched_group_allnodes, group); return group; } @@ -7278,11 +7147,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) if (!sg) return; do { - for_each_cpu(j, sched_group_cpus(sg)) { + for_each_cpu_mask_nr(j, sg->cpumask) { struct sched_domain *sd; - sd = &per_cpu(phys_domains, j).sd; - if (j != cpumask_first(sched_group_cpus(sd->groups))) { + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { /* * Only add "power" once for each * physical package. 
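
For readers tracking the conversion: the hunks in this file swap the pointer-based struct cpumask API back to the older fixed-size cpumask_t API. A minimal sketch of the two calling styles, with hypothetical masks a, b and dst (illustrative only, not part of the patch):

	static void cpumask_api_sketch(void)
	{
		cpumask_t a = CPU_MASK_NONE, b = CPU_MASK_NONE, dst;

		cpu_set(0, a);
		cpu_set(0, b);
		cpu_set(1, b);

		cpus_and(dst, a, b);		/* old API: lvalue arguments */
		if (!cpus_empty(dst))
			printk(KERN_INFO "first cpu: %d\n", first_cpu(dst));

		cpumask_and(&dst, &a, &b);	/* new API: pointer arguments */
	}

The old macros are cheap but put NR_CPUS bits on the stack for every local mask, which is what the allmasks machinery restored below works around.
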
@@ -7299,12 +7168,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) #ifdef CONFIG_NUMA /* Free memory allocated for various sched_group structures */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { int cpu, i; - for_each_cpu(cpu, cpu_map) { + for_each_cpu_mask_nr(cpu, *cpu_map) { struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; @@ -7313,11 +7181,10 @@ static void free_sched_groups(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; - /* FIXME: Use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, i); - cpus_and(*nodemask, *pnodemask, *cpu_map); - if (cpumask_empty(nodemask)) + *nodemask = node_to_cpumask(i); + cpus_and(*nodemask, *nodemask, *cpu_map); + if (cpus_empty(*nodemask)) continue; if (sg == NULL) @@ -7335,8 +7202,7 @@ static void free_sched_groups(const struct cpumask *cpu_map, } } #else /* !CONFIG_NUMA */ -static void free_sched_groups(const struct cpumask *cpu_map, - struct cpumask *nodemask) +static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) { } #endif /* CONFIG_NUMA */ @@ -7362,7 +7228,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) WARN_ON(!sd || !sd->groups); - if (cpu != cpumask_first(sched_group_cpus(sd->groups))) + if (cpu != first_cpu(sd->groups->cpumask)) return; child = sd->child; @@ -7427,6 +7293,48 @@ SD_INIT_FUNC(CPU) SD_INIT_FUNC(MC) #endif +/* + * To minimize stack usage kmalloc room for cpumasks and share the + * space as the usage in build_sched_domains() dictates. Used only + * if the amount of space is significant. + */ +struct allmasks { + cpumask_t tmpmask; /* make this one first */ + union { + cpumask_t nodemask; + cpumask_t this_sibling_map; + cpumask_t this_core_map; + }; + cpumask_t send_covered; + +#ifdef CONFIG_NUMA + cpumask_t domainspan; + cpumask_t covered; + cpumask_t notcovered; +#endif +}; + +#if NR_CPUS > 128 +#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ + *masks = kmalloc(sizeof(**masks), GFP_KERNEL); +} +static inline void sched_cpumask_free(struct allmasks *masks) +{ + kfree(masks); +} +#else +#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v +static inline void sched_cpumask_alloc(struct allmasks **masks) +{ } +static inline void sched_cpumask_free(struct allmasks *masks) +{ } +#endif + +#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ + ((unsigned long)(a) + offsetof(struct allmasks, v)) + static int default_relax_domain_level = -1; static int __init setup_relax_domain_level(char *str) @@ -7466,38 +7374,17 @@ static void set_domain_attribute(struct sched_domain *sd, * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int __build_sched_domains(const struct cpumask *cpu_map, +static int __build_sched_domains(const cpumask_t *cpu_map, struct sched_domain_attr *attr) { - int i, err = -ENOMEM; + int i; struct root_domain *rd; - cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered, - tmpmask; + SCHED_CPUMASK_DECLARE(allmasks); + cpumask_t *tmpmask; #ifdef CONFIG_NUMA - cpumask_var_t domainspan, covered, notcovered; struct sched_group **sched_group_nodes = NULL; int sd_allnodes = 0; - if (!alloc_cpumask_var(&domainspan, GFP_KERNEL)) - goto out; - if (!alloc_cpumask_var(&covered, 
GFP_KERNEL)) - goto free_domainspan; - if (!alloc_cpumask_var(&notcovered, GFP_KERNEL)) - goto free_covered; -#endif - - if (!alloc_cpumask_var(&nodemask, GFP_KERNEL)) - goto free_notcovered; - if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL)) - goto free_nodemask; - if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL)) - goto free_this_sibling_map; - if (!alloc_cpumask_var(&send_covered, GFP_KERNEL)) - goto free_this_core_map; - if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - goto free_send_covered; - -#ifdef CONFIG_NUMA /* * Allocate the per-node list of sched groups */ @@ -7505,37 +7392,54 @@ static int __build_sched_domains(const struct cpumask *cpu_map, GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); - goto free_tmpmask; + return -ENOMEM; } #endif rd = alloc_rootdomain(); if (!rd) { printk(KERN_WARNING "Cannot alloc root domain\n"); - goto free_sched_groups; +#ifdef CONFIG_NUMA + kfree(sched_group_nodes); +#endif + return -ENOMEM; } + /* get space for all scratch cpumask variables */ + sched_cpumask_alloc(&allmasks); + if (!allmasks) { + printk(KERN_WARNING "Cannot alloc cpumask array\n"); + kfree(rd); #ifdef CONFIG_NUMA - sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes; + kfree(sched_group_nodes); +#endif + return -ENOMEM; + } + + tmpmask = (cpumask_t *)allmasks; + + +#ifdef CONFIG_NUMA + sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; #endif /* * Set up domains for cpus specified by the cpu_map. */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd = NULL, *p; + SCHED_CPUMASK_VAR(nodemask, allmasks); - /* FIXME: use cpumask_of_node */ *nodemask = node_to_cpumask(cpu_to_node(i)); cpus_and(*nodemask, *nodemask, *cpu_map); #ifdef CONFIG_NUMA - if (cpumask_weight(cpu_map) > - SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { + if (cpus_weight(*cpu_map) > + SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { sd = &per_cpu(allnodes_domains, i); SD_INIT(sd, ALLNODES); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), cpu_map); + sd->span = *cpu_map; cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); p = sd; sd_allnodes = 1; @@ -7545,19 +7449,18 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(node_domains, i); SD_INIT(sd, NODE); set_domain_attribute(sd, attr); - sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); + sched_domain_node_span(cpu_to_node(i), &sd->span); sd->parent = p; if (p) p->child = sd; - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); SD_INIT(sd, CPU); set_domain_attribute(sd, attr); - cpumask_copy(sched_domain_span(sd), nodemask); + sd->span = *nodemask; sd->parent = p; if (p) p->child = sd; @@ -7565,12 +7468,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC p = sd; - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); SD_INIT(sd, MC); set_domain_attribute(sd, attr); - *sched_domain_span(sd) = cpu_coregroup_map(i); - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + sd->span = cpu_coregroup_map(i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); @@ -7578,11 +7480,11 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT p = sd; - sd =
&per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); SD_INIT(sd, SIBLING); set_domain_attribute(sd, attr); - cpumask_and(sched_domain_span(sd), - &per_cpu(cpu_sibling_map, i), cpu_map); + sd->span = per_cpu(cpu_sibling_map, i); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); @@ -7591,10 +7493,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu(i, cpu_map) { - cpumask_and(this_sibling_map, - &per_cpu(cpu_sibling_map, i), cpu_map); - if (i != cpumask_first(this_sibling_map)) + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_sibling_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + + *this_sibling_map = per_cpu(cpu_sibling_map, i); + cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map); + if (i != first_cpu(*this_sibling_map)) continue; init_sched_build_groups(this_sibling_map, cpu_map, @@ -7605,11 +7510,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC /* Set up multi-core groups */ - for_each_cpu(i, cpu_map) { - /* FIXME: Use cpu_coregroup_mask */ + for_each_cpu_mask_nr(i, *cpu_map) { + SCHED_CPUMASK_VAR(this_core_map, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + *this_core_map = cpu_coregroup_map(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); - if (i != cpumask_first(this_core_map)) + if (i != first_cpu(*this_core_map)) continue; init_sched_build_groups(this_core_map, cpu_map, @@ -7620,10 +7527,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Set up physical groups */ for (i = 0; i < nr_node_ids; i++) { - /* FIXME: Use cpumask_of_node */ + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(send_covered, allmasks); + *nodemask = node_to_cpumask(i); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) + if (cpus_empty(*nodemask)) continue; init_sched_build_groups(nodemask, cpu_map, @@ -7634,6 +7543,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_NUMA /* Set up node groups */ if (sd_allnodes) { + SCHED_CPUMASK_VAR(send_covered, allmasks); + init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group, send_covered, tmpmask); @@ -7642,58 +7553,58 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; + SCHED_CPUMASK_VAR(nodemask, allmasks); + SCHED_CPUMASK_VAR(domainspan, allmasks); + SCHED_CPUMASK_VAR(covered, allmasks); int j; - /* FIXME: Use cpumask_of_node */ *nodemask = node_to_cpumask(i); - cpumask_clear(covered); + cpus_clear(*covered); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpumask_empty(nodemask)) { + if (cpus_empty(*nodemask)) { sched_group_nodes[i] = NULL; continue; } sched_domain_node_span(i, domainspan); - cpumask_and(domainspan, domainspan, cpu_map); + cpus_and(*domainspan, *domainspan, *cpu_map); - sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), - GFP_KERNEL, i); + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING "Can not alloc domain group for " "node %d\n", i); goto error; } sched_group_nodes[i] = sg; - for_each_cpu(j, nodemask) { + for_each_cpu_mask_nr(j, *nodemask) { struct sched_domain *sd; sd = &per_cpu(node_domains, j); sd->groups = sg; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), nodemask); + sg->cpumask = *nodemask; sg->next = sg; 
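/*
 * Aside (a sketch, not part of the patch): each SCHED_CPUMASK_VAR(covered,
 * allmasks) above expands to roughly
 *
 *	cpumask_t *covered = (cpumask_t *)
 *		((unsigned long)(allmasks) + offsetof(struct allmasks, covered));
 *
 * so every "local" mask is really a pointer into the single struct allmasks
 * scratch area -- kmalloc'ed when NR_CPUS > 128, on the stack otherwise --
 * and the union lets nodemask, this_sibling_map and this_core_map share one
 * slot, since they are never live at the same time.
 */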
- cpumask_or(covered, covered, nodemask); + cpus_or(*covered, *covered, *nodemask); prev = sg; for (j = 0; j < nr_node_ids; j++) { + SCHED_CPUMASK_VAR(notcovered, allmasks); int n = (i + j) % nr_node_ids; - /* FIXME: Use cpumask_of_node */ node_to_cpumask_ptr(pnodemask, n); - cpumask_complement(notcovered, covered); - cpumask_and(tmpmask, notcovered, cpu_map); - cpumask_and(tmpmask, tmpmask, domainspan); - if (cpumask_empty(tmpmask)) + cpus_complement(*notcovered, *covered); + cpus_and(*tmpmask, *notcovered, *cpu_map); + cpus_and(*tmpmask, *tmpmask, *domainspan); + if (cpus_empty(*tmpmask)) break; - cpumask_and(tmpmask, tmpmask, pnodemask); - if (cpumask_empty(tmpmask)) + cpus_and(*tmpmask, *tmpmask, *pnodemask); + if (cpus_empty(*tmpmask)) continue; - sg = kmalloc_node(sizeof(struct sched_group) + - cpumask_size(), + sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING @@ -7701,9 +7612,9 @@ static int __build_sched_domains(const struct cpumask *cpu_map, goto error; } sg->__cpu_power = 0; - cpumask_copy(sched_group_cpus(sg), tmpmask); + sg->cpumask = *tmpmask; sg->next = prev->next; - cpumask_or(covered, covered, tmpmask); + cpus_or(*covered, *covered, *tmpmask); prev->next = sg; prev = sg; } @@ -7712,22 +7623,22 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Calculate CPU power for physical packages and nodes */ #ifdef CONFIG_SCHED_SMT - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(cpu_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(cpu_domains, i); init_sched_groups_power(i, sd); } #endif #ifdef CONFIG_SCHED_MC - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(core_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(core_domains, i); init_sched_groups_power(i, sd); } #endif - for_each_cpu(i, cpu_map) { - struct sched_domain *sd = &per_cpu(phys_domains, i).sd; + for_each_cpu_mask_nr(i, *cpu_map) { + struct sched_domain *sd = &per_cpu(phys_domains, i); init_sched_groups_power(i, sd); } @@ -7739,78 +7650,53 @@ static int __build_sched_domains(const struct cpumask *cpu_map, if (sd_allnodes) { struct sched_group *sg; - cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg, + cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, tmpmask); init_numa_sched_groups_power(sg); } #endif /* Attach the domains */ - for_each_cpu(i, cpu_map) { + for_each_cpu_mask_nr(i, *cpu_map) { struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i).sd; + sd = &per_cpu(cpu_domains, i); #elif defined(CONFIG_SCHED_MC) - sd = &per_cpu(core_domains, i).sd; + sd = &per_cpu(core_domains, i); #else - sd = &per_cpu(phys_domains, i).sd; + sd = &per_cpu(phys_domains, i); #endif cpu_attach_domain(sd, rd, i); } - err = 0; - -free_tmpmask: - free_cpumask_var(tmpmask); -free_send_covered: - free_cpumask_var(send_covered); -free_this_core_map: - free_cpumask_var(this_core_map); -free_this_sibling_map: - free_cpumask_var(this_sibling_map); -free_nodemask: - free_cpumask_var(nodemask); -free_notcovered: -#ifdef CONFIG_NUMA - free_cpumask_var(notcovered); -free_covered: - free_cpumask_var(covered); -free_domainspan: - free_cpumask_var(domainspan); -out: -#endif - return err; - -free_sched_groups: -#ifdef CONFIG_NUMA - kfree(sched_group_nodes); -#endif - goto free_tmpmask; + sched_cpumask_free(allmasks); + return 0; #ifdef CONFIG_NUMA error: free_sched_groups(cpu_map, tmpmask); - free_rootdomain(rd); - goto free_tmpmask; + 
sched_cpumask_free(allmasks); + kfree(rd); + return -ENOMEM; #endif } -static int build_sched_domains(const struct cpumask *cpu_map) +static int build_sched_domains(const cpumask_t *cpu_map) { return __build_sched_domains(cpu_map, NULL); } -static struct cpumask *doms_cur; /* current sched domains */ +static cpumask_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attribues of custom domains in 'doms_cur' */ /* * Special case: If a kmalloc of a doms_cur partition (array of - * cpumask) fails, then fallback to a single sched domain, - * as determined by the single cpumask fallback_doms. + * cpumask_t) fails, then fallback to a single sched domain, + * as determined by the single cpumask_t fallback_doms. */ -static cpumask_var_t fallback_doms; +static cpumask_t fallback_doms; /* * arch_update_cpu_topology lets virtualized architectures update the @@ -7827,16 +7713,16 @@ int __attribute__((weak)) arch_update_cpu_topology(void) * For now this just excludes isolated cpus, but could be used to * exclude other special cases in the future. */ -static int arch_init_sched_domains(const struct cpumask *cpu_map) +static int arch_init_sched_domains(const cpumask_t *cpu_map) { int err; arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); + doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); if (!doms_cur) - doms_cur = fallback_doms; - cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); + doms_cur = &fallback_doms; + cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); dattr_cur = NULL; err = build_sched_domains(doms_cur); register_sched_domain_sysctl(); @@ -7844,8 +7730,8 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) return err; } -static void arch_destroy_sched_domains(const struct cpumask *cpu_map, - struct cpumask *tmpmask) +static void arch_destroy_sched_domains(const cpumask_t *cpu_map, + cpumask_t *tmpmask) { free_sched_groups(cpu_map, tmpmask); } @@ -7854,16 +7740,15 @@ static void arch_destroy_sched_domains(const struct cpumask *cpu_map, * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ -static void detach_destroy_domains(const struct cpumask *cpu_map) +static void detach_destroy_domains(const cpumask_t *cpu_map) { - /* Save because hotplug lock held. */ - static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); + cpumask_t tmpmask; int i; - for_each_cpu(i, cpu_map) + for_each_cpu_mask_nr(i, *cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); synchronize_sched(); - arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); + arch_destroy_sched_domains(cpu_map, &tmpmask); } /* handle null as "default" */ @@ -7888,7 +7773,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should setup one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the @@ -7902,14 +7787,13 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * the single partition 'fallback_doms', it also forces the domains * to be rebuilt. 
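 *
 * For illustration (a hypothetical caller, not taken from this patch),
 * a partition into two disjoint two-CPU domains would look like:
 *
 *	cpumask_t doms[2];
 *
 *	cpus_clear(doms[0]);
 *	cpu_set(0, doms[0]);
 *	cpu_set(1, doms[0]);
 *	cpus_clear(doms[1]);
 *	cpu_set(2, doms[1]);
 *	cpu_set(3, doms[1]);
 *	partition_sched_domains(2, doms, NULL);
 *
 * (subject to the hotplug-lock rule stated below).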
* - * If doms_new == NULL it will be replaced with cpu_online_mask. + * If doms_new == NULL it will be replaced with cpu_online_map. * ndoms_new == 0 is a special case for destroying existing domains, * and it will not create the default domain. * * Call with hotplug lock held */ -/* FIXME: Change to struct cpumask *doms_new[] */ -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { int i, j, n; @@ -7928,7 +7812,7 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(&doms_cur[i], &doms_new[j]) + if (cpus_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } @@ -7940,15 +7824,15 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, if (doms_new == NULL) { ndoms_cur = 0; - doms_new = fallback_doms; - cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); + doms_new = &fallback_doms; + cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); WARN_ON_ONCE(dattr_new); } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpumask_equal(&doms_new[i], &doms_cur[j]) + if (cpus_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } @@ -7960,7 +7844,7 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, } /* Remember the new sched domains */ - if (doms_cur != fallback_doms) + if (doms_cur != &fallback_doms) kfree(doms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; @@ -7989,25 +7873,14 @@ int arch_reinit_sched_domains(void) static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) { int ret; - unsigned int level = 0; - if (sscanf(buf, "%u", &level) != 1) - return -EINVAL; - - /* - * level is always be positive so don't check for - * level < POWERSAVINGS_BALANCE_NONE which is 0 - * What happens on 0 or 1 byte write, - * need to check for count as well? 
- */ - - if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) + if (buf[0] != '0' && buf[0] != '1') return -EINVAL; if (smt) - sched_smt_power_savings = level; + sched_smt_power_savings = (buf[0] == '1'); else - sched_mc_power_savings = level; + sched_mc_power_savings = (buf[0] == '1'); ret = arch_reinit_sched_domains(); @@ -8111,9 +7984,7 @@ static int update_runtime(struct notifier_block *nfb, void __init sched_init_smp(void) { - cpumask_var_t non_isolated_cpus; - - alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); + cpumask_t non_isolated_cpus; #if defined(CONFIG_NUMA) sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), @@ -8122,10 +7993,10 @@ void __init sched_init_smp(void) #endif get_online_cpus(); mutex_lock(&sched_domains_mutex); - arch_init_sched_domains(cpu_online_mask); - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - if (cpumask_empty(non_isolated_cpus)) - cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); + arch_init_sched_domains(&cpu_online_map); + cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); + if (cpus_empty(non_isolated_cpus)) + cpu_set(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); put_online_cpus(); @@ -8140,13 +8011,9 @@ void __init sched_init_smp(void) init_hrtick(); /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) + if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) BUG(); sched_init_granularity(); - free_cpumask_var(non_isolated_cpus); - - alloc_cpumask_var(&fallback_doms, GFP_KERNEL); - init_sched_rt_class(); } #else void __init sched_init_smp(void) @@ -8461,15 +8328,6 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; - /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_bootmem_cpumask_var(&nohz_cpu_mask); -#ifdef CONFIG_SMP -#ifdef CONFIG_NO_HZ - alloc_bootmem_cpumask_var(&nohz.cpu_mask); -#endif - alloc_bootmem_cpumask_var(&cpu_isolated_map); -#endif /* SMP */ - scheduler_running = 1; } diff --git a/trunk/kernel/sched_cpupri.c b/trunk/kernel/sched_cpupri.c index 018b7be1db2e..52154fefab7e 100644 --- a/trunk/kernel/sched_cpupri.c +++ b/trunk/kernel/sched_cpupri.c @@ -67,21 +67,24 @@ static int convert_prio(int prio) * Returns: (int)bool - CPUs were found */ int cpupri_find(struct cpupri *cp, struct task_struct *p, - struct cpumask *lowest_mask) + cpumask_t *lowest_mask) { int idx = 0; int task_pri = convert_prio(p->prio); for_each_cpupri_active(cp->pri_active, idx) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; + cpumask_t mask; if (idx >= task_pri) break; - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) + cpus_and(mask, p->cpus_allowed, vec->mask); + + if (cpus_empty(mask)) continue; - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); + *lowest_mask = mask; return 1; } @@ -123,7 +126,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) vec->count--; if (!vec->count) clear_bit(oldpri, cp->pri_active); - cpumask_clear_cpu(cpu, vec->mask); + cpu_clear(cpu, vec->mask); spin_unlock_irqrestore(&vec->lock, flags); } @@ -133,7 +136,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) spin_lock_irqsave(&vec->lock, flags); - cpumask_set_cpu(cpu, vec->mask); + cpu_set(cpu, vec->mask); vec->count++; if (vec->count == 1) set_bit(newpri, cp->pri_active); @@ -147,11 +150,10 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) /** * cpupri_init - initialize the cpupri structure * @cp: The cpupri context - * @bootmem: true if allocations need 
to use bootmem * - * Returns: -ENOMEM if memory fails. + * Returns: (void) */ -int cpupri_init(struct cpupri *cp, bool bootmem) +void cpupri_init(struct cpupri *cp) { int i; @@ -162,30 +164,11 @@ int cpupri_init(struct cpupri *cp, bool bootmem) spin_lock_init(&vec->lock); vec->count = 0; - if (bootmem) - alloc_bootmem_cpumask_var(&vec->mask); - else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) - goto cleanup; + cpus_clear(vec->mask); } for_each_possible_cpu(i) cp->cpu_to_pri[i] = CPUPRI_INVALID; - return 0; - -cleanup: - for (i--; i >= 0; i--) - free_cpumask_var(cp->pri_to_cpu[i].mask); - return -ENOMEM; } -/** - * cpupri_cleanup - clean up the cpupri structure - * @cp: The cpupri context - */ -void cpupri_cleanup(struct cpupri *cp) -{ - int i; - for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) - free_cpumask_var(cp->pri_to_cpu[i].mask); -} diff --git a/trunk/kernel/sched_cpupri.h b/trunk/kernel/sched_cpupri.h index 642a94ef8a0a..f25811b0f931 100644 --- a/trunk/kernel/sched_cpupri.h +++ b/trunk/kernel/sched_cpupri.h @@ -14,7 +14,7 @@ struct cpupri_vec { spinlock_t lock; int count; - cpumask_var_t mask; + cpumask_t mask; }; struct cpupri { @@ -27,8 +27,7 @@ struct cpupri { int cpupri_find(struct cpupri *cp, struct task_struct *p, cpumask_t *lowest_mask); void cpupri_set(struct cpupri *cp, int cpu, int pri); -int cpupri_init(struct cpupri *cp, bool bootmem); -void cpupri_cleanup(struct cpupri *cp); +void cpupri_init(struct cpupri *cp); #else #define cpupri_set(cp, cpu, pri) do { } while (0) #define cpupri_init() do { } while (0) diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index 56c0efe902a7..5ad4440f0fc4 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -1019,33 +1019,16 @@ static void yield_task_fair(struct rq *rq) * search starts with cpus closest then further out as needed, * so we always favor a closer, idle cpu. * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (cpu_active_mask) + * hence we need to mask them out (cpu_active_map) * * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) static int wake_idle(int cpu, struct task_struct *p) { + cpumask_t tmp; struct sched_domain *sd; int i; - unsigned int chosen_wakeup_cpu; - int this_cpu; - - /* - * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu - * are idle and this is not a kernel thread and this task's affinity - * allows it to be moved to preferred cpu, then just move! - */ - - this_cpu = smp_processor_id(); - chosen_wakeup_cpu = - cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; - - if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && - idle_cpu(cpu) && idle_cpu(this_cpu) && - p->mm && !(p->flags & PF_KTHREAD) && - cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) - return chosen_wakeup_cpu; /* * If it is idle, then it is the best cpu to run this task. 
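
The wake_idle() hunk below restores the on-stack mask-intersection idiom: the candidate set is the domain span, narrowed by the task's affinity and by the active map. Condensed into a hypothetical helper (a sketch under the old API, not code from the patch):

	static int first_allowed_idle_cpu(struct sched_domain *sd,
					  struct task_struct *p)
	{
		cpumask_t tmp;
		int i;

		/* candidates = domain span & task affinity & active cpus */
		cpus_and(tmp, sd->span, p->cpus_allowed);
		cpus_and(tmp, tmp, cpu_active_map);

		for_each_cpu_mask_nr(i, tmp)
			if (idle_cpu(i))
				return i;

		return -1;	/* no idle candidate */
	}
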
@@ -1063,9 +1046,10 @@ static int wake_idle(int cpu, struct task_struct *p) if ((sd->flags & SD_WAKE_IDLE) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { - for_each_cpu_and(i, sched_domain_span(sd), - &p->cpus_allowed) { - if (cpu_active(i) && idle_cpu(i)) { + cpus_and(tmp, sd->span, p->cpus_allowed); + cpus_and(tmp, tmp, cpu_active_map); + for_each_cpu_mask_nr(i, tmp) { + if (idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, se.nr_wakeups_idle); @@ -1258,13 +1242,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) * this_cpu and prev_cpu are present in: */ for_each_domain(this_cpu, sd) { - if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { + if (cpu_isset(prev_cpu, sd->span)) { this_sd = sd; break; } } - if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) + if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) goto out; /* diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c index 833b6d44483c..51d2af3e6191 100644 --- a/trunk/kernel/sched_rt.c +++ b/trunk/kernel/sched_rt.c @@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq) if (!rq->online) return; - cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); + cpu_set(rq->cpu, rq->rd->rto_mask); /* * Make sure the mask is visible before we set * the overload count. That is checked to determine @@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq) /* the order here really doesn't matter */ atomic_dec(&rq->rd->rto_count); - cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); + cpu_clear(rq->cpu, rq->rd->rto_mask); } static void update_rt_migration(struct rq *rq) @@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) } #ifdef CONFIG_SMP -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { return cpu_rq(smp_processor_id())->rd->span; } #else -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } #endif @@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) return rt_rq->rt_throttled; } -static inline const struct cpumask *sched_rt_period_mask(void) +static inline cpumask_t sched_rt_period_mask(void) { - return cpu_online_mask; + return cpu_online_map; } static inline @@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq) int i, weight, more = 0; u64 rt_period; - weight = cpumask_weight(rd->span); + weight = cpus_weight(rd->span); spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu(i, rd->span) { + for_each_cpu_mask_nr(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq) /* * Greedy reclaim, take back as much as we can. 
*/ - for_each_cpu(i, rd->span) { + for_each_cpu_mask(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq) static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { int i, idle = 1; - const struct cpumask *span; + cpumask_t span; if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); - for_each_cpu(i, span) { + for_each_cpu_mask(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); @@ -805,20 +805,17 @@ static int select_task_rq_rt(struct task_struct *p, int sync) static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - cpumask_var_t mask; + cpumask_t mask; if (rq->curr->rt.nr_cpus_allowed == 1) return; - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) - return; - if (p->rt.nr_cpus_allowed != 1 - && cpupri_find(&rq->rd->cpupri, p, mask)) - goto free; + && cpupri_find(&rq->rd->cpupri, p, &mask)) + return; - if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) - goto free; + if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) + return; /* * There appears to be other cpus that can accept @@ -827,8 +824,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ requeue_task_rt(rq, p, 1); resched_task(rq->curr); -free: - free_cpumask_var(mask); } #endif /* CONFIG_SMP */ @@ -919,7 +914,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && + (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && (p->rt.nr_cpus_allowed > 1)) return 1; return 0; @@ -958,7 +953,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) return next; } -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); +static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) { @@ -978,7 +973,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); @@ -993,7 +988,7 @@ static int find_lowest_rq(struct task_struct *task) * I guess we might want to change cpupri_find() to ignore those * in the first place. */ - cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); + cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); /* * At this point we have built a mask of cpus representing the @@ -1003,7 +998,7 @@ static int find_lowest_rq(struct task_struct *task) * We prioritize the last cpu that the task executed on since * it is most likely cache-hot in that location. */ - if (cpumask_test_cpu(cpu, lowest_mask)) + if (cpu_isset(cpu, *lowest_mask)) return cpu; /* @@ -1018,8 +1013,7 @@ static int find_lowest_rq(struct task_struct *task) cpumask_t domain_mask; int best_cpu; - cpumask_and(&domain_mask, sched_domain_span(sd), - lowest_mask); + cpus_and(domain_mask, sd->span, *lowest_mask); best_cpu = pick_optimal_cpu(this_cpu, &domain_mask); @@ -1060,8 +1054,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. 
*/ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - &task->cpus_allowed) || + !cpu_isset(lowest_rq->cpu, + task->cpus_allowed) || task_running(rq, task) || !task->se.on_rq)) { @@ -1182,7 +1176,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu(cpu, this_rq->rd->rto_mask) { + for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1311,9 +1305,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, } static void set_cpus_allowed_rt(struct task_struct *p, - const struct cpumask *new_mask) + const cpumask_t *new_mask) { - int weight = cpumask_weight(new_mask); + int weight = cpus_weight(*new_mask); BUG_ON(!rt_task(p)); @@ -1334,7 +1328,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, update_rt_migration(rq); } - cpumask_copy(&p->cpus_allowed, new_mask); + p->cpus_allowed = *new_mask; p->rt.nr_cpus_allowed = weight; } @@ -1377,14 +1371,6 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, if (!rq->rt.rt_nr_running) pull_rt_task(rq); } - -static inline void init_sched_rt_class(void) -{ - unsigned int i; - - for_each_possible_cpu(i) - alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL); -} #endif /* CONFIG_SMP */ /* @@ -1555,4 +1541,3 @@ static void print_rt_stats(struct seq_file *m, int cpu) rcu_read_unlock(); } #endif /* CONFIG_SCHED_DEBUG */ - diff --git a/trunk/kernel/sched_stats.h b/trunk/kernel/sched_stats.h index f2773b5d1226..3b01098164c8 100644 --- a/trunk/kernel/sched_stats.h +++ b/trunk/kernel/sched_stats.h @@ -42,8 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) for_each_domain(cpu, sd) { enum cpu_idle_type itype; - cpumask_scnprintf(mask_str, mask_len, - sched_domain_span(sd)); + cpumask_scnprintf(mask_str, mask_len, sd->span); seq_printf(seq, "domain%d %s", dcount++, mask_str); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { diff --git a/trunk/kernel/taskstats.c b/trunk/kernel/taskstats.c index 6d7dc4ec4aa5..bd6be76303cf 100644 --- a/trunk/kernel/taskstats.c +++ b/trunk/kernel/taskstats.c @@ -352,7 +352,7 @@ static int parse(struct nlattr *na, cpumask_t *mask) if (!data) return -ENOMEM; nla_strlcpy(data, na, len); - ret = cpulist_parse(data, mask); + ret = cpulist_parse(data, *mask); kfree(data); return ret; } diff --git a/trunk/kernel/time/clockevents.c b/trunk/kernel/time/clockevents.c index ea2f48af83cf..f8d968063cea 100644 --- a/trunk/kernel/time/clockevents.c +++ b/trunk/kernel/time/clockevents.c @@ -166,8 +166,6 @@ static void clockevents_notify_released(void) void clockevents_register_device(struct clock_event_device *dev) { BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); - BUG_ON(!dev->cpumask); - /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: diff --git a/trunk/kernel/time/tick-broadcast.c b/trunk/kernel/time/tick-broadcast.c index 9590af2327be..f98a1b7b16e9 100644 --- a/trunk/kernel/time/tick-broadcast.c +++ b/trunk/kernel/time/tick-broadcast.c @@ -150,7 +150,7 @@ static void tick_do_broadcast(cpumask_t mask) */ cpu = first_cpu(mask); td = &per_cpu(tick_cpu_device, cpu); - td->evtdev->broadcast(&mask); + td->evtdev->broadcast(mask); } } diff --git a/trunk/kernel/time/tick-common.c b/trunk/kernel/time/tick-common.c index f8372be74122..df12434b43ca 100644 --- a/trunk/kernel/time/tick-common.c +++ b/trunk/kernel/time/tick-common.c @@ -136,7 +136,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) */ static void 
tick_setup_device(struct tick_device *td, struct clock_event_device *newdev, int cpu, - const struct cpumask *cpumask) + const cpumask_t *cpumask) { ktime_t next_event; void (*handler)(struct clock_event_device *) = NULL; @@ -171,8 +171,8 @@ static void tick_setup_device(struct tick_device *td, * When the device is not per cpu, pin the interrupt to the * current cpu: */ - if (!cpumask_equal(newdev->cpumask, cpumask)) - irq_set_affinity(newdev->irq, cpumask); + if (!cpus_equal(newdev->cpumask, *cpumask)) + irq_set_affinity(newdev->irq, *cpumask); /* * When global broadcasting is active, check if the current @@ -202,14 +202,14 @@ static int tick_check_new_device(struct clock_event_device *newdev) spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, newdev->cpumask)) + if (!cpu_isset(cpu, newdev->cpumask)) goto out_bc; td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; /* cpu local device ? */ - if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { + if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) { /* * If the cpu affinity of the device interrupt can not @@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) * If we have a cpu local device already, do not replace it * by a non cpu local device */ - if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) + if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu))) goto out_bc; } diff --git a/trunk/kernel/time/tick-sched.c b/trunk/kernel/time/tick-sched.c index 76a574bbef97..8f3fc2582d38 100644 --- a/trunk/kernel/time/tick-sched.c +++ b/trunk/kernel/time/tick-sched.c @@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void) if (!ts->tick_stopped) return; - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); now = ktime_get(); ts->idle_waketime = now; @@ -301,7 +301,7 @@ void tick_nohz_stop_sched_tick(int inidle) tick_do_timer_cpu = TICK_DO_TIMER_NONE; if (delta_jiffies > 1) - cpumask_set_cpu(cpu, nohz_cpu_mask); + cpu_set(cpu, nohz_cpu_mask); /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) @@ -319,7 +319,7 @@ void tick_nohz_stop_sched_tick(int inidle) /* * sched tick not stopped! */ - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); goto out; } @@ -361,7 +361,7 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -439,7 +439,7 @@ void tick_nohz_restart_sched_tick(void) select_nohz_load_balancer(0); now = ktime_get(); tick_do_update_jiffies64(now); - cpumask_clear_cpu(cpu, nohz_cpu_mask); + cpu_clear(cpu, nohz_cpu_mask); /* * We stopped the tick in idle. 
Update process times would miss the diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c index 0e91f43b6baf..4185d5221633 100644 --- a/trunk/kernel/trace/trace.c +++ b/trunk/kernel/trace/trace.c @@ -2674,7 +2674,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -2695,7 +2695,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); if (err) goto err_unlock; diff --git a/trunk/lib/Kconfig b/trunk/lib/Kconfig index 2ba43c4a5b07..fd4118e097f0 100644 --- a/trunk/lib/Kconfig +++ b/trunk/lib/Kconfig @@ -159,11 +159,4 @@ config CHECK_SIGNATURE config HAVE_LMB boolean -config CPUMASK_OFFSTACK - bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS - help - Use dynamic allocation for cpumask_var_t, instead of putting - them on the stack. This is a bit more expensive, but avoids - stack overflow. - endmenu diff --git a/trunk/mm/slub.c b/trunk/mm/slub.c index 0d861c3154b6..6cb7ad107852 100644 --- a/trunk/mm/slub.c +++ b/trunk/mm/slub.c @@ -3642,7 +3642,7 @@ static int list_locations(struct kmem_cache *s, char *buf, len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, - &l->cpus); + l->cpus); } if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && diff --git a/trunk/virt/kvm/ioapic.c b/trunk/virt/kvm/ioapic.c index 23b81cf242af..53772bb46320 100644 --- a/trunk/virt/kvm/ioapic.c +++ b/trunk/virt/kvm/ioapic.c @@ -150,11 +150,10 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic, static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) { kvm_inject_nmi(vcpu); - kvm_vcpu_kick(vcpu); } -u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, - u8 dest_mode) +static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, + u8 dest_mode) { u32 mask = 0; int i; @@ -208,8 +207,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) "vector=%x trig_mode=%x\n", dest, dest_mode, delivery_mode, vector, trig_mode); - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, - dest_mode); + deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); if (!deliver_bitmask) { ioapic_debug("no target on destination\n"); return 0; diff --git a/trunk/virt/kvm/ioapic.h b/trunk/virt/kvm/ioapic.h index 49c9581d2586..cd7ae7691c9d 100644 --- a/trunk/virt/kvm/ioapic.h +++ b/trunk/virt/kvm/ioapic.h @@ -85,7 +85,5 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); -u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, - u8 dest_mode); #endif diff --git a/trunk/virt/kvm/irq_comm.c b/trunk/virt/kvm/irq_comm.c index aa5d1e5c497e..55ad76ee2d09 100644 --- a/trunk/virt/kvm/irq_comm.c +++ b/trunk/virt/kvm/irq_comm.c @@ -61,9 +61,10 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); } -void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian) +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + 
struct kvm_irq_ack_notifier *kian) { - hlist_del_init(&kian->link); + hlist_del(&kian->link); } /* The caller must hold kvm->lock mutex */ @@ -72,15 +73,11 @@ int kvm_request_irq_source_id(struct kvm *kvm) unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; int irq_source_id = find_first_zero_bit(bitmap, sizeof(kvm->arch.irq_sources_bitmap)); - if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); - return -EFAULT; - } - - ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); - set_bit(irq_source_id, bitmap); - + irq_source_id = -EFAULT; + } else + set_bit(irq_source_id, bitmap); return irq_source_id; } @@ -88,9 +85,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) { int i; - ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); - - if (irq_source_id < 0 || + if (irq_source_id <= 0 || irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); return; diff --git a/trunk/virt/kvm/kvm_main.c b/trunk/virt/kvm/kvm_main.c index fc6127cbea1f..a87f45edfae8 100644 --- a/trunk/virt/kvm/kvm_main.c +++ b/trunk/virt/kvm/kvm_main.c @@ -47,10 +47,6 @@ #include #include -#ifdef CONFIG_X86 -#include -#endif - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET #include "coalesced_mmio.h" #endif @@ -64,13 +60,10 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -static int msi2intx = 1; -module_param(msi2intx, bool, 0); - DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); -static cpumask_var_t cpus_hardware_enabled; +static cpumask_t cpus_hardware_enabled; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -82,60 +75,9 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); -static bool kvm_rebooting; +bool kvm_rebooting; #ifdef KVM_CAP_DEVICE_ASSIGNMENT - -#ifdef CONFIG_X86 -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) -{ - int vcpu_id; - struct kvm_vcpu *vcpu; - struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm); - int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK) - >> MSI_ADDR_DEST_ID_SHIFT; - int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK) - >> MSI_DATA_VECTOR_SHIFT; - int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.address_lo); - int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&dev->guest_msi.data); - int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.data); - u32 deliver_bitmask; - - BUG_ON(!ioapic); - - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, - dest_id, dest_mode); - /* IOAPIC delivery mode value is the same as MSI here */ - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); - if (vcpu != NULL) - kvm_apic_set_irq(vcpu, vector, trig_mode); - else - printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); - break; - case IOAPIC_FIXED: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) - kvm_apic_set_irq(vcpu, vector, trig_mode); - } - break; - default: - printk(KERN_INFO "kvm: unsupported MSI delivery mode\n"); - } -} -#else -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {} -#endif - static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head 
*head, int assigned_dev_id) { @@ -162,16 +104,9 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&assigned_dev->kvm->lock); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - else if (assigned_dev->irq_requested_type & - KVM_ASSIGNED_DEV_GUEST_MSI) { - assigned_device_msi_dispatch(assigned_dev); - enable_irq(assigned_dev->host_irq); - assigned_dev->host_irq_disabled = false; - } + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); mutex_unlock(&assigned_dev->kvm->lock); kvm_put_kvm(assigned_dev->kvm); } @@ -182,12 +117,8 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) (struct kvm_assigned_dev_kernel *) dev_id; kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - return IRQ_HANDLED; } @@ -201,32 +132,19 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) dev = container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - - /* The guest irq may be shared so this ack may be - * from another device. - */ - if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); - dev->host_irq_disabled = false; - } + enable_irq(dev->host_irq); } -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) { - if (!irqchip_in_kernel(kvm)) - return; - - kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); + if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) + free_irq(assigned_dev->host_irq, (void *)assigned_dev); - if (assigned_dev->irq_source_id != -1) - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); - assigned_dev->irq_source_id = -1; - - if (!assigned_dev->irq_requested_type) - return; + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); if (cancel_work_sync(&assigned_dev->interrupt_work)) /* We had pending work. 
That means we will have to take @@ -234,23 +152,6 @@ static void kvm_free_assigned_irq(struct kvm *kvm, */ kvm_put_kvm(kvm); - free_irq(assigned_dev->host_irq, (void *)assigned_dev); - - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - pci_disable_msi(assigned_dev->dev); - - assigned_dev->irq_requested_type = 0; -} - - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - kvm_free_assigned_irq(kvm, assigned_dev); - - pci_reset_function(assigned_dev->dev); - pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); pci_dev_put(assigned_dev->dev); @@ -273,95 +174,6 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) } } -static int assigned_device_update_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) -{ - adev->guest_irq = airq->guest_irq; - adev->ack_notifier.gsi = airq->guest_irq; - - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) - return 0; - - if (irqchip_in_kernel(kvm)) { - if (!msi2intx && - adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) { - free_irq(adev->host_irq, (void *)kvm); - pci_disable_msi(adev->dev); - } - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (airq->host_irq) - adev->host_irq = airq->host_irq; - else - adev->host_irq = adev->dev->irq; - - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, - 0, "kvm_assigned_intx_device", (void *)adev)) - return -EIO; - } - - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | - KVM_ASSIGNED_DEV_HOST_INTX; - return 0; -} - -#ifdef CONFIG_X86 -static int assigned_device_update_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) -{ - int r; - - if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { - /* x86 don't care upper address of guest msi message addr */ - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; - adev->guest_msi.address_lo = airq->guest_msi.addr_lo; - adev->guest_msi.data = airq->guest_msi.data; - adev->ack_notifier.gsi = -1; - } else if (msi2intx) { - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; - adev->guest_irq = airq->guest_irq; - adev->ack_notifier.gsi = airq->guest_irq; - } - - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - return 0; - - if (irqchip_in_kernel(kvm)) { - if (!msi2intx) { - if (adev->irq_requested_type & - KVM_ASSIGNED_DEV_HOST_INTX) - free_irq(adev->host_irq, (void *)adev); - - r = pci_enable_msi(adev->dev); - if (r) - return r; - } - - adev->host_irq = adev->dev->irq; - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_msi_device", (void *)adev)) - return -EIO; - } - - if (!msi2intx) - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; - - adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; - return 0; -} -#endif - static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, struct kvm_assigned_irq *assigned_irq) @@ -378,68 +190,49 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, return -EINVAL; } - if (!match->irq_requested_type) { - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - if (irqchip_in_kernel(kvm)) { - /* Register ack 
nofitier */
-			match->ack_notifier.gsi = -1;
-			match->ack_notifier.irq_acked =
-					kvm_assigned_dev_ack_irq;
-			kvm_register_irq_ack_notifier(kvm,
-					&match->ack_notifier);
-
-			/* Request IRQ source ID */
-			r = kvm_request_irq_source_id(kvm);
-			if (r < 0)
-				goto out_release;
-			else
-				match->irq_source_id = r;
-
-#ifdef CONFIG_X86
-			/* Determine host device irq type, we can know the
-			 * result from dev->msi_enabled */
-			if (msi2intx)
-				pci_enable_msi(match->dev);
-#endif
-		}
+	if (match->irq_requested) {
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		mutex_unlock(&kvm->lock);
+		return 0;
 	}
 
-	if ((!msi2intx &&
-	     (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
-	    (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
-		r = assigned_device_update_msi(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"MSI device!\n");
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
+
+	if (irqchip_in_kernel(kvm)) {
+		if (!capable(CAP_SYS_RAWIO)) {
+			r = -EPERM;
 			goto out_release;
 		}
-#else
-		r = -ENOTTY;
-#endif
-	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
-		/* Host device IRQ 0 means don't support INTx */
-		if (!msi2intx) {
-			printk(KERN_WARNING
-			       "kvm: wait device to enable MSI!\n");
-			r = 0;
-		} else {
-			printk(KERN_WARNING
-			       "kvm: failed to enable MSI device!\n");
-			r = -ENOTTY;
+
+		if (assigned_irq->host_irq)
+			match->host_irq = assigned_irq->host_irq;
+		else
+			match->host_irq = match->dev->irq;
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+		r = kvm_request_irq_source_id(kvm);
+		if (r < 0)
 			goto out_release;
-		}
-	} else {
-		/* Non-sharing INTx mode */
-		r = assigned_device_update_intx(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"INTx device!\n");
+		else
+			match->irq_source_id = r;
+
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_device", (void *)match)) {
+			r = -EIO;
 			goto out_release;
 		}
 	}
+	match->irq_requested = true;
 
 	mutex_unlock(&kvm->lock);
 	return r;
 out_release:
@@ -490,14 +283,11 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 			__func__);
 		goto out_disable;
 	}
-
-	pci_reset_function(dev);
-
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
 	match->dev = dev;
-	match->irq_source_id = -1;
+
 	match->kvm = kvm;
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
@@ -565,49 +355,58 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
 	int i, cpu, me;
-	cpumask_var_t cpus;
-	bool called = true;
+	cpumask_t cpus;
 	struct kvm_vcpu *vcpu;
 
-	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
-		cpumask_clear(cpus);
-
 	me = get_cpu();
+	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
 		if (!vcpu)
 			continue;
-		if (test_and_set_bit(req, &vcpu->requests))
+		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpus != NULL && cpu != -1 && cpu != me)
-			cpumask_set_cpu(cpu, cpus);
+		if (cpu != -1 && cpu != me)
+			cpu_set(cpu, cpus);
 	}
-	if (unlikely(cpus == NULL))
-		smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
-	else if (!cpumask_empty(cpus))
-		smp_call_function_many(cpus, ack_flush, NULL, 1);
-	else
-		called = false;
+	if (cpus_empty(cpus))
+		goto out;
+	++kvm->stat.remote_tlb_flush;
+	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
 	put_cpu();
-	free_cpumask_var(cpus);
-	return called;
-}
-
-void kvm_flush_remote_tlbs(struct kvm *kvm)
-{
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
-		++kvm->stat.remote_tlb_flush;
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	int i, cpu, me;
+	cpumask_t cpus;
+	struct kvm_vcpu *vcpu;
+
+	me = get_cpu();
+	cpus_clear(cpus);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
+		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+			continue;
+		cpu = vcpu->cpu;
+		if (cpu != -1 && cpu != me)
+			cpu_set(cpu, cpus);
+	}
+	if (cpus_empty(cpus))
+		goto out;
+	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+	put_cpu();
 }
 
+
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 {
 	struct page *page;
@@ -911,8 +710,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
-	if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
-		goto out;
 	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 		goto out;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
@@ -1024,10 +821,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out_free;
 	}
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	if (!npages)
-		*memslot = old;
+	kvm_free_physmem_slot(&old, &new);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -1124,7 +918,7 @@ int kvm_is_error_hva(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
+static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 
@@ -1137,12 +931,11 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
 
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 {
 	gfn = unalias_gfn(kvm, gfn);
-	return gfn_to_memslot_unaliased(kvm, gfn);
+	return __gfn_to_memslot(kvm, gfn);
 }
 
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -1166,7 +959,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *slot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	slot = __gfn_to_memslot(kvm, gfn);
 	if (!slot)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@@ -1417,7 +1210,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	gfn = unalias_gfn(kvm, gfn);
-	memslot = gfn_to_memslot_unaliased(kvm, gfn);
+	memslot = __gfn_to_memslot(kvm, gfn);
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
@@ -1502,7 +1295,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct file_operations kvm_vcpu_fops = {
+static const struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
 	.compat_ioctl   = kvm_vcpu_ioctl,
@@ -1896,7 +1689,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static struct file_operations kvm_vm_fops = {
+static const struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 	.compat_ioctl   = kvm_vm_ioctl,
@@ -1918,18 +1711,6 @@ static int kvm_dev_ioctl_create_vm(void)
 	return fd;
 }
 
-static long kvm_dev_ioctl_check_extension_generic(long arg)
-{
-	switch (arg) {
-	case KVM_CAP_USER_MEMORY:
-	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-		return 1;
-	default:
-		break;
-	}
-	return kvm_dev_ioctl_check_extension(arg);
-}
-
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
@@ -1949,7 +1730,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm();
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension_generic(arg);
+		r = kvm_dev_ioctl_check_extension(arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
@@ -1990,9 +1771,9 @@ static void hardware_enable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
-	if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
+	if (cpu_isset(cpu, cpus_hardware_enabled))
 		return;
-	cpumask_set_cpu(cpu, cpus_hardware_enabled);
+	cpu_set(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_enable(NULL);
 }
 
@@ -2000,9 +1781,9 @@ static void hardware_disable(void *junk)
 {
 	int cpu = raw_smp_processor_id();
 
-	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
+	if (!cpu_isset(cpu, cpus_hardware_enabled))
 		return;
-	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
+	cpu_clear(cpu, cpus_hardware_enabled);
 	kvm_arch_hardware_disable(NULL);
 }
 
@@ -2236,14 +2017,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	bad_pfn = page_to_pfn(bad_page);
 
-	if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
-		r = -ENOMEM;
-		goto out_free_0;
-	}
-
 	r = kvm_arch_hardware_setup();
 	if (r < 0)
-		goto out_free_0a;
+		goto out_free_0;
 
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu,
@@ -2277,8 +2053,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	}
 
 	kvm_chardev_ops.owner = module;
-	kvm_vm_fops.owner = module;
-	kvm_vcpu_fops.owner = module;
 
 	r = misc_register(&kvm_dev);
 	if (r) {
@@ -2288,9 +2062,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
-	msi2intx = 0;
-#endif
 
 	return 0;
 
@@ -2307,8 +2078,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	on_each_cpu(hardware_disable, NULL, 1);
 out_free_1:
 	kvm_arch_hardware_unsetup();
-out_free_0a:
-	free_cpumask_var(cpus_hardware_enabled);
out_free_0:
 	__free_page(bad_page);
 out:
@@ -2332,7 +2101,6 @@ void kvm_exit(void)
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	kvm_exit_debug();
-	free_cpumask_var(cpus_hardware_enabled);
 	__free_page(bad_page);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/trunk/virt/kvm/kvm_trace.c b/trunk/virt/kvm/kvm_trace.c
index f59874446440..41dcc845f78c 100644
--- a/trunk/virt/kvm/kvm_trace.c
+++ b/trunk/virt/kvm/kvm_trace.c
@@ -252,7 +252,6 @@ void kvm_trace_cleanup(void)
 			struct kvm_trace_probe *p = &kvm_trace_probes[i];
 			marker_probe_unregister(p->name, p->probe_func, p);
 		}
-		marker_synchronize_unregister();
 		relay_close(kt->rchan);
 		debugfs_remove(kt->lost_file);